]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/camellia/asm/cmllt4-sparcv9.pl
Many spelling fixes/typo's corrected.
[thirdparty/openssl.git] / crypto / camellia / asm / cmllt4-sparcv9.pl
1 #! /usr/bin/env perl
2 # Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by David S. Miller and Andy Polyakov.
12 # The module is licensed under 2-clause BSD
13 # license. October 2012. All rights reserved.
14 # ====================================================================
15
16 ######################################################################
17 # Camellia for SPARC T4.
18 #
19 # As with AES, the results below [for aligned data] are virtually identical
20 # to critical path lengths for 3-cycle instruction latency:
21 #
22 # 128-bit key 192/256-
23 # CBC encrypt 4.14/4.21(*) 5.46/5.52
24 # (*) numbers after slash are for
25 # misaligned data.
26 #
27 # As with Intel AES-NI, question is if it's possible to improve
28 # performance of parallelizable modes by interleaving round
29 # instructions. In Camellia every instruction is dependent on
30 # previous, which means that there is place for 2 additional ones
31 # in between two dependent. Can we expect 3x performance improvement?
32 # At least one can argue that it should be possible to break 2x
33 # barrier... For some reason not even 2x appears to be possible:
34 #
35 # 128-bit key 192/256-
36 # CBC decrypt 2.21/2.74 2.99/3.40
37 # CTR 2.15/2.68(*) 2.93/3.34
38 # (*) numbers after slash are for
39 # misaligned data.
40 #
41 # This is for 2x interleave. But compared to 1x interleave CBC decrypt
42 # improved by ... 0% for 128-bit key, and 11% for 192/256-bit one.
43 # So that out-of-order execution logic can take non-interleaved code
44 # to 1.87x, but can't take 2x interleaved one any further. There
45 # surely is some explanation... As a result 3x interleave was not even
46 # attempted. Instead an effort was made to share specific modes
47 # implementations with AES module (therefore sparcv9_modes.pl).
48 #
49 # To anchor to something else, software C implementation processes
50 # one byte in 38 cycles with 128-bit key on same processor.
51
52 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of the script path, including the trailing separator
53 push(@INC,"${dir}","${dir}../../perlasm"); # search the script's own directory and ../../perlasm relative to it
54 require "sparcv9_modes.pl"; # presumably supplies the alg_*_implement and emit_assembler helpers called below -- TODO confirm
55
56 $output = pop; # the last command-line argument names the output file
57 if (defined $output) { open(STDOUT, '>', $output) or die "can't open $output: $!"; } # checked 3-arg open; STDOUT is left untouched when no file is named
58
59 $::evp=1; # if $evp is set to 0, script generates module with
60 # Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
61 # entry points. These are fully compatible with openssl/camellia.h.
62
63 ######################################################################
64 # single-round subroutines
65 #
66 {
67 my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));
68
69 $code=<<___;
70 #include "sparc_arch.h"
71
72 .text
73
74 .globl cmll_t4_encrypt
75 .align 32
76 cmll_t4_encrypt:
77 andcc $inp, 7, %g1 ! is input aligned?
78 andn $inp, 7, $inp
79
80 ldx [$key + 0], %g4
81 ldx [$key + 8], %g5
82
83 ldx [$inp + 0], %o4
84 bz,pt %icc, 1f
85 ldx [$inp + 8], %o5
86 ldx [$inp + 16], $inp
87 sll %g1, 3, %g1
88 sub %g0, %g1, %o3
89 sllx %o4, %g1, %o4
90 sllx %o5, %g1, %g1
91 srlx %o5, %o3, %o5
92 srlx $inp, %o3, %o3
93 or %o5, %o4, %o4
94 or %o3, %g1, %o5
95 1:
96 ld [$key + 272], $rounds ! grandRounds, 3 or 4
97 ldd [$key + 16], %f12
98 ldd [$key + 24], %f14
99 xor %g4, %o4, %o4
100 xor %g5, %o5, %o5
101 ldd [$key + 32], %f16
102 ldd [$key + 40], %f18
103 movxtod %o4, %f0
104 movxtod %o5, %f2
105 ldd [$key + 48], %f20
106 ldd [$key + 56], %f22
107 sub $rounds, 1, $rounds
108 ldd [$key + 64], %f24
109 ldd [$key + 72], %f26
110 add $key, 80, $key
111
112 .Lenc:
113 camellia_f %f12, %f2, %f0, %f2
114 ldd [$key + 0], %f12
115 sub $rounds,1,$rounds
116 camellia_f %f14, %f0, %f2, %f0
117 ldd [$key + 8], %f14
118 camellia_f %f16, %f2, %f0, %f2
119 ldd [$key + 16], %f16
120 camellia_f %f18, %f0, %f2, %f0
121 ldd [$key + 24], %f18
122 camellia_f %f20, %f2, %f0, %f2
123 ldd [$key + 32], %f20
124 camellia_f %f22, %f0, %f2, %f0
125 ldd [$key + 40], %f22
126 camellia_fl %f24, %f0, %f0
127 ldd [$key + 48], %f24
128 camellia_fli %f26, %f2, %f2
129 ldd [$key + 56], %f26
130 brnz,pt $rounds, .Lenc
131 add $key, 64, $key
132
133 andcc $out, 7, $tmp ! is output aligned?
134 camellia_f %f12, %f2, %f0, %f2
135 camellia_f %f14, %f0, %f2, %f0
136 camellia_f %f16, %f2, %f0, %f2
137 camellia_f %f18, %f0, %f2, %f0
138 camellia_f %f20, %f2, %f0, %f4
139 camellia_f %f22, %f0, %f4, %f2
140 fxor %f24, %f4, %f0
141 fxor %f26, %f2, %f2
142
143 bnz,pn %icc, 2f
144 nop
145
146 std %f0, [$out + 0]
147 retl
148 std %f2, [$out + 8]
149
150 2: alignaddrl $out, %g0, $out
151 mov 0xff, $mask
152 srl $mask, $tmp, $mask
153
154 faligndata %f0, %f0, %f4
155 faligndata %f0, %f2, %f6
156 faligndata %f2, %f2, %f8
157
158 stda %f4, [$out + $mask]0xc0 ! partial store
159 std %f6, [$out + 8]
160 add $out, 16, $out
161 orn %g0, $mask, $mask
162 retl
163 stda %f8, [$out + $mask]0xc0 ! partial store
164 .type cmll_t4_encrypt,#function
165 .size cmll_t4_encrypt,.-cmll_t4_encrypt
166
167 .globl cmll_t4_decrypt
168 .align 32
169 cmll_t4_decrypt:
170 ld [$key + 272], $rounds ! grandRounds, 3 or 4
171 andcc $inp, 7, %g1 ! is input aligned?
172 andn $inp, 7, $inp
173
174 sll $rounds, 6, $rounds
175 add $rounds, $key, $key
176
177 ldx [$inp + 0], %o4
178 bz,pt %icc, 1f
179 ldx [$inp + 8], %o5
180 ldx [$inp + 16], $inp
181 sll %g1, 3, %g1
182 sub %g0, %g1, %g4
183 sllx %o4, %g1, %o4
184 sllx %o5, %g1, %g1
185 srlx %o5, %g4, %o5
186 srlx $inp, %g4, %g4
187 or %o5, %o4, %o4
188 or %g4, %g1, %o5
189 1:
190 ldx [$key + 0], %g4
191 ldx [$key + 8], %g5
192 ldd [$key - 8], %f12
193 ldd [$key - 16], %f14
194 xor %g4, %o4, %o4
195 xor %g5, %o5, %o5
196 ldd [$key - 24], %f16
197 ldd [$key - 32], %f18
198 movxtod %o4, %f0
199 movxtod %o5, %f2
200 ldd [$key - 40], %f20
201 ldd [$key - 48], %f22
202 sub $rounds, 64, $rounds
203 ldd [$key - 56], %f24
204 ldd [$key - 64], %f26
205 sub $key, 64, $key
206
207 .Ldec:
208 camellia_f %f12, %f2, %f0, %f2
209 ldd [$key - 8], %f12
210 sub $rounds, 64, $rounds
211 camellia_f %f14, %f0, %f2, %f0
212 ldd [$key - 16], %f14
213 camellia_f %f16, %f2, %f0, %f2
214 ldd [$key - 24], %f16
215 camellia_f %f18, %f0, %f2, %f0
216 ldd [$key - 32], %f18
217 camellia_f %f20, %f2, %f0, %f2
218 ldd [$key - 40], %f20
219 camellia_f %f22, %f0, %f2, %f0
220 ldd [$key - 48], %f22
221 camellia_fl %f24, %f0, %f0
222 ldd [$key - 56], %f24
223 camellia_fli %f26, %f2, %f2
224 ldd [$key - 64], %f26
225 brnz,pt $rounds, .Ldec
226 sub $key, 64, $key
227
228 andcc $out, 7, $tmp ! is output aligned?
229 camellia_f %f12, %f2, %f0, %f2
230 camellia_f %f14, %f0, %f2, %f0
231 camellia_f %f16, %f2, %f0, %f2
232 camellia_f %f18, %f0, %f2, %f0
233 camellia_f %f20, %f2, %f0, %f4
234 camellia_f %f22, %f0, %f4, %f2
235 fxor %f26, %f4, %f0
236 fxor %f24, %f2, %f2
237
238 bnz,pn %icc, 2f
239 nop
240
241 std %f0, [$out + 0]
242 retl
243 std %f2, [$out + 8]
244
245 2: alignaddrl $out, %g0, $out
246 mov 0xff, $mask
247 srl $mask, $tmp, $mask
248
249 faligndata %f0, %f0, %f4
250 faligndata %f0, %f2, %f6
251 faligndata %f2, %f2, %f8
252
253 stda %f4, [$out + $mask]0xc0 ! partial store
254 std %f6, [$out + 8]
255 add $out, 16, $out
256 orn %g0, $mask, $mask
257 retl
258 stda %f8, [$out + $mask]0xc0 ! partial store
259 .type cmll_t4_decrypt,#function
260 .size cmll_t4_decrypt,.-cmll_t4_decrypt
261 ___
262 }
263
264 ######################################################################
265 # key setup subroutines
266 #
267 {
268 sub ROTL128 { # returns asm text rotating the 128-bit value held in %o4:%o5 left by the given bit count (%g4/%g5 are scratch)
269 my ($n) = @_; # rotate amount; "64-$n" is emitted as text for the assembler to evaluate
270 return join("\n\t",
271 "srlx %o4, 64-$n, %g4",
272 "sllx %o4, $n, %o4",
273 "srlx %o5, 64-$n, %g5",
274 "sllx %o5, $n, %o5",
275 "or %o4, %g5, %o4",
276 "or %o5, %g4, %o5");
277 }
278
279 my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
280 $code.=<<___;
281 .globl cmll_t4_set_key
282 .align 32
283 cmll_t4_set_key:
284 and $inp, 7, $tmp
285 alignaddr $inp, %g0, $inp
286 cmp $bits, 192
287 ldd [$inp + 0], %f0
288 bl,pt %icc,.L128
289 ldd [$inp + 8], %f2
290
291 be,pt %icc,.L192
292 ldd [$inp + 16], %f4
293
294 brz,pt $tmp, .L256aligned
295 ldd [$inp + 24], %f6
296
297 ldd [$inp + 32], %f8
298 faligndata %f0, %f2, %f0
299 faligndata %f2, %f4, %f2
300 faligndata %f4, %f6, %f4
301 b .L256aligned
302 faligndata %f6, %f8, %f6
303
304 .align 16
305 .L192:
306 brz,a,pt $tmp, .L256aligned
307 fnot2 %f4, %f6
308
309 ldd [$inp + 24], %f6
310 nop
311 faligndata %f0, %f2, %f0
312 faligndata %f2, %f4, %f2
313 faligndata %f4, %f6, %f4
314 fnot2 %f4, %f6
315
316 .L256aligned:
317 std %f0, [$out + 0] ! k[0, 1]
318 fsrc2 %f0, %f28
319 std %f2, [$out + 8] ! k[2, 3]
320 fsrc2 %f2, %f30
321 fxor %f4, %f0, %f0
322 b .L128key
323 fxor %f6, %f2, %f2
324
325 .align 16
326 .L128:
327 brz,pt $tmp, .L128aligned
328 nop
329
330 ldd [$inp + 16], %f4
331 nop
332 faligndata %f0, %f2, %f0
333 faligndata %f2, %f4, %f2
334
335 .L128aligned:
336 std %f0, [$out + 0] ! k[0, 1]
337 fsrc2 %f0, %f28
338 std %f2, [$out + 8] ! k[2, 3]
339 fsrc2 %f2, %f30
340
341 .L128key:
342 mov %o7, %o5
343 1: call .+8
344 add %o7, SIGMA-1b, %o4
345 mov %o5, %o7
346
347 ldd [%o4 + 0], %f16
348 ldd [%o4 + 8], %f18
349 ldd [%o4 + 16], %f20
350 ldd [%o4 + 24], %f22
351
352 camellia_f %f16, %f2, %f0, %f2
353 camellia_f %f18, %f0, %f2, %f0
354 fxor %f28, %f0, %f0
355 fxor %f30, %f2, %f2
356 camellia_f %f20, %f2, %f0, %f2
357 camellia_f %f22, %f0, %f2, %f0
358
359 bge,pn %icc, .L256key
360 nop
361 std %f0, [$out + 0x10] ! k[ 4, 5]
362 std %f2, [$out + 0x18] ! k[ 6, 7]
363
364 movdtox %f0, %o4
365 movdtox %f2, %o5
366 `&ROTL128(15)`
367 stx %o4, [$out + 0x30] ! k[12, 13]
368 stx %o5, [$out + 0x38] ! k[14, 15]
369 `&ROTL128(15)`
370 stx %o4, [$out + 0x40] ! k[16, 17]
371 stx %o5, [$out + 0x48] ! k[18, 19]
372 `&ROTL128(15)`
373 stx %o4, [$out + 0x60] ! k[24, 25]
374 `&ROTL128(15)`
375 stx %o4, [$out + 0x70] ! k[28, 29]
376 stx %o5, [$out + 0x78] ! k[30, 31]
377 `&ROTL128(34)`
378 stx %o4, [$out + 0xa0] ! k[40, 41]
379 stx %o5, [$out + 0xa8] ! k[42, 43]
380 `&ROTL128(17)`
381 stx %o4, [$out + 0xc0] ! k[48, 49]
382 stx %o5, [$out + 0xc8] ! k[50, 51]
383
384 movdtox %f28, %o4 ! k[ 0, 1]
385 movdtox %f30, %o5 ! k[ 2, 3]
386 `&ROTL128(15)`
387 stx %o4, [$out + 0x20] ! k[ 8, 9]
388 stx %o5, [$out + 0x28] ! k[10, 11]
389 `&ROTL128(30)`
390 stx %o4, [$out + 0x50] ! k[20, 21]
391 stx %o5, [$out + 0x58] ! k[22, 23]
392 `&ROTL128(15)`
393 stx %o5, [$out + 0x68] ! k[26, 27]
394 `&ROTL128(17)`
395 stx %o4, [$out + 0x80] ! k[32, 33]
396 stx %o5, [$out + 0x88] ! k[34, 35]
397 `&ROTL128(17)`
398 stx %o4, [$out + 0x90] ! k[36, 37]
399 stx %o5, [$out + 0x98] ! k[38, 39]
400 `&ROTL128(17)`
401 stx %o4, [$out + 0xb0] ! k[44, 45]
402 stx %o5, [$out + 0xb8] ! k[46, 47]
403
404 mov 3, $tmp
405 st $tmp, [$out + 0x110]
406 retl
407 xor %o0, %o0, %o0
408
409 .align 16
410 .L256key:
411 ldd [%o4 + 32], %f24
412 ldd [%o4 + 40], %f26
413
414 std %f0, [$out + 0x30] ! k[12, 13]
415 std %f2, [$out + 0x38] ! k[14, 15]
416
417 fxor %f4, %f0, %f0
418 fxor %f6, %f2, %f2
419 camellia_f %f24, %f2, %f0, %f2
420 camellia_f %f26, %f0, %f2, %f0
421
422 std %f0, [$out + 0x10] ! k[ 4, 5]
423 std %f2, [$out + 0x18] ! k[ 6, 7]
424
425 movdtox %f0, %o4
426 movdtox %f2, %o5
427 `&ROTL128(30)`
428 stx %o4, [$out + 0x50] ! k[20, 21]
429 stx %o5, [$out + 0x58] ! k[22, 23]
430 `&ROTL128(30)`
431 stx %o4, [$out + 0xa0] ! k[40, 41]
432 stx %o5, [$out + 0xa8] ! k[42, 43]
433 `&ROTL128(51)`
434 stx %o4, [$out + 0x100] ! k[64, 65]
435 stx %o5, [$out + 0x108] ! k[66, 67]
436
437 movdtox %f4, %o4 ! k[ 8, 9]
438 movdtox %f6, %o5 ! k[10, 11]
439 `&ROTL128(15)`
440 stx %o4, [$out + 0x20] ! k[ 8, 9]
441 stx %o5, [$out + 0x28] ! k[10, 11]
442 `&ROTL128(15)`
443 stx %o4, [$out + 0x40] ! k[16, 17]
444 stx %o5, [$out + 0x48] ! k[18, 19]
445 `&ROTL128(30)`
446 stx %o4, [$out + 0x90] ! k[36, 37]
447 stx %o5, [$out + 0x98] ! k[38, 39]
448 `&ROTL128(34)`
449 stx %o4, [$out + 0xd0] ! k[52, 53]
450 stx %o5, [$out + 0xd8] ! k[54, 55]
451 ldx [$out + 0x30], %o4 ! k[12, 13]
452 ldx [$out + 0x38], %o5 ! k[14, 15]
453 `&ROTL128(15)`
454 stx %o4, [$out + 0x30] ! k[12, 13]
455 stx %o5, [$out + 0x38] ! k[14, 15]
456 `&ROTL128(30)`
457 stx %o4, [$out + 0x70] ! k[28, 29]
458 stx %o5, [$out + 0x78] ! k[30, 31]
459 srlx %o4, 32, %g4
460 srlx %o5, 32, %g5
461 st %o4, [$out + 0xc0] ! k[48]
462 st %g5, [$out + 0xc4] ! k[49]
463 st %o5, [$out + 0xc8] ! k[50]
464 st %g4, [$out + 0xcc] ! k[51]
465 `&ROTL128(49)`
466 stx %o4, [$out + 0xe0] ! k[56, 57]
467 stx %o5, [$out + 0xe8] ! k[58, 59]
468
469 movdtox %f28, %o4 ! k[ 0, 1]
470 movdtox %f30, %o5 ! k[ 2, 3]
471 `&ROTL128(45)`
472 stx %o4, [$out + 0x60] ! k[24, 25]
473 stx %o5, [$out + 0x68] ! k[26, 27]
474 `&ROTL128(15)`
475 stx %o4, [$out + 0x80] ! k[32, 33]
476 stx %o5, [$out + 0x88] ! k[34, 35]
477 `&ROTL128(17)`
478 stx %o4, [$out + 0xb0] ! k[44, 45]
479 stx %o5, [$out + 0xb8] ! k[46, 47]
480 `&ROTL128(34)`
481 stx %o4, [$out + 0xf0] ! k[60, 61]
482 stx %o5, [$out + 0xf8] ! k[62, 63]
483
484 mov 4, $tmp
485 st $tmp, [$out + 0x110]
486 retl
487 xor %o0, %o0, %o0
488 .type cmll_t4_set_key,#function
489 .size cmll_t4_set_key,.-cmll_t4_set_key
490 .align 32
491 SIGMA:
492 .long 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
493 .long 0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
494 .long 0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
495 .type SIGMA,#object
496 .size SIGMA,.-SIGMA
497 .asciz "Camellia for SPARC T4, David S. Miller, Andy Polyakov"
498 ___
499 }
500
501 {{{
502 my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
503 my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));
504
505 $code.=<<___;
506 .align 32
507 _cmll128_load_enckey:
508 ldx [$key + 0], %g4
509 ldx [$key + 8], %g5
510 ___
511 for ($i=2; $i<26;$i++) { # load key schedule: 8-byte words at [$key+16 .. $key+200] into %f16 .. %f62
512 $code.=<<___;
513 ldd [$key + `8*$i`], %f`12+2*$i`
514 ___
515 }
516 $code.=<<___;
517 retl
518 nop
519 .type _cmll128_load_enckey,#function
520 .size _cmll128_load_enckey,.-_cmll128_load_enckey
521 _cmll256_load_enckey=_cmll128_load_enckey
522
523 .align 32
524 _cmll256_load_deckey:
525 ldd [$key + 64], %f62
526 ldd [$key + 72], %f60
527 b .Load_deckey
528 add $key, 64, $key
529 _cmll128_load_deckey:
530 ldd [$key + 0], %f60
531 ldd [$key + 8], %f62
532 .Load_deckey:
533 ___
534 for ($i=2; $i<24;$i++) { # load key schedule in reverse register order: [$key+16 .. $key+184] into %f58 .. %f16
535 $code.=<<___;
536 ldd [$key + `8*$i`], %f`62-2*$i`
537 ___
538 }
539 $code.=<<___;
540 ldx [$key + 192], %g4
541 retl
542 ldx [$key + 200], %g5
543 .type _cmll256_load_deckey,#function
544 .size _cmll256_load_deckey,.-_cmll256_load_deckey
545
546 .align 32
547 _cmll128_encrypt_1x:
548 ___
549 for ($i=0; $i<3; $i++) { # three passes, each using 16 consecutive FP register numbers starting at %f16, %f32, %f48
550 $code.=<<___;
551 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
552 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
553 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
554 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
555 ___
556 $code.=<<___ if ($i<2); # passes 0 and 1 only: two more rounds plus camellia_fl/camellia_fli; the last pass is finished after the loop
557 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
558 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
559 camellia_fl %f`16+16*$i+12`, %f0, %f0
560 camellia_fli %f`16+16*$i+14`, %f2, %f2
561 ___
562 }
563 $code.=<<___;
564 camellia_f %f56, %f2, %f0, %f4
565 camellia_f %f58, %f0, %f4, %f2
566 fxor %f60, %f4, %f0
567 retl
568 fxor %f62, %f2, %f2
569 .type _cmll128_encrypt_1x,#function
570 .size _cmll128_encrypt_1x,.-_cmll128_encrypt_1x
571 _cmll128_decrypt_1x=_cmll128_encrypt_1x
572
573 .align 32
574 _cmll128_encrypt_2x:
575 ___
576 for ($i=0; $i<3; $i++) { # same schedule as the 1x variant, with each instruction issued twice: once for %f0/%f2, once for %f4/%f6
577 $code.=<<___;
578 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
579 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
580 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
581 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
582 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
583 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
584 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
585 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
586 ___
587 $code.=<<___ if ($i<2); # passes 0 and 1 only: two more rounds plus camellia_fl/camellia_fli for both blocks
588 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
589 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
590 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
591 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
592 camellia_fl %f`16+16*$i+12`, %f0, %f0
593 camellia_fl %f`16+16*$i+12`, %f4, %f4
594 camellia_fli %f`16+16*$i+14`, %f2, %f2
595 camellia_fli %f`16+16*$i+14`, %f6, %f6
596 ___
597 }
598 $code.=<<___;
599 camellia_f %f56, %f2, %f0, %f8
600 camellia_f %f56, %f6, %f4, %f10
601 camellia_f %f58, %f0, %f8, %f2
602 camellia_f %f58, %f4, %f10, %f6
603 fxor %f60, %f8, %f0
604 fxor %f60, %f10, %f4
605 fxor %f62, %f2, %f2
606 retl
607 fxor %f62, %f6, %f6
608 .type _cmll128_encrypt_2x,#function
609 .size _cmll128_encrypt_2x,.-_cmll128_encrypt_2x
610 _cmll128_decrypt_2x=_cmll128_encrypt_2x
611
612 .align 32
613 _cmll256_encrypt_1x:
614 camellia_f %f16, %f2, %f0, %f2
615 camellia_f %f18, %f0, %f2, %f0
616 ldd [$key + 208], %f16
617 ldd [$key + 216], %f18
618 camellia_f %f20, %f2, %f0, %f2
619 camellia_f %f22, %f0, %f2, %f0
620 ldd [$key + 224], %f20
621 ldd [$key + 232], %f22
622 camellia_f %f24, %f2, %f0, %f2
623 camellia_f %f26, %f0, %f2, %f0
624 ldd [$key + 240], %f24
625 ldd [$key + 248], %f26
626 camellia_fl %f28, %f0, %f0
627 camellia_fli %f30, %f2, %f2
628 ldd [$key + 256], %f28
629 ldd [$key + 264], %f30
630 ___
631 for ($i=1; $i<3; $i++) { # two middle passes using %f32..%f46 and %f48..%f62: six camellia_f rounds, then camellia_fl/camellia_fli
632 $code.=<<___;
633 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
634 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
635 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
636 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
637 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
638 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
639 camellia_fl %f`16+16*$i+12`, %f0, %f0
640 camellia_fli %f`16+16*$i+14`, %f2, %f2
641 ___
642 }
643 $code.=<<___;
644 camellia_f %f16, %f2, %f0, %f2
645 camellia_f %f18, %f0, %f2, %f0
646 ldd [$key + 16], %f16
647 ldd [$key + 24], %f18
648 camellia_f %f20, %f2, %f0, %f2
649 camellia_f %f22, %f0, %f2, %f0
650 ldd [$key + 32], %f20
651 ldd [$key + 40], %f22
652 camellia_f %f24, %f2, %f0, %f4
653 camellia_f %f26, %f0, %f4, %f2
654 ldd [$key + 48], %f24
655 ldd [$key + 56], %f26
656 fxor %f28, %f4, %f0
657 fxor %f30, %f2, %f2
658 ldd [$key + 64], %f28
659 retl
660 ldd [$key + 72], %f30
661 .type _cmll256_encrypt_1x,#function
662 .size _cmll256_encrypt_1x,.-_cmll256_encrypt_1x
663
664 .align 32
665 _cmll256_encrypt_2x:
666 camellia_f %f16, %f2, %f0, %f2
667 camellia_f %f16, %f6, %f4, %f6
668 camellia_f %f18, %f0, %f2, %f0
669 camellia_f %f18, %f4, %f6, %f4
670 ldd [$key + 208], %f16
671 ldd [$key + 216], %f18
672 camellia_f %f20, %f2, %f0, %f2
673 camellia_f %f20, %f6, %f4, %f6
674 camellia_f %f22, %f0, %f2, %f0
675 camellia_f %f22, %f4, %f6, %f4
676 ldd [$key + 224], %f20
677 ldd [$key + 232], %f22
678 camellia_f %f24, %f2, %f0, %f2
679 camellia_f %f24, %f6, %f4, %f6
680 camellia_f %f26, %f0, %f2, %f0
681 camellia_f %f26, %f4, %f6, %f4
682 ldd [$key + 240], %f24
683 ldd [$key + 248], %f26
684 camellia_fl %f28, %f0, %f0
685 camellia_fl %f28, %f4, %f4
686 camellia_fli %f30, %f2, %f2
687 camellia_fli %f30, %f6, %f6
688 ldd [$key + 256], %f28
689 ldd [$key + 264], %f30
690 ___
691 for ($i=1; $i<3; $i++) { # two middle passes using %f32..%f46 and %f48..%f62, each instruction issued for both blocks (%f0/%f2 and %f4/%f6)
692 $code.=<<___;
693 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
694 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
695 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
696 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
697 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
698 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
699 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
700 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
701 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
702 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
703 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
704 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
705 camellia_fl %f`16+16*$i+12`, %f0, %f0
706 camellia_fl %f`16+16*$i+12`, %f4, %f4
707 camellia_fli %f`16+16*$i+14`, %f2, %f2
708 camellia_fli %f`16+16*$i+14`, %f6, %f6
709 ___
710 }
711 $code.=<<___;
712 camellia_f %f16, %f2, %f0, %f2
713 camellia_f %f16, %f6, %f4, %f6
714 camellia_f %f18, %f0, %f2, %f0
715 camellia_f %f18, %f4, %f6, %f4
716 ldd [$key + 16], %f16
717 ldd [$key + 24], %f18
718 camellia_f %f20, %f2, %f0, %f2
719 camellia_f %f20, %f6, %f4, %f6
720 camellia_f %f22, %f0, %f2, %f0
721 camellia_f %f22, %f4, %f6, %f4
722 ldd [$key + 32], %f20
723 ldd [$key + 40], %f22
724 camellia_f %f24, %f2, %f0, %f8
725 camellia_f %f24, %f6, %f4, %f10
726 camellia_f %f26, %f0, %f8, %f2
727 camellia_f %f26, %f4, %f10, %f6
728 ldd [$key + 48], %f24
729 ldd [$key + 56], %f26
730 fxor %f28, %f8, %f0
731 fxor %f28, %f10, %f4
732 fxor %f30, %f2, %f2
733 fxor %f30, %f6, %f6
734 ldd [$key + 64], %f28
735 retl
736 ldd [$key + 72], %f30
737 .type _cmll256_encrypt_2x,#function
738 .size _cmll256_encrypt_2x,.-_cmll256_encrypt_2x
739
740 .align 32
741 _cmll256_decrypt_1x:
742 camellia_f %f16, %f2, %f0, %f2
743 camellia_f %f18, %f0, %f2, %f0
744 ldd [$key - 8], %f16
745 ldd [$key - 16], %f18
746 camellia_f %f20, %f2, %f0, %f2
747 camellia_f %f22, %f0, %f2, %f0
748 ldd [$key - 24], %f20
749 ldd [$key - 32], %f22
750 camellia_f %f24, %f2, %f0, %f2
751 camellia_f %f26, %f0, %f2, %f0
752 ldd [$key - 40], %f24
753 ldd [$key - 48], %f26
754 camellia_fl %f28, %f0, %f0
755 camellia_fli %f30, %f2, %f2
756 ldd [$key - 56], %f28
757 ldd [$key - 64], %f30
758 ___
759 for ($i=1; $i<3; $i++) { # two middle passes using %f32..%f46 and %f48..%f62: six camellia_f rounds, then camellia_fl/camellia_fli
760 $code.=<<___;
761 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
762 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
763 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
764 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
765 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
766 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
767 camellia_fl %f`16+16*$i+12`, %f0, %f0
768 camellia_fli %f`16+16*$i+14`, %f2, %f2
769 ___
770 }
771 $code.=<<___;
772 camellia_f %f16, %f2, %f0, %f2
773 camellia_f %f18, %f0, %f2, %f0
774 ldd [$key + 184], %f16
775 ldd [$key + 176], %f18
776 camellia_f %f20, %f2, %f0, %f2
777 camellia_f %f22, %f0, %f2, %f0
778 ldd [$key + 168], %f20
779 ldd [$key + 160], %f22
780 camellia_f %f24, %f2, %f0, %f4
781 camellia_f %f26, %f0, %f4, %f2
782 ldd [$key + 152], %f24
783 ldd [$key + 144], %f26
784 fxor %f30, %f4, %f0
785 fxor %f28, %f2, %f2
786 ldd [$key + 136], %f28
787 retl
788 ldd [$key + 128], %f30
789 .type _cmll256_decrypt_1x,#function
790 .size _cmll256_decrypt_1x,.-_cmll256_decrypt_1x
791
792 .align 32
793 _cmll256_decrypt_2x:
794 camellia_f %f16, %f2, %f0, %f2
795 camellia_f %f16, %f6, %f4, %f6
796 camellia_f %f18, %f0, %f2, %f0
797 camellia_f %f18, %f4, %f6, %f4
798 ldd [$key - 8], %f16
799 ldd [$key - 16], %f18
800 camellia_f %f20, %f2, %f0, %f2
801 camellia_f %f20, %f6, %f4, %f6
802 camellia_f %f22, %f0, %f2, %f0
803 camellia_f %f22, %f4, %f6, %f4
804 ldd [$key - 24], %f20
805 ldd [$key - 32], %f22
806 camellia_f %f24, %f2, %f0, %f2
807 camellia_f %f24, %f6, %f4, %f6
808 camellia_f %f26, %f0, %f2, %f0
809 camellia_f %f26, %f4, %f6, %f4
810 ldd [$key - 40], %f24
811 ldd [$key - 48], %f26
812 camellia_fl %f28, %f0, %f0
813 camellia_fl %f28, %f4, %f4
814 camellia_fli %f30, %f2, %f2
815 camellia_fli %f30, %f6, %f6
816 ldd [$key - 56], %f28
817 ldd [$key - 64], %f30
818 ___
819 for ($i=1; $i<3; $i++) { # two middle passes using %f32..%f46 and %f48..%f62, each instruction issued for both blocks (%f0/%f2 and %f4/%f6)
820 $code.=<<___;
821 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
822 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
823 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
824 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
825 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
826 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
827 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
828 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
829 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
830 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
831 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
832 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
833 camellia_fl %f`16+16*$i+12`, %f0, %f0
834 camellia_fl %f`16+16*$i+12`, %f4, %f4
835 camellia_fli %f`16+16*$i+14`, %f2, %f2
836 camellia_fli %f`16+16*$i+14`, %f6, %f6
837 ___
838 }
839 $code.=<<___;
840 camellia_f %f16, %f2, %f0, %f2
841 camellia_f %f16, %f6, %f4, %f6
842 camellia_f %f18, %f0, %f2, %f0
843 camellia_f %f18, %f4, %f6, %f4
844 ldd [$key + 184], %f16
845 ldd [$key + 176], %f18
846 camellia_f %f20, %f2, %f0, %f2
847 camellia_f %f20, %f6, %f4, %f6
848 camellia_f %f22, %f0, %f2, %f0
849 camellia_f %f22, %f4, %f6, %f4
850 ldd [$key + 168], %f20
851 ldd [$key + 160], %f22
852 camellia_f %f24, %f2, %f0, %f8
853 camellia_f %f24, %f6, %f4, %f10
854 camellia_f %f26, %f0, %f8, %f2
855 camellia_f %f26, %f4, %f10, %f6
856 ldd [$key + 152], %f24
857 ldd [$key + 144], %f26
858 fxor %f30, %f8, %f0
859 fxor %f30, %f10, %f4
860 fxor %f28, %f2, %f2
861 fxor %f28, %f6, %f6
862 ldd [$key + 136], %f28
863 retl
864 ldd [$key + 128], %f30
865 .type _cmll256_decrypt_2x,#function
866 .size _cmll256_decrypt_2x,.-_cmll256_decrypt_2x
867 ___
868
869 &alg_cbc_encrypt_implement("cmll",128); # alg_* helpers are not defined in this file -- presumably from sparcv9_modes.pl; TODO confirm
870 &alg_cbc_encrypt_implement("cmll",256);
871
872 &alg_cbc_decrypt_implement("cmll",128);
873 &alg_cbc_decrypt_implement("cmll",256);
874
875 if ($::evp) { # the CTR32 helper is invoked only when $::evp is non-zero (set to 1 near the top of the script)
876 &alg_ctr32_implement("cmll",128);
877 &alg_ctr32_implement("cmll",256);
878 }
879 }}}
880
881 if (!$::evp) {
882 $code.=<<___;
883 .global Camellia_encrypt
884 Camellia_encrypt=cmll_t4_encrypt
885 .global Camellia_decrypt
886 Camellia_decrypt=cmll_t4_decrypt
887 .global Camellia_set_key
888 .align 32
889 Camellia_set_key:
890 andcc %o2, 7, %g0 ! double-check alignment
891 bnz,a,pn %icc, 1f
892 mov -1, %o0
893 brz,a,pn %o0, 1f
894 mov -1, %o0
895 brz,a,pn %o2, 1f
896 mov -1, %o0
897 andncc %o1, 0x1c0, %g0
898 bnz,a,pn %icc, 1f
899 mov -2, %o0
900 cmp %o1, 128
901 bl,a,pn %icc, 1f
902 mov -2, %o0
903 b cmll_t4_set_key
904 nop
905 1: retl
906 nop
907 .type Camellia_set_key,#function
908 .size Camellia_set_key,.-Camellia_set_key
909 ___
910
911 my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));
912
913 $code.=<<___;
914 .globl Camellia_cbc_encrypt
915 .align 32
916 Camellia_cbc_encrypt:
917 ld [$key + 272], %g1
918 nop
919 brz $enc, .Lcbc_decrypt
920 cmp %g1, 3
921
922 be,pt %icc, cmll128_t4_cbc_encrypt
923 nop
924 ba cmll256_t4_cbc_encrypt
925 nop
926
927 .Lcbc_decrypt:
928 be,pt %icc, cmll128_t4_cbc_decrypt
929 nop
930 ba cmll256_t4_cbc_decrypt
931 nop
932 .type Camellia_cbc_encrypt,#function
933 .size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
934 ___
935 }
936
937 &emit_assembler(); # helper not defined in this file; presumably post-processes and prints the accumulated $code -- TODO confirm
938
939 close STDOUT or die "error closing STDOUT: $!"; # buffered write errors only surface at close; fail rather than leave a truncated file