]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/camellia/asm/cmllt4-sparcv9.pl
Also check for errors in x86_64-xlate.pl.
[thirdparty/openssl.git] / crypto / camellia / asm / cmllt4-sparcv9.pl
CommitLineData
6aa36e8e
RS
1#! /usr/bin/env perl
2# Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
5593d9c9 4# Licensed under the Apache License 2.0 (the "License"). You may not use
6aa36e8e
RS
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
4739ccdb
AP
9
10# ====================================================================
e3713c36
RS
11# Written by David S. Miller and Andy Polyakov.
12# The module is licensed under 2-clause BSD
4739ccdb
AP
13# license. October 2012. All rights reserved.
14# ====================================================================
15
16######################################################################
17# Camellia for SPARC T4.
18#
19# As with AES, the results below [for aligned data] are virtually identical
46f4e1be 20# to critical path lengths for 3-cycle instruction latency:
4739ccdb
AP
21#
22# 128-bit key 192/256-
23# CBC encrypt 4.14/4.21(*) 5.46/5.52
24# (*) numbers after slash are for
25# misaligned data.
26#
27# As with Intel AES-NI, question is if it's possible to improve
46f4e1be 28# performance of parallelizable modes by interleaving round
4739ccdb
AP
29# instructions. In Camellia every instruction is dependent on
30# previous, which means that there is place for 2 additional ones
31# in between two dependent ones. Can we expect 3x performance improvement?
32# At least one can argue that it should be possible to break 2x
33# barrier... For some reason not even 2x appears to be possible:
34#
35# 128-bit key 192/256-
36# CBC decrypt 2.21/2.74 2.99/3.40
37# CTR 2.15/2.68(*) 2.93/3.34
38# (*) numbers after slash are for
39# misaligned data.
40#
41# This is for 2x interleave. But compared to 1x interleave CBC decrypt
42# improved by ... 0% for 128-bit key, and 11% for 192/256-bit one.
43# So that out-of-order execution logic can take non-interleaved code
44# to 1.87x, but can't take 2x interleaved one any further. There
45# surely is some explanation... As a result, 3x interleave was not even
46# attempted. Instead an effort was made to share specific modes
47# implementations with AES module (therefore sparcv9_modes.pl).
48#
49# To anchor to something else, software C implementation processes
50# one byte in 38 cycles with 128-bit key on same processor.
51
# Capture the directory part of this script's path (everything up to and
# including the last '/' or '\') so helper modules can be located relative
# to the script rather than to the current working directory.
52$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
# Search the script's own directory and ../../perlasm relative to it.
53push(@INC,"${dir}","${dir}../../perlasm");
# NOTE(review): presumably the source of the alg_*_implement and
# emit_assembler subs called later in this script -- they are not defined
# in the code visible here.
54require "sparcv9_modes.pl";
55
# Take the last command-line argument as the output file name and, if it is
# a true value, redirect STDOUT to it. The result of open is not checked
# here.
1aa89a7a 56$output = pop and open STDOUT,">$output";
4739ccdb
AP
57
58$::evp=1; # if $evp is set to 0, script generates module with
59# Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
60# entry points. These are fully compatible with openssl/camellia.h.
61
62######################################################################
63# single-round subroutines
64#
65{
66my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));
67
68$code=<<___;
eb77e888
AP
69#include "sparc_arch.h"
70
4739ccdb
AP
71.text
72
73.globl cmll_t4_encrypt
74.align 32
75cmll_t4_encrypt:
76 andcc $inp, 7, %g1 ! is input aligned?
77 andn $inp, 7, $inp
78
79 ldx [$key + 0], %g4
80 ldx [$key + 8], %g5
81
82 ldx [$inp + 0], %o4
83 bz,pt %icc, 1f
84 ldx [$inp + 8], %o5
85 ldx [$inp + 16], $inp
86 sll %g1, 3, %g1
87 sub %g0, %g1, %o3
88 sllx %o4, %g1, %o4
89 sllx %o5, %g1, %g1
90 srlx %o5, %o3, %o5
91 srlx $inp, %o3, %o3
92 or %o5, %o4, %o4
93 or %o3, %g1, %o5
941:
95 ld [$key + 272], $rounds ! grandRounds, 3 or 4
96 ldd [$key + 16], %f12
97 ldd [$key + 24], %f14
98 xor %g4, %o4, %o4
99 xor %g5, %o5, %o5
100 ldd [$key + 32], %f16
101 ldd [$key + 40], %f18
102 movxtod %o4, %f0
103 movxtod %o5, %f2
104 ldd [$key + 48], %f20
105 ldd [$key + 56], %f22
106 sub $rounds, 1, $rounds
107 ldd [$key + 64], %f24
108 ldd [$key + 72], %f26
109 add $key, 80, $key
110
111.Lenc:
112 camellia_f %f12, %f2, %f0, %f2
113 ldd [$key + 0], %f12
114 sub $rounds,1,$rounds
115 camellia_f %f14, %f0, %f2, %f0
116 ldd [$key + 8], %f14
117 camellia_f %f16, %f2, %f0, %f2
118 ldd [$key + 16], %f16
119 camellia_f %f18, %f0, %f2, %f0
120 ldd [$key + 24], %f18
121 camellia_f %f20, %f2, %f0, %f2
122 ldd [$key + 32], %f20
123 camellia_f %f22, %f0, %f2, %f0
124 ldd [$key + 40], %f22
125 camellia_fl %f24, %f0, %f0
126 ldd [$key + 48], %f24
127 camellia_fli %f26, %f2, %f2
128 ldd [$key + 56], %f26
129 brnz,pt $rounds, .Lenc
130 add $key, 64, $key
131
132 andcc $out, 7, $tmp ! is output aligned?
133 camellia_f %f12, %f2, %f0, %f2
134 camellia_f %f14, %f0, %f2, %f0
135 camellia_f %f16, %f2, %f0, %f2
136 camellia_f %f18, %f0, %f2, %f0
137 camellia_f %f20, %f2, %f0, %f4
138 camellia_f %f22, %f0, %f4, %f2
139 fxor %f24, %f4, %f0
140 fxor %f26, %f2, %f2
141
142 bnz,pn %icc, 2f
143 nop
144
145 std %f0, [$out + 0]
146 retl
147 std %f2, [$out + 8]
148
1492: alignaddrl $out, %g0, $out
150 mov 0xff, $mask
151 srl $mask, $tmp, $mask
152
153 faligndata %f0, %f0, %f4
154 faligndata %f0, %f2, %f6
155 faligndata %f2, %f2, %f8
156
157 stda %f4, [$out + $mask]0xc0 ! partial store
158 std %f6, [$out + 8]
159 add $out, 16, $out
160 orn %g0, $mask, $mask
161 retl
162 stda %f8, [$out + $mask]0xc0 ! partial store
163.type cmll_t4_encrypt,#function
164.size cmll_t4_encrypt,.-cmll_t4_encrypt
165
166.globl cmll_t4_decrypt
167.align 32
168cmll_t4_decrypt:
169 ld [$key + 272], $rounds ! grandRounds, 3 or 4
170 andcc $inp, 7, %g1 ! is input aligned?
171 andn $inp, 7, $inp
172
173 sll $rounds, 6, $rounds
174 add $rounds, $key, $key
175
176 ldx [$inp + 0], %o4
177 bz,pt %icc, 1f
178 ldx [$inp + 8], %o5
179 ldx [$inp + 16], $inp
180 sll %g1, 3, %g1
181 sub %g0, %g1, %g4
182 sllx %o4, %g1, %o4
183 sllx %o5, %g1, %g1
184 srlx %o5, %g4, %o5
185 srlx $inp, %g4, %g4
186 or %o5, %o4, %o4
187 or %g4, %g1, %o5
1881:
189 ldx [$key + 0], %g4
190 ldx [$key + 8], %g5
191 ldd [$key - 8], %f12
192 ldd [$key - 16], %f14
193 xor %g4, %o4, %o4
194 xor %g5, %o5, %o5
195 ldd [$key - 24], %f16
196 ldd [$key - 32], %f18
197 movxtod %o4, %f0
198 movxtod %o5, %f2
199 ldd [$key - 40], %f20
200 ldd [$key - 48], %f22
201 sub $rounds, 64, $rounds
202 ldd [$key - 56], %f24
203 ldd [$key - 64], %f26
204 sub $key, 64, $key
205
206.Ldec:
207 camellia_f %f12, %f2, %f0, %f2
208 ldd [$key - 8], %f12
209 sub $rounds, 64, $rounds
210 camellia_f %f14, %f0, %f2, %f0
211 ldd [$key - 16], %f14
212 camellia_f %f16, %f2, %f0, %f2
213 ldd [$key - 24], %f16
214 camellia_f %f18, %f0, %f2, %f0
215 ldd [$key - 32], %f18
216 camellia_f %f20, %f2, %f0, %f2
217 ldd [$key - 40], %f20
218 camellia_f %f22, %f0, %f2, %f0
219 ldd [$key - 48], %f22
220 camellia_fl %f24, %f0, %f0
221 ldd [$key - 56], %f24
222 camellia_fli %f26, %f2, %f2
223 ldd [$key - 64], %f26
224 brnz,pt $rounds, .Ldec
225 sub $key, 64, $key
226
227 andcc $out, 7, $tmp ! is output aligned?
228 camellia_f %f12, %f2, %f0, %f2
229 camellia_f %f14, %f0, %f2, %f0
230 camellia_f %f16, %f2, %f0, %f2
231 camellia_f %f18, %f0, %f2, %f0
232 camellia_f %f20, %f2, %f0, %f4
233 camellia_f %f22, %f0, %f4, %f2
234 fxor %f26, %f4, %f0
235 fxor %f24, %f2, %f2
236
237 bnz,pn %icc, 2f
238 nop
239
240 std %f0, [$out + 0]
241 retl
242 std %f2, [$out + 8]
243
2442: alignaddrl $out, %g0, $out
245 mov 0xff, $mask
246 srl $mask, $tmp, $mask
247
248 faligndata %f0, %f0, %f4
249 faligndata %f0, %f2, %f6
250 faligndata %f2, %f2, %f8
251
252 stda %f4, [$out + $mask]0xc0 ! partial store
253 std %f6, [$out + 8]
254 add $out, 16, $out
255 orn %g0, $mask, $mask
256 retl
257 stda %f8, [$out + $mask]0xc0 ! partial store
258.type cmll_t4_decrypt,#function
259.size cmll_t4_decrypt,.-cmll_t4_decrypt
260___
261}
262
263######################################################################
264# key setup subroutines
265#
266{
# ROTL128($rot)
#
# Returns the text of six SPARC instructions that rotate the 128-bit
# quantity held in the register pair %o4 (upper 64 bits) : %o5 (lower 64
# bits) left by $rot bits, using %g4 and %g5 as scratch registers.
# The shift counts are emitted as "64-$rot" and "$rot", so the sequence is
# only a rotation for 0 < $rot < 64. No trailing newline is appended.
# It is invoked from the heredocs below as `&ROTL128(n)`.
267sub ROTL128 {
268 my $rot = shift;
269
# Save the bits shifted out of each half, shift both halves left, then
# OR each half with the bits carried out of the other one.
270 "srlx %o4, 64-$rot, %g4\n\t".
271 "sllx %o4, $rot, %o4\n\t".
272 "srlx %o5, 64-$rot, %g5\n\t".
273 "sllx %o5, $rot, %o5\n\t".
274 "or %o4, %g5, %o4\n\t".
275 "or %o5, %g4, %o5";
276}
277
278my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
279$code.=<<___;
280.globl cmll_t4_set_key
281.align 32
282cmll_t4_set_key:
283 and $inp, 7, $tmp
284 alignaddr $inp, %g0, $inp
285 cmp $bits, 192
286 ldd [$inp + 0], %f0
287 bl,pt %icc,.L128
288 ldd [$inp + 8], %f2
289
290 be,pt %icc,.L192
291 ldd [$inp + 16], %f4
292
293 brz,pt $tmp, .L256aligned
294 ldd [$inp + 24], %f6
295
296 ldd [$inp + 32], %f8
297 faligndata %f0, %f2, %f0
298 faligndata %f2, %f4, %f2
299 faligndata %f4, %f6, %f4
300 b .L256aligned
301 faligndata %f6, %f8, %f6
302
303.align 16
304.L192:
305 brz,a,pt $tmp, .L256aligned
306 fnot2 %f4, %f6
307
308 ldd [$inp + 24], %f6
309 nop
310 faligndata %f0, %f2, %f0
311 faligndata %f2, %f4, %f2
312 faligndata %f4, %f6, %f4
313 fnot2 %f4, %f6
314
315.L256aligned:
316 std %f0, [$out + 0] ! k[0, 1]
317 fsrc2 %f0, %f28
318 std %f2, [$out + 8] ! k[2, 3]
319 fsrc2 %f2, %f30
320 fxor %f4, %f0, %f0
321 b .L128key
322 fxor %f6, %f2, %f2
323
324.align 16
325.L128:
326 brz,pt $tmp, .L128aligned
327 nop
328
329 ldd [$inp + 16], %f4
330 nop
331 faligndata %f0, %f2, %f0
332 faligndata %f2, %f4, %f2
333
334.L128aligned:
335 std %f0, [$out + 0] ! k[0, 1]
336 fsrc2 %f0, %f28
337 std %f2, [$out + 8] ! k[2, 3]
338 fsrc2 %f2, %f30
339
340.L128key:
341 mov %o7, %o5
3421: call .+8
343 add %o7, SIGMA-1b, %o4
344 mov %o5, %o7
345
346 ldd [%o4 + 0], %f16
347 ldd [%o4 + 8], %f18
348 ldd [%o4 + 16], %f20
349 ldd [%o4 + 24], %f22
350
351 camellia_f %f16, %f2, %f0, %f2
352 camellia_f %f18, %f0, %f2, %f0
353 fxor %f28, %f0, %f0
354 fxor %f30, %f2, %f2
355 camellia_f %f20, %f2, %f0, %f2
356 camellia_f %f22, %f0, %f2, %f0
357
358 bge,pn %icc, .L256key
359 nop
360 std %f0, [$out + 0x10] ! k[ 4, 5]
361 std %f2, [$out + 0x18] ! k[ 6, 7]
362
363 movdtox %f0, %o4
364 movdtox %f2, %o5
365 `&ROTL128(15)`
366 stx %o4, [$out + 0x30] ! k[12, 13]
367 stx %o5, [$out + 0x38] ! k[14, 15]
368 `&ROTL128(15)`
369 stx %o4, [$out + 0x40] ! k[16, 17]
370 stx %o5, [$out + 0x48] ! k[18, 19]
371 `&ROTL128(15)`
372 stx %o4, [$out + 0x60] ! k[24, 25]
373 `&ROTL128(15)`
374 stx %o4, [$out + 0x70] ! k[28, 29]
375 stx %o5, [$out + 0x78] ! k[30, 31]
376 `&ROTL128(34)`
377 stx %o4, [$out + 0xa0] ! k[40, 41]
378 stx %o5, [$out + 0xa8] ! k[42, 43]
379 `&ROTL128(17)`
380 stx %o4, [$out + 0xc0] ! k[48, 49]
381 stx %o5, [$out + 0xc8] ! k[50, 51]
382
383 movdtox %f28, %o4 ! k[ 0, 1]
384 movdtox %f30, %o5 ! k[ 2, 3]
385 `&ROTL128(15)`
386 stx %o4, [$out + 0x20] ! k[ 8, 9]
387 stx %o5, [$out + 0x28] ! k[10, 11]
388 `&ROTL128(30)`
389 stx %o4, [$out + 0x50] ! k[20, 21]
390 stx %o5, [$out + 0x58] ! k[22, 23]
391 `&ROTL128(15)`
392 stx %o5, [$out + 0x68] ! k[26, 27]
393 `&ROTL128(17)`
394 stx %o4, [$out + 0x80] ! k[32, 33]
395 stx %o5, [$out + 0x88] ! k[34, 35]
396 `&ROTL128(17)`
397 stx %o4, [$out + 0x90] ! k[36, 37]
398 stx %o5, [$out + 0x98] ! k[38, 39]
399 `&ROTL128(17)`
400 stx %o4, [$out + 0xb0] ! k[44, 45]
401 stx %o5, [$out + 0xb8] ! k[46, 47]
402
403 mov 3, $tmp
404 st $tmp, [$out + 0x110]
405 retl
406 xor %o0, %o0, %o0
407
408.align 16
409.L256key:
410 ldd [%o4 + 32], %f24
411 ldd [%o4 + 40], %f26
412
413 std %f0, [$out + 0x30] ! k[12, 13]
414 std %f2, [$out + 0x38] ! k[14, 15]
415
416 fxor %f4, %f0, %f0
417 fxor %f6, %f2, %f2
418 camellia_f %f24, %f2, %f0, %f2
419 camellia_f %f26, %f0, %f2, %f0
420
421 std %f0, [$out + 0x10] ! k[ 4, 5]
422 std %f2, [$out + 0x18] ! k[ 6, 7]
423
424 movdtox %f0, %o4
425 movdtox %f2, %o5
426 `&ROTL128(30)`
427 stx %o4, [$out + 0x50] ! k[20, 21]
428 stx %o5, [$out + 0x58] ! k[22, 23]
429 `&ROTL128(30)`
430 stx %o4, [$out + 0xa0] ! k[40, 41]
431 stx %o5, [$out + 0xa8] ! k[42, 43]
432 `&ROTL128(51)`
433 stx %o4, [$out + 0x100] ! k[64, 65]
434 stx %o5, [$out + 0x108] ! k[66, 67]
435
436 movdtox %f4, %o4 ! k[ 8, 9]
437 movdtox %f6, %o5 ! k[10, 11]
438 `&ROTL128(15)`
439 stx %o4, [$out + 0x20] ! k[ 8, 9]
440 stx %o5, [$out + 0x28] ! k[10, 11]
441 `&ROTL128(15)`
442 stx %o4, [$out + 0x40] ! k[16, 17]
443 stx %o5, [$out + 0x48] ! k[18, 19]
444 `&ROTL128(30)`
445 stx %o4, [$out + 0x90] ! k[36, 37]
446 stx %o5, [$out + 0x98] ! k[38, 39]
447 `&ROTL128(34)`
448 stx %o4, [$out + 0xd0] ! k[52, 53]
449 stx %o5, [$out + 0xd8] ! k[54, 55]
450 ldx [$out + 0x30], %o4 ! k[12, 13]
451 ldx [$out + 0x38], %o5 ! k[14, 15]
452 `&ROTL128(15)`
453 stx %o4, [$out + 0x30] ! k[12, 13]
454 stx %o5, [$out + 0x38] ! k[14, 15]
455 `&ROTL128(30)`
456 stx %o4, [$out + 0x70] ! k[28, 29]
457 stx %o5, [$out + 0x78] ! k[30, 31]
458 srlx %o4, 32, %g4
459 srlx %o5, 32, %g5
460 st %o4, [$out + 0xc0] ! k[48]
461 st %g5, [$out + 0xc4] ! k[49]
462 st %o5, [$out + 0xc8] ! k[50]
463 st %g4, [$out + 0xcc] ! k[51]
464 `&ROTL128(49)`
465 stx %o4, [$out + 0xe0] ! k[56, 57]
466 stx %o5, [$out + 0xe8] ! k[58, 59]
467
468 movdtox %f28, %o4 ! k[ 0, 1]
469 movdtox %f30, %o5 ! k[ 2, 3]
470 `&ROTL128(45)`
471 stx %o4, [$out + 0x60] ! k[24, 25]
472 stx %o5, [$out + 0x68] ! k[26, 27]
473 `&ROTL128(15)`
474 stx %o4, [$out + 0x80] ! k[32, 33]
475 stx %o5, [$out + 0x88] ! k[34, 35]
476 `&ROTL128(17)`
477 stx %o4, [$out + 0xb0] ! k[44, 45]
478 stx %o5, [$out + 0xb8] ! k[46, 47]
479 `&ROTL128(34)`
480 stx %o4, [$out + 0xf0] ! k[60, 61]
481 stx %o5, [$out + 0xf8] ! k[62, 63]
482
483 mov 4, $tmp
484 st $tmp, [$out + 0x110]
485 retl
486 xor %o0, %o0, %o0
487.type cmll_t4_set_key,#function
488.size cmll_t4_set_key,.-cmll_t4_set_key
489.align 32
490SIGMA:
491 .long 0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
492 .long 0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
493 .long 0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
494.type SIGMA,#object
495.size SIGMA,.-SIGMA
496.asciz "Camellia for SPARC T4, David S. Miller, Andy Polyakov"
497___
498}
499
500{{{
# Symbolic names for the input registers %i0..%i5 used by the bulk-mode
# helper routines in this scope; $key is therefore %i3.
501my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
# Symbolic names for local registers %l1..%l5; the map produces %l1..%l7,
# and the two surplus values (%l6, %l7) are discarded by the list assignment.
502my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));
503
504$code.=<<___;
505.align 32
506_cmll128_load_enckey:
507 ldx [$key + 0], %g4
508 ldx [$key + 8], %g5
509___
# Append 24 "ldd" instructions to the body of _cmll128_load_enckey, one for
# each $i in 2..25: the doubleword at [$key + 8*$i] (offsets 16..200) is
# loaded into %f(12+2*$i), i.e. %f16 up to %f62.
# The backquoted arithmetic is left as literal text by the heredoc.
# NOTE(review): presumably it is evaluated by a later pass such as
# emit_assembler -- not visible in this file.
510for ($i=2; $i<26;$i++) { # load key schedule
511 $code.=<<___;
512 ldd [$key + `8*$i`], %f`12+2*$i`
513___
514}
515$code.=<<___;
516 retl
517 nop
518.type _cmll128_load_enckey,#function
519.size _cmll128_load_enckey,.-_cmll128_load_enckey
520_cmll256_load_enckey=_cmll128_load_enckey
521
522.align 32
523_cmll256_load_deckey:
524 ldd [$key + 64], %f62
525 ldd [$key + 72], %f60
526 b .Load_deckey
527 add $key, 64, $key
528_cmll128_load_deckey:
529 ldd [$key + 0], %f60
530 ldd [$key + 8], %f62
531.Load_deckey:
532___
# Append 22 "ldd" instructions after the .Load_deckey label, one for each
# $i in 2..23: the doubleword at [$key + 8*$i] (offsets 16..184) is loaded
# into %f(62-2*$i), i.e. %f58 down to %f16 -- the register order is the
# reverse of the one used by the encryption key loader.
# The backquoted arithmetic is left as literal text by the heredoc.
# NOTE(review): presumably it is evaluated by a later pass such as
# emit_assembler -- not visible in this file.
533for ($i=2; $i<24;$i++) { # load key schedule
534 $code.=<<___;
535 ldd [$key + `8*$i`], %f`62-2*$i`
536___
537}
538$code.=<<___;
539 ldx [$key + 192], %g4
540 retl
541 ldx [$key + 200], %g5
542.type _cmll256_load_deckey,#function
543.size _cmll256_load_deckey,.-_cmll256_load_deckey
544
545.align 32
546_cmll128_encrypt_1x:
547___
548for ($i=0; $i<3; $i++) {
549 $code.=<<___;
550 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
551 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
552 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
553 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
554___
555$code.=<<___ if ($i<2);
556 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
557 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
558 camellia_fl %f`16+16*$i+12`, %f0, %f0
559 camellia_fli %f`16+16*$i+14`, %f2, %f2
560___
561}
562$code.=<<___;
563 camellia_f %f56, %f2, %f0, %f4
564 camellia_f %f58, %f0, %f4, %f2
565 fxor %f60, %f4, %f0
566 retl
567 fxor %f62, %f2, %f2
568.type _cmll128_encrypt_1x,#function
569.size _cmll128_encrypt_1x,.-_cmll128_encrypt_1x
570_cmll128_decrypt_1x=_cmll128_encrypt_1x
571
572.align 32
573_cmll128_encrypt_2x:
574___
575for ($i=0; $i<3; $i++) {
576 $code.=<<___;
577 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
578 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
579 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
580 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
581 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
582 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
583 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
584 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
585___
586$code.=<<___ if ($i<2);
587 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
588 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
589 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
590 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
591 camellia_fl %f`16+16*$i+12`, %f0, %f0
592 camellia_fl %f`16+16*$i+12`, %f4, %f4
593 camellia_fli %f`16+16*$i+14`, %f2, %f2
594 camellia_fli %f`16+16*$i+14`, %f6, %f6
595___
596}
597$code.=<<___;
598 camellia_f %f56, %f2, %f0, %f8
599 camellia_f %f56, %f6, %f4, %f10
600 camellia_f %f58, %f0, %f8, %f2
601 camellia_f %f58, %f4, %f10, %f6
602 fxor %f60, %f8, %f0
603 fxor %f60, %f10, %f4
604 fxor %f62, %f2, %f2
605 retl
606 fxor %f62, %f6, %f6
607.type _cmll128_encrypt_2x,#function
608.size _cmll128_encrypt_2x,.-_cmll128_encrypt_2x
609_cmll128_decrypt_2x=_cmll128_encrypt_2x
610
611.align 32
612_cmll256_encrypt_1x:
613 camellia_f %f16, %f2, %f0, %f2
614 camellia_f %f18, %f0, %f2, %f0
615 ldd [$key + 208], %f16
616 ldd [$key + 216], %f18
617 camellia_f %f20, %f2, %f0, %f2
618 camellia_f %f22, %f0, %f2, %f0
619 ldd [$key + 224], %f20
620 ldd [$key + 232], %f22
621 camellia_f %f24, %f2, %f0, %f2
622 camellia_f %f26, %f0, %f2, %f0
623 ldd [$key + 240], %f24
624 ldd [$key + 248], %f26
625 camellia_fl %f28, %f0, %f0
626 camellia_fli %f30, %f2, %f2
627 ldd [$key + 256], %f28
628 ldd [$key + 264], %f30
629___
630for ($i=1; $i<3; $i++) {
631 $code.=<<___;
632 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
633 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
634 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
635 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
636 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
637 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
638 camellia_fl %f`16+16*$i+12`, %f0, %f0
639 camellia_fli %f`16+16*$i+14`, %f2, %f2
640___
641}
642$code.=<<___;
643 camellia_f %f16, %f2, %f0, %f2
644 camellia_f %f18, %f0, %f2, %f0
645 ldd [$key + 16], %f16
646 ldd [$key + 24], %f18
647 camellia_f %f20, %f2, %f0, %f2
648 camellia_f %f22, %f0, %f2, %f0
649 ldd [$key + 32], %f20
650 ldd [$key + 40], %f22
651 camellia_f %f24, %f2, %f0, %f4
652 camellia_f %f26, %f0, %f4, %f2
653 ldd [$key + 48], %f24
654 ldd [$key + 56], %f26
655 fxor %f28, %f4, %f0
656 fxor %f30, %f2, %f2
657 ldd [$key + 64], %f28
658 retl
659 ldd [$key + 72], %f30
660.type _cmll256_encrypt_1x,#function
661.size _cmll256_encrypt_1x,.-_cmll256_encrypt_1x
662
663.align 32
664_cmll256_encrypt_2x:
665 camellia_f %f16, %f2, %f0, %f2
666 camellia_f %f16, %f6, %f4, %f6
667 camellia_f %f18, %f0, %f2, %f0
668 camellia_f %f18, %f4, %f6, %f4
669 ldd [$key + 208], %f16
670 ldd [$key + 216], %f18
671 camellia_f %f20, %f2, %f0, %f2
672 camellia_f %f20, %f6, %f4, %f6
673 camellia_f %f22, %f0, %f2, %f0
674 camellia_f %f22, %f4, %f6, %f4
675 ldd [$key + 224], %f20
676 ldd [$key + 232], %f22
677 camellia_f %f24, %f2, %f0, %f2
678 camellia_f %f24, %f6, %f4, %f6
679 camellia_f %f26, %f0, %f2, %f0
680 camellia_f %f26, %f4, %f6, %f4
681 ldd [$key + 240], %f24
682 ldd [$key + 248], %f26
683 camellia_fl %f28, %f0, %f0
684 camellia_fl %f28, %f4, %f4
685 camellia_fli %f30, %f2, %f2
686 camellia_fli %f30, %f6, %f6
687 ldd [$key + 256], %f28
688 ldd [$key + 264], %f30
689___
690for ($i=1; $i<3; $i++) {
691 $code.=<<___;
692 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
693 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
694 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
695 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
696 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
697 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
698 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
699 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
700 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
701 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
702 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
703 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
704 camellia_fl %f`16+16*$i+12`, %f0, %f0
705 camellia_fl %f`16+16*$i+12`, %f4, %f4
706 camellia_fli %f`16+16*$i+14`, %f2, %f2
707 camellia_fli %f`16+16*$i+14`, %f6, %f6
708___
709}
710$code.=<<___;
711 camellia_f %f16, %f2, %f0, %f2
712 camellia_f %f16, %f6, %f4, %f6
713 camellia_f %f18, %f0, %f2, %f0
714 camellia_f %f18, %f4, %f6, %f4
715 ldd [$key + 16], %f16
716 ldd [$key + 24], %f18
717 camellia_f %f20, %f2, %f0, %f2
718 camellia_f %f20, %f6, %f4, %f6
719 camellia_f %f22, %f0, %f2, %f0
720 camellia_f %f22, %f4, %f6, %f4
721 ldd [$key + 32], %f20
722 ldd [$key + 40], %f22
723 camellia_f %f24, %f2, %f0, %f8
724 camellia_f %f24, %f6, %f4, %f10
725 camellia_f %f26, %f0, %f8, %f2
726 camellia_f %f26, %f4, %f10, %f6
727 ldd [$key + 48], %f24
728 ldd [$key + 56], %f26
729 fxor %f28, %f8, %f0
730 fxor %f28, %f10, %f4
731 fxor %f30, %f2, %f2
732 fxor %f30, %f6, %f6
733 ldd [$key + 64], %f28
734 retl
735 ldd [$key + 72], %f30
736.type _cmll256_encrypt_2x,#function
737.size _cmll256_encrypt_2x,.-_cmll256_encrypt_2x
738
739.align 32
740_cmll256_decrypt_1x:
741 camellia_f %f16, %f2, %f0, %f2
742 camellia_f %f18, %f0, %f2, %f0
743 ldd [$key - 8], %f16
744 ldd [$key - 16], %f18
745 camellia_f %f20, %f2, %f0, %f2
746 camellia_f %f22, %f0, %f2, %f0
747 ldd [$key - 24], %f20
748 ldd [$key - 32], %f22
749 camellia_f %f24, %f2, %f0, %f2
750 camellia_f %f26, %f0, %f2, %f0
751 ldd [$key - 40], %f24
752 ldd [$key - 48], %f26
753 camellia_fl %f28, %f0, %f0
754 camellia_fli %f30, %f2, %f2
755 ldd [$key - 56], %f28
756 ldd [$key - 64], %f30
757___
758for ($i=1; $i<3; $i++) {
759 $code.=<<___;
760 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
761 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
762 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
763 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
764 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
765 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
766 camellia_fl %f`16+16*$i+12`, %f0, %f0
767 camellia_fli %f`16+16*$i+14`, %f2, %f2
768___
769}
770$code.=<<___;
771 camellia_f %f16, %f2, %f0, %f2
772 camellia_f %f18, %f0, %f2, %f0
773 ldd [$key + 184], %f16
774 ldd [$key + 176], %f18
775 camellia_f %f20, %f2, %f0, %f2
776 camellia_f %f22, %f0, %f2, %f0
777 ldd [$key + 168], %f20
778 ldd [$key + 160], %f22
779 camellia_f %f24, %f2, %f0, %f4
780 camellia_f %f26, %f0, %f4, %f2
781 ldd [$key + 152], %f24
782 ldd [$key + 144], %f26
783 fxor %f30, %f4, %f0
784 fxor %f28, %f2, %f2
785 ldd [$key + 136], %f28
786 retl
787 ldd [$key + 128], %f30
788.type _cmll256_decrypt_1x,#function
789.size _cmll256_decrypt_1x,.-_cmll256_decrypt_1x
790
791.align 32
792_cmll256_decrypt_2x:
793 camellia_f %f16, %f2, %f0, %f2
794 camellia_f %f16, %f6, %f4, %f6
795 camellia_f %f18, %f0, %f2, %f0
796 camellia_f %f18, %f4, %f6, %f4
797 ldd [$key - 8], %f16
798 ldd [$key - 16], %f18
799 camellia_f %f20, %f2, %f0, %f2
800 camellia_f %f20, %f6, %f4, %f6
801 camellia_f %f22, %f0, %f2, %f0
802 camellia_f %f22, %f4, %f6, %f4
803 ldd [$key - 24], %f20
804 ldd [$key - 32], %f22
805 camellia_f %f24, %f2, %f0, %f2
806 camellia_f %f24, %f6, %f4, %f6
807 camellia_f %f26, %f0, %f2, %f0
808 camellia_f %f26, %f4, %f6, %f4
809 ldd [$key - 40], %f24
810 ldd [$key - 48], %f26
811 camellia_fl %f28, %f0, %f0
812 camellia_fl %f28, %f4, %f4
813 camellia_fli %f30, %f2, %f2
814 camellia_fli %f30, %f6, %f6
815 ldd [$key - 56], %f28
816 ldd [$key - 64], %f30
817___
818for ($i=1; $i<3; $i++) {
819 $code.=<<___;
820 camellia_f %f`16+16*$i+0`, %f2, %f0, %f2
821 camellia_f %f`16+16*$i+0`, %f6, %f4, %f6
822 camellia_f %f`16+16*$i+2`, %f0, %f2, %f0
823 camellia_f %f`16+16*$i+2`, %f4, %f6, %f4
824 camellia_f %f`16+16*$i+4`, %f2, %f0, %f2
825 camellia_f %f`16+16*$i+4`, %f6, %f4, %f6
826 camellia_f %f`16+16*$i+6`, %f0, %f2, %f0
827 camellia_f %f`16+16*$i+6`, %f4, %f6, %f4
828 camellia_f %f`16+16*$i+8`, %f2, %f0, %f2
829 camellia_f %f`16+16*$i+8`, %f6, %f4, %f6
830 camellia_f %f`16+16*$i+10`, %f0, %f2, %f0
831 camellia_f %f`16+16*$i+10`, %f4, %f6, %f4
832 camellia_fl %f`16+16*$i+12`, %f0, %f0
833 camellia_fl %f`16+16*$i+12`, %f4, %f4
834 camellia_fli %f`16+16*$i+14`, %f2, %f2
835 camellia_fli %f`16+16*$i+14`, %f6, %f6
836___
837}
838$code.=<<___;
839 camellia_f %f16, %f2, %f0, %f2
840 camellia_f %f16, %f6, %f4, %f6
841 camellia_f %f18, %f0, %f2, %f0
842 camellia_f %f18, %f4, %f6, %f4
843 ldd [$key + 184], %f16
844 ldd [$key + 176], %f18
845 camellia_f %f20, %f2, %f0, %f2
846 camellia_f %f20, %f6, %f4, %f6
847 camellia_f %f22, %f0, %f2, %f0
848 camellia_f %f22, %f4, %f6, %f4
849 ldd [$key + 168], %f20
850 ldd [$key + 160], %f22
851 camellia_f %f24, %f2, %f0, %f8
852 camellia_f %f24, %f6, %f4, %f10
853 camellia_f %f26, %f0, %f8, %f2
854 camellia_f %f26, %f4, %f10, %f6
855 ldd [$key + 152], %f24
856 ldd [$key + 144], %f26
857 fxor %f30, %f8, %f0
858 fxor %f30, %f10, %f4
859 fxor %f28, %f2, %f2
860 fxor %f28, %f6, %f6
861 ldd [$key + 136], %f28
862 retl
863 ldd [$key + 128], %f30
864.type _cmll256_decrypt_2x,#function
865.size _cmll256_decrypt_2x,.-_cmll256_decrypt_2x
866___
867
# Instantiate the mode-of-operation code for the "cmll" prefix at both
# key-size variants (128 and 256).
# NOTE(review): these subs are not defined in this file; presumably they
# come from sparcv9_modes.pl and append to $code the cmll{128,256}_t4_cbc_*
# entry points that Camellia_cbc_encrypt branches to below -- confirm.
868&alg_cbc_encrypt_implement("cmll",128);
869&alg_cbc_encrypt_implement("cmll",256);
870
871&alg_cbc_decrypt_implement("cmll",128);
872&alg_cbc_decrypt_implement("cmll",256);
873
# The CTR mode code is only generated when $::evp is set.
874if ($::evp) {
875 &alg_ctr32_implement("cmll",128);
876 &alg_ctr32_implement("cmll",256);
877}
878}}}
879
880if (!$::evp) {
881$code.=<<___;
882.global Camellia_encrypt
883Camellia_encrypt=cmll_t4_encrypt
884.global Camellia_decrypt
885Camellia_decrypt=cmll_t4_decrypt
886.global Camellia_set_key
887.align 32
888Camellia_set_key:
889 andcc %o2, 7, %g0 ! double-check alignment
890 bnz,a,pn %icc, 1f
891 mov -1, %o0
892 brz,a,pn %o0, 1f
893 mov -1, %o0
894 brz,a,pn %o2, 1f
895 mov -1, %o0
896 andncc %o1, 0x1c0, %g0
897 bnz,a,pn %icc, 1f
898 mov -2, %o0
899 cmp %o1, 128
900 bl,a,pn %icc, 1f
901 mov -2, %o0
902 b cmll_t4_set_key
903 nop
9041: retl
905 nop
906.type Camellia_set_key,#function
907.size Camellia_set_key,.-Camellia_set_key
908___
909
910my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));
911
912$code.=<<___;
913.globl Camellia_cbc_encrypt
914.align 32
915Camellia_cbc_encrypt:
916 ld [$key + 272], %g1
917 nop
918 brz $enc, .Lcbc_decrypt
919 cmp %g1, 3
920
921 be,pt %icc, cmll128_t4_cbc_encrypt
922 nop
923 ba cmll256_t4_cbc_encrypt
924 nop
925
926.Lcbc_decrypt:
927 be,pt %icc, cmll128_t4_cbc_decrypt
928 nop
929 ba cmll256_t4_cbc_decrypt
930 nop
931.type Camellia_cbc_encrypt,#function
932.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
933___
934}
935
# NOTE(review): emit_assembler is not defined in this file; presumably it is
# provided by sparcv9_modes.pl and post-processes and prints the accumulated
# $code -- confirm.
936&emit_assembler();
937
# Closing STDOUT explicitly surfaces buffered write errors; abort with the
# system error message if the close fails.
a21314db 938close STDOUT or die "error closing STDOUT: $!";