]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/camellia/asm/cmllt4-sparcv9.pl
Split bignum code out of the sparcv9cap.c
[thirdparty/openssl.git] / crypto / camellia / asm / cmllt4-sparcv9.pl
CommitLineData
#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
8
4739ccdb
AP
9
# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD
# license. October 2012. All rights reserved.
# ====================================================================
15
######################################################################
# Camellia for SPARC T4.
#
# As with AES, the results below [for aligned data] are virtually
# identical to the critical path lengths for a 3-cycle instruction
# latency:
#
#		128-bit key	192/256-
# CBC encrypt	4.14/4.21(*)	5.46/5.52
#			 (*) numbers after slash are for
#			     misaligned data.
#
# As with Intel AES-NI, the question is whether it's possible to improve
# the performance of parallelizable modes by interleaving round
# instructions. In Camellia every instruction depends on the previous
# one, which means that there is room for 2 additional instructions
# between two dependent ones. Can we expect a 3x performance improvement?
# At least one can argue that it should be possible to break the 2x
# barrier... For some reason not even 2x appears to be possible:
#
#		128-bit key	192/256-
# CBC decrypt	2.21/2.74	2.99/3.40
# CTR		2.15/2.68(*)	2.93/3.34
#			 (*) numbers after slash are for
#			     misaligned data.
#
# This is for 2x interleave. But compared to 1x interleave, CBC decrypt
# improved by ... 0% for a 128-bit key, and 11% for a 192/256-bit one.
# So the out-of-order execution logic can take non-interleaved code
# to 1.87x, but can't take 2x interleaved code any further. There
# surely is some explanation... As a result, 3x interleave was not even
# attempted. Instead an effort was made to share the mode-specific
# implementations with the AES module (therefore sparcv9_modes.pl).
#
# To anchor to something else, the software C implementation processes
# one byte in 38 cycles with a 128-bit key on the same processor.
51
# Derive the directory this script lives in from $0 and add it, plus the
# sibling ../../perlasm directory, to @INC so that the require below can
# find sparcv9_modes.pl.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# The last command-line argument, when present, names the output file and
# STDOUT is redirected to it.  A failed open is fatal rather than silently
# ignored, in line with the checked close of STDOUT at the end of the script.
$output = pop;
if ($output) {
    open(STDOUT, ">", $output) or die "can't open $output: $!";
}

$::evp=1;	# if $evp is set to 0, script generates module with
		# Camellia_[en|de]crypt, Camellia_set_key and Camellia_cbc_encrypt
		# entry points. These are fully compatible with openssl/camellia.h.
61
######################################################################
# single-round subroutines
#
# Starts the $code buffer with the assembler prologue and the two
# single-block entry points, cmll_t4_encrypt and cmll_t4_decrypt.
# Both handle a misaligned input pointer by loading three aligned
# doublewords and shifting, and a misaligned output pointer with
# faligndata plus partial stores.
{
# The six symbolic names map, in order, onto %o0..%o5.
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));

$code=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

.text

.globl	cmll_t4_encrypt
.align	32
cmll_t4_encrypt:
	andcc	$inp, 7, %g1		! is input aligned?
	andn	$inp, 7, $inp

	ldx	[$key + 0], %g4
	ldx	[$key + 8], %g5

	ldx	[$inp + 0], %o4
	bz,pt	%icc, 1f
	ldx	[$inp + 8], %o5
	ldx	[$inp + 16], $inp
	sll	%g1, 3, %g1
	sub	%g0, %g1, %o3
	sllx	%o4, %g1, %o4
	sllx	%o5, %g1, %g1
	srlx	%o5, %o3, %o5
	srlx	$inp, %o3, %o3
	or	%o5, %o4, %o4
	or	%o3, %g1, %o5
1:
	ld	[$key + 272], $rounds	! grandRounds, 3 or 4
	ldd	[$key + 16], %f12
	ldd	[$key + 24], %f14
	xor	%g4, %o4, %o4
	xor	%g5, %o5, %o5
	ldd	[$key + 32], %f16
	ldd	[$key + 40], %f18
	movxtod	%o4, %f0
	movxtod	%o5, %f2
	ldd	[$key + 48], %f20
	ldd	[$key + 56], %f22
	sub	$rounds, 1, $rounds
	ldd	[$key + 64], %f24
	ldd	[$key + 72], %f26
	add	$key, 80, $key

.Lenc:
	camellia_f	%f12, %f2, %f0, %f2
	ldd	[$key + 0], %f12
	sub	$rounds,1,$rounds
	camellia_f	%f14, %f0, %f2, %f0
	ldd	[$key + 8], %f14
	camellia_f	%f16, %f2, %f0, %f2
	ldd	[$key + 16], %f16
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	ldd	[$key + 32], %f20
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key + 40], %f22
	camellia_fl	%f24, %f0, %f0
	ldd	[$key + 48], %f24
	camellia_fli	%f26, %f2, %f2
	ldd	[$key + 56], %f26
	brnz,pt	$rounds, .Lenc
	add	$key, 64, $key

	andcc	$out, 7, $tmp		! is output aligned?
	camellia_f	%f12, %f2, %f0, %f2
	camellia_f	%f14, %f0, %f2, %f0
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f20, %f2, %f0, %f4
	camellia_f	%f22, %f0, %f4, %f2
	fxor	%f24, %f4, %f0
	fxor	%f26, %f2, %f2

	bnz,pn	%icc, 2f
	nop

	std	%f0, [$out + 0]
	retl
	std	%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov	0xff, $mask
	srl	$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda	%f4, [$out + $mask]0xc0	! partial store
	std	%f6, [$out + 8]
	add	$out, 16, $out
	orn	%g0, $mask, $mask
	retl
	stda	%f8, [$out + $mask]0xc0	! partial store
.type	cmll_t4_encrypt,#function
.size	cmll_t4_encrypt,.-cmll_t4_encrypt

.globl	cmll_t4_decrypt
.align	32
cmll_t4_decrypt:
	ld	[$key + 272], $rounds	! grandRounds, 3 or 4
	andcc	$inp, 7, %g1		! is input aligned?
	andn	$inp, 7, $inp

	sll	$rounds, 6, $rounds
	add	$rounds, $key, $key

	ldx	[$inp + 0], %o4
	bz,pt	%icc, 1f
	ldx	[$inp + 8], %o5
	ldx	[$inp + 16], $inp
	sll	%g1, 3, %g1
	sub	%g0, %g1, %g4
	sllx	%o4, %g1, %o4
	sllx	%o5, %g1, %g1
	srlx	%o5, %g4, %o5
	srlx	$inp, %g4, %g4
	or	%o5, %o4, %o4
	or	%g4, %g1, %o5
1:
	ldx	[$key + 0], %g4
	ldx	[$key + 8], %g5
	ldd	[$key - 8], %f12
	ldd	[$key - 16], %f14
	xor	%g4, %o4, %o4
	xor	%g5, %o5, %o5
	ldd	[$key - 24], %f16
	ldd	[$key - 32], %f18
	movxtod	%o4, %f0
	movxtod	%o5, %f2
	ldd	[$key - 40], %f20
	ldd	[$key - 48], %f22
	sub	$rounds, 64, $rounds
	ldd	[$key - 56], %f24
	ldd	[$key - 64], %f26
	sub	$key, 64, $key

.Ldec:
	camellia_f	%f12, %f2, %f0, %f2
	ldd	[$key - 8], %f12
	sub	$rounds, 64, $rounds
	camellia_f	%f14, %f0, %f2, %f0
	ldd	[$key - 16], %f14
	camellia_f	%f16, %f2, %f0, %f2
	ldd	[$key - 24], %f16
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key - 32], %f18
	camellia_f	%f20, %f2, %f0, %f2
	ldd	[$key - 40], %f20
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key - 48], %f22
	camellia_fl	%f24, %f0, %f0
	ldd	[$key - 56], %f24
	camellia_fli	%f26, %f2, %f2
	ldd	[$key - 64], %f26
	brnz,pt	$rounds, .Ldec
	sub	$key, 64, $key

	andcc	$out, 7, $tmp		! is output aligned?
	camellia_f	%f12, %f2, %f0, %f2
	camellia_f	%f14, %f0, %f2, %f0
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f20, %f2, %f0, %f4
	camellia_f	%f22, %f0, %f4, %f2
	fxor	%f26, %f4, %f0
	fxor	%f24, %f2, %f2

	bnz,pn	%icc, 2f
	nop

	std	%f0, [$out + 0]
	retl
	std	%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov	0xff, $mask
	srl	$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda	%f4, [$out + $mask]0xc0	! partial store
	std	%f6, [$out + 8]
	add	$out, 16, $out
	orn	%g0, $mask, $mask
	retl
	stda	%f8, [$out + $mask]0xc0	! partial store
.type	cmll_t4_decrypt,#function
.size	cmll_t4_decrypt,.-cmll_t4_decrypt
___
}
265
######################################################################
# key setup subroutines
#
# Appends cmll_t4_set_key and the SIGMA constant table to $code.  The
# routine stores the key schedule at $out and writes the grand-round
# count (3 on the 128-bit path, 4 on the 192/256-bit path) at offset
# 0x110, the same offset (272) the cipher routines read it from.
{
# ROTL128($rot)
#
# Returns assembly text that rotates the 128-bit quantity held in the
# register pair %o4 (upper half) : %o5 (lower half) left by $rot bits,
# using %g4 and %g5 as scratch.  The text has no trailing newline and its
# lines are joined with "\n\t".  It is spliced into the heredoc below via
# backtick expressions.  The shift counts "$rot" and "64-$rot" are left
# for the assembler to evaluate; callers in this file pass 15..51.
sub ROTL128 {
    my ($rot) = @_;

    return join("\n\t",
	"srlx %o4, 64-$rot, %g4",
	"sllx %o4, $rot, %o4",
	"srlx %o5, 64-$rot, %g5",
	"sllx %o5, $rot, %o5",
	"or %o4, %g5, %o4",
	"or %o5, %g4, %o5");
}

# Four arguments/temporaries, mapped in order onto %o0..%o3.
my ($inp,$bits,$out,$tmp)=map("%o$_",(0..3));
$code.=<<___;
.globl	cmll_t4_set_key
.align	32
cmll_t4_set_key:
	and	$inp, 7, $tmp
	alignaddr	$inp, %g0, $inp
	cmp	$bits, 192
	ldd	[$inp + 0], %f0
	bl,pt	%icc,.L128
	ldd	[$inp + 8], %f2

	be,pt	%icc,.L192
	ldd	[$inp + 16], %f4

	brz,pt	$tmp, .L256aligned
	ldd	[$inp + 24], %f6

	ldd	[$inp + 32], %f8
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	b	.L256aligned
	faligndata	%f6, %f8, %f6

.align	16
.L192:
	brz,a,pt	$tmp, .L256aligned
	fnot2	%f4, %f6

	ldd	[$inp + 24], %f6
	nop
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	fnot2	%f4, %f6

.L256aligned:
	std	%f0, [$out + 0]		! k[0, 1]
	fsrc2	%f0, %f28
	std	%f2, [$out + 8]		! k[2, 3]
	fsrc2	%f2, %f30
	fxor	%f4, %f0, %f0
	b	.L128key
	fxor	%f6, %f2, %f2

.align	16
.L128:
	brz,pt	$tmp, .L128aligned
	nop

	ldd	[$inp + 16], %f4
	nop
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2

.L128aligned:
	std	%f0, [$out + 0]		! k[0, 1]
	fsrc2	%f0, %f28
	std	%f2, [$out + 8]		! k[2, 3]
	fsrc2	%f2, %f30

.L128key:
	mov	%o7, %o5
1:	call	.+8
	add	%o7, SIGMA-1b, %o4
	mov	%o5, %o7

	ldd	[%o4 + 0], %f16
	ldd	[%o4 + 8], %f18
	ldd	[%o4 + 16], %f20
	ldd	[%o4 + 24], %f22

	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	fxor	%f28, %f0, %f0
	fxor	%f30, %f2, %f2
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0

	bge,pn	%icc, .L256key
	nop
	std	%f0, [$out + 0x10]	! k[ 4, 5]
	std	%f2, [$out + 0x18]	! k[ 6, 7]

	movdtox	%f0, %o4
	movdtox	%f2, %o5
	`&ROTL128(15)`
	stx	%o4, [$out + 0x30]	! k[12, 13]
	stx	%o5, [$out + 0x38]	! k[14, 15]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x40]	! k[16, 17]
	stx	%o5, [$out + 0x48]	! k[18, 19]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x60]	! k[24, 25]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x70]	! k[28, 29]
	stx	%o5, [$out + 0x78]	! k[30, 31]
	`&ROTL128(34)`
	stx	%o4, [$out + 0xa0]	! k[40, 41]
	stx	%o5, [$out + 0xa8]	! k[42, 43]
	`&ROTL128(17)`
	stx	%o4, [$out + 0xc0]	! k[48, 49]
	stx	%o5, [$out + 0xc8]	! k[50, 51]

	movdtox	%f28, %o4		! k[ 0, 1]
	movdtox	%f30, %o5		! k[ 2, 3]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x20]	! k[ 8, 9]
	stx	%o5, [$out + 0x28]	! k[10, 11]
	`&ROTL128(30)`
	stx	%o4, [$out + 0x50]	! k[20, 21]
	stx	%o5, [$out + 0x58]	! k[22, 23]
	`&ROTL128(15)`
	stx	%o5, [$out + 0x68]	! k[26, 27]
	`&ROTL128(17)`
	stx	%o4, [$out + 0x80]	! k[32, 33]
	stx	%o5, [$out + 0x88]	! k[34, 35]
	`&ROTL128(17)`
	stx	%o4, [$out + 0x90]	! k[36, 37]
	stx	%o5, [$out + 0x98]	! k[38, 39]
	`&ROTL128(17)`
	stx	%o4, [$out + 0xb0]	! k[44, 45]
	stx	%o5, [$out + 0xb8]	! k[46, 47]

	mov	3, $tmp
	st	$tmp, [$out + 0x110]
	retl
	xor	%o0, %o0, %o0

.align	16
.L256key:
	ldd	[%o4 + 32], %f24
	ldd	[%o4 + 40], %f26

	std	%f0, [$out + 0x30]	! k[12, 13]
	std	%f2, [$out + 0x38]	! k[14, 15]

	fxor	%f4, %f0, %f0
	fxor	%f6, %f2, %f2
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0

	std	%f0, [$out + 0x10]	! k[ 4, 5]
	std	%f2, [$out + 0x18]	! k[ 6, 7]

	movdtox	%f0, %o4
	movdtox	%f2, %o5
	`&ROTL128(30)`
	stx	%o4, [$out + 0x50]	! k[20, 21]
	stx	%o5, [$out + 0x58]	! k[22, 23]
	`&ROTL128(30)`
	stx	%o4, [$out + 0xa0]	! k[40, 41]
	stx	%o5, [$out + 0xa8]	! k[42, 43]
	`&ROTL128(51)`
	stx	%o4, [$out + 0x100]	! k[64, 65]
	stx	%o5, [$out + 0x108]	! k[66, 67]

	movdtox	%f4, %o4		! k[ 8, 9]
	movdtox	%f6, %o5		! k[10, 11]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x20]	! k[ 8, 9]
	stx	%o5, [$out + 0x28]	! k[10, 11]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x40]	! k[16, 17]
	stx	%o5, [$out + 0x48]	! k[18, 19]
	`&ROTL128(30)`
	stx	%o4, [$out + 0x90]	! k[36, 37]
	stx	%o5, [$out + 0x98]	! k[38, 39]
	`&ROTL128(34)`
	stx	%o4, [$out + 0xd0]	! k[52, 53]
	stx	%o5, [$out + 0xd8]	! k[54, 55]
	ldx	[$out + 0x30], %o4	! k[12, 13]
	ldx	[$out + 0x38], %o5	! k[14, 15]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x30]	! k[12, 13]
	stx	%o5, [$out + 0x38]	! k[14, 15]
	`&ROTL128(30)`
	stx	%o4, [$out + 0x70]	! k[28, 29]
	stx	%o5, [$out + 0x78]	! k[30, 31]
	srlx	%o4, 32, %g4
	srlx	%o5, 32, %g5
	st	%o4, [$out + 0xc0]	! k[48]
	st	%g5, [$out + 0xc4]	! k[49]
	st	%o5, [$out + 0xc8]	! k[50]
	st	%g4, [$out + 0xcc]	! k[51]
	`&ROTL128(49)`
	stx	%o4, [$out + 0xe0]	! k[56, 57]
	stx	%o5, [$out + 0xe8]	! k[58, 59]

	movdtox	%f28, %o4		! k[ 0, 1]
	movdtox	%f30, %o5		! k[ 2, 3]
	`&ROTL128(45)`
	stx	%o4, [$out + 0x60]	! k[24, 25]
	stx	%o5, [$out + 0x68]	! k[26, 27]
	`&ROTL128(15)`
	stx	%o4, [$out + 0x80]	! k[32, 33]
	stx	%o5, [$out + 0x88]	! k[34, 35]
	`&ROTL128(17)`
	stx	%o4, [$out + 0xb0]	! k[44, 45]
	stx	%o5, [$out + 0xb8]	! k[46, 47]
	`&ROTL128(34)`
	stx	%o4, [$out + 0xf0]	! k[60, 61]
	stx	%o5, [$out + 0xf8]	! k[62, 63]

	mov	4, $tmp
	st	$tmp, [$out + 0x110]
	retl
	xor	%o0, %o0, %o0
.type	cmll_t4_set_key,#function
.size	cmll_t4_set_key,.-cmll_t4_set_key
.align	32
SIGMA:
	.long	0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2
	.long	0xc6ef372f, 0xe94f82be, 0x54ff53a5, 0xf1d36f1c
	.long	0x10e527fa, 0xde682d1d, 0xb05688c2, 0xb3e6c1fd
.type	SIGMA,#object
.size	SIGMA,.-SIGMA
.asciz	"Camellia for SPARC T4, David S. Miller, Andy Polyakov"
___
}
502
# Internal helpers for the multi-block modes: key-schedule loaders and
# 1x / 2x-interleaved block routines for 128-bit and 256-bit schedules,
# followed by instantiation of the CBC (and, when $::evp is set, CTR32)
# mode code through the alg_*_implement helpers, which are not defined in
# this file (presumably they come from the required sparcv9_modes.pl).
{{{
# Only $key is referenced by the heredocs in this block; the remaining
# names are declared but not used here.
my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));

$code.=<<___;
.align	32
_cmll128_load_enckey:
	ldx	[$key + 0], %g4
	ldx	[$key + 8], %g5
___
foreach my $i (2 .. 25) {			# load key schedule
    $code.=<<___;
	ldd	[$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
	retl
	nop
.type	_cmll128_load_enckey,#function
.size	_cmll128_load_enckey,.-_cmll128_load_enckey
_cmll256_load_enckey=_cmll128_load_enckey

.align	32
_cmll256_load_deckey:
	ldd	[$key + 64], %f62
	ldd	[$key + 72], %f60
	b	.Load_deckey
	add	$key, 64, $key
_cmll128_load_deckey:
	ldd	[$key + 0], %f60
	ldd	[$key + 8], %f62
.Load_deckey:
___
foreach my $i (2 .. 23) {			# load key schedule
    $code.=<<___;
	ldd	[$key + `8*$i`], %f`62-2*$i`
___
}
$code.=<<___;
	ldx	[$key + 192], %g4
	retl
	ldx	[$key + 200], %g5
.type	_cmll256_load_deckey,#function
.size	_cmll256_load_deckey,.-_cmll256_load_deckey

.align	32
_cmll128_encrypt_1x:
___
# Three groups of rounds; the FL/FLI layer follows only the first two.
foreach my $i (0 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
___
$code.=<<___ if ($i<2);
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
___
}
$code.=<<___;
	camellia_f	%f56, %f2, %f0, %f4
	camellia_f	%f58, %f0, %f4, %f2
	fxor	%f60, %f4, %f0
	retl
	fxor	%f62, %f2, %f2
.type	_cmll128_encrypt_1x,#function
.size	_cmll128_encrypt_1x,.-_cmll128_encrypt_1x
_cmll128_decrypt_1x=_cmll128_encrypt_1x

.align	32
_cmll128_encrypt_2x:
___
foreach my $i (0 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
___
$code.=<<___ if ($i<2);
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fl	%f`16+16*$i+12`, %f4, %f4
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
	camellia_fli	%f`16+16*$i+14`, %f6, %f6
___
}
$code.=<<___;
	camellia_f	%f56, %f2, %f0, %f8
	camellia_f	%f56, %f6, %f4, %f10
	camellia_f	%f58, %f0, %f8, %f2
	camellia_f	%f58, %f4, %f10, %f6
	fxor	%f60, %f8, %f0
	fxor	%f60, %f10, %f4
	fxor	%f62, %f2, %f2
	retl
	fxor	%f62, %f6, %f6
.type	_cmll128_encrypt_2x,#function
.size	_cmll128_encrypt_2x,.-_cmll128_encrypt_2x
_cmll128_decrypt_2x=_cmll128_encrypt_2x

.align	32
_cmll256_encrypt_1x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key + 208], %f16
	ldd	[$key + 216], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key + 224], %f20
	ldd	[$key + 232], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0
	ldd	[$key + 240], %f24
	ldd	[$key + 248], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fli	%f30, %f2, %f2
	ldd	[$key + 256], %f28
	ldd	[$key + 264], %f30
___
foreach my $i (1 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key + 16], %f16
	ldd	[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key + 32], %f20
	ldd	[$key + 40], %f22
	camellia_f	%f24, %f2, %f0, %f4
	camellia_f	%f26, %f0, %f4, %f2
	ldd	[$key + 48], %f24
	ldd	[$key + 56], %f26
	fxor	%f28, %f4, %f0
	fxor	%f30, %f2, %f2
	ldd	[$key + 64], %f28
	retl
	ldd	[$key + 72], %f30
.type	_cmll256_encrypt_1x,#function
.size	_cmll256_encrypt_1x,.-_cmll256_encrypt_1x

.align	32
_cmll256_encrypt_2x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd	[$key + 208], %f16
	ldd	[$key + 216], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd	[$key + 224], %f20
	ldd	[$key + 232], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f24, %f6, %f4, %f6
	camellia_f	%f26, %f0, %f2, %f0
	camellia_f	%f26, %f4, %f6, %f4
	ldd	[$key + 240], %f24
	ldd	[$key + 248], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fl	%f28, %f4, %f4
	camellia_fli	%f30, %f2, %f2
	camellia_fli	%f30, %f6, %f6
	ldd	[$key + 256], %f28
	ldd	[$key + 264], %f30
___
foreach my $i (1 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fl	%f`16+16*$i+12`, %f4, %f4
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
	camellia_fli	%f`16+16*$i+14`, %f6, %f6
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd	[$key + 16], %f16
	ldd	[$key + 24], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd	[$key + 32], %f20
	ldd	[$key + 40], %f22
	camellia_f	%f24, %f2, %f0, %f8
	camellia_f	%f24, %f6, %f4, %f10
	camellia_f	%f26, %f0, %f8, %f2
	camellia_f	%f26, %f4, %f10, %f6
	ldd	[$key + 48], %f24
	ldd	[$key + 56], %f26
	fxor	%f28, %f8, %f0
	fxor	%f28, %f10, %f4
	fxor	%f30, %f2, %f2
	fxor	%f30, %f6, %f6
	ldd	[$key + 64], %f28
	retl
	ldd	[$key + 72], %f30
.type	_cmll256_encrypt_2x,#function
.size	_cmll256_encrypt_2x,.-_cmll256_encrypt_2x

.align	32
_cmll256_decrypt_1x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key - 8], %f16
	ldd	[$key - 16], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key - 24], %f20
	ldd	[$key - 32], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f26, %f0, %f2, %f0
	ldd	[$key - 40], %f24
	ldd	[$key - 48], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fli	%f30, %f2, %f2
	ldd	[$key - 56], %f28
	ldd	[$key - 64], %f30
___
foreach my $i (1 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f18, %f0, %f2, %f0
	ldd	[$key + 184], %f16
	ldd	[$key + 176], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f22, %f0, %f2, %f0
	ldd	[$key + 168], %f20
	ldd	[$key + 160], %f22
	camellia_f	%f24, %f2, %f0, %f4
	camellia_f	%f26, %f0, %f4, %f2
	ldd	[$key + 152], %f24
	ldd	[$key + 144], %f26
	fxor	%f30, %f4, %f0
	fxor	%f28, %f2, %f2
	ldd	[$key + 136], %f28
	retl
	ldd	[$key + 128], %f30
.type	_cmll256_decrypt_1x,#function
.size	_cmll256_decrypt_1x,.-_cmll256_decrypt_1x

.align	32
_cmll256_decrypt_2x:
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd	[$key - 8], %f16
	ldd	[$key - 16], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd	[$key - 24], %f20
	ldd	[$key - 32], %f22
	camellia_f	%f24, %f2, %f0, %f2
	camellia_f	%f24, %f6, %f4, %f6
	camellia_f	%f26, %f0, %f2, %f0
	camellia_f	%f26, %f4, %f6, %f4
	ldd	[$key - 40], %f24
	ldd	[$key - 48], %f26
	camellia_fl	%f28, %f0, %f0
	camellia_fl	%f28, %f4, %f4
	camellia_fli	%f30, %f2, %f2
	camellia_fli	%f30, %f6, %f6
	ldd	[$key - 56], %f28
	ldd	[$key - 64], %f30
___
foreach my $i (1 .. 2) {
    $code.=<<___;
	camellia_f	%f`16+16*$i+0`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+0`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+2`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+2`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+4`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+4`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+6`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+6`, %f4, %f6, %f4
	camellia_f	%f`16+16*$i+8`, %f2, %f0, %f2
	camellia_f	%f`16+16*$i+8`, %f6, %f4, %f6
	camellia_f	%f`16+16*$i+10`, %f0, %f2, %f0
	camellia_f	%f`16+16*$i+10`, %f4, %f6, %f4
	camellia_fl	%f`16+16*$i+12`, %f0, %f0
	camellia_fl	%f`16+16*$i+12`, %f4, %f4
	camellia_fli	%f`16+16*$i+14`, %f2, %f2
	camellia_fli	%f`16+16*$i+14`, %f6, %f6
___
}
$code.=<<___;
	camellia_f	%f16, %f2, %f0, %f2
	camellia_f	%f16, %f6, %f4, %f6
	camellia_f	%f18, %f0, %f2, %f0
	camellia_f	%f18, %f4, %f6, %f4
	ldd	[$key + 184], %f16
	ldd	[$key + 176], %f18
	camellia_f	%f20, %f2, %f0, %f2
	camellia_f	%f20, %f6, %f4, %f6
	camellia_f	%f22, %f0, %f2, %f0
	camellia_f	%f22, %f4, %f6, %f4
	ldd	[$key + 168], %f20
	ldd	[$key + 160], %f22
	camellia_f	%f24, %f2, %f0, %f8
	camellia_f	%f24, %f6, %f4, %f10
	camellia_f	%f26, %f0, %f8, %f2
	camellia_f	%f26, %f4, %f10, %f6
	ldd	[$key + 152], %f24
	ldd	[$key + 144], %f26
	fxor	%f30, %f8, %f0
	fxor	%f30, %f10, %f4
	fxor	%f28, %f2, %f2
	fxor	%f28, %f6, %f6
	ldd	[$key + 136], %f28
	retl
	ldd	[$key + 128], %f30
.type	_cmll256_decrypt_2x,#function
.size	_cmll256_decrypt_2x,.-_cmll256_decrypt_2x
___

alg_cbc_encrypt_implement("cmll",128);
alg_cbc_encrypt_implement("cmll",256);

alg_cbc_decrypt_implement("cmll",128);
alg_cbc_decrypt_implement("cmll",256);

# CTR32 is only instantiated for the EVP build.
if ($::evp) {
    alg_ctr32_implement("cmll",128);
    alg_ctr32_implement("cmll",256);
}
}}}
882
# Non-EVP build only: emit entry points under the Camellia_* names.
# Camellia_encrypt/Camellia_decrypt are plain aliases of the cmll_t4_*
# routines.  Camellia_set_key checks its arguments before branching to
# cmll_t4_set_key.  Camellia_cbc_encrypt dispatches on the direction
# flag and on the grand-round count read from key offset 272.
if (!$::evp) {
$code.=<<___;
.global	Camellia_encrypt
Camellia_encrypt=cmll_t4_encrypt
.global	Camellia_decrypt
Camellia_decrypt=cmll_t4_decrypt
.global	Camellia_set_key
.align	32
Camellia_set_key:
	andcc	%o2, 7, %g0		! double-check alignment
	bnz,a,pn	%icc, 1f
	mov	-1, %o0
	brz,a,pn	%o0, 1f
	mov	-1, %o0
	brz,a,pn	%o2, 1f
	mov	-1, %o0
	andncc	%o1, 0x1c0, %g0
	bnz,a,pn	%icc, 1f
	mov	-2, %o0
	cmp	%o1, 128
	bl,a,pn	%icc, 1f
	mov	-2, %o0
	b	cmll_t4_set_key
	nop
1:	retl
	nop
.type	Camellia_set_key,#function
.size	Camellia_set_key,.-Camellia_set_key
___

# Arguments map, in order, onto %o0..%o5; only $key and $enc are
# referenced by the code below.
my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));

$code.=<<___;
.globl	Camellia_cbc_encrypt
.align	32
Camellia_cbc_encrypt:
	ld	[$key + 272], %g1
	nop
	brz	$enc, .Lcbc_decrypt
	cmp	%g1, 3

	be,pt	%icc, cmll128_t4_cbc_encrypt
	nop
	ba	cmll256_t4_cbc_encrypt
	nop

.Lcbc_decrypt:
	be,pt	%icc, cmll128_t4_cbc_decrypt
	nop
	ba	cmll256_t4_cbc_decrypt
	nop
.type	Camellia_cbc_encrypt,#function
.size	Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
___
}
938
# Hand the accumulated $code to emit_assembler(), which is not defined in
# this file (presumably provided by the required sparcv9_modes.pl).
emit_assembler();

# Write errors on a buffered handle may only surface at close time, so a
# failed close is fatal.
close STDOUT or die "error closing STDOUT: $!";