2 # Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by David S. Miller and Andy Polyakov.
12 # The module is licensed under 2-clause BSD
13 # license. March 2013. All rights reserved.
14 # ====================================================================
16 ######################################################################
19 # As with other hardware-assisted ciphers CBC encrypt results [for
20 # aligned data] are virtually identical to critical path lengths:
23 # CBC encrypt 4.14/4.15(*) 11.7/11.7
24 # CBC decrypt 1.77/4.11(**) 6.42/7.47
26 # (*) numbers after slash are for
28 # (**) this is result for largest
29 # block size, unlike all other
30 # cases smaller blocks results
# Derive the directory this script was invoked from and add it, together
# with the perlasm directory two levels up, to the module search path so
# that the shared SPARC helper file can be loaded.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# Redirect the generated assembly to the requested output file.  The
# three-argument form keeps the file name from being parsed as a mode,
# and a failed open is fatal: re-opening STDOUT closes the old handle
# first, so carrying on after a failure would silently drop all output.
# With no output name given, STDOUT is left untouched.
if (defined($output) && length($output)) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
! Include the project header sparc_arch.h (contents not visible here), then
! declare %g2 and %g3 as scratch registers to the assembler; the CBC
! routines further down use them as working registers.
41 #include "sparc_arch.h"
44 .register
%g2,#scratch
45 .register
%g3,#scratch
# des_t4_key_expand: emits the routine that turns a DES key into its key
# schedule.  The key is read through $inp (%o0) and the schedule is
# written through $out (%o1) as 16 doublewords at offsets 0x00-0x78.
51 { my ($inp,$out)=("%o0","%o1");
55 .globl des_t4_key_expand
56 .type des_t4_key_expand
,#function
! The key address may be unaligned: two doublewords are loaded from the
! address produced by alignaddr and faligndata combines them into %f0.
59 alignaddr
$inp, %g0, $inp
61 ldd
[$inp + 0x00], %f0
62 ldd
[$inp + 0x08], %f2
63 faligndata
%f0, %f2, %f0
! Schedule generation: every des_kexpand result is stored once it has been
! produced, so expansion steps and stores are interleaved.  Note that the
! store offsets are not issued in ascending order.
64 1: des_kexpand
%f0, 0, %f0
65 des_kexpand
%f0, 1, %f2
66 std
%f0, [$out + 0x00]
67 des_kexpand
%f2, 3, %f6
68 std
%f2, [$out + 0x08]
69 des_kexpand
%f2, 2, %f4
70 des_kexpand
%f6, 3, %f10
71 std
%f6, [$out + 0x18]
72 des_kexpand
%f6, 2, %f8
73 std
%f4, [$out + 0x10]
74 des_kexpand
%f10, 3, %f14
75 std
%f10, [$out + 0x28]
76 des_kexpand
%f10, 2, %f12
77 std
%f8, [$out + 0x20]
78 des_kexpand
%f14, 1, %f16
79 std
%f14, [$out + 0x38]
80 des_kexpand
%f16, 3, %f20
81 std
%f12, [$out + 0x30]
82 des_kexpand
%f16, 2, %f18
83 std
%f16, [$out + 0x40]
84 des_kexpand
%f20, 3, %f24
85 std
%f20, [$out + 0x50]
86 des_kexpand
%f20, 2, %f22
87 std
%f18, [$out + 0x48]
88 des_kexpand
%f24, 3, %f28
89 std
%f24, [$out + 0x60]
90 des_kexpand
%f24, 2, %f26
91 std
%f22, [$out + 0x58]
92 des_kexpand
%f28, 1, %f30
93 std
%f28, [$out + 0x70]
94 std
%f26, [$out + 0x68]
96 std
%f30, [$out + 0x78]
97 .size des_t4_key_expand
,.-des_t4_key_expand
# Register names shared by the CBC routines that follow:
#   $inp,$out,$len,$key,$ivec  are %o0..%o4
#   $ileft,$iright,$omask      are %g1..%g3
# des_t4_cbc_encrypt is the first of those routines: single-DES CBC
# encryption with the chaining value kept in %f0.
100 { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
101 my ($ileft,$iright,$omask) = map("%g$_",(1..3));
104 .globl des_t4_cbc_encrypt
! Early exit to .Lcbc_abort; the instruction that follows the branch
! normalises the length register (see its own remark about v8+ and v9).
108 be
,pn
$::size_t_cc
, .Lcbc_abort
109 srln
$len, 0, $len ! needed on v8
+, "nop" on v9
110 ld
[$ivec + 0], %f0 ! load ivec
115 sll
$ileft, 3, $ileft
118 prefetch
[$inp + 63], 20
119 sub %g0, $ileft, $iright
121 alignaddrl
$out, %g0, $out
122 srl
$omask, %g4, $omask
! Key schedule: 16 doublewords at key offsets 0x00-0x78, in ascending
! order, are placed in %f4-%f34 ahead of the block loop.
127 ldd
[$key + 0x00], %f4 ! load key schedule
128 ldd
[$key + 0x08], %f6
129 ldd
[$key + 0x10], %f8
130 ldd
[$key + 0x18], %f10
131 ldd
[$key + 0x20], %f12
132 ldd
[$key + 0x28], %f14
133 ldd
[$key + 0x30], %f16
134 ldd
[$key + 0x38], %f18
135 ldd
[$key + 0x40], %f20
136 ldd
[$key + 0x48], %f22
137 ldd
[$key + 0x50], %f24
138 ldd
[$key + 0x58], %f26
139 ldd
[$key + 0x60], %f28
140 ldd
[$key + 0x68], %f30
141 ldd
[$key + 0x70], %f32
142 ldd
[$key + 0x78], %f34
! The two shifts use the left and right shift counts computed above
! (left count = value shifted by 3, right count = its negation).
150 sllx
%g4, $ileft, %g4
151 srlx
%g5, $iright, %g5
155 prefetch
[$inp + 8+63], 20
! CBC chaining for encryption: the xor with the chaining value in %f0
! happens before the rounds.
157 fxor
%f2, %f0, %f0 ! ^= ivec
158 prefetch
[$out + 63], 22
! Eight round instructions, each taking two schedule doublewords, update
! %f0 in place.
161 des_round
%f4, %f6, %f0, %f0
162 des_round
%f8, %f10, %f0, %f0
163 des_round
%f12, %f14, %f0, %f0
164 des_round
%f16, %f18, %f0, %f0
165 des_round
%f20, %f22, %f0, %f0
166 des_round
%f24, %f26, %f0, %f0
167 des_round
%f28, %f30, %f0, %f0
168 des_round
%f32, %f34, %f0, %f0
175 brnz
,pt
$len, .Ldes_cbc_enc_loop
178 st
%f0, [$ivec + 0] ! write out ivec
! Unaligned-output path: the result is rotated with faligndata and written
! by two partial stores; the mask is complemented between them and
! complemented again after the loop branch.
186 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
187 ! and ~4x deterioration
189 faligndata
%f0, %f0, %f2 ! handle unaligned output
191 stda
%f2, [$out + $omask]0xc0 ! partial store
193 orn
%g0, $omask, $omask
194 stda
%f2, [$out + $omask]0xc0 ! partial store
196 brnz
,pt
$len, .Ldes_cbc_enc_loop
+4
197 orn
%g0, $omask, $omask
199 st
%f0, [$ivec + 0] ! write out ivec
202 .type des_t4_cbc_encrypt
,#function
203 .size des_t4_cbc_encrypt
,.-des_t4_cbc_encrypt
! des_t4_cbc_decrypt: single-DES CBC decryption.  Compared with the
! encrypt routine the key schedule is loaded in descending offset order,
! the chaining value is kept in %f2 and the xor with it comes after the
! rounds; %f2 is what gets written back to the ivec buffer.
205 .globl des_t4_cbc_decrypt
209 be
,pn
$::size_t_cc
, .Lcbc_abort
210 srln
$len, 0, $len ! needed on v8
+, "nop" on v9
211 ld
[$ivec + 0], %f2 ! load ivec
216 sll
$ileft, 3, $ileft
219 prefetch
[$inp + 63], 20
220 sub %g0, $ileft, $iright
222 alignaddrl
$out, %g0, $out
223 srl
$omask, %g4, $omask
! Key schedule in reverse: offsets 0x78 down to 0x00 go to %f4-%f34, so
! the round sequence below consumes the schedule back to front.
228 ldd
[$key + 0x78], %f4 ! load key schedule
229 ldd
[$key + 0x70], %f6
230 ldd
[$key + 0x68], %f8
231 ldd
[$key + 0x60], %f10
232 ldd
[$key + 0x58], %f12
233 ldd
[$key + 0x50], %f14
234 ldd
[$key + 0x48], %f16
235 ldd
[$key + 0x40], %f18
236 ldd
[$key + 0x38], %f20
237 ldd
[$key + 0x30], %f22
238 ldd
[$key + 0x28], %f24
239 ldd
[$key + 0x20], %f26
240 ldd
[$key + 0x18], %f28
241 ldd
[$key + 0x10], %f30
242 ldd
[$key + 0x08], %f32
243 ldd
[$key + 0x00], %f34
251 sllx
%g4, $ileft, %g4
252 srlx
%g5, $iright, %g5
256 prefetch
[$inp + 8+63], 20
258 prefetch
[$out + 63], 22
! Eight round instructions update %f0 in place.
261 des_round
%f4, %f6, %f0, %f0
262 des_round
%f8, %f10, %f0, %f0
263 des_round
%f12, %f14, %f0, %f0
264 des_round
%f16, %f18, %f0, %f0
265 des_round
%f20, %f22, %f0, %f0
266 des_round
%f24, %f26, %f0, %f0
267 des_round
%f28, %f30, %f0, %f0
268 des_round
%f32, %f34, %f0, %f0
! CBC chaining for decryption: xor with the chaining value after the rounds.
271 fxor
%f2, %f0, %f0 ! ^= ivec
278 brnz
,pt
$len, .Ldes_cbc_dec_loop
281 st
%f2, [$ivec + 0] ! write out ivec
! Unaligned-output path: rotate with faligndata, then two partial stores
! with the mask complemented between them.
286 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
287 ! and ~4x deterioration
289 faligndata
%f0, %f0, %f0 ! handle unaligned output
291 stda
%f0, [$out + $omask]0xc0 ! partial store
293 orn
%g0, $omask, $omask
294 stda
%f0, [$out + $omask]0xc0 ! partial store
296 brnz
,pt
$len, .Ldes_cbc_dec_loop
+4
297 orn
%g0, $omask, $omask
299 st
%f2, [$ivec + 0] ! write out ivec
302 .type des_t4_cbc_decrypt
,#function
303 .size des_t4_cbc_decrypt
,.-des_t4_cbc_decrypt
306 # One might wonder why there are back-to-back des_iip/des_ip
307 # pairs between the EDE passes. Indeed, aren't they inverses of each other?
308 # They almost are. The outcome of the pair is that the 32-bit words are
309 # swapped in the target register. Consider a des_iip/des_ip pair as a way
310 # to perform the due swap; it is actually the fastest way in this case.
! des_t4_ede3_cbc_encrypt: CBC encryption with three DES passes per block.
! The schedule at key offsets 0x00-0x78 is loaded into %f4-%f34 before the
! loop; the other two schedules are streamed through %f36-%f62 inside the
! loop, with the loads interleaved between the round instructions.
313 .globl des_t4_ede3_cbc_encrypt
315 des_t4_ede3_cbc_encrypt
:
317 be
,pn
$::size_t_cc
, .Lcbc_abort
318 srln
$len, 0, $len ! needed on v8
+, "nop" on v9
319 ld
[$ivec + 0], %f0 ! load ivec
324 sll
$ileft, 3, $ileft
327 prefetch
[$inp + 63], 20
328 sub %g0, $ileft, $iright
330 alignaddrl
$out, %g0, $out
331 srl
$omask, %g4, $omask
! First-pass schedule, ascending offsets 0x00-0x78.
336 ldd
[$key + 0x00], %f4 ! load key schedule
337 ldd
[$key + 0x08], %f6
338 ldd
[$key + 0x10], %f8
339 ldd
[$key + 0x18], %f10
340 ldd
[$key + 0x20], %f12
341 ldd
[$key + 0x28], %f14
342 ldd
[$key + 0x30], %f16
343 ldd
[$key + 0x38], %f18
344 ldd
[$key + 0x40], %f20
345 ldd
[$key + 0x48], %f22
346 ldd
[$key + 0x50], %f24
347 ldd
[$key + 0x58], %f26
348 ldd
[$key + 0x60], %f28
349 ldd
[$key + 0x68], %f30
350 ldd
[$key + 0x70], %f32
351 ldd
[$key + 0x78], %f34
353 .Ldes_ede3_cbc_enc_loop
:
359 sllx
%g4, $ileft, %g4
360 srlx
%g5, $iright, %g5
364 prefetch
[$inp + 8+63], 20
! CBC chaining for encryption: xor with the chaining value before the rounds.
366 fxor
%f2, %f0, %f0 ! ^= ivec
367 prefetch
[$out + 63], 22
! Pass 1 rounds use the preloaded %f4-%f34.
370 des_round
%f4, %f6, %f0, %f0
371 des_round
%f8, %f10, %f0, %f0
372 des_round
%f12, %f14, %f0, %f0
373 des_round
%f16, %f18, %f0, %f0
! Second-pass schedule is fetched while pass 1 is still running: offsets
! 0xf8 down to 0x80, i.e. the middle 128 bytes in descending order.
374 ldd
[$key + 0x100-0x08], %f36
375 ldd
[$key + 0x100-0x10], %f38
376 des_round
%f20, %f22, %f0, %f0
377 ldd
[$key + 0x100-0x18], %f40
378 ldd
[$key + 0x100-0x20], %f42
379 des_round
%f24, %f26, %f0, %f0
380 ldd
[$key + 0x100-0x28], %f44
381 ldd
[$key + 0x100-0x30], %f46
382 des_round
%f28, %f30, %f0, %f0
383 ldd
[$key + 0x100-0x38], %f48
384 ldd
[$key + 0x100-0x40], %f50
385 des_round
%f32, %f34, %f0, %f0
386 ldd
[$key + 0x100-0x48], %f52
387 ldd
[$key + 0x100-0x50], %f54
390 ldd
[$key + 0x100-0x58], %f56
391 ldd
[$key + 0x100-0x60], %f58
393 ldd
[$key + 0x100-0x68], %f60
394 ldd
[$key + 0x100-0x70], %f62
! Pass 2 rounds.  %f36/%f38 are reloaded right after their first use
! because %f36-%f62 only hold 14 of the 16 schedule doublewords at once.
395 des_round
%f36, %f38, %f0, %f0
396 ldd
[$key + 0x100-0x78], %f36
397 ldd
[$key + 0x100-0x80], %f38
398 des_round
%f40, %f42, %f0, %f0
399 des_round
%f44, %f46, %f0, %f0
400 des_round
%f48, %f50, %f0, %f0
! Third-pass schedule, ascending offsets 0x100-0x178, fetched into the
! registers pass 2 has already finished with.
401 ldd
[$key + 0x100+0x00], %f40
402 ldd
[$key + 0x100+0x08], %f42
403 des_round
%f52, %f54, %f0, %f0
404 ldd
[$key + 0x100+0x10], %f44
405 ldd
[$key + 0x100+0x18], %f46
406 des_round
%f56, %f58, %f0, %f0
407 ldd
[$key + 0x100+0x20], %f48
408 ldd
[$key + 0x100+0x28], %f50
409 des_round
%f60, %f62, %f0, %f0
410 ldd
[$key + 0x100+0x30], %f52
411 ldd
[$key + 0x100+0x38], %f54
412 des_round
%f36, %f38, %f0, %f0
413 ldd
[$key + 0x100+0x40], %f56
414 ldd
[$key + 0x100+0x48], %f58
417 ldd
[$key + 0x100+0x50], %f60
418 ldd
[$key + 0x100+0x58], %f62
420 ldd
[$key + 0x100+0x60], %f36
421 ldd
[$key + 0x100+0x68], %f38
! Pass 3 rounds; %f40/%f42 are reloaded after their first use for the
! last two schedule doublewords.
422 des_round
%f40, %f42, %f0, %f0
423 ldd
[$key + 0x100+0x70], %f40
424 ldd
[$key + 0x100+0x78], %f42
425 des_round
%f44, %f46, %f0, %f0
426 des_round
%f48, %f50, %f0, %f0
427 des_round
%f52, %f54, %f0, %f0
428 des_round
%f56, %f58, %f0, %f0
429 des_round
%f60, %f62, %f0, %f0
430 des_round
%f36, %f38, %f0, %f0
431 des_round
%f40, %f42, %f0, %f0
438 brnz
,pt
$len, .Ldes_ede3_cbc_enc_loop
441 st
%f0, [$ivec + 0] ! write out ivec
! Unaligned-output path: rotate with faligndata, then two partial stores
! with the mask complemented between them.
446 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
447 ! and ~2x deterioration
449 faligndata
%f0, %f0, %f2 ! handle unaligned output
451 stda
%f2, [$out + $omask]0xc0 ! partial store
453 orn
%g0, $omask, $omask
454 stda
%f2, [$out + $omask]0xc0 ! partial store
456 brnz
,pt
$len, .Ldes_ede3_cbc_enc_loop
+4
457 orn
%g0, $omask, $omask
459 st
%f0, [$ivec + 0] ! write out ivec
462 .type des_t4_ede3_cbc_encrypt
,#function
463 .size des_t4_ede3_cbc_encrypt
,.-des_t4_ede3_cbc_encrypt
! des_t4_ede3_cbc_decrypt: CBC decryption counterpart of the EDE3 encrypt
! routine.  The three schedules are walked in the opposite order and
! direction: offsets 0x178 down to 0x100 are preloaded into %f4-%f34, then
! 0x80-0xf8 ascending and 0x78 down to 0x00 are streamed through %f36-%f62.
! The chaining value is kept in %f2 and xored in after the last round.
465 .globl des_t4_ede3_cbc_decrypt
467 des_t4_ede3_cbc_decrypt
:
469 be
,pn
$::size_t_cc
, .Lcbc_abort
470 srln
$len, 0, $len ! needed on v8
+, "nop" on v9
471 ld
[$ivec + 0], %f2 ! load ivec
476 sll
$ileft, 3, $ileft
479 prefetch
[$inp + 63], 20
480 sub %g0, $ileft, $iright
482 alignaddrl
$out, %g0, $out
483 srl
$omask, %g4, $omask
! First decrypt pass uses the last schedule, descending offsets.
488 ldd
[$key + 0x100+0x78], %f4 ! load key schedule
489 ldd
[$key + 0x100+0x70], %f6
490 ldd
[$key + 0x100+0x68], %f8
491 ldd
[$key + 0x100+0x60], %f10
492 ldd
[$key + 0x100+0x58], %f12
493 ldd
[$key + 0x100+0x50], %f14
494 ldd
[$key + 0x100+0x48], %f16
495 ldd
[$key + 0x100+0x40], %f18
496 ldd
[$key + 0x100+0x38], %f20
497 ldd
[$key + 0x100+0x30], %f22
498 ldd
[$key + 0x100+0x28], %f24
499 ldd
[$key + 0x100+0x20], %f26
500 ldd
[$key + 0x100+0x18], %f28
501 ldd
[$key + 0x100+0x10], %f30
502 ldd
[$key + 0x100+0x08], %f32
503 ldd
[$key + 0x100+0x00], %f34
505 .Ldes_ede3_cbc_dec_loop
:
511 sllx
%g4, $ileft, %g4
512 srlx
%g5, $iright, %g5
516 prefetch
[$inp + 8+63], 20
518 prefetch
[$out + 63], 22
! Pass 1 rounds use the preloaded %f4-%f34.
521 des_round
%f4, %f6, %f0, %f0
522 des_round
%f8, %f10, %f0, %f0
523 des_round
%f12, %f14, %f0, %f0
524 des_round
%f16, %f18, %f0, %f0
! Middle schedule, ascending offsets 0x80-0xf8, fetched while pass 1 runs.
525 ldd
[$key + 0x80+0x00], %f36
526 ldd
[$key + 0x80+0x08], %f38
527 des_round
%f20, %f22, %f0, %f0
528 ldd
[$key + 0x80+0x10], %f40
529 ldd
[$key + 0x80+0x18], %f42
530 des_round
%f24, %f26, %f0, %f0
531 ldd
[$key + 0x80+0x20], %f44
532 ldd
[$key + 0x80+0x28], %f46
533 des_round
%f28, %f30, %f0, %f0
534 ldd
[$key + 0x80+0x30], %f48
535 ldd
[$key + 0x80+0x38], %f50
536 des_round
%f32, %f34, %f0, %f0
537 ldd
[$key + 0x80+0x40], %f52
538 ldd
[$key + 0x80+0x48], %f54
541 ldd
[$key + 0x80+0x50], %f56
542 ldd
[$key + 0x80+0x58], %f58
544 ldd
[$key + 0x80+0x60], %f60
545 ldd
[$key + 0x80+0x68], %f62
! Pass 2 rounds; %f36/%f38 are reloaded after their first use for the
! last two doublewords of the middle schedule.
546 des_round
%f36, %f38, %f0, %f0
547 ldd
[$key + 0x80+0x70], %f36
548 ldd
[$key + 0x80+0x78], %f38
549 des_round
%f40, %f42, %f0, %f0
550 des_round
%f44, %f46, %f0, %f0
551 des_round
%f48, %f50, %f0, %f0
! Lowest schedule, descending offsets 0x78 down to 0x00, fetched into the
! registers pass 2 has already finished with.
552 ldd
[$key + 0x80-0x08], %f40
553 ldd
[$key + 0x80-0x10], %f42
554 des_round
%f52, %f54, %f0, %f0
555 ldd
[$key + 0x80-0x18], %f44
556 ldd
[$key + 0x80-0x20], %f46
557 des_round
%f56, %f58, %f0, %f0
558 ldd
[$key + 0x80-0x28], %f48
559 ldd
[$key + 0x80-0x30], %f50
560 des_round
%f60, %f62, %f0, %f0
561 ldd
[$key + 0x80-0x38], %f52
562 ldd
[$key + 0x80-0x40], %f54
563 des_round
%f36, %f38, %f0, %f0
564 ldd
[$key + 0x80-0x48], %f56
565 ldd
[$key + 0x80-0x50], %f58
568 ldd
[$key + 0x80-0x58], %f60
569 ldd
[$key + 0x80-0x60], %f62
571 ldd
[$key + 0x80-0x68], %f36
572 ldd
[$key + 0x80-0x70], %f38
! Pass 3 rounds; %f40/%f42 are reloaded after their first use for the
! last two schedule doublewords.
573 des_round
%f40, %f42, %f0, %f0
574 ldd
[$key + 0x80-0x78], %f40
575 ldd
[$key + 0x80-0x80], %f42
576 des_round
%f44, %f46, %f0, %f0
577 des_round
%f48, %f50, %f0, %f0
578 des_round
%f52, %f54, %f0, %f0
579 des_round
%f56, %f58, %f0, %f0
580 des_round
%f60, %f62, %f0, %f0
581 des_round
%f36, %f38, %f0, %f0
582 des_round
%f40, %f42, %f0, %f0
! CBC chaining for decryption: xor with the chaining value after the rounds.
585 fxor
%f2, %f0, %f0 ! ^= ivec
592 brnz
,pt
$len, .Ldes_ede3_cbc_dec_loop
595 st
%f2, [$ivec + 0] ! write out ivec
! Unaligned-output path: rotate with faligndata, then two partial stores
! with the mask complemented between them.
600 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
601 ! and ~3x deterioration
603 faligndata
%f0, %f0, %f0 ! handle unaligned output
605 stda
%f0, [$out + $omask]0xc0 ! partial store
607 orn
%g0, $omask, $omask
608 stda
%f0, [$out + $omask]0xc0 ! partial store
610 brnz
,pt
$len, .Ldes_ede3_cbc_dec_loop
+4
611 orn
%g0, $omask, $omask
613 st
%f2, [$ivec + 0] ! write out ivec
616 .type des_t4_ede3_cbc_decrypt
,#function
617 .size des_t4_ede3_cbc_decrypt
,.-des_t4_ede3_cbc_decrypt
! NUL-terminated identification string emitted into the generated module.
621 .asciz
"DES for SPARC T4, David S. Miller, Andy Polyakov"