2 # Copyright 2013-2021 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the Apache License 2.0 (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by David S. Miller and Andy Polyakov.
12 # The module is licensed under 2-clause BSD
13 # license. March 2013. All rights reserved.
14 # ====================================================================
16 ######################################################################
19 # As with other hardware-assisted ciphers CBC encrypt results [for
20 # aligned data] are virtually identical to critical path lengths:
23 # CBC encrypt 4.14/4.15(*) 11.7/11.7
24 # CBC decrypt 1.77/4.11(**) 6.42/7.47
26 # (*) numbers after slash are for misaligned data;
28 # (**) this is result for largest
29 # block size, unlike all other
30 # cases smaller blocks results
# Work out the directory this script lives in (everything up to and
# including the last path separator of $0) so that the shared perlasm
# helper can be found either next to the script or under ../../perlasm.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# The last command-line argument, when present, names the output file and
# STDOUT is redirected to it; with no argument STDOUT is left untouched.
# Three-argument open keeps the file name from being parsed as part of the
# mode, and the explicit check stops the generator instead of silently
# writing the assembly to the original STDOUT when the file cannot be
# created.
$output = pop and (open(STDOUT, '>', $output)
                   or die "can't open $output: $!");
41 # define __ASSEMBLER__ 1
43 #include "crypto/sparc_arch.h"
46 .register
%g2,#scratch
47 .register
%g3,#scratch
# des_t4_key_expand: emitted key-schedule routine.
#
# $inp (%o0) is the address the raw key is read from and $out (%o1) the
# address the expanded schedule is written to.  The visible code aligns
# $inp with alignaddr, loads two doublewords and merges them with
# faligndata (presumably so that a misaligned key pointer is tolerated),
# then chains des_kexpand steps, each fed by an earlier result, and stores
# sixteen 8-byte values at $out+0x00 .. $out+0x78.  The stores are
# interleaved with the expansion steps and are not issued in address order.
53 { my ($inp,$out)=("%o0","%o1");
57 .globl des_t4_key_expand
58 .type des_t4_key_expand
,#function
61 alignaddr
$inp, %g0, $inp
63 ldd
[$inp + 0x00], %f0
64 ldd
[$inp + 0x08], %f2
65 faligndata
%f0, %f2, %f0
66 1: des_kexpand
%f0, 0, %f0
67 des_kexpand
%f0, 1, %f2
68 std
%f0, [$out + 0x00]
69 des_kexpand
%f2, 3, %f6
70 std
%f2, [$out + 0x08]
71 des_kexpand
%f2, 2, %f4
72 des_kexpand
%f6, 3, %f10
73 std
%f6, [$out + 0x18]
74 des_kexpand
%f6, 2, %f8
75 std
%f4, [$out + 0x10]
76 des_kexpand
%f10, 3, %f14
77 std
%f10, [$out + 0x28]
78 des_kexpand
%f10, 2, %f12
79 std
%f8, [$out + 0x20]
80 des_kexpand
%f14, 1, %f16
81 std
%f14, [$out + 0x38]
82 des_kexpand
%f16, 3, %f20
83 std
%f12, [$out + 0x30]
84 des_kexpand
%f16, 2, %f18
85 std
%f16, [$out + 0x40]
86 des_kexpand
%f20, 3, %f24
87 std
%f20, [$out + 0x50]
88 des_kexpand
%f20, 2, %f22
89 std
%f18, [$out + 0x48]
90 des_kexpand
%f24, 3, %f28
91 std
%f24, [$out + 0x60]
92 des_kexpand
%f24, 2, %f26
93 std
%f22, [$out + 0x58]
94 des_kexpand
%f28, 1, %f30
95 std
%f28, [$out + 0x70]
96 std
%f26, [$out + 0x68]
98 std
%f30, [$out + 0x78]
99 .size des_t4_key_expand
,.-des_t4_key_expand
# Single-DES CBC routines: des_t4_cbc_encrypt and des_t4_cbc_decrypt.
#
# Register variables: ($inp,$out,$len,$key,$ivec) map to %o0..%o4 and
# ($ileft,$iright,$omask) map to %g1..%g3.  $ileft/$iright are used as the
# shift counts applied to %g4/%g5, and $omask is the register operand of
# the partial stores on the unaligned-output path.
#
# des_t4_cbc_encrypt: reads the IV into %f0, loads sixteen key-schedule
# doublewords from $key+0x00 .. $key+0x78 in ascending order into
# %f4..%f34, and inside .Ldes_cbc_enc_loop XORs %f2 into %f0 ("^= ivec")
# before the eight des_round steps.  %f0 is written back to [$ivec + 0] on
# the way out.
#
# des_t4_cbc_decrypt: reads the IV into %f2, loads the same sixteen
# doublewords in descending order ($key+0x78 down to $key+0x00), runs the
# eight des_round steps first and only then XORs %f2 into %f0.  %f2 is
# what gets written back to [$ivec + 0].
#
# In both routines the code at local label "2:" handles unaligned output:
# faligndata followed by two partial stores (stda ... 0xc0), the second one
# issued after $omask has been complemented with orn.  Both routines branch
# to .Lcbc_abort, which is not defined in the visible part of the file.
102 { my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
103 my ($ileft,$iright,$omask) = map("%g$_",(1..3));
106 .globl des_t4_cbc_encrypt
110 be
,pn
$::size_t_cc
, .Lcbc_abort
111 srln
$len, 0, $len ! needed on v8
+, "nop" on v9
112 ld
[$ivec + 0], %f0 ! load ivec
117 sll
$ileft, 3, $ileft
120 prefetch
[$inp + 63], 20
121 sub %g0, $ileft, $iright
123 alignaddrl
$out, %g0, $out
124 srl
$omask, %g4, $omask
129 ldd
[$key + 0x00], %f4 ! load key schedule
130 ldd
[$key + 0x08], %f6
131 ldd
[$key + 0x10], %f8
132 ldd
[$key + 0x18], %f10
133 ldd
[$key + 0x20], %f12
134 ldd
[$key + 0x28], %f14
135 ldd
[$key + 0x30], %f16
136 ldd
[$key + 0x38], %f18
137 ldd
[$key + 0x40], %f20
138 ldd
[$key + 0x48], %f22
139 ldd
[$key + 0x50], %f24
140 ldd
[$key + 0x58], %f26
141 ldd
[$key + 0x60], %f28
142 ldd
[$key + 0x68], %f30
143 ldd
[$key + 0x70], %f32
144 ldd
[$key + 0x78], %f34
152 sllx
%g4, $ileft, %g4
153 srlx
%g5, $iright, %g5
157 prefetch
[$inp + 8+63], 20
159 fxor
%f2, %f0, %f0 ! ^= ivec
160 prefetch
[$out + 63], 22
163 des_round
%f4, %f6, %f0, %f0
164 des_round
%f8, %f10, %f0, %f0
165 des_round
%f12, %f14, %f0, %f0
166 des_round
%f16, %f18, %f0, %f0
167 des_round
%f20, %f22, %f0, %f0
168 des_round
%f24, %f26, %f0, %f0
169 des_round
%f28, %f30, %f0, %f0
170 des_round
%f32, %f34, %f0, %f0
177 brnz
,pt
$len, .Ldes_cbc_enc_loop
180 st
%f0, [$ivec + 0] ! write out ivec
188 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
189 ! and ~4x deterioration
191 faligndata
%f0, %f0, %f2 ! handle unaligned output
193 stda
%f2, [$out + $omask]0xc0 ! partial store
195 orn
%g0, $omask, $omask
196 stda
%f2, [$out + $omask]0xc0 ! partial store
198 brnz
,pt
$len, .Ldes_cbc_enc_loop
+4
199 orn
%g0, $omask, $omask
201 st
%f0, [$ivec + 0] ! write out ivec
204 .type des_t4_cbc_encrypt
,#function
205 .size des_t4_cbc_encrypt
,.-des_t4_cbc_encrypt
207 .globl des_t4_cbc_decrypt
211 be
,pn
$::size_t_cc
, .Lcbc_abort
212 srln
$len, 0, $len ! needed on v8
+, "nop" on v9
213 ld
[$ivec + 0], %f2 ! load ivec
218 sll
$ileft, 3, $ileft
221 prefetch
[$inp + 63], 20
222 sub %g0, $ileft, $iright
224 alignaddrl
$out, %g0, $out
225 srl
$omask, %g4, $omask
230 ldd
[$key + 0x78], %f4 ! load key schedule
231 ldd
[$key + 0x70], %f6
232 ldd
[$key + 0x68], %f8
233 ldd
[$key + 0x60], %f10
234 ldd
[$key + 0x58], %f12
235 ldd
[$key + 0x50], %f14
236 ldd
[$key + 0x48], %f16
237 ldd
[$key + 0x40], %f18
238 ldd
[$key + 0x38], %f20
239 ldd
[$key + 0x30], %f22
240 ldd
[$key + 0x28], %f24
241 ldd
[$key + 0x20], %f26
242 ldd
[$key + 0x18], %f28
243 ldd
[$key + 0x10], %f30
244 ldd
[$key + 0x08], %f32
245 ldd
[$key + 0x00], %f34
253 sllx
%g4, $ileft, %g4
254 srlx
%g5, $iright, %g5
258 prefetch
[$inp + 8+63], 20
260 prefetch
[$out + 63], 22
263 des_round
%f4, %f6, %f0, %f0
264 des_round
%f8, %f10, %f0, %f0
265 des_round
%f12, %f14, %f0, %f0
266 des_round
%f16, %f18, %f0, %f0
267 des_round
%f20, %f22, %f0, %f0
268 des_round
%f24, %f26, %f0, %f0
269 des_round
%f28, %f30, %f0, %f0
270 des_round
%f32, %f34, %f0, %f0
273 fxor
%f2, %f0, %f0 ! ^= ivec
280 brnz
,pt
$len, .Ldes_cbc_dec_loop
283 st
%f2, [$ivec + 0] ! write out ivec
288 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
289 ! and ~4x deterioration
291 faligndata
%f0, %f0, %f0 ! handle unaligned output
293 stda
%f0, [$out + $omask]0xc0 ! partial store
295 orn
%g0, $omask, $omask
296 stda
%f0, [$out + $omask]0xc0 ! partial store
298 brnz
,pt
$len, .Ldes_cbc_dec_loop
+4
299 orn
%g0, $omask, $omask
301 st
%f2, [$ivec + 0] ! write out ivec
304 .type des_t4_cbc_decrypt
,#function
305 .size des_t4_cbc_decrypt
,.-des_t4_cbc_decrypt
# Triple-DES (EDE3) CBC routines: des_t4_ede3_cbc_encrypt and
# des_t4_ede3_cbc_decrypt.  They use the $inp/$out/$len/$key/$ivec and
# $ileft/$iright/$omask register variables.
#
# des_t4_ede3_cbc_encrypt: reads the IV into %f0 and preloads the first
# schedule ($key+0x00 .. $key+0x78, ascending) into %f4..%f34.  Inside
# .Ldes_ede3_cbc_enc_loop it XORs %f2 into %f0 before the rounds.  The
# other two schedules are loaded into %f36..%f62 in between the des_round
# steps: the second in descending order, from $key+0x100-0x08 down to
# $key+0x100-0x80, and the third in ascending order, from $key+0x100+0x00
# up to $key+0x100+0x78.  %f36..%f42 are reloaded while earlier rounds are
# still pending, so the order of loads relative to rounds matters.
#
# des_t4_ede3_cbc_decrypt: reads the IV into %f2 and preloads
# $key+0x100+0x78 down to $key+0x100+0x00 (descending) into %f4..%f34.
# Inside .Ldes_ede3_cbc_dec_loop it then loads $key+0x80+0x00 up to
# $key+0x80+0x78 (ascending) and finally $key+0x80-0x08 down to
# $key+0x80-0x80 (descending), and XORs %f2 into %f0 only after all
# rounds have run.
#
# Both routines write the IV register back to [$ivec + 0] (%f0 for encrypt,
# %f2 for decrypt) and handle unaligned output at local label "2:" with
# faligndata plus two partial stores under $omask and its complement.
308 # One might wonder why does one have back-to-back des_iip/des_ip
309 # pairs between EDE passes. Indeed, aren't they inverse of each other?
310 # They almost are. Outcome of the pair is 32-bit words being swapped
311 # in target register. Consider pair of des_iip/des_ip as a way to
312 # perform the due swap, it's actually fastest way in this case.
315 .globl des_t4_ede3_cbc_encrypt
317 des_t4_ede3_cbc_encrypt
:
319 be
,pn
$::size_t_cc
, .Lcbc_abort
320 srln
$len, 0, $len ! needed on v8
+, "nop" on v9
321 ld
[$ivec + 0], %f0 ! load ivec
326 sll
$ileft, 3, $ileft
329 prefetch
[$inp + 63], 20
330 sub %g0, $ileft, $iright
332 alignaddrl
$out, %g0, $out
333 srl
$omask, %g4, $omask
338 ldd
[$key + 0x00], %f4 ! load key schedule
339 ldd
[$key + 0x08], %f6
340 ldd
[$key + 0x10], %f8
341 ldd
[$key + 0x18], %f10
342 ldd
[$key + 0x20], %f12
343 ldd
[$key + 0x28], %f14
344 ldd
[$key + 0x30], %f16
345 ldd
[$key + 0x38], %f18
346 ldd
[$key + 0x40], %f20
347 ldd
[$key + 0x48], %f22
348 ldd
[$key + 0x50], %f24
349 ldd
[$key + 0x58], %f26
350 ldd
[$key + 0x60], %f28
351 ldd
[$key + 0x68], %f30
352 ldd
[$key + 0x70], %f32
353 ldd
[$key + 0x78], %f34
355 .Ldes_ede3_cbc_enc_loop
:
361 sllx
%g4, $ileft, %g4
362 srlx
%g5, $iright, %g5
366 prefetch
[$inp + 8+63], 20
368 fxor
%f2, %f0, %f0 ! ^= ivec
369 prefetch
[$out + 63], 22
372 des_round
%f4, %f6, %f0, %f0
373 des_round
%f8, %f10, %f0, %f0
374 des_round
%f12, %f14, %f0, %f0
375 des_round
%f16, %f18, %f0, %f0
376 ldd
[$key + 0x100-0x08], %f36
377 ldd
[$key + 0x100-0x10], %f38
378 des_round
%f20, %f22, %f0, %f0
379 ldd
[$key + 0x100-0x18], %f40
380 ldd
[$key + 0x100-0x20], %f42
381 des_round
%f24, %f26, %f0, %f0
382 ldd
[$key + 0x100-0x28], %f44
383 ldd
[$key + 0x100-0x30], %f46
384 des_round
%f28, %f30, %f0, %f0
385 ldd
[$key + 0x100-0x38], %f48
386 ldd
[$key + 0x100-0x40], %f50
387 des_round
%f32, %f34, %f0, %f0
388 ldd
[$key + 0x100-0x48], %f52
389 ldd
[$key + 0x100-0x50], %f54
392 ldd
[$key + 0x100-0x58], %f56
393 ldd
[$key + 0x100-0x60], %f58
395 ldd
[$key + 0x100-0x68], %f60
396 ldd
[$key + 0x100-0x70], %f62
397 des_round
%f36, %f38, %f0, %f0
398 ldd
[$key + 0x100-0x78], %f36
399 ldd
[$key + 0x100-0x80], %f38
400 des_round
%f40, %f42, %f0, %f0
401 des_round
%f44, %f46, %f0, %f0
402 des_round
%f48, %f50, %f0, %f0
403 ldd
[$key + 0x100+0x00], %f40
404 ldd
[$key + 0x100+0x08], %f42
405 des_round
%f52, %f54, %f0, %f0
406 ldd
[$key + 0x100+0x10], %f44
407 ldd
[$key + 0x100+0x18], %f46
408 des_round
%f56, %f58, %f0, %f0
409 ldd
[$key + 0x100+0x20], %f48
410 ldd
[$key + 0x100+0x28], %f50
411 des_round
%f60, %f62, %f0, %f0
412 ldd
[$key + 0x100+0x30], %f52
413 ldd
[$key + 0x100+0x38], %f54
414 des_round
%f36, %f38, %f0, %f0
415 ldd
[$key + 0x100+0x40], %f56
416 ldd
[$key + 0x100+0x48], %f58
419 ldd
[$key + 0x100+0x50], %f60
420 ldd
[$key + 0x100+0x58], %f62
422 ldd
[$key + 0x100+0x60], %f36
423 ldd
[$key + 0x100+0x68], %f38
424 des_round
%f40, %f42, %f0, %f0
425 ldd
[$key + 0x100+0x70], %f40
426 ldd
[$key + 0x100+0x78], %f42
427 des_round
%f44, %f46, %f0, %f0
428 des_round
%f48, %f50, %f0, %f0
429 des_round
%f52, %f54, %f0, %f0
430 des_round
%f56, %f58, %f0, %f0
431 des_round
%f60, %f62, %f0, %f0
432 des_round
%f36, %f38, %f0, %f0
433 des_round
%f40, %f42, %f0, %f0
440 brnz
,pt
$len, .Ldes_ede3_cbc_enc_loop
443 st
%f0, [$ivec + 0] ! write out ivec
448 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
449 ! and ~2x deterioration
451 faligndata
%f0, %f0, %f2 ! handle unaligned output
453 stda
%f2, [$out + $omask]0xc0 ! partial store
455 orn
%g0, $omask, $omask
456 stda
%f2, [$out + $omask]0xc0 ! partial store
458 brnz
,pt
$len, .Ldes_ede3_cbc_enc_loop
+4
459 orn
%g0, $omask, $omask
461 st
%f0, [$ivec + 0] ! write out ivec
464 .type des_t4_ede3_cbc_encrypt
,#function
465 .size des_t4_ede3_cbc_encrypt
,.-des_t4_ede3_cbc_encrypt
467 .globl des_t4_ede3_cbc_decrypt
469 des_t4_ede3_cbc_decrypt
:
471 be
,pn
$::size_t_cc
, .Lcbc_abort
472 srln
$len, 0, $len ! needed on v8
+, "nop" on v9
473 ld
[$ivec + 0], %f2 ! load ivec
478 sll
$ileft, 3, $ileft
481 prefetch
[$inp + 63], 20
482 sub %g0, $ileft, $iright
484 alignaddrl
$out, %g0, $out
485 srl
$omask, %g4, $omask
490 ldd
[$key + 0x100+0x78], %f4 ! load key schedule
491 ldd
[$key + 0x100+0x70], %f6
492 ldd
[$key + 0x100+0x68], %f8
493 ldd
[$key + 0x100+0x60], %f10
494 ldd
[$key + 0x100+0x58], %f12
495 ldd
[$key + 0x100+0x50], %f14
496 ldd
[$key + 0x100+0x48], %f16
497 ldd
[$key + 0x100+0x40], %f18
498 ldd
[$key + 0x100+0x38], %f20
499 ldd
[$key + 0x100+0x30], %f22
500 ldd
[$key + 0x100+0x28], %f24
501 ldd
[$key + 0x100+0x20], %f26
502 ldd
[$key + 0x100+0x18], %f28
503 ldd
[$key + 0x100+0x10], %f30
504 ldd
[$key + 0x100+0x08], %f32
505 ldd
[$key + 0x100+0x00], %f34
507 .Ldes_ede3_cbc_dec_loop
:
513 sllx
%g4, $ileft, %g4
514 srlx
%g5, $iright, %g5
518 prefetch
[$inp + 8+63], 20
520 prefetch
[$out + 63], 22
523 des_round
%f4, %f6, %f0, %f0
524 des_round
%f8, %f10, %f0, %f0
525 des_round
%f12, %f14, %f0, %f0
526 des_round
%f16, %f18, %f0, %f0
527 ldd
[$key + 0x80+0x00], %f36
528 ldd
[$key + 0x80+0x08], %f38
529 des_round
%f20, %f22, %f0, %f0
530 ldd
[$key + 0x80+0x10], %f40
531 ldd
[$key + 0x80+0x18], %f42
532 des_round
%f24, %f26, %f0, %f0
533 ldd
[$key + 0x80+0x20], %f44
534 ldd
[$key + 0x80+0x28], %f46
535 des_round
%f28, %f30, %f0, %f0
536 ldd
[$key + 0x80+0x30], %f48
537 ldd
[$key + 0x80+0x38], %f50
538 des_round
%f32, %f34, %f0, %f0
539 ldd
[$key + 0x80+0x40], %f52
540 ldd
[$key + 0x80+0x48], %f54
543 ldd
[$key + 0x80+0x50], %f56
544 ldd
[$key + 0x80+0x58], %f58
546 ldd
[$key + 0x80+0x60], %f60
547 ldd
[$key + 0x80+0x68], %f62
548 des_round
%f36, %f38, %f0, %f0
549 ldd
[$key + 0x80+0x70], %f36
550 ldd
[$key + 0x80+0x78], %f38
551 des_round
%f40, %f42, %f0, %f0
552 des_round
%f44, %f46, %f0, %f0
553 des_round
%f48, %f50, %f0, %f0
554 ldd
[$key + 0x80-0x08], %f40
555 ldd
[$key + 0x80-0x10], %f42
556 des_round
%f52, %f54, %f0, %f0
557 ldd
[$key + 0x80-0x18], %f44
558 ldd
[$key + 0x80-0x20], %f46
559 des_round
%f56, %f58, %f0, %f0
560 ldd
[$key + 0x80-0x28], %f48
561 ldd
[$key + 0x80-0x30], %f50
562 des_round
%f60, %f62, %f0, %f0
563 ldd
[$key + 0x80-0x38], %f52
564 ldd
[$key + 0x80-0x40], %f54
565 des_round
%f36, %f38, %f0, %f0
566 ldd
[$key + 0x80-0x48], %f56
567 ldd
[$key + 0x80-0x50], %f58
570 ldd
[$key + 0x80-0x58], %f60
571 ldd
[$key + 0x80-0x60], %f62
573 ldd
[$key + 0x80-0x68], %f36
574 ldd
[$key + 0x80-0x70], %f38
575 des_round
%f40, %f42, %f0, %f0
576 ldd
[$key + 0x80-0x78], %f40
577 ldd
[$key + 0x80-0x80], %f42
578 des_round
%f44, %f46, %f0, %f0
579 des_round
%f48, %f50, %f0, %f0
580 des_round
%f52, %f54, %f0, %f0
581 des_round
%f56, %f58, %f0, %f0
582 des_round
%f60, %f62, %f0, %f0
583 des_round
%f36, %f38, %f0, %f0
584 des_round
%f40, %f42, %f0, %f0
587 fxor
%f2, %f0, %f0 ! ^= ivec
594 brnz
,pt
$len, .Ldes_ede3_cbc_dec_loop
597 st
%f2, [$ivec + 0] ! write out ivec
602 2: ldxa
[$inp]0x82, %g4 ! avoid
read-after
-write hazard
603 ! and ~3x deterioration
605 faligndata
%f0, %f0, %f0 ! handle unaligned output
607 stda
%f0, [$out + $omask]0xc0 ! partial store
609 orn
%g0, $omask, $omask
610 stda
%f0, [$out + $omask]0xc0 ! partial store
612 brnz
,pt
$len, .Ldes_ede3_cbc_dec_loop
+4
613 orn
%g0, $omask, $omask
615 st
%f2, [$ivec + 0] ! write out ivec
618 .type des_t4_ede3_cbc_decrypt
,#function
619 .size des_t4_ede3_cbc_decrypt
,.-des_t4_ede3_cbc_decrypt
623 .asciz
"DES for SPARC T4, David S. Miller, Andy Polyakov"
# Buffered write errors only surface when the handle is closed, so a failed
# close is treated as fatal rather than leaving truncated output behind.
unless (close(STDOUT)) {
    die "error closing STDOUT: $!";
}