]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/des/asm/des_enc.m4
Add final(?) set of copyrights.
[thirdparty/openssl.git] / crypto / des / asm / des_enc.m4
1 ! Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
2 !
3 ! Licensed under the OpenSSL license (the "License"). You may not use
4 ! this file except in compliance with the License. You can obtain a copy
5 ! in the file LICENSE in the source distribution or at
6 ! https://www.openssl.org/source/license.html
7 !
8 ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
9 !
10 ! Global registers 1 to 5 are used. This is the same as done by the
11 ! cc compiler. The UltraSPARC load/store little endian feature is used.
12 !
13 ! Instruction grouping often refers to one CPU cycle.
14 !
15 ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
16 !
17 ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
18 !
19 ! Performance improvement according to './apps/openssl speed des'
20 !
21 ! 32-bit build:
22 ! 23% faster than cc-5.2 -xarch=v8plus -xO5
23 ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
24 ! 64-bit build:
25 ! 50% faster than cc-5.2 -xarch=v9 -xO5
26 ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
27 !
28
29 .ident "des_enc.m4 2.1"
30 .file "des_enc-sparc.S"
31
32 #include <openssl/opensslconf.h>
33
34 #ifdef OPENSSL_FIPSCANISTER
35 #include <openssl/fipssyms.h>
36 #endif
37
38 #if defined(__SUNPRO_C) && defined(__sparcv9)
39 # define ABI64 /* They've said -xarch=v9 at command line */
40 #elif defined(__GNUC__) && defined(__arch64__)
41 # define ABI64 /* They've said -m64 at command line */
42 #endif
43
44 #ifdef ABI64
45 .register %g2,#scratch
46 .register %g3,#scratch
47 # define FRAME -192
48 # define BIAS 2047
49 # define LDPTR ldx
50 # define STPTR stx
51 # define ARG0 128
52 # define ARGSZ 8
53 #else
54 # define FRAME -96
55 # define BIAS 0
56 # define LDPTR ld
57 # define STPTR st
58 # define ARG0 68
59 # define ARGSZ 4
60 #endif
61
62 #define LOOPS 7
63
64 #define global0 %g0
65 #define global1 %g1
66 #define global2 %g2
67 #define global3 %g3
68 #define global4 %g4
69 #define global5 %g5
70
71 #define local0 %l0
72 #define local1 %l1
73 #define local2 %l2
74 #define local3 %l3
75 #define local4 %l4
76 #define local5 %l5
77 #define local7 %l6
78 #define local6 %l7
79
80 #define in0 %i0
81 #define in1 %i1
82 #define in2 %i2
83 #define in3 %i3
84 #define in4 %i4
85 #define in5 %i5
86 #define in6 %i6
87 #define in7 %i7
88
89 #define out0 %o0
90 #define out1 %o1
91 #define out2 %o2
92 #define out3 %o3
93 #define out4 %o4
94 #define out5 %o5
95 #define out6 %o6
96 #define out7 %o7
97
98 #define stub stb
99
100 changequote({,})
101
102
103 ! Macro definitions:
104
105
106 ! {ip_macro}
107 !
108 ! The logic used in initial and final permutations is the same as in
109 ! the C code. The permutations are done with a clever technique that
110 ! combines shift, xor, and mask (and) operations.
111 !
112 ! The macro expects address sbox 1 in global1 and sets address sbox 2 to 5
113 ! in global 2 to 5, address sbox 6 in local6, and address sbox 8 in out3.
114 !
115 ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
116 !
117 ! Loads key first round from address in parameter 5 to out0, out1.
118 !
119 ! After the original LibDES initial permutation, the resulting left
120 ! is in the variable initially used for right and vice versa. The macro
121 ! implements the possibility to keep the halves in the original registers.
122 !
123 ! parameter 1 left
124 ! parameter 2 right
125 ! parameter 3 result left (modify in first round)
126 ! parameter 4 result right (use in first round)
127 ! parameter 5 key address
128 ! parameter 6 1/2 for include encryption/decryption
129 ! parameter 7 1 for move in1 to in3
130 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
131 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
132
133 define(ip_macro, {
134
135 ! {ip_macro}
136 ! $1 $2 $4 $3 $5 $6 $7 $8 $9
137
138 ld [out2+256], local1 ! mask for the 4 bit swap
139 srl $2, 4, local4
140
141 xor local4, $1, local4
142 ifelse($7,1,{mov in1, in3},{nop})
143
144 ld [out2+260], local2 ! mask for the 16 bit swap
145 and local4, local1, local4
146 ifelse($8,1,{mov in3, in4},{})
147 ifelse($8,2,{mov in4, in3},{})
148
149 ld [out2+280], out4 ! loop counter
150 sll local4, 4, local1
151 xor $1, local4, $1
152
153 ld [out2+264], local3 ! mask for the 2 bit swap
154 srl $1, 16, local4
155 xor $2, local1, $2
156
157 ifelse($9,1,{LDPTR KS3, in4},{})
158 xor local4, $2, local4
159 nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
160
161 ifelse($9,1,{LDPTR KS2, in3},{})
162 and local4, local2, local4
163 nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
164
165 sll local4, 16, local1
166 xor $2, local4, $2
167
168 srl $2, 2, local4
169 xor $1, local1, $1
170
171 sethi %hi(16711680), local5 ! 0x00ff0000
172 xor local4, $1, local4
173
174 and local4, local3, local4
175 or local5, 255, local5 ! 0x00ff00ff, mask for the 8 bit swap
176
177 sll local4, 2, local2
178 xor $1, local4, $1
179
180 srl $1, 8, local4
181 xor $2, local2, $2
182
183 xor local4, $2, local4
184 add global1, 768, global4 ! address sbox 4
185
186 and local4, local5, local4
187 add global1, 1024, global5 ! address sbox 5
188
189 ld [out2+272], local7 ! mask for the 1 bit swap
190 sll local4, 8, local1
191 xor $2, local4, $2
192
193 srl $2, 1, local4
194 xor $1, local1, $1
195
196 ld [$5], out0 ! key 7531
197 xor local4, $1, local4
198 add global1, 256, global2 ! address sbox 2
199
200 ld [$5+4], out1 ! key 8642
201 and local4, local7, local4
202 add global1, 512, global3 ! address sbox 3
203
204 sll local4, 1, local1
205 xor $1, local4, $1
206
207 sll $1, 3, local3
208 xor $2, local1, $2
209
210 sll $2, 3, local2
211 add global1, 1280, local6 ! address sbox 6
212
213 srl $1, 29, local4
214 add global1, 1792, out3 ! address sbox 8
215
216 srl $2, 29, local1
217 or local4, local3, $4 ! $4 = $1 rotated 3 left
218
219 or local2, local1, $3 ! $3 = $2 rotated 3 left
220
221 ifelse($6, 1, {
222
223 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
224 or local2, local1, $3 ! same value as computed just above
225 xor $4, out0, local1
226
227 call .des_enc.1
228 and local1, 252, local1 ! delay slot: sbox 1 offset for first round
229
230 },{})
231
232 ifelse($6, 2, {
233
234 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
235 or local2, local1, $3 ! same value as computed just above
236 xor $4, out0, local1
237
238 call .des_dec.1
239 and local1, 252, local1 ! delay slot: sbox 1 offset for first round
240
241 },{})
242 })
243
244
245 ! {rounds_macro}
246 !
247 ! The logic used in the DES rounds is the same as in the C code,
248 ! except that calculations for sbox 1 and sbox 5 begin before
249 ! the previous round is finished.
250 !
251 ! In each round one half (work) is modified based on key and the
252 ! other half (use).
253 !
254 ! In this version we do two rounds in a loop repeated 7 times
255 ! and two rounds separately.
256 !
257 ! One half has the bits for the sboxes in the following positions:
258 !
259 ! 777777xx555555xx333333xx111111xx
260 !
261 ! 88xx666666xx444444xx222222xx8888
262 !
263 ! The bits for each sbox are xor-ed with the key bits for that box.
264 ! The above xx bits are cleared, and the result used for lookup in
265 ! the sbox table. Each sbox entry contains the 4 output bits permuted
266 ! into 32 bits according to the P permutation.
267 !
268 ! In the description of DES, left and right are switched after
269 ! each round, except after last round. In this code the original
270 ! left and right are kept in the same register in all rounds, meaning
271 ! that after the 16 rounds the result for right is in the register
272 ! originally used for left.
273 !
274 ! parameter 1 first work (left in first round)
275 ! parameter 2 first use (right in first round)
276 ! parameter 3 enc/dec 1/-1
277 ! parameter 4 loop label
278 ! parameter 5 key address register
279 ! parameter 6 optional address for key next encryption/decryption
280 ! parameter 7 not empty for include retl
281 !
282 ! also compares in2 to 8
283 ! and moves the key address register by 16 (enc) or -16 (dec) per loop pass
284 define(rounds_macro, {
285
286 ! {rounds_macro}
287 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
288
289 xor $2, out0, local1
290
291 ld [out2+284], local5 ! 0x0000FC00
292 ba $4 ! enter the loop
293 and local1, 252, local1 ! delay slot: sbox 1 offset
294
295 .align 32
296
297 $4:
298 ! local6 is address sbox 6
299 ! out3 is address sbox 8
300 ! out4 is loop counter
301
302 ld [global1+local1], local1
303 xor $2, out1, out1 ! 8642
304 xor $2, out0, out0 ! 7531
305 ! fmovs %f0, %f0 ! fxor used for alignment
306
307 srl out1, 4, local0 ! rotate 4 right
308 and out0, local5, local3 ! 3
309 ! fmovs %f0, %f0
310
311 ld [$5+$3*8], local7 ! key 7531 next round
312 srl local3, 8, local3 ! 3
313 and local0, 252, local2 ! 2
314 ! fmovs %f0, %f0
315
316 ld [global3+local3],local3 ! 3
317 sll out1, 28, out1 ! rotate
318 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
319
320 ld [global2+local2], local2 ! 2
321 srl out0, 24, local1 ! 7
322 or out1, local0, out1 ! rotate
323
324 ldub [out2+local1], local1 ! 7 (and 0xFC)
325 srl out1, 24, local0 ! 8
326 and out1, local5, local4 ! 4
327
328 ldub [out2+local0], local0 ! 8 (and 0xFC)
329 srl local4, 8, local4 ! 4
330 xor $1, local2, $1 ! 2 finished local2 now sbox 6
331
332 ld [global4+local4],local4 ! 4
333 srl out1, 16, local2 ! 6
334 xor $1, local3, $1 ! 3 finished local3 now sbox 5
335
336 ld [out3+local0],local0 ! 8
337 and local2, 252, local2 ! 6
338 add global1, 1536, local5 ! address sbox 7
339
340 ld [local6+local2], local2 ! 6
341 srl out0, 16, local3 ! 5
342 xor $1, local4, $1 ! 4 finished
343
344 ld [local5+local1],local1 ! 7
345 and local3, 252, local3 ! 5
346 xor $1, local0, $1 ! 8 finished
347
348 ld [global5+local3],local3 ! 5
349 xor $1, local2, $1 ! 6 finished
350 subcc out4, 1, out4 ! count down, flags tested by bne at loop end
351
352 ld [$5+$3*8+4], out0 ! key 8642 next round
353 xor $1, local7, local2 ! sbox 5 next round
354 xor $1, local1, $1 ! 7 finished
355
356 srl local2, 16, local2 ! sbox 5 next round
357 xor $1, local3, $1 ! 5 finished
358
359 ld [$5+$3*16+4], out1 ! key 8642 next round again
360 and local2, 252, local2 ! sbox5 next round
361 ! next round
362 xor $1, local7, local7 ! 7531
363
364 ld [global5+local2], local2 ! 5
365 srl local7, 24, local3 ! 7
366 xor $1, out0, out0 ! 8642
367
368 ldub [out2+local3], local3 ! 7 (and 0xFC)
369 srl out0, 4, local0 ! rotate 4 right
370 and local7, 252, local1 ! 1
371
372 sll out0, 28, out0 ! rotate
373 xor $2, local2, $2 ! 5 finished local2 used
374
375 srl local0, 8, local4 ! 4
376 and local0, 252, local2 ! 2
377 ld [local5+local3], local3 ! 7
378
379 srl local0, 16, local5 ! 6
380 or out0, local0, out0 ! rotate
381 ld [global2+local2], local2 ! 2
382
383 srl out0, 24, local0
384 ld [$5+$3*16], out0 ! key 7531 next round
385 and local4, 252, local4 ! 4
386
387 and local5, 252, local5 ! 6
388 ld [global4+local4], local4 ! 4
389 xor $2, local3, $2 ! 7 finished local3 used
390
391 and local0, 252, local0 ! 8
392 ld [local6+local5], local5 ! 6
393 xor $2, local2, $2 ! 2 finished local2 now sbox 3
394
395 srl local7, 8, local2 ! 3 start
396 ld [out3+local0], local0 ! 8
397 xor $2, local4, $2 ! 4 finished
398
399 and local2, 252, local2 ! 3
400 ld [global1+local1], local1 ! 1
401 xor $2, local5, $2 ! 6 finished local5 used
402
403 ld [global3+local2], local2 ! 3
404 xor $2, local0, $2 ! 8 finished
405 add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two round keys of 8 bytes)
406
407 ld [out2+284], local5 ! 0x0000FC00
408 xor $2, out0, local4 ! sbox 1 next round
409 xor $2, local1, $2 ! 1 finished
410
411 xor $2, local2, $2 ! 3 finished
412 bne $4 ! loop while counter from subcc above is not zero
413 and local4, 252, local1 ! sbox 1 next round
414
415 ! two rounds more:
416
417 ld [global1+local1], local1
418 xor $2, out1, out1
419 xor $2, out0, out0
420
421 srl out1, 4, local0 ! rotate
422 and out0, local5, local3
423
424 ld [$5+$3*8], local7 ! key 7531
425 srl local3, 8, local3
426 and local0, 252, local2
427
428 ld [global3+local3],local3
429 sll out1, 28, out1 ! rotate
430 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
431
432 ld [global2+local2], local2
433 srl out0, 24, local1
434 or out1, local0, out1 ! rotate
435
436 ldub [out2+local1], local1
437 srl out1, 24, local0
438 and out1, local5, local4
439
440 ldub [out2+local0], local0
441 srl local4, 8, local4
442 xor $1, local2, $1 ! 2 finished local2 now sbox 6
443
444 ld [global4+local4],local4
445 srl out1, 16, local2
446 xor $1, local3, $1 ! 3 finished local3 now sbox 5
447
448 ld [out3+local0],local0
449 and local2, 252, local2
450 add global1, 1536, local5 ! address sbox 7
451
452 ld [local6+local2], local2
453 srl out0, 16, local3
454 xor $1, local4, $1 ! 4 finished
455
456 ld [local5+local1],local1
457 and local3, 252, local3
458 xor $1, local0, $1 ! 8 finished
459
460 ld [global5+local3],local3
461 xor $1, local2, $1 ! 6 finished
462 cmp in2, 8 ! sets the flags left for the code after the rounds
463
464 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
465 xor $1, local7, local2 ! sbox 5 next round
466 xor $1, local1, $1 ! 7 finished
467
468 ld [$5+$3*8+4], out0
469 srl local2, 16, local2 ! sbox 5 next round
470 xor $1, local3, $1 ! 5 finished
471
472 and local2, 252, local2
473 ! next round (two rounds more)
474 xor $1, local7, local7 ! 7531
475
476 ld [global5+local2], local2
477 srl local7, 24, local3
478 xor $1, out0, out0 ! 8642
479
480 ldub [out2+local3], local3
481 srl out0, 4, local0 ! rotate
482 and local7, 252, local1
483
484 sll out0, 28, out0 ! rotate
485 xor $2, local2, $2 ! 5 finished local2 used
486
487 srl local0, 8, local4
488 and local0, 252, local2
489 ld [local5+local3], local3
490
491 srl local0, 16, local5
492 or out0, local0, out0 ! rotate
493 ld [global2+local2], local2
494
495 srl out0, 24, local0
496 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
497 and local4, 252, local4
498
499 and local5, 252, local5
500 ld [global4+local4], local4
501 xor $2, local3, $2 ! 7 finished local3 used
502
503 and local0, 252, local0
504 ld [local6+local5], local5
505 xor $2, local2, $2 ! 2 finished local2 now sbox 3
506
507 srl local7, 8, local2 ! 3 start
508 ld [out3+local0], local0
509 xor $2, local4, $2 ! 4 finished
510
511 and local2, 252, local2
512 ld [global1+local1], local1
513 xor $2, local5, $2 ! 6 finished local5 used
514
515 ld [global3+local2], local2
516 srl $1, 3, local3 ! $1 is final: first part of rotate 3 right for final permutation
517 xor $2, local0, $2 ! 8 finished
518
519 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
520 sll $1, 29, local4 ! second part of rotate 3 right, combined later
521 xor $2, local1, $2 ! 1 finished
522
523 ifelse($7,{}, {}, {retl})
524 xor $2, local2, $2 ! 3 finished (delay slot of retl if present)
525 })
526
527
528 ! {fp_macro}
529 !
530 ! parameter 1 right (original left)
531 ! parameter 2 left (original right)
532 ! parameter 3 1 for optional store to [in0]
533 ! parameter 4 1 for load input/output address to local5/7
534 !
535 ! The final permutation logic switches the halves, meaning that
536 ! left and right end up in the registers originally used.
537
538 define(fp_macro, {
539
540 ! {fp_macro}
541 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
542
543 ! initially undo the rotate 3 left done after initial permutation
544 ! original left is received shifted 3 right and 29 left in local3/4
545
546 sll $2, 29, local1
547 or local3, local4, $1 ! $1 = original left rotated 3 right
548
549 srl $2, 3, $2
550 sethi %hi(0x55555555), local2
551
552 or $2, local1, $2 ! $2 rotated 3 right
553 or local2, %lo(0x55555555), local2
554
555 srl $2, 1, local3 ! swap step: 1 bit, mask 0x55555555
556 sethi %hi(0x00ff00ff), local1
557 xor local3, $1, local3
558 or local1, %lo(0x00ff00ff), local1
559 and local3, local2, local3
560 sethi %hi(0x33333333), local4
561 sll local3, 1, local2
562
563 xor $1, local3, $1
564
565 srl $1, 8, local3 ! swap step: 8 bits, mask 0x00ff00ff
566 xor $2, local2, $2
567 xor local3, $2, local3
568 or local4, %lo(0x33333333), local4
569 and local3, local1, local3
570 sethi %hi(0x0000ffff), local1
571 sll local3, 8, local2
572
573 xor $2, local3, $2
574
575 srl $2, 2, local3 ! swap step: 2 bits, mask 0x33333333
576 xor $1, local2, $1
577 xor local3, $1, local3
578 or local1, %lo(0x0000ffff), local1
579 and local3, local4, local3
580 sethi %hi(0x0f0f0f0f), local4
581 sll local3, 2, local2
582
583 ifelse($4,1, {LDPTR INPUT, local5})
584 xor $1, local3, $1
585
586 ifelse($4,1, {LDPTR OUTPUT, local7})
587 srl $1, 16, local3 ! swap step: 16 bits, mask 0x0000ffff
588 xor $2, local2, $2
589 xor local3, $2, local3
590 or local4, %lo(0x0f0f0f0f), local4
591 and local3, local1, local3
592 sll local3, 16, local2
593
594 xor $2, local3, local1 ! $2 value continues in local1 until the last xor
595
596 srl local1, 4, local3 ! swap step: 4 bits, mask 0x0f0f0f0f
597 xor $1, local2, $1
598 xor local3, $1, local3
599 and local3, local4, local3
600 sll local3, 4, local2
601
602 xor $1, local3, $1
603
604 ! optional store:
605
606 ifelse($3,1, {st $1, [in0]})
607
608 xor local1, local2, $2
609
610 ifelse($3,1, {st $2, [in0+4]})
611
612 })
613
614
615 ! {fp_ip_macro}
616 !
617 ! Does initial permutation for next block mixed with
618 ! final permutation for current block.
619 !
620 ! parameter 1 original left
621 ! parameter 2 original right
622 ! parameter 3 left ip
623 ! parameter 4 right ip
624 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
625 ! 2: mov in4 to in3
626 !
627 ! also adds -8 to length in2 and loads loop counter to out4
628
629 define(fp_ip_macro, {
630
631 ! {fp_ip_macro}
632 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
633
634 define({temp1},{out4})
635 define({temp2},{local3})
636
637 define({ip1},{local1})
638 define({ip2},{local2})
639 define({ip4},{local4})
640 define({ip5},{local5})
641
642 ! $1 in local3, local4
643
644 ld [out2+256], ip1
645 sll out5, 29, temp1 ! note: out5 is read directly, it is not a parameter
646 or local3, local4, $1
647
648 srl out5, 3, $2 ! note: out5 is read directly, it is not a parameter
649 ifelse($5,2,{mov in4, in3})
650
651 ld [out2+272], ip5
652 srl $4, 4, local0
653 or $2, temp1, $2
654
655 srl $2, 1, temp1
656 xor temp1, $1, temp1
657
658 and temp1, ip5, temp1
659 xor local0, $3, local0
660
661 sll temp1, 1, temp2
662 xor $1, temp1, $1
663
664 and local0, ip1, local0
665 add in2, -8, in2 ! length in2 minus one block
666
667 sll local0, 4, local7
668 xor $3, local0, $3
669
670 ld [out2+268], ip4
671 srl $1, 8, temp1
672 xor $2, temp2, $2
673 ld [out2+260], ip2
674 srl $3, 16, local0
675 xor $4, local7, $4
676 xor temp1, $2, temp1
677 xor local0, $4, local0
678 and temp1, ip4, temp1
679 and local0, ip2, local0
680 sll temp1, 8, temp2
681 xor $2, temp1, $2
682 sll local0, 16, local7
683 xor $4, local0, $4
684
685 srl $2, 2, temp1
686 xor $1, temp2, $1
687
688 ld [out2+264], temp2 ! ip3
689 srl $4, 2, local0
690 xor $3, local7, $3
691 xor temp1, $1, temp1
692 xor local0, $3, local0
693 and temp1, temp2, temp1
694 and local0, temp2, local0
695 sll temp1, 2, temp2
696 xor $1, temp1, $1
697 sll local0, 2, local7
698 xor $3, local0, $3
699
700 srl $1, 16, temp1
701 xor $2, temp2, $2
702 srl $3, 8, local0
703 xor $4, local7, $4
704 xor temp1, $2, temp1
705 xor local0, $4, local0
706 and temp1, ip2, temp1
707 and local0, ip4, local0
708 sll temp1, 16, temp2
709 xor $2, temp1, local4 ! local4 reused here, its mask value is no longer needed
710 sll local0, 8, local7
711 xor $4, local0, $4
712
713 srl $4, 1, local0
714 xor $3, local7, $3
715
716 srl local4, 4, temp1
717 xor local0, $3, local0
718
719 xor $1, temp2, $1
720 and local0, ip5, local0
721
722 sll local0, 1, local7
723 xor temp1, $1, temp1
724
725 xor $3, local0, $3
726 xor $4, local7, $4
727
728 sll $3, 3, local5 ! rotate 3 left of the next block halves starts here
729 and temp1, ip1, temp1
730
731 sll temp1, 4, temp2
732 xor $1, temp1, $1
733
734 ifelse($5,1,{LDPTR KS2, in4})
735 sll $4, 3, local2
736 xor local4, temp2, $2
737
738 ! reload since out4 was used as temporary:
739
740 ld [out2+280], out4 ! loop counter
741
742 srl $3, 29, local0
743 ifelse($5,1,{add in4, 120, in4})
744
745 ifelse($5,1,{LDPTR KS1, in3})
746 srl $4, 29, local7
747
748 or local0, local5, $4
749 or local2, local7, $3
750
751 })
752
753
754
755 ! {load_little_endian}
756 !
757 ! parameter 1 address
758 ! parameter 2 destination left (bytes 0 to 3 at address)
759 ! parameter 3 destination right (bytes 4 to 7 at address)
760 ! parameter 4 temporary
761 ! parameter 5 label, placed at start; the same label with suffix a is placed at end
762
763 define(load_little_endian, {
764
765 ! {load_little_endian}
766 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
767
768 ! first in memory to rightmost in register
769
770 $5:
771 ldub [$1+3], $2 ! byte 3 ends up as most significant byte of $2
772
773 ldub [$1+2], $4
774 sll $2, 8, $2
775 or $2, $4, $2
776
777 ldub [$1+1], $4
778 sll $2, 8, $2
779 or $2, $4, $2
780
781 ldub [$1+0], $4
782 sll $2, 8, $2
783 or $2, $4, $2
784
785
786 ldub [$1+3+4], $3 ! byte 7 ends up as most significant byte of $3
787
788 ldub [$1+2+4], $4
789 sll $3, 8, $3
790 or $3, $4, $3
791
792 ldub [$1+1+4], $4
793 sll $3, 8, $3
794 or $3, $4, $3
795
796 ldub [$1+0+4], $4
797 sll $3, 8, $3
798 or $3, $4, $3
799 $5a:
800
801 })
802
803
804 ! {load_little_endian_inc}
805 !
806 ! parameter 1 address
807 ! parameter 2 destination left
808 ! parameter 3 destination right
809 ! parameter 4 temporary
810 ! parameter 5 label
811 !
812 ! adds 8 to address
813
814 define(load_little_endian_inc, {
815
816 ! {load_little_endian_inc}
817 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
818
819 ! first in memory to rightmost in register
820
821 $5:
822 ldub [$1+3], $2
823
824 ldub [$1+2], $4
825 sll $2, 8, $2
826 or $2, $4, $2
827
828 ldub [$1+1], $4
829 sll $2, 8, $2
830 or $2, $4, $2
831
832 ldub [$1+0], $4
833 sll $2, 8, $2
834 or $2, $4, $2
835
836 ldub [$1+3+4], $3
837 add $1, 8, $1 ! address is advanced here, so the offsets below subtract 8
838
839 ldub [$1+2+4-8], $4
840 sll $3, 8, $3
841 or $3, $4, $3
842
843 ldub [$1+1+4-8], $4
844 sll $3, 8, $3
845 or $3, $4, $3
846
847 ldub [$1+0+4-8], $4
848 sll $3, 8, $3
849 or $3, $4, $3
850 $5a:
851
852 })
853
854
855 ! {load_n_bytes}
856 !
857 ! Loads 1 to 7 bytes little endian
858 ! Remaining bytes are zeroed.
859 !
860 ! parameter 1 address
861 ! parameter 2 length
862 ! parameter 3 destination register left (receives bytes 4 to 6)
863 ! parameter 4 destination register right (receives bytes 0 to 3)
864 ! parameter 5 temp
865 ! parameter 6 temp2
866 ! parameter 7 label
867 ! parameter 8 return label
868 ! Clobbers %o7. Length must be 1 to 7: entry 0 of the jump table is offset 0.
869 define(load_n_bytes, {
870
871 ! {load_n_bytes}
872 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
873 ! NOTE(review): with length 0 the jump below would target $7.0 again; confirm no caller passes 0
874 $7.0: call .+8 ! %o7 = address of $7.0
875 sll $2, 2, $6 ! delay slot: length times 4 = byte offset into jump table
876
877 add %o7,$7.jmp.table-$7.0,$5 ! address of jump table
878
879 add $5, $6, $5
880 mov 0, $4
881
882 ld [$5], $5 ! distance from $7.0 to the entry point for this length
883
884 jmp %o7+$5
885 mov 0, $3 ! delay slot
886
887 $7.7:
888 ldub [$1+6], $5
889 sll $5, 16, $5
890 or $3, $5, $3
891 $7.6:
892 ldub [$1+5], $5
893 sll $5, 8, $5
894 or $3, $5, $3
895 $7.5:
896 ldub [$1+4], $5
897 or $3, $5, $3
898 $7.4:
899 ldub [$1+3], $5
900 sll $5, 24, $5
901 or $4, $5, $4
902 $7.3:
903 ldub [$1+2], $5
904 sll $5, 16, $5
905 or $4, $5, $4
906 $7.2:
907 ldub [$1+1], $5
908 sll $5, 8, $5
909 or $4, $5, $4
910 $7.1:
911 ldub [$1+0], $5
912 ba $8
913 or $4, $5, $4 ! delay slot
914
915 .align 4
916
917 $7.jmp.table:
918 .word 0
919 .word $7.1-$7.0
920 .word $7.2-$7.0
921 .word $7.3-$7.0
922 .word $7.4-$7.0
923 .word $7.5-$7.0
924 .word $7.6-$7.0
925 .word $7.7-$7.0
926 })
927
928
929 ! {store_little_endian}
930 !
931 ! parameter 1 address
932 ! parameter 2 source left
933 ! parameter 3 source right
934 ! parameter 4 temporary
935 ! parameter 5 label, placed at start; the same label with suffix a is placed at end
936 define(store_little_endian, {
937
938 ! {store_little_endian}
939 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
940
941 ! rightmost in register to first in memory
942
943 $5:
944 and $2, 255, $4
945 stub $4, [$1+0]
946
947 srl $2, 8, $4
948 and $4, 255, $4
949 stub $4, [$1+1]
950
951 srl $2, 16, $4
952 and $4, 255, $4
953 stub $4, [$1+2]
954
955 srl $2, 24, $4 ! no mask needed: srl leaves only the top byte
956 stub $4, [$1+3]
957
958
959 and $3, 255, $4
960 stub $4, [$1+0+4]
961
962 srl $3, 8, $4
963 and $4, 255, $4
964 stub $4, [$1+1+4]
965
966 srl $3, 16, $4
967 and $4, 255, $4
968 stub $4, [$1+2+4]
969
970 srl $3, 24, $4 ! no mask needed: srl leaves only the top byte
971 stub $4, [$1+3+4]
972
973 $5a:
974
975 })
976
977
978 ! {store_n_bytes}
979 !
980 ! Stores 1 to 7 bytes little endian
981 !
982 ! parameter 1 address
983 ! parameter 2 length
984 ! parameter 3 source register left (supplies bytes 4 to 6)
985 ! parameter 4 source register right (supplies bytes 0 to 3)
986 ! parameter 5 temp
987 ! parameter 6 temp2
988 ! parameter 7 label
989 ! parameter 8 return label
990 ! Clobbers %o7. Length must be 1 to 7: entry 0 of the jump table is offset 0.
991 define(store_n_bytes, {
992
993 ! {store_n_bytes}
994 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
995 ! NOTE(review): with length 0 the jump below would target $7.0 again; confirm no caller passes 0
996 $7.0: call .+8 ! %o7 = address of $7.0
997 sll $2, 2, $6 ! delay slot: length times 4 = byte offset into jump table
998
999 add %o7,$7.jmp.table-$7.0,$5 ! address of jump table
1000
1001 add $5, $6, $5
1002
1003 ld [$5], $5 ! distance from $7.0 to the entry point for this length
1004
1005 jmp %o7+$5
1006 nop
1007
1008 $7.7:
1009 srl $3, 16, $5
1010 and $5, 0xff, $5
1011 stub $5, [$1+6]
1012 $7.6:
1013 srl $3, 8, $5
1014 and $5, 0xff, $5
1015 stub $5, [$1+5]
1016 $7.5:
1017 and $3, 0xff, $5
1018 stub $5, [$1+4]
1019 $7.4:
1020 srl $4, 24, $5
1021 stub $5, [$1+3]
1022 $7.3:
1023 srl $4, 16, $5
1024 and $5, 0xff, $5
1025 stub $5, [$1+2]
1026 $7.2:
1027 srl $4, 8, $5
1028 and $5, 0xff, $5
1029 stub $5, [$1+1]
1030 $7.1:
1031 and $4, 0xff, $5
1032
1033
1034 ba $8
1035 stub $5, [$1] ! delay slot: byte 0 is stored on the way out
1036
1037 .align 4
1038
1039 $7.jmp.table:
1040
1041 .word 0
1042 .word $7.1-$7.0
1043 .word $7.2-$7.0
1044 .word $7.3-$7.0
1045 .word $7.4-$7.0
1046 .word $7.5-$7.0
1047 .word $7.6-$7.0
1048 .word $7.7-$7.0
1049 })
1050
1051
1052 define(testvalue,{1})
1053 ! {register_init}: test helper that fills registers with the test value set above
1054 define(register_init, {
1055
1056 ! For test purposes:
1057
1058 sethi %hi(testvalue), local0
1059 or local0, %lo(testvalue), local0
1060 ! parameters 1 to 8 are optional: extra registers to fill
1061 ifelse($1,{},{}, {mov local0, $1})
1062 ifelse($2,{},{}, {mov local0, $2})
1063 ifelse($3,{},{}, {mov local0, $3})
1064 ifelse($4,{},{}, {mov local0, $4})
1065 ifelse($5,{},{}, {mov local0, $5})
1066 ifelse($6,{},{}, {mov local0, $6})
1067 ifelse($7,{},{}, {mov local0, $7})
1068 ifelse($8,{},{}, {mov local0, $8})
1069 ! the work registers follow; this also overwrites out2 and global1
1070 mov local0, local1
1071 mov local0, local2
1072 mov local0, local3
1073 mov local0, local4
1074 mov local0, local5
1075 mov local0, local7
1076 mov local0, local6
1077 mov local0, out0
1078 mov local0, out1
1079 mov local0, out2
1080 mov local0, out3
1081 mov local0, out4
1082 mov local0, out5
1083 mov local0, global1
1084 mov local0, global2
1085 mov local0, global3
1086 mov local0, global4
1087 mov local0, global5
1088
1089 })
1090
1091 .section ".text"
1092
1093 .align 32
1094
1095 .des_enc:
1096 ! internal routine: runs the rounds on in5/out5 and returns with retl
1097 ! key address in3
1098 ! loads key next encryption/decryption first round from [in4]
1099 ! label .des_enc.1 is the loop entry called from {ip_macro} when its parameter 6 is 1
1100 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1101
1101
1102
1103 .align 32
1104
1105 .des_dec:
1106 ! internal routine: runs the rounds with the key pointer moving backwards, returns with retl
1107 ! implemented with out5 as first parameter to avoid
1108 ! register exchange in ede modes
1109
1110 ! key address in4
1111 ! loads key next encryption/decryption first round from [in3]
1112 ! label .des_dec.1 is the loop entry called from {ip_macro} when its parameter 6 is 2
1113 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1114
1114
1115
1116
1117 ! void DES_encrypt1(data, ks, enc)
1118 ! *******************************
1119 ! data: two 32 bit words, replaced in place; enc: 0 selects decryption
1120 .align 32
1121 .global DES_encrypt1
1122 .type DES_encrypt1,#function
1123
1124 DES_encrypt1:
1125
1126 save %sp, FRAME, %sp
1127
1128 sethi %hi(.PIC.DES_SPtrans-1f),global1
1129 or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! distance from label 1 to the table
1130 1: call .+8 ! %o7 = address of label 1
1131 add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
1132 sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
1133
1134 ld [in0], in5 ! left
1135 cmp in2, 0 ! enc
1136
1137 be .encrypt.dec
1138 ld [in0+4], out5 ! right (delay slot, done on both paths)
1139
1140 ! parameter 6 1/2 for include encryption/decryption
1141 ! parameter 7 1 for move in1 to in3
1142 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1143
1144 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1145
1146 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1147
1148 fp_macro(in5, out5, 1) ! 1 for store to [in0]
1149
1150 ret
1151 restore
1152
1153 .encrypt.dec:
1154
1155 add in1, 120, in3 ! use last subkey for first round
1156
1157 ! parameter 6 1/2 for include encryption/decryption
1158 ! parameter 7 1 for move in1 to in3
1159 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1160
1161 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
1162
1163 fp_macro(out5, in5, 1) ! 1 for store to [in0]
1164
1165 ret
1166 restore
1167
1168 .DES_encrypt1.end:
1169 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1170
1171
1172 ! void DES_encrypt2(data, ks, enc)
1173 !*********************************
1174
1175 ! encrypts/decrypts without initial/final permutation
1176
1177 .align 32
1178 .global DES_encrypt2
1179 .type DES_encrypt2,#function
1180
1181 DES_encrypt2:
1182
1183 save %sp, FRAME, %sp
1184
1185 sethi %hi(.PIC.DES_SPtrans-1f),global1
1186 or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! distance from label 1 to the table
1187 1: call .+8 ! %o7 = address of label 1
1188 add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
1189 sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
1190
1191 ! Set sbox address 2 to 6 and 8 and rotate halves 3 left
1192 ! Errors caught by destest? Yes. Still? *NO*
1193
1194 !sethi %hi(DES_SPtrans), global1 ! address sbox 1
1195
1196 !or global1, %lo(DES_SPtrans), global1 ! sbox 1
1197
1198 add global1, 256, global2 ! sbox 2
1199 add global1, 512, global3 ! sbox 3
1200
1201 ld [in0], out5 ! right
1202 add global1, 768, global4 ! sbox 4
1203 add global1, 1024, global5 ! sbox 5
1204
1205 ld [in0+4], in5 ! left
1206 add global1, 1280, local6 ! sbox 6
1207 add global1, 1792, out3 ! sbox 8
1208
1209 ! rotate
1210
1211 sll in5, 3, local5
1212 mov in1, in3 ! key address to in3
1213
1214 sll out5, 3, local7
1215 srl in5, 29, in5
1216
1217 srl out5, 29, out5
1218 add in5, local5, in5
1219
1220 add out5, local7, out5
1221 cmp in2, 0
1222
1223 ! we use our own stackframe
1224
1225 be .encrypt2.dec
1226 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! delay slot: save data address, in0 is reused as scratch below
1227
1228 ld [in3], out0 ! key 7531 first round
1229 mov LOOPS, out4 ! loop counter
1230
1231 ld [in3+4], out1 ! key 8642 first round
1232 sethi %hi(0x0000FC00), local5 ! .des_enc also loads local5 from [out2+284] at entry
1233
1234 call .des_enc
1235 mov in3, in4 ! delay slot: address for the key load at the end of .des_enc
1236
1237 ! rotate
1238 sll in5, 29, in0
1239 srl in5, 3, in5
1240 sll out5, 29, in1
1241 add in5, in0, in5
1242 srl out5, 3, out5
1243 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! restore data address
1244 add out5, in1, out5
1245 st in5, [in0]
1246 st out5, [in0+4]
1247
1248 ret
1249 restore
1250
1251
1252 .encrypt2.dec:
1253
1254 add in3, 120, in4 ! last subkey is used first
1255
1256 ld [in4], out0 ! key 7531 first round
1257 mov LOOPS, out4 ! loop counter
1258
1259 ld [in4+4], out1 ! key 8642 first round
1260 sethi %hi(0x0000FC00), local5 ! .des_dec also loads local5 from [out2+284] at entry
1261
1262 mov in5, local1 ! left expected in out5
1263 mov out5, in5
1264
1265 call .des_dec
1266 mov local1, out5 ! delay slot: completes the exchange of in5 and out5
1267
1268 .encrypt2.finish:
1269
1270 ! rotate
1271 sll in5, 29, in0
1272 srl in5, 3, in5
1273 sll out5, 29, in1
1274 add in5, in0, in5
1275 srl out5, 3, out5
1276 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! restore data address
1277 add out5, in1, out5
1278 st out5, [in0] ! halves were exchanged above, so they are stored exchanged
1279 st in5, [in0+4]
1280
1281 ret
1282 restore
1283
1284 .DES_encrypt2.end:
1285 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1286
1287
1288 ! void DES_encrypt3(data, ks1, ks2, ks3)
1289 ! **************************************
1290 ! encrypt with ks1, decrypt with ks2, encrypt with ks3; data is replaced in place
1291 .align 32
1292 .global DES_encrypt3
1293 .type DES_encrypt3,#function
1294
1295 DES_encrypt3:
1296
1297 save %sp, FRAME, %sp
1298
1299 sethi %hi(.PIC.DES_SPtrans-1f),global1
1300 or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! distance from label 1 to the table
1301 1: call .+8 ! %o7 = address of label 1
1302 add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
1303 sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
1304
1305 ld [in0], in5 ! left
1306 add in2, 120, in4 ! ks2
1307
1308 ld [in0+4], out5 ! right
1309 mov in3, in2 ! save ks3
1310
1311 ! parameter 6 1/2 for include encryption/decryption
1312 ! parameter 7 1 for mov in1 to in3
1313 ! parameter 8 1 for mov in3 to in4
1314 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1315
1316 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0) ! permutation, then encrypt with ks1
1317
1318 call .des_dec ! decrypt with ks2, in4 was set to ks2+120 above
1319 mov in2, in3 ! preload ks3
1320
1321 call .des_enc ! encrypt with ks3 in in3
1322 nop
1323
1324 fp_macro(in5, out5, 1)
1325
1326 ret
1327 restore
1328
1329 .DES_encrypt3.end:
1330 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1331
1332
1333 ! void DES_decrypt3(data, ks1, ks2, ks3)
1334 ! **************************************
1335 ! decrypt with ks3, encrypt with ks2, decrypt with ks1; data is replaced in place
1336 .align 32
1337 .global DES_decrypt3
1338 .type DES_decrypt3,#function
1339
1340 DES_decrypt3:
1341
1342 save %sp, FRAME, %sp
1343
1344 sethi %hi(.PIC.DES_SPtrans-1f),global1
1345 or global1,%lo(.PIC.DES_SPtrans-1f),global1 ! distance from label 1 to the table
1346 1: call .+8 ! %o7 = address of label 1
1347 add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
1348 sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
1349
1350 ld [in0], in5 ! left
1351 add in3, 120, in4 ! ks3
1352
1353 ld [in0+4], out5 ! right
1354 mov in2, in3 ! ks2
1355
1356 ! parameter 6 1/2 for include encryption/decryption
1357 ! parameter 7 1 for mov in1 to in3
1358 ! parameter 8 1 for mov in3 to in4
1359 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1360
1361 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0) ! permutation, then decrypt with ks3
1362
1363 call .des_enc ! encrypt with ks2 in in3
1364 add in1, 120, in4 ! preload ks1
1365
1366 call .des_dec ! decrypt with ks1, in4 was set to ks1+120 above
1367 nop
1368
1369 fp_macro(out5, in5, 1)
1370
1371 ret
1372 restore
1373
1374 .DES_decrypt3.end:
1375 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1376
1377 ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1378 ! *****************************************************************
!
! Single DES in CBC mode.
!
! in0 = input, in1 = output, in2 = byte count, in3 = key schedule,
! in4 = ivec, in5 = enc (zero selects the decrypt path at .ncbc.dec).
!
! Blocks and the chaining value are moved with the little endian
! load/store helpers. A trailing partial block of 1 to 7 bytes is
! handled by the n-byte load helper when encrypting and by the n-byte
! store helper when decrypting. Both paths finish by writing the
! current chaining value back through the saved ivec pointer, except
! that the decrypt path returns immediately when the byte count is
! not positive.
1379
1380
1381 .align 32
1382 .global DES_ncbc_encrypt
1383 .type DES_ncbc_encrypt,#function
1384
1385 DES_ncbc_encrypt:
1386
1387 save %sp, FRAME, %sp
1388
! Stack slots used to park pointers while their registers are reused.
! These m4 names stay visible to the code that follows in this file.
1389 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
1390 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1391 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1392
! Position independent address of the S-box table in global1 (call .+8
! leaves the address of label 1 in %o7); out2 = address of .des_and.
1393 sethi %hi(.PIC.DES_SPtrans-1f),global1
1394 or global1,%lo(.PIC.DES_SPtrans-1f),global1
1395 1: call .+8
1396 add %o7,global1,global1
1397 sub global1,.PIC.DES_SPtrans-.des_and,out2
1398
1399 cmp in5, 0 ! enc
1400
! The store below is in the delay slot of a non-annulled branch, so
! the ivec pointer is saved on both the encrypt and the decrypt path.
1401 be .ncbc.dec
1402 STPTR in4, IVEC
1403
1404 ! addr left right temp label
1405 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
1406
1407 addcc in2, -8, in2 ! bytes missing when first block done
1408
1409 bl .ncbc.enc.seven.or.less
1410 mov in3, in4 ! schedule
1411
1412 .ncbc.enc.next.block:
1413
1414 load_little_endian(in0, out4, global4, local3, .LLE2) ! block
1415
! Entry point used after a partial block has been loaded into
! out4/global4 by the n-byte load further down.
1416 .ncbc.enc.next.block_1:
1417
1418 xor in5, out4, in5 ! iv xor
1419 xor out5, global4, out5 ! iv xor
1420
1421 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1422 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1423
! Re-entered directly from the combined final/initial permutation
! below, skipping the separate initial permutation.
1424 .ncbc.enc.next.block_2:
1425
1426 !// call .des_enc ! compares in2 to 8
1427 ! rounds inlined for alignment purposes
1428
1429 add global1, 768, global4 ! address sbox 4 since register used below
1430
1431 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
1432
! NOTE(review): the condition codes tested here are presumably those
! left by the comparison of in2 with 8 inside the rounds (see the
! commented-out call above); confirm against the macro body.
1433 bl .ncbc.enc.next.block_fp
1434 add in0, 8, in0 ! input address
1435
1436 ! If 8 or more bytes are to be encrypted after this block,
1437 ! we combine final permutation for this block with initial
1438 ! permutation for next block. Load next block:
1439
1440 load_little_endian(in0, global3, global4, local5, .LLE12)
1441
1442 ! parameter 1 original left
1443 ! parameter 2 original right
1444 ! parameter 3 left ip
1445 ! parameter 4 right ip
1446 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1447 ! 2: mov in4 to in3
1448 !
1449 ! also adds -8 to length in2 and loads loop counter to out4
1450
1451 fp_ip_macro(out0, out1, global3, global4, 2)
1452
1453 store_little_endian(in1, out0, out1, local3, .SLE10) ! block
1454
! in5 is copied to local1 first because it is overwritten by the xor
! that follows and is still needed for the second xor.
1455 ld [in3], out0 ! key 7531 first round next block
1456 mov in5, local1
1457 xor global3, out5, in5 ! iv xor next block
1458
1459 ld [in3+4], out1 ! key 8642
1460 add global1, 512, global3 ! address sbox 3 since register used
1461 xor global4, local1, out5 ! iv xor next block
1462
1463 ba .ncbc.enc.next.block_2
1464 add in1, 8, in1 ! output address
1465
! Fewer than 8 bytes follow this block: plain final permutation.
1466 .ncbc.enc.next.block_fp:
1467
1468 fp_macro(in5, out5)
1469
1470 store_little_endian(in1, in5, out5, local3, .SLE1) ! block
1471
1472 addcc in2, -8, in2 ! bytes missing when next block done
1473
1474 bpos .ncbc.enc.next.block
1475 add in1, 8, in1
1476
! in2 is negative here. A value of -8 or less means no input bytes
! remain; otherwise in2+8 trailing bytes (1 to 7) are still pending.
1477 .ncbc.enc.seven.or.less:
1478
1479 cmp in2, -8
1480
1481 ble .ncbc.enc.finish
1482 nop
1483
1484 add in2, 8, local1 ! bytes to load
1485
1486 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1487 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1488
1489 ! Loads 1 to 7 bytes little endian to global4, out4
1490
1491
! Write the current chaining value (in5/out5) back through the ivec
! pointer that was saved on entry.
1492 .ncbc.enc.finish:
1493
1494 LDPTR IVEC, local4
1495 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
1496
1497 ret
1498 restore
1499
1500
! Decrypt path. in0/in1 are reused below to hold the chaining value,
! so the input and output pointers are kept in their stack slots and
! in local5/local7. The schedule pointer is advanced by 120; a comment
! in the ede3 cbc routine of this file notes that decryption uses the
! last subkey first.
1501 .ncbc.dec:
1502
1503 STPTR in0, INPUT
1504 cmp in2, 0 ! length
1505 add in3, 120, in3
1506
1507 LDPTR IVEC, local7 ! ivec
1508 ble .ncbc.dec.finish
1509 mov in3, in4 ! schedule
1510
1511 STPTR in1, OUTPUT
1512 mov in0, local5 ! input
1513
1514 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
1515
1516 .ncbc.dec.next.block:
1517
1518 load_little_endian(local5, in5, out5, local3, .LLE4) ! block
1519
1520 ! parameter 6 1/2 for include encryption/decryption
1521 ! parameter 7 1 for mov in1 to in3
1522 ! parameter 8 1 for mov in3 to in4
1523
1524 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
1525
1526 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1527
1528 ! in2 is bytes left to be stored
1529 ! in2 is compared to 8 in the rounds
1530
! The second xor is in the delay slot of the branch, so both halves of
! the plaintext (out4/global4) are ready on either path.
1531 xor out5, in0, out4 ! iv xor
1532 bl .ncbc.dec.seven.or.less
1533 xor in5, in1, global4 ! iv xor
1534
1535 ! Load ivec next block now, since input and output address might be the same.
1536
1537 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
1538
1539 store_little_endian(local7, out4, global4, local3, .SLE3)
1540
! Save the advanced input and output pointers back to their slots.
1541 STPTR local5, INPUT
1542 add local7, 8, local7
1543 addcc in2, -8, in2
1544
1545 bg .ncbc.dec.next.block
1546 STPTR local7, OUTPUT
1547
1548
! in0/in1 hold the ciphertext block that was read last; it becomes
! the new ivec.
1549 .ncbc.dec.store.iv:
1550
1551 LDPTR IVEC, local4 ! ivec
1552 store_little_endian(local4, in0, in1, local5, .SLE4)
1553
1554 .ncbc.dec.finish:
1555
1556 ret
1557 restore
1558
! Fewer than 8 bytes are left to store: pick up the chaining value for
! the ivec store, write only in2 bytes of the plaintext, then continue
! at .ncbc.dec.store.iv.
1559 .ncbc.dec.seven.or.less:
1560
1561 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
1562
1563 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1564
1565
1566 .DES_ncbc_encrypt.end:
1567 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1568
1569
1570 ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1571 ! **************************************************************************
!
! Triple DES (encrypt-decrypt-encrypt) in CBC mode.
!
! in0 = input, in1 = output, in2 = byte count,
! in3 = ks1, in4 = ks2, in5 = ks3.
! ivec and enc are the seventh and eighth arguments and are fetched
! from the argument area of the caller through %fp.
! enc equal to zero selects the decrypt path at .ede3.dec.
!
! The three schedule pointers are parked in stack slots because
! in3/in4/in5 are reused while blocks are processed.
1572
1573
1574 .align 32
1575 .global DES_ede3_cbc_encrypt
1576 .type DES_ede3_cbc_encrypt,#function
1577
1578 DES_ede3_cbc_encrypt:
1579
1580 save %sp, FRAME, %sp
1581
1582 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1583 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1584 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1585
! Position independent address of the S-box table in global1 (call .+8
! leaves the address of label 1 in %o7); out2 = address of .des_and.
1586 sethi %hi(.PIC.DES_SPtrans-1f),global1
1587 or global1,%lo(.PIC.DES_SPtrans-1f),global1
1588 1: call .+8
1589 add %o7,global1,global1
1590 sub global1,.PIC.DES_SPtrans-.des_and,out2
1591
1592 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
1593 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1594 cmp local3, 0 ! enc
1595
! The store is in the delay slot of a non-annulled branch, so the
! unmodified ks2 pointer is saved on both paths.
1596 be .ede3.dec
1597 STPTR in4, KS2
1598
1599 STPTR in5, KS3
1600
1601 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
1602
1603 addcc in2, -8, in2 ! bytes missing after next block
1604
1605 bl .ede3.enc.seven.or.less
1606 STPTR in3, KS1
1607
1608 .ede3.enc.next.block:
1609
1610 load_little_endian(in0, out4, global4, local3, .LLE7)
1611
! Entry point used after a partial block has been loaded into
! out4/global4 by the n-byte load further down.
1612 .ede3.enc.next.block_1:
1613
1614 LDPTR KS2, in4
1615 xor in5, out4, in5 ! iv xor
1616 xor out5, global4, out5 ! iv xor
1617
1618 LDPTR KS1, in3
1619 add in4, 120, in4 ! for decryption we use last subkey first
1620 nop
1621
1622 ip_macro(in5, out5, in5, out5, in3)
1623
! Re-entered directly from the combined final/initial permutation
! below, skipping the separate initial permutation.
1624 .ede3.enc.next.block_2:
1625
1626 call .des_enc ! ks1 in3
1627 nop
1628
! The load in the delay slot replaces in3 with ks3 for the third pass.
1629 call .des_dec ! ks2 in4
1630 LDPTR KS3, in3
1631
1632 call .des_enc ! ks3 in3 compares in2 to 8
1633 nop
1634
1635 bl .ede3.enc.next.block_fp
1636 add in0, 8, in0
1637
1638 ! If 8 or more bytes are to be encrypted after this block,
1639 ! we combine final permutation for this block with initial
1640 ! permutation for next block. Load next block:
1641
1642 load_little_endian(in0, global3, global4, local5, .LLE11)
1643
1644 ! parameter 1 original left
1645 ! parameter 2 original right
1646 ! parameter 3 left ip
1647 ! parameter 4 right ip
1648 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1649 ! 2: mov in4 to in3
1650 !
1651 ! also adds -8 to length in2 and loads loop counter to out4
1652
1653 fp_ip_macro(out0, out1, global3, global4, 1)
1654
1655 store_little_endian(in1, out0, out1, local3, .SLE9) ! block
1656
! in5 is copied to local1 first because it is overwritten by the xor
! that follows and is still needed for the second xor.
1657 mov in5, local1
1658 xor global3, out5, in5 ! iv xor next block
1659
1660 ld [in3], out0 ! key 7531
1661 add global1, 512, global3 ! address sbox 3
1662 xor global4, local1, out5 ! iv xor next block
1663
1664 ld [in3+4], out1 ! key 8642
1665 add global1, 768, global4 ! address sbox 4
1666 ba .ede3.enc.next.block_2
1667 add in1, 8, in1
1668
! Fewer than 8 bytes follow this block: plain final permutation.
1669 .ede3.enc.next.block_fp:
1670
1671 fp_macro(in5, out5)
1672
1673 store_little_endian(in1, in5, out5, local3, .SLE5) ! block
1674
1675 addcc in2, -8, in2 ! bytes missing when next block done
1676
1677 bpos .ede3.enc.next.block
1678 add in1, 8, in1
1679
! in2 is negative here. A value of -8 or less means no input bytes
! remain; otherwise in2+8 trailing bytes (1 to 7) are still pending.
1680 .ede3.enc.seven.or.less:
1681
1682 cmp in2, -8
1683
1684 ble .ede3.enc.finish
1685 nop
1686
1687 add in2, 8, local1 ! bytes to load
1688
1689 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1690 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1691
! Write the current chaining value (in5/out5) back through ivec.
1692 .ede3.enc.finish:
1693
1694 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1695 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
1696
1697 ret
1698 restore
1699
! Decrypt path. in0/in1 are reused below to hold the chaining value,
! so the input and output pointers are kept in their stack slots and
! in local5/local7. ks1 and ks3 are advanced by 120 before being
! saved because they are used by decrypt passes here; ks2 was saved
! unmodified in the delay slot of the branch that leads here.
1700 .ede3.dec:
1701
1702 STPTR in0, INPUT
1703 add in5, 120, in5
1704
1705 STPTR in1, OUTPUT
1706 mov in0, local5
1707 add in3, 120, in3
1708
1709 STPTR in3, KS1
1710 cmp in2, 0
1711
1712 ble .ede3.dec.finish
1713 STPTR in5, KS3
1714
1715 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
1716 load_little_endian(local7, in0, in1, local3, .LLE8)
1717
1718 .ede3.dec.next.block:
1719
1720 load_little_endian(local5, in5, out5, local3, .LLE9)
1721
1722 ! parameter 6 1/2 for include encryption/decryption
1723 ! parameter 7 1 for mov in1 to in3
1724 ! parameter 8 1 for mov in3 to in4
1725 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1726
1727 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1728
! The load in the delay slot puts the saved ks1+120 into in4 for the
! third pass.
1729 call .des_enc ! ks2 in3
1730 LDPTR KS1, in4
1731
1732 call .des_dec ! ks1 in4
1733 nop
1734
1735 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
1736
1737 ! in2 is bytes left to be stored
1738 ! in2 is compared to 8 in the rounds
1739
! The second xor is in the delay slot of the branch, so both halves of
! the plaintext (out4/global4) are ready on either path.
1740 xor out5, in0, out4
1741 bl .ede3.dec.seven.or.less
1742 xor in5, in1, global4
1743
1744 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
1745
1746 store_little_endian(local7, out4, global4, local3, .SLE7) ! block
1747
! Save the advanced input and output pointers back to their slots.
1748 STPTR local5, INPUT
1749 addcc in2, -8, in2
1750 add local7, 8, local7
1751
1752 bg .ede3.dec.next.block
1753 STPTR local7, OUTPUT
1754
! in0/in1 hold the ciphertext block that was read last; it becomes
! the new ivec.
1755 .ede3.dec.store.iv:
1756
1757 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1758 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
1759
1760 .ede3.dec.finish:
1761
1762 ret
1763 restore
1764
! Fewer than 8 bytes are left to store: pick up the chaining value for
! the ivec store, write only in2 bytes of the plaintext, then continue
! at .ede3.dec.store.iv.
1765 .ede3.dec.seven.or.less:
1766
1767 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
1768
1769 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1770
1771
1772 .DES_ede3_cbc_encrypt.end:
1773 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
1774
! Constant block addressed through out2 by the DES routines in this
! file (each of them computes out2 as the address of .des_and).
!
! Layout: 256 table bytes followed by eight 32-bit words at offsets
! 256 through 284, which makes 256 + 8*4 = 288 bytes in total. The
! declared object size is 288 so that the word at offset 284 is
! covered by the symbol as well.
1775 .align 256
1776 .type .des_and,#object
1777 .size .des_and,288
1778
1779 .des_and:
1780
1781 ! This table is used for AND 0xFC when it is known that register
1782 ! bits 8-31 are zero. Makes it possible to do three arithmetic
1783 ! operations in one cycle.
!
! The byte at offset i holds the value i AND 0xFC, for i = 0..255.
1784
1785 .byte 0, 0, 0, 0, 4, 4, 4, 4
1786 .byte 8, 8, 8, 8, 12, 12, 12, 12
1787 .byte 16, 16, 16, 16, 20, 20, 20, 20
1788 .byte 24, 24, 24, 24, 28, 28, 28, 28
1789 .byte 32, 32, 32, 32, 36, 36, 36, 36
1790 .byte 40, 40, 40, 40, 44, 44, 44, 44
1791 .byte 48, 48, 48, 48, 52, 52, 52, 52
1792 .byte 56, 56, 56, 56, 60, 60, 60, 60
1793 .byte 64, 64, 64, 64, 68, 68, 68, 68
1794 .byte 72, 72, 72, 72, 76, 76, 76, 76
1795 .byte 80, 80, 80, 80, 84, 84, 84, 84
1796 .byte 88, 88, 88, 88, 92, 92, 92, 92
1797 .byte 96, 96, 96, 96, 100, 100, 100, 100
1798 .byte 104, 104, 104, 104, 108, 108, 108, 108
1799 .byte 112, 112, 112, 112, 116, 116, 116, 116
1800 .byte 120, 120, 120, 120, 124, 124, 124, 124
1801 .byte 128, 128, 128, 128, 132, 132, 132, 132
1802 .byte 136, 136, 136, 136, 140, 140, 140, 140
1803 .byte 144, 144, 144, 144, 148, 148, 148, 148
1804 .byte 152, 152, 152, 152, 156, 156, 156, 156
1805 .byte 160, 160, 160, 160, 164, 164, 164, 164
1806 .byte 168, 168, 168, 168, 172, 172, 172, 172
1807 .byte 176, 176, 176, 176, 180, 180, 180, 180
1808 .byte 184, 184, 184, 184, 188, 188, 188, 188
1809 .byte 192, 192, 192, 192, 196, 196, 196, 196
1810 .byte 200, 200, 200, 200, 204, 204, 204, 204
1811 .byte 208, 208, 208, 208, 212, 212, 212, 212
1812 .byte 216, 216, 216, 216, 220, 220, 220, 220
1813 .byte 224, 224, 224, 224, 228, 228, 228, 228
1814 .byte 232, 232, 232, 232, 236, 236, 236, 236
1815 .byte 240, 240, 240, 240, 244, 244, 244, 244
1816 .byte 248, 248, 248, 248, 252, 252, 252, 252
1817
1818 ! 5 numbers for initial/final permutation
1819
1820 .word 0x0f0f0f0f ! offset 256
1821 .word 0x0000ffff ! 260
1822 .word 0x33333333 ! 264
1823 .word 0x00ff00ff ! 268
1824 .word 0x55555555 ! 272
1825
! NOTE(review): the word at offset 280 is presumably the loop counter
! that the combined final/initial permutation loads into out4; the
! users of offsets 276 and 284 are not visible in this section.
1826 .word 0 ! 276
1827 .word LOOPS ! 280
1828 .word 0x0000FC00 ! 284
1829
! Combined S-box/permutation lookup table for the DES rounds.
!
! Eight sub-tables (labelled nibble 0 to nibble 7 below) of 64 words
! each, 8 * 64 * 4 = 2048 bytes, which matches the declared size.
! Sub-table k starts at byte offset 256*k; the cbc routines in this
! file form such addresses directly, for example base+512 and base+768.
!
! .PIC.DES_SPtrans is a second, non-global label on the same address.
! The functions in this file use it for their pc-relative computation
! of the table address.
1830 .global DES_SPtrans
1831 .type DES_SPtrans,#object
1832 .size DES_SPtrans,2048
1833 .align 64
1834 DES_SPtrans:
1835 .PIC.DES_SPtrans:
1836 ! nibble 0
1837 .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
1838 .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
1839 .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
1840 .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
1841 .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
1842 .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
1843 .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
1844 .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
1845 .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
1846 .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
1847 .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
1848 .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
1849 .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
1850 .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
1851 .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
1852 .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
1853 ! nibble 1
1854 .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
1855 .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
1856 .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
1857 .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
1858 .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
1859 .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
1860 .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
1861 .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
1862 .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
1863 .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
1864 .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
1865 .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
1866 .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
1867 .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
1868 .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
1869 .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
1870 ! nibble 2
1871 .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
1872 .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
1873 .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
1874 .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
1875 .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
1876 .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
1877 .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
1878 .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
1879 .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
1880 .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
1881 .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
1882 .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
1883 .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
1884 .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
1885 .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
1886 .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
1887 ! nibble 3
1888 .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
1889 .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
1890 .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
1891 .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
1892 .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
1893 .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
1894 .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
1895 .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
1896 .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
1897 .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
1898 .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
1899 .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
1900 .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
1901 .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
1902 .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
1903 .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
1904 ! nibble 4
1905 .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
1906 .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
1907 .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
1908 .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
1909 .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
1910 .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
1911 .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
1912 .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
1913 .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
1914 .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
1915 .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
1916 .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
1917 .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
1918 .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
1919 .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
1920 .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
1921 ! nibble 5
1922 .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
1923 .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
1924 .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
1925 .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
1926 .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
1927 .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
1928 .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
1929 .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
1930 .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
1931 .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
1932 .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
1933 .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
1934 .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
1935 .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
1936 .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
1937 .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
1938 ! nibble 6
1939 .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
1940 .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
1941 .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
1942 .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
1943 .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
1944 .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
1945 .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
1946 .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
1947 .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
1948 .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
1949 .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
1950 .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
1951 .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
1952 .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
1953 .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
1954 .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
1955 ! nibble 7
1956 .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
1957 .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
1958 .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
1959 .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
1960 .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
1961 .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
1962 .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
1963 .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
1964 .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
1965 .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
1966 .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
1967 .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
1968 .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
1969 .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
1970 .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
1971 .word 0x20000000, 0x20800080, 0x00020000, 0x00820080
1972