]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/des/asm/des_enc.m4
Many spelling fixes/typo's corrected.
[thirdparty/openssl.git] / crypto / des / asm / des_enc.m4
1 ! Copyright 2000-2016 The OpenSSL Project Authors. All Rights Reserved.
2 !
3 ! Licensed under the OpenSSL license (the "License"). You may not use
4 ! this file except in compliance with the License. You can obtain a copy
5 ! in the file LICENSE in the source distribution or at
6 ! https://www.openssl.org/source/license.html
7 !
8 ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
9 !
10 ! Global registers 1 to 5 are used. This is the same as done by the
11 ! cc compiler. The UltraSPARC load/store little endian feature is used.
12 !
13 ! Instruction grouping often refers to one CPU cycle.
14 !
15 ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
16 !
17 ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
18 !
19 ! Performance improvement according to './apps/openssl speed des'
20 !
21 ! 32-bit build:
22 ! 23% faster than cc-5.2 -xarch=v8plus -xO5
23 ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
24 ! 64-bit build:
25 ! 50% faster than cc-5.2 -xarch=v9 -xO5
26 ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
27 !
28
29 .ident "des_enc.m4 2.1"
30 .file "des_enc-sparc.S"
31
32 #include <openssl/opensslconf.h>
33
34 #if defined(__SUNPRO_C) && defined(__sparcv9)
35 # define ABI64 /* They've said -xarch=v9 at command line */
36 #elif defined(__GNUC__) && defined(__arch64__)
37 # define ABI64 /* They've said -m64 at command line */
38 #endif
39 ! Stack layout and pointer-sized load/store for the selected ABI:
40 #ifdef ABI64
41 .register %g2,#scratch
42 .register %g3,#scratch
43 # define FRAME -192 /* added to %sp by the save in each entry point */
44 # define BIAS 2047 /* added to %sp when addressing stack slots */
45 # define LDPTR ldx /* pointer-sized load */
46 # define STPTR stx /* pointer-sized store */
47 # define ARG0 128 /* stack offset of argument slot 0 */
48 # define ARGSZ 8 /* size of one argument slot */
49 #else
50 # define FRAME -96 /* added to %sp by the save in each entry point */
51 # define BIAS 0 /* added to %sp when addressing stack slots */
52 # define LDPTR ld /* pointer-sized load */
53 # define STPTR st /* pointer-sized store */
54 # define ARG0 68 /* stack offset of argument slot 0 */
55 # define ARGSZ 4 /* size of one argument slot */
56 #endif
57
58 #define LOOPS 7 /* initial loop counter (out4) for the two-round loop */
59
60 #define global0 %g0 /* symbolic names for the SPARC registers used below */
61 #define global1 %g1
62 #define global2 %g2
63 #define global3 %g3
64 #define global4 %g4
65 #define global5 %g5
66
67 #define local0 %l0
68 #define local1 %l1
69 #define local2 %l2
70 #define local3 %l3
71 #define local4 %l4
72 #define local5 %l5
73 #define local7 %l6 /* NOTE: local7 is %l6, crossed with local6 */
74 #define local6 %l7 /* NOTE: local6 is %l7, crossed with local7 */
75
76 #define in0 %i0
77 #define in1 %i1
78 #define in2 %i2
79 #define in3 %i3
80 #define in4 %i4
81 #define in5 %i5
82 #define in6 %i6
83 #define in7 %i7
84
85 #define out0 %o0
86 #define out1 %o1
87 #define out2 %o2
88 #define out3 %o3
89 #define out4 %o4
90 #define out5 %o5
91 #define out6 %o6
92 #define out7 %o7
93
94 #define stub stb /* stub is the byte store used by the store macros */
95 ! From here on the m4 quote characters are curly braces:
96 changequote({,})
97
98
99 ! Macro definitions:
100
101
102 ! {ip_macro}
103 !
104 ! The logic used in initial and final permutations is the same as in
105 ! the C code. The permutations are done with a clever shift/xor/and
106 ! technique.
107 !
108 ! The macro also derives address sbox 2 to 5 into global 2 to 5, address
109 ! sbox 6 into local6, and address sbox 8 into out3, all from global1.
110 !
111 ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
112 !
113 ! Loads key first round from address in parameter 5 to out0, out1.
114 !
115 ! After the original LibDES initial permutation, the resulting left
116 ! is in the variable initially used for right and vice versa. The macro
117 ! implements the possibility to keep the halves in the original registers.
118 !
119 ! parameter 1 left
120 ! parameter 2 right
121 ! parameter 3 result left (modify in first round)
122 ! parameter 4 result right (use in first round)
123 ! parameter 5 key address
124 ! parameter 6 1/2 to end with a call to the encryption/decryption rounds
125 ! parameter 7 1 for move in1 to in3
126 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
127 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
128 ! expects global1 = address sbox 1, out2 = base of the constants read below
129 define(ip_macro, {
130
131 ! {ip_macro}
132 ! $1 $2 $4 $3 $5 $6 $7 $8 $9
133
134 ld [out2+256], local1
135 srl $2, 4, local4
136
137 xor local4, $1, local4
138 ifelse($7,1,{mov in1, in3},{nop})
139
140 ld [out2+260], local2
141 and local4, local1, local4
142 ifelse($8,1,{mov in3, in4},{})
143 ifelse($8,2,{mov in4, in3},{})
144
145 ld [out2+280], out4 ! loop counter
146 sll local4, 4, local1
147 xor $1, local4, $1
148
149 ld [out2+264], local3
150 srl $1, 16, local4
151 xor $2, local1, $2
152
153 ifelse($9,1,{LDPTR KS3, in4},{})
154 xor local4, $2, local4
155 nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
156
157 ifelse($9,1,{LDPTR KS2, in3},{})
158 and local4, local2, local4
159 nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
160
161 sll local4, 16, local1
162 xor $2, local4, $2
163
164 srl $2, 2, local4
165 xor $1, local1, $1
166
167 sethi %hi(16711680), local5 ! 0x00ff0000
168 xor local4, $1, local4
169
170 and local4, local3, local4
171 or local5, 255, local5 ! local5 = 0x00ff00ff
172
173 sll local4, 2, local2
174 xor $1, local4, $1
175
176 srl $1, 8, local4
177 xor $2, local2, $2
178
179 xor local4, $2, local4
180 add global1, 768, global4 ! address sbox 4
181
182 and local4, local5, local4
183 add global1, 1024, global5 ! address sbox 5
184
185 ld [out2+272], local7
186 sll local4, 8, local1
187 xor $2, local4, $2
188
189 srl $2, 1, local4
190 xor $1, local1, $1
191
192 ld [$5], out0 ! key 7531
193 xor local4, $1, local4
194 add global1, 256, global2 ! address sbox 2
195
196 ld [$5+4], out1 ! key 8642
197 and local4, local7, local4
198 add global1, 512, global3 ! address sbox 3
199
200 sll local4, 1, local1
201 xor $1, local4, $1
202
203 sll $1, 3, local3 ! rotate 3 left, combined with srl 29 below
204 xor $2, local1, $2
205
206 sll $2, 3, local2 ! rotate 3 left, combined with srl 29 below
207 add global1, 1280, local6 ! address sbox 6
208
209 srl $1, 29, local4
210 add global1, 1792, out3 ! address sbox 8
211
212 srl $2, 29, local1
213 or local4, local3, $4
214
215 or local2, local1, $3
216
217 ifelse($6, 1, {
218
219 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
220 or local2, local1, $3
221 xor $4, out0, local1
222
223 call .des_enc.1 ! enter the shared encryption rounds at the loop label
224 and local1, 252, local1 ! delay slot: sbox 1 index for first round
225
226 },{})
227
228 ifelse($6, 2, {
229
230 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
231 or local2, local1, $3
232 xor $4, out0, local1
233
234 call .des_dec.1 ! enter the shared decryption rounds at the loop label
235 and local1, 252, local1 ! delay slot: sbox 1 index for first round
236
237 },{})
238 })
239
240
241 ! {rounds_macro}
242 !
243 ! The logic used in the DES rounds is the same as in the C code,
244 ! except that calculations for sbox 1 and sbox 5 begin before
245 ! the previous round is finished.
246 !
247 ! In each round one half (work) is modified based on key and the
248 ! other half (use).
249 !
250 ! In this version we do two rounds in a loop repeated 7 times
251 ! and two rounds separately.
252 !
253 ! One half has the bits for the sboxes in the following positions:
254 !
255 ! 777777xx555555xx333333xx111111xx
256 !
257 ! 88xx666666xx444444xx222222xx8888
258 !
259 ! The bits for each sbox are xor-ed with the key bits for that box.
260 ! The above xx bits are cleared, and the result used for lookup in
261 ! the sbox table. Each sbox entry contains the 4 output bits permuted
262 ! into 32 bits according to the P permutation.
263 !
264 ! In the description of DES, left and right are switched after
265 ! each round, except after last round. In this code the original
266 ! left and right are kept in the same register in all rounds, meaning
267 ! that after the 16 rounds the result for right is in the register
268 ! originally used for left.
269 !
270 ! parameter 1 first work (left in first round)
271 ! parameter 2 first use (right in first round)
272 ! parameter 3 enc/dec 1/-1
273 ! parameter 4 loop label
274 ! parameter 5 key address register
275 ! parameter 6 optional address for key next encryption/decryption
276 ! parameter 7 not empty for include retl
277 !
278 ! also compares in2 to 8
279 ! expects sbox addresses in global1 to global5, local6, out3; out2 = constant base; out4 = loop counter; out0, out1 = key first round
280 define(rounds_macro, {
281
282 ! {rounds_macro}
283 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
284
285 xor $2, out0, local1
286
287 ld [out2+284], local5 ! 0x0000FC00
288 ba $4
289 and local1, 252, local1 ! delay slot: sbox 1 index
290
291 .align 32
292
293 $4:
294 ! local6 is address sbox 6
295 ! out3 is address sbox 8
296 ! out4 is loop counter
297
298 ld [global1+local1], local1
299 xor $2, out1, out1 ! 8642
300 xor $2, out0, out0 ! 7531
301 ! fmovs %f0, %f0 ! fxor used for alignment
302
303 srl out1, 4, local0 ! rotate 4 right
304 and out0, local5, local3 ! 3
305 ! fmovs %f0, %f0
306
307 ld [$5+$3*8], local7 ! key 7531 next round
308 srl local3, 8, local3 ! 3
309 and local0, 252, local2 ! 2
310 ! fmovs %f0, %f0
311
312 ld [global3+local3],local3 ! 3
313 sll out1, 28, out1 ! rotate
314 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
315
316 ld [global2+local2], local2 ! 2
317 srl out0, 24, local1 ! 7
318 or out1, local0, out1 ! rotate
319
320 ldub [out2+local1], local1 ! 7 (and 0xFC)
321 srl out1, 24, local0 ! 8
322 and out1, local5, local4 ! 4
323
324 ldub [out2+local0], local0 ! 8 (and 0xFC)
325 srl local4, 8, local4 ! 4
326 xor $1, local2, $1 ! 2 finished local2 now sbox 6
327
328 ld [global4+local4],local4 ! 4
329 srl out1, 16, local2 ! 6
330 xor $1, local3, $1 ! 3 finished local3 now sbox 5
331
332 ld [out3+local0],local0 ! 8
333 and local2, 252, local2 ! 6
334 add global1, 1536, local5 ! address sbox 7
335
336 ld [local6+local2], local2 ! 6
337 srl out0, 16, local3 ! 5
338 xor $1, local4, $1 ! 4 finished
339
340 ld [local5+local1],local1 ! 7
341 and local3, 252, local3 ! 5
342 xor $1, local0, $1 ! 8 finished
343
344 ld [global5+local3],local3 ! 5
345 xor $1, local2, $1 ! 6 finished
346 subcc out4, 1, out4 ! loop counter, condition codes tested by bne below
347
348 ld [$5+$3*8+4], out0 ! key 8642 next round
349 xor $1, local7, local2 ! sbox 5 next round
350 xor $1, local1, $1 ! 7 finished
351
352 srl local2, 16, local2 ! sbox 5 next round
353 xor $1, local3, $1 ! 5 finished
354
355 ld [$5+$3*16+4], out1 ! key 8642 next round again
356 and local2, 252, local2 ! sbox5 next round
357 ! next round
358 xor $1, local7, local7 ! 7531
359
360 ld [global5+local2], local2 ! 5
361 srl local7, 24, local3 ! 7
362 xor $1, out0, out0 ! 8642
363
364 ldub [out2+local3], local3 ! 7 (and 0xFC)
365 srl out0, 4, local0 ! rotate 4 right
366 and local7, 252, local1 ! 1
367
368 sll out0, 28, out0 ! rotate
369 xor $2, local2, $2 ! 5 finished local2 used
370
371 srl local0, 8, local4 ! 4
372 and local0, 252, local2 ! 2
373 ld [local5+local3], local3 ! 7
374
375 srl local0, 16, local5 ! 6
376 or out0, local0, out0 ! rotate
377 ld [global2+local2], local2 ! 2
378
379 srl out0, 24, local0
380 ld [$5+$3*16], out0 ! key 7531 next round
381 and local4, 252, local4 ! 4
382
383 and local5, 252, local5 ! 6
384 ld [global4+local4], local4 ! 4
385 xor $2, local3, $2 ! 7 finished local3 used
386
387 and local0, 252, local0 ! 8
388 ld [local6+local5], local5 ! 6
389 xor $2, local2, $2 ! 2 finished local2 now sbox 3
390
391 srl local7, 8, local2 ! 3 start
392 ld [out3+local0], local0 ! 8
393 xor $2, local4, $2 ! 4 finished
394
395 and local2, 252, local2 ! 3
396 ld [global1+local1], local1 ! 1
397 xor $2, local5, $2 ! 6 finished local5 used
398
399 ld [global3+local2], local2 ! 3
400 xor $2, local0, $2 ! 8 finished
401 add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer
402
403 ld [out2+284], local5 ! 0x0000FC00
404 xor $2, out0, local4 ! sbox 1 next round
405 xor $2, local1, $2 ! 1 finished
406
407 xor $2, local2, $2 ! 3 finished
408 bne $4
409 and local4, 252, local1 ! sbox 1 next round
410
411 ! two rounds more:
412
413 ld [global1+local1], local1
414 xor $2, out1, out1
415 xor $2, out0, out0
416
417 srl out1, 4, local0 ! rotate
418 and out0, local5, local3
419
420 ld [$5+$3*8], local7 ! key 7531
421 srl local3, 8, local3
422 and local0, 252, local2
423
424 ld [global3+local3],local3
425 sll out1, 28, out1 ! rotate
426 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
427
428 ld [global2+local2], local2
429 srl out0, 24, local1
430 or out1, local0, out1 ! rotate
431
432 ldub [out2+local1], local1
433 srl out1, 24, local0
434 and out1, local5, local4
435
436 ldub [out2+local0], local0
437 srl local4, 8, local4
438 xor $1, local2, $1 ! 2 finished local2 now sbox 6
439
440 ld [global4+local4],local4
441 srl out1, 16, local2
442 xor $1, local3, $1 ! 3 finished local3 now sbox 5
443
444 ld [out3+local0],local0
445 and local2, 252, local2
446 add global1, 1536, local5 ! address sbox 7
447
448 ld [local6+local2], local2
449 srl out0, 16, local3
450 xor $1, local4, $1 ! 4 finished
451
452 ld [local5+local1],local1
453 and local3, 252, local3
454 xor $1, local0, $1
455
456 ld [global5+local3],local3
457 xor $1, local2, $1 ! 6 finished
458 cmp in2, 8 ! condition codes are left for the code after the macro
459
460 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
461 xor $1, local7, local2 ! sbox 5 next round
462 xor $1, local1, $1 ! 7 finished
463
464 ld [$5+$3*8+4], out0
465 srl local2, 16, local2 ! sbox 5 next round
466 xor $1, local3, $1 ! 5 finished
467
468 and local2, 252, local2
469 ! next round (two rounds more)
470 xor $1, local7, local7 ! 7531
471
472 ld [global5+local2], local2
473 srl local7, 24, local3
474 xor $1, out0, out0 ! 8642
475
476 ldub [out2+local3], local3
477 srl out0, 4, local0 ! rotate
478 and local7, 252, local1
479
480 sll out0, 28, out0 ! rotate
481 xor $2, local2, $2 ! 5 finished local2 used
482
483 srl local0, 8, local4
484 and local0, 252, local2
485 ld [local5+local3], local3
486
487 srl local0, 16, local5
488 or out0, local0, out0 ! rotate
489 ld [global2+local2], local2
490
491 srl out0, 24, local0
492 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
493 and local4, 252, local4
494
495 and local5, 252, local5
496 ld [global4+local4], local4
497 xor $2, local3, $2 ! 7 finished local3 used
498
499 and local0, 252, local0
500 ld [local6+local5], local5
501 xor $2, local2, $2 ! 2 finished local2 now sbox 3
502
503 srl local7, 8, local2 ! 3 start
504 ld [out3+local0], local0
505 xor $2, local4, $2
506
507 and local2, 252, local2
508 ld [global1+local1], local1
509 xor $2, local5, $2 ! 6 finished local5 used
510
511 ld [global3+local2], local2
512 srl $1, 3, local3 ! first work half shifted 3 right, for the final permutation
513 xor $2, local0, $2
514
515 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
516 sll $1, 29, local4 ! first work half shifted 29 left, for the final permutation
517 xor $2, local1, $2
518
519 ifelse($7,{}, {}, {retl})
520 xor $2, local2, $2 ! 3 finished, delay slot when retl is included
521 })
522
523
524 ! {fp_macro}
525 !
526 ! parameter 1 right (original left)
527 ! parameter 2 left (original right)
528 ! parameter 3 1 for optional store to [in0]
529 ! parameter 4 1 for load input/output address to local5/7
530 !
531 ! The final permutation logic switches the halves, meaning that
532 ! left and right end up in the registers originally used.
533 ! expects parameter 1 already split into local3 (shifted 3 right) and local4 (shifted 29 left)
534 define(fp_macro, {
535
536 ! {fp_macro}
537 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
538
539 ! initially undo the rotate 3 left done after initial permutation
540 ! original left is received shifted 3 right and 29 left in local3/4
541
542 sll $2, 29, local1
543 or local3, local4, $1
544
545 srl $2, 3, $2
546 sethi %hi(0x55555555), local2
547
548 or $2, local1, $2
549 or local2, %lo(0x55555555), local2
550
551 srl $2, 1, local3
552 sethi %hi(0x00ff00ff), local1
553 xor local3, $1, local3
554 or local1, %lo(0x00ff00ff), local1
555 and local3, local2, local3
556 sethi %hi(0x33333333), local4
557 sll local3, 1, local2
558
559 xor $1, local3, $1
560
561 srl $1, 8, local3
562 xor $2, local2, $2
563 xor local3, $2, local3
564 or local4, %lo(0x33333333), local4
565 and local3, local1, local3
566 sethi %hi(0x0000ffff), local1
567 sll local3, 8, local2
568
569 xor $2, local3, $2
570
571 srl $2, 2, local3
572 xor $1, local2, $1
573 xor local3, $1, local3
574 or local1, %lo(0x0000ffff), local1
575 and local3, local4, local3
576 sethi %hi(0x0f0f0f0f), local4
577 sll local3, 2, local2
578
579 ifelse($4,1, {LDPTR INPUT, local5})
580 xor $1, local3, $1
581
582 ifelse($4,1, {LDPTR OUTPUT, local7})
583 srl $1, 16, local3
584 xor $2, local2, $2
585 xor local3, $2, local3
586 or local4, %lo(0x0f0f0f0f), local4
587 and local3, local1, local3
588 sll local3, 16, local2
589
590 xor $2, local3, local1 ! second half continues in local1 until the last xor
591
592 srl local1, 4, local3
593 xor $1, local2, $1
594 xor local3, $1, local3
595 and local3, local4, local3
596 sll local3, 4, local2
597
598 xor $1, local3, $1
599
600 ! optional store:
601
602 ifelse($3,1, {st $1, [in0]})
603
604 xor local1, local2, $2
605
606 ifelse($3,1, {st $2, [in0+4]})
607
608 })
609
610
611 ! {fp_ip_macro}
612 !
613 ! Does initial permutation for next block mixed with
614 ! final permutation for current block.
615 !
616 ! parameter 1 original left
617 ! parameter 2 original right
618 ! parameter 3 left ip
619 ! parameter 4 right ip
620 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
621 ! 2: mov in4 to in3
622 !
623 ! also adds -8 to length in2 and loads loop counter to out4
624 ! reads the current block from out5 and from local3, local4; out2 = constant base
625 define(fp_ip_macro, {
626
627 ! {fp_ip_macro}
628 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
629
630 define({temp1},{out4})
631 define({temp2},{local3})
632
633 define({ip1},{local1})
634 define({ip2},{local2})
635 define({ip4},{local4})
636 define({ip5},{local5})
637
638 ! $1 in local3, local4
639
640 ld [out2+256], ip1
641 sll out5, 29, temp1
642 or local3, local4, $1
643
644 srl out5, 3, $2
645 ifelse($5,2,{mov in4, in3})
646
647 ld [out2+272], ip5
648 srl $4, 4, local0
649 or $2, temp1, $2
650
651 srl $2, 1, temp1
652 xor temp1, $1, temp1
653
654 and temp1, ip5, temp1
655 xor local0, $3, local0
656
657 sll temp1, 1, temp2
658 xor $1, temp1, $1
659
660 and local0, ip1, local0
661 add in2, -8, in2 ! length minus 8
662
663 sll local0, 4, local7
664 xor $3, local0, $3
665
666 ld [out2+268], ip4
667 srl $1, 8, temp1
668 xor $2, temp2, $2
669 ld [out2+260], ip2
670 srl $3, 16, local0
671 xor $4, local7, $4
672 xor temp1, $2, temp1
673 xor local0, $4, local0
674 and temp1, ip4, temp1
675 and local0, ip2, local0
676 sll temp1, 8, temp2
677 xor $2, temp1, $2
678 sll local0, 16, local7
679 xor $4, local0, $4
680
681 srl $2, 2, temp1
682 xor $1, temp2, $1
683
684 ld [out2+264], temp2 ! ip3
685 srl $4, 2, local0
686 xor $3, local7, $3
687 xor temp1, $1, temp1
688 xor local0, $3, local0
689 and temp1, temp2, temp1
690 and local0, temp2, local0
691 sll temp1, 2, temp2
692 xor $1, temp1, $1
693 sll local0, 2, local7
694 xor $3, local0, $3
695
696 srl $1, 16, temp1
697 xor $2, temp2, $2
698 srl $3, 8, local0
699 xor $4, local7, $4
700 xor temp1, $2, temp1
701 xor local0, $4, local0
702 and temp1, ip2, temp1
703 and local0, ip4, local0
704 sll temp1, 16, temp2
705 xor $2, temp1, local4
706 sll local0, 8, local7
707 xor $4, local0, $4
708
709 srl $4, 1, local0
710 xor $3, local7, $3
711
712 srl local4, 4, temp1
713 xor local0, $3, local0
714
715 xor $1, temp2, $1
716 and local0, ip5, local0
717
718 sll local0, 1, local7
719 xor temp1, $1, temp1
720
721 xor $3, local0, $3
722 xor $4, local7, $4
723
724 sll $3, 3, local5
725 and temp1, ip1, temp1
726
727 sll temp1, 4, temp2
728 xor $1, temp1, $1
729
730 ifelse($5,1,{LDPTR KS2, in4})
731 sll $4, 3, local2
732 xor local4, temp2, $2
733
734 ! reload since used as temporary:
735
736 ld [out2+280], out4 ! loop counter
737
738 srl $3, 29, local0
739 ifelse($5,1,{add in4, 120, in4})
740
741 ifelse($5,1,{LDPTR KS1, in3})
742 srl $4, 29, local7
743
744 or local0, local5, $4
745 or local2, local7, $3
746
747 })
748
749
750
751 ! {load_little_endian}
752 !
753 ! parameter 1 address
754 ! parameter 2 destination left
755 ! parameter 3 destination right
756 ! parameter 4 temporary
757 ! parameter 5 label
758 ! a second label, the given label with suffix a, is placed at the end
759 define(load_little_endian, {
760
761 ! {load_little_endian}
762 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
763
764 ! first in memory to rightmost in register
765
766 $5:
767 ldub [$1+3], $2 ! byte 3 becomes the most significant byte of left
768
769 ldub [$1+2], $4
770 sll $2, 8, $2
771 or $2, $4, $2
772
773 ldub [$1+1], $4
774 sll $2, 8, $2
775 or $2, $4, $2
776
777 ldub [$1+0], $4
778 sll $2, 8, $2
779 or $2, $4, $2
780
781
782 ldub [$1+3+4], $3 ! byte 7 becomes the most significant byte of right
783
784 ldub [$1+2+4], $4
785 sll $3, 8, $3
786 or $3, $4, $3
787
788 ldub [$1+1+4], $4
789 sll $3, 8, $3
790 or $3, $4, $3
791
792 ldub [$1+0+4], $4
793 sll $3, 8, $3
794 or $3, $4, $3
795 $5a:
796
797 })
798
799
800 ! {load_little_endian_inc}
801 !
802 ! parameter 1 address
803 ! parameter 2 destination left
804 ! parameter 3 destination right
805 ! parameter 4 temporary
806 ! parameter 5 label
807 !
808 ! adds 8 to address
809 ! a second label, the given label with suffix a, is placed at the end
810 define(load_little_endian_inc, {
811
812 ! {load_little_endian_inc}
813 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
814
815 ! first in memory to rightmost in register
816
817 $5:
818 ldub [$1+3], $2 ! byte 3 becomes the most significant byte of left
819
820 ldub [$1+2], $4
821 sll $2, 8, $2
822 or $2, $4, $2
823
824 ldub [$1+1], $4
825 sll $2, 8, $2
826 or $2, $4, $2
827
828 ldub [$1+0], $4
829 sll $2, 8, $2
830 or $2, $4, $2
831
832 ldub [$1+3+4], $3 ! byte 7 becomes the most significant byte of right
833 add $1, 8, $1 ! address advanced here, offsets below compensate with -8
834
835 ldub [$1+2+4-8], $4
836 sll $3, 8, $3
837 or $3, $4, $3
838
839 ldub [$1+1+4-8], $4
840 sll $3, 8, $3
841 or $3, $4, $3
842
843 ldub [$1+0+4-8], $4
844 sll $3, 8, $3
845 or $3, $4, $3
846 $5a:
847
848 })
849
850
851 ! {load_n_bytes}
852 !
853 ! Loads 1 to 7 bytes little endian
854 ! Remaining bytes are zeroed.
855 !
856 ! parameter 1 address
857 ! parameter 2 length
858 ! parameter 3 destination register left
859 ! parameter 4 destination register right
860 ! parameter 5 temp
861 ! parameter 6 temp2
862 ! parameter 7 label
863 ! parameter 8 return label
864 ! length 0 is not handled: table entry 0 is offset 0, which is the call itself
865 define(load_n_bytes, {
866
867 ! {load_n_bytes}
868 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
869
870 $7.0: call .+8 ! %o7 = address of this call, used as base for the jump
871 sll $2, 2, $6 ! delay slot: table offset = length * 4
872
873 add %o7,$7.jmp.table-$7.0,$5
874
875 add $5, $6, $5
876 mov 0, $4
877
878 ld [$5], $5
879
880 jmp %o7+$5
881 mov 0, $3
882
883 $7.7:
884 ldub [$1+6], $5 ! bytes 4 to 6 go to parameter 3, cases fall through
885 sll $5, 16, $5
886 or $3, $5, $3
887 $7.6:
888 ldub [$1+5], $5
889 sll $5, 8, $5
890 or $3, $5, $3
891 $7.5:
892 ldub [$1+4], $5
893 or $3, $5, $3
894 $7.4:
895 ldub [$1+3], $5 ! bytes 0 to 3 go to parameter 4
896 sll $5, 24, $5
897 or $4, $5, $4
898 $7.3:
899 ldub [$1+2], $5
900 sll $5, 16, $5
901 or $4, $5, $4
902 $7.2:
903 ldub [$1+1], $5
904 sll $5, 8, $5
905 or $4, $5, $4
906 $7.1:
907 ldub [$1+0], $5
908 ba $8
909 or $4, $5, $4
910
911 .align 4
912
913 $7.jmp.table:
914 .word 0
915 .word $7.1-$7.0
916 .word $7.2-$7.0
917 .word $7.3-$7.0
918 .word $7.4-$7.0
919 .word $7.5-$7.0
920 .word $7.6-$7.0
921 .word $7.7-$7.0
922 })
923
924
925 ! {store_little_endian}
926 !
927 ! parameter 1 address
928 ! parameter 2 source left
929 ! parameter 3 source right
930 ! parameter 4 temporary
931 ! parameter 5 label (a second label with suffix a is placed at the end)
932 define(store_little_endian, {
933
934 ! {store_little_endian}
935 ! $1 $2 $3 $4 $5 $6 $7 $8 $9
936
937 ! rightmost in register to first in memory
938
939 $5:
940 and $2, 255, $4
941 stub $4, [$1+0]
942
943 srl $2, 8, $4
944 and $4, 255, $4
945 stub $4, [$1+1]
946
947 srl $2, 16, $4
948 and $4, 255, $4
949 stub $4, [$1+2]
950
951 srl $2, 24, $4
952 stub $4, [$1+3]
953
954
955 and $3, 255, $4
956 stub $4, [$1+0+4]
957
958 srl $3, 8, $4
959 and $4, 255, $4
960 stub $4, [$1+1+4]
961
962 srl $3, 16, $4
963 and $4, 255, $4
964 stub $4, [$1+2+4]
965
966 srl $3, 24, $4
967 stub $4, [$1+3+4]
968
969 $5a:
970
971 })
972
973
974 ! {store_n_bytes}
975 !
976 ! Stores 1 to 7 bytes little endian
977 !
978 ! parameter 1 address
979 ! parameter 2 length
980 ! parameter 3 source register left
981 ! parameter 4 source register right
982 ! parameter 5 temp
983 ! parameter 6 temp2
984 ! parameter 7 label
985 ! parameter 8 return label
986 ! length 0 is not handled: table entry 0 is offset 0, which is the call itself
987 define(store_n_bytes, {
988
989 ! {store_n_bytes}
990 ! $1 $2 $5 $6 $7 $8 $7 $8 $9
991
992 $7.0: call .+8 ! %o7 = address of this call, used as base for the jump
993 sll $2, 2, $6 ! delay slot: table offset = length * 4
994
995 add %o7,$7.jmp.table-$7.0,$5
996
997 add $5, $6, $5
998
999 ld [$5], $5
1000
1001 jmp %o7+$5
1002 nop
1003
1004 $7.7:
1005 srl $3, 16, $5 ! bytes 4 to 6 come from parameter 3, cases fall through
1006 and $5, 0xff, $5
1007 stub $5, [$1+6]
1008 $7.6:
1009 srl $3, 8, $5
1010 and $5, 0xff, $5
1011 stub $5, [$1+5]
1012 $7.5:
1013 and $3, 0xff, $5
1014 stub $5, [$1+4]
1015 $7.4:
1016 srl $4, 24, $5 ! bytes 0 to 3 come from parameter 4
1017 stub $5, [$1+3]
1018 $7.3:
1019 srl $4, 16, $5
1020 and $5, 0xff, $5
1021 stub $5, [$1+2]
1022 $7.2:
1023 srl $4, 8, $5
1024 and $5, 0xff, $5
1025 stub $5, [$1+1]
1026 $7.1:
1027 and $4, 0xff, $5
1028
1029
1030 ba $8
1031 stub $5, [$1] ! delay slot: store byte 0
1032
1033 .align 4
1034
1035 $7.jmp.table:
1036
1037 .word 0
1038 .word $7.1-$7.0
1039 .word $7.2-$7.0
1040 .word $7.3-$7.0
1041 .word $7.4-$7.0
1042 .word $7.5-$7.0
1043 .word $7.6-$7.0
1044 .word $7.7-$7.0
1045 })
1046
1047
1048 define(testvalue,{1})
1049 ! {register_init}: copies the test constant to up to 8 given registers and to the fixed work registers
1050 define(register_init, {
1051
1052 ! For test purposes:
1053
1054 sethi %hi(testvalue), local0
1055 or local0, %lo(testvalue), local0
1056 ! each non-empty parameter names a register that receives the constant:
1057 ifelse($1,{},{}, {mov local0, $1})
1058 ifelse($2,{},{}, {mov local0, $2})
1059 ifelse($3,{},{}, {mov local0, $3})
1060 ifelse($4,{},{}, {mov local0, $4})
1061 ifelse($5,{},{}, {mov local0, $5})
1062 ifelse($6,{},{}, {mov local0, $6})
1063 ifelse($7,{},{}, {mov local0, $7})
1064 ifelse($8,{},{}, {mov local0, $8})
1065 ! registers that are always overwritten:
1066 mov local0, local1
1067 mov local0, local2
1068 mov local0, local3
1069 mov local0, local4
1070 mov local0, local5
1071 mov local0, local7
1072 mov local0, local6
1073 mov local0, out0
1074 mov local0, out1
1075 mov local0, out2
1076 mov local0, out3
1077 mov local0, out4
1078 mov local0, out5
1079 mov local0, global1
1080 mov local0, global2
1081 mov local0, global3
1082 mov local0, global4
1083 mov local0, global5
1084
1085 })
1086
1087 .section ".text"
1088
1089 .align 32
1090
1091 .des_enc:
1092 ! encryption rounds as a subroutine: work half in5, use half out5, returns with retl
1093 ! key address in3
1094 ! loads key next encryption/decryption first round from [in4]
1095 ! the loop label .des_enc.1 is also a call target of the initial permutation macro
1096 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1097
1098
1099 .align 32
1100
1101 .des_dec:
1102 ! decryption rounds as a subroutine: work half out5, use half in5, returns with retl
1103 ! implemented with out5 as first parameter to avoid
1104 ! register exchange in ede modes
1105
1106 ! key address in4
1107 ! loads key next encryption/decryption first round from [in3]
1108 ! the loop label .des_dec.1 is also a call target of the initial permutation macro
1109 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1110
1111
1112
1113 ! void DES_encrypt1(data, ks, enc)
1114 ! *******************************
1115 ! in0 = data (two 32-bit words, rewritten in place), in1 = ks, in2 = enc (0 selects the decryption path)
1116 .align 32
1117 .global DES_encrypt1
1118 .type DES_encrypt1,#function
1119
1120 DES_encrypt1:
1121
1122 save %sp, FRAME, %sp
1123 ! position independent address setup:
1124 sethi %hi(.PIC.DES_SPtrans-1f),global1
1125 or global1,%lo(.PIC.DES_SPtrans-1f),global1
1126 1: call .+8 ! %o7 = address of this call
1127 add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
1128 sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
1129
1130 ld [in0], in5 ! left
1131 cmp in2, 0 ! enc
1132
1133 be .encrypt.dec
1134 ld [in0+4], out5 ! right
1135
1136 ! parameter 6 1/2 for include encryption/decryption
1137 ! parameter 7 1 for move in1 to in3
1138 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1139
1140 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1141
1142 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
1143
1144 fp_macro(in5, out5, 1) ! 1 for store to [in0]
1145
1146 ret
1147 restore
1148
1149 .encrypt.dec:
1150
1151 add in1, 120, in3 ! use last subkey for first round
1152
1153 ! parameter 6 1/2 for include encryption/decryption
1154 ! parameter 7 1 for move in1 to in3
1155 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1156
1157 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
1158
1159 fp_macro(out5, in5, 1) ! 1 for store to [in0]
1160
1161 ret
1162 restore
1163
1164 .DES_encrypt1.end:
1165 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
1166
1167
1168 ! void DES_encrypt2(data, ks, enc)
1169 !*********************************
1170 ! in0 = data (two 32-bit words, rewritten in place), in1 = ks, in2 = enc (0 selects the decryption path)
1171 ! encrypts/decrypts without initial/final permutation
1172
1173 .align 32
1174 .global DES_encrypt2
1175 .type DES_encrypt2,#function
1176
1177 DES_encrypt2:
1178
1179 save %sp, FRAME, %sp
1180 ! position independent address setup:
1181 sethi %hi(.PIC.DES_SPtrans-1f),global1
1182 or global1,%lo(.PIC.DES_SPtrans-1f),global1
1183 1: call .+8 ! %o7 = address of this call
1184 add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
1185 sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
1186
1187 ! Set sbox address 1 to 6 and rotate halfs 3 left
1188 ! Errors caught by destest? Yes. Still? *NO*
1189
1190 !sethi %hi(DES_SPtrans), global1 ! address sbox 1
1191
1192 !or global1, %lo(DES_SPtrans), global1 ! sbox 1
1193
1194 add global1, 256, global2 ! sbox 2
1195 add global1, 512, global3 ! sbox 3
1196
1197 ld [in0], out5 ! right
1198 add global1, 768, global4 ! sbox 4
1199 add global1, 1024, global5 ! sbox 5
1200
1201 ld [in0+4], in5 ! left
1202 add global1, 1280, local6 ! sbox 6
1203 add global1, 1792, out3 ! sbox 8
1204
1205 ! rotate
1206
1207 sll in5, 3, local5
1208 mov in1, in3 ! key address to in3
1209
1210 sll out5, 3, local7
1211 srl in5, 29, in5
1212
1213 srl out5, 29, out5
1214 add in5, local5, in5
1215
1216 add out5, local7, out5
1217 cmp in2, 0
1218
1219 ! we use our own stackframe
1220
1221 be .encrypt2.dec
1222 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! delay slot: save data address, in0 is reused below
1223
1224 ld [in3], out0 ! key 7531 first round
1225 mov LOOPS, out4 ! loop counter
1226
1227 ld [in3+4], out1 ! key 8642 first round
1228 sethi %hi(0x0000FC00), local5
1229
1230 call .des_enc
1231 mov in3, in4
1232
1233 ! rotate
1234 sll in5, 29, in0
1235 srl in5, 3, in5
1236 sll out5, 29, in1
1237 add in5, in0, in5
1238 srl out5, 3, out5
1239 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! reload saved data address
1240 add out5, in1, out5
1241 st in5, [in0]
1242 st out5, [in0+4]
1243
1244 ret
1245 restore
1246
1247
1248 .encrypt2.dec:
1249
1250 add in3, 120, in4 ! in4 = address of last subkey
1251
1252 ld [in4], out0 ! key 7531 first round
1253 mov LOOPS, out4 ! loop counter
1254
1255 ld [in4+4], out1 ! key 8642 first round
1256 sethi %hi(0x0000FC00), local5
1257
1258 mov in5, local1 ! left expected in out5
1259 mov out5, in5
1260
1261 call .des_dec
1262 mov local1, out5
1263
1264 .encrypt2.finish:
1265
1266 ! rotate
1267 sll in5, 29, in0
1268 srl in5, 3, in5
1269 sll out5, 29, in1
1270 add in5, in0, in5
1271 srl out5, 3, out5
1272 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! reload saved data address
1273 add out5, in1, out5
1274 st out5, [in0]
1275 st in5, [in0+4]
1276
1277 ret
1278 restore
1279
1280 .DES_encrypt2.end:
1281 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
1282
1283
1284 ! void DES_encrypt3(data, ks1, ks2, ks3)
1285 ! **************************************
1286 ! in0 = data (two 32-bit words, rewritten in place), in1 = ks1, in2 = ks2, in3 = ks3
1287 .align 32
1288 .global DES_encrypt3
1289 .type DES_encrypt3,#function
1290
1291 DES_encrypt3:
1292
1293 save %sp, FRAME, %sp
1294 ! position independent address setup:
1295 sethi %hi(.PIC.DES_SPtrans-1f),global1
1296 or global1,%lo(.PIC.DES_SPtrans-1f),global1
1297 1: call .+8 ! %o7 = address of this call
1298 add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
1299 sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
1300
1301 ld [in0], in5 ! left
1302 add in2, 120, in4 ! ks2
1303
1304 ld [in0+4], out5 ! right
1305 mov in3, in2 ! save ks3
1306
1307 ! parameter 6 1/2 for include encryption/decryption
1308 ! parameter 7 1 for mov in1 to in3
1309 ! parameter 8 1 for mov in3 to in4
1310 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1311 ! initial permutation, then encryption rounds with ks1 (moved from in1 to in3):
1312 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1313 ! decryption rounds with key address in4 (ks2 + 120):
1314 call .des_dec
1315 mov in2, in3 ! preload ks3
1316 ! encryption rounds with key address in3 (ks3):
1317 call .des_enc
1318 nop
1319
1320 fp_macro(in5, out5, 1)
1321
1322 ret
1323 restore
1324
1325 .DES_encrypt3.end:
1326 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
1327
1328
1329 ! void DES_decrypt3(data, ks1, ks2, ks3)
1330 ! **************************************
1331 ! in0 = data (two 32-bit words, rewritten in place), in1 = ks1, in2 = ks2, in3 = ks3
1332 .align 32
1333 .global DES_decrypt3
1334 .type DES_decrypt3,#function
1335
1336 DES_decrypt3:
1337
1338 save %sp, FRAME, %sp
1339 ! position independent address setup:
1340 sethi %hi(.PIC.DES_SPtrans-1f),global1
1341 or global1,%lo(.PIC.DES_SPtrans-1f),global1
1342 1: call .+8 ! %o7 = address of this call
1343 add %o7,global1,global1 ! delay slot: global1 = address of .PIC.DES_SPtrans
1344 sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and
1345
1346 ld [in0], in5 ! left
1347 add in3, 120, in4 ! ks3
1348
1349 ld [in0+4], out5 ! right
1350 mov in2, in3 ! ks2
1351
1352 ! parameter 6 1/2 for include encryption/decryption
1353 ! parameter 7 1 for mov in1 to in3
1354 ! parameter 8 1 for mov in3 to in4
1355 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1356 ! initial permutation, then decryption rounds with key address in4 (ks3 + 120):
1357 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1358 ! encryption rounds with key address in3 (ks2):
1359 call .des_enc
1360 add in1, 120, in4 ! preload ks1
1361 ! decryption rounds with key address in4 (ks1 + 120):
1362 call .des_dec
1363 nop
1364
1365 fp_macro(out5, in5, 1)
1366
1367 ret
1368 restore
1369
1370 .DES_decrypt3.end:
1371 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1372
1373 ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
1374 ! *****************************************************************
!
! Single-DES in CBC mode. After the save: in0 = input, in1 = output,
! in2 = byte count, in3 = schedule, in4 = ivec, in5 = enc.
! enc != 0 takes the encrypt path, enc == 0 branches to .ncbc.dec.
! Encrypt path: the chaining value in in5/out5 is always written back
! through the saved ivec pointer at .ncbc.enc.finish.
! Decrypt path: returns at once, without writing ivec, when the byte
! count is <= 0; otherwise the last ciphertext block read is written
! back as the new ivec at .ncbc.dec.store.iv.
! A trailing partial block of 1 to 7 bytes is handled on both paths
! (see the .seven.or.less labels).
1375
1376
1377 .align 32
1378 .global DES_ncbc_encrypt
1379 .type DES_ncbc_encrypt,#function
1380
1381 DES_ncbc_encrypt:
1382
1383 save %sp, FRAME, %sp
1384
! Three m4 names for pointer-sized stack slots addressed off the new %sp
! (argument slots 0, 1 and 4).
1385 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
1386 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1387 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1388
! pc-relative setup: global1 = address of .PIC.DES_SPtrans,
! out2 = address of .des_and
1389 sethi %hi(.PIC.DES_SPtrans-1f),global1
1390 or global1,%lo(.PIC.DES_SPtrans-1f),global1
1391 1: call .+8
1392 add %o7,global1,global1
1393 sub global1,.PIC.DES_SPtrans-.des_and,out2
1394
1395 cmp in5, 0 ! enc
1396
1397 be .ncbc.dec
1398 STPTR in4, IVEC ! delay slot: ivec pointer is saved on both paths
1399
! ---- encrypt path ----
1400 ! addr left right temp label
1401 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
1402
1403 addcc in2, -8, in2 ! bytes missing when first block done
1404
1405 bl .ncbc.enc.seven.or.less
1406 mov in3, in4 ! schedule
1407
1408 .ncbc.enc.next.block:
1409
1410 load_little_endian(in0, out4, global4, local3, .LLE2) ! block
1411
1412 .ncbc.enc.next.block_1:
1413
1414 xor in5, out4, in5 ! iv xor
1415 xor out5, global4, out5 ! iv xor
1416
1417 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1418 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1419
1420 .ncbc.enc.next.block_2:
1421
1422 !// call .des_enc ! compares in2 to 8
1423 ! rounds inlined for alignment purposes
1424
1425 add global1, 768, global4 ! address sbox 4 since register used below
1426
1427 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
1428
! The branch below uses the condition codes left by the inlined rounds
! (in2 compared to 8, per the note above): taken when fewer than 8
! bytes remain after this block.
1429 bl .ncbc.enc.next.block_fp
1430 add in0, 8, in0 ! input address
1431
1432 ! If 8 or more bytes are to be encrypted after this block,
1433 ! we combine final permutation for this block with initial
1434 ! permutation for next block. Load next block:
1435
1436 load_little_endian(in0, global3, global4, local5, .LLE12)
1437
1438 ! parameter 1 original left
1439 ! parameter 2 original right
1440 ! parameter 3 left ip
1441 ! parameter 4 right ip
1442 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1443 ! 2: mov in4 to in3
1444 !
1445 ! also adds -8 to length in2 and loads loop counter to out4
1446
1447 fp_ip_macro(out0, out1, global3, global4, 2)
1448
1449 store_little_endian(in1, out0, out1, local3, .SLE10) ! block
1450
1451 ld [in3], out0 ! key 7531 first round next block
1452 mov in5, local1
1453 xor global3, out5, in5 ! iv xor next block
1454
1455 ld [in3+4], out1 ! key 8642
1456 add global1, 512, global3 ! address sbox 3 since register used
1457 xor global4, local1, out5 ! iv xor next block
1458
1459 ba .ncbc.enc.next.block_2
1460 add in1, 8, in1 ! output address
1461
1462 .ncbc.enc.next.block_fp:
1463
1464 fp_macro(in5, out5)
1465
1466 store_little_endian(in1, in5, out5, local3, .SLE1) ! block
1467
1468 addcc in2, -8, in2 ! bytes missing when next block done
1469
1470 bpos .ncbc.enc.next.block
1471 add in1, 8, in1
1472
1473 .ncbc.enc.seven.or.less:
1474
! in2 is (bytes remaining - 8) here; -8 or less means nothing is left
1475 cmp in2, -8
1476
1477 ble .ncbc.enc.finish
1478 nop
1479
1480 add in2, 8, local1 ! bytes to load
1481
1482 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1483 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1484
1485 ! Loads 1 to 7 bytes little endian to global4, out4
1486
1487
1488 .ncbc.enc.finish:
1489
! write the final chaining value in5/out5 back through the saved ivec pointer
1490 LDPTR IVEC, local4
1491 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
1492
1493 ret
1494 restore
1495
1496
! ---- decrypt path ----
1497 .ncbc.dec:
1498
1499 STPTR in0, INPUT
1500 cmp in2, 0 ! length
1501 add in3, 120, in3 ! schedule + 120 for decryption
1502
1503 LDPTR IVEC, local7 ! ivec
1504 ble .ncbc.dec.finish ! length <= 0: return, ivec is not written
1505 mov in3, in4 ! schedule
1506
1507 STPTR in1, OUTPUT
1508 mov in0, local5 ! input
1509
1510 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
1511
1512 .ncbc.dec.next.block:
1513
1514 load_little_endian(local5, in5, out5, local3, .LLE4) ! block
1515
1516 ! parameter 6 1/2 for include encryption/decryption
1517 ! parameter 7 1 for mov in1 to in3
1518 ! parameter 8 1 for mov in3 to in4
1519
1520 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
1521
1522 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1523
1524 ! in2 is bytes left to be stored
1525 ! in2 is compared to 8 in the rounds
1526
1527 xor out5, in0, out4 ! iv xor
1528 bl .ncbc.dec.seven.or.less
1529 xor in5, in1, global4 ! iv xor
1530
1531 ! Load ivec next block now, since input and output address might be the same.
1532
1533 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
1534
1535 store_little_endian(local7, out4, global4, local3, .SLE3)
1536
1537 STPTR local5, INPUT
1538 add local7, 8, local7
1539 addcc in2, -8, in2
1540
1541 bg .ncbc.dec.next.block
1542 STPTR local7, OUTPUT
1543
1544
1545 .ncbc.dec.store.iv:
1546
! write the chaining value held in in0/in1 back through the saved ivec pointer
1547 LDPTR IVEC, local4 ! ivec
1548 store_little_endian(local4, in0, in1, local5, .SLE4)
1549
1550 .ncbc.dec.finish:
1551
1552 ret
1553 restore
1554
1555 .ncbc.dec.seven.or.less:
1556
! fewer than 8 bytes left to store: fetch the next chaining value, store
! only in2 bytes of the result, then continue at .ncbc.dec.store.iv
1557 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
1558
1559 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1560
1561
1562 .DES_ncbc_encrypt.end:
1563 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
1564
1565
1566 ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
1567 ! **************************************************************************
!
! Triple-DES in CBC mode. After the save: in0 = input, in1 = output,
! in2 = byte count, in3 = ks1, in4 = ks2, in5 = ks3. The seventh and
! eighth arguments (ivec and enc) are read from the caller frame
! through %fp.
! Encrypt path per block: .des_enc with ks1, .des_dec with ks2 + 120,
! .des_enc with ks3.
! Decrypt path per block: decryption keyed from ks3 + 120, .des_enc with
! ks2, .des_dec with ks1 + 120.
! The decrypt path returns without writing ivec when the byte count
! is <= 0; the encrypt path always writes ivec back.
! The m4 names for the input and output pointer slots used on the
! decrypt path are not set up in this routine; they come from
! definitions made earlier in the file.
1568
1569
1570 .align 32
1571 .global DES_ede3_cbc_encrypt
1572 .type DES_ede3_cbc_encrypt,#function
1573
1574 DES_ede3_cbc_encrypt:
1575
1576 save %sp, FRAME, %sp
1577
! Three m4 names for pointer-sized stack slots addressed off the new %sp
! (argument slots 3, 4 and 5), used to keep the key schedule pointers.
1578 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1579 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1580 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
1581
! pc-relative setup: global1 = address of .PIC.DES_SPtrans,
! out2 = address of .des_and
1582 sethi %hi(.PIC.DES_SPtrans-1f),global1
1583 or global1,%lo(.PIC.DES_SPtrans-1f),global1
1584 1: call .+8
1585 add %o7,global1,global1
1586 sub global1,.PIC.DES_SPtrans-.des_and,out2
1587
1588 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
1589 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1590 cmp local3, 0 ! enc
1591
1592 be .ede3.dec
1593 STPTR in4, KS2 ! delay slot: ks2 is saved unmodified on both paths
1594
! ---- encrypt path ----
1595 STPTR in5, KS3
1596
1597 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
1598
1599 addcc in2, -8, in2 ! bytes missing after next block
1600
1601 bl .ede3.enc.seven.or.less
1602 STPTR in3, KS1 ! delay slot: executed whether or not the branch is taken
1603
1604 .ede3.enc.next.block:
1605
1606 load_little_endian(in0, out4, global4, local3, .LLE7)
1607
1608 .ede3.enc.next.block_1:
1609
1610 LDPTR KS2, in4
1611 xor in5, out4, in5 ! iv xor
1612 xor out5, global4, out5 ! iv xor
1613
1614 LDPTR KS1, in3
1615 add in4, 120, in4 ! for decryption we use last subkey first
1616 nop
1617
1618 ip_macro(in5, out5, in5, out5, in3)
1619
1620 .ede3.enc.next.block_2:
1621
1622 call .des_enc ! ks1 in3
1623 nop
1624
1625 call .des_dec ! ks2 in4
1626 LDPTR KS3, in3 ! delay slot: ks3 into in3 for the next call
1627
1628 call .des_enc ! ks3 in3 compares in2 to 8
1629 nop
1630
! branch on the condition codes left by the last call (in2 compared to 8):
! taken when fewer than 8 bytes remain after this block
1631 bl .ede3.enc.next.block_fp
1632 add in0, 8, in0
1633
1634 ! If 8 or more bytes are to be encrypted after this block,
1635 ! we combine final permutation for this block with initial
1636 ! permutation for next block. Load next block:
1637
1638 load_little_endian(in0, global3, global4, local5, .LLE11)
1639
1640 ! parameter 1 original left
1641 ! parameter 2 original right
1642 ! parameter 3 left ip
1643 ! parameter 4 right ip
1644 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1645 ! 2: mov in4 to in3
1646 !
1647 ! also adds -8 to length in2 and loads loop counter to out4
1648
1649 fp_ip_macro(out0, out1, global3, global4, 1)
1650
1651 store_little_endian(in1, out0, out1, local3, .SLE9) ! block
1652
1653 mov in5, local1
1654 xor global3, out5, in5 ! iv xor next block
1655
1656 ld [in3], out0 ! key 7531
1657 add global1, 512, global3 ! address sbox 3
1658 xor global4, local1, out5 ! iv xor next block
1659
1660 ld [in3+4], out1 ! key 8642
1661 add global1, 768, global4 ! address sbox 4
1662 ba .ede3.enc.next.block_2
1663 add in1, 8, in1
1664
1665 .ede3.enc.next.block_fp:
1666
1667 fp_macro(in5, out5)
1668
1669 store_little_endian(in1, in5, out5, local3, .SLE5) ! block
1670
1671 addcc in2, -8, in2 ! bytes missing when next block done
1672
1673 bpos .ede3.enc.next.block
1674 add in1, 8, in1
1675
1676 .ede3.enc.seven.or.less:
1677
! in2 is (bytes remaining - 8) here; -8 or less means nothing is left
1678 cmp in2, -8
1679
1680 ble .ede3.enc.finish
1681 nop
1682
1683 add in2, 8, local1 ! bytes to load
1684
1685 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1686 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1687
1688 .ede3.enc.finish:
1689
! write the final chaining value in5/out5 back through the ivec argument
1690 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1691 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
1692
1693 ret
1694 restore
1695
! ---- decrypt path ----
1696 .ede3.dec:
1697
1698 STPTR in0, INPUT
1699 add in5, 120, in5 ! ks3 + 120 for decryption
1700
1701 STPTR in1, OUTPUT
1702 mov in0, local5
1703 add in3, 120, in3 ! ks1 + 120 for decryption
1704
1705 STPTR in3, KS1
1706 cmp in2, 0
1707
1708 ble .ede3.dec.finish ! length <= 0: return, ivec is not written
1709 STPTR in5, KS3
1710
1711 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
1712 load_little_endian(local7, in0, in1, local3, .LLE8)
1713
1714 .ede3.dec.next.block:
1715
1716 load_little_endian(local5, in5, out5, local3, .LLE9)
1717
1718 ! parameter 6 1/2 for include encryption/decryption
1719 ! parameter 7 1 for mov in1 to in3
1720 ! parameter 8 1 for mov in3 to in4
1721 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1722
1723 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1724
1725 call .des_enc ! ks2 in3
1726 LDPTR KS1, in4 ! delay slot: saved ks1 + 120 into in4 for the next call
1727
1728 call .des_dec ! ks1 in4
1729 nop
1730
1731 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
1732
1733 ! in2 is bytes left to be stored
1734 ! in2 is compared to 8 in the rounds
1735
1736 xor out5, in0, out4 ! iv xor
1737 bl .ede3.dec.seven.or.less
1738 xor in5, in1, global4 ! iv xor
1739
1740 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
1741
1742 store_little_endian(local7, out4, global4, local3, .SLE7) ! block
1743
1744 STPTR local5, INPUT
1745 addcc in2, -8, in2
1746 add local7, 8, local7
1747
1748 bg .ede3.dec.next.block
1749 STPTR local7, OUTPUT
1750
1751 .ede3.dec.store.iv:
1752
! write the chaining value held in in0/in1 back through the ivec argument
1753 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
1754 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
1755
1756 .ede3.dec.finish:
1757
1758 ret
1759 restore
1760
1761 .ede3.dec.seven.or.less:
1762
! fewer than 8 bytes left to store: fetch the next chaining value, store
! only in2 bytes of the result, then continue at .ede3.dec.store.iv
1763 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
1764
1765 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1766
1767
1768 .DES_ede3_cbc_encrypt.end:
1769 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
1770
1771 .align 256
1772 .type .des_and,#object
1773 .size .des_and,288
! The declared size is 288 bytes: 256 table bytes followed by eight
! 32-bit words, the last of which starts at offset 284.
1774
1775 .des_and:
1776
1777 ! This table is used for AND 0xFC when it is known that register
1778 ! bits 8-31 are zero. Makes it possible to do three arithmetic
1779 ! operations in one cycle.
!
! Layout: 256 one-byte entries, where the entry at byte offset i holds
! the value (i AND 0xFC), followed by the word constants below.
! The routines in this file keep the address of this object in out2.
1780
1781 .byte 0, 0, 0, 0, 4, 4, 4, 4
1782 .byte 8, 8, 8, 8, 12, 12, 12, 12
1783 .byte 16, 16, 16, 16, 20, 20, 20, 20
1784 .byte 24, 24, 24, 24, 28, 28, 28, 28
1785 .byte 32, 32, 32, 32, 36, 36, 36, 36
1786 .byte 40, 40, 40, 40, 44, 44, 44, 44
1787 .byte 48, 48, 48, 48, 52, 52, 52, 52
1788 .byte 56, 56, 56, 56, 60, 60, 60, 60
1789 .byte 64, 64, 64, 64, 68, 68, 68, 68
1790 .byte 72, 72, 72, 72, 76, 76, 76, 76
1791 .byte 80, 80, 80, 80, 84, 84, 84, 84
1792 .byte 88, 88, 88, 88, 92, 92, 92, 92
1793 .byte 96, 96, 96, 96, 100, 100, 100, 100
1794 .byte 104, 104, 104, 104, 108, 108, 108, 108
1795 .byte 112, 112, 112, 112, 116, 116, 116, 116
1796 .byte 120, 120, 120, 120, 124, 124, 124, 124
1797 .byte 128, 128, 128, 128, 132, 132, 132, 132
1798 .byte 136, 136, 136, 136, 140, 140, 140, 140
1799 .byte 144, 144, 144, 144, 148, 148, 148, 148
1800 .byte 152, 152, 152, 152, 156, 156, 156, 156
1801 .byte 160, 160, 160, 160, 164, 164, 164, 164
1802 .byte 168, 168, 168, 168, 172, 172, 172, 172
1803 .byte 176, 176, 176, 176, 180, 180, 180, 180
1804 .byte 184, 184, 184, 184, 188, 188, 188, 188
1805 .byte 192, 192, 192, 192, 196, 196, 196, 196
1806 .byte 200, 200, 200, 200, 204, 204, 204, 204
1807 .byte 208, 208, 208, 208, 212, 212, 212, 212
1808 .byte 216, 216, 216, 216, 220, 220, 220, 220
1809 .byte 224, 224, 224, 224, 228, 228, 228, 228
1810 .byte 232, 232, 232, 232, 236, 236, 236, 236
1811 .byte 240, 240, 240, 240, 244, 244, 244, 244
1812 .byte 248, 248, 248, 248, 252, 252, 252, 252
1813
1814 ! 5 numbers for initial/final permutation
1815
1816 .word 0x0f0f0f0f ! offset 256
1817 .word 0x0000ffff ! 260
1818 .word 0x33333333 ! 264
1819 .word 0x00ff00ff ! 268
1820 .word 0x55555555 ! 272
1821
! Three further word constants at offsets 276, 280 and 284.
1822 .word 0 ! 276
1823 .word LOOPS ! 280
1824 .word 0x0000FC00 ! 284
1825
1826 .global DES_SPtrans
1827 .type DES_SPtrans,#object
1828 .size DES_SPtrans,2048
1829 .align 64
! Eight consecutive tables of 64 words (256 bytes) each, marked
! nibble 0 to nibble 7 below; 2048 bytes in total, matching .size above.
! .PIC.DES_SPtrans is a second label at the same address; the routines
! in this file use it in their pc-relative address computation.
1830 DES_SPtrans:
1831 .PIC.DES_SPtrans:
1832 ! nibble 0
1833 .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
1834 .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
1835 .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
1836 .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
1837 .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
1838 .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
1839 .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
1840 .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
1841 .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
1842 .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
1843 .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
1844 .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
1845 .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
1846 .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
1847 .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
1848 .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
1849 ! nibble 1
1850 .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
1851 .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
1852 .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
1853 .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
1854 .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
1855 .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
1856 .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
1857 .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
1858 .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
1859 .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
1860 .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
1861 .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
1862 .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
1863 .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
1864 .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
1865 .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
1866 ! nibble 2
1867 .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
1868 .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
1869 .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
1870 .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
1871 .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
1872 .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
1873 .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
1874 .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
1875 .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
1876 .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
1877 .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
1878 .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
1879 .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
1880 .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
1881 .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
1882 .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
1883 ! nibble 3
1884 .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
1885 .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
1886 .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
1887 .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
1888 .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
1889 .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
1890 .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
1891 .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
1892 .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
1893 .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
1894 .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
1895 .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
1896 .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
1897 .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
1898 .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
1899 .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
1900 ! nibble 4
1901 .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
1902 .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
1903 .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
1904 .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
1905 .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
1906 .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
1907 .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
1908 .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
1909 .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
1910 .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
1911 .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
1912 .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
1913 .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
1914 .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
1915 .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
1916 .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
1917 ! nibble 5
1918 .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
1919 .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
1920 .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
1921 .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
1922 .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
1923 .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
1924 .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
1925 .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
1926 .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
1927 .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
1928 .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
1929 .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
1930 .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
1931 .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
1932 .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
1933 .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
1934 ! nibble 6
1935 .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
1936 .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
1937 .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
1938 .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
1939 .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
1940 .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
1941 .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
1942 .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
1943 .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
1944 .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
1945 .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
1946 .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
1947 .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
1948 .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
1949 .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
1950 .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
1951 ! nibble 7
1952 .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
1953 .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
1954 .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
1955 .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
1956 .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
1957 .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
1958 .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
1959 .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
1960 .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
1961 .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
1962 .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
1963 .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
1964 .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
1965 .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
1966 .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
1967 .word 0x20000000, 0x20800080, 0x00020000, 0x00820080
1968