]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/des/asm/des_enc.m4
Remove unnecessary trailing whitespace
[thirdparty/openssl.git] / crypto / des / asm / des_enc.m4
CommitLineData
48e5119a 1! Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
e0d769ca 2!
2d48d5dd 3! Licensed under the Apache License 2.0 (the "License"). You may not use
44c8a5e2
RS
4! this file except in compliance with the License. You can obtain a copy
5! in the file LICENSE in the source distribution or at
6! https://www.openssl.org/source/license.html
e0d769ca
AP
7!
8! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
9!
10! Global registers 1 to 5 are used. This is the same as done by the
11! cc compiler. The UltraSPARC load/store little endian feature is used.
12!
13! Instruction grouping often refers to one CPU cycle.
14!
15! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
16!
17! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
f22e1e4d
AP
18!
19! Performance improvement according to './apps/openssl speed des'
20!
21! 32-bit build:
22! 23% faster than cc-5.2 -xarch=v8plus -xO5
23! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
24! 64-bit build:
25! 50% faster than cc-5.2 -xarch=v9 -xO5
26! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
27!
e0d769ca 28
45771abb 29.ident "des_enc.m4 2.1"
70532b7d 30.file "des_enc-sparc.S"
f22e1e4d
AP
31
32#if defined(__SUNPRO_C) && defined(__sparcv9)
33# define ABI64 /* They've said -xarch=v9 at command line */
34#elif defined(__GNUC__) && defined(__arch64__)
35# define ABI64 /* They've said -m64 at command line */
36#endif
37
 ! The conditional below picks, per ABI, the stack frame size, the stack
 ! bias, the pointer-sized load and store mnemonics, and the offset and
 ! slot size of the stack slots addressed as %sp+BIAS+ARG0+n*ARGSZ.
38#ifdef ABI64
39 .register %g2,#scratch
40 .register %g3,#scratch
41# define FRAME -192
42# define BIAS 2047
43# define LDPTR ldx
44# define STPTR stx
45# define ARG0 128
46# define ARGSZ 8
f22e1e4d
AP
47#else
48# define FRAME -96
49# define BIAS 0
50# define LDPTR ld
51# define STPTR st
52# define ARG0 68
53# define ARGSZ 4
54#endif
e0d769ca
AP
55
 ! Initial value of the loop counter that DES_encrypt2 places in out4.
56#define LOOPS 7
57
 ! Symbolic names for the global, local, in and out registers.
58#define global0 %g0
59#define global1 %g1
60#define global2 %g2
61#define global3 %g3
62#define global4 %g4
63#define global5 %g5
64
65#define local0 %l0
66#define local1 %l1
67#define local2 %l2
68#define local3 %l3
69#define local4 %l4
70#define local5 %l5
 ! NOTE review: the next two names are crossed relative to the hardware
 ! numbering - confirm this is intentional before relying on it.
71#define local7 %l6
72#define local6 %l7
73
74#define in0 %i0
75#define in1 %i1
76#define in2 %i2
77#define in3 %i3
78#define in4 %i4
79#define in5 %i5
80#define in6 %i6
81#define in7 %i7
82
83#define out0 %o0
84#define out1 %o1
85#define out2 %o2
86#define out3 %o3
87#define out4 %o4
88#define out5 %o5
89#define out6 %o6
90#define out7 %o7
91
 ! Alias used by the byte-store macros for the stb mnemonic.
72997517 92#define stub stb
e0d769ca
AP
93
94changequote({,})
95
96
97! Macro definitions:
98
99
100! {ip_macro}
101!
102! The logic used in initial and final permutations is the same as in
103! the C code. The permutations are done with a clever shift, xor, and
104! bitwise-and (mask) technique.
105!
106! The macro also loads address sbox 1 to 5 to global 1 to 5, address
107! sbox 6 to local6, and address sbox 8 to out3.
108!
109! Rotates the halves 3 left to bring the sbox bits in convenient positions.
110!
111! Loads key first round from address in parameter 5 to out0, out1.
112!
436ad81f 113! After the original LibDES initial permutation, the resulting left
e0d769ca
AP
114! is in the variable initially used for right and vice versa. The macro
115! implements the possibility to keep the halves in the original registers.
116!
117! parameter 1 left
118! parameter 2 right
119! parameter 3 result left (modify in first round)
120! parameter 4 result right (use in first round)
121! parameter 5 key address
122! parameter 6 1/2 for include encryption/decryption
123! parameter 7 1 for move in1 to in3
124! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
125! parameter 9 1 for load ks3 and ks2 to in4 and in3
126
127define(ip_macro, {
128
129! {ip_macro}
130! $1 $2 $4 $3 $5 $6 $7 $8 $9
131
 ! Expects out2 to hold the address of the constant table read below
 ! and global1 to hold the address of sbox 1; the caller sets up both.
132 ld [out2+256], local1
133 srl $2, 4, local4
134
135 xor local4, $1, local4
136 ifelse($7,1,{mov in1, in3},{nop})
137
138 ld [out2+260], local2
139 and local4, local1, local4
140 ifelse($8,1,{mov in3, in4},{})
141 ifelse($8,2,{mov in4, in3},{})
142
143 ld [out2+280], out4 ! loop counter
144 sll local4, 4, local1
145 xor $1, local4, $1
146
147 ld [out2+264], local3
148 srl $1, 16, local4
149 xor $2, local1, $2
150
f22e1e4d 151 ifelse($9,1,{LDPTR KS3, in4},{})
e0d769ca 152 xor local4, $2, local4
f22e1e4d 153 nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
e0d769ca 154
f22e1e4d 155 ifelse($9,1,{LDPTR KS2, in3},{})
e0d769ca 156 and local4, local2, local4
f22e1e4d 157 nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
e0d769ca
AP
158
159 sll local4, 16, local1
160 xor $2, local4, $2
161
162 srl $2, 2, local4
163 xor $1, local1, $1
164
165 sethi %hi(16711680), local5
166 xor local4, $1, local4
167
168 and local4, local3, local4
169 or local5, 255, local5
170
171 sll local4, 2, local2
172 xor $1, local4, $1
173
174 srl $1, 8, local4
175 xor $2, local2, $2
176
177 xor local4, $2, local4
178 add global1, 768, global4 ! address sbox 4
179
180 and local4, local5, local4
181 add global1, 1024, global5 ! address sbox 5
182
183 ld [out2+272], local7
184 sll local4, 8, local1
185 xor $2, local4, $2
186
187 srl $2, 1, local4
188 xor $1, local1, $1
189
190 ld [$5], out0 ! key 7531
191 xor local4, $1, local4
192 add global1, 256, global2 ! address sbox 2
193
194 ld [$5+4], out1 ! key 8642
195 and local4, local7, local4
196 add global1, 512, global3 ! address sbox 3
197
198 sll local4, 1, local1
199 xor $1, local4, $1
200
 ! rotate both halves 3 left; the two or instructions combine the parts
201 sll $1, 3, local3
202 xor $2, local1, $2
203
204 sll $2, 3, local2
205 add global1, 1280, local6 ! address sbox 6
206
207 srl $1, 29, local4
208 add global1, 1792, out3 ! address sbox 8
209
210 srl $2, 29, local1
211 or local4, local3, $4
212
213 or local2, local1, $3
214
 ! optional tail: start the first round and call into the round code
215 ifelse($6, 1, {
216
217 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
218 or local2, local1, $3
219 xor $4, out0, local1
220
221 call .des_enc.1
222 and local1, 252, local1
223
224 },{})
225
226 ifelse($6, 2, {
227
228 ld [out2+284], local5 ! 0x0000FC00 used in the rounds
229 or local2, local1, $3
230 xor $4, out0, local1
231
232 call .des_dec.1
233 and local1, 252, local1
234
235 },{})
236})
237
238
239! {rounds_macro}
240!
241! The logic used in the DES rounds is the same as in the C code,
242! except that calculations for sbox 1 and sbox 5 begin before
243! the previous round is finished.
244!
245! In each round one half (work) is modified based on key and the
246! other half (use).
247!
248! In this version we do two rounds in a loop repeated 7 times
478b50cf 249! and two rounds separately.
e0d769ca
AP
250!
251! One half has the bits for the sboxes in the following positions:
252!
253! 777777xx555555xx333333xx111111xx
254!
255! 88xx666666xx444444xx222222xx8888
256!
257! The bits for each sbox are xor-ed with the key bits for that box.
258! The above xx bits are cleared, and the result used for lookup in
259! the sbox table. Each sbox entry contains the 4 output bits permuted
260! into 32 bits according to the P permutation.
261!
262! In the description of DES, left and right are switched after
263! each round, except after last round. In this code the original
264! left and right are kept in the same register in all rounds, meaning
265! that after the 16 rounds the result for right is in the register
266! originally used for left.
267!
268! parameter 1 first work (left in first round)
269! parameter 2 first use (right in first round)
270! parameter 3 enc/dec 1/-1
271! parameter 4 loop label
272! parameter 5 key address register
273! parameter 6 optional address for key next encryption/decryption
274! parameter 7 not empty for include retl
275!
276! also compares in2 to 8
277
278define(rounds_macro, {
279
280! {rounds_macro}
281! $1 $2 $3 $4 $5 $6 $7 $8 $9
282
283 xor $2, out0, local1
284
285 ld [out2+284], local5 ! 0x0000FC00
 ! branch to the aligned loop head; the and below is in the delay slot
f22e1e4d 286 ba $4
e0d769ca
AP
287 and local1, 252, local1
288
289 .align 32
290
291$4:
292 ! local6 is address sbox 6
293 ! out3 is address sbox 8
294 ! out4 is loop counter
295
296 ld [global1+local1], local1
297 xor $2, out1, out1 ! 8642
298 xor $2, out0, out0 ! 7531
b884556e 299 ! fmovs %f0, %f0 ! fxor used for alignment
e0d769ca
AP
300
301 srl out1, 4, local0 ! rotate 4 right
302 and out0, local5, local3 ! 3
b884556e 303 ! fmovs %f0, %f0
e0d769ca
AP
304
305 ld [$5+$3*8], local7 ! key 7531 next round
306 srl local3, 8, local3 ! 3
307 and local0, 252, local2 ! 2
b884556e 308 ! fmovs %f0, %f0
e0d769ca
AP
309
310 ld [global3+local3],local3 ! 3
311 sll out1, 28, out1 ! rotate
312 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
313
df443918 314 ld [global2+local2], local2 ! 2
e0d769ca
AP
315 srl out0, 24, local1 ! 7
316 or out1, local0, out1 ! rotate
317
318 ldub [out2+local1], local1 ! 7 (and 0xFC)
319 srl out1, 24, local0 ! 8
320 and out1, local5, local4 ! 4
321
322 ldub [out2+local0], local0 ! 8 (and 0xFC)
323 srl local4, 8, local4 ! 4
324 xor $1, local2, $1 ! 2 finished local2 now sbox 6
325
326 ld [global4+local4],local4 ! 4
327 srl out1, 16, local2 ! 6
328 xor $1, local3, $1 ! 3 finished local3 now sbox 5
329
330 ld [out3+local0],local0 ! 8
331 and local2, 252, local2 ! 6
332 add global1, 1536, local5 ! address sbox 7
333
334 ld [local6+local2], local2 ! 6
335 srl out0, 16, local3 ! 5
336 xor $1, local4, $1 ! 4 finished
337
338 ld [local5+local1],local1 ! 7
339 and local3, 252, local3 ! 5
340 xor $1, local0, $1 ! 8 finished
341
342 ld [global5+local3],local3 ! 5
343 xor $1, local2, $1 ! 6 finished
344 subcc out4, 1, out4 ! count down; tested by the bne at the loop end
345
346 ld [$5+$3*8+4], out0 ! key 8642 next round
347 xor $1, local7, local2 ! sbox 5 next round
348 xor $1, local1, $1 ! 7 finished
349
350 srl local2, 16, local2 ! sbox 5 next round
351 xor $1, local3, $1 ! 5 finished
352
353 ld [$5+$3*16+4], out1 ! key 8642 next round again
354 and local2, 252, local2 ! sbox5 next round
355! next round
356 xor $1, local7, local7 ! 7531
357
358 ld [global5+local2], local2 ! 5
359 srl local7, 24, local3 ! 7
360 xor $1, out0, out0 ! 8642
361
362 ldub [out2+local3], local3 ! 7 (and 0xFC)
363 srl out0, 4, local0 ! rotate 4 right
364 and local7, 252, local1 ! 1
365
366 sll out0, 28, out0 ! rotate
367 xor $2, local2, $2 ! 5 finished local2 used
368
369 srl local0, 8, local4 ! 4
370 and local0, 252, local2 ! 2
371 ld [local5+local3], local3 ! 7
372
373 srl local0, 16, local5 ! 6
374 or out0, local0, out0 ! rotate
375 ld [global2+local2], local2 ! 2
376
377 srl out0, 24, local0
378 ld [$5+$3*16], out0 ! key 7531 next round
379 and local4, 252, local4 ! 4
380
381 and local5, 252, local5 ! 6
382 ld [global4+local4], local4 ! 4
383 xor $2, local3, $2 ! 7 finished local3 used
384
385 and local0, 252, local0 ! 8
386 ld [local6+local5], local5 ! 6
387 xor $2, local2, $2 ! 2 finished local2 now sbox 3
388
389 srl local7, 8, local2 ! 3 start
390 ld [out3+local0], local0 ! 8
391 xor $2, local4, $2 ! 4 finished
392
393 and local2, 252, local2 ! 3
394 ld [global1+local1], local1 ! 1
395 xor $2, local5, $2 ! 6 finished local5 used
396
397 ld [global3+local2], local2 ! 3
398 xor $2, local0, $2 ! 8 finished
399 add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer
400
401 ld [out2+284], local5 ! 0x0000FC00
402 xor $2, out0, local4 ! sbox 1 next round
403 xor $2, local1, $2 ! 1 finished
404
405 xor $2, local2, $2 ! 3 finished
f22e1e4d 406 bne $4
e0d769ca
AP
407 and local4, 252, local1 ! sbox 1 next round
408
409! two rounds more:
410
411 ld [global1+local1], local1
412 xor $2, out1, out1
413 xor $2, out0, out0
414
415 srl out1, 4, local0 ! rotate
416 and out0, local5, local3
417
418 ld [$5+$3*8], local7 ! key 7531
419 srl local3, 8, local3
420 and local0, 252, local2
421
422 ld [global3+local3],local3
423 sll out1, 28, out1 ! rotate
424 xor $1, local1, $1 ! 1 finished, local1 now sbox 7
425
426 ld [global2+local2], local2
427 srl out0, 24, local1
428 or out1, local0, out1 ! rotate
429
430 ldub [out2+local1], local1
431 srl out1, 24, local0
432 and out1, local5, local4
433
434 ldub [out2+local0], local0
435 srl local4, 8, local4
436 xor $1, local2, $1 ! 2 finished local2 now sbox 6
437
438 ld [global4+local4],local4
439 srl out1, 16, local2
440 xor $1, local3, $1 ! 3 finished local3 now sbox 5
441
442 ld [out3+local0],local0
443 and local2, 252, local2
444 add global1, 1536, local5 ! address sbox 7
445
446 ld [local6+local2], local2
447 srl out0, 16, local3
448 xor $1, local4, $1 ! 4 finished
449
450 ld [local5+local1],local1
451 and local3, 252, local3
452 xor $1, local0, $1
453
454 ld [global5+local3],local3
455 xor $1, local2, $1 ! 6 finished
456 cmp in2, 8
457
458 ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
459 xor $1, local7, local2 ! sbox 5 next round
460 xor $1, local1, $1 ! 7 finished
461
462 ld [$5+$3*8+4], out0
463 srl local2, 16, local2 ! sbox 5 next round
464 xor $1, local3, $1 ! 5 finished
465
466 and local2, 252, local2
467! next round (two rounds more)
468 xor $1, local7, local7 ! 7531
469
470 ld [global5+local2], local2
471 srl local7, 24, local3
472 xor $1, out0, out0 ! 8642
473
474 ldub [out2+local3], local3
475 srl out0, 4, local0 ! rotate
476 and local7, 252, local1
477
478 sll out0, 28, out0 ! rotate
479 xor $2, local2, $2 ! 5 finished local2 used
480
481 srl local0, 8, local4
482 and local0, 252, local2
483 ld [local5+local3], local3
484
485 srl local0, 16, local5
486 or out0, local0, out0 ! rotate
487 ld [global2+local2], local2
488
489 srl out0, 24, local0
490 ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
491 and local4, 252, local4
492
493 and local5, 252, local5
494 ld [global4+local4], local4
495 xor $2, local3, $2 ! 7 finished local3 used
496
497 and local0, 252, local0
498 ld [local6+local5], local5
499 xor $2, local2, $2 ! 2 finished local2 now sbox 3
500
501 srl local7, 8, local2 ! 3 start
502 ld [out3+local0], local0
503 xor $2, local4, $2
504
505 and local2, 252, local2
506 ld [global1+local1], local1
507 xor $2, local5, $2 ! 6 finished local5 used
508
509 ld [global3+local2], local2
510 srl $1, 3, local3
511 xor $2, local0, $2
512
513 ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
514 sll $1, 29, local4
515 xor $2, local1, $2
516
 ! when retl is emitted here the final xor runs in its delay slot
517 ifelse($7,{}, {}, {retl})
518 xor $2, local2, $2
519})
520
521
522! {fp_macro}
523!
524! parameter 1 right (original left)
525! parameter 2 left (original right)
526! parameter 3 1 for optional store to [in0]
527! parameter 4 1 for load input/output address to local5/7
528!
46f4e1be 529! The final permutation logic switches the halves, meaning that
436ad81f 530! left and right end up in the registers originally used.
e0d769ca
AP
531
532define(fp_macro, {
533
534! {fp_macro}
535! $1 $2 $3 $4 $5 $6 $7 $8 $9
536
537 ! initially undo the rotate 3 left done after initial permutation
538 ! original left is received shifted 3 right and 29 left in local3/4
539
540 sll $2, 29, local1
541 or local3, local4, $1
542
543 srl $2, 3, $2
544 sethi %hi(0x55555555), local2
545
546 or $2, local1, $2
547 or local2, %lo(0x55555555), local2
548
 ! exchange step: distance 1 with mask 0x55555555
549 srl $2, 1, local3
550 sethi %hi(0x00ff00ff), local1
551 xor local3, $1, local3
552 or local1, %lo(0x00ff00ff), local1
553 and local3, local2, local3
554 sethi %hi(0x33333333), local4
555 sll local3, 1, local2
556
557 xor $1, local3, $1
558
 ! exchange step: distance 8 with mask 0x00ff00ff
559 srl $1, 8, local3
560 xor $2, local2, $2
561 xor local3, $2, local3
562 or local4, %lo(0x33333333), local4
563 and local3, local1, local3
564 sethi %hi(0x0000ffff), local1
565 sll local3, 8, local2
566
567 xor $2, local3, $2
568
 ! exchange step: distance 2 with mask 0x33333333
569 srl $2, 2, local3
570 xor $1, local2, $1
571 xor local3, $1, local3
572 or local1, %lo(0x0000ffff), local1
573 and local3, local4, local3
574 sethi %hi(0x0f0f0f0f), local4
575 sll local3, 2, local2
576
f22e1e4d 577 ifelse($4,1, {LDPTR INPUT, local5})
e0d769ca
AP
578 xor $1, local3, $1
579
f22e1e4d 580 ifelse($4,1, {LDPTR OUTPUT, local7})
e0d769ca
AP
 ! exchange step: distance 16 with mask 0x0000ffff
581 srl $1, 16, local3
582 xor $2, local2, $2
583 xor local3, $2, local3
584 or local4, %lo(0x0f0f0f0f), local4
585 and local3, local1, local3
586 sll local3, 16, local2
587
588 xor $2, local3, local1
589
 ! exchange step: distance 4 with mask 0x0f0f0f0f
590 srl local1, 4, local3
591 xor $1, local2, $1
592 xor local3, $1, local3
593 and local3, local4, local3
594 sll local3, 4, local2
595
596 xor $1, local3, $1
597
598 ! optional store:
599
600 ifelse($3,1, {st $1, [in0]})
601
602 xor local1, local2, $2
603
604 ifelse($3,1, {st $2, [in0+4]})
605
606})
607
608
609! {fp_ip_macro}
610!
611! Does initial permutation for next block mixed with
612! final permutation for current block.
613!
614! parameter 1 original left
615! parameter 2 original right
616! parameter 3 left ip
617! parameter 4 right ip
618! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
619! 2: mov in4 to in3
620!
621! also adds -8 to length in2 and loads loop counter to out4
622
623define(fp_ip_macro, {
624
625! {fp_ip_macro}
626! $1 $2 $3 $4 $5 $6 $7 $8 $9
627
 ! Working registers get short alias names first. One alias maps to
 ! out4, so the loop counter is reloaded from the table near the end.
628 define({temp1},{out4})
629 define({temp2},{local3})
630
631 define({ip1},{local1})
632 define({ip2},{local2})
633 define({ip4},{local4})
634 define({ip5},{local5})
635
636 ! $1 in local3, local4
637
638 ld [out2+256], ip1
639 sll out5, 29, temp1
640 or local3, local4, $1
641
642 srl out5, 3, $2
643 ifelse($5,2,{mov in4, in3})
644
645 ld [out2+272], ip5
646 srl $4, 4, local0
647 or $2, temp1, $2
648
649 srl $2, 1, temp1
650 xor temp1, $1, temp1
651
652 and temp1, ip5, temp1
653 xor local0, $3, local0
654
655 sll temp1, 1, temp2
656 xor $1, temp1, $1
657
658 and local0, ip1, local0
659 add in2, -8, in2 ! one block less remaining in the length
660
661 sll local0, 4, local7
662 xor $3, local0, $3
663
664 ld [out2+268], ip4
665 srl $1, 8, temp1
666 xor $2, temp2, $2
667 ld [out2+260], ip2
668 srl $3, 16, local0
669 xor $4, local7, $4
670 xor temp1, $2, temp1
671 xor local0, $4, local0
672 and temp1, ip4, temp1
673 and local0, ip2, local0
674 sll temp1, 8, temp2
675 xor $2, temp1, $2
676 sll local0, 16, local7
677 xor $4, local0, $4
678
679 srl $2, 2, temp1
680 xor $1, temp2, $1
681
682 ld [out2+264], temp2 ! ip3
683 srl $4, 2, local0
684 xor $3, local7, $3
685 xor temp1, $1, temp1
686 xor local0, $3, local0
687 and temp1, temp2, temp1
688 and local0, temp2, local0
689 sll temp1, 2, temp2
690 xor $1, temp1, $1
691 sll local0, 2, local7
692 xor $3, local0, $3
693
694 srl $1, 16, temp1
695 xor $2, temp2, $2
696 srl $3, 8, local0
697 xor $4, local7, $4
698 xor temp1, $2, temp1
699 xor local0, $4, local0
700 and temp1, ip2, temp1
701 and local0, ip4, local0
702 sll temp1, 16, temp2
703 xor $2, temp1, local4
704 sll local0, 8, local7
705 xor $4, local0, $4
706
707 srl $4, 1, local0
708 xor $3, local7, $3
709
710 srl local4, 4, temp1
711 xor local0, $3, local0
712
713 xor $1, temp2, $1
714 and local0, ip5, local0
715
716 sll local0, 1, local7
717 xor temp1, $1, temp1
718
719 xor $3, local0, $3
720 xor $4, local7, $4
721
722 sll $3, 3, local5
723 and temp1, ip1, temp1
724
725 sll temp1, 4, temp2
726 xor $1, temp1, $1
727
f22e1e4d 728 ifelse($5,1,{LDPTR KS2, in4})
e0d769ca
AP
729 sll $4, 3, local2
730 xor local4, temp2, $2
731
46f4e1be 732 ! reload since used as temporary:
e0d769ca
AP
733
734 ld [out2+280], out4 ! loop counter
735
736 srl $3, 29, local0
737 ifelse($5,1,{add in4, 120, in4})
738
f22e1e4d 739 ifelse($5,1,{LDPTR KS1, in3})
e0d769ca
AP
740 srl $4, 29, local7
741
742 or local0, local5, $4
743 or local2, local7, $3
744
745})
746
747
748
749! {load_little_endian}
750!
751! parameter 1 address
752! parameter 2 destination left
753! parameter 3 destination right
46f4e1be 754! parameter 4 temporary
e0d769ca
AP
755! parameter 5 label
756
757define(load_little_endian, {
758
759! {load_little_endian}
760! $1 $2 $3 $4 $5 $6 $7 $8 $9
761
762 ! first in memory to rightmost in register
763
 ! Byte loads only. Left is assembled from bytes 3 down to 0 and right
 ! from bytes 7 down to 4; each step shifts the partial result left by 8
 ! and ors in the next lower byte.
e0d769ca
AP
764$5:
765 ldub [$1+3], $2
766
767 ldub [$1+2], $4
768 sll $2, 8, $2
769 or $2, $4, $2
770
771 ldub [$1+1], $4
772 sll $2, 8, $2
773 or $2, $4, $2
774
775 ldub [$1+0], $4
776 sll $2, 8, $2
777 or $2, $4, $2
778
779
780 ldub [$1+3+4], $3
781
782 ldub [$1+2+4], $4
783 sll $3, 8, $3
784 or $3, $4, $3
785
786 ldub [$1+1+4], $4
787 sll $3, 8, $3
788 or $3, $4, $3
789
790 ldub [$1+0+4], $4
791 sll $3, 8, $3
792 or $3, $4, $3
 ! end label: the entry label name with an a appended
793$5a:
794
795})
796
797
798! {load_little_endian_inc}
799!
800! parameter 1 address
801! parameter 2 destination left
802! parameter 3 destination right
46f4e1be 803! parameter 4 temporary
e0d769ca
AP
804! parameter 5 label
805!
806! adds 8 to address
807
808define(load_little_endian_inc, {
809
810! {load_little_endian_inc}
811! $1 $2 $3 $4 $5 $6 $7 $8 $9
812
813 ! first in memory to rightmost in register
814
 ! Same byte-wise assembly as the non-incrementing variant, with the
 ! address register advanced by 8 part way through.
e0d769ca
AP
815$5:
816 ldub [$1+3], $2
817
818 ldub [$1+2], $4
819 sll $2, 8, $2
820 or $2, $4, $2
821
822 ldub [$1+1], $4
823 sll $2, 8, $2
824 or $2, $4, $2
825
826 ldub [$1+0], $4
827 sll $2, 8, $2
828 or $2, $4, $2
829
830 ldub [$1+3+4], $3
831 add $1, 8, $1 ! address now at the next block, hence the -8 in the offsets below
832
833 ldub [$1+2+4-8], $4
834 sll $3, 8, $3
835 or $3, $4, $3
836
837 ldub [$1+1+4-8], $4
838 sll $3, 8, $3
839 or $3, $4, $3
840
841 ldub [$1+0+4-8], $4
842 sll $3, 8, $3
843 or $3, $4, $3
 ! end label: the entry label name with an a appended
844$5a:
845
846})
847
848
849! {load_n_bytes}
850!
851! Loads 1 to 7 bytes little endian
852! Remaining bytes are zeroed.
853!
854! parameter 1 address
855! parameter 2 length
856! parameter 3 destination register left
857! parameter 4 destination register right
858! parameter 5 temp
859! parameter 6 temp2
860! parameter 7 label
861! parameter 8 return label
862
863define(load_n_bytes, {
864
865! {load_n_bytes}
866! $1 $2 $5 $6 $7 $8 $7 $8 $9
867
 ! The call to .+8 records the address of the entry label in %o7. The
 ! jump table holds offsets from that label and is indexed by the
 ! length times 4. Table entry 0 is a zero offset, which would jump
 ! back to the entry label, so a length of 0 must not reach this code.
f22e1e4d 868$7.0: call .+8
e0d769ca
AP
869 sll $2, 2, $6
870
f22e1e4d 871 add %o7,$7.jmp.table-$7.0,$5
e0d769ca
AP
872
873 add $5, $6, $5
874 mov 0, $4
875
876 ld [$5], $5
877
 ! both destination registers are zeroed before any byte is merged;
 ! the second mov sits in the delay slot of the jmp
f22e1e4d 878 jmp %o7+$5
e0d769ca
AP
879 mov 0, $3
880
 ! the cases fall through: entering at case n loads bytes n-1 down to 0
881$7.7:
882 ldub [$1+6], $5
883 sll $5, 16, $5
884 or $3, $5, $3
885$7.6:
886 ldub [$1+5], $5
887 sll $5, 8, $5
888 or $3, $5, $3
889$7.5:
890 ldub [$1+4], $5
891 or $3, $5, $3
892$7.4:
893 ldub [$1+3], $5
894 sll $5, 24, $5
895 or $4, $5, $4
896$7.3:
897 ldub [$1+2], $5
898 sll $5, 16, $5
899 or $4, $5, $4
900$7.2:
901 ldub [$1+1], $5
902 sll $5, 8, $5
903 or $4, $5, $4
904$7.1:
905 ldub [$1+0], $5
f22e1e4d 906 ba $8
e0d769ca
AP
907 or $4, $5, $4
908
909 .align 4
910
911$7.jmp.table:
912 .word 0
f22e1e4d
AP
913 .word $7.1-$7.0
914 .word $7.2-$7.0
915 .word $7.3-$7.0
916 .word $7.4-$7.0
917 .word $7.5-$7.0
918 .word $7.6-$7.0
919 .word $7.7-$7.0
e0d769ca
AP
920})
921
922
923! {store_little_endian}
924!
925! parameter 1 address
926! parameter 2 source left
927! parameter 3 source right
46f4e1be 928! parameter 4 temporary
e0d769ca
AP
929
930define(store_little_endian, {
931
932! {store_little_endian}
933! $1 $2 $3 $4 $5 $6 $7 $8 $9
934
935 ! rightmost in register to first in memory
936
 ! A fifth macro argument supplies the entry label below; the end label
 ! is the same name with an a appended. Only byte stores are used.
e0d769ca
AP
937$5:
938 and $2, 255, $4
939 stub $4, [$1+0]
940
941 srl $2, 8, $4
942 and $4, 255, $4
943 stub $4, [$1+1]
944
945 srl $2, 16, $4
946 and $4, 255, $4
947 stub $4, [$1+2]
948
 ! no mask needed after a logical shift right by 24
949 srl $2, 24, $4
950 stub $4, [$1+3]
951
952
953 and $3, 255, $4
954 stub $4, [$1+0+4]
955
956 srl $3, 8, $4
957 and $4, 255, $4
958 stub $4, [$1+1+4]
959
960 srl $3, 16, $4
961 and $4, 255, $4
962 stub $4, [$1+2+4]
963
964 srl $3, 24, $4
965 stub $4, [$1+3+4]
966
967$5a:
968
969})
970
971
972! {store_n_bytes}
973!
974! Stores 1 to 7 bytes little endian
975!
976! parameter 1 address
977! parameter 2 length
978! parameter 3 source register left
979! parameter 4 source register right
980! parameter 5 temp
981! parameter 6 temp2
982! parameter 7 label
983! parameter 8 return label
984
985define(store_n_bytes, {
986
987! {store_n_bytes}
988! $1 $2 $5 $6 $7 $8 $7 $8 $9
989
 ! The call to .+8 records the address of the entry label in %o7. The
 ! jump table holds offsets from that label and is indexed by the
 ! length times 4. Table entry 0 is a zero offset, which would jump
 ! back to the entry label, so a length of 0 must not reach this code.
f22e1e4d 990$7.0: call .+8
e0d769ca
AP
991 sll $2, 2, $6
992
f22e1e4d 993 add %o7,$7.jmp.table-$7.0,$5
e0d769ca
AP
994
995 add $5, $6, $5
996
997 ld [$5], $5
f22e1e4d
AP
998
999 jmp %o7+$5
e0d769ca
AP
1000 nop
1001
 ! the cases fall through: entering at case n stores bytes n-1 down to 0
1002$7.7:
1003 srl $3, 16, $5
1004 and $5, 0xff, $5
1005 stub $5, [$1+6]
1006$7.6:
1007 srl $3, 8, $5
1008 and $5, 0xff, $5
1009 stub $5, [$1+5]
1010$7.5:
1011 and $3, 0xff, $5
1012 stub $5, [$1+4]
1013$7.4:
1014 srl $4, 24, $5
1015 stub $5, [$1+3]
1016$7.3:
1017 srl $4, 16, $5
1018 and $5, 0xff, $5
1019 stub $5, [$1+2]
1020$7.2:
1021 srl $4, 8, $5
1022 and $5, 0xff, $5
1023 stub $5, [$1+1]
1024$7.1:
1025 and $4, 0xff, $5
1026
1027
 ! the store of byte 0 sits in the delay slot of the branch to the return label
f22e1e4d 1028 ba $8
e0d769ca
AP
1029 stub $5, [$1]
1030
1031 .align 4
1032
1033$7.jmp.table:
1034
1035 .word 0
f22e1e4d
AP
1036 .word $7.1-$7.0
1037 .word $7.2-$7.0
1038 .word $7.3-$7.0
1039 .word $7.4-$7.0
1040 .word $7.5-$7.0
1041 .word $7.6-$7.0
1042 .word $7.7-$7.0
e0d769ca
AP
1043})
1044
1045
1046define(testvalue,{1})
1047
1048define(register_init, {
1049
1050! For test purposes:
1051
 ! Builds the test constant in local0, copies it to each of up to eight
 ! registers named by the non-empty macro arguments, then copies it to
 ! the fixed set of local, out and global working registers below.
1052 sethi %hi(testvalue), local0
1053 or local0, %lo(testvalue), local0
1054
1055 ifelse($1,{},{}, {mov local0, $1})
1056 ifelse($2,{},{}, {mov local0, $2})
1057 ifelse($3,{},{}, {mov local0, $3})
1058 ifelse($4,{},{}, {mov local0, $4})
1059 ifelse($5,{},{}, {mov local0, $5})
1060 ifelse($6,{},{}, {mov local0, $6})
1061 ifelse($7,{},{}, {mov local0, $7})
1062 ifelse($8,{},{}, {mov local0, $8})
1063
1064 mov local0, local1
1065 mov local0, local2
1066 mov local0, local3
1067 mov local0, local4
1068 mov local0, local5
1069 mov local0, local7
1070 mov local0, local6
1071 mov local0, out0
1072 mov local0, out1
1073 mov local0, out2
1074 mov local0, out3
1075 mov local0, out4
1076 mov local0, out5
1077 mov local0, global1
1078 mov local0, global2
1079 mov local0, global3
1080 mov local0, global4
1081 mov local0, global5
1082
1083})
1084
e0d769ca
AP
1085.section ".text"
1086
1087 .align 32
1088
 ! Shared encrypt rounds, entered with call. The last macro argument
 ! makes the expansion end with retl, and the loop label .des_enc.1 is
 ! also used as a direct call target.
1089.des_enc:
1090
1091 ! key address in3
1092 ! loads key next encryption/decryption first round from [in4]
1093
1094 rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
1095
1096
1097 .align 32
1098
 ! Shared decrypt rounds, entered with call. The key step is -1, so the
 ! key pointer walks backwards; the expansion also ends with retl.
1099.des_dec:
1100
1101 ! implemented with out5 as first parameter to avoid
1102 ! register exchange in ede modes
1103
1104 ! key address in4
1105 ! loads key next encryption/decryption first round from [in3]
1106
1107 rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
1108
1109
1110
f22e1e4d 1111! void DES_encrypt1(data, ks, enc)
e0d769ca
AP
1112! *******************************
1113
1114 .align 32
f22e1e4d
AP
1115 .global DES_encrypt1
1116 .type DES_encrypt1,#function
1117
1118DES_encrypt1:
e0d769ca 1119
f22e1e4d 1120 save %sp, FRAME, %sp
e0d769ca 1121
 ! Position independent setup: the call to .+8 leaves the address of
 ! label 1 in %o7, so global1 becomes the run-time address of
 ! .PIC.DES_SPtrans and out2 the run-time address of .des_and.
4c78bc05
AP
1122 sethi %hi(.PIC.DES_SPtrans-1f),global1
1123 or global1,%lo(.PIC.DES_SPtrans-1f),global1
11241: call .+8
1125 add %o7,global1,global1
1126 sub global1,.PIC.DES_SPtrans-.des_and,out2
e0d769ca
AP
1127
1128 ld [in0], in5 ! left
e0d769ca
AP
1129 cmp in2, 0 ! enc
1130
 ! enc equal to 0 selects the decrypt path; the load of the right half
 ! is in the delay slot and runs on both paths
f22e1e4d 1131 be .encrypt.dec
f22e1e4d 1132 ld [in0+4], out5 ! right
e0d769ca
AP
1133
1134 ! parameter 6 1/2 for include encryption/decryption
1135 ! parameter 7 1 for move in1 to in3
1136 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1137
1138 ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
1139
f22e1e4d 1140 rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
e0d769ca
AP
1141
1142 fp_macro(in5, out5, 1) ! 1 for store to [in0]
1143
f22e1e4d
AP
1144 ret
1145 restore
e0d769ca
AP
1146
1147.encrypt.dec:
1148
1149 add in1, 120, in3 ! use last subkey for first round
1150
1151 ! parameter 6 1/2 for include encryption/decryption
1152 ! parameter 7 1 for move in1 to in3
1153 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1154
 ! here the rounds are not expanded inline: with parameter 6 set to 2
 ! the expansion ends with a call to .des_dec.1
1155 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
1156
1157 fp_macro(out5, in5, 1) ! 1 for store to [in0]
1158
f22e1e4d
AP
1159 ret
1160 restore
e0d769ca 1161
f22e1e4d
AP
1162.DES_encrypt1.end:
1163 .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
e0d769ca
AP
1164
1165
f22e1e4d 1166! void DES_encrypt2(data, ks, enc)
e0d769ca
AP
1167!*********************************
1168
1169 ! encrypts/decrypts without initial/final permutation
1170
1171 .align 32
f22e1e4d
AP
1172 .global DES_encrypt2
1173 .type DES_encrypt2,#function
e0d769ca 1174
f22e1e4d 1175DES_encrypt2:
e0d769ca 1176
f22e1e4d
AP
1177 save %sp, FRAME, %sp
1178
 ! Position independent setup: the call to .+8 leaves the address of
 ! label 1 in %o7, so global1 becomes the run-time address of
 ! .PIC.DES_SPtrans and out2 the run-time address of .des_and.
4c78bc05
AP
1179 sethi %hi(.PIC.DES_SPtrans-1f),global1
1180 or global1,%lo(.PIC.DES_SPtrans-1f),global1
11811: call .+8
1182 add %o7,global1,global1
1183 sub global1,.PIC.DES_SPtrans-.des_and,out2
e0d769ca
AP
1184
1185 ! Set sbox address 1 to 6 and rotate halves 3 left
1186 ! Errors caught by destest? Yes. Still? *NO*
1187
f22e1e4d 1188 !sethi %hi(DES_SPtrans), global1 ! address sbox 1
e0d769ca 1189
f22e1e4d 1190 !or global1, %lo(DES_SPtrans), global1 ! sbox 1
e0d769ca
AP
1191
1192 add global1, 256, global2 ! sbox 2
1193 add global1, 512, global3 ! sbox 3
1194
1195 ld [in0], out5 ! right
1196 add global1, 768, global4 ! sbox 4
1197 add global1, 1024, global5 ! sbox 5
1198
1199 ld [in0+4], in5 ! left
1200 add global1, 1280, local6 ! sbox 6
1201 add global1, 1792, out3 ! sbox 8
1202
1203 ! rotate
1204
1205 sll in5, 3, local5
1206 mov in1, in3 ! key address to in3
1207
1208 sll out5, 3, local7
1209 srl in5, 29, in5
1210
1211 srl out5, 29, out5
1212 add in5, local5, in5
1213
1214 add out5, local7, out5
1215 cmp in2, 0
1216
1217 ! we use our own stackframe
1218
 ! the data pointer in in0 is saved to the stack in the delay slot
 ! because in0 and in1 are reused as temporaries for the final rotate
f22e1e4d 1219 be .encrypt2.dec
f22e1e4d 1220 STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ]
e0d769ca
AP
1221
1222 ld [in3], out0 ! key 7531 first round
1223 mov LOOPS, out4 ! loop counter
1224
1225 ld [in3+4], out1 ! key 8642 first round
1226 sethi %hi(0x0000FC00), local5
1227
1228 call .des_enc
1229 mov in3, in4
1230
1231 ! rotate
1232 sll in5, 29, in0
1233 srl in5, 3, in5
1234 sll out5, 29, in1
1235 add in5, in0, in5
1236 srl out5, 3, out5
f22e1e4d 1237 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
e0d769ca
AP
1238 add out5, in1, out5
1239 st in5, [in0]
1240 st out5, [in0+4]
1241
f22e1e4d
AP
1242 ret
1243 restore
e0d769ca
AP
1244
1245
1246.encrypt2.dec:
1247
 ! decrypt starts from the key pair at offset 120 from the schedule start
1248 add in3, 120, in4
1249
1250 ld [in4], out0 ! key 7531 first round
1251 mov LOOPS, out4 ! loop counter
1252
1253 ld [in4+4], out1 ! key 8642 first round
1254 sethi %hi(0x0000FC00), local5
1255
1256 mov in5, local1 ! left expected in out5
1257 mov out5, in5
1258
1259 call .des_dec
1260 mov local1, out5
1261
1262.encrypt2.finish:
1263
1264 ! rotate
1265 sll in5, 29, in0
1266 srl in5, 3, in5
1267 sll out5, 29, in1
1268 add in5, in0, in5
1269 srl out5, 3, out5
f22e1e4d 1270 LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
e0d769ca
AP
1271 add out5, in1, out5
1272 st out5, [in0]
1273 st in5, [in0+4]
1274
f22e1e4d
AP
1275 ret
1276 restore
e0d769ca 1277
f22e1e4d
AP
1278.DES_encrypt2.end:
1279 .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
e0d769ca
AP
1280
1281
f22e1e4d 1282! void DES_encrypt3(data, ks1, ks2, ks3)
e0d769ca
AP
1283! **************************************
1284
1285 .align 32
f22e1e4d
AP
1286 .global DES_encrypt3
1287 .type DES_encrypt3,#function
e0d769ca 1288
f22e1e4d 1289DES_encrypt3:
e0d769ca 1290
f22e1e4d 1291 save %sp, FRAME, %sp
e0d769ca 1292
 ! Position independent setup: the call to .+8 leaves the address of
 ! label 1 in %o7, so global1 becomes the run-time address of
 ! .PIC.DES_SPtrans and out2 the run-time address of .des_and.
4c78bc05
AP
1293 sethi %hi(.PIC.DES_SPtrans-1f),global1
1294 or global1,%lo(.PIC.DES_SPtrans-1f),global1
12951: call .+8
1296 add %o7,global1,global1
1297 sub global1,.PIC.DES_SPtrans-.des_and,out2
f22e1e4d 1298
e0d769ca
AP
1299 ld [in0], in5 ! left
1300 add in2, 120, in4 ! ks2
e0d769ca
AP
1301
1302 ld [in0+4], out5 ! right
1303 mov in3, in2 ! save ks3
e0d769ca
AP
1304
1305 ! parameter 6 1/2 for include encryption/decryption
1306 ! parameter 7 1 for mov in1 to in3
1307 ! parameter 8 1 for mov in3 to in4
1308 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1309
 ! Three passes: the expansion below ends by calling .des_enc.1 with
 ! the first schedule, then the decrypt and encrypt calls follow.
1310 ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
1311
1312 call .des_dec
1313 mov in2, in3 ! preload ks3
1314
1315 call .des_enc
1316 nop
1317
1318 fp_macro(in5, out5, 1)
1319
f22e1e4d
AP
1320 ret
1321 restore
e0d769ca 1322
f22e1e4d
AP
1323.DES_encrypt3.end:
1324 .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
e0d769ca
AP
1325
1326
f22e1e4d 1327! void DES_decrypt3(data, ks1, ks2, ks3)
e0d769ca
AP
1328! **************************************
1329
1330 .align 32
f22e1e4d
AP
1331 .global DES_decrypt3
1332 .type DES_decrypt3,#function
e0d769ca 1333
f22e1e4d 1334DES_decrypt3:
e0d769ca 1335
f22e1e4d 1336 save %sp, FRAME, %sp
e0d769ca 1337
 ! Position independent setup: the call to .+8 leaves the address of
 ! label 1 in %o7, so global1 becomes the run-time address of
 ! .PIC.DES_SPtrans and out2 the run-time address of .des_and.
4c78bc05
AP
1338 sethi %hi(.PIC.DES_SPtrans-1f),global1
1339 or global1,%lo(.PIC.DES_SPtrans-1f),global1
13401: call .+8
1341 add %o7,global1,global1
1342 sub global1,.PIC.DES_SPtrans-.des_and,out2
f22e1e4d 1343
e0d769ca
AP
1344 ld [in0], in5 ! left
1345 add in3, 120, in4 ! ks3
e0d769ca
AP
1346
1347 ld [in0+4], out5 ! right
1348 mov in2, in3 ! ks2
e0d769ca
AP
1349
1350 ! parameter 6 1/2 for include encryption/decryption
1351 ! parameter 7 1 for mov in1 to in3
1352 ! parameter 8 1 for mov in3 to in4
1353 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1354
 ! Three passes: the expansion below ends by calling .des_dec.1 with
 ! ks3, then encrypt with ks2 and decrypt with ks1 follow.
1355 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
1356
1357 call .des_enc
1358 add in1, 120, in4 ! preload ks1
1359
1360 call .des_dec
1361 nop
1362
1363 fp_macro(out5, in5, 1)
1364
f22e1e4d
AP
1365 ret
1366 restore
1367
1368.DES_decrypt3.end:
1369 .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
1370
f22e1e4d 1371! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
e0d769ca
AP
1372! *****************************************************************
! CBC-mode single DES over length bytes.  A nonzero enc (in5) selects
! the encryption path; zero branches to .ncbc.dec.  On both paths the
! ivec pointer is parked in a stack slot first (branch delay slot),
! because in4 and in5 are reused as working registers afterwards.
! NOTE(review): the bl/ble/bg/bpos branches applied to the byte count
! test the 32-bit condition codes; in the 64-bit build that would look
! only at the low 32 bits of length - confirm whether that is intended.
1373
1374
1375 .align 32
f22e1e4d
AP
1376 .global DES_ncbc_encrypt
1377 .type DES_ncbc_encrypt,#function
e0d769ca 1378
f22e1e4d 1379DES_ncbc_encrypt:
e0d769ca 1380
f22e1e4d 1381 save %sp, FRAME, %sp
e0d769ca 1382
f22e1e4d
AP
! m4 names for three scratch slots in the frame just created by save
! (addressed off the new %sp).  The first two names are used again by
! DES_ede3_cbc_encrypt further down in this file.
1383 define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
1384 define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
1385 define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1386
4c78bc05
AP
! Position-independent table addresses: the call at label 1 leaves its
! own address in %o7; adding the assemble-time distance to
! .PIC.DES_SPtrans gives the table address in global1, and out2 is
! derived from it to point at .des_and.
1387 sethi %hi(.PIC.DES_SPtrans-1f),global1
1388 or global1,%lo(.PIC.DES_SPtrans-1f),global1
13891: call .+8
1390 add %o7,global1,global1
1391 sub global1,.PIC.DES_SPtrans-.des_and,out2
e0d769ca 1392
df443918 1393 cmp in5, 0 ! enc
e0d769ca 1394
! The store in the delay slot saves the ivec pointer whether or not the
! branch is taken.
f22e1e4d 1395 be .ncbc.dec
f22e1e4d 1396 STPTR in4, IVEC
e0d769ca
AP
1397
! Encryption path: in5/out5 carry the chaining value, starting with
! the iv read through in4.
1398 ! addr left right temp label
1399 load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
1400
1401 addcc in2, -8, in2 ! bytes missing when first block done
1402
! Fewer than 8 bytes in total: go straight to the short-block handling.
! in4 now holds the schedule (delay slot, executed on both paths).
f22e1e4d 1403 bl .ncbc.enc.seven.or.less
f22e1e4d 1404 mov in3, in4 ! schedule
e0d769ca
AP
1405
1406.ncbc.enc.next.block:
1407
1408 load_little_endian(in0, out4, global4, local3, .LLE2) ! block
1409
! Also entered from the short-block loader below, with the gathered
! bytes already in out4/global4.
1410.ncbc.enc.next.block_1:
1411
1412 xor in5, out4, in5 ! iv xor
1413 xor out5, global4, out5 ! iv xor
1414
1415 ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
1416 ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
1417
1418.ncbc.enc.next.block_2:
1419
1420!// call .des_enc ! compares in2 to 8
1421! rounds inlined for alignment purposes
1422
1423 add global1, 768, global4 ! address sbox 4 since register used below
1424
1425 rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
1426
! Condition codes come from the inlined rounds (in2 compared to 8, per
! the remark above): with fewer than 8 bytes left after this block it
! is finished with a plain final permutation.
f22e1e4d 1427 bl .ncbc.enc.next.block_fp
e0d769ca
AP
1428 add in0, 8, in0 ! input address
1429
1430 ! If 8 or more bytes are to be encrypted after this block,
1431 ! we combine final permutation for this block with initial
1432 ! permutation for next block. Load next block:
1433
1434 load_little_endian(in0, global3, global4, local5, .LLE12)
1435
1436 ! parameter 1 original left
1437 ! parameter 2 original right
1438 ! parameter 3 left ip
1439 ! parameter 4 right ip
1440 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1441 ! 2: mov in4 to in3
1442 !
1443 ! also adds -8 to length in2 and loads loop counter to out4
1444
1445 fp_ip_macro(out0, out1, global3, global4, 2)
1446
1447 store_little_endian(in1, out0, out1, local3, .SLE10) ! block
1448
! in5/out5 are combined with the permuted next block in global3 and
! global4; local1 keeps the old in5 while in5 is overwritten.  out0 and
! out1 are reloaded with the first-round key words, and global3 is
! restored to the sbox 3 address.
1449 ld [in3], out0 ! key 7531 first round next block
1450 mov in5, local1
1451 xor global3, out5, in5 ! iv xor next block
1452
1453 ld [in3+4], out1 ! key 8642
1454 add global1, 512, global3 ! address sbox 3 since register used
1455 xor global4, local1, out5 ! iv xor next block
1456
f22e1e4d 1457 ba .ncbc.enc.next.block_2
478b50cf 1458 add in1, 8, in1 ! output address
e0d769ca
AP
1459
1460.ncbc.enc.next.block_fp:
1461
1462 fp_macro(in5, out5)
1463
1464 store_little_endian(in1, in5, out5, local3, .SLE1) ! block
1465
1466 addcc in2, -8, in2 ! bytes missing when next block done
1467
f22e1e4d 1468 bpos .ncbc.enc.next.block
e0d769ca
AP
1469 add in1, 8, in1
1470
1471.ncbc.enc.seven.or.less:
1472
! in2 holds the remaining byte count minus 8 here, so -8 or less means
! nothing is left to encrypt.
1473 cmp in2, -8
1474
f22e1e4d 1475 ble .ncbc.enc.finish
e0d769ca
AP
1476 nop
1477
1478 add in2, 8, local1 ! bytes to load
1479
1480 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1481 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
1482
1483 ! Loads 1 to 7 bytes little endian to global4, out4
1484
1485
1486.ncbc.enc.finish:
1487
! Write the current chaining value (in5/out5) back through the saved
! ivec pointer.
f22e1e4d 1488 LDPTR IVEC, local4
e0d769ca
AP
1489 store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
1490
f22e1e4d
AP
1491 ret
1492 restore
e0d769ca
AP
1493
1494
1495.ncbc.dec:
1496
! Decryption path.  The input and output pointers are parked in stack
! slots because in0/in1 are reused below to hold the chaining value.
! The schedule pointer is advanced by 120 (the ede3 code further down
! notes that decryption uses the last subkey first).
f22e1e4d 1497 STPTR in0, INPUT
e0d769ca
AP
1498 cmp in2, 0 ! length
1499 add in3, 120, in3
1500
! length <= 0: return at once; nothing is written, not even ivec.
f22e1e4d 1501 LDPTR IVEC, local7 ! ivec
f22e1e4d 1502 ble .ncbc.dec.finish
e0d769ca
AP
1503 mov in3, in4 ! schedule
1504
f22e1e4d 1505 STPTR in1, OUTPUT
e0d769ca
AP
1506 mov in0, local5 ! input
1507
1508 load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
1509
1510.ncbc.dec.next.block:
1511
1512 load_little_endian(local5, in5, out5, local3, .LLE4) ! block
1513
1514 ! parameter 6 1/2 for include encryption/decryption
1515 ! parameter 7 1 for mov in1 to in3
1516 ! parameter 8 1 for mov in3 to in4
1517
46f4e1be 1518 ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
e0d769ca
AP
1519
1520 fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
1521
1522 ! in2 is bytes left to be stored
1523 ! in2 is compared to 8 in the rounds
1524
! The xor instructions leave the condition codes from the rounds
! untouched; the second xor executes in the delay slot on both paths.
1525 xor out5, in0, out4 ! iv xor
f22e1e4d 1526 bl .ncbc.dec.seven.or.less
e0d769ca
AP
1527 xor in5, in1, global4 ! iv xor
1528
1529 ! Load ivec next block now, since input and output address might be the same.
1530
1531 load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
1532
1533 store_little_endian(local7, out4, global4, local3, .SLE3)
1534
! The advanced input and output pointers are written back to their
! stack slots (the output one in the delay slot of the loop branch).
f22e1e4d 1535 STPTR local5, INPUT
e0d769ca
AP
1536 add local7, 8, local7
1537 addcc in2, -8, in2
1538
f22e1e4d 1539 bg .ncbc.dec.next.block
f22e1e4d 1540 STPTR local7, OUTPUT
e0d769ca
AP
1541
1542
1543.ncbc.dec.store.iv:
1544
! in0/in1 hold the last ciphertext block read; it becomes the new iv.
f22e1e4d 1545 LDPTR IVEC, local4 ! ivec
e0d769ca
AP
1546 store_little_endian(local4, in0, in1, local5, .SLE4)
1547
1548.ncbc.dec.finish:
1549
f22e1e4d
AP
1550 ret
1551 restore
e0d769ca
AP
1552
1553.ncbc.dec.seven.or.less:
1554
! Short tail: fetch the last ciphertext block as the iv to hand back,
! then store the plaintext bytes (count passed in in2) and rejoin at
! .ncbc.dec.store.iv, which is given as the return label.
1555 load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
1556
1557 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
1558
1559
f22e1e4d
AP
1560.DES_ncbc_encrypt.end:
1561 .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
e0d769ca
AP
1562
1563
46f4e1be 1564! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
e0d769ca
AP
1565! **************************************************************************
! CBC-mode triple DES.  The first six arguments arrive in in0-in5;
! ivec and enc are fetched from the caller frame through %fp.  The
! three key schedule pointers are parked in stack slots so that in3
! and in4 can be reloaded for each pass.  A nonzero enc selects the
! encryption path; zero branches to .ede3.dec.
! NOTE(review): the bl/ble/bg/bpos branches applied to the byte count
! test the 32-bit condition codes; in the 64-bit build that would look
! only at the low 32 bits of length - confirm whether that is intended.
1566
1567
1568 .align 32
f22e1e4d
AP
1569 .global DES_ede3_cbc_encrypt
1570 .type DES_ede3_cbc_encrypt,#function
e0d769ca 1571
f22e1e4d 1572DES_ede3_cbc_encrypt:
e0d769ca 1573
f22e1e4d 1574 save %sp, FRAME, %sp
e0d769ca 1575
f22e1e4d
AP
! m4 names for three more scratch slots in the frame just created by
! save (addressed off the new %sp).
1576 define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
1577 define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
1578 define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
e0d769ca 1579
4c78bc05
AP
! Position-independent table addresses: the call at label 1 leaves its
! own address in %o7; adding the assemble-time distance to
! .PIC.DES_SPtrans gives the table address in global1, and out2 is
! derived from it to point at .des_and.
1580 sethi %hi(.PIC.DES_SPtrans-1f),global1
1581 or global1,%lo(.PIC.DES_SPtrans-1f),global1
15821: call .+8
1583 add %o7,global1,global1
1584 sub global1,.PIC.DES_SPtrans-.des_and,out2
e0d769ca 1585
f22e1e4d
AP
1586 LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
1587 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
e0d769ca
AP
1588 cmp local3, 0 ! enc
1589
! The store in the delay slot saves ks2 whether or not the branch is
! taken.
719122c7 1590 be .ede3.dec
f22e1e4d 1591 STPTR in4, KS2
e0d769ca 1592
f22e1e4d 1593 STPTR in5, KS3
e0d769ca
AP
1594
! Encryption path: in5/out5 carry the chaining value, starting with
! the iv.
1595 load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
1596
1597 addcc in2, -8, in2 ! bytes missing after next block
1598
! ks1 is saved in the delay slot on both paths.
f22e1e4d 1599 bl .ede3.enc.seven.or.less
f22e1e4d 1600 STPTR in3, KS1
e0d769ca
AP
1601
1602.ede3.enc.next.block:
1603
1604 load_little_endian(in0, out4, global4, local3, .LLE7)
1605
! Also entered from the short-block loader below, with the gathered
! bytes already in out4/global4.
1606.ede3.enc.next.block_1:
1607
f22e1e4d 1608 LDPTR KS2, in4
e0d769ca
AP
1609 xor in5, out4, in5 ! iv xor
1610 xor out5, global4, out5 ! iv xor
1611
f22e1e4d 1612 LDPTR KS1, in3
e0d769ca
AP
1613 add in4, 120, in4 ! for decryption we use last subkey first
1614 nop
1615
1616 ip_macro(in5, out5, in5, out5, in3)
1617
1618.ede3.enc.next.block_2:
1619
! Three passes: encrypt with ks1, decrypt with ks2 (in4 = ks2+120),
! encrypt with ks3 (loaded into in3 in the delay slot of the second
! call).
1620 call .des_enc ! ks1 in3
1621 nop
1622
1623 call .des_dec ! ks2 in4
f22e1e4d 1624 LDPTR KS3, in3
e0d769ca
AP
1625
1626 call .des_enc ! ks3 in3 compares in2 to 8
1627 nop
1628
! With fewer than 8 bytes left after this block it is finished with a
! plain final permutation.
f22e1e4d 1629 bl .ede3.enc.next.block_fp
e0d769ca
AP
1630 add in0, 8, in0
1631
1632 ! If 8 or more bytes are to be encrypted after this block,
1633 ! we combine final permutation for this block with initial
1634 ! permutation for next block. Load next block:
1635
1636 load_little_endian(in0, global3, global4, local5, .LLE11)
1637
1638 ! parameter 1 original left
1639 ! parameter 2 original right
1640 ! parameter 3 left ip
1641 ! parameter 4 right ip
1642 ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
1643 ! 2: mov in4 to in3
1644 !
1645 ! also adds -8 to length in2 and loads loop counter to out4
1646
1647 fp_ip_macro(out0, out1, global3, global4, 1)
1648
1649 store_little_endian(in1, out0, out1, local3, .SLE9) ! block
1650
! in5/out5 are combined with the permuted next block in global3 and
! global4; local1 keeps the old in5 while in5 is overwritten.  out0,
! out1, global3 and global4 are then reloaded with the first-round key
! words and the sbox 3 / sbox 4 addresses.
1651 mov in5, local1
1652 xor global3, out5, in5 ! iv xor next block
1653
1654 ld [in3], out0 ! key 7531
1655 add global1, 512, global3 ! address sbox 3
1656 xor global4, local1, out5 ! iv xor next block
1657
1658 ld [in3+4], out1 ! key 8642
1659 add global1, 768, global4 ! address sbox 4
f22e1e4d 1660 ba .ede3.enc.next.block_2
e0d769ca
AP
1661 add in1, 8, in1
1662
1663.ede3.enc.next.block_fp:
1664
1665 fp_macro(in5, out5)
1666
1667 store_little_endian(in1, in5, out5, local3, .SLE5) ! block
1668
1669 addcc in2, -8, in2 ! bytes missing when next block done
1670
f22e1e4d 1671 bpos .ede3.enc.next.block
e0d769ca
AP
1672 add in1, 8, in1
1673
1674.ede3.enc.seven.or.less:
1675
! in2 holds the remaining byte count minus 8 here, so -8 or less means
! nothing is left to encrypt.
1676 cmp in2, -8
1677
f22e1e4d 1678 ble .ede3.enc.finish
e0d769ca
AP
1679 nop
1680
1681 add in2, 8, local1 ! bytes to load
1682
1683 ! addr, length, dest left, dest right, temp, temp2, label, ret label
1684 load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
1685
1686.ede3.enc.finish:
1687
! Write the current chaining value (in5/out5) back through ivec, which
! is re-read from the caller frame.
f22e1e4d 1688 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
e0d769ca
AP
1689 store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
1690
f22e1e4d
AP
1691 ret
1692 restore
e0d769ca
AP
1693
1694.ede3.dec:
1695
! Decryption path.  The input and output pointers are parked in stack
! slots because in0/in1 are reused below to hold the chaining value.
! The ks1 and ks3 slots receive pointers already advanced by 120; the
! ks2 slot (written in the delay slot of the branch that led here)
! keeps the unmodified pointer.
f22e1e4d 1696 STPTR in0, INPUT
e0d769ca
AP
1697 add in5, 120, in5
1698
f22e1e4d 1699 STPTR in1, OUTPUT
e0d769ca
AP
1700 mov in0, local5
1701 add in3, 120, in3
1702
f22e1e4d 1703 STPTR in3, KS1
e0d769ca
AP
1704 cmp in2, 0
1705
! length <= 0: return at once (the ks3 store in the delay slot still
! executes); nothing is written to output or ivec.
f22e1e4d 1706 ble .ede3.dec.finish
f22e1e4d 1707 STPTR in5, KS3
e0d769ca 1708
f22e1e4d 1709 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
e0d769ca
AP
1710 load_little_endian(local7, in0, in1, local3, .LLE8)
1711
1712.ede3.dec.next.block:
1713
1714 load_little_endian(local5, in5, out5, local3, .LLE9)
1715
1716 ! parameter 6 1/2 for include encryption/decryption
1717 ! parameter 7 1 for mov in1 to in3
1718 ! parameter 8 1 for mov in3 to in4
1719 ! parameter 9 1 for load ks3 and ks2 to in4 and in3
1720
1721 ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
1722
! The load in the delay slot fetches the advanced ks1 pointer for the
! .des_dec call that follows.
1723 call .des_enc ! ks2 in3
f22e1e4d 1724 LDPTR KS1, in4
e0d769ca
AP
1725
1726 call .des_dec ! ks1 in4
1727 nop
1728
1729 fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
1730
1731 ! in2 is bytes left to be stored
1732 ! in2 is compared to 8 in the rounds
1733
! The xor instructions leave the condition codes from the rounds
! untouched; the second xor executes in the delay slot on both paths.
1734 xor out5, in0, out4
f22e1e4d 1735 bl .ede3.dec.seven.or.less
e0d769ca
AP
1736 xor in5, in1, global4
1737
1738 load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
1739
1740 store_little_endian(local7, out4, global4, local3, .SLE7) ! block
1741
! The advanced input and output pointers are written back to their
! stack slots (the output one in the delay slot of the loop branch).
f22e1e4d 1742 STPTR local5, INPUT
e0d769ca
AP
1743 addcc in2, -8, in2
1744 add local7, 8, local7
1745
f22e1e4d 1746 bg .ede3.dec.next.block
f22e1e4d 1747 STPTR local7, OUTPUT
e0d769ca
AP
1748
1749.ede3.dec.store.iv:
1750
! in0/in1 hold the last ciphertext block read; it becomes the new iv.
f22e1e4d 1751 LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
e0d769ca
AP
1752 store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
1753
1754.ede3.dec.finish:
1755
f22e1e4d
AP
1756 ret
1757 restore
e0d769ca
AP
1758
1759.ede3.dec.seven.or.less:
1760
! Short tail: fetch the last ciphertext block as the iv to hand back,
! then store the plaintext bytes (count passed in in2) and rejoin at
! .ede3.dec.store.iv, which is given as the return label.
1761 load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
1762
1763 store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
1764
1765
f22e1e4d
AP
1766.DES_ede3_cbc_encrypt.end:
1767 .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
70532b7d
AP
1768
1769 .align 256
1770 .type .des_and,#object
! Object size: 256 lookup bytes followed by eight 32-bit words (the
! last one at offset 284), i.e. 288 bytes in total.
1771 .size .des_and,288
1772
1773.des_and:
1774
1775! This table is used for AND 0xFC when it is known that register
1776! bits 8-31 are zero. Makes it possible to do three arithmetic
1777! operations in one cycle.
! Entry b holds b with its two lowest bits cleared.
1778
1779 .byte 0, 0, 0, 0, 4, 4, 4, 4
1780 .byte 8, 8, 8, 8, 12, 12, 12, 12
1781 .byte 16, 16, 16, 16, 20, 20, 20, 20
1782 .byte 24, 24, 24, 24, 28, 28, 28, 28
1783 .byte 32, 32, 32, 32, 36, 36, 36, 36
1784 .byte 40, 40, 40, 40, 44, 44, 44, 44
1785 .byte 48, 48, 48, 48, 52, 52, 52, 52
1786 .byte 56, 56, 56, 56, 60, 60, 60, 60
1787 .byte 64, 64, 64, 64, 68, 68, 68, 68
1788 .byte 72, 72, 72, 72, 76, 76, 76, 76
1789 .byte 80, 80, 80, 80, 84, 84, 84, 84
1790 .byte 88, 88, 88, 88, 92, 92, 92, 92
1791 .byte 96, 96, 96, 96, 100, 100, 100, 100
1792 .byte 104, 104, 104, 104, 108, 108, 108, 108
1793 .byte 112, 112, 112, 112, 116, 116, 116, 116
1794 .byte 120, 120, 120, 120, 124, 124, 124, 124
1795 .byte 128, 128, 128, 128, 132, 132, 132, 132
1796 .byte 136, 136, 136, 136, 140, 140, 140, 140
1797 .byte 144, 144, 144, 144, 148, 148, 148, 148
1798 .byte 152, 152, 152, 152, 156, 156, 156, 156
1799 .byte 160, 160, 160, 160, 164, 164, 164, 164
1800 .byte 168, 168, 168, 168, 172, 172, 172, 172
1801 .byte 176, 176, 176, 176, 180, 180, 180, 180
1802 .byte 184, 184, 184, 184, 188, 188, 188, 188
1803 .byte 192, 192, 192, 192, 196, 196, 196, 196
1804 .byte 200, 200, 200, 200, 204, 204, 204, 204
1805 .byte 208, 208, 208, 208, 212, 212, 212, 212
1806 .byte 216, 216, 216, 216, 220, 220, 220, 220
1807 .byte 224, 224, 224, 224, 228, 228, 228, 228
1808 .byte 232, 232, 232, 232, 236, 236, 236, 236
1809 .byte 240, 240, 240, 240, 244, 244, 244, 244
1810 .byte 248, 248, 248, 248, 252, 252, 252, 252
1811
46f4e1be 1812 ! 5 numbers for initial/final permutation
70532b7d
AP
1813
1814 .word 0x0f0f0f0f ! offset 256
1815 .word 0x0000ffff ! 260
1816 .word 0x33333333 ! 264
1817 .word 0x00ff00ff ! 268
1818 .word 0x55555555 ! 272
1819
1820 .word 0 ! 276
1821 .word LOOPS ! 280
1822 .word 0x0000FC00 ! 284
1823
! Combined S-box/permutation lookup table: eight sub-tables (labelled
! nibble 0 to nibble 7 below) of 64 words each, 256 bytes per
! sub-table and 2048 bytes in total.  The code above addresses
! sub-tables as fixed offsets from the table base, e.g. base+512 and
! base+768.
1824 .global DES_SPtrans
1825 .type DES_SPtrans,#object
1826 .size DES_SPtrans,2048
1827.align 64
! Two labels for the same address: the exported symbol, and a local
! alias that the functions above use in their pc-relative address
! computation.
1828DES_SPtrans:
1829.PIC.DES_SPtrans:
1830 ! nibble 0
1831 .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
1832 .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
1833 .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
1834 .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
1835 .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
1836 .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
1837 .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
1838 .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
1839 .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
1840 .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
1841 .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
1842 .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
1843 .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
1844 .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
1845 .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
1846 .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
1847 ! nibble 1
1848 .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
1849 .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
1850 .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
1851 .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
1852 .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
1853 .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
1854 .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
1855 .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
1856 .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
1857 .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
1858 .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
1859 .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
1860 .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
1861 .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
1862 .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
1863 .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
1864 ! nibble 2
1865 .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
1866 .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
1867 .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
1868 .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
1869 .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
1870 .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
1871 .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
1872 .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
1873 .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
1874 .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
1875 .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
1876 .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
1877 .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
1878 .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
1879 .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
1880 .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
1881 ! nibble 3
1882 .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
1883 .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
1884 .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
1885 .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
1886 .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
1887 .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
1888 .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
1889 .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
1890 .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
1891 .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
1892 .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
1893 .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
1894 .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
1895 .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
1896 .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
1897 .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
1898 ! nibble 4
1899 .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
1900 .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
1901 .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
1902 .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
1903 .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
1904 .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
1905 .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
1906 .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
1907 .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
1908 .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
1909 .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
1910 .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
1911 .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
1912 .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
1913 .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
1914 .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
1915 ! nibble 5
1916 .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
1917 .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
1918 .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
1919 .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
1920 .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
1921 .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
1922 .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
1923 .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
1924 .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
1925 .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
1926 .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
1927 .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
1928 .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
1929 .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
1930 .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
1931 .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
1932 ! nibble 6
1933 .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
1934 .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
1935 .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
1936 .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
1937 .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
1938 .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
1939 .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
1940 .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
1941 .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
1942 .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
1943 .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
1944 .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
1945 .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
1946 .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
1947 .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
1948 .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
1949 ! nibble 7
1950 .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
1951 .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
1952 .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
1953 .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
1954 .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
1955 .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
1956 .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
1957 .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
1958 .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
1959 .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
1960 .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
1961 .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
1962 .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
1963 .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
1964 .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
1965 .word 0x20000000, 0x20800080, 0x00020000, 0x00820080
1966