]>
Commit | Line | Data |
---|---|---|
48e5119a | 1 | ! Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved. |
e0d769ca | 2 | ! |
2d48d5dd | 3 | ! Licensed under the Apache License 2.0 (the "License"). You may not use |
44c8a5e2 RS |
4 | ! this file except in compliance with the License. You can obtain a copy |
5 | ! in the file LICENSE in the source distribution or at | |
6 | ! https://www.openssl.org/source/license.html | |
e0d769ca AP |
7 | ! |
8 | ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S | |
9 | ! | |
10 | ! Global registers 1 to 5 are used. This is the same as done by the | |
11 | ! cc compiler. The UltraSPARC load/store little endian feature is used. | |
12 | ! | |
13 | ! Instruction grouping often refers to one CPU cycle. | |
14 | ! | |
15 | ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S | |
16 | ! | |
17 | ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S | |
f22e1e4d AP |
18 | ! |
19 | ! Performance improvement according to './apps/openssl speed des' | |
20 | ! | |
21 | ! 32-bit build: | |
22 | ! 23% faster than cc-5.2 -xarch=v8plus -xO5 | |
23 | ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5 | |
24 | ! 64-bit build: | |
25 | ! 50% faster than cc-5.2 -xarch=v9 -xO5 | |
26 | ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5 | |
27 | ! | |
e0d769ca | 28 | |
45771abb | 29 | .ident "des_enc.m4 2.1" |
70532b7d | 30 | .file "des_enc-sparc.S" |
f22e1e4d AP |
31 | |
32 | #if defined(__SUNPRO_C) && defined(__sparcv9) | |
33 | # define ABI64 /* cc was given -xarch=v9 on the command line: 64-bit build */ | |
34 | #elif defined(__GNUC__) && defined(__arch64__) | |
35 | # define ABI64 /* gcc was given -m64 on the command line: 64-bit build */ | |
36 | #endif | |
37 | ||
38 | #ifdef ABI64 | |
39 | .register %g2,#scratch | |
40 | .register %g3,#scratch | |
41 | # define FRAME -192 /* frame size passed to save in the 64-bit build */ | |
42 | # define BIAS 2047 | |
43 | # define LDPTR ldx /* pointer-sized load */ | |
44 | # define STPTR stx /* pointer-sized store */ | |
45 | # define ARG0 128 | |
46 | # define ARGSZ 8 | |
f22e1e4d AP |
47 | #else |
48 | # define FRAME -96 /* frame size passed to save in the 32-bit build */ | |
49 | # define BIAS 0 | |
50 | # define LDPTR ld /* pointer-sized load */ | |
51 | # define STPTR st /* pointer-sized store */ | |
52 | # define ARG0 68 | |
53 | # define ARGSZ 4 | |
54 | #endif | |
e0d769ca AP |
55 | |
56 | #define LOOPS 7 /* NOTE(review): presumably the count of two-round loop iterations; its use is outside this view - confirm */ | |
57 | ||
58 | #define global0 %g0 | |
59 | #define global1 %g1 | |
60 | #define global2 %g2 | |
61 | #define global3 %g3 | |
62 | #define global4 %g4 | |
63 | #define global5 %g5 | |
64 | ||
65 | #define local0 %l0 | |
66 | #define local1 %l1 | |
67 | #define local2 %l2 | |
68 | #define local3 %l3 | |
69 | #define local4 %l4 | |
70 | #define local5 %l5 | |
71 | #define local7 %l6 /* note: local7 and local6 are cross-mapped, local7 is %l6 */ | |
72 | #define local6 %l7 /* note: local6 is %l7 */ | |
73 | ||
74 | #define in0 %i0 | |
75 | #define in1 %i1 | |
76 | #define in2 %i2 | |
77 | #define in3 %i3 | |
78 | #define in4 %i4 | |
79 | #define in5 %i5 | |
80 | #define in6 %i6 | |
81 | #define in7 %i7 | |
82 | ||
83 | #define out0 %o0 | |
84 | #define out1 %o1 | |
85 | #define out2 %o2 | |
86 | #define out3 %o3 | |
87 | #define out4 %o4 | |
88 | #define out5 %o5 | |
89 | #define out6 %o6 | |
90 | #define out7 %o7 | |
91 | ||
72997517 | 92 | #define stub stb /* byte store used by the store macros */ |
e0d769ca AP |
93 | |
94 | changequote({,}) | |
95 | ||
96 | ||
97 | ! Macro definitions: | |
98 | ||
99 | ||
100 | ! {ip_macro} | |
101 | ! | |
102 | ! The logic used in initial and final permutations is the same as in | |
103 | ! the C code. The permutations are done with a clever technique built | |
104 | ! from shift, xor and and-mask operations. | |
105 | ! | |
106 | ! The macro also loads address sbox 1 to 5 to global 1 to 5, address | |
107 | ! sbox 6 to local6, and address sbox 8 to out3. | |
108 | ! | |
109 | ! Rotates the halves left by 3 to bring the sbox bits into convenient positions. | |
110 | ! | |
111 | ! Loads key first round from address in parameter 5 to out0, out1. | |
112 | ! | |
436ad81f | 113 | ! After the original LibDES initial permutation, the resulting left |
e0d769ca AP |
114 | ! is in the variable initially used for right and vice versa. The macro |
115 | ! implements the possibility to keep the halves in the original registers. | |
116 | ! | |
117 | ! parameter 1 left | |
118 | ! parameter 2 right | |
119 | ! parameter 3 result left (modify in first round) | |
120 | ! parameter 4 result right (use in first round) | |
121 | ! parameter 5 key address | |
122 | ! parameter 6 1/2 for include encryption/decryption | |
123 | ! parameter 7 1 for move in1 to in3 | |
124 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | |
125 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | |
126 | ||
127 | define(ip_macro, { | |
128 | ||
129 | ! {ip_macro}: initial permutation; also derives further sbox addresses from global1 and loads the first round key | |
130 | ! $1 $2 $4 $3 $5 $6 $7 $8 $9 | |
131 | ||
132 | ld [out2+256], local1 | |
133 | srl $2, 4, local4 | |
134 | ||
135 | xor local4, $1, local4 | |
136 | ifelse($7,1,{mov in1, in3},{nop}) | |
137 | ||
138 | ld [out2+260], local2 | |
139 | and local4, local1, local4 | |
140 | ifelse($8,1,{mov in3, in4},{}) | |
141 | ifelse($8,2,{mov in4, in3},{}) | |
142 | ||
143 | ld [out2+280], out4 ! loop counter for the rounds | |
144 | sll local4, 4, local1 | |
145 | xor $1, local4, $1 | |
146 | ||
147 | ld [out2+264], local3 | |
148 | srl $1, 16, local4 | |
149 | xor $2, local1, $2 | |
150 | ||
f22e1e4d | 151 | ifelse($9,1,{LDPTR KS3, in4},{}) |
e0d769ca | 152 | xor local4, $2, local4 |
f22e1e4d | 153 | nop !sethi %hi(DES_SPtrans), global1 ! sbox addr |
e0d769ca | 154 | 
f22e1e4d | 155 | ifelse($9,1,{LDPTR KS2, in3},{}) |
e0d769ca | 156 | and local4, local2, local4 |
f22e1e4d | 157 | nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr |
e0d769ca AP |
158 | |
159 | sll local4, 16, local1 | |
160 | xor $2, local4, $2 | |
161 | ||
162 | srl $2, 2, local4 | |
163 | xor $1, local1, $1 | |
164 | ||
165 | sethi %hi(16711680), local5 | |
166 | xor local4, $1, local4 | |
167 | ||
168 | and local4, local3, local4 | |
169 | or local5, 255, local5 | |
170 | ||
171 | sll local4, 2, local2 | |
172 | xor $1, local4, $1 | |
173 | ||
174 | srl $1, 8, local4 | |
175 | xor $2, local2, $2 | |
176 | ||
177 | xor local4, $2, local4 | |
178 | add global1, 768, global4 | |
179 | ||
180 | and local4, local5, local4 | |
181 | add global1, 1024, global5 | |
182 | ||
183 | ld [out2+272], local7 | |
184 | sll local4, 8, local1 | |
185 | xor $2, local4, $2 | |
186 | ||
187 | srl $2, 1, local4 | |
188 | xor $1, local1, $1 | |
189 | ||
190 | ld [$5], out0 ! first round key 7531 | |
191 | xor local4, $1, local4 | |
192 | add global1, 256, global2 | |
193 | ||
194 | ld [$5+4], out1 ! first round key 8642 | |
195 | and local4, local7, local4 | |
196 | add global1, 512, global3 | |
197 | ||
198 | sll local4, 1, local1 | |
199 | xor $1, local4, $1 | |
200 | ||
201 | sll $1, 3, local3 | |
202 | xor $2, local1, $2 | |
203 | ||
204 | sll $2, 3, local2 | |
205 | add global1, 1280, local6 ! address sbox 6 | |
206 | ||
207 | srl $1, 29, local4 | |
208 | add global1, 1792, out3 ! address sbox 8 | |
209 | ||
210 | srl $2, 29, local1 | |
211 | or local4, local3, $4 | |
212 | ||
213 | or local2, local1, $3 | |
214 | ||
215 | ifelse($6, 1, { | |
216 | ||
217 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds | |
218 | or local2, local1, $3 | |
219 | xor $4, out0, local1 | |
220 | ||
221 | call .des_enc.1 | |
222 | and local1, 252, local1 | |
223 | ||
224 | },{}) | |
225 | ||
226 | ifelse($6, 2, { | |
227 | ||
228 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds | |
229 | or local2, local1, $3 | |
230 | xor $4, out0, local1 | |
231 | ||
232 | call .des_dec.1 | |
233 | and local1, 252, local1 | |
234 | ||
235 | },{}) | |
236 | }) | |
237 | ||
238 | ||
239 | ! {rounds_macro} | |
240 | ! | |
241 | ! The logic used in the DES rounds is the same as in the C code, | |
242 | ! except that calculations for sbox 1 and sbox 5 begin before | |
243 | ! the previous round is finished. | |
244 | ! | |
245 | ! In each round one half (work) is modified based on key and the | |
246 | ! other half (use). | |
247 | ! | |
248 | ! In this version we do two rounds in a loop repeated 7 times | |
478b50cf | 249 | ! and two rounds separately. |
e0d769ca AP |
250 | ! |
251 | ! One half has the bits for the sboxes in the following positions: | |
252 | ! | |
253 | ! 777777xx555555xx333333xx111111xx | |
254 | ! | |
255 | ! 88xx666666xx444444xx222222xx8888 | |
256 | ! | |
257 | ! The bits for each sbox are xor-ed with the key bits for that box. | |
258 | ! The above xx bits are cleared, and the result used for lookup in | |
259 | ! the sbox table. Each sbox entry contains the 4 output bits permuted | |
260 | ! into 32 bits according to the P permutation. | |
261 | ! | |
262 | ! In the description of DES, left and right are switched after | |
263 | ! each round, except after last round. In this code the original | |
264 | ! left and right are kept in the same register in all rounds, meaning | |
265 | ! that after the 16 rounds the result for right is in the register | |
266 | ! originally used for left. | |
267 | ! | |
268 | ! parameter 1 first work (left in first round) | |
269 | ! parameter 2 first use (right in first round) | |
270 | ! parameter 3 enc/dec 1/-1 | |
271 | ! parameter 4 loop label | |
272 | ! parameter 5 key address register | |
273 | ! parameter 6 optional address for key next encryption/decryption | |
274 | ! parameter 7 not empty for include retl | |
275 | ! | |
276 | ! also compares in2 to 8 | |
277 | ||
278 | define(rounds_macro, { | |
279 | ||
280 | ! {rounds_macro}: the DES rounds, two per loop iteration plus two more after the loop | |
281 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
282 | ||
283 | xor $2, out0, local1 | |
284 | ||
285 | ld [out2+284], local5 ! 0x0000FC00 mask used for the sbox 3 and 4 lookups | |
f22e1e4d | 286 | ba $4 |
e0d769ca AP |
287 | and local1, 252, local1 |
288 | ||
289 | .align 32 | |
290 | ||
291 | $4: | |
292 | ! local6 is address sbox 6 | |
293 | ! out3 is address sbox 8 | |
294 | ! out4 is loop counter | |
295 | ||
296 | ld [global1+local1], local1 | |
297 | xor $2, out1, out1 ! 8642 | |
298 | xor $2, out0, out0 ! 7531 | |
b884556e | 299 | ! fmovs %f0, %f0 ! fxor used for alignment |
e0d769ca AP |
300 | |
301 | srl out1, 4, local0 ! rotate 4 right | |
302 | and out0, local5, local3 ! 3 | |
b884556e | 303 | ! fmovs %f0, %f0 |
e0d769ca AP |
304 | |
305 | ld [$5+$3*8], local7 ! key 7531 next round | |
306 | srl local3, 8, local3 ! 3 | |
307 | and local0, 252, local2 ! 2 | |
b884556e | 308 | ! fmovs %f0, %f0 |
e0d769ca AP |
309 | |
310 | ld [global3+local3],local3 ! 3 | |
311 | sll out1, 28, out1 ! rotate | |
312 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7 | |
313 | ||
df443918 | 314 | ld [global2+local2], local2 ! 2 |
e0d769ca AP |
315 | srl out0, 24, local1 ! 7 |
316 | or out1, local0, out1 ! rotate | |
317 | ||
318 | ldub [out2+local1], local1 ! 7 (and 0xFC) | |
319 | srl out1, 24, local0 ! 8 | |
320 | and out1, local5, local4 ! 4 | |
321 | ||
322 | ldub [out2+local0], local0 ! 8 (and 0xFC) | |
323 | srl local4, 8, local4 ! 4 | |
324 | xor $1, local2, $1 ! 2 finished local2 now sbox 6 | |
325 | ||
326 | ld [global4+local4],local4 ! 4 | |
327 | srl out1, 16, local2 ! 6 | |
328 | xor $1, local3, $1 ! 3 finished local3 now sbox 5 | |
329 | ||
330 | ld [out3+local0],local0 ! 8 | |
331 | and local2, 252, local2 ! 6 | |
332 | add global1, 1536, local5 ! address sbox 7 | |
333 | ||
334 | ld [local6+local2], local2 ! 6 | |
335 | srl out0, 16, local3 ! 5 | |
336 | xor $1, local4, $1 ! 4 finished | |
337 | ||
338 | ld [local5+local1],local1 ! 7 | |
339 | and local3, 252, local3 ! 5 | |
340 | xor $1, local0, $1 ! 8 finished | |
341 | ||
342 | ld [global5+local3],local3 ! 5 | |
343 | xor $1, local2, $1 ! 6 finished | |
344 | subcc out4, 1, out4 | |
345 | ||
346 | ld [$5+$3*8+4], out0 ! key 8642 next round | |
347 | xor $1, local7, local2 ! sbox 5 next round | |
348 | xor $1, local1, $1 ! 7 finished | |
349 | ||
350 | srl local2, 16, local2 ! sbox 5 next round | |
351 | xor $1, local3, $1 ! 5 finished | |
352 | ||
353 | ld [$5+$3*16+4], out1 ! key 8642 next round again | |
354 | and local2, 252, local2 ! sbox5 next round | |
355 | ! next round | |
356 | xor $1, local7, local7 ! 7531 | |
357 | ||
358 | ld [global5+local2], local2 ! 5 | |
359 | srl local7, 24, local3 ! 7 | |
360 | xor $1, out0, out0 ! 8642 | |
361 | ||
362 | ldub [out2+local3], local3 ! 7 (and 0xFC) | |
363 | srl out0, 4, local0 ! rotate 4 right | |
364 | and local7, 252, local1 ! 1 | |
365 | ||
366 | sll out0, 28, out0 ! rotate | |
367 | xor $2, local2, $2 ! 5 finished local2 used | |
368 | ||
369 | srl local0, 8, local4 ! 4 | |
370 | and local0, 252, local2 ! 2 | |
371 | ld [local5+local3], local3 ! 7 | |
372 | ||
373 | srl local0, 16, local5 ! 6 | |
374 | or out0, local0, out0 ! rotate | |
375 | ld [global2+local2], local2 ! 2 | |
376 | ||
377 | srl out0, 24, local0 | |
378 | ld [$5+$3*16], out0 ! key 7531 next round | |
379 | and local4, 252, local4 ! 4 | |
380 | ||
381 | and local5, 252, local5 ! 6 | |
382 | ld [global4+local4], local4 ! 4 | |
383 | xor $2, local3, $2 ! 7 finished local3 used | |
384 | ||
385 | and local0, 252, local0 ! 8 | |
386 | ld [local6+local5], local5 ! 6 | |
387 | xor $2, local2, $2 ! 2 finished local2 now sbox 3 | |
388 | ||
389 | srl local7, 8, local2 ! 3 start | |
390 | ld [out3+local0], local0 ! 8 | |
391 | xor $2, local4, $2 ! 4 finished | |
392 | ||
393 | and local2, 252, local2 ! 3 | |
394 | ld [global1+local1], local1 ! 1 | |
395 | xor $2, local5, $2 ! 6 finished local5 used | |
396 | ||
397 | ld [global3+local2], local2 ! 3 | |
398 | xor $2, local0, $2 ! 8 finished | |
399 | add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two round keys per iteration) | |
400 | ||
401 | ld [out2+284], local5 ! restore the 0x0000FC00 mask, local5 was reused for sbox 7 and 6 | |
402 | xor $2, out0, local4 ! sbox 1 next round | |
403 | xor $2, local1, $2 ! 1 finished | |
404 | ||
405 | xor $2, local2, $2 ! 3 finished | |
f22e1e4d | 406 | bne $4 |
e0d769ca AP |
407 | and local4, 252, local1 ! sbox 1 next round |
408 | ||
409 | ! two rounds more (after the loop): | |
410 | ||
411 | ld [global1+local1], local1 | |
412 | xor $2, out1, out1 | |
413 | xor $2, out0, out0 | |
414 | ||
415 | srl out1, 4, local0 ! rotate | |
416 | and out0, local5, local3 | |
417 | ||
418 | ld [$5+$3*8], local7 ! key 7531 | |
419 | srl local3, 8, local3 | |
420 | and local0, 252, local2 | |
421 | ||
422 | ld [global3+local3],local3 | |
423 | sll out1, 28, out1 ! rotate | |
424 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7 | |
425 | ||
426 | ld [global2+local2], local2 | |
427 | srl out0, 24, local1 | |
428 | or out1, local0, out1 ! rotate | |
429 | ||
430 | ldub [out2+local1], local1 | |
431 | srl out1, 24, local0 | |
432 | and out1, local5, local4 | |
433 | ||
434 | ldub [out2+local0], local0 | |
435 | srl local4, 8, local4 | |
436 | xor $1, local2, $1 ! 2 finished local2 now sbox 6 | |
437 | ||
438 | ld [global4+local4],local4 | |
439 | srl out1, 16, local2 | |
440 | xor $1, local3, $1 ! 3 finished local3 now sbox 5 | |
441 | ||
442 | ld [out3+local0],local0 | |
443 | and local2, 252, local2 | |
444 | add global1, 1536, local5 ! address sbox 7 | |
445 | ||
446 | ld [local6+local2], local2 | |
447 | srl out0, 16, local3 | |
448 | xor $1, local4, $1 ! 4 finished | |
449 | ||
450 | ld [local5+local1],local1 | |
451 | and local3, 252, local3 | |
452 | xor $1, local0, $1 | |
453 | ||
454 | ld [global5+local3],local3 | |
455 | xor $1, local2, $1 ! 6 finished | |
456 | cmp in2, 8 | |
457 | ||
458 | ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter, reloaded only when parameter 6 is given | |
459 | xor $1, local7, local2 ! sbox 5 next round | |
460 | xor $1, local1, $1 ! 7 finished | |
461 | ||
462 | ld [$5+$3*8+4], out0 | |
463 | srl local2, 16, local2 ! sbox 5 next round | |
464 | xor $1, local3, $1 ! 5 finished | |
465 | ||
466 | and local2, 252, local2 | |
467 | ! next round (two rounds more) | |
468 | xor $1, local7, local7 ! 7531 | |
469 | ||
470 | ld [global5+local2], local2 | |
471 | srl local7, 24, local3 | |
472 | xor $1, out0, out0 ! 8642 | |
473 | ||
474 | ldub [out2+local3], local3 | |
475 | srl out0, 4, local0 ! rotate | |
476 | and local7, 252, local1 | |
477 | ||
478 | sll out0, 28, out0 ! rotate | |
479 | xor $2, local2, $2 ! 5 finished local2 used | |
480 | ||
481 | srl local0, 8, local4 | |
482 | and local0, 252, local2 | |
483 | ld [local5+local3], local3 | |
484 | ||
485 | srl local0, 16, local5 | |
486 | or out0, local0, out0 ! rotate | |
487 | ld [global2+local2], local2 | |
488 | ||
489 | srl out0, 24, local0 | |
490 | ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption | |
491 | and local4, 252, local4 | |
492 | ||
493 | and local5, 252, local5 | |
494 | ld [global4+local4], local4 | |
495 | xor $2, local3, $2 ! 7 finished local3 used | |
496 | ||
497 | and local0, 252, local0 | |
498 | ld [local6+local5], local5 | |
499 | xor $2, local2, $2 ! 2 finished local2 now sbox 3 | |
500 | ||
501 | srl local7, 8, local2 ! 3 start | |
502 | ld [out3+local0], local0 | |
503 | xor $2, local4, $2 | |
504 | ||
505 | and local2, 252, local2 | |
506 | ld [global1+local1], local1 | |
507 | xor $2, local5, $2 ! 6 finished local5 used | |
508 | ||
509 | ld [global3+local2], local2 | |
510 | srl $1, 3, local3 | |
511 | xor $2, local0, $2 | |
512 | ||
513 | ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption | |
514 | sll $1, 29, local4 | |
515 | xor $2, local1, $2 | |
516 | ||
517 | ifelse($7,{}, {}, {retl}) | |
518 | xor $2, local2, $2 | |
519 | }) | |
520 | ||
521 | ||
522 | ! {fp_macro} | |
523 | ! | |
524 | ! parameter 1 right (original left) | |
525 | ! parameter 2 left (original right) | |
526 | ! parameter 3 1 for optional store to [in0] | |
527 | ! parameter 4 1 for load input/output address to local5/7 | |
528 | ! | |
46f4e1be | 529 | ! The final permutation logic switches the halves, meaning that |
436ad81f | 530 | ! left and right end up in the registers originally used. |
e0d769ca AP |
531 | |
532 | define(fp_macro, { | |
533 | ||
534 | ! {fp_macro}: final permutation; optionally stores the result to [in0] and loads the input/output pointers | |
535 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
536 | ||
537 | ! first undo the rotate left by 3 that was done after the initial permutation | |
538 | ! original left arrives pre-shifted: right by 3 in local3 and left by 29 in local4 | |
539 | ||
540 | sll $2, 29, local1 | |
541 | or local3, local4, $1 | |
542 | ||
543 | srl $2, 3, $2 | |
544 | sethi %hi(0x55555555), local2 | |
545 | ||
546 | or $2, local1, $2 | |
547 | or local2, %lo(0x55555555), local2 | |
548 | ||
549 | srl $2, 1, local3 | |
550 | sethi %hi(0x00ff00ff), local1 | |
551 | xor local3, $1, local3 | |
552 | or local1, %lo(0x00ff00ff), local1 | |
553 | and local3, local2, local3 | |
554 | sethi %hi(0x33333333), local4 | |
555 | sll local3, 1, local2 | |
556 | ||
557 | xor $1, local3, $1 | |
558 | ||
559 | srl $1, 8, local3 | |
560 | xor $2, local2, $2 | |
561 | xor local3, $2, local3 | |
562 | or local4, %lo(0x33333333), local4 | |
563 | and local3, local1, local3 | |
564 | sethi %hi(0x0000ffff), local1 | |
565 | sll local3, 8, local2 | |
566 | ||
567 | xor $2, local3, $2 | |
568 | ||
569 | srl $2, 2, local3 | |
570 | xor $1, local2, $1 | |
571 | xor local3, $1, local3 | |
572 | or local1, %lo(0x0000ffff), local1 | |
573 | and local3, local4, local3 | |
574 | sethi %hi(0x0f0f0f0f), local4 | |
575 | sll local3, 2, local2 | |
576 | ||
f22e1e4d | 577 | ifelse($4,1, {LDPTR INPUT, local5}) |
e0d769ca AP |
578 | xor $1, local3, $1 |
579 | ||
f22e1e4d | 580 | ifelse($4,1, {LDPTR OUTPUT, local7}) |
e0d769ca AP |
581 | srl $1, 16, local3 |
582 | xor $2, local2, $2 | |
583 | xor local3, $2, local3 | |
584 | or local4, %lo(0x0f0f0f0f), local4 | |
585 | and local3, local1, local3 | |
586 | sll local3, 16, local2 | |
587 | ||
588 | xor $2, local3, local1 | |
589 | ||
590 | srl local1, 4, local3 | |
591 | xor $1, local2, $1 | |
592 | xor local3, $1, local3 | |
593 | and local3, local4, local3 | |
594 | sll local3, 4, local2 | |
595 | ||
596 | xor $1, local3, $1 | |
597 | ||
598 | ! optional store, emitted only when parameter 3 is 1: | |
599 | ||
600 | ifelse($3,1, {st $1, [in0]}) | |
601 | ||
602 | xor local1, local2, $2 | |
603 | ||
604 | ifelse($3,1, {st $2, [in0+4]}) | |
605 | ||
606 | }) | |
607 | ||
608 | ||
609 | ! {fp_ip_macro} | |
610 | ! | |
611 | ! Does initial permutation for next block mixed with | |
612 | ! final permutation for current block. | |
613 | ! | |
614 | ! parameter 1 original left | |
615 | ! parameter 2 original right | |
616 | ! parameter 3 left ip | |
617 | ! parameter 4 right ip | |
618 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | |
619 | ! 2: mov in4 to in3 | |
620 | ! | |
621 | ! also adds -8 to length in2 and loads loop counter to out4 | |
622 | ||
623 | define(fp_ip_macro, { | |
624 | ||
625 | ! {fp_ip_macro}: final permutation of the current block interleaved with the initial permutation of the next block | |
626 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
627 | ||
628 | define({temp1},{out4}) | |
629 | define({temp2},{local3}) | |
630 | ||
631 | define({ip1},{local1}) | |
632 | define({ip2},{local2}) | |
633 | define({ip4},{local4}) | |
634 | define({ip5},{local5}) | |
635 | ||
636 | ! $1 in local3, local4 | |
637 | ||
638 | ld [out2+256], ip1 | |
639 | sll out5, 29, temp1 | |
640 | or local3, local4, $1 | |
641 | ||
642 | srl out5, 3, $2 | |
643 | ifelse($5,2,{mov in4, in3}) | |
644 | ||
645 | ld [out2+272], ip5 | |
646 | srl $4, 4, local0 | |
647 | or $2, temp1, $2 | |
648 | ||
649 | srl $2, 1, temp1 | |
650 | xor temp1, $1, temp1 | |
651 | ||
652 | and temp1, ip5, temp1 | |
653 | xor local0, $3, local0 | |
654 | ||
655 | sll temp1, 1, temp2 | |
656 | xor $1, temp1, $1 | |
657 | ||
658 | and local0, ip1, local0 | |
659 | add in2, -8, in2 | |
660 | ||
661 | sll local0, 4, local7 | |
662 | xor $3, local0, $3 | |
663 | ||
664 | ld [out2+268], ip4 | |
665 | srl $1, 8, temp1 | |
666 | xor $2, temp2, $2 | |
667 | ld [out2+260], ip2 | |
668 | srl $3, 16, local0 | |
669 | xor $4, local7, $4 | |
670 | xor temp1, $2, temp1 | |
671 | xor local0, $4, local0 | |
672 | and temp1, ip4, temp1 | |
673 | and local0, ip2, local0 | |
674 | sll temp1, 8, temp2 | |
675 | xor $2, temp1, $2 | |
676 | sll local0, 16, local7 | |
677 | xor $4, local0, $4 | |
678 | ||
679 | srl $2, 2, temp1 | |
680 | xor $1, temp2, $1 | |
681 | ||
682 | ld [out2+264], temp2 ! ip3 | |
683 | srl $4, 2, local0 | |
684 | xor $3, local7, $3 | |
685 | xor temp1, $1, temp1 | |
686 | xor local0, $3, local0 | |
687 | and temp1, temp2, temp1 | |
688 | and local0, temp2, local0 | |
689 | sll temp1, 2, temp2 | |
690 | xor $1, temp1, $1 | |
691 | sll local0, 2, local7 | |
692 | xor $3, local0, $3 | |
693 | ||
694 | srl $1, 16, temp1 | |
695 | xor $2, temp2, $2 | |
696 | srl $3, 8, local0 | |
697 | xor $4, local7, $4 | |
698 | xor temp1, $2, temp1 | |
699 | xor local0, $4, local0 | |
700 | and temp1, ip2, temp1 | |
701 | and local0, ip4, local0 | |
702 | sll temp1, 16, temp2 | |
703 | xor $2, temp1, local4 | |
704 | sll local0, 8, local7 | |
705 | xor $4, local0, $4 | |
706 | ||
707 | srl $4, 1, local0 | |
708 | xor $3, local7, $3 | |
709 | ||
710 | srl local4, 4, temp1 | |
711 | xor local0, $3, local0 | |
712 | ||
713 | xor $1, temp2, $1 | |
714 | and local0, ip5, local0 | |
715 | ||
716 | sll local0, 1, local7 | |
717 | xor temp1, $1, temp1 | |
718 | ||
719 | xor $3, local0, $3 | |
720 | xor $4, local7, $4 | |
721 | ||
722 | sll $3, 3, local5 | |
723 | and temp1, ip1, temp1 | |
724 | ||
725 | sll temp1, 4, temp2 | |
726 | xor $1, temp1, $1 | |
727 | ||
f22e1e4d | 728 | ifelse($5,1,{LDPTR KS2, in4}) |
e0d769ca AP |
729 | sll $4, 3, local2 |
730 | xor local4, temp2, $2 | |
731 | ||
46f4e1be | 732 | ! reload out4 since it was used as a temporary above: |
e0d769ca AP |
733 | |
734 | ld [out2+280], out4 ! loop counter | |
735 | ||
736 | srl $3, 29, local0 | |
737 | ifelse($5,1,{add in4, 120, in4}) | |
738 | ||
f22e1e4d | 739 | ifelse($5,1,{LDPTR KS1, in3}) |
e0d769ca AP |
740 | srl $4, 29, local7 |
741 | ||
742 | or local0, local5, $4 | |
743 | or local2, local7, $3 | |
744 | ||
745 | }) | |
746 | ||
747 | ||
748 | ||
749 | ! {load_little_endian} | |
750 | ! | |
751 | ! parameter 1 address | |
752 | ! parameter 2 destination left | |
753 | ! parameter 3 destination right | |
46f4e1be | 754 | ! parameter 4 temporary |
e0d769ca AP |
755 | ! parameter 5 label |
756 | ||
757 | define(load_little_endian, { | |
758 | ||
759 | ! {load_little_endian}: assemble two 32-bit words from 8 consecutive bytes, one byte load at a time | |
760 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
761 | ||
762 | ! first byte in memory goes to the rightmost (least significant) byte in the register | |
763 | ||
e0d769ca AP |
764 | $5: |
765 | ldub [$1+3], $2 | |
766 | ||
767 | ldub [$1+2], $4 | |
768 | sll $2, 8, $2 | |
769 | or $2, $4, $2 | |
770 | ||
771 | ldub [$1+1], $4 | |
772 | sll $2, 8, $2 | |
773 | or $2, $4, $2 | |
774 | ||
775 | ldub [$1+0], $4 | |
776 | sll $2, 8, $2 | |
777 | or $2, $4, $2 | |
778 | ||
779 | ||
780 | ldub [$1+3+4], $3 | |
781 | ||
782 | ldub [$1+2+4], $4 | |
783 | sll $3, 8, $3 | |
784 | or $3, $4, $3 | |
785 | ||
786 | ldub [$1+1+4], $4 | |
787 | sll $3, 8, $3 | |
788 | or $3, $4, $3 | |
789 | ||
790 | ldub [$1+0+4], $4 | |
791 | sll $3, 8, $3 | |
792 | or $3, $4, $3 | |
793 | $5a: | |
794 | ||
795 | }) | |
796 | ||
797 | ||
798 | ! {load_little_endian_inc} | |
799 | ! | |
800 | ! parameter 1 address | |
801 | ! parameter 2 destination left | |
802 | ! parameter 3 destination right | |
46f4e1be | 803 | ! parameter 4 temporary |
e0d769ca AP |
804 | ! parameter 5 label |
805 | ! | |
806 | ! adds 8 to address | |
807 | ||
808 | define(load_little_endian_inc, { | |
809 | ||
810 | ! {load_little_endian_inc}: assemble two 32-bit words from 8 consecutive bytes and advance the address by 8 | |
811 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
812 | ||
813 | ! first byte in memory goes to the rightmost byte in the register; the address is bumped midway, so the later offsets carry -8 | |
814 | ||
e0d769ca AP |
815 | $5: |
816 | ldub [$1+3], $2 | |
817 | ||
818 | ldub [$1+2], $4 | |
819 | sll $2, 8, $2 | |
820 | or $2, $4, $2 | |
821 | ||
822 | ldub [$1+1], $4 | |
823 | sll $2, 8, $2 | |
824 | or $2, $4, $2 | |
825 | ||
826 | ldub [$1+0], $4 | |
827 | sll $2, 8, $2 | |
828 | or $2, $4, $2 | |
829 | ||
830 | ldub [$1+3+4], $3 | |
831 | add $1, 8, $1 | |
832 | ||
833 | ldub [$1+2+4-8], $4 | |
834 | sll $3, 8, $3 | |
835 | or $3, $4, $3 | |
836 | ||
837 | ldub [$1+1+4-8], $4 | |
838 | sll $3, 8, $3 | |
839 | or $3, $4, $3 | |
840 | ||
841 | ldub [$1+0+4-8], $4 | |
842 | sll $3, 8, $3 | |
843 | or $3, $4, $3 | |
844 | $5a: | |
845 | ||
846 | }) | |
847 | ||
848 | ||
849 | ! {load_n_bytes} | |
850 | ! | |
851 | ! Loads 1 to 7 bytes little endian | |
852 | ! Remaining bytes are zeroed. | |
853 | ! | |
854 | ! parameter 1 address | |
855 | ! parameter 2 length | |
856 | ! parameter 3 destination register left | |
857 | ! parameter 4 destination register right | |
858 | ! parameter 5 temp | |
859 | ! parameter 6 temp2 | |
860 | ! parameter 7 label | |
861 | ! parameter 8 return label | |
862 | ||
863 | define(load_n_bytes, { | |
864 | ||
865 | ! {load_n_bytes}: zero both destinations, then jump through a table on the length and fall through the byte loads; table entry 0 is offset 0, which is the call itself, so a length of 0 must not be passed | |
866 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9 | |
867 | ||
f22e1e4d | 868 | $7.0: call .+8 |
e0d769ca AP |
869 | sll $2, 2, $6 |
870 | ||
f22e1e4d | 871 | add %o7,$7.jmp.table-$7.0,$5 |
e0d769ca AP |
872 | |
873 | add $5, $6, $5 | |
874 | mov 0, $4 | |
875 | ||
876 | ld [$5], $5 | |
877 | ||
f22e1e4d | 878 | jmp %o7+$5 |
e0d769ca AP |
879 | mov 0, $3 |
880 | ||
881 | $7.7: | |
882 | ldub [$1+6], $5 | |
883 | sll $5, 16, $5 | |
884 | or $3, $5, $3 | |
885 | $7.6: | |
886 | ldub [$1+5], $5 | |
887 | sll $5, 8, $5 | |
888 | or $3, $5, $3 | |
889 | $7.5: | |
890 | ldub [$1+4], $5 | |
891 | or $3, $5, $3 | |
892 | $7.4: | |
893 | ldub [$1+3], $5 | |
894 | sll $5, 24, $5 | |
895 | or $4, $5, $4 | |
896 | $7.3: | |
897 | ldub [$1+2], $5 | |
898 | sll $5, 16, $5 | |
899 | or $4, $5, $4 | |
900 | $7.2: | |
901 | ldub [$1+1], $5 | |
902 | sll $5, 8, $5 | |
903 | or $4, $5, $4 | |
904 | $7.1: | |
905 | ldub [$1+0], $5 | |
f22e1e4d | 906 | ba $8 |
e0d769ca AP |
907 | or $4, $5, $4 |
908 | ||
909 | .align 4 | |
910 | ||
911 | $7.jmp.table: | |
912 | .word 0 | |
f22e1e4d AP |
913 | .word $7.1-$7.0 |
914 | .word $7.2-$7.0 | |
915 | .word $7.3-$7.0 | |
916 | .word $7.4-$7.0 | |
917 | .word $7.5-$7.0 | |
918 | .word $7.6-$7.0 | |
919 | .word $7.7-$7.0 | |
e0d769ca AP |
920 | }) |
921 | ||
922 | ||
923 | ! {store_little_endian} | |
924 | ! | |
925 | ! parameter 1 address | |
926 | ! parameter 2 source left | |
927 | ! parameter 3 source right | |
46f4e1be | 928 | ! parameter 4 temporary (parameter 5 is the label) |
e0d769ca AP |
929 | |
930 | define(store_little_endian, { | |
931 | ||
932 | ! {store_little_endian}: store two 32-bit words as 8 consecutive bytes, one byte store at a time | |
933 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9 | |
934 | ||
935 | ! rightmost (least significant) byte in the register goes to the first byte in memory | |
936 | ||
e0d769ca AP |
937 | $5: |
938 | and $2, 255, $4 | |
939 | stub $4, [$1+0] | |
940 | ||
941 | srl $2, 8, $4 | |
942 | and $4, 255, $4 | |
943 | stub $4, [$1+1] | |
944 | ||
945 | srl $2, 16, $4 | |
946 | and $4, 255, $4 | |
947 | stub $4, [$1+2] | |
948 | ||
949 | srl $2, 24, $4 | |
950 | stub $4, [$1+3] | |
951 | ||
952 | ||
953 | and $3, 255, $4 | |
954 | stub $4, [$1+0+4] | |
955 | ||
956 | srl $3, 8, $4 | |
957 | and $4, 255, $4 | |
958 | stub $4, [$1+1+4] | |
959 | ||
960 | srl $3, 16, $4 | |
961 | and $4, 255, $4 | |
962 | stub $4, [$1+2+4] | |
963 | ||
964 | srl $3, 24, $4 | |
965 | stub $4, [$1+3+4] | |
966 | ||
967 | $5a: | |
968 | ||
969 | }) | |
970 | ||
971 | ||
972 | ! {store_n_bytes} | |
973 | ! | |
974 | ! Stores 1 to 7 bytes little endian | |
975 | ! | |
976 | ! parameter 1 address | |
977 | ! parameter 2 length | |
978 | ! parameter 3 source register left | |
979 | ! parameter 4 source register right | |
980 | ! parameter 5 temp | |
981 | ! parameter 6 temp2 | |
982 | ! parameter 7 label | |
983 | ! parameter 8 return label | |
984 | ||
985 | define(store_n_bytes, { | |
986 | ||
987 | ! {store_n_bytes}: jump through a table on the length and fall through the byte stores; table entry 0 is offset 0, which is the call itself, so a length of 0 must not be passed | |
988 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9 | |
989 | ||
f22e1e4d | 990 | $7.0: call .+8 |
e0d769ca AP |
991 | sll $2, 2, $6 |
992 | ||
f22e1e4d | 993 | add %o7,$7.jmp.table-$7.0,$5 |
e0d769ca AP |
994 | |
995 | add $5, $6, $5 | |
996 | ||
997 | ld [$5], $5 | |
f22e1e4d AP |
998 | |
999 | jmp %o7+$5 | |
e0d769ca AP |
1000 | nop |
1001 | ||
1002 | $7.7: | |
1003 | srl $3, 16, $5 | |
1004 | and $5, 0xff, $5 | |
1005 | stub $5, [$1+6] | |
1006 | $7.6: | |
1007 | srl $3, 8, $5 | |
1008 | and $5, 0xff, $5 | |
1009 | stub $5, [$1+5] | |
1010 | $7.5: | |
1011 | and $3, 0xff, $5 | |
1012 | stub $5, [$1+4] | |
1013 | $7.4: | |
1014 | srl $4, 24, $5 | |
1015 | stub $5, [$1+3] | |
1016 | $7.3: | |
1017 | srl $4, 16, $5 | |
1018 | and $5, 0xff, $5 | |
1019 | stub $5, [$1+2] | |
1020 | $7.2: | |
1021 | srl $4, 8, $5 | |
1022 | and $5, 0xff, $5 | |
1023 | stub $5, [$1+1] | |
1024 | $7.1: | |
1025 | and $4, 0xff, $5 | |
1026 | ||
1027 | ||
f22e1e4d | 1028 | ba $8 |
e0d769ca AP |
1029 | stub $5, [$1] |
1030 | ||
1031 | .align 4 | |
1032 | ||
1033 | $7.jmp.table: | |
1034 | ||
1035 | .word 0 | |
f22e1e4d AP |
1036 | .word $7.1-$7.0 |
1037 | .word $7.2-$7.0 | |
1038 | .word $7.3-$7.0 | |
1039 | .word $7.4-$7.0 | |
1040 | .word $7.5-$7.0 | |
1041 | .word $7.6-$7.0 | |
1042 | .word $7.7-$7.0 | |
e0d769ca AP |
1043 | }) |
1044 | ||
1045 | ||
1046 | define(testvalue,{1}) | |
1047 | ||
1048 | define(register_init, { | |
1049 | ||
1050 | ! For test purposes: set local0 to the test value, then copy it to each register given as parameter 1 to 8 and to the working registers below | |
1051 | ||
1052 | sethi %hi(testvalue), local0 | |
1053 | or local0, %lo(testvalue), local0 | |
1054 | ||
1055 | ifelse($1,{},{}, {mov local0, $1}) | |
1056 | ifelse($2,{},{}, {mov local0, $2}) | |
1057 | ifelse($3,{},{}, {mov local0, $3}) | |
1058 | ifelse($4,{},{}, {mov local0, $4}) | |
1059 | ifelse($5,{},{}, {mov local0, $5}) | |
1060 | ifelse($6,{},{}, {mov local0, $6}) | |
1061 | ifelse($7,{},{}, {mov local0, $7}) | |
1062 | ifelse($8,{},{}, {mov local0, $8}) | |
1063 | ||
1064 | mov local0, local1 | |
1065 | mov local0, local2 | |
1066 | mov local0, local3 | |
1067 | mov local0, local4 | |
1068 | mov local0, local5 | |
1069 | mov local0, local7 | |
1070 | mov local0, local6 | |
1071 | mov local0, out0 | |
1072 | mov local0, out1 | |
1073 | mov local0, out2 | |
1074 | mov local0, out3 | |
1075 | mov local0, out4 | |
1076 | mov local0, out5 | |
1077 | mov local0, global1 | |
1078 | mov local0, global2 | |
1079 | mov local0, global3 | |
1080 | mov local0, global4 | |
1081 | mov local0, global5 | |
1082 | ||
1083 | }) | |
1084 | ||
e0d769ca AP |
1085 | .section ".text" |
1086 | ||
1087 | .align 32 | |
1088 | ||
1089 | .des_enc: | |
1090 | ||
1091 | ! encryption rounds as a leaf routine ending in retl; key address in in3, stepped forwards (parameter 3 is 1) | |
1092 | ! loads the first round key of the next encryption/decryption from [in4] | |
1093 | ||
1094 | rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl) | |
1095 | ||
1096 | ||
1097 | .align 32 | |
1098 | ||
1099 | .des_dec: | |
1100 | ! Local subroutine with the decryption rounds. Reached with call from the functions below. | |
1101 | ! implemented with out5 as first parameter to avoid | |
1102 | ! register exchange in ede modes | |
1103 | |
1104 | ! key address in4 | |
1105 | ! loads the first-round key of the next encryption/decryption from [in3] | |
1106 | ! NOTE(review): third argument -1 presumably makes the rounds walk the key schedule backwards - confirm in the rounds macro | |
1107 | rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl) | |
1108 | ||
1109 | ||
1110 | ||
f22e1e4d | 1111 | ! void DES_encrypt1(data, ks, enc) |
e0d769ca AP |
1112 | ! ******************************* | |
1113 | ! Runs IP, the rounds and FP over the two words at [in0] and stores the result back to [in0]. enc == 0 takes the decrypt path. | |
1114 | .align 32 | |
f22e1e4d AP |
1115 | .global DES_encrypt1 | |
1116 | .type DES_encrypt1,#function | |
1117 | |
1118 | DES_encrypt1: | |
e0d769ca | 1119 | |
f22e1e4d | 1120 | save %sp, FRAME, %sp |
e0d769ca | 1121 | |
4c78bc05 AP |
1122 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | |
1123 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | |
1124 | 1: call .+8 ! position independent: %o7 = address of this call | |
1125 | add %o7,global1,global1 ! delay slot: global1 = address of DES_SPtrans | |
1126 | sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and | |
e0d769ca AP |
1127 | |
1128 | ld [in0], in5 ! left | |
e0d769ca AP |
1129 | cmp in2, 0 ! enc | |
1130 | |
f22e1e4d | 1131 | be .encrypt.dec |
f22e1e4d | 1132 | ld [in0+4], out5 ! right (delay slot - loaded on both paths) |
e0d769ca AP |
1133 | |
1134 | ! parameter 6 1/2 for include encryption/decryption | |
1135 | ! parameter 7 1 for move in1 to in3 | |
1136 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | |
1137 | |
1138 | ip_macro(in5, out5, in5, out5, in3, 0, 1, 1) | |
1139 | |
f22e1e4d | 1140 | rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used |
e0d769ca AP |
1141 | |
1142 | fp_macro(in5, out5, 1) ! 1 for store to [in0] | |
1143 | |
f22e1e4d AP |
1144 | ret | |
1145 | restore | |
e0d769ca AP |
1146 | |
1147 | .encrypt.dec: | |
1148 | |
1149 | add in1, 120, in3 ! use last subkey for first round | |
1150 | |
1151 | ! parameter 6 1/2 for include encryption/decryption | |
1152 | ! parameter 7 1 for move in1 to in3 | |
1153 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | |
1154 | |
1155 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4 | |
1156 | |
1157 | fp_macro(out5, in5, 1) ! 1 for store to [in0] | |
1158 | |
f22e1e4d AP |
1159 | ret | |
1160 | restore | |
e0d769ca | 1161 | |
f22e1e4d AP |
1162 | .DES_encrypt1.end: | |
1163 | .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1 | |
e0d769ca AP |
1164 | |
1165 | ||
f22e1e4d | 1166 | ! void DES_encrypt2(data, ks, enc) |
e0d769ca AP |
1167 | !********************************* | |
1168 | |
1169 | ! encrypts/decrypts without initial/final permutation | |
1170 | ! enc == 0 takes the decrypt path. The data pointer is parked in the frame because in0 and in1 are reused as scratch. | |
1171 | .align 32 | |
f22e1e4d AP |
1172 | .global DES_encrypt2 | |
1173 | .type DES_encrypt2,#function | |
e0d769ca | 1174 | |
f22e1e4d | 1175 | DES_encrypt2: |
e0d769ca | 1176 | |
f22e1e4d AP |
1177 | save %sp, FRAME, %sp | |
1178 | |
4c78bc05 AP |
1179 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | |
1180 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | |
1181 | 1: call .+8 ! position independent: %o7 = address of this call | |
1182 | add %o7,global1,global1 ! delay slot: global1 = address of DES_SPtrans | |
1183 | sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and | |
e0d769ca AP |
1184 | |
1185 | ! Set sbox addresses 1 to 6 and 8 and rotate halves 3 left | |
1186 | ! Errors caught by destest? Yes. Still? *NO* | |
1187 | |
f22e1e4d | 1188 | !sethi %hi(DES_SPtrans), global1 ! address sbox 1 |
e0d769ca | 1189 | |
f22e1e4d | 1190 | !or global1, %lo(DES_SPtrans), global1 ! sbox 1 |
e0d769ca AP |
1191 | |
1192 | add global1, 256, global2 ! sbox 2 | |
1193 | add global1, 512, global3 ! sbox 3 | |
1194 | |
1195 | ld [in0], out5 ! right | |
1196 | add global1, 768, global4 ! sbox 4 | |
1197 | add global1, 1024, global5 ! sbox 5 | |
1198 | |
1199 | ld [in0+4], in5 ! left | |
1200 | add global1, 1280, local6 ! sbox 6 | |
1201 | add global1, 1792, out3 ! sbox 8 | |
1202 | |
1203 | ! rotate both halves left by 3 | |
1204 | |
1205 | sll in5, 3, local5 | |
1206 | mov in1, in3 ! key address to in3 | |
1207 | |
1208 | sll out5, 3, local7 | |
1209 | srl in5, 29, in5 | |
1210 | |
1211 | srl out5, 29, out5 | |
1212 | add in5, local5, in5 | |
1213 | |
1214 | add out5, local7, out5 | |
1215 | cmp in2, 0 | |
1216 | |
1217 | ! we use our own stackframe | |
1218 | |
f22e1e4d | 1219 | be .encrypt2.dec |
f22e1e4d | 1220 | STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! delay slot: data pointer saved on both paths |
e0d769ca AP |
1221 | |
1222 | ld [in3], out0 ! key 7531 first round | |
1223 | mov LOOPS, out4 ! loop counter | |
1224 | |
1225 | ld [in3+4], out1 ! key 8642 first round | |
1226 | sethi %hi(0x0000FC00), local5 | |
1227 | |
1228 | call .des_enc | |
1229 | mov in3, in4 | |
1230 | |
1231 | ! rotate both halves back right by 3 | |
1232 | sll in5, 29, in0 | |
1233 | srl in5, 3, in5 | |
1234 | sll out5, 29, in1 | |
1235 | add in5, in0, in5 | |
1236 | srl out5, 3, out5 | |
f22e1e4d | 1237 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! reload the data pointer saved above |
e0d769ca AP |
1238 | add out5, in1, out5 | |
1239 | st in5, [in0] | |
1240 | st out5, [in0+4] | |
1241 | |
f22e1e4d AP |
1242 | ret | |
1243 | restore | |
e0d769ca AP |
1244 | |
1245 | |
1246 | .encrypt2.dec: | |
1247 | |
1248 | add in3, 120, in4 ! last subkey is used first | |
1249 | |
1250 | ld [in4], out0 ! key 7531 first round | |
1251 | mov LOOPS, out4 ! loop counter | |
1252 | |
1253 | ld [in4+4], out1 ! key 8642 first round | |
1254 | sethi %hi(0x0000FC00), local5 | |
1255 | |
1256 | mov in5, local1 ! left expected in out5 | |
1257 | mov out5, in5 | |
1258 | |
1259 | call .des_dec | |
1260 | mov local1, out5 | |
1261 | |
1262 | .encrypt2.finish: | |
1263 | |
1264 | ! rotate both halves back right by 3 | |
1265 | sll in5, 29, in0 | |
1266 | srl in5, 3, in5 | |
1267 | sll out5, 29, in1 | |
1268 | add in5, in0, in5 | |
1269 | srl out5, 3, out5 | |
f22e1e4d | 1270 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0 ! reload the data pointer saved above |
e0d769ca AP |
1271 | add out5, in1, out5 | |
1272 | st out5, [in0] | |
1273 | st in5, [in0+4] | |
1274 | |
f22e1e4d AP |
1275 | ret | |
1276 | restore | |
e0d769ca | 1277 | |
f22e1e4d AP |
1278 | .DES_encrypt2.end: | |
1279 | .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2 | |
e0d769ca AP |
1280 | |
1281 | ||
f22e1e4d | 1282 | ! void DES_encrypt3(data, ks1, ks2, ks3) |
e0d769ca AP |
1283 | ! ************************************** | |
1284 | ! Block at [in0]: IP, encrypt with ks1, decrypt with ks2, encrypt with ks3, FP, result stored back to [in0]. | |
1285 | .align 32 | |
f22e1e4d AP |
1286 | .global DES_encrypt3 | |
1287 | .type DES_encrypt3,#function | |
e0d769ca | 1288 | |
f22e1e4d | 1289 | DES_encrypt3: |
e0d769ca | 1290 | |
f22e1e4d | 1291 | save %sp, FRAME, %sp |
e0d769ca | 1292 | |
4c78bc05 AP |
1293 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | |
1294 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | |
1295 | 1: call .+8 ! position independent: %o7 = address of this call | |
1296 | add %o7,global1,global1 ! delay slot: global1 = address of DES_SPtrans | |
1297 | sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and | |
f22e1e4d | 1298 | |
e0d769ca AP |
1299 | ld [in0], in5 ! left | |
1300 | add in2, 120, in4 ! ks2 (last subkey first, used by the decrypt step) | |
e0d769ca AP |
1301 | |
1302 | ld [in0+4], out5 ! right | |
1303 | mov in3, in2 ! save ks3 | |
e0d769ca AP |
1304 | |
1305 | ! parameter 6 1/2 for include encryption/decryption | |
1306 | ! parameter 7 1 for mov in1 to in3 | |
1307 | ! parameter 8 1 for mov in3 to in4 | |
1308 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | |
1309 | |
1310 | ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0) | |
1311 | |
1312 | call .des_dec | |
1313 | mov in2, in3 ! preload ks3 | |
1314 | |
1315 | call .des_enc | |
1316 | nop | |
1317 | |
1318 | fp_macro(in5, out5, 1) | |
1319 | |
f22e1e4d AP |
1320 | ret | |
1321 | restore | |
e0d769ca | 1322 | |
f22e1e4d AP |
1323 | .DES_encrypt3.end: | |
1324 | .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3 | |
e0d769ca AP |
1325 | |
1326 | ||
f22e1e4d | 1327 | ! void DES_decrypt3(data, ks1, ks2, ks3) |
e0d769ca AP |
1328 | ! ************************************** | |
1329 | ! Block at [in0]: IP, decrypt with ks3, encrypt with ks2, decrypt with ks1, FP, result stored back to [in0]. | |
1330 | .align 32 | |
f22e1e4d AP |
1331 | .global DES_decrypt3 | |
1332 | .type DES_decrypt3,#function | |
e0d769ca | 1333 | |
f22e1e4d | 1334 | DES_decrypt3: |
e0d769ca | 1335 | |
f22e1e4d | 1336 | save %sp, FRAME, %sp |
e0d769ca | 1337 | |
4c78bc05 AP |
1338 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | |
1339 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | |
1340 | 1: call .+8 ! position independent: %o7 = address of this call | |
1341 | add %o7,global1,global1 ! delay slot: global1 = address of DES_SPtrans | |
1342 | sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and | |
f22e1e4d | 1343 | |
e0d769ca AP |
1344 | ld [in0], in5 ! left | |
1345 | add in3, 120, in4 ! ks3 (last subkey first) | |
e0d769ca AP |
1346 | |
1347 | ld [in0+4], out5 ! right | |
1348 | mov in2, in3 ! ks2 | |
e0d769ca AP |
1349 | |
1350 | ! parameter 6 1/2 for include encryption/decryption | |
1351 | ! parameter 7 1 for mov in1 to in3 | |
1352 | ! parameter 8 1 for mov in3 to in4 | |
1353 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | |
1354 | |
1355 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0) | |
1356 | |
1357 | call .des_enc | |
1358 | add in1, 120, in4 ! preload ks1 | |
1359 | |
1360 | call .des_dec | |
1361 | nop | |
1362 | |
1363 | fp_macro(out5, in5, 1) | |
1364 | |
f22e1e4d AP |
1365 | ret | |
1366 | restore | |
1367 | |
1368 | .DES_decrypt3.end: | |
1369 | .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3 | |
1370 | ||
f22e1e4d | 1371 | ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc) |
e0d769ca AP |
1372 | ! ***************************************************************** | |
1373 | ! CBC mode: enc != 0 encrypts, enc == 0 branches to .ncbc.dec. | |
1374 | ! The updated iv is stored back through the saved ivec pointer, except that decrypt with length <= 0 returns at once. | |
1375 | .align 32 | |
f22e1e4d AP |
1376 | .global DES_ncbc_encrypt | |
1377 | .type DES_ncbc_encrypt,#function | |
e0d769ca | 1378 | |
f22e1e4d | 1379 | DES_ncbc_encrypt: |
e0d769ca | 1380 | |
f22e1e4d | 1381 | save %sp, FRAME, %sp |
e0d769ca | 1382 | |
f22e1e4d AP |
1383 | define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] }) | |
1384 | define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] }) | |
1385 | define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] }) | |
1386 | |
4c78bc05 AP |
1387 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | |
1388 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | |
1389 | 1: call .+8 ! position independent: %o7 = address of this call | |
1390 | add %o7,global1,global1 ! delay slot: global1 = address of DES_SPtrans | |
1391 | sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and | |
e0d769ca | 1392 | |
df443918 | 1393 | cmp in5, 0 ! enc |
e0d769ca | 1394 | |
f22e1e4d | 1395 | be .ncbc.dec |
f22e1e4d | 1396 | STPTR in4, IVEC ! delay slot: ivec pointer saved on both paths |
e0d769ca AP |
1397 | |
1398 | ! addr left right temp label | |
1399 | load_little_endian(in4, in5, out5, local3, .LLE1) ! iv | |
1400 | |
1401 | addcc in2, -8, in2 ! bytes missing when first block done | |
1402 | |
f22e1e4d | 1403 | bl .ncbc.enc.seven.or.less |
f22e1e4d | 1404 | mov in3, in4 ! schedule |
e0d769ca AP |
1405 | |
1406 | .ncbc.enc.next.block: | |
1407 | |
1408 | load_little_endian(in0, out4, global4, local3, .LLE2) ! block | |
1409 | |
1410 | .ncbc.enc.next.block_1: | |
1411 | |
1412 | xor in5, out4, in5 ! iv xor | |
1413 | xor out5, global4, out5 ! iv xor | |
1414 | |
1415 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3 | |
1416 | ip_macro(in5, out5, in5, out5, in3, 0, 0, 2) | |
1417 | |
1418 | .ncbc.enc.next.block_2: | |
1419 | |
1420 | !// call .des_enc ! compares in2 to 8 | |
1421 | ! rounds inlined for alignment purposes | |
1422 | |
1423 | add global1, 768, global4 ! address sbox 4 since register used below | |
1424 | |
1425 | rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3 | |
1426 | |
f22e1e4d | 1427 | bl .ncbc.enc.next.block_fp |
e0d769ca AP |
1428 | add in0, 8, in0 ! input address | |
1429 | |
1430 | ! If 8 or more bytes are to be encrypted after this block, | |
1431 | ! we combine final permutation for this block with initial | |
1432 | ! permutation for next block. Load next block: | |
1433 | |
1434 | load_little_endian(in0, global3, global4, local5, .LLE12) | |
1435 | |
1436 | ! parameter 1 original left | |
1437 | ! parameter 2 original right | |
1438 | ! parameter 3 left ip | |
1439 | ! parameter 4 right ip | |
1440 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | |
1441 | ! 2: mov in4 to in3 | |
1442 | ! | |
1443 | ! also adds -8 to length in2 and loads loop counter to out4 | |
1444 | |
1445 | fp_ip_macro(out0, out1, global3, global4, 2) | |
1446 | |
1447 | store_little_endian(in1, out0, out1, local3, .SLE10) ! block | |
1448 | |
1449 | ld [in3], out0 ! key 7531 first round next block | |
1450 | mov in5, local1 | |
1451 | xor global3, out5, in5 ! iv xor next block | |
1452 | |
1453 | ld [in3+4], out1 ! key 8642 | |
1454 | add global1, 512, global3 ! address sbox 3 since register used | |
1455 | xor global4, local1, out5 ! iv xor next block | |
1456 | |
f22e1e4d | 1457 | ba .ncbc.enc.next.block_2 |
478b50cf | 1458 | add in1, 8, in1 ! output address |
e0d769ca AP |
1459 | |
1460 | .ncbc.enc.next.block_fp: | |
1461 | |
1462 | fp_macro(in5, out5) | |
1463 | |
1464 | store_little_endian(in1, in5, out5, local3, .SLE1) ! block | |
1465 | |
1466 | addcc in2, -8, in2 ! bytes missing when next block done | |
1467 | |
f22e1e4d | 1468 | bpos .ncbc.enc.next.block |
e0d769ca AP |
1469 | add in1, 8, in1 | |
1470 | |
1471 | .ncbc.enc.seven.or.less: | |
1472 | |
1473 | cmp in2, -8 ! in2 is the remaining length minus 8, so -8 means nothing is left | |
1474 | |
f22e1e4d | 1475 | ble .ncbc.enc.finish |
e0d769ca AP |
1476 | nop | |
1477 | |
1478 | add in2, 8, local1 ! bytes to load | |
1479 | |
1480 | ! addr, length, dest left, dest right, temp, temp2, label, ret label | |
1481 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1) | |
1482 | |
1483 | ! Loads 1 to 7 bytes little endian to global4, out4 | |
1484 | |
1485 | |
1486 | .ncbc.enc.finish: | |
1487 | |
f22e1e4d | 1488 | LDPTR IVEC, local4 |
e0d769ca AP |
1489 | store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec | |
1490 | |
f22e1e4d AP |
1491 | ret | |
1492 | restore | |
e0d769ca AP |
1493 | |
1494 | |
1495 | .ncbc.dec: | |
1496 | |
f22e1e4d | 1497 | STPTR in0, INPUT |
e0d769ca AP |
1498 | cmp in2, 0 ! length | |
1499 | add in3, 120, in3 ! last subkey is used first | |
1500 | |
f22e1e4d | 1501 | LDPTR IVEC, local7 ! ivec |
f22e1e4d | 1502 | ble .ncbc.dec.finish ! nothing to do for length <= 0 |
e0d769ca AP |
1503 | mov in3, in4 ! schedule | |
1504 | |
f22e1e4d | 1505 | STPTR in1, OUTPUT |
e0d769ca AP |
1506 | mov in0, local5 ! input | |
1507 | |
1508 | load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec | |
1509 | |
1510 | .ncbc.dec.next.block: | |
1511 | |
1512 | load_little_endian(local5, in5, out5, local3, .LLE4) ! block | |
1513 | |
1514 | ! parameter 6 1/2 for include encryption/decryption | |
1515 | ! parameter 7 1 for mov in1 to in3 | |
1516 | ! parameter 8 1 for mov in3 to in4 | |
1517 | |
46f4e1be | 1518 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4 |
e0d769ca AP |
1519 | |
1520 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7 | |
1521 | |
1522 | ! in2 is bytes left to be stored | |
1523 | ! in2 is compared to 8 in the rounds | |
1524 | |
1525 | xor out5, in0, out4 ! iv xor | |
f22e1e4d | 1526 | bl .ncbc.dec.seven.or.less |
e0d769ca AP |
1527 | xor in5, in1, global4 ! iv xor | |
1528 | |
1529 | ! Load ivec next block now, since input and output address might be the same. | |
1530 | |
1531 | load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv | |
1532 | |
1533 | store_little_endian(local7, out4, global4, local3, .SLE3) | |
1534 | |
f22e1e4d | 1535 | STPTR local5, INPUT |
e0d769ca AP |
1536 | add local7, 8, local7 | |
1537 | addcc in2, -8, in2 | |
1538 | |
f22e1e4d | 1539 | bg .ncbc.dec.next.block |
f22e1e4d | 1540 | STPTR local7, OUTPUT |
e0d769ca AP |
1541 | |
1542 | |
1543 | .ncbc.dec.store.iv: | |
1544 | |
f22e1e4d | 1545 | LDPTR IVEC, local4 ! ivec |
e0d769ca AP |
1546 | store_little_endian(local4, in0, in1, local5, .SLE4) | |
1547 | |
1548 | .ncbc.dec.finish: | |
1549 | |
f22e1e4d AP |
1550 | ret | |
1551 | restore | |
e0d769ca AP |
1552 | |
1553 | .ncbc.dec.seven.or.less: | |
1554 | |
1555 | load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec | |
1556 | |
1557 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv) | |
1558 | |
1559 | |
f22e1e4d AP |
1560 | .DES_ncbc_encrypt.end: | |
1561 | .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt | |
e0d769ca AP |
1562 | |
1563 | ||
46f4e1be | 1564 | ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc) |
e0d769ca AP |
1565 | ! ************************************************************************** | |
1566 | ! Triple-DES CBC: enc != 0 runs encrypt ks1, decrypt ks2, encrypt ks3 per block. enc == 0 branches to .ede3.dec. | |
1567 | ! ivec and enc are the seventh and eighth arguments and are read from the caller frame through %fp. | |
1568 | .align 32 | |
f22e1e4d AP |
1569 | .global DES_ede3_cbc_encrypt | |
1570 | .type DES_ede3_cbc_encrypt,#function | |
e0d769ca | 1571 | |
f22e1e4d | 1572 | DES_ede3_cbc_encrypt: |
e0d769ca | 1573 | |
f22e1e4d | 1574 | save %sp, FRAME, %sp |
e0d769ca | 1575 | |
f22e1e4d AP |
1576 | define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] }) | |
1577 | define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] }) | |
1578 | define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] }) | |
e0d769ca | 1579 | |
4c78bc05 AP |
1580 | sethi %hi(.PIC.DES_SPtrans-1f),global1 | |
1581 | or global1,%lo(.PIC.DES_SPtrans-1f),global1 | |
1582 | 1: call .+8 ! position independent: %o7 = address of this call | |
1583 | add %o7,global1,global1 ! delay slot: global1 = address of DES_SPtrans | |
1584 | sub global1,.PIC.DES_SPtrans-.des_and,out2 ! out2 = address of .des_and | |
e0d769ca | 1585 | |
f22e1e4d AP |
1586 | LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc | |
1587 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec | |
e0d769ca AP |
1588 | cmp local3, 0 ! enc | |
1589 | |
719122c7 | 1590 | be .ede3.dec |
f22e1e4d | 1591 | STPTR in4, KS2 ! delay slot: ks2 saved on both paths |
e0d769ca | 1592 | |
f22e1e4d | 1593 | STPTR in5, KS3 |
e0d769ca AP |
1594 | |
1595 | load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec | |
1596 | |
1597 | addcc in2, -8, in2 ! bytes missing after next block | |
1598 | |
f22e1e4d | 1599 | bl .ede3.enc.seven.or.less |
f22e1e4d | 1600 | STPTR in3, KS1 |
e0d769ca AP |
1601 | |
1602 | .ede3.enc.next.block: | |
1603 | |
1604 | load_little_endian(in0, out4, global4, local3, .LLE7) | |
1605 | |
1606 | .ede3.enc.next.block_1: | |
1607 | |
f22e1e4d | 1608 | LDPTR KS2, in4 |
e0d769ca AP |
1609 | xor in5, out4, in5 ! iv xor | |
1610 | xor out5, global4, out5 ! iv xor | |
1611 | |
f22e1e4d | 1612 | LDPTR KS1, in3 |
e0d769ca AP |
1613 | add in4, 120, in4 ! for decryption we use last subkey first | |
1614 | nop | |
1615 | |
1616 | ip_macro(in5, out5, in5, out5, in3) | |
1617 | |
1618 | .ede3.enc.next.block_2: | |
1619 | |
1620 | call .des_enc ! ks1 in3 | |
1621 | nop | |
1622 | |
1623 | call .des_dec ! ks2 in4 | |
f22e1e4d | 1624 | LDPTR KS3, in3 |
e0d769ca AP |
1625 | |
1626 | call .des_enc ! ks3 in3 compares in2 to 8 | |
1627 | nop | |
1628 | |
f22e1e4d | 1629 | bl .ede3.enc.next.block_fp |
e0d769ca AP |
1630 | add in0, 8, in0 | |
1631 | |
1632 | ! If 8 or more bytes are to be encrypted after this block, | |
1633 | ! we combine final permutation for this block with initial | |
1634 | ! permutation for next block. Load next block: | |
1635 | |
1636 | load_little_endian(in0, global3, global4, local5, .LLE11) | |
1637 | |
1638 | ! parameter 1 original left | |
1639 | ! parameter 2 original right | |
1640 | ! parameter 3 left ip | |
1641 | ! parameter 4 right ip | |
1642 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4 | |
1643 | ! 2: mov in4 to in3 | |
1644 | ! | |
1645 | ! also adds -8 to length in2 and loads loop counter to out4 | |
1646 | |
1647 | fp_ip_macro(out0, out1, global3, global4, 1) | |
1648 | |
1649 | store_little_endian(in1, out0, out1, local3, .SLE9) ! block | |
1650 | |
1651 | mov in5, local1 | |
1652 | xor global3, out5, in5 ! iv xor next block | |
1653 | |
1654 | ld [in3], out0 ! key 7531 | |
1655 | add global1, 512, global3 ! address sbox 3 | |
1656 | xor global4, local1, out5 ! iv xor next block | |
1657 | |
1658 | ld [in3+4], out1 ! key 8642 | |
1659 | add global1, 768, global4 ! address sbox 4 | |
f22e1e4d | 1660 | ba .ede3.enc.next.block_2 |
e0d769ca AP |
1661 | add in1, 8, in1 | |
1662 | |
1663 | .ede3.enc.next.block_fp: | |
1664 | |
1665 | fp_macro(in5, out5) | |
1666 | |
1667 | store_little_endian(in1, in5, out5, local3, .SLE5) ! block | |
1668 | |
1669 | addcc in2, -8, in2 ! bytes missing when next block done | |
1670 | |
f22e1e4d | 1671 | bpos .ede3.enc.next.block |
e0d769ca AP |
1672 | add in1, 8, in1 | |
1673 | |
1674 | .ede3.enc.seven.or.less: | |
1675 | |
1676 | cmp in2, -8 ! in2 is the remaining length minus 8, so -8 means nothing is left | |
1677 | |
f22e1e4d | 1678 | ble .ede3.enc.finish |
e0d769ca AP |
1679 | nop | |
1680 | |
1681 | add in2, 8, local1 ! bytes to load | |
1682 | |
1683 | ! addr, length, dest left, dest right, temp, temp2, label, ret label | |
1684 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1) | |
1685 | |
1686 | .ede3.enc.finish: | |
1687 | |
f22e1e4d | 1688 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec |
e0d769ca AP |
1689 | store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec | |
1690 | |
f22e1e4d AP |
1691 | ret | |
1692 | restore | |
e0d769ca AP |
1693 | |
1694 | .ede3.dec: | |
1695 | ! the input and output frame slots used below are the m4 definitions made inside the ncbc function | |
f22e1e4d | 1696 | STPTR in0, INPUT |
e0d769ca AP |
1697 | add in5, 120, in5 ! ks3: last subkey is used first | |
1698 | |
f22e1e4d | 1699 | STPTR in1, OUTPUT |
e0d769ca AP |
1700 | mov in0, local5 | |
1701 | add in3, 120, in3 ! ks1: last subkey is used first | |
1702 | |
f22e1e4d | 1703 | STPTR in3, KS1 |
e0d769ca AP |
1704 | cmp in2, 0 | |
1705 | |
f22e1e4d | 1706 | ble .ede3.dec.finish ! nothing to do for length <= 0 |
f22e1e4d | 1707 | STPTR in5, KS3 |
e0d769ca | 1708 | |
f22e1e4d | 1709 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv |
e0d769ca AP |
1710 | load_little_endian(local7, in0, in1, local3, .LLE8) | |
1711 | |
1712 | .ede3.dec.next.block: | |
1713 | |
1714 | load_little_endian(local5, in5, out5, local3, .LLE9) | |
1715 | |
1716 | ! parameter 6 1/2 for include encryption/decryption | |
1717 | ! parameter 7 1 for mov in1 to in3 | |
1718 | ! parameter 8 1 for mov in3 to in4 | |
1719 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3 | |
1720 | |
1721 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4 | |
1722 | |
1723 | call .des_enc ! ks2 in3 | |
f22e1e4d | 1724 | LDPTR KS1, in4 |
e0d769ca AP |
1725 | |
1726 | call .des_dec ! ks1 in4 | |
1727 | nop | |
1728 | |
1729 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7 | |
1730 | |
1731 | ! in2 is bytes left to be stored | |
1732 | ! in2 is compared to 8 in the rounds | |
1733 | |
1734 | xor out5, in0, out4 | |
f22e1e4d | 1735 | bl .ede3.dec.seven.or.less |
e0d769ca AP |
1736 | xor in5, in1, global4 | |
1737 | |
1738 | load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block | |
1739 | |
1740 | store_little_endian(local7, out4, global4, local3, .SLE7) ! block | |
1741 | |
f22e1e4d | 1742 | STPTR local5, INPUT |
e0d769ca AP |
1743 | addcc in2, -8, in2 | |
1744 | add local7, 8, local7 | |
1745 | |
f22e1e4d | 1746 | bg .ede3.dec.next.block |
f22e1e4d | 1747 | STPTR local7, OUTPUT |
e0d769ca AP |
1748 | |
1749 | .ede3.dec.store.iv: | |
1750 | |
f22e1e4d | 1751 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec |
e0d769ca AP |
1752 | store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec | |
1753 | |
1754 | .ede3.dec.finish: | |
1755 | |
f22e1e4d AP |
1756 | ret | |
1757 | restore | |
e0d769ca AP |
1758 | |
1759 | .ede3.dec.seven.or.less: | |
1760 | |
1761 | load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv | |
1762 | |
1763 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv) | |
1764 | |
1765 | |
f22e1e4d AP |
1766 | .DES_ede3_cbc_encrypt.end: | |
1767 | .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt | |
70532b7d AP |
1768 | |
1769 | .align 256 | |
1770 | .type .des_and,#object | |
1771 | .size .des_and,288 | |
1772 | ! size: 32 rows of 8 bytes (256) plus 8 words (32) = 288. The last word starts at offset 284 and ends at 288. | |
1773 | .des_and: | |
1774 | |
1775 | ! This table is used for AND 0xFC when it is known that register | |
1776 | ! bits 8-31 are zero. Makes it possible to do three arithmetic | |
1777 | ! operations in one cycle. | |
1778 | ! Byte number i holds i with its two low bits cleared. | |
1779 | .byte 0, 0, 0, 0, 4, 4, 4, 4 | |
1780 | .byte 8, 8, 8, 8, 12, 12, 12, 12 | |
1781 | .byte 16, 16, 16, 16, 20, 20, 20, 20 | |
1782 | .byte 24, 24, 24, 24, 28, 28, 28, 28 | |
1783 | .byte 32, 32, 32, 32, 36, 36, 36, 36 | |
1784 | .byte 40, 40, 40, 40, 44, 44, 44, 44 | |
1785 | .byte 48, 48, 48, 48, 52, 52, 52, 52 | |
1786 | .byte 56, 56, 56, 56, 60, 60, 60, 60 | |
1787 | .byte 64, 64, 64, 64, 68, 68, 68, 68 | |
1788 | .byte 72, 72, 72, 72, 76, 76, 76, 76 | |
1789 | .byte 80, 80, 80, 80, 84, 84, 84, 84 | |
1790 | .byte 88, 88, 88, 88, 92, 92, 92, 92 | |
1791 | .byte 96, 96, 96, 96, 100, 100, 100, 100 | |
1792 | .byte 104, 104, 104, 104, 108, 108, 108, 108 | |
1793 | .byte 112, 112, 112, 112, 116, 116, 116, 116 | |
1794 | .byte 120, 120, 120, 120, 124, 124, 124, 124 | |
1795 | .byte 128, 128, 128, 128, 132, 132, 132, 132 | |
1796 | .byte 136, 136, 136, 136, 140, 140, 140, 140 | |
1797 | .byte 144, 144, 144, 144, 148, 148, 148, 148 | |
1798 | .byte 152, 152, 152, 152, 156, 156, 156, 156 | |
1799 | .byte 160, 160, 160, 160, 164, 164, 164, 164 | |
1800 | .byte 168, 168, 168, 168, 172, 172, 172, 172 | |
1801 | .byte 176, 176, 176, 176, 180, 180, 180, 180 | |
1802 | .byte 184, 184, 184, 184, 188, 188, 188, 188 | |
1803 | .byte 192, 192, 192, 192, 196, 196, 196, 196 | |
1804 | .byte 200, 200, 200, 200, 204, 204, 204, 204 | |
1805 | .byte 208, 208, 208, 208, 212, 212, 212, 212 | |
1806 | .byte 216, 216, 216, 216, 220, 220, 220, 220 | |
1807 | .byte 224, 224, 224, 224, 228, 228, 228, 228 | |
1808 | .byte 232, 232, 232, 232, 236, 236, 236, 236 | |
1809 | .byte 240, 240, 240, 240, 244, 244, 244, 244 | |
1810 | .byte 248, 248, 248, 248, 252, 252, 252, 252 | |
1811 | |
46f4e1be | 1812 | ! 5 numbers for initial/final permutation |
70532b7d AP |
1813 | |
1814 | .word 0x0f0f0f0f ! offset 256 | |
1815 | .word 0x0000ffff ! 260 | |
1816 | .word 0x33333333 ! 264 | |
1817 | .word 0x00ff00ff ! 268 | |
1818 | .word 0x55555555 ! 272 | |
1819 | ! three further constants kept next to the masks, at offsets 276 to 287 | |
1820 | .word 0 ! 276 | |
1821 | .word LOOPS ! 280 | |
1822 | .word 0x0000FC00 ! 284 | |
1823 | ||
1824 | .global DES_SPtrans | |
1825 | .type DES_SPtrans,#object | |
1826 | .size DES_SPtrans,2048 | |
1827 | .align 64 | |
1828 | DES_SPtrans: | |
1829 | .PIC.DES_SPtrans: | |
1830 | ! nibble 0 | |
1831 | .word 0x02080800, 0x00080000, 0x02000002, 0x02080802 | |
1832 | .word 0x02000000, 0x00080802, 0x00080002, 0x02000002 | |
1833 | .word 0x00080802, 0x02080800, 0x02080000, 0x00000802 | |
1834 | .word 0x02000802, 0x02000000, 0x00000000, 0x00080002 | |
1835 | .word 0x00080000, 0x00000002, 0x02000800, 0x00080800 | |
1836 | .word 0x02080802, 0x02080000, 0x00000802, 0x02000800 | |
1837 | .word 0x00000002, 0x00000800, 0x00080800, 0x02080002 | |
1838 | .word 0x00000800, 0x02000802, 0x02080002, 0x00000000 | |
1839 | .word 0x00000000, 0x02080802, 0x02000800, 0x00080002 | |
1840 | .word 0x02080800, 0x00080000, 0x00000802, 0x02000800 | |
1841 | .word 0x02080002, 0x00000800, 0x00080800, 0x02000002 | |
1842 | .word 0x00080802, 0x00000002, 0x02000002, 0x02080000 | |
1843 | .word 0x02080802, 0x00080800, 0x02080000, 0x02000802 | |
1844 | .word 0x02000000, 0x00000802, 0x00080002, 0x00000000 | |
1845 | .word 0x00080000, 0x02000000, 0x02000802, 0x02080800 | |
1846 | .word 0x00000002, 0x02080002, 0x00000800, 0x00080802 | |
1847 | ! nibble 1 | |
1848 | .word 0x40108010, 0x00000000, 0x00108000, 0x40100000 | |
1849 | .word 0x40000010, 0x00008010, 0x40008000, 0x00108000 | |
1850 | .word 0x00008000, 0x40100010, 0x00000010, 0x40008000 | |
1851 | .word 0x00100010, 0x40108000, 0x40100000, 0x00000010 | |
1852 | .word 0x00100000, 0x40008010, 0x40100010, 0x00008000 | |
1853 | .word 0x00108010, 0x40000000, 0x00000000, 0x00100010 | |
1854 | .word 0x40008010, 0x00108010, 0x40108000, 0x40000010 | |
1855 | .word 0x40000000, 0x00100000, 0x00008010, 0x40108010 | |
1856 | .word 0x00100010, 0x40108000, 0x40008000, 0x00108010 | |
1857 | .word 0x40108010, 0x00100010, 0x40000010, 0x00000000 | |
1858 | .word 0x40000000, 0x00008010, 0x00100000, 0x40100010 | |
1859 | .word 0x00008000, 0x40000000, 0x00108010, 0x40008010 | |
1860 | .word 0x40108000, 0x00008000, 0x00000000, 0x40000010 | |
1861 | .word 0x00000010, 0x40108010, 0x00108000, 0x40100000 | |
1862 | .word 0x40100010, 0x00100000, 0x00008010, 0x40008000 | |
1863 | .word 0x40008010, 0x00000010, 0x40100000, 0x00108000 | |
1864 | ! nibble 2 | |
1865 | .word 0x04000001, 0x04040100, 0x00000100, 0x04000101 | |
1866 | .word 0x00040001, 0x04000000, 0x04000101, 0x00040100 | |
1867 | .word 0x04000100, 0x00040000, 0x04040000, 0x00000001 | |
1868 | .word 0x04040101, 0x00000101, 0x00000001, 0x04040001 | |
1869 | .word 0x00000000, 0x00040001, 0x04040100, 0x00000100 | |
1870 | .word 0x00000101, 0x04040101, 0x00040000, 0x04000001 | |
1871 | .word 0x04040001, 0x04000100, 0x00040101, 0x04040000 | |
1872 | .word 0x00040100, 0x00000000, 0x04000000, 0x00040101 | |
1873 | .word 0x04040100, 0x00000100, 0x00000001, 0x00040000 | |
1874 | .word 0x00000101, 0x00040001, 0x04040000, 0x04000101 | |
1875 | .word 0x00000000, 0x04040100, 0x00040100, 0x04040001 | |
1876 | .word 0x00040001, 0x04000000, 0x04040101, 0x00000001 | |
1877 | .word 0x00040101, 0x04000001, 0x04000000, 0x04040101 | |
1878 | .word 0x00040000, 0x04000100, 0x04000101, 0x00040100 | |
1879 | .word 0x04000100, 0x00000000, 0x04040001, 0x00000101 | |
1880 | .word 0x04000001, 0x00040101, 0x00000100, 0x04040000 | |
1881 | ! nibble 3 | |
1882 | .word 0x00401008, 0x10001000, 0x00000008, 0x10401008 | |
1883 | .word 0x00000000, 0x10400000, 0x10001008, 0x00400008 | |
1884 | .word 0x10401000, 0x10000008, 0x10000000, 0x00001008 | |
1885 | .word 0x10000008, 0x00401008, 0x00400000, 0x10000000 | |
1886 | .word 0x10400008, 0x00401000, 0x00001000, 0x00000008 | |
1887 | .word 0x00401000, 0x10001008, 0x10400000, 0x00001000 | |
1888 | .word 0x00001008, 0x00000000, 0x00400008, 0x10401000 | |
1889 | .word 0x10001000, 0x10400008, 0x10401008, 0x00400000 | |
1890 | .word 0x10400008, 0x00001008, 0x00400000, 0x10000008 | |
1891 | .word 0x00401000, 0x10001000, 0x00000008, 0x10400000 | |
1892 | .word 0x10001008, 0x00000000, 0x00001000, 0x00400008 | |
1893 | .word 0x00000000, 0x10400008, 0x10401000, 0x00001000 | |
1894 | .word 0x10000000, 0x10401008, 0x00401008, 0x00400000 | |
1895 | .word 0x10401008, 0x00000008, 0x10001000, 0x00401008 | |
1896 | .word 0x00400008, 0x00401000, 0x10400000, 0x10001008 | |
1897 | .word 0x00001008, 0x10000000, 0x10000008, 0x10401000 | |
1898 | ! nibble 4 | |
1899 | .word 0x08000000, 0x00010000, 0x00000400, 0x08010420 | |
1900 | .word 0x08010020, 0x08000400, 0x00010420, 0x08010000 | |
1901 | .word 0x00010000, 0x00000020, 0x08000020, 0x00010400 | |
1902 | .word 0x08000420, 0x08010020, 0x08010400, 0x00000000 | |
1903 | .word 0x00010400, 0x08000000, 0x00010020, 0x00000420 | |
1904 | .word 0x08000400, 0x00010420, 0x00000000, 0x08000020 | |
1905 | .word 0x00000020, 0x08000420, 0x08010420, 0x00010020 | |
1906 | .word 0x08010000, 0x00000400, 0x00000420, 0x08010400 | |
1907 | .word 0x08010400, 0x08000420, 0x00010020, 0x08010000 | |
1908 | .word 0x00010000, 0x00000020, 0x08000020, 0x08000400 | |
1909 | .word 0x08000000, 0x00010400, 0x08010420, 0x00000000 | |
1910 | .word 0x00010420, 0x08000000, 0x00000400, 0x00010020 | |
1911 | .word 0x08000420, 0x00000400, 0x00000000, 0x08010420 | |
1912 | .word 0x08010020, 0x08010400, 0x00000420, 0x00010000 | |
1913 | .word 0x00010400, 0x08010020, 0x08000400, 0x00000420 | |
1914 | .word 0x00000020, 0x00010420, 0x08010000, 0x08000020 | |
1915 | ! nibble 5 | |
1916 | .word 0x80000040, 0x00200040, 0x00000000, 0x80202000 | |
1917 | .word 0x00200040, 0x00002000, 0x80002040, 0x00200000 | |
1918 | .word 0x00002040, 0x80202040, 0x00202000, 0x80000000 | |
1919 | .word 0x80002000, 0x80000040, 0x80200000, 0x00202040 | |
1920 | .word 0x00200000, 0x80002040, 0x80200040, 0x00000000 | |
1921 | .word 0x00002000, 0x00000040, 0x80202000, 0x80200040 | |
1922 | .word 0x80202040, 0x80200000, 0x80000000, 0x00002040 | |
1923 | .word 0x00000040, 0x00202000, 0x00202040, 0x80002000 | |
1924 | .word 0x00002040, 0x80000000, 0x80002000, 0x00202040 | |
1925 | .word 0x80202000, 0x00200040, 0x00000000, 0x80002000 | |
1926 | .word 0x80000000, 0x00002000, 0x80200040, 0x00200000 | |
1927 | .word 0x00200040, 0x80202040, 0x00202000, 0x00000040 | |
1928 | .word 0x80202040, 0x00202000, 0x00200000, 0x80002040 | |
1929 | .word 0x80000040, 0x80200000, 0x00202040, 0x00000000 | |
1930 | .word 0x00002000, 0x80000040, 0x80002040, 0x80202000 | |
1931 | .word 0x80200000, 0x00002040, 0x00000040, 0x80200040 | |
1932 | ! nibble 6 | |
1933 | .word 0x00004000, 0x00000200, 0x01000200, 0x01000004 | |
1934 | .word 0x01004204, 0x00004004, 0x00004200, 0x00000000 | |
1935 | .word 0x01000000, 0x01000204, 0x00000204, 0x01004000 | |
1936 | .word 0x00000004, 0x01004200, 0x01004000, 0x00000204 | |
1937 | .word 0x01000204, 0x00004000, 0x00004004, 0x01004204 | |
1938 | .word 0x00000000, 0x01000200, 0x01000004, 0x00004200 | |
1939 | .word 0x01004004, 0x00004204, 0x01004200, 0x00000004 | |
1940 | .word 0x00004204, 0x01004004, 0x00000200, 0x01000000 | |
1941 | .word 0x00004204, 0x01004000, 0x01004004, 0x00000204 | |
1942 | .word 0x00004000, 0x00000200, 0x01000000, 0x01004004 | |
1943 | .word 0x01000204, 0x00004204, 0x00004200, 0x00000000 | |
1944 | .word 0x00000200, 0x01000004, 0x00000004, 0x01000200 | |
1945 | .word 0x00000000, 0x01000204, 0x01000200, 0x00004200 | |
1946 | .word 0x00000204, 0x00004000, 0x01004204, 0x01000000 | |
1947 | .word 0x01004200, 0x00000004, 0x00004004, 0x01004204 | |
1948 | .word 0x01000004, 0x01004200, 0x01004000, 0x00004004 | |
1949 | ! nibble 7 | |
1950 | .word 0x20800080, 0x20820000, 0x00020080, 0x00000000 | |
1951 | .word 0x20020000, 0x00800080, 0x20800000, 0x20820080 | |
1952 | .word 0x00000080, 0x20000000, 0x00820000, 0x00020080 | |
1953 | .word 0x00820080, 0x20020080, 0x20000080, 0x20800000 | |
1954 | .word 0x00020000, 0x00820080, 0x00800080, 0x20020000 | |
1955 | .word 0x20820080, 0x20000080, 0x00000000, 0x00820000 | |
1956 | .word 0x20000000, 0x00800000, 0x20020080, 0x20800080 | |
1957 | .word 0x00800000, 0x00020000, 0x20820000, 0x00000080 | |
1958 | .word 0x00800000, 0x00020000, 0x20000080, 0x20820080 | |
1959 | .word 0x00020080, 0x20000000, 0x00000000, 0x00820000 | |
1960 | .word 0x20800080, 0x20020080, 0x20020000, 0x00800080 | |
1961 | .word 0x20820000, 0x00000080, 0x00800080, 0x20020000 | |
1962 | .word 0x20820080, 0x00800000, 0x20800000, 0x20000080 | |
1963 | .word 0x00820000, 0x00020080, 0x20020080, 0x20800000 | |
1964 | .word 0x00000080, 0x20820000, 0x00820080, 0x00000000 | |
1965 | .word 0x20000000, 0x20800080, 0x00020000, 0x00820080 | |
1966 |