]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/modes/asm/aes-gcm-ppc.pl
29d4e2e6fb109baed6ffe322e65c215c205386b4
[thirdparty/openssl.git] / crypto / modes / asm / aes-gcm-ppc.pl
1 #! /usr/bin/env perl
2 # Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
3 # Copyright 2021- IBM Inc. All rights reserved
4 #
5 # Licensed under the Apache License 2.0 (the "License"). You may not use
6 # this file except in compliance with the License. You can obtain a copy
7 # in the file LICENSE in the source distribution or at
8 # https://www.openssl.org/source/license.html
9 #
10 #===================================================================================
11 # Written by Danny Tsen <dtsen@us.ibm.com> for OpenSSL Project,
12 #
13 # GHASH is based on the Karatsuba multiplication method.
14 #
15 # Xi xor X1
16 #
17 # X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H =
18 # (X1.h * H4.h + X1.l * H4.l + X1 * H4) +
19 # (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
20 # (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
21 # (X4.h * H.h + X4.l * H.l + X4 * H)
22 #
23 # Xi = v0
24 # H Poly = v2
25 # Hash keys = v3 - v14
26 # ( H.l, H, H.h)
27 # ( H^2.l, H^2, H^2.h)
28 # ( H^3.l, H^3, H^3.h)
29 # ( H^4.l, H^4, H^4.h)
30 #
31 # v30 is IV
32 # v31 - counter 1
33 #
34 # AES used,
35 # vs0 - vs14 for round keys
36 # v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
37 #
38 # This implementation uses stitched AES-GCM approach to improve overall performance.
39 # AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
40 #
41 # Current large block (16384 bytes) performance per second with 128 bit key --
42 #
43 # Encrypt Decrypt
44 # Power10[le] (3.5GHz) 5.32G 5.26G
45 #
46 # ===================================================================================
47 #
48 # $output is the last argument if it looks like a file (it has an extension)
49 # $flavour is the first argument if it doesn't look like a file
# Command-line convention shared by OpenSSL perlasm drivers:
# $output is the last argument if it looks like a file (it has an extension),
# $flavour is the first argument if it doesn't look like a file.
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Pick 64- vs 32-bit mnemonics and ABI sizes from the flavour string.
if ($flavour =~ /64/) {
	$SIZE_T = 8;
	$LRSAVE = 2*$SIZE_T;			# LR save slot in caller's frame
	($STU, $POP, $PUSH) = ("stdu", "ld", "std");
	($UCMP, $SHRI) = ("cmpld", "srdi");
} elsif ($flavour =~ /32/) {
	$SIZE_T = 4;
	$LRSAVE = $SIZE_T;
	($STU, $POP, $PUSH) = ("stwu", "lwz", "stw");
	($UCMP, $SHRI) = ("cmplw", "srwi");
} else {
	die "nonsense $flavour";
}

$sp = "r1";
$FRAME = 6*$SIZE_T + 13*16;		# 13*16 is for v20-v31 offload

# Locate the ppc-xlate.pl translator either beside this script or in the
# shared perlasm directory, then pipe all of our output through it.
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = $1;
( $xlate = "${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate = "${dir}../../perlasm/ppc-xlate.pl" and -f $xlate ) or
die "can't locate ppc-xlate.pl";

open STDOUT, "| $^X $xlate $flavour \"$output\""
	or die "can't call $xlate: $!";
81
82 $code=<<___;
83 .machine "any"
# NOTE(review): .abiversion 2 is an ELFv2-only directive, yet the driver
# above also accepts 32-bit flavours -- confirm this file is built for
# 64-bit little-endian ELFv2 targets only.
84 .abiversion 2
85 .text
86
87 # 4x loops
88 # v15 - v18 - input states
89 # vs1 - vs9 - round keys
90 #
# Apply AES rounds 1..9 to the four state vectors v15-v18.  Round keys
# live in VSRs vs1-vs9 and are staged four at a time into v19-v22
# (VSR 51-54) with xxlor before each batch of vcipher instructions;
# round 9 uses v23.  The caller performs round 0 and the final round.
91 .macro Loop_aes_middle4x
92 xxlor 19+32, 1, 1
93 xxlor 20+32, 2, 2
94 xxlor 21+32, 3, 3
95 xxlor 22+32, 4, 4
96
97 vcipher 15, 15, 19
98 vcipher 16, 16, 19
99 vcipher 17, 17, 19
100 vcipher 18, 18, 19
101
102 vcipher 15, 15, 20
103 vcipher 16, 16, 20
104 vcipher 17, 17, 20
105 vcipher 18, 18, 20
106
107 vcipher 15, 15, 21
108 vcipher 16, 16, 21
109 vcipher 17, 17, 21
110 vcipher 18, 18, 21
111
112 vcipher 15, 15, 22
113 vcipher 16, 16, 22
114 vcipher 17, 17, 22
115 vcipher 18, 18, 22
116
# second key batch: rounds 5..8 from vs5-vs8
117 xxlor 19+32, 5, 5
118 xxlor 20+32, 6, 6
119 xxlor 21+32, 7, 7
120 xxlor 22+32, 8, 8
121
122 vcipher 15, 15, 19
123 vcipher 16, 16, 19
124 vcipher 17, 17, 19
125 vcipher 18, 18, 19
126
127 vcipher 15, 15, 20
128 vcipher 16, 16, 20
129 vcipher 17, 17, 20
130 vcipher 18, 18, 20
131
132 vcipher 15, 15, 21
133 vcipher 16, 16, 21
134 vcipher 17, 17, 21
135 vcipher 18, 18, 21
136
137 vcipher 15, 15, 22
138 vcipher 16, 16, 22
139 vcipher 17, 17, 22
140 vcipher 18, 18, 22
141
# round 9 from vs9
142 xxlor 23+32, 9, 9
143 vcipher 15, 15, 23
144 vcipher 16, 16, 23
145 vcipher 17, 17, 23
146 vcipher 18, 18, 23
147 .endm
148
149 # 8x loops
150 # v15 - v22 - input states
151 # vs1 - vs9 - round keys
152 #
# Same structure as Loop_aes_middle4x but over eight states v15-v22:
# AES rounds 1..9, keys staged from vs1-vs9 into v23-v26 in two batches
# of four, with round 9 taken from vs9 via v23 at the end.
153 .macro Loop_aes_middle8x
154 xxlor 23+32, 1, 1
155 xxlor 24+32, 2, 2
156 xxlor 25+32, 3, 3
157 xxlor 26+32, 4, 4
158
159 vcipher 15, 15, 23
160 vcipher 16, 16, 23
161 vcipher 17, 17, 23
162 vcipher 18, 18, 23
163 vcipher 19, 19, 23
164 vcipher 20, 20, 23
165 vcipher 21, 21, 23
166 vcipher 22, 22, 23
167
168 vcipher 15, 15, 24
169 vcipher 16, 16, 24
170 vcipher 17, 17, 24
171 vcipher 18, 18, 24
172 vcipher 19, 19, 24
173 vcipher 20, 20, 24
174 vcipher 21, 21, 24
175 vcipher 22, 22, 24
176
177 vcipher 15, 15, 25
178 vcipher 16, 16, 25
179 vcipher 17, 17, 25
180 vcipher 18, 18, 25
181 vcipher 19, 19, 25
182 vcipher 20, 20, 25
183 vcipher 21, 21, 25
184 vcipher 22, 22, 25
185
186 vcipher 15, 15, 26
187 vcipher 16, 16, 26
188 vcipher 17, 17, 26
189 vcipher 18, 18, 26
190 vcipher 19, 19, 26
191 vcipher 20, 20, 26
192 vcipher 21, 21, 26
193 vcipher 22, 22, 26
194
# second key batch: rounds 5..8 from vs5-vs8
195 xxlor 23+32, 5, 5
196 xxlor 24+32, 6, 6
197 xxlor 25+32, 7, 7
198 xxlor 26+32, 8, 8
199
200 vcipher 15, 15, 23
201 vcipher 16, 16, 23
202 vcipher 17, 17, 23
203 vcipher 18, 18, 23
204 vcipher 19, 19, 23
205 vcipher 20, 20, 23
206 vcipher 21, 21, 23
207 vcipher 22, 22, 23
208
209 vcipher 15, 15, 24
210 vcipher 16, 16, 24
211 vcipher 17, 17, 24
212 vcipher 18, 18, 24
213 vcipher 19, 19, 24
214 vcipher 20, 20, 24
215 vcipher 21, 21, 24
216 vcipher 22, 22, 24
217
218 vcipher 15, 15, 25
219 vcipher 16, 16, 25
220 vcipher 17, 17, 25
221 vcipher 18, 18, 25
222 vcipher 19, 19, 25
223 vcipher 20, 20, 25
224 vcipher 21, 21, 25
225 vcipher 22, 22, 25
226
227 vcipher 15, 15, 26
228 vcipher 16, 16, 26
229 vcipher 17, 17, 26
230 vcipher 18, 18, 26
231 vcipher 19, 19, 26
232 vcipher 20, 20, 26
233 vcipher 21, 21, 26
234 vcipher 22, 22, 26
235
# round 9 from vs9
236 xxlor 23+32, 9, 9
237 vcipher 15, 15, 23
238 vcipher 16, 16, 23
239 vcipher 17, 17, 23
240 vcipher 18, 18, 23
241 vcipher 19, 19, 23
242 vcipher 20, 20, 23
243 vcipher 21, 21, 23
244 vcipher 22, 22, 23
245 .endm
247 #
248 # Compute 4x hash values based on Karatsuba method.
249 #
# Subroutine (ends in blr) folding four blocks v15-v18 into the hash v0
# using key powers in v3-v14 and the reduction polynomial in v2.
# NOTE(review): no bl to this routine appears anywhere in this file --
# the stitched paths use the ppc_aes_gcm_ghash2_4x macro instead.  This
# looks like dead code kept for reference; confirm before removing.
250 ppc_aes_gcm_ghash:
251 vxor 15, 15, 0
252
253 xxlxor 29, 29, 29
254
255 vpmsumd 23, 12, 15 # H4.L * X.L
256 vpmsumd 24, 9, 16
257 vpmsumd 25, 6, 17
258 vpmsumd 26, 3, 18
259
260 vxor 23, 23, 24
261 vxor 23, 23, 25
262 vxor 23, 23, 26 # L
263
264 vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
265 vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
266 vpmsumd 26, 7, 17
267 vpmsumd 27, 4, 18
268
269 vxor 24, 24, 25
270 vxor 24, 24, 26
271 vxor 24, 24, 27 # M
272
273 # sum hash and reduction with H Poly
274 vpmsumd 28, 23, 2 # reduction
275
276 xxlor 29+32, 29, 29
277 vsldoi 26, 24, 29, 8 # mL
278 vsldoi 29, 29, 24, 8 # mH
279 vxor 23, 23, 26 # mL + L
280
281 vsldoi 23, 23, 23, 8 # swap
282 vxor 23, 23, 28
283
284 vpmsumd 24, 14, 15 # H4.H * X.H
285 vpmsumd 25, 11, 16
286 vpmsumd 26, 8, 17
287 vpmsumd 27, 5, 18
288
289 vxor 24, 24, 25
290 vxor 24, 24, 26
291 vxor 24, 24, 27
292
293 vxor 24, 24, 29
294
295 # sum hash and reduction with H Poly
296 vsldoi 27, 23, 23, 8 # swap
297 vpmsumd 23, 23, 2
298 vxor 27, 27, 24
299 vxor 23, 23, 27
300
301 xxlor 32, 23+32, 23+32 # update hash
302
303 blr
304
305 #
306 # Combine two 4x ghash
307 # v15 - v22 - input blocks
308 #
# Fold eight blocks into the hash as two consecutive 4x Karatsuba
# passes: v15-v18 first (producing an intermediate Xi in v27), then
# v19-v22 with that Xi folded in.  Key powers are in v3-v14, the
# reduction polynomial in v2; the updated hash is left in v0.
309 .macro ppc_aes_gcm_ghash2_4x
310 # first 4x hash
311 vxor 15, 15, 0 # Xi + X
312
313 xxlxor 29, 29, 29
314
315 vpmsumd 23, 12, 15 # H4.L * X.L
316 vpmsumd 24, 9, 16
317 vpmsumd 25, 6, 17
318 vpmsumd 26, 3, 18
319
320 vxor 23, 23, 24
321 vxor 23, 23, 25
322 vxor 23, 23, 26 # L
323
324 vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L
325 vpmsumd 25, 10, 16 # H3.L * X1.H + H3.H * X1.L
326 vpmsumd 26, 7, 17
327 vpmsumd 27, 4, 18
328
329 vxor 24, 24, 25
330 vxor 24, 24, 26
331
332 # sum hash and reduction with H Poly
333 vpmsumd 28, 23, 2 # reduction
334
335 xxlor 29+32, 29, 29
336
337 vxor 24, 24, 27 # M
338 vsldoi 26, 24, 29, 8 # mL
339 vsldoi 29, 29, 24, 8 # mH
340 vxor 23, 23, 26 # mL + L
341
342 vsldoi 23, 23, 23, 8 # swap
343 vxor 23, 23, 28
344
345 vpmsumd 24, 14, 15 # H4.H * X.H
346 vpmsumd 25, 11, 16
347 vpmsumd 26, 8, 17
348 vpmsumd 27, 5, 18
349
350 vxor 24, 24, 25
351 vxor 24, 24, 26
352 vxor 24, 24, 27 # H
353
354 vxor 24, 24, 29 # H + mH
355
356 # sum hash and reduction with H Poly
357 vsldoi 27, 23, 23, 8 # swap
358 vpmsumd 23, 23, 2
359 vxor 27, 27, 24
360 vxor 27, 23, 27 # 1st Xi
361
362 # 2nd 4x hash
363 vpmsumd 24, 9, 20
364 vpmsumd 25, 6, 21
365 vpmsumd 26, 3, 22
366 vxor 19, 19, 27 # Xi + X
367 vpmsumd 23, 12, 19 # H4.L * X.L
368
369 vxor 23, 23, 24
370 vxor 23, 23, 25
371 vxor 23, 23, 26 # L
372
373 vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L
374 vpmsumd 25, 10, 20 # H3.L * X1.H + H3.H * X1.L
375 vpmsumd 26, 7, 21
376 vpmsumd 27, 4, 22
377
378 vxor 24, 24, 25
379 vxor 24, 24, 26
380
381 # sum hash and reduction with H Poly
382 vpmsumd 28, 23, 2 # reduction
383
384 xxlor 29+32, 29, 29
385
386 vxor 24, 24, 27 # M
387 vsldoi 26, 24, 29, 8 # mL
388 vsldoi 29, 29, 24, 8 # mH
389 vxor 23, 23, 26 # mL + L
390
391 vsldoi 23, 23, 23, 8 # swap
392 vxor 23, 23, 28
393
394 vpmsumd 24, 14, 19 # H4.H * X.H
395 vpmsumd 25, 11, 20
396 vpmsumd 26, 8, 21
397 vpmsumd 27, 5, 22
398
399 vxor 24, 24, 25
400 vxor 24, 24, 26
401 vxor 24, 24, 27 # H
402
403 vxor 24, 24, 29 # H + mH
404
405 # sum hash and reduction with H Poly
406 vsldoi 27, 23, 23, 8 # swap
407 vpmsumd 23, 23, 2
408 vxor 27, 27, 24
409 vxor 23, 23, 27
410
411 xxlor 32, 23+32, 23+32 # update hash
412
413 .endm
414
415 #
416 # Compute update single hash
417 #
# Fold one block (v28) into the hash v0: Xi xor X, then a single
# Karatsuba multiply by H (v3=H.l, v4=H, v5=H.h) with two reduction
# steps against the polynomial in v2.  v19-v27 are clobbered as
# scratch; the updated hash is left in v0.
418 .macro ppc_update_hash_1x
419 vxor 28, 28, 0
420
421 vxor 19, 19, 19
422
423 vpmsumd 22, 3, 28 # L
424 vpmsumd 23, 4, 28 # M
425 vpmsumd 24, 5, 28 # H
426
427 vpmsumd 27, 22, 2 # reduction
428
429 vsldoi 25, 23, 19, 8 # mL
430 vsldoi 26, 19, 23, 8 # mH
431 vxor 22, 22, 25 # LL + LL
432 vxor 24, 24, 26 # HH + HH
433
434 vsldoi 22, 22, 22, 8 # swap
435 vxor 22, 22, 27
436
437 vsldoi 20, 22, 22, 8 # swap
438 vpmsumd 22, 22, 2 # reduction
439 vxor 20, 20, 24
440 vxor 22, 22, 20
441
442 vmr 0, 22 # update hash
443
444 .endm
445
446 #
447 # ppc_aes_gcm_encrypt (const void *inp, void *out, size_t len,
448 # const AES_KEY *key, unsigned char iv[16],
449 # void *Xip);
450 #
451 # r3 - inp
452 # r4 - out
453 # r5 - len
454 # r6 - AES round keys
455 # r7 - iv
456 # r8 - Xi, HPoli, hash keys
457 #
458 .global ppc_aes_gcm_encrypt
459 .align 5
460 ppc_aes_gcm_encrypt:
461 _ppc_aes_gcm_encrypt:
462
# 512-byte frame: r14-r21 saved at offsets 112..168, v20-v31 at
# 256..432, scratch for masks/partial blocks at 192.  LR is stored at
# 528, i.e. the LR save slot of the caller's frame (512 + 16, ELFv2).
463 stdu 1,-512(1)
464 mflr 0
465
466 std 14,112(1)
467 std 15,120(1)
468 std 16,128(1)
469 std 17,136(1)
470 std 18,144(1)
471 std 19,152(1)
472 std 20,160(1)
473 std 21,168(1)
474 li 9, 256
475 stvx 20, 9, 1
476 addi 9, 9, 16
477 stvx 21, 9, 1
478 addi 9, 9, 16
479 stvx 22, 9, 1
480 addi 9, 9, 16
481 stvx 23, 9, 1
482 addi 9, 9, 16
483 stvx 24, 9, 1
484 addi 9, 9, 16
485 stvx 25, 9, 1
486 addi 9, 9, 16
487 stvx 26, 9, 1
488 addi 9, 9, 16
489 stvx 27, 9, 1
490 addi 9, 9, 16
491 stvx 28, 9, 1
492 addi 9, 9, 16
493 stvx 29, 9, 1
494 addi 9, 9, 16
495 stvx 30, 9, 1
496 addi 9, 9, 16
497 stvx 31, 9, 1
498 std 0, 528(1)
499
500 # Load Xi
501 lxvb16x 32, 0, 8 # load Xi
502
503 # load Hash - h^4, h^3, h^2, h
504 li 10, 32
505 lxvd2x 2+32, 10, 8 # H Poli
506 li 10, 48
507 lxvd2x 3+32, 10, 8 # Hl
508 li 10, 64
509 lxvd2x 4+32, 10, 8 # H
510 li 10, 80
511 lxvd2x 5+32, 10, 8 # Hh
512
513 li 10, 96
514 lxvd2x 6+32, 10, 8 # H^2l
515 li 10, 112
516 lxvd2x 7+32, 10, 8 # H^2
517 li 10, 128
518 lxvd2x 8+32, 10, 8 # H^2h
519
520 li 10, 144
521 lxvd2x 9+32, 10, 8 # H^3l
522 li 10, 160
523 lxvd2x 10+32, 10, 8 # H^3
524 li 10, 176
525 lxvd2x 11+32, 10, 8 # H^3h
526
527 li 10, 192
528 lxvd2x 12+32, 10, 8 # H^4l
529 li 10, 208
530 lxvd2x 13+32, 10, 8 # H^4
531 li 10, 224
532 lxvd2x 14+32, 10, 8 # H^4h
533
534 # initialize ICB: GHASH( IV ), IV - r7
535 lxvb16x 30+32, 0, 7 # load IV - v30
536
537 mr 12, 5 # length
538 li 11, 0 # block index
539
# Build the increment constant: v31 = 0...001 (a one in the lowest
# byte of the big-endian 128-bit value).
# NOTE(review): the counter is advanced with vaddudm, a 64-bit
# doubleword add, while GCM inc32 wraps at 32 bits -- a carry out of
# bit 31 of the counter field would differ from the spec; confirm.
540 # counter 1
541 vxor 31, 31, 31
542 vspltisb 22, 1
543 vsldoi 31, 31, 22,1 # counter 1
544
545 # load round key to VSR
546 lxv 0, 0(6)
547 lxv 1, 0x10(6)
548 lxv 2, 0x20(6)
549 lxv 3, 0x30(6)
550 lxv 4, 0x40(6)
551 lxv 5, 0x50(6)
552 lxv 6, 0x60(6)
553 lxv 7, 0x70(6)
554 lxv 8, 0x80(6)
555 lxv 9, 0x90(6)
556 lxv 10, 0xa0(6)
557
558 # load rounds - 10 (128), 12 (192), 14 (256)
559 lwz 9,240(6)
560
561 #
562 # vxor state, state, w # addroundkey
563 xxlor 32+29, 0, 0
564 vxor 15, 30, 29 # IV + round key - add round key 0
565
# Dispatch on key size; extra round keys are loaded lazily.
# NOTE(review): the fall-through for unsupported round counts jumps to
# aes_gcm_out with r11=0 and r12=len, so the returned count equals len
# even though nothing was processed -- callers presumably only pass
# 128/192/256-bit keys; confirm.
566 cmpdi 9, 10
567 beq Loop_aes_gcm_8x
568
569 # load 2 more round keys (v11, v12)
570 lxv 11, 0xb0(6)
571 lxv 12, 0xc0(6)
572
573 cmpdi 9, 12
574 beq Loop_aes_gcm_8x
575
576 # load 2 more round keys (v11, v12, v13, v14)
577 lxv 13, 0xd0(6)
578 lxv 14, 0xe0(6)
579 cmpdi 9, 14
580 beq Loop_aes_gcm_8x
581
582 b aes_gcm_out
583
584 .align 5
# Main stitched loop: encrypt 8 counter blocks (128 bytes) per
# iteration, XOR with plaintext, store ciphertext, then GHASH the 8
# ciphertext blocks with ppc_aes_gcm_ghash2_4x.
585 Loop_aes_gcm_8x:
586 mr 14, 3
587 mr 9, 4
588
589 # n blocks
590 li 10, 128
591 divdu 10, 5, 10 # n 128 bytes-blocks
592 cmpdi 10, 0
593 beq Loop_last_block
594
# Pre-compute counters 2..8 (v16-v22); each already has round key 0
# (v29) folded in.  v15 was prepared by the caller.
595 vaddudm 30, 30, 31 # IV + counter
596 vxor 16, 30, 29
597 vaddudm 30, 30, 31
598 vxor 17, 30, 29
599 vaddudm 30, 30, 31
600 vxor 18, 30, 29
601 vaddudm 30, 30, 31
602 vxor 19, 30, 29
603 vaddudm 30, 30, 31
604 vxor 20, 30, 29
605 vaddudm 30, 30, 31
606 vxor 21, 30, 29
607 vaddudm 30, 30, 31
608 vxor 22, 30, 29
609
610 mtctr 10
611
# r15-r21 hold byte offsets 16..112 used as load/store indices below.
612 li 15, 16
613 li 16, 32
614 li 17, 48
615 li 18, 64
616 li 19, 80
617 li 20, 96
618 li 21, 112
619
620 lwz 10, 240(6)
621
622 Loop_8x_block:
623
624 lxvb16x 15, 0, 14 # load block
625 lxvb16x 16, 15, 14 # load block
626 lxvb16x 17, 16, 14 # load block
627 lxvb16x 18, 17, 14 # load block
628 lxvb16x 19, 18, 14 # load block
629 lxvb16x 20, 19, 14 # load block
630 lxvb16x 21, 20, 14 # load block
631 lxvb16x 22, 21, 14 # load block
632 addi 14, 14, 128
633
634 Loop_aes_middle8x
635
636 xxlor 23+32, 10, 10
637
638 cmpdi 10, 10
639 beq Do_next_ghash
640
641 # 192 bits
642 xxlor 24+32, 11, 11
643
644 vcipher 15, 15, 23
645 vcipher 16, 16, 23
646 vcipher 17, 17, 23
647 vcipher 18, 18, 23
648 vcipher 19, 19, 23
649 vcipher 20, 20, 23
650 vcipher 21, 21, 23
651 vcipher 22, 22, 23
652
653 vcipher 15, 15, 24
654 vcipher 16, 16, 24
655 vcipher 17, 17, 24
656 vcipher 18, 18, 24
657 vcipher 19, 19, 24
658 vcipher 20, 20, 24
659 vcipher 21, 21, 24
660 vcipher 22, 22, 24
661
662 xxlor 23+32, 12, 12
663
664 cmpdi 10, 12
665 beq Do_next_ghash
666
667 # 256 bits
668 xxlor 24+32, 13, 13
669
670 vcipher 15, 15, 23
671 vcipher 16, 16, 23
672 vcipher 17, 17, 23
673 vcipher 18, 18, 23
674 vcipher 19, 19, 23
675 vcipher 20, 20, 23
676 vcipher 21, 21, 23
677 vcipher 22, 22, 23
678
679 vcipher 15, 15, 24
680 vcipher 16, 16, 24
681 vcipher 17, 17, 24
682 vcipher 18, 18, 24
683 vcipher 19, 19, 24
684 vcipher 20, 20, 24
685 vcipher 21, 21, 24
686 vcipher 22, 22, 24
687
688 xxlor 23+32, 14, 14
689
690 cmpdi 10, 14
691 beq Do_next_ghash
692 b aes_gcm_out
693
694 Do_next_ghash:
695
696 #
697 # last round
# Final round, then XOR keystream with input.  Note VSR 47..54 alias
# v15..v22, so the xxlxor leaves the ciphertext in v15-v22, which is
# exactly what the GHASH macro below consumes.
698 vcipherlast 15, 15, 23
699 vcipherlast 16, 16, 23
700
701 xxlxor 47, 47, 15
702 stxvb16x 47, 0, 9 # store output
703 xxlxor 48, 48, 16
704 stxvb16x 48, 15, 9 # store output
705
706 vcipherlast 17, 17, 23
707 vcipherlast 18, 18, 23
708
709 xxlxor 49, 49, 17
710 stxvb16x 49, 16, 9 # store output
711 xxlxor 50, 50, 18
712 stxvb16x 50, 17, 9 # store output
713
714 vcipherlast 19, 19, 23
715 vcipherlast 20, 20, 23
716
717 xxlxor 51, 51, 19
718 stxvb16x 51, 18, 9 # store output
719 xxlxor 52, 52, 20
720 stxvb16x 52, 19, 9 # store output
721
722 vcipherlast 21, 21, 23
723 vcipherlast 22, 22, 23
724
725 xxlxor 53, 53, 21
726 stxvb16x 53, 20, 9 # store output
727 xxlxor 54, 54, 22
728 stxvb16x 54, 21, 9 # store output
729
730 addi 9, 9, 128
731
732 # ghash here
733 ppc_aes_gcm_ghash2_4x
734
# Prepare the next 8 counter blocks; v29 keeps a copy of the first
# counter of the batch so the loop exit below can rewind v30 to the
# last counter actually consumed.
735 xxlor 27+32, 0, 0
736 vaddudm 30, 30, 31 # IV + counter
737 vmr 29, 30
738 vxor 15, 30, 27 # add round key
739 vaddudm 30, 30, 31
740 vxor 16, 30, 27
741 vaddudm 30, 30, 31
742 vxor 17, 30, 27
743 vaddudm 30, 30, 31
744 vxor 18, 30, 27
745 vaddudm 30, 30, 31
746 vxor 19, 30, 27
747 vaddudm 30, 30, 31
748 vxor 20, 30, 27
749 vaddudm 30, 30, 31
750 vxor 21, 30, 27
751 vaddudm 30, 30, 31
752 vxor 22, 30, 27
753
754 addi 12, 12, -128
755 addi 11, 11, 128
756
757 bdnz Loop_8x_block
758
759 vmr 30, 29
760
# Handle the tail (< 128 bytes): full 16-byte blocks one at a time via
# Next_rem_block, then any final partial block via Final_block.
761 Loop_last_block:
762 cmpdi 12, 0
763 beq aes_gcm_out
764
765 # loop last few blocks
766 li 10, 16
767 divdu 10, 12, 10
768
769 mtctr 10
770
771 lwz 10, 240(6)
772
773 cmpdi 12, 16
774 blt Final_block
775
# Single-state variant of the middle-round helper: AES rounds 1..9 on
# v15 only, keys staged from vs1-vs9.
776 .macro Loop_aes_middle_1x
777 xxlor 19+32, 1, 1
778 xxlor 20+32, 2, 2
779 xxlor 21+32, 3, 3
780 xxlor 22+32, 4, 4
781
782 vcipher 15, 15, 19
783 vcipher 15, 15, 20
784 vcipher 15, 15, 21
785 vcipher 15, 15, 22
786
787 xxlor 19+32, 5, 5
788 xxlor 20+32, 6, 6
789 xxlor 21+32, 7, 7
790 xxlor 22+32, 8, 8
791
792 vcipher 15, 15, 19
793 vcipher 15, 15, 20
794 vcipher 15, 15, 21
795 vcipher 15, 15, 22
796
797 xxlor 19+32, 9, 9
798 vcipher 15, 15, 19
799 .endm
800
801 Next_rem_block:
802 lxvb16x 15, 0, 14 # load block
803
804 Loop_aes_middle_1x
805
806 xxlor 23+32, 10, 10
807
808 cmpdi 10, 10
809 beq Do_next_1x
810
811 # 192 bits
812 xxlor 24+32, 11, 11
813
814 vcipher 15, 15, 23
815 vcipher 15, 15, 24
816
817 xxlor 23+32, 12, 12
818
819 cmpdi 10, 12
820 beq Do_next_1x
821
822 # 256 bits
823 xxlor 24+32, 13, 13
824
825 vcipher 15, 15, 23
826 vcipher 15, 15, 24
827
828 xxlor 23+32, 14, 14
829
# NOTE(review): this branch is redundant -- execution falls through to
# Do_next_1x either way.  Harmless, matches the other key-size ladders.
830 cmpdi 10, 14
831 beq Do_next_1x
832
833 Do_next_1x:
834 vcipherlast 15, 15, 23
835
# vs47 aliases v15: after this XOR, v15 holds the ciphertext block,
# which is then hashed (GHASH is over ciphertext for encryption).
836 xxlxor 47, 47, 15
837 stxvb16x 47, 0, 9 # store output
838 addi 14, 14, 16
839 addi 9, 9, 16
840
841 vmr 28, 15
842 ppc_update_hash_1x
843
844 addi 12, 12, -16
845 addi 11, 11, 16
846 xxlor 19+32, 0, 0
847 vaddudm 30, 30, 31 # IV + counter
848 vxor 15, 30, 19 # add round key
849
850 bdnz Next_rem_block
851
852 cmpdi 12, 0
853 beq aes_gcm_out
854
# Encrypt the final partial block (r12 < 16 bytes remaining).  v15
# already holds counter + round key 0 from the previous step.
855 Final_block:
856 Loop_aes_middle_1x
857
858 xxlor 23+32, 10, 10
859
860 cmpdi 10, 10
861 beq Do_final_1x
862
863 # 192 bits
864 xxlor 24+32, 11, 11
865
866 vcipher 15, 15, 23
867 vcipher 15, 15, 24
868
869 xxlor 23+32, 12, 12
870
871 cmpdi 10, 12
872 beq Do_final_1x
873
874 # 256 bits
875 xxlor 24+32, 13, 13
876
877 vcipher 15, 15, 23
878 vcipher 15, 15, 24
879
880 xxlor 23+32, 14, 14
881
882 cmpdi 10, 14
883 beq Do_final_1x
884
885 Do_final_1x:
886 vcipherlast 15, 15, 23
887
888 lxvb16x 15, 0, 14 # load last block
889 xxlxor 47, 47, 15
890
# Build a byte mask on the stack: 16 bytes of 0xff followed by 16
# bytes of 0x00 at offset 192; loading 16 bytes starting at offset
# (16 - remaining) yields 'remaining' ones followed by zeroes, which
# zeroes the keystream bytes beyond the message before hashing.
891 # create partial block mask
892 li 15, 16
893 sub 15, 15, 12 # index to the mask
894
895 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
896 vspltisb 17, 0 # second 16 bytes - 0x0000...00
897 li 10, 192
898 stvx 16, 10, 1
899 addi 10, 10, 16
900 stvx 17, 10, 1
901
902 addi 10, 1, 192
903 lxvb16x 16, 15, 10 # load partial block mask
904 xxland 47, 47, 16
905
# vs47 aliases v15, so v28 receives the masked ciphertext for GHASH.
906 vmr 28, 15
907 ppc_update_hash_1x
908
909 # * should store only the remaining bytes.
910 bl Write_partial_block
911
912 b aes_gcm_out
913
914 #
915 # Write partial block
916 # r9 - output
917 # r12 - remaining bytes
918 # v15 - partial input data
919 #
# Spill v15 to the stack scratch area at offset 192, then copy r12
# bytes to the output one byte at a time with pre-incremented
# load/store (hence the minus-one start addresses).  Clobbers r10,
# r14, r15, r16 (all volatile or saved by the callers).
920 Write_partial_block:
921 li 10, 192
922 stxvb16x 15+32, 10, 1 # last block
923
924 #add 10, 9, 11 # Output
925 addi 10, 9, -1
926 addi 16, 1, 191
927
928 mtctr 12 # remaining bytes
# NOTE(review): r15 is set here but never read afterwards -- looks
# like leftover from an earlier indexing scheme; confirm.
929 li 15, 0
930
931 Write_last_byte:
932 lbzu 14, 1(16)
933 stbu 14, 1(10)
934 bdnz Write_last_byte
935 blr
936
# Common exit for both encrypt and decrypt: write the updated hash
# back to Xi, compute the return count, restore non-volatile
# registers, and unwind the 512-byte frame.
937 aes_gcm_out:
938 # out = state
939 stxvb16x 32, 0, 8 # write out Xi
# r11 = bytes consumed by full-block processing, r12 = bytes handed to
# the partial-block path; their sum is the processed byte count
# returned in r3.
940 add 3, 11, 12 # return count
941
942 li 9, 256
943 lvx 20, 9, 1
944 addi 9, 9, 16
945 lvx 21, 9, 1
946 addi 9, 9, 16
947 lvx 22, 9, 1
948 addi 9, 9, 16
949 lvx 23, 9, 1
950 addi 9, 9, 16
951 lvx 24, 9, 1
952 addi 9, 9, 16
953 lvx 25, 9, 1
954 addi 9, 9, 16
955 lvx 26, 9, 1
956 addi 9, 9, 16
957 lvx 27, 9, 1
958 addi 9, 9, 16
959 lvx 28, 9, 1
960 addi 9, 9, 16
961 lvx 29, 9, 1
962 addi 9, 9, 16
963 lvx 30, 9, 1
964 addi 9, 9, 16
965 lvx 31, 9, 1
966
967 ld 0, 528(1)
968 ld 14,112(1)
969 ld 15,120(1)
970 ld 16,128(1)
971 ld 17,136(1)
972 ld 18,144(1)
973 ld 19,152(1)
974 ld 20,160(1)
975 ld 21,168(1)
976
977 mtlr 0
978 addi 1, 1, 512
979 blr
980
981 #
982 # 8x Decrypt
983 #
# Same calling convention and setup as ppc_aes_gcm_encrypt above
# (r3 inp, r4 out, r5 len, r6 round keys, r7 iv, r8 Xi/H table); only
# the main loops differ (GHASH consumes the loaded ciphertext).
984 .global ppc_aes_gcm_decrypt
985 .align 5
986 ppc_aes_gcm_decrypt:
987 _ppc_aes_gcm_decrypt:
988
989 stdu 1,-512(1)
990 mflr 0
991
992 std 14,112(1)
993 std 15,120(1)
994 std 16,128(1)
995 std 17,136(1)
996 std 18,144(1)
997 std 19,152(1)
998 std 20,160(1)
999 std 21,168(1)
1000 li 9, 256
1001 stvx 20, 9, 1
1002 addi 9, 9, 16
1003 stvx 21, 9, 1
1004 addi 9, 9, 16
1005 stvx 22, 9, 1
1006 addi 9, 9, 16
1007 stvx 23, 9, 1
1008 addi 9, 9, 16
1009 stvx 24, 9, 1
1010 addi 9, 9, 16
1011 stvx 25, 9, 1
1012 addi 9, 9, 16
1013 stvx 26, 9, 1
1014 addi 9, 9, 16
1015 stvx 27, 9, 1
1016 addi 9, 9, 16
1017 stvx 28, 9, 1
1018 addi 9, 9, 16
1019 stvx 29, 9, 1
1020 addi 9, 9, 16
1021 stvx 30, 9, 1
1022 addi 9, 9, 16
1023 stvx 31, 9, 1
1024 std 0, 528(1)
1025
1026 # Load Xi
1027 lxvb16x 32, 0, 8 # load Xi
1028
1029 # load Hash - h^4, h^3, h^2, h
1030 li 10, 32
1031 lxvd2x 2+32, 10, 8 # H Poli
1032 li 10, 48
1033 lxvd2x 3+32, 10, 8 # Hl
1034 li 10, 64
1035 lxvd2x 4+32, 10, 8 # H
1036 li 10, 80
1037 lxvd2x 5+32, 10, 8 # Hh
1038
1039 li 10, 96
1040 lxvd2x 6+32, 10, 8 # H^2l
1041 li 10, 112
1042 lxvd2x 7+32, 10, 8 # H^2
1043 li 10, 128
1044 lxvd2x 8+32, 10, 8 # H^2h
1045
1046 li 10, 144
1047 lxvd2x 9+32, 10, 8 # H^3l
1048 li 10, 160
1049 lxvd2x 10+32, 10, 8 # H^3
1050 li 10, 176
1051 lxvd2x 11+32, 10, 8 # H^3h
1052
1053 li 10, 192
1054 lxvd2x 12+32, 10, 8 # H^4l
1055 li 10, 208
1056 lxvd2x 13+32, 10, 8 # H^4
1057 li 10, 224
1058 lxvd2x 14+32, 10, 8 # H^4h
1059
1060 # initialize ICB: GHASH( IV ), IV - r7
1061 lxvb16x 30+32, 0, 7 # load IV - v30
1062
1063 mr 12, 5 # length
1064 li 11, 0 # block index
1065
# See the NOTE in the encrypt path about vaddudm vs GCM inc32.
1066 # counter 1
1067 vxor 31, 31, 31
1068 vspltisb 22, 1
1069 vsldoi 31, 31, 22,1 # counter 1
1070
1071 # load round key to VSR
1072 lxv 0, 0(6)
1073 lxv 1, 0x10(6)
1074 lxv 2, 0x20(6)
1075 lxv 3, 0x30(6)
1076 lxv 4, 0x40(6)
1077 lxv 5, 0x50(6)
1078 lxv 6, 0x60(6)
1079 lxv 7, 0x70(6)
1080 lxv 8, 0x80(6)
1081 lxv 9, 0x90(6)
1082 lxv 10, 0xa0(6)
1083
1084 # load rounds - 10 (128), 12 (192), 14 (256)
1085 lwz 9,240(6)
1086
1087 #
1088 # vxor state, state, w # addroundkey
1089 xxlor 32+29, 0, 0
1090 vxor 15, 30, 29 # IV + round key - add round key 0
1091
1092 cmpdi 9, 10
1093 beq Loop_aes_gcm_8x_dec
1094
1095 # load 2 more round keys (v11, v12)
1096 lxv 11, 0xb0(6)
1097 lxv 12, 0xc0(6)
1098
1099 cmpdi 9, 12
1100 beq Loop_aes_gcm_8x_dec
1101
1102 # load 2 more round keys (v11, v12, v13, v14)
1103 lxv 13, 0xd0(6)
1104 lxv 14, 0xe0(6)
1105 cmpdi 9, 14
1106 beq Loop_aes_gcm_8x_dec
1107
1108 b aes_gcm_out
1109
1110 .align 5
# Decrypt main loop: mirrors Loop_aes_gcm_8x, except the ciphertext
# blocks loaded from the input are saved back into v15-v22 (after the
# plaintext has been stored) so that GHASH runs over the ciphertext.
1111 Loop_aes_gcm_8x_dec:
1112 mr 14, 3
1113 mr 9, 4
1114
1115 # n blocks
1116 li 10, 128
1117 divdu 10, 5, 10 # n 128 bytes-blocks
1118 cmpdi 10, 0
1119 beq Loop_last_block_dec
1120
1121 vaddudm 30, 30, 31 # IV + counter
1122 vxor 16, 30, 29
1123 vaddudm 30, 30, 31
1124 vxor 17, 30, 29
1125 vaddudm 30, 30, 31
1126 vxor 18, 30, 29
1127 vaddudm 30, 30, 31
1128 vxor 19, 30, 29
1129 vaddudm 30, 30, 31
1130 vxor 20, 30, 29
1131 vaddudm 30, 30, 31
1132 vxor 21, 30, 29
1133 vaddudm 30, 30, 31
1134 vxor 22, 30, 29
1135
1136 mtctr 10
1137
1138 li 15, 16
1139 li 16, 32
1140 li 17, 48
1141 li 18, 64
1142 li 19, 80
1143 li 20, 96
1144 li 21, 112
1145
1146 lwz 10, 240(6)
1147
1148 Loop_8x_block_dec:
1149
1150 lxvb16x 15, 0, 14 # load block
1151 lxvb16x 16, 15, 14 # load block
1152 lxvb16x 17, 16, 14 # load block
1153 lxvb16x 18, 17, 14 # load block
1154 lxvb16x 19, 18, 14 # load block
1155 lxvb16x 20, 19, 14 # load block
1156 lxvb16x 21, 20, 14 # load block
1157 lxvb16x 22, 21, 14 # load block
1158 addi 14, 14, 128
1159
1160 Loop_aes_middle8x
1161
1162 xxlor 23+32, 10, 10
1163
1164 cmpdi 10, 10
1165 beq Do_last_aes_dec
1166
1167 # 192 bits
1168 xxlor 24+32, 11, 11
1169
1170 vcipher 15, 15, 23
1171 vcipher 16, 16, 23
1172 vcipher 17, 17, 23
1173 vcipher 18, 18, 23
1174 vcipher 19, 19, 23
1175 vcipher 20, 20, 23
1176 vcipher 21, 21, 23
1177 vcipher 22, 22, 23
1178
1179 vcipher 15, 15, 24
1180 vcipher 16, 16, 24
1181 vcipher 17, 17, 24
1182 vcipher 18, 18, 24
1183 vcipher 19, 19, 24
1184 vcipher 20, 20, 24
1185 vcipher 21, 21, 24
1186 vcipher 22, 22, 24
1187
1188 xxlor 23+32, 12, 12
1189
1190 cmpdi 10, 12
1191 beq Do_last_aes_dec
1192
1193 # 256 bits
1194 xxlor 24+32, 13, 13
1195
1196 vcipher 15, 15, 23
1197 vcipher 16, 16, 23
1198 vcipher 17, 17, 23
1199 vcipher 18, 18, 23
1200 vcipher 19, 19, 23
1201 vcipher 20, 20, 23
1202 vcipher 21, 21, 23
1203 vcipher 22, 22, 23
1204
1205 vcipher 15, 15, 24
1206 vcipher 16, 16, 24
1207 vcipher 17, 17, 24
1208 vcipher 18, 18, 24
1209 vcipher 19, 19, 24
1210 vcipher 20, 20, 24
1211 vcipher 21, 21, 24
1212 vcipher 22, 22, 24
1213
1214 xxlor 23+32, 14, 14
1215
1216 cmpdi 10, 14
1217 beq Do_last_aes_dec
1218 b aes_gcm_out
1219
1220 Do_last_aes_dec:
1221
1222 #
1223 # last round
1224 vcipherlast 15, 15, 23
1225 vcipherlast 16, 16, 23
1226
1227 xxlxor 47, 47, 15
1228 stxvb16x 47, 0, 9 # store output
1229 xxlxor 48, 48, 16
1230 stxvb16x 48, 15, 9 # store output
1231
1232 vcipherlast 17, 17, 23
1233 vcipherlast 18, 18, 23
1234
1235 xxlxor 49, 49, 17
1236 stxvb16x 49, 16, 9 # store output
1237 xxlxor 50, 50, 18
1238 stxvb16x 50, 17, 9 # store output
1239
1240 vcipherlast 19, 19, 23
1241 vcipherlast 20, 20, 23
1242
1243 xxlxor 51, 51, 19
1244 stxvb16x 51, 18, 9 # store output
1245 xxlxor 52, 52, 20
1246 stxvb16x 52, 19, 9 # store output
1247
1248 vcipherlast 21, 21, 23
1249 vcipherlast 22, 22, 23
1250
1251 xxlxor 53, 53, 21
1252 stxvb16x 53, 20, 9 # store output
1253 xxlxor 54, 54, 22
1254 stxvb16x 54, 21, 9 # store output
1255
1256 addi 9, 9, 128
1257
# Copy the original ciphertext blocks (still in vs15-vs22) into
# v15-v22 for GHASH -- decryption authenticates the ciphertext.
1258 xxlor 15+32, 15, 15
1259 xxlor 16+32, 16, 16
1260 xxlor 17+32, 17, 17
1261 xxlor 18+32, 18, 18
1262 xxlor 19+32, 19, 19
1263 xxlor 20+32, 20, 20
1264 xxlor 21+32, 21, 21
1265 xxlor 22+32, 22, 22
1266
1267 # ghash here
1268 ppc_aes_gcm_ghash2_4x
1269
1270 xxlor 27+32, 0, 0
1271 vaddudm 30, 30, 31 # IV + counter
1272 vmr 29, 30
1273 vxor 15, 30, 27 # add round key
1274 vaddudm 30, 30, 31
1275 vxor 16, 30, 27
1276 vaddudm 30, 30, 31
1277 vxor 17, 30, 27
1278 vaddudm 30, 30, 31
1279 vxor 18, 30, 27
1280 vaddudm 30, 30, 31
1281 vxor 19, 30, 27
1282 vaddudm 30, 30, 31
1283 vxor 20, 30, 27
1284 vaddudm 30, 30, 31
1285 vxor 21, 30, 27
1286 vaddudm 30, 30, 31
1287 vxor 22, 30, 27
1288 addi 12, 12, -128
1289 addi 11, 11, 128
1290
1291 bdnz Loop_8x_block_dec
1292
1293 vmr 30, 29
1294
# Decrypt tail: full 16-byte blocks one at a time, mirroring
# Next_rem_block, but hashing the loaded ciphertext (vs15) instead of
# the XOR result.
1295 Loop_last_block_dec:
1296 cmpdi 12, 0
1297 beq aes_gcm_out
1298
1299 # loop last few blocks
1300 li 10, 16
1301 divdu 10, 12, 10
1302
1303 mtctr 10
1304
1305 lwz 10,240(6)
1306
1307 cmpdi 12, 16
1308 blt Final_block_dec
1309
1310 Next_rem_block_dec:
1311 lxvb16x 15, 0, 14 # load block
1312
1313 Loop_aes_middle_1x
1314
1315 xxlor 23+32, 10, 10
1316
1317 cmpdi 10, 10
1318 beq Do_next_1x_dec
1319
1320 # 192 bits
1321 xxlor 24+32, 11, 11
1322
1323 vcipher 15, 15, 23
1324 vcipher 15, 15, 24
1325
1326 xxlor 23+32, 12, 12
1327
1328 cmpdi 10, 12
1329 beq Do_next_1x_dec
1330
1331 # 256 bits
1332 xxlor 24+32, 13, 13
1333
1334 vcipher 15, 15, 23
1335 vcipher 15, 15, 24
1336
1337 xxlor 23+32, 14, 14
1338
# Redundant branch -- falls through to Do_next_1x_dec either way.
1339 cmpdi 10, 14
1340 beq Do_next_1x_dec
1341
1342 Do_next_1x_dec:
1343 vcipherlast 15, 15, 23
1344
1345 xxlxor 47, 47, 15
1346 stxvb16x 47, 0, 9 # store output
1347 addi 14, 14, 16
1348 addi 9, 9, 16
1349
# v28 = vs15 = the ciphertext block that was loaded above.
1350 xxlor 28+32, 15, 15
1351 ppc_update_hash_1x
1352
1353 addi 12, 12, -16
1354 addi 11, 11, 16
1355 xxlor 19+32, 0, 0
1356 vaddudm 30, 30, 31 # IV + counter
1357 vxor 15, 30, 19 # add round key
1358
1359 bdnz Next_rem_block_dec
1360
1361 cmpdi 12, 0
1362 beq aes_gcm_out
1363
# Decrypt the final partial block (r12 < 16 bytes remaining).
1364 Final_block_dec:
1365 Loop_aes_middle_1x
1366
1367 xxlor 23+32, 10, 10
1368
1369 cmpdi 10, 10
1370 beq Do_final_1x_dec
1371
1372 # 192 bits
1373 xxlor 24+32, 11, 11
1374
1375 vcipher 15, 15, 23
1376 vcipher 15, 15, 24
1377
1378 xxlor 23+32, 12, 12
1379
1380 cmpdi 10, 12
1381 beq Do_final_1x_dec
1382
1383 # 256 bits
1384 xxlor 24+32, 13, 13
1385
1386 vcipher 15, 15, 23
1387 vcipher 15, 15, 24
1388
1389 xxlor 23+32, 14, 14
1390
1391 cmpdi 10, 14
1392 beq Do_final_1x_dec
1393
1394 Do_final_1x_dec:
1395 vcipherlast 15, 15, 23
1396
1397 lxvb16x 15, 0, 14 # load block
1398 xxlxor 47, 47, 15
1399
# Same stack-based byte mask as in the encrypt final block; it zeroes
# the plaintext (vs47) beyond the message length before the partial
# store below.
1400 # create partial block mask
1401 li 15, 16
1402 sub 15, 15, 12 # index to the mask
1403
1404 vspltisb 16, -1 # first 16 bytes - 0xffff...ff
1405 vspltisb 17, 0 # second 16 bytes - 0x0000...00
1406 li 10, 192
1407 stvx 16, 10, 1
1408 addi 10, 10, 16
1409 stvx 17, 10, 1
1410
1411 addi 10, 1, 192
1412 lxvb16x 16, 15, 10 # load block mask
1413 xxland 47, 47, 16
1414
# NOTE(review): v28 here is vs15, the full 16 bytes loaded from the
# input, with no mask applied -- unlike the encrypt path, which hashes
# the masked block.  For a partial final block this hashes whatever
# bytes follow the message in the caller's buffer; confirm against the
# generic GCM implementation (GHASH expects zero-padded ciphertext).
1415 xxlor 28+32, 15, 15
1416 ppc_update_hash_1x
1417
1418 # * should store only the remaining bytes.
1419 bl Write_partial_block
1420
1421 b aes_gcm_out
1422
1423
1424 ___
1425
# Post-process the generated text line by line: expand backquoted
# expressions via eval, resolve the le?/be? endian-conditional
# prefixes for the requested flavour, and emit the result through the
# ppc-xlate pipe on STDOUT.
for my $line (split /\n/, $code) {
	$line =~ s/\`([^\`]*)\`/eval $1/ge;

	if ($flavour =~ /le$/) {	# little-endian
		$line =~ s/le\?//	or
		$line =~ s/be\?/#be#/;
	} else {			# big-endian
		$line =~ s/le\?/#le#/	or
		$line =~ s/be\?//;
	}

	print $line, "\n";
}

close STDOUT or die "error closing STDOUT: $!";	# enforce flush