]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/aes/asm/aes-ppc.pl
spelling fixes, just comments and readme.
[thirdparty/openssl.git] / crypto / aes / asm / aes-ppc.pl
1 #! /usr/bin/env perl
2 # Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # Needs more work: key setup, CBC routine...
18 #
19 # ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
20 # 128-bit key, which is ~40% better than 64-bit code generated by gcc
21 # 4.0. But these are not the ones currently used! Their "compact"
22 # counterparts are, for security reasons. ppc_AES_encrypt_compact runs
23 # at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
24 # at 1/3 of ppc_AES_decrypt.
25
26 # February 2010
27 #
28 # Rescheduling instructions to favour Power6 pipeline gave 10%
29 # performance improvement on the platform in question (and marginal
30 # improvement even on others). It should be noted that Power6 fails
31 # to process a byte in 18 cycles, only in 23, because it fails to issue
32 # 4 load instructions in two cycles, only in 3. As a result, non-compact
33 # block subroutines are 25% slower than one would expect. Compact
34 # functions scale better, because they have a pure computational part,
35 # which scales perfectly with clock frequency. To be specific
36 # ppc_AES_encrypt_compact operates at 42 cycles per byte, while
37 # ppc_AES_decrypt_compact - at 55 (in 64-bit build).
38
# First command-line argument: the target "flavour".  It must contain "64"
# or "32" (selects pointer size and the matching load/store mnemonics) and
# may end in "le" for little-endian targets.
$flavour = shift;

if ($flavour =~ /64/) {
	$SIZE_T	=8;
	$LRSAVE	=2*$SIZE_T;
	$STU	="stdu";
	$POP	="ld";
	$PUSH	="std";
} elsif ($flavour =~ /32/) {
	$SIZE_T	=4;
	$LRSAVE	=$SIZE_T;
	$STU	="stwu";
	$POP	="lwz";
	$PUSH	="stw";
} else { die "nonsense $flavour"; }

# True (equal to $SIZE_T) when the flavour name ends in "le", 0 otherwise.
$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;

# Look for ppc-xlate.pl next to this script, then in ../../perlasm/.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT is piped through the translator; the next
# command-line argument (the output file, if any) is handed on to it.
# "or" is required here: "||" would bind to the command string, which is
# always true, so a failed open() would go unnoticed.
open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";

# Stack frame size used by the public entry points.
$FRAME=32*$SIZE_T;
65
# Append one ".long" directive per argument to the global $code.  Each
# 32-bit value is emitted twice, so every table entry occupies 8 bytes.
# Processing stops at the first undefined argument.
#
# No prototype is declared: the former empty "()" prototype claimed that
# the sub takes no arguments, which only worked because the callers use
# the "&_data_word(...)" form that bypasses prototype checking.
sub _data_word
{ my $i;
	while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
}
70
# Symbolic register names interpolated into the assembly templates.
#
# Stack pointer, r2 (not referenced by the templates visible here) and the
# three pointer arguments: input block, output block, key schedule.
71 $sp="r1";
72 $toc="r2";
73 $inp="r3";
74 $out="r4";
75 $key="r5";
76
# Lookup-table pointers.  $Tbl0 shares r3 with $inp (LAES_Te/LAES_Td write
# it only after the input block has been loaded) and $Tbl3 shares r4 with
# $out, which the entry points save on the stack and reload afterwards.
77 $Tbl0="r3";
78 $Tbl1="r6";
79 $Tbl2="r7";
80 $Tbl3=$out; # stay away from "r2"; $out is offloaded to stack
81
# The four 32-bit words of the cipher state.
82 $s0="r8";
83 $s1="r9";
84 $s2="r10";
85 $s3="r11";
86
# Temporaries; in the cores they hold the four words of the round key.
87 $t0="r12";
88 $t1="r0"; # stay away from "r13";
89 $t2="r14";
90 $t3="r15";
91
# Sixteen scratch registers for table indices and looked-up values.
92 $acc00="r16";
93 $acc01="r17";
94 $acc02="r18";
95 $acc03="r19";
96
97 $acc04="r20";
98 $acc05="r21";
99 $acc06="r22";
100 $acc07="r23";
101
102 $acc08="r24";
103 $acc09="r25";
104 $acc10="r26";
105 $acc11="r27";
106
107 $acc12="r28";
108 $acc13="r29";
109 $acc14="r30";
110 $acc15="r31";
111
# Lppc_AES_encrypt_compact keeps the constants 0x80808080 and 0x1b1b1b1b
# in these; they alias table-pointer registers that routine does not use.
112 $mask80=$Tbl2;
113 $mask1b=$Tbl3;
114
# Table locators.  LAES_Te and LAES_Td return the address of their lookup
# table in $Tbl0: "bcl 20,31,$+4" places the address of the following
# instruction in LR, and the addi adds the distance from there to the
# first table entry.  The caller's LR is kept in r0 and restored before
# returning.  The Td distance additionally skips 2048+256 bytes, i.e. the
# 256 eight-byte entries emitted by the first &_data_word() call and the
# 256 .byte values that follow them.
115 $code.=<<___;
116 .machine "any"
117 .text
118
119 .align 7
120 LAES_Te:
121 mflr r0
122 bcl 20,31,\$+4
123 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
124 addi $Tbl0,$Tbl0,`128-8`
125 mtlr r0
126 blr
127 .long 0
128 .byte 0,12,0x14,0,0,0,0,0
129 .space `64-9*4`
130 LAES_Td:
131 mflr r0
132 bcl 20,31,\$+4
133 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
134 addi $Tbl0,$Tbl0,`128-64-8+2048+256`
135 mtlr r0
136 blr
137 .long 0
138 .byte 0,12,0x14,0,0,0,0,0
139 .space `128-64-9*4`
140 ___
141 &_data_word(
142 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
143 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
144 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
145 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
146 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
147 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
148 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
149 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
150 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
151 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
152 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
153 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
154 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
155 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
156 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
157 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
158 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
159 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
160 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
161 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
162 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
163 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
164 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
165 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
166 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
167 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
168 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
169 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
170 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
171 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
172 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
173 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
174 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
175 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
176 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
177 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
178 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
179 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
180 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
181 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
182 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
183 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
184 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
185 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
186 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
187 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
188 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
189 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
190 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
191 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
192 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
193 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
194 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
195 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
196 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
197 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
198 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
199 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
200 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
201 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
202 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
203 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
204 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
205 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
206 $code.=<<___;
207 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
208 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
209 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
210 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
211 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
212 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
213 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
214 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
215 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
216 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
217 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
218 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
219 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
220 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
221 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
222 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
223 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
224 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
225 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
226 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
227 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
228 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
229 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
230 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
231 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
232 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
233 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
234 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
235 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
236 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
237 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
238 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
239 ___
240 &_data_word(
241 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
242 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
243 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
244 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
245 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
246 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
247 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
248 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
249 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
250 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
251 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
252 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
253 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
254 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
255 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
256 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
257 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
258 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
259 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
260 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
261 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
262 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
263 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
264 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
265 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
266 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
267 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
268 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
269 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
270 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
271 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
272 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
273 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
274 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
275 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
276 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
277 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
278 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
279 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
280 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
281 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
282 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
283 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
284 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
285 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
286 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
287 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
288 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
289 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
290 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
291 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
292 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
293 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
294 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
295 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
296 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
297 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
298 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
299 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
300 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
301 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
302 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
303 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
304 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
# Two parts:
#  - 256 single-byte entries (the AES inverse S-box) placed directly after
#    the second word table;
#  - the prologue of the public .AES_encrypt entry point: it allocates a
#    $FRAME-byte stack frame, saves $out, r14-r31 and LR, and branches to
#    Lenc_unaligned when either $inp or $out is not 4-byte aligned.
#    Otherwise execution falls through to Lenc_unaligned_ok.
305 $code.=<<___;
306 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
307 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
308 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
309 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
310 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
311 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
312 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
313 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
314 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
315 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
316 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
317 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
318 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
319 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
320 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
321 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
322 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
323 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
324 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
325 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
326 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
327 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
328 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
329 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
330 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
331 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
332 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
333 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
334 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
335 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
336 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
337 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
338
339
340 .globl .AES_encrypt
341 .align 7
342 .AES_encrypt:
343 $STU $sp,-$FRAME($sp)
344 mflr r0
345
346 $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
347 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
348 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
349 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
350 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
351 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
352 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
353 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
354 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
355 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
356 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
357 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
358 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
359 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
360 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
361 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
362 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
363 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
364 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
365 $PUSH r0,`$FRAME+$LRSAVE`($sp)
366
367 andi. $t0,$inp,3
368 andi. $t1,$out,3
369 or. $t0,$t0,$t1
370 bne Lenc_unaligned
371
372 Lenc_unaligned_ok:
373 ___
# Big-endian flavours: load the four input words straight into $s0-$s3.
374 $code.=<<___ if (!$LITTLE_ENDIAN);
375 lwz $s0,0($inp)
376 lwz $s1,4($inp)
377 lwz $s2,8($inp)
378 lwz $s3,12($inp)
379 ___
# Little-endian flavours: load the four input words into $t0-$t3, then
# reverse the byte order of each into $s0-$s3 (rotate left by 8, then
# patch the first and third bytes from the word rotated left by 24).
380 $code.=<<___ if ($LITTLE_ENDIAN);
381 lwz $t0,0($inp)
382 lwz $t1,4($inp)
383 lwz $t2,8($inp)
384 lwz $t3,12($inp)
385 rotlwi $s0,$t0,8
386 rotlwi $s1,$t1,8
387 rotlwi $s2,$t2,8
388 rotlwi $s3,$t3,8
389 rlwimi $s0,$t0,24,0,7
390 rlwimi $s1,$t1,24,0,7
391 rlwimi $s2,$t2,24,0,7
392 rlwimi $s3,$t3,24,0,7
393 rlwimi $s0,$t0,24,16,23
394 rlwimi $s1,$t1,24,16,23
395 rlwimi $s2,$t2,24,16,23
396 rlwimi $s3,$t3,24,16,23
397 ___
# Fetch the table address into $Tbl0, run the compact encryption core on
# $s0-$s3, then reload $out from its stack slot (its register doubles as
# $Tbl3/$mask1b inside the core).
398 $code.=<<___;
399 bl LAES_Te
400 bl Lppc_AES_encrypt_compact
401 $POP $out,`$FRAME-$SIZE_T*19`($sp)
402 ___
# Little-endian flavours: reverse the byte order of each state word into
# $t0-$t3 and store the four words to $out.
403 $code.=<<___ if ($LITTLE_ENDIAN);
404 rotlwi $t0,$s0,8
405 rotlwi $t1,$s1,8
406 rotlwi $t2,$s2,8
407 rotlwi $t3,$s3,8
408 rlwimi $t0,$s0,24,0,7
409 rlwimi $t1,$s1,24,0,7
410 rlwimi $t2,$s2,24,0,7
411 rlwimi $t3,$s3,24,0,7
412 rlwimi $t0,$s0,24,16,23
413 rlwimi $t1,$s1,24,16,23
414 rlwimi $t2,$s2,24,16,23
415 rlwimi $t3,$s3,24,16,23
416 stw $t0,0($out)
417 stw $t1,4($out)
418 stw $t2,8($out)
419 stw $t3,12($out)
420 ___
# Big-endian flavours: store the four state words to $out unchanged.
421 $code.=<<___ if (!$LITTLE_ENDIAN);
422 stw $s0,0($out)
423 stw $s1,4($out)
424 stw $s2,8($out)
425 stw $s3,12($out)
426 ___
# Remainder of .AES_encrypt, the two encryption cores and the prologue of
# .AES_decrypt:
#
#   Lenc_unaligned   reached when $inp or $out is not 4-byte aligned.  It
#                    returns to the word-access path (Lenc_unaligned_ok)
#                    unless (4096-ptr)&(4096-16) is zero for either
#                    pointer, in which case it continues at Lenc_xpage.
#                    NOTE(review): presumably this guards against a
#                    misaligned word access crossing a 4096-byte page
#                    boundary -- confirm.
#   Lenc_xpage       loads the 16 input bytes one at a time, packs them
#                    into $s0-$s3 (first byte in the most significant
#                    position), runs the compact core and stores the
#                    result byte by byte.
#   Lenc_done        restores LR and r14-r31, pops the frame and returns.
#   Lppc_AES_encrypt table-driven core.  The round count is read from
#                    offset 240 of the key schedule; rounds-1 iterations
#                    use 32-bit lookups with an 8-byte index stride
#                    through $Tbl0 and the byte-offset pointers $Tbl0+3,
#                    +2 and +1, and the final round uses the byte table
#                    at $Tbl0+2048.  Nothing visible in this part of the
#                    file calls it; .AES_encrypt uses the compact core.
#   Lppc_AES_encrypt_compact
#                    core used by .AES_encrypt.  Every round substitutes
#                    bytes through the 256-byte table at $Tbl0+2048; all
#                    rounds but the last then mix columns arithmetically
#                    with the 0x80808080 and 0x1b1b1b1b masks.
#   .AES_decrypt     prologue mirroring .AES_encrypt: frame setup,
#                    register saves and the alignment test, ending at
#                    Ldec_unaligned_ok.
427 $code.=<<___;
428 b Lenc_done
429
430 Lenc_unaligned:
431 subfic $t0,$inp,4096
432 subfic $t1,$out,4096
433 andi. $t0,$t0,4096-16
434 beq Lenc_xpage
435 andi. $t1,$t1,4096-16
436 bne Lenc_unaligned_ok
437
438 Lenc_xpage:
439 lbz $acc00,0($inp)
440 lbz $acc01,1($inp)
441 lbz $acc02,2($inp)
442 lbz $s0,3($inp)
443 lbz $acc04,4($inp)
444 lbz $acc05,5($inp)
445 lbz $acc06,6($inp)
446 lbz $s1,7($inp)
447 lbz $acc08,8($inp)
448 lbz $acc09,9($inp)
449 lbz $acc10,10($inp)
450 insrwi $s0,$acc00,8,0
451 lbz $s2,11($inp)
452 insrwi $s1,$acc04,8,0
453 lbz $acc12,12($inp)
454 insrwi $s0,$acc01,8,8
455 lbz $acc13,13($inp)
456 insrwi $s1,$acc05,8,8
457 lbz $acc14,14($inp)
458 insrwi $s0,$acc02,8,16
459 lbz $s3,15($inp)
460 insrwi $s1,$acc06,8,16
461 insrwi $s2,$acc08,8,0
462 insrwi $s3,$acc12,8,0
463 insrwi $s2,$acc09,8,8
464 insrwi $s3,$acc13,8,8
465 insrwi $s2,$acc10,8,16
466 insrwi $s3,$acc14,8,16
467
468 bl LAES_Te
469 bl Lppc_AES_encrypt_compact
470 $POP $out,`$FRAME-$SIZE_T*19`($sp)
471
472 extrwi $acc00,$s0,8,0
473 extrwi $acc01,$s0,8,8
474 stb $acc00,0($out)
475 extrwi $acc02,$s0,8,16
476 stb $acc01,1($out)
477 stb $acc02,2($out)
478 extrwi $acc04,$s1,8,0
479 stb $s0,3($out)
480 extrwi $acc05,$s1,8,8
481 stb $acc04,4($out)
482 extrwi $acc06,$s1,8,16
483 stb $acc05,5($out)
484 stb $acc06,6($out)
485 extrwi $acc08,$s2,8,0
486 stb $s1,7($out)
487 extrwi $acc09,$s2,8,8
488 stb $acc08,8($out)
489 extrwi $acc10,$s2,8,16
490 stb $acc09,9($out)
491 stb $acc10,10($out)
492 extrwi $acc12,$s3,8,0
493 stb $s2,11($out)
494 extrwi $acc13,$s3,8,8
495 stb $acc12,12($out)
496 extrwi $acc14,$s3,8,16
497 stb $acc13,13($out)
498 stb $acc14,14($out)
499 stb $s3,15($out)
500
501 Lenc_done:
502 $POP r0,`$FRAME+$LRSAVE`($sp)
503 $POP r14,`$FRAME-$SIZE_T*18`($sp)
504 $POP r15,`$FRAME-$SIZE_T*17`($sp)
505 $POP r16,`$FRAME-$SIZE_T*16`($sp)
506 $POP r17,`$FRAME-$SIZE_T*15`($sp)
507 $POP r18,`$FRAME-$SIZE_T*14`($sp)
508 $POP r19,`$FRAME-$SIZE_T*13`($sp)
509 $POP r20,`$FRAME-$SIZE_T*12`($sp)
510 $POP r21,`$FRAME-$SIZE_T*11`($sp)
511 $POP r22,`$FRAME-$SIZE_T*10`($sp)
512 $POP r23,`$FRAME-$SIZE_T*9`($sp)
513 $POP r24,`$FRAME-$SIZE_T*8`($sp)
514 $POP r25,`$FRAME-$SIZE_T*7`($sp)
515 $POP r26,`$FRAME-$SIZE_T*6`($sp)
516 $POP r27,`$FRAME-$SIZE_T*5`($sp)
517 $POP r28,`$FRAME-$SIZE_T*4`($sp)
518 $POP r29,`$FRAME-$SIZE_T*3`($sp)
519 $POP r30,`$FRAME-$SIZE_T*2`($sp)
520 $POP r31,`$FRAME-$SIZE_T*1`($sp)
521 mtlr r0
522 addi $sp,$sp,$FRAME
523 blr
524 .long 0
525 .byte 0,12,4,1,0x80,18,3,0
526 .long 0
527
528 .align 5
529 Lppc_AES_encrypt:
530 lwz $acc00,240($key)
531 addi $Tbl1,$Tbl0,3
532 lwz $t0,0($key)
533 addi $Tbl2,$Tbl0,2
534 lwz $t1,4($key)
535 addi $Tbl3,$Tbl0,1
536 lwz $t2,8($key)
537 addi $acc00,$acc00,-1
538 lwz $t3,12($key)
539 addi $key,$key,16
540 xor $s0,$s0,$t0
541 xor $s1,$s1,$t1
542 xor $s2,$s2,$t2
543 xor $s3,$s3,$t3
544 mtctr $acc00
545 .align 4
546 Lenc_loop:
547 rlwinm $acc00,$s0,`32-24+3`,21,28
548 rlwinm $acc01,$s1,`32-24+3`,21,28
549 rlwinm $acc02,$s2,`32-24+3`,21,28
550 rlwinm $acc03,$s3,`32-24+3`,21,28
551 lwz $t0,0($key)
552 rlwinm $acc04,$s1,`32-16+3`,21,28
553 lwz $t1,4($key)
554 rlwinm $acc05,$s2,`32-16+3`,21,28
555 lwz $t2,8($key)
556 rlwinm $acc06,$s3,`32-16+3`,21,28
557 lwz $t3,12($key)
558 rlwinm $acc07,$s0,`32-16+3`,21,28
559 lwzx $acc00,$Tbl0,$acc00
560 rlwinm $acc08,$s2,`32-8+3`,21,28
561 lwzx $acc01,$Tbl0,$acc01
562 rlwinm $acc09,$s3,`32-8+3`,21,28
563 lwzx $acc02,$Tbl0,$acc02
564 rlwinm $acc10,$s0,`32-8+3`,21,28
565 lwzx $acc03,$Tbl0,$acc03
566 rlwinm $acc11,$s1,`32-8+3`,21,28
567 lwzx $acc04,$Tbl1,$acc04
568 rlwinm $acc12,$s3,`0+3`,21,28
569 lwzx $acc05,$Tbl1,$acc05
570 rlwinm $acc13,$s0,`0+3`,21,28
571 lwzx $acc06,$Tbl1,$acc06
572 rlwinm $acc14,$s1,`0+3`,21,28
573 lwzx $acc07,$Tbl1,$acc07
574 rlwinm $acc15,$s2,`0+3`,21,28
575 lwzx $acc08,$Tbl2,$acc08
576 xor $t0,$t0,$acc00
577 lwzx $acc09,$Tbl2,$acc09
578 xor $t1,$t1,$acc01
579 lwzx $acc10,$Tbl2,$acc10
580 xor $t2,$t2,$acc02
581 lwzx $acc11,$Tbl2,$acc11
582 xor $t3,$t3,$acc03
583 lwzx $acc12,$Tbl3,$acc12
584 xor $t0,$t0,$acc04
585 lwzx $acc13,$Tbl3,$acc13
586 xor $t1,$t1,$acc05
587 lwzx $acc14,$Tbl3,$acc14
588 xor $t2,$t2,$acc06
589 lwzx $acc15,$Tbl3,$acc15
590 xor $t3,$t3,$acc07
591 xor $t0,$t0,$acc08
592 xor $t1,$t1,$acc09
593 xor $t2,$t2,$acc10
594 xor $t3,$t3,$acc11
595 xor $s0,$t0,$acc12
596 xor $s1,$t1,$acc13
597 xor $s2,$t2,$acc14
598 xor $s3,$t3,$acc15
599 addi $key,$key,16
600 bdnz Lenc_loop
601
602 addi $Tbl2,$Tbl0,2048
603 nop
604 lwz $t0,0($key)
605 rlwinm $acc00,$s0,`32-24`,24,31
606 lwz $t1,4($key)
607 rlwinm $acc01,$s1,`32-24`,24,31
608 lwz $t2,8($key)
609 rlwinm $acc02,$s2,`32-24`,24,31
610 lwz $t3,12($key)
611 rlwinm $acc03,$s3,`32-24`,24,31
612 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
613 rlwinm $acc04,$s1,`32-16`,24,31
614 lwz $acc09,`2048+32`($Tbl0)
615 rlwinm $acc05,$s2,`32-16`,24,31
616 lwz $acc10,`2048+64`($Tbl0)
617 rlwinm $acc06,$s3,`32-16`,24,31
618 lwz $acc11,`2048+96`($Tbl0)
619 rlwinm $acc07,$s0,`32-16`,24,31
620 lwz $acc12,`2048+128`($Tbl0)
621 rlwinm $acc08,$s2,`32-8`,24,31
622 lwz $acc13,`2048+160`($Tbl0)
623 rlwinm $acc09,$s3,`32-8`,24,31
624 lwz $acc14,`2048+192`($Tbl0)
625 rlwinm $acc10,$s0,`32-8`,24,31
626 lwz $acc15,`2048+224`($Tbl0)
627 rlwinm $acc11,$s1,`32-8`,24,31
628 lbzx $acc00,$Tbl2,$acc00
629 rlwinm $acc12,$s3,`0`,24,31
630 lbzx $acc01,$Tbl2,$acc01
631 rlwinm $acc13,$s0,`0`,24,31
632 lbzx $acc02,$Tbl2,$acc02
633 rlwinm $acc14,$s1,`0`,24,31
634 lbzx $acc03,$Tbl2,$acc03
635 rlwinm $acc15,$s2,`0`,24,31
636 lbzx $acc04,$Tbl2,$acc04
637 rlwinm $s0,$acc00,24,0,7
638 lbzx $acc05,$Tbl2,$acc05
639 rlwinm $s1,$acc01,24,0,7
640 lbzx $acc06,$Tbl2,$acc06
641 rlwinm $s2,$acc02,24,0,7
642 lbzx $acc07,$Tbl2,$acc07
643 rlwinm $s3,$acc03,24,0,7
644 lbzx $acc08,$Tbl2,$acc08
645 rlwimi $s0,$acc04,16,8,15
646 lbzx $acc09,$Tbl2,$acc09
647 rlwimi $s1,$acc05,16,8,15
648 lbzx $acc10,$Tbl2,$acc10
649 rlwimi $s2,$acc06,16,8,15
650 lbzx $acc11,$Tbl2,$acc11
651 rlwimi $s3,$acc07,16,8,15
652 lbzx $acc12,$Tbl2,$acc12
653 rlwimi $s0,$acc08,8,16,23
654 lbzx $acc13,$Tbl2,$acc13
655 rlwimi $s1,$acc09,8,16,23
656 lbzx $acc14,$Tbl2,$acc14
657 rlwimi $s2,$acc10,8,16,23
658 lbzx $acc15,$Tbl2,$acc15
659 rlwimi $s3,$acc11,8,16,23
660 or $s0,$s0,$acc12
661 or $s1,$s1,$acc13
662 or $s2,$s2,$acc14
663 or $s3,$s3,$acc15
664 xor $s0,$s0,$t0
665 xor $s1,$s1,$t1
666 xor $s2,$s2,$t2
667 xor $s3,$s3,$t3
668 blr
669 .long 0
670 .byte 0,12,0x14,0,0,0,0,0
671
672 .align 4
673 Lppc_AES_encrypt_compact:
674 lwz $acc00,240($key)
675 addi $Tbl1,$Tbl0,2048
676 lwz $t0,0($key)
677 lis $mask80,0x8080
678 lwz $t1,4($key)
679 lis $mask1b,0x1b1b
680 lwz $t2,8($key)
681 ori $mask80,$mask80,0x8080
682 lwz $t3,12($key)
683 ori $mask1b,$mask1b,0x1b1b
684 addi $key,$key,16
685 mtctr $acc00
686 .align 4
687 Lenc_compact_loop:
688 xor $s0,$s0,$t0
689 xor $s1,$s1,$t1
690 rlwinm $acc00,$s0,`32-24`,24,31
691 xor $s2,$s2,$t2
692 rlwinm $acc01,$s1,`32-24`,24,31
693 xor $s3,$s3,$t3
694 rlwinm $acc02,$s2,`32-24`,24,31
695 rlwinm $acc03,$s3,`32-24`,24,31
696 rlwinm $acc04,$s1,`32-16`,24,31
697 rlwinm $acc05,$s2,`32-16`,24,31
698 rlwinm $acc06,$s3,`32-16`,24,31
699 rlwinm $acc07,$s0,`32-16`,24,31
700 lbzx $acc00,$Tbl1,$acc00
701 rlwinm $acc08,$s2,`32-8`,24,31
702 lbzx $acc01,$Tbl1,$acc01
703 rlwinm $acc09,$s3,`32-8`,24,31
704 lbzx $acc02,$Tbl1,$acc02
705 rlwinm $acc10,$s0,`32-8`,24,31
706 lbzx $acc03,$Tbl1,$acc03
707 rlwinm $acc11,$s1,`32-8`,24,31
708 lbzx $acc04,$Tbl1,$acc04
709 rlwinm $acc12,$s3,`0`,24,31
710 lbzx $acc05,$Tbl1,$acc05
711 rlwinm $acc13,$s0,`0`,24,31
712 lbzx $acc06,$Tbl1,$acc06
713 rlwinm $acc14,$s1,`0`,24,31
714 lbzx $acc07,$Tbl1,$acc07
715 rlwinm $acc15,$s2,`0`,24,31
716 lbzx $acc08,$Tbl1,$acc08
717 rlwinm $s0,$acc00,24,0,7
718 lbzx $acc09,$Tbl1,$acc09
719 rlwinm $s1,$acc01,24,0,7
720 lbzx $acc10,$Tbl1,$acc10
721 rlwinm $s2,$acc02,24,0,7
722 lbzx $acc11,$Tbl1,$acc11
723 rlwinm $s3,$acc03,24,0,7
724 lbzx $acc12,$Tbl1,$acc12
725 rlwimi $s0,$acc04,16,8,15
726 lbzx $acc13,$Tbl1,$acc13
727 rlwimi $s1,$acc05,16,8,15
728 lbzx $acc14,$Tbl1,$acc14
729 rlwimi $s2,$acc06,16,8,15
730 lbzx $acc15,$Tbl1,$acc15
731 rlwimi $s3,$acc07,16,8,15
732 rlwimi $s0,$acc08,8,16,23
733 rlwimi $s1,$acc09,8,16,23
734 rlwimi $s2,$acc10,8,16,23
735 rlwimi $s3,$acc11,8,16,23
736 lwz $t0,0($key)
737 or $s0,$s0,$acc12
738 lwz $t1,4($key)
739 or $s1,$s1,$acc13
740 lwz $t2,8($key)
741 or $s2,$s2,$acc14
742 lwz $t3,12($key)
743 or $s3,$s3,$acc15
744
745 addi $key,$key,16
746 bdz Lenc_compact_done
747
748 and $acc00,$s0,$mask80 # r1=r0&0x80808080
749 and $acc01,$s1,$mask80
750 and $acc02,$s2,$mask80
751 and $acc03,$s3,$mask80
752 srwi $acc04,$acc00,7 # r1>>7
753 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
754 srwi $acc05,$acc01,7
755 andc $acc09,$s1,$mask80
756 srwi $acc06,$acc02,7
757 andc $acc10,$s2,$mask80
758 srwi $acc07,$acc03,7
759 andc $acc11,$s3,$mask80
760 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
761 sub $acc01,$acc01,$acc05
762 sub $acc02,$acc02,$acc06
763 sub $acc03,$acc03,$acc07
764 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
765 add $acc09,$acc09,$acc09
766 add $acc10,$acc10,$acc10
767 add $acc11,$acc11,$acc11
768 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
769 and $acc01,$acc01,$mask1b
770 and $acc02,$acc02,$mask1b
771 and $acc03,$acc03,$mask1b
772 xor $acc00,$acc00,$acc08 # r2
773 xor $acc01,$acc01,$acc09
774 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
775 xor $acc02,$acc02,$acc10
776 rotlwi $acc13,$s1,16
777 xor $acc03,$acc03,$acc11
778 rotlwi $acc14,$s2,16
779
780 xor $s0,$s0,$acc00 # r0^r2
781 rotlwi $acc15,$s3,16
782 xor $s1,$s1,$acc01
783 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
784 xor $s2,$s2,$acc02
785 rotrwi $s1,$s1,24
786 xor $s3,$s3,$acc03
787 rotrwi $s2,$s2,24
788 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
789 rotrwi $s3,$s3,24
790 xor $s1,$s1,$acc01
791 xor $s2,$s2,$acc02
792 xor $s3,$s3,$acc03
793 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
794 xor $s0,$s0,$acc12 #
795 rotlwi $acc09,$acc13,8
796 xor $s1,$s1,$acc13
797 rotlwi $acc10,$acc14,8
798 xor $s2,$s2,$acc14
799 rotlwi $acc11,$acc15,8
800 xor $s3,$s3,$acc15
801 xor $s0,$s0,$acc08 #
802 xor $s1,$s1,$acc09
803 xor $s2,$s2,$acc10
804 xor $s3,$s3,$acc11
805
806 b Lenc_compact_loop
807 .align 4
808 Lenc_compact_done:
809 xor $s0,$s0,$t0
810 xor $s1,$s1,$t1
811 xor $s2,$s2,$t2
812 xor $s3,$s3,$t3
813 blr
814 .long 0
815 .byte 0,12,0x14,0,0,0,0,0
816 .size .AES_encrypt,.-.AES_encrypt
817
818 .globl .AES_decrypt
819 .align 7
820 .AES_decrypt:
821 $STU $sp,-$FRAME($sp)
822 mflr r0
823
824 $PUSH $out,`$FRAME-$SIZE_T*19`($sp)
825 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
826 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
827 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
828 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
829 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
830 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
831 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
832 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
833 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
834 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
835 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
836 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
837 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
838 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
839 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
840 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
841 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
842 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
843 $PUSH r0,`$FRAME+$LRSAVE`($sp)
844
845 andi. $t0,$inp,3
846 andi. $t1,$out,3
847 or. $t0,$t0,$t1
848 bne Ldec_unaligned
849
850 Ldec_unaligned_ok:
851 ___
# Big-endian flavours: load the four input words straight into $s0-$s3.
852 $code.=<<___ if (!$LITTLE_ENDIAN);
853 lwz $s0,0($inp)
854 lwz $s1,4($inp)
855 lwz $s2,8($inp)
856 lwz $s3,12($inp)
857 ___
# Little-endian flavours: load the four input words into $t0-$t3, then
# reverse the byte order of each into $s0-$s3 (rotate left by 8, then
# patch the first and third bytes from the word rotated left by 24).
858 $code.=<<___ if ($LITTLE_ENDIAN);
859 lwz $t0,0($inp)
860 lwz $t1,4($inp)
861 lwz $t2,8($inp)
862 lwz $t3,12($inp)
863 rotlwi $s0,$t0,8
864 rotlwi $s1,$t1,8
865 rotlwi $s2,$t2,8
866 rotlwi $s3,$t3,8
867 rlwimi $s0,$t0,24,0,7
868 rlwimi $s1,$t1,24,0,7
869 rlwimi $s2,$t2,24,0,7
870 rlwimi $s3,$t3,24,0,7
871 rlwimi $s0,$t0,24,16,23
872 rlwimi $s1,$t1,24,16,23
873 rlwimi $s2,$t2,24,16,23
874 rlwimi $s3,$t3,24,16,23
875 ___
# Fetch the decryption table address into $Tbl0 via LAES_Td, call
# Lppc_AES_decrypt_compact on $s0-$s3, then reload $out from its stack
# slot.
876 $code.=<<___;
877 bl LAES_Td
878 bl Lppc_AES_decrypt_compact
879 $POP $out,`$FRAME-$SIZE_T*19`($sp)
880 ___
# Little-endian flavours: reverse the byte order of each state word into
# $t0-$t3 and store the four words to $out.
881 $code.=<<___ if ($LITTLE_ENDIAN);
882 rotlwi $t0,$s0,8
883 rotlwi $t1,$s1,8
884 rotlwi $t2,$s2,8
885 rotlwi $t3,$s3,8
886 rlwimi $t0,$s0,24,0,7
887 rlwimi $t1,$s1,24,0,7
888 rlwimi $t2,$s2,24,0,7
889 rlwimi $t3,$s3,24,0,7
890 rlwimi $t0,$s0,24,16,23
891 rlwimi $t1,$s1,24,16,23
892 rlwimi $t2,$s2,24,16,23
893 rlwimi $t3,$s3,24,16,23
894 stw $t0,0($out)
895 stw $t1,4($out)
896 stw $t2,8($out)
897 stw $t3,12($out)
898 ___
# Big-endian flavours: store the four state words to $out unchanged.
899 $code.=<<___ if (!$LITTLE_ENDIAN);
900 stw $s0,0($out)
901 stw $s1,4($out)
902 stw $s2,8($out)
903 stw $s3,12($out)
904 ___
905 $code.=<<___;
906 b Ldec_done
907
908 Ldec_unaligned:
909 subfic $t0,$inp,4096
910 subfic $t1,$out,4096
911 andi. $t0,$t0,4096-16
912 beq Ldec_xpage
913 andi. $t1,$t1,4096-16
914 bne Ldec_unaligned_ok
915
916 Ldec_xpage:
917 lbz $acc00,0($inp)
918 lbz $acc01,1($inp)
919 lbz $acc02,2($inp)
920 lbz $s0,3($inp)
921 lbz $acc04,4($inp)
922 lbz $acc05,5($inp)
923 lbz $acc06,6($inp)
924 lbz $s1,7($inp)
925 lbz $acc08,8($inp)
926 lbz $acc09,9($inp)
927 lbz $acc10,10($inp)
928 insrwi $s0,$acc00,8,0
929 lbz $s2,11($inp)
930 insrwi $s1,$acc04,8,0
931 lbz $acc12,12($inp)
932 insrwi $s0,$acc01,8,8
933 lbz $acc13,13($inp)
934 insrwi $s1,$acc05,8,8
935 lbz $acc14,14($inp)
936 insrwi $s0,$acc02,8,16
937 lbz $s3,15($inp)
938 insrwi $s1,$acc06,8,16
939 insrwi $s2,$acc08,8,0
940 insrwi $s3,$acc12,8,0
941 insrwi $s2,$acc09,8,8
942 insrwi $s3,$acc13,8,8
943 insrwi $s2,$acc10,8,16
944 insrwi $s3,$acc14,8,16
945
946 bl LAES_Td
947 bl Lppc_AES_decrypt_compact
948 $POP $out,`$FRAME-$SIZE_T*19`($sp)
949
950 extrwi $acc00,$s0,8,0
951 extrwi $acc01,$s0,8,8
952 stb $acc00,0($out)
953 extrwi $acc02,$s0,8,16
954 stb $acc01,1($out)
955 stb $acc02,2($out)
956 extrwi $acc04,$s1,8,0
957 stb $s0,3($out)
958 extrwi $acc05,$s1,8,8
959 stb $acc04,4($out)
960 extrwi $acc06,$s1,8,16
961 stb $acc05,5($out)
962 stb $acc06,6($out)
963 extrwi $acc08,$s2,8,0
964 stb $s1,7($out)
965 extrwi $acc09,$s2,8,8
966 stb $acc08,8($out)
967 extrwi $acc10,$s2,8,16
968 stb $acc09,9($out)
969 stb $acc10,10($out)
970 extrwi $acc12,$s3,8,0
971 stb $s2,11($out)
972 extrwi $acc13,$s3,8,8
973 stb $acc12,12($out)
974 extrwi $acc14,$s3,8,16
975 stb $acc13,13($out)
976 stb $acc14,14($out)
977 stb $s3,15($out)
978
979 Ldec_done:
980 $POP r0,`$FRAME+$LRSAVE`($sp)
981 $POP r14,`$FRAME-$SIZE_T*18`($sp)
982 $POP r15,`$FRAME-$SIZE_T*17`($sp)
983 $POP r16,`$FRAME-$SIZE_T*16`($sp)
984 $POP r17,`$FRAME-$SIZE_T*15`($sp)
985 $POP r18,`$FRAME-$SIZE_T*14`($sp)
986 $POP r19,`$FRAME-$SIZE_T*13`($sp)
987 $POP r20,`$FRAME-$SIZE_T*12`($sp)
988 $POP r21,`$FRAME-$SIZE_T*11`($sp)
989 $POP r22,`$FRAME-$SIZE_T*10`($sp)
990 $POP r23,`$FRAME-$SIZE_T*9`($sp)
991 $POP r24,`$FRAME-$SIZE_T*8`($sp)
992 $POP r25,`$FRAME-$SIZE_T*7`($sp)
993 $POP r26,`$FRAME-$SIZE_T*6`($sp)
994 $POP r27,`$FRAME-$SIZE_T*5`($sp)
995 $POP r28,`$FRAME-$SIZE_T*4`($sp)
996 $POP r29,`$FRAME-$SIZE_T*3`($sp)
997 $POP r30,`$FRAME-$SIZE_T*2`($sp)
998 $POP r31,`$FRAME-$SIZE_T*1`($sp)
999 mtlr r0
1000 addi $sp,$sp,$FRAME
1001 blr
1002 .long 0
1003 .byte 0,12,4,1,0x80,18,3,0
1004 .long 0
1005
1006 .align 5
# Lppc_AES_decrypt: table-driven decryption of one block held as four
# 32-bit words in s0-s3; the result is left in s0-s3.
# Reads the key schedule through the key register (advanced by 16 bytes
# per group of four key words consumed) and lookup tables based at Tbl0,
# which must already be set on entry.
# Writes t0-t3, acc00-acc15, Tbl1-Tbl3 and CTR.
1007 Lppc_AES_decrypt:
# Loop count is the word at key+240 minus one (presumably the round
# count stored in the key structure - confirm); the last round is
# handled separately after the loop.
# Tbl1, Tbl2, Tbl3 are the Tbl0 base advanced by 3, 2 and 1 bytes.
1008 lwz $acc00,240($key)
1009 addi $Tbl1,$Tbl0,3
1010 lwz $t0,0($key)
1011 addi $Tbl2,$Tbl0,2
1012 lwz $t1,4($key)
1013 addi $Tbl3,$Tbl0,1
1014 lwz $t2,8($key)
1015 addi $acc00,$acc00,-1
1016 lwz $t3,12($key)
1017 addi $key,$key,16
# Fold the first four key words into the state.
1018 xor $s0,$s0,$t0
1019 xor $s1,$s1,$t1
1020 xor $s2,$s2,$t2
1021 xor $s3,$s3,$t3
1022 mtctr $acc00
1023 .align 4
# One iteration per inner round. Each rlwinm isolates one state byte and
# scales it by 8 (table slots are indexed in 8-byte steps). Output word i
# takes its top byte from word i, then successively lower bytes from
# words i-1, i-2 and i-3 (mod 4). The four looked-up words are xored
# together with the next key word to form the new state word.
1024 Ldec_loop:
1025 rlwinm $acc00,$s0,`32-24+3`,21,28
1026 rlwinm $acc01,$s1,`32-24+3`,21,28
1027 rlwinm $acc02,$s2,`32-24+3`,21,28
1028 rlwinm $acc03,$s3,`32-24+3`,21,28
1029 lwz $t0,0($key)
1030 rlwinm $acc04,$s3,`32-16+3`,21,28
1031 lwz $t1,4($key)
1032 rlwinm $acc05,$s0,`32-16+3`,21,28
1033 lwz $t2,8($key)
1034 rlwinm $acc06,$s1,`32-16+3`,21,28
1035 lwz $t3,12($key)
1036 rlwinm $acc07,$s2,`32-16+3`,21,28
1037 lwzx $acc00,$Tbl0,$acc00
1038 rlwinm $acc08,$s2,`32-8+3`,21,28
1039 lwzx $acc01,$Tbl0,$acc01
1040 rlwinm $acc09,$s3,`32-8+3`,21,28
1041 lwzx $acc02,$Tbl0,$acc02
1042 rlwinm $acc10,$s0,`32-8+3`,21,28
1043 lwzx $acc03,$Tbl0,$acc03
1044 rlwinm $acc11,$s1,`32-8+3`,21,28
1045 lwzx $acc04,$Tbl1,$acc04
1046 rlwinm $acc12,$s1,`0+3`,21,28
1047 lwzx $acc05,$Tbl1,$acc05
1048 rlwinm $acc13,$s2,`0+3`,21,28
1049 lwzx $acc06,$Tbl1,$acc06
1050 rlwinm $acc14,$s3,`0+3`,21,28
1051 lwzx $acc07,$Tbl1,$acc07
1052 rlwinm $acc15,$s0,`0+3`,21,28
1053 lwzx $acc08,$Tbl2,$acc08
1054 xor $t0,$t0,$acc00
1055 lwzx $acc09,$Tbl2,$acc09
1056 xor $t1,$t1,$acc01
1057 lwzx $acc10,$Tbl2,$acc10
1058 xor $t2,$t2,$acc02
1059 lwzx $acc11,$Tbl2,$acc11
1060 xor $t3,$t3,$acc03
1061 lwzx $acc12,$Tbl3,$acc12
1062 xor $t0,$t0,$acc04
1063 lwzx $acc13,$Tbl3,$acc13
1064 xor $t1,$t1,$acc05
1065 lwzx $acc14,$Tbl3,$acc14
1066 xor $t2,$t2,$acc06
1067 lwzx $acc15,$Tbl3,$acc15
1068 xor $t3,$t3,$acc07
1069 xor $t0,$t0,$acc08
1070 xor $t1,$t1,$acc09
1071 xor $t2,$t2,$acc10
1072 xor $t3,$t3,$acc11
1073 xor $s0,$t0,$acc12
1074 xor $s1,$t1,$acc13
1075 xor $s2,$t2,$acc14
1076 xor $s3,$t3,$acc15
1077 addi $key,$key,16
1078 bdnz Ldec_loop
1079
# Last round: single-byte lookups (lbzx, unscaled index) through the
# 256-byte table at Tbl0+2048, with no word-table lookups.
# The eight lwz from 2048+0 up to 2048+224 land in acc08-acc15, which
# are all overwritten by the rlwinm extractions before being read: they
# only touch the table at 32-byte strides ahead of the byte lookups.
1080 addi $Tbl2,$Tbl0,2048
1081 nop
1082 lwz $t0,0($key)
1083 rlwinm $acc00,$s0,`32-24`,24,31
1084 lwz $t1,4($key)
1085 rlwinm $acc01,$s1,`32-24`,24,31
1086 lwz $t2,8($key)
1087 rlwinm $acc02,$s2,`32-24`,24,31
1088 lwz $t3,12($key)
1089 rlwinm $acc03,$s3,`32-24`,24,31
1090 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
1091 rlwinm $acc04,$s3,`32-16`,24,31
1092 lwz $acc09,`2048+32`($Tbl0)
1093 rlwinm $acc05,$s0,`32-16`,24,31
1094 lwz $acc10,`2048+64`($Tbl0)
1095 lbzx $acc00,$Tbl2,$acc00
1096 lwz $acc11,`2048+96`($Tbl0)
1097 lbzx $acc01,$Tbl2,$acc01
1098 lwz $acc12,`2048+128`($Tbl0)
1099 rlwinm $acc06,$s1,`32-16`,24,31
1100 lwz $acc13,`2048+160`($Tbl0)
1101 rlwinm $acc07,$s2,`32-16`,24,31
1102 lwz $acc14,`2048+192`($Tbl0)
1103 rlwinm $acc08,$s2,`32-8`,24,31
1104 lwz $acc15,`2048+224`($Tbl0)
1105 rlwinm $acc09,$s3,`32-8`,24,31
1106 lbzx $acc02,$Tbl2,$acc02
1107 rlwinm $acc10,$s0,`32-8`,24,31
1108 lbzx $acc03,$Tbl2,$acc03
1109 rlwinm $acc11,$s1,`32-8`,24,31
1110 lbzx $acc04,$Tbl2,$acc04
1111 rlwinm $acc12,$s1,`0`,24,31
1112 lbzx $acc05,$Tbl2,$acc05
1113 rlwinm $acc13,$s2,`0`,24,31
1114 lbzx $acc06,$Tbl2,$acc06
1115 rlwinm $acc14,$s3,`0`,24,31
1116 lbzx $acc07,$Tbl2,$acc07
1117 rlwinm $acc15,$s0,`0`,24,31
1118 lbzx $acc08,$Tbl2,$acc08
# Reassemble each output word from its four substituted bytes, most
# significant byte first (shift by 24, insert at 16 and 8, or in the low
# byte).
1119 rlwinm $s0,$acc00,24,0,7
1120 lbzx $acc09,$Tbl2,$acc09
1121 rlwinm $s1,$acc01,24,0,7
1122 lbzx $acc10,$Tbl2,$acc10
1123 rlwinm $s2,$acc02,24,0,7
1124 lbzx $acc11,$Tbl2,$acc11
1125 rlwinm $s3,$acc03,24,0,7
1126 lbzx $acc12,$Tbl2,$acc12
1127 rlwimi $s0,$acc04,16,8,15
1128 lbzx $acc13,$Tbl2,$acc13
1129 rlwimi $s1,$acc05,16,8,15
1130 lbzx $acc14,$Tbl2,$acc14
1131 rlwimi $s2,$acc06,16,8,15
1132 lbzx $acc15,$Tbl2,$acc15
1133 rlwimi $s3,$acc07,16,8,15
1134 rlwimi $s0,$acc08,8,16,23
1135 rlwimi $s1,$acc09,8,16,23
1136 rlwimi $s2,$acc10,8,16,23
1137 rlwimi $s3,$acc11,8,16,23
1138 or $s0,$s0,$acc12
1139 or $s1,$s1,$acc13
1140 or $s2,$s2,$acc14
1141 or $s3,$s3,$acc15
# Apply the last four key words and return to the caller.
1142 xor $s0,$s0,$t0
1143 xor $s1,$s1,$t1
1144 xor $s2,$s2,$t2
1145 xor $s3,$s3,$t3
1146 blr
# NOTE(review): the .long/.byte words after blr look like a traceback
# tag for the routine - confirm against the target ABI.
1147 .long 0
1148 .byte 0,12,0x14,0,0,0,0,0
1149
1150 .align 4
# Lppc_AES_decrypt_compact: decrypts the block held in s0-s3 using only
# the 256-byte lookup table at Tbl0+2048 plus packed per-byte arithmetic;
# the result is left in s0-s3.
# Reads the key schedule through the key register (advanced by 16 bytes
# per group of four key words consumed); Tbl0 must already be set on
# entry. Writes t0-t3, acc00-acc15, Tbl1, mask80, mask1b and CTR.
1151 Lppc_AES_decrypt_compact:
# CTR is loaded with the word at key+240 (presumably the round count -
# confirm). mask80 and mask1b are built as 0x80808080 and 0x1b1b1b1b.
1152 lwz $acc00,240($key)
1153 addi $Tbl1,$Tbl0,2048
1154 lwz $t0,0($key)
1155 lis $mask80,0x8080
1156 lwz $t1,4($key)
1157 lis $mask1b,0x1b1b
1158 lwz $t2,8($key)
1159 ori $mask80,$mask80,0x8080
1160 lwz $t3,12($key)
1161 ori $mask1b,$mask1b,0x1b1b
1162 addi $key,$key,16
1163 ___
# 64-bit builds only: copy each mask's low 32 bits into its upper 32 bits
# so the masks cover a full 64-bit register.
1164 $code.=<<___ if ($SIZE_T==8);
1165 insrdi $mask80,$mask80,32,0
1166 insrdi $mask1b,$mask1b,32,0
1167 ___
1168 $code.=<<___;
1169 mtctr $acc00
1170 .align 4
# Per round: xor in the current key words, pull one byte out of each
# state word (output word i takes its top byte from word i, then lower
# bytes from words i-1, i-2, i-3 mod 4), substitute every byte through
# the table at Tbl1, and reassemble the four words.
1171 Ldec_compact_loop:
1172 xor $s0,$s0,$t0
1173 xor $s1,$s1,$t1
1174 rlwinm $acc00,$s0,`32-24`,24,31
1175 xor $s2,$s2,$t2
1176 rlwinm $acc01,$s1,`32-24`,24,31
1177 xor $s3,$s3,$t3
1178 rlwinm $acc02,$s2,`32-24`,24,31
1179 rlwinm $acc03,$s3,`32-24`,24,31
1180 rlwinm $acc04,$s3,`32-16`,24,31
1181 rlwinm $acc05,$s0,`32-16`,24,31
1182 rlwinm $acc06,$s1,`32-16`,24,31
1183 rlwinm $acc07,$s2,`32-16`,24,31
1184 lbzx $acc00,$Tbl1,$acc00
1185 rlwinm $acc08,$s2,`32-8`,24,31
1186 lbzx $acc01,$Tbl1,$acc01
1187 rlwinm $acc09,$s3,`32-8`,24,31
1188 lbzx $acc02,$Tbl1,$acc02
1189 rlwinm $acc10,$s0,`32-8`,24,31
1190 lbzx $acc03,$Tbl1,$acc03
1191 rlwinm $acc11,$s1,`32-8`,24,31
1192 lbzx $acc04,$Tbl1,$acc04
1193 rlwinm $acc12,$s1,`0`,24,31
1194 lbzx $acc05,$Tbl1,$acc05
1195 rlwinm $acc13,$s2,`0`,24,31
1196 lbzx $acc06,$Tbl1,$acc06
1197 rlwinm $acc14,$s3,`0`,24,31
1198 lbzx $acc07,$Tbl1,$acc07
1199 rlwinm $acc15,$s0,`0`,24,31
1200 lbzx $acc08,$Tbl1,$acc08
1201 rlwinm $s0,$acc00,24,0,7
1202 lbzx $acc09,$Tbl1,$acc09
1203 rlwinm $s1,$acc01,24,0,7
1204 lbzx $acc10,$Tbl1,$acc10
1205 rlwinm $s2,$acc02,24,0,7
1206 lbzx $acc11,$Tbl1,$acc11
1207 rlwinm $s3,$acc03,24,0,7
1208 lbzx $acc12,$Tbl1,$acc12
1209 rlwimi $s0,$acc04,16,8,15
1210 lbzx $acc13,$Tbl1,$acc13
1211 rlwimi $s1,$acc05,16,8,15
1212 lbzx $acc14,$Tbl1,$acc14
1213 rlwimi $s2,$acc06,16,8,15
1214 lbzx $acc15,$Tbl1,$acc15
1215 rlwimi $s3,$acc07,16,8,15
1216 rlwimi $s0,$acc08,8,16,23
1217 rlwimi $s1,$acc09,8,16,23
1218 rlwimi $s2,$acc10,8,16,23
1219 rlwimi $s3,$acc11,8,16,23
# Fetch the next four key words while the low bytes are merged in.
1220 lwz $t0,0($key)
1221 or $s0,$s0,$acc12
1222 lwz $t1,4($key)
1223 or $s1,$s1,$acc13
1224 lwz $t2,8($key)
1225 or $s2,$s2,$acc14
1226 lwz $t3,12($key)
1227 or $s3,$s3,$acc15
1228
# bdz decrements CTR and leaves the loop when it reaches zero, so the
# last round skips the byte-mixing step that follows.
1229 addi $key,$key,16
1230 bdz Ldec_compact_done
1231 ___
# 64-bit variant of the mixing step: two state words per register.
1232 $code.=<<___ if ($SIZE_T==8);
1233 # vectorized permutation improves decrypt performance by 10%
# Pack s1 into the upper half of s0 and s3 into the upper half of s2.
1234 insrdi $s0,$s1,32,0
1235 insrdi $s2,$s3,32,0
1236
# Three chained packed doublings: each byte is shifted left by one and
# 0x1b is xored in where its top bit was set, giving r2, r4 and r8.
1237 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1238 and $acc02,$s2,$mask80
1239 srdi $acc04,$acc00,7 # r1>>7
1240 srdi $acc06,$acc02,7
1241 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1242 andc $acc10,$s2,$mask80
1243 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1244 sub $acc02,$acc02,$acc06
1245 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1246 add $acc10,$acc10,$acc10
1247 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1248 and $acc02,$acc02,$mask1b
1249 xor $acc00,$acc00,$acc08 # r2
1250 xor $acc02,$acc02,$acc10
1251
1252 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1253 and $acc06,$acc02,$mask80
1254 srdi $acc08,$acc04,7 # r1>>7
1255 srdi $acc10,$acc06,7
1256 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1257 andc $acc14,$acc02,$mask80
1258 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1259 sub $acc06,$acc06,$acc10
1260 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1261 add $acc14,$acc14,$acc14
1262 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1263 and $acc06,$acc06,$mask1b
1264 xor $acc04,$acc04,$acc12 # r4
1265 xor $acc06,$acc06,$acc14
1266
1267 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1268 and $acc10,$acc06,$mask80
1269 srdi $acc12,$acc08,7 # r1>>7
1270 srdi $acc14,$acc10,7
1271 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1272 sub $acc10,$acc10,$acc14
1273 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1274 andc $acc14,$acc06,$mask80
1275 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1276 add $acc14,$acc14,$acc14
1277 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1278 and $acc10,$acc10,$mask1b
1279 xor $acc08,$acc08,$acc12 # r8
1280 xor $acc10,$acc10,$acc14
1281
1282 xor $acc00,$acc00,$s0 # r2^r0
1283 xor $acc02,$acc02,$s2
1284 xor $acc04,$acc04,$s0 # r4^r0
1285 xor $acc06,$acc06,$s2
1286
# Split the upper halves back out into the odd-numbered acc registers so
# the shared 32-bit code below sees one word per register.
1287 extrdi $acc01,$acc00,32,0
1288 extrdi $acc03,$acc02,32,0
1289 extrdi $acc05,$acc04,32,0
1290 extrdi $acc07,$acc06,32,0
1291 extrdi $acc09,$acc08,32,0
1292 extrdi $acc11,$acc10,32,0
1293 ___
# 32-bit variant of the mixing step: the same three doublings, with one
# state word per register (acc00-03 = r2, acc04-07 = r4, acc08-11 = r8).
1294 $code.=<<___ if ($SIZE_T==4);
1295 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1296 and $acc01,$s1,$mask80
1297 and $acc02,$s2,$mask80
1298 and $acc03,$s3,$mask80
1299 srwi $acc04,$acc00,7 # r1>>7
1300 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1301 srwi $acc05,$acc01,7
1302 andc $acc09,$s1,$mask80
1303 srwi $acc06,$acc02,7
1304 andc $acc10,$s2,$mask80
1305 srwi $acc07,$acc03,7
1306 andc $acc11,$s3,$mask80
1307 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1308 sub $acc01,$acc01,$acc05
1309 sub $acc02,$acc02,$acc06
1310 sub $acc03,$acc03,$acc07
1311 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1312 add $acc09,$acc09,$acc09
1313 add $acc10,$acc10,$acc10
1314 add $acc11,$acc11,$acc11
1315 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1316 and $acc01,$acc01,$mask1b
1317 and $acc02,$acc02,$mask1b
1318 and $acc03,$acc03,$mask1b
1319 xor $acc00,$acc00,$acc08 # r2
1320 xor $acc01,$acc01,$acc09
1321 xor $acc02,$acc02,$acc10
1322 xor $acc03,$acc03,$acc11
1323
1324 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1325 and $acc05,$acc01,$mask80
1326 and $acc06,$acc02,$mask80
1327 and $acc07,$acc03,$mask80
1328 srwi $acc08,$acc04,7 # r1>>7
1329 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1330 srwi $acc09,$acc05,7
1331 andc $acc13,$acc01,$mask80
1332 srwi $acc10,$acc06,7
1333 andc $acc14,$acc02,$mask80
1334 srwi $acc11,$acc07,7
1335 andc $acc15,$acc03,$mask80
1336 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1337 sub $acc05,$acc05,$acc09
1338 sub $acc06,$acc06,$acc10
1339 sub $acc07,$acc07,$acc11
1340 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1341 add $acc13,$acc13,$acc13
1342 add $acc14,$acc14,$acc14
1343 add $acc15,$acc15,$acc15
1344 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1345 and $acc05,$acc05,$mask1b
1346 and $acc06,$acc06,$mask1b
1347 and $acc07,$acc07,$mask1b
1348 xor $acc04,$acc04,$acc12 # r4
1349 xor $acc05,$acc05,$acc13
1350 xor $acc06,$acc06,$acc14
1351 xor $acc07,$acc07,$acc15
1352
1353 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1354 and $acc09,$acc05,$mask80
1355 srwi $acc12,$acc08,7 # r1>>7
1356 and $acc10,$acc06,$mask80
1357 srwi $acc13,$acc09,7
1358 and $acc11,$acc07,$mask80
1359 srwi $acc14,$acc10,7
1360 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1361 srwi $acc15,$acc11,7
1362 sub $acc09,$acc09,$acc13
1363 sub $acc10,$acc10,$acc14
1364 sub $acc11,$acc11,$acc15
1365 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1366 andc $acc13,$acc05,$mask80
1367 andc $acc14,$acc06,$mask80
1368 andc $acc15,$acc07,$mask80
1369 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1370 add $acc13,$acc13,$acc13
1371 add $acc14,$acc14,$acc14
1372 add $acc15,$acc15,$acc15
1373 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1374 and $acc09,$acc09,$mask1b
1375 and $acc10,$acc10,$mask1b
1376 and $acc11,$acc11,$mask1b
1377 xor $acc08,$acc08,$acc12 # r8
1378 xor $acc09,$acc09,$acc13
1379 xor $acc10,$acc10,$acc14
1380 xor $acc11,$acc11,$acc15
1381
1382 xor $acc00,$acc00,$s0 # r2^r0
1383 xor $acc01,$acc01,$s1
1384 xor $acc02,$acc02,$s2
1385 xor $acc03,$acc03,$s3
1386 xor $acc04,$acc04,$s0 # r4^r0
1387 xor $acc05,$acc05,$s1
1388 xor $acc06,$acc06,$s2
1389 xor $acc07,$acc07,$s3
1390 ___
# Shared by both widths from here on.
1391 $code.=<<___;
# Combine, per state word: ROTATE(r0,8) ^ (r2^r0) ^ (r4^r0) ^ r8
# ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8,8).
# Rotations are 32-bit right rotates (rotrwi), interleaved with the xors.
1392 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1393 rotrwi $s1,$s1,8
1394 xor $s0,$s0,$acc00 # ^= r2^r0
1395 rotrwi $s2,$s2,8
1396 xor $s1,$s1,$acc01
1397 rotrwi $s3,$s3,8
1398 xor $s2,$s2,$acc02
1399 xor $s3,$s3,$acc03
1400 xor $acc00,$acc00,$acc08
1401 xor $acc01,$acc01,$acc09
1402 xor $acc02,$acc02,$acc10
1403 xor $acc03,$acc03,$acc11
1404 xor $s0,$s0,$acc04 # ^= r4^r0
1405 rotrwi $acc00,$acc00,24
1406 xor $s1,$s1,$acc05
1407 rotrwi $acc01,$acc01,24
1408 xor $s2,$s2,$acc06
1409 rotrwi $acc02,$acc02,24
1410 xor $s3,$s3,$acc07
1411 rotrwi $acc03,$acc03,24
1412 xor $acc04,$acc04,$acc08
1413 xor $acc05,$acc05,$acc09
1414 xor $acc06,$acc06,$acc10
1415 xor $acc07,$acc07,$acc11
1416 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1417 rotrwi $acc04,$acc04,16
1418 xor $s1,$s1,$acc09
1419 rotrwi $acc05,$acc05,16
1420 xor $s2,$s2,$acc10
1421 rotrwi $acc06,$acc06,16
1422 xor $s3,$s3,$acc11
1423 rotrwi $acc07,$acc07,16
1424 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1425 rotrwi $acc08,$acc08,8
1426 xor $s1,$s1,$acc01
1427 rotrwi $acc09,$acc09,8
1428 xor $s2,$s2,$acc02
1429 rotrwi $acc10,$acc10,8
1430 xor $s3,$s3,$acc03
1431 rotrwi $acc11,$acc11,8
1432 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1433 xor $s1,$s1,$acc05
1434 xor $s2,$s2,$acc06
1435 xor $s3,$s3,$acc07
1436 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1437 xor $s1,$s1,$acc09
1438 xor $s2,$s2,$acc10
1439 xor $s3,$s3,$acc11
1440
1441 b Ldec_compact_loop
1442 .align 4
# Reached from the bdz above: apply the last four key words and return.
1443 Ldec_compact_done:
1444 xor $s0,$s0,$t0
1445 xor $s1,$s1,$t1
1446 xor $s2,$s2,$t2
1447 xor $s3,$s3,$t3
1448 blr
# NOTE(review): the .long/.byte words after blr look like a traceback
# tag for the routine - confirm against the target ABI.
1449 .long 0
1450 .byte 0,12,0x14,0,0,0,0,0
1451 .size .AES_decrypt,.-.AES_decrypt
1452
1453 .asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1454 .align 7
1455 ___
1456
# Post-process and emit the generated assembly.
# Every backtick-quoted span in $code is a Perl expression (stack offsets,
# shift counts); evaluate each one and substitute its value in place.
1457 $code =~ s/\`([^\`]*)\`/eval $1/gem;
1458 print $code;
# STDOUT is buffered, so a failed write (full disk, broken pipe) may only
# surface at close. Die here so the build stops instead of silently
# consuming a truncated assembly file.
1459 close STDOUT or die "error closing STDOUT: $!";