]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/aes/asm/aes-ppc.pl
PPC assembler pack: adhere closer to ABI specs, add PowerOpen traceback data.
[thirdparty/openssl.git] / crypto / aes / asm / aes-ppc.pl
CommitLineData
9c200f54
AP
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
10# Needs more work: key setup, page boundaries, CBC routine...
11#
12# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
13# 128-bit key, which is ~40% better than 64-bit code generated by gcc
14# 4.0. But these are not the ones currently used! Their "compact"
96b0f6c1
AP
15# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
16# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
17# at 1/3 of ppc_AES_decrypt.
9c200f54 18
109757d2
AP
19# February 2010
20#
67150340 21# Rescheduling instructions to favour Power6 pipeline gave 10%
109757d2
AP
22# performance improvement on the platform in question (and marginal
23# improvement even on others). It should be noted that Power6 fails
24# to process byte in 18 cycles, only in 23, because it fails to issue
25# 4 load instructions in two cycles, only in 3. As a result, non-compact
26# block subroutines are 25% slower than one would expect. Compact
27# functions scale better, because they have pure computational part,
28# which scales perfectly with clock frequency. To be specific
29# ppc_AES_encrypt_compact operates at 42 cycles per byte, while
30# ppc_AES_decrypt_compact - at 55 (in 64-bit build).
31
# Script set-up.
#
# Usage: aes-ppc.pl <flavour> [<output>]
#
# <flavour> must contain "64" or "32"; it selects the native word size and
# the matching store-with-update / load / store mnemonics that are
# interpolated into the assembly templates further down.  The next argument
# is handed to ppc-xlate.pl unchanged.
$flavour = shift;

if ($flavour =~ /64/) {
    $SIZE_T = 8;
    $LRSAVE = 2*$SIZE_T;    # offset, relative to the caller's frame, of the LR save slot
    $STU    = "stdu";
    $POP    = "ld";
    $PUSH   = "std";
} elsif ($flavour =~ /32/) {
    $SIZE_T = 4;
    $LRSAVE = $SIZE_T;      # offset, relative to the caller's frame, of the LR save slot
    $STU    = "stwu";
    $POP    = "lwz";
    $PUSH   = "stw";
} else { die "nonsense $flavour"; }

# Look for the translator next to this script first, then in ../../perlasm/.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through ppc-xlate.pl.
# The parentheses plus low-precedence "or" make the error check apply to
# open() itself; with "||" it would bind to the command string, which is
# always true, and a failed open would go unnoticed.
open(STDOUT, "| $^X $xlate $flavour " . shift(@ARGV)) or die "can't call $xlate: $!";

# Stack frame size used by the exported entry points: 32 machine words.
$FRAME=32*$SIZE_T;
56
# _data_word(LIST)
#
# Appends one assembler line per argument to the global $code buffer.  Each
# 32-bit value is written twice ("\t.long\t0xXXXXXXXX,0xXXXXXXXX\n"), so a
# table entry occupies 8 bytes.  The lookup code in this file indexes these
# tables with an 8-byte stride and base offsets of 0..3 bytes, presumably so
# that a misaligned word load yields a byte-rotated entry.
#
# Processing stops at the first undefined argument.  The return value is
# not meaningful.
#
# No prototype is declared: the sub takes a list, and an empty "()"
# prototype would reject any call made without the "&" sigil.
sub _data_word
{
    foreach my $word (@_) {
        last unless defined($word);
        $code .= sprintf("\t.long\t0x%08x,0x%08x\n", $word, $word);
    }
}
61
# Symbolic register names interpolated into the assembly templates below.

# Stack pointer, TOC pointer and the three routine arguments.
($sp, $toc, $inp, $out, $key) = ("r1", "r2", "r3", "r4", "r5");

# Table base pointers.  $Tbl0 shares r3 with $inp and $Tbl3 shares r2
# with $toc.
($Tbl0, $Tbl1, $Tbl2, $Tbl3) = ("r3", "r6", "r7", "r2");

# Cipher state words and per-round temporaries.
($s0, $s1, $s2, $s3) = ("r8",  "r9",  "r10", "r11");
($t0, $t1, $t2, $t3) = ("r12", "r13", "r14", "r15");

# Sixteen scratch accumulators occupy r16 through r31.
($acc00, $acc01, $acc02, $acc03,
 $acc04, $acc05, $acc06, $acc07,
 $acc08, $acc09, $acc10, $acc11,
 $acc12, $acc13, $acc14, $acc15) = map { "r$_" } (16 .. 31);

# stay away from TLS pointer
# With 8-byte words r13 is vacated ($t1 moves to r0); otherwise r2 is
# vacated ($Tbl3 takes over $t0's register and $t0 moves to r0).  The bare
# die calls are sanity checks that the assignments above still match.
if ($SIZE_T == 8) {
    die if ($t1 ne "r13");
    $t1 = "r0";
}
else {
    die if ($Tbl3 ne "r2");
    $Tbl3 = $t0;
    $t0   = "r0";
}

# The compact routines reuse two table registers to hold constant masks.
($mask80, $mask1b) = ($Tbl2, $Tbl3);
108
# Emit two small position-independent helpers, LAES_Te and LAES_Td.  Each
# one saves the caller's LR in r0, executes "bcl 20,31,$+4" so that LR
# receives the address of the following instruction, copies that address
# into $Tbl0, adds the byte distance from there to the first table entry,
# then restores LR from r0 and returns.  The .space directives pad the two
# stubs to 64 bytes each (128 bytes together), which is where the "128" in
# both distance expressions comes from.  The extra 2048+256 in LAES_Td
# steps over the 256-entry doubled-word table and the 256-byte table that
# are emitted first after the stubs.
109$code.=<<___;
492279f6 110.machine "any"
9c200f54
AP
111.text
112
113.align 7
114LAES_Te:
115 mflr r0
116 bcl 20,31,\$+4
117 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
118 addi $Tbl0,$Tbl0,`128-8`
119 mtlr r0
120 blr
67150340
AP
121 .long 0
122 .byte 0,12,0x14,0,0,0,0,0
123 .space `64-9*4`
9c200f54
AP
124LAES_Td:
125 mflr r0
126 bcl 20,31,\$+4
127 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
67150340 128 addi $Tbl0,$Tbl0,`128-64-8+2048+256`
9c200f54
AP
129 mtlr r0
130 blr
67150340
AP
131 .long 0
132 .byte 0,12,0x14,0,0,0,0,0
133 .space `128-64-9*4`
9c200f54
AP
134___
135&_data_word(
136 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
137 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
138 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
139 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
140 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
141 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
142 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
143 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
144 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
145 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
146 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
147 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
148 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
149 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
150 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
151 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
152 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
153 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
154 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
155 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
156 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
157 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
158 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
159 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
160 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
161 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
162 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
163 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
164 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
165 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
166 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
167 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
168 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
169 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
170 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
171 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
172 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
173 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
174 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
175 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
176 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
177 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
178 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
179 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
180 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
181 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
182 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
183 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
184 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
185 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
186 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
187 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
188 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
189 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
190 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
191 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
192 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
193 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
194 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
195 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
196 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
197 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
198 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
199 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
200$code.=<<___;
201.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
202.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
203.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
204.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
205.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
206.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
207.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
208.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
209.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
210.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
211.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
212.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
213.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
214.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
215.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
216.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
217.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
218.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
219.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
220.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
221.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
222.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
223.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
224.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
225.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
226.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
227.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
228.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
229.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
230.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
231.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
232.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
233___
234&_data_word(
235 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
236 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
237 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
238 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
239 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
240 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
241 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
242 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
243 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
244 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
245 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
246 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
247 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
248 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
249 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
250 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
251 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
252 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
253 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
254 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
255 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
256 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
257 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
258 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
259 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
260 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
261 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
262 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
263 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
264 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
265 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
266 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
267 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
268 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
269 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
270 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
271 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
272 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
273 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
274 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
275 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
276 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
277 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
278 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
279 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
280 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
281 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
282 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
283 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
284 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
285 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
286 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
287 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
288 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
289 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
290 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
291 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
292 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
293 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
294 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
295 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
296 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
297 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
298 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
299$code.=<<___;
300.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
301.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
302.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
303.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
304.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
305.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
306.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
307.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
308.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
309.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
310.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
311.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
312.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
313.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
314.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
315.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
316.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
317.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
318.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
319.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
320.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
321.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
322.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
323.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
324.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
325.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
326.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
327.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
328.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
329.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
330.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
331.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
332
333
334.globl .AES_encrypt
335.align 7
336.AES_encrypt:
9c200f54 337 $STU $sp,-$FRAME($sp)
67150340 338 mflr r0
9c200f54 339
9c200f54
AP
340 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
341 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
342 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
343 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
344 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
345 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
346 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
347 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
348 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
349 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
350 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
351 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
352 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
353 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
354 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
355 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
356 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
357 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
358 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
359 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
67150340 360 $PUSH r0,`$FRAME+$LRSAVE`($sp)
9c200f54
AP
361
362 lwz $s0,0($inp)
363 lwz $s1,4($inp)
364 lwz $s2,8($inp)
365 lwz $s3,12($inp)
366 bl LAES_Te
367 bl Lppc_AES_encrypt_compact
368 stw $s0,0($out)
369 stw $s1,4($out)
370 stw $s2,8($out)
371 stw $s3,12($out)
372
67150340 373 $POP r0,`$FRAME+$LRSAVE`($sp)
9c200f54
AP
374 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
375 $POP r13,`$FRAME-$SIZE_T*19`($sp)
376 $POP r14,`$FRAME-$SIZE_T*18`($sp)
377 $POP r15,`$FRAME-$SIZE_T*17`($sp)
378 $POP r16,`$FRAME-$SIZE_T*16`($sp)
379 $POP r17,`$FRAME-$SIZE_T*15`($sp)
380 $POP r18,`$FRAME-$SIZE_T*14`($sp)
381 $POP r19,`$FRAME-$SIZE_T*13`($sp)
382 $POP r20,`$FRAME-$SIZE_T*12`($sp)
383 $POP r21,`$FRAME-$SIZE_T*11`($sp)
384 $POP r22,`$FRAME-$SIZE_T*10`($sp)
385 $POP r23,`$FRAME-$SIZE_T*9`($sp)
386 $POP r24,`$FRAME-$SIZE_T*8`($sp)
387 $POP r25,`$FRAME-$SIZE_T*7`($sp)
388 $POP r26,`$FRAME-$SIZE_T*6`($sp)
389 $POP r27,`$FRAME-$SIZE_T*5`($sp)
390 $POP r28,`$FRAME-$SIZE_T*4`($sp)
391 $POP r29,`$FRAME-$SIZE_T*3`($sp)
392 $POP r30,`$FRAME-$SIZE_T*2`($sp)
393 $POP r31,`$FRAME-$SIZE_T*1`($sp)
394 mtlr r0
395 addi $sp,$sp,$FRAME
396 blr
67150340
AP
397 .long 0
398 .byte 0,12,4,1,0x80,18,3,0
399 .long 0
9c200f54 400
109757d2 401.align 5
9c200f54
AP
402Lppc_AES_encrypt:
403 lwz $acc00,240($key)
404 lwz $t0,0($key)
405 lwz $t1,4($key)
406 lwz $t2,8($key)
407 lwz $t3,12($key)
408 addi $Tbl1,$Tbl0,3
409 addi $Tbl2,$Tbl0,2
410 addi $Tbl3,$Tbl0,1
411 addi $acc00,$acc00,-1
412 addi $key,$key,16
413 xor $s0,$s0,$t0
414 xor $s1,$s1,$t1
415 xor $s2,$s2,$t2
416 xor $s3,$s3,$t3
417 mtctr $acc00
418.align 4
419Lenc_loop:
420 rlwinm $acc00,$s0,`32-24+3`,21,28
421 rlwinm $acc01,$s1,`32-24+3`,21,28
96d13fe6
AP
422 rlwinm $acc02,$s2,`32-24+3`,21,28
423 rlwinm $acc03,$s3,`32-24+3`,21,28
109757d2
AP
424 lwz $t0,0($key)
425 lwz $t1,4($key)
9c200f54
AP
426 rlwinm $acc04,$s1,`32-16+3`,21,28
427 rlwinm $acc05,$s2,`32-16+3`,21,28
109757d2
AP
428 lwz $t2,8($key)
429 lwz $t3,12($key)
96d13fe6
AP
430 rlwinm $acc06,$s3,`32-16+3`,21,28
431 rlwinm $acc07,$s0,`32-16+3`,21,28
109757d2
AP
432 lwzx $acc00,$Tbl0,$acc00
433 lwzx $acc01,$Tbl0,$acc01
9c200f54
AP
434 rlwinm $acc08,$s2,`32-8+3`,21,28
435 rlwinm $acc09,$s3,`32-8+3`,21,28
109757d2
AP
436 lwzx $acc02,$Tbl0,$acc02
437 lwzx $acc03,$Tbl0,$acc03
96d13fe6
AP
438 rlwinm $acc10,$s0,`32-8+3`,21,28
439 rlwinm $acc11,$s1,`32-8+3`,21,28
109757d2
AP
440 lwzx $acc04,$Tbl1,$acc04
441 lwzx $acc05,$Tbl1,$acc05
9c200f54
AP
442 rlwinm $acc12,$s3,`0+3`,21,28
443 rlwinm $acc13,$s0,`0+3`,21,28
109757d2
AP
444 lwzx $acc06,$Tbl1,$acc06
445 lwzx $acc07,$Tbl1,$acc07
96d13fe6
AP
446 rlwinm $acc14,$s1,`0+3`,21,28
447 rlwinm $acc15,$s2,`0+3`,21,28
109757d2
AP
448 lwzx $acc08,$Tbl2,$acc08
449 lwzx $acc09,$Tbl2,$acc09
9c200f54
AP
450 xor $t0,$t0,$acc00
451 xor $t1,$t1,$acc01
109757d2
AP
452 lwzx $acc10,$Tbl2,$acc10
453 lwzx $acc11,$Tbl2,$acc11
96d13fe6
AP
454 xor $t2,$t2,$acc02
455 xor $t3,$t3,$acc03
109757d2
AP
456 lwzx $acc12,$Tbl3,$acc12
457 lwzx $acc13,$Tbl3,$acc13
9c200f54
AP
458 xor $t0,$t0,$acc04
459 xor $t1,$t1,$acc05
109757d2
AP
460 lwzx $acc14,$Tbl3,$acc14
461 lwzx $acc15,$Tbl3,$acc15
9c200f54
AP
462 xor $t2,$t2,$acc06
463 xor $t3,$t3,$acc07
464 xor $t0,$t0,$acc08
465 xor $t1,$t1,$acc09
466 xor $t2,$t2,$acc10
467 xor $t3,$t3,$acc11
468 xor $s0,$t0,$acc12
469 xor $s1,$t1,$acc13
470 xor $s2,$t2,$acc14
471 xor $s3,$t3,$acc15
472 addi $key,$key,16
473 bdnz- Lenc_loop
474
475 addi $Tbl2,$Tbl0,2048
476 nop
9c200f54
AP
477 lwz $t0,0($key)
478 lwz $t1,4($key)
109757d2
AP
479 rlwinm $acc00,$s0,`32-24`,24,31
480 rlwinm $acc01,$s1,`32-24`,24,31
9c200f54
AP
481 lwz $t2,8($key)
482 lwz $t3,12($key)
109757d2
AP
483 rlwinm $acc02,$s2,`32-24`,24,31
484 rlwinm $acc03,$s3,`32-24`,24,31
485 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
486 lwz $acc09,`2048+32`($Tbl0)
9c200f54
AP
487 rlwinm $acc04,$s1,`32-16`,24,31
488 rlwinm $acc05,$s2,`32-16`,24,31
109757d2
AP
489 lwz $acc10,`2048+64`($Tbl0)
490 lwz $acc11,`2048+96`($Tbl0)
96d13fe6
AP
491 rlwinm $acc06,$s3,`32-16`,24,31
492 rlwinm $acc07,$s0,`32-16`,24,31
109757d2
AP
493 lwz $acc12,`2048+128`($Tbl0)
494 lwz $acc13,`2048+160`($Tbl0)
9c200f54
AP
495 rlwinm $acc08,$s2,`32-8`,24,31
496 rlwinm $acc09,$s3,`32-8`,24,31
109757d2
AP
497 lwz $acc14,`2048+192`($Tbl0)
498 lwz $acc15,`2048+224`($Tbl0)
96d13fe6
AP
499 rlwinm $acc10,$s0,`32-8`,24,31
500 rlwinm $acc11,$s1,`32-8`,24,31
109757d2
AP
501 lbzx $acc00,$Tbl2,$acc00
502 lbzx $acc01,$Tbl2,$acc01
9c200f54
AP
503 rlwinm $acc12,$s3,`0`,24,31
504 rlwinm $acc13,$s0,`0`,24,31
109757d2
AP
505 lbzx $acc02,$Tbl2,$acc02
506 lbzx $acc03,$Tbl2,$acc03
96d13fe6
AP
507 rlwinm $acc14,$s1,`0`,24,31
508 rlwinm $acc15,$s2,`0`,24,31
109757d2
AP
509 lbzx $acc04,$Tbl2,$acc04
510 lbzx $acc05,$Tbl2,$acc05
9c200f54
AP
511 rlwinm $s0,$acc00,24,0,7
512 rlwinm $s1,$acc01,24,0,7
109757d2
AP
513 lbzx $acc06,$Tbl2,$acc06
514 lbzx $acc07,$Tbl2,$acc07
96d13fe6
AP
515 rlwinm $s2,$acc02,24,0,7
516 rlwinm $s3,$acc03,24,0,7
109757d2
AP
517 lbzx $acc08,$Tbl2,$acc08
518 lbzx $acc09,$Tbl2,$acc09
9c200f54
AP
519 rlwimi $s0,$acc04,16,8,15
520 rlwimi $s1,$acc05,16,8,15
109757d2
AP
521 lbzx $acc10,$Tbl2,$acc10
522 lbzx $acc11,$Tbl2,$acc11
9c200f54
AP
523 rlwimi $s2,$acc06,16,8,15
524 rlwimi $s3,$acc07,16,8,15
109757d2
AP
525 lbzx $acc12,$Tbl2,$acc12
526 lbzx $acc13,$Tbl2,$acc13
9c200f54
AP
527 rlwimi $s0,$acc08,8,16,23
528 rlwimi $s1,$acc09,8,16,23
109757d2
AP
529 lbzx $acc14,$Tbl2,$acc14
530 lbzx $acc15,$Tbl2,$acc15
9c200f54
AP
531 rlwimi $s2,$acc10,8,16,23
532 rlwimi $s3,$acc11,8,16,23
533 or $s0,$s0,$acc12
534 or $s1,$s1,$acc13
535 or $s2,$s2,$acc14
536 or $s3,$s3,$acc15
537 xor $s0,$s0,$t0
538 xor $s1,$s1,$t1
539 xor $s2,$s2,$t2
540 xor $s3,$s3,$t3
541 blr
67150340
AP
542 .long 0
543 .byte 0,12,0x14,0,0,0,0,0
9c200f54
AP
544
545.align 4
546Lppc_AES_encrypt_compact:
547 lwz $acc00,240($key)
548 lwz $t0,0($key)
549 lwz $t1,4($key)
550 lwz $t2,8($key)
551 lwz $t3,12($key)
552 addi $Tbl1,$Tbl0,2048
553 lis $mask80,0x8080
554 lis $mask1b,0x1b1b
9c200f54
AP
555 addi $key,$key,16
556 ori $mask80,$mask80,0x8080
557 ori $mask1b,$mask1b,0x1b1b
52ee3d01
AP
558 mtctr $acc00
559.align 4
560Lenc_compact_loop:
9c200f54
AP
561 xor $s0,$s0,$t0
562 xor $s1,$s1,$t1
563 xor $s2,$s2,$t2
564 xor $s3,$s3,$t3
9c200f54
AP
565 rlwinm $acc00,$s0,`32-24`,24,31
566 rlwinm $acc01,$s1,`32-24`,24,31
567 rlwinm $acc02,$s2,`32-24`,24,31
568 rlwinm $acc03,$s3,`32-24`,24,31
96d13fe6
AP
569 rlwinm $acc04,$s1,`32-16`,24,31
570 rlwinm $acc05,$s2,`32-16`,24,31
96d13fe6
AP
571 rlwinm $acc06,$s3,`32-16`,24,31
572 rlwinm $acc07,$s0,`32-16`,24,31
109757d2
AP
573 lbzx $acc00,$Tbl1,$acc00
574 lbzx $acc01,$Tbl1,$acc01
96d13fe6
AP
575 rlwinm $acc08,$s2,`32-8`,24,31
576 rlwinm $acc09,$s3,`32-8`,24,31
109757d2
AP
577 lbzx $acc02,$Tbl1,$acc02
578 lbzx $acc03,$Tbl1,$acc03
96d13fe6
AP
579 rlwinm $acc10,$s0,`32-8`,24,31
580 rlwinm $acc11,$s1,`32-8`,24,31
109757d2
AP
581 lbzx $acc04,$Tbl1,$acc04
582 lbzx $acc05,$Tbl1,$acc05
96d13fe6
AP
583 rlwinm $acc12,$s3,`0`,24,31
584 rlwinm $acc13,$s0,`0`,24,31
109757d2
AP
585 lbzx $acc06,$Tbl1,$acc06
586 lbzx $acc07,$Tbl1,$acc07
96d13fe6
AP
587 rlwinm $acc14,$s1,`0`,24,31
588 rlwinm $acc15,$s2,`0`,24,31
109757d2
AP
589 lbzx $acc08,$Tbl1,$acc08
590 lbzx $acc09,$Tbl1,$acc09
96d13fe6
AP
591 rlwinm $s0,$acc00,24,0,7
592 rlwinm $s1,$acc01,24,0,7
109757d2
AP
593 lbzx $acc10,$Tbl1,$acc10
594 lbzx $acc11,$Tbl1,$acc11
96d13fe6
AP
595 rlwinm $s2,$acc02,24,0,7
596 rlwinm $s3,$acc03,24,0,7
109757d2
AP
597 lbzx $acc12,$Tbl1,$acc12
598 lbzx $acc13,$Tbl1,$acc13
9c200f54
AP
599 rlwimi $s0,$acc04,16,8,15
600 rlwimi $s1,$acc05,16,8,15
109757d2
AP
601 lbzx $acc14,$Tbl1,$acc14
602 lbzx $acc15,$Tbl1,$acc15
9c200f54
AP
603 rlwimi $s2,$acc06,16,8,15
604 rlwimi $s3,$acc07,16,8,15
605 rlwimi $s0,$acc08,8,16,23
606 rlwimi $s1,$acc09,8,16,23
607 rlwimi $s2,$acc10,8,16,23
608 rlwimi $s3,$acc11,8,16,23
96d13fe6
AP
609 lwz $t0,0($key)
610 lwz $t1,4($key)
9c200f54
AP
611 or $s0,$s0,$acc12
612 or $s1,$s1,$acc13
96d13fe6
AP
613 lwz $t2,8($key)
614 lwz $t3,12($key)
9c200f54
AP
615 or $s2,$s2,$acc14
616 or $s3,$s3,$acc15
617
52ee3d01
AP
618 addi $key,$key,16
619 bdz Lenc_compact_done
620
9c200f54
AP
621 and $acc00,$s0,$mask80 # r1=r0&0x80808080
622 and $acc01,$s1,$mask80
623 and $acc02,$s2,$mask80
624 and $acc03,$s3,$mask80
625 srwi $acc04,$acc00,7 # r1>>7
626 srwi $acc05,$acc01,7
627 srwi $acc06,$acc02,7
628 srwi $acc07,$acc03,7
629 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
630 andc $acc09,$s1,$mask80
631 andc $acc10,$s2,$mask80
632 andc $acc11,$s3,$mask80
633 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
634 sub $acc01,$acc01,$acc05
635 sub $acc02,$acc02,$acc06
636 sub $acc03,$acc03,$acc07
637 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
638 add $acc09,$acc09,$acc09
639 add $acc10,$acc10,$acc10
640 add $acc11,$acc11,$acc11
641 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
642 and $acc01,$acc01,$mask1b
643 and $acc02,$acc02,$mask1b
644 and $acc03,$acc03,$mask1b
645 xor $acc00,$acc00,$acc08 # r2
646 xor $acc01,$acc01,$acc09
647 xor $acc02,$acc02,$acc10
648 xor $acc03,$acc03,$acc11
649
650 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
651 rotlwi $acc13,$s1,16
652 rotlwi $acc14,$s2,16
653 rotlwi $acc15,$s3,16
654 xor $s0,$s0,$acc00 # r0^r2
655 xor $s1,$s1,$acc01
656 xor $s2,$s2,$acc02
657 xor $s3,$s3,$acc03
658 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
659 rotrwi $s1,$s1,24
660 rotrwi $s2,$s2,24
661 rotrwi $s3,$s3,24
662 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
663 xor $s1,$s1,$acc01
664 xor $s2,$s2,$acc02
665 xor $s3,$s3,$acc03
666 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
667 rotlwi $acc09,$acc13,8
668 rotlwi $acc10,$acc14,8
669 rotlwi $acc11,$acc15,8
670 xor $s0,$s0,$acc12 #
671 xor $s1,$s1,$acc13
672 xor $s2,$s2,$acc14
673 xor $s3,$s3,$acc15
674 xor $s0,$s0,$acc08 #
675 xor $s1,$s1,$acc09
676 xor $s2,$s2,$acc10
677 xor $s3,$s3,$acc11
678
52ee3d01
AP
679 b Lenc_compact_loop
680.align 4
681Lenc_compact_done:
9c200f54
AP
682 xor $s0,$s0,$t0
683 xor $s1,$s1,$t1
684 xor $s2,$s2,$t2
685 xor $s3,$s3,$t3
686 blr
67150340
AP
687 .long 0
688 .byte 0,12,0x14,0,0,0,0,0
9c200f54
AP
689
690.globl .AES_decrypt
691.align 7
692.AES_decrypt:
9c200f54 693 $STU $sp,-$FRAME($sp)
67150340 694 mflr r0
9c200f54 695
9c200f54
AP
696 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
697 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
698 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
699 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
700 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
701 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
702 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
703 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
704 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
705 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
706 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
707 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
708 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
709 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
710 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
711 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
712 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
713 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
714 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
715 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
67150340 716 $PUSH r0,`$FRAME+$LRSAVE`($sp)
9c200f54
AP
717
718 lwz $s0,0($inp)
719 lwz $s1,4($inp)
720 lwz $s2,8($inp)
721 lwz $s3,12($inp)
722 bl LAES_Td
723 bl Lppc_AES_decrypt_compact
724 stw $s0,0($out)
725 stw $s1,4($out)
726 stw $s2,8($out)
727 stw $s3,12($out)
728
67150340 729 $POP r0,`$FRAME+$LRSAVE`($sp)
9c200f54
AP
730 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
731 $POP r13,`$FRAME-$SIZE_T*19`($sp)
732 $POP r14,`$FRAME-$SIZE_T*18`($sp)
733 $POP r15,`$FRAME-$SIZE_T*17`($sp)
734 $POP r16,`$FRAME-$SIZE_T*16`($sp)
735 $POP r17,`$FRAME-$SIZE_T*15`($sp)
736 $POP r18,`$FRAME-$SIZE_T*14`($sp)
737 $POP r19,`$FRAME-$SIZE_T*13`($sp)
738 $POP r20,`$FRAME-$SIZE_T*12`($sp)
739 $POP r21,`$FRAME-$SIZE_T*11`($sp)
740 $POP r22,`$FRAME-$SIZE_T*10`($sp)
741 $POP r23,`$FRAME-$SIZE_T*9`($sp)
742 $POP r24,`$FRAME-$SIZE_T*8`($sp)
743 $POP r25,`$FRAME-$SIZE_T*7`($sp)
744 $POP r26,`$FRAME-$SIZE_T*6`($sp)
745 $POP r27,`$FRAME-$SIZE_T*5`($sp)
746 $POP r28,`$FRAME-$SIZE_T*4`($sp)
747 $POP r29,`$FRAME-$SIZE_T*3`($sp)
748 $POP r30,`$FRAME-$SIZE_T*2`($sp)
749 $POP r31,`$FRAME-$SIZE_T*1`($sp)
750 mtlr r0
751 addi $sp,$sp,$FRAME
752 blr
67150340
AP
753 .long 0
754 .byte 0,12,4,1,0x80,18,3,0
755 .long 0
9c200f54 756
109757d2 757.align 5
9c200f54
AP
758Lppc_AES_decrypt:
759 lwz $acc00,240($key)
760 lwz $t0,0($key)
761 lwz $t1,4($key)
762 lwz $t2,8($key)
763 lwz $t3,12($key)
764 addi $Tbl1,$Tbl0,3
765 addi $Tbl2,$Tbl0,2
766 addi $Tbl3,$Tbl0,1
767 addi $acc00,$acc00,-1
768 addi $key,$key,16
769 xor $s0,$s0,$t0
770 xor $s1,$s1,$t1
771 xor $s2,$s2,$t2
772 xor $s3,$s3,$t3
773 mtctr $acc00
774.align 4
775Ldec_loop:
776 rlwinm $acc00,$s0,`32-24+3`,21,28
777 rlwinm $acc01,$s1,`32-24+3`,21,28
96d13fe6
AP
778 rlwinm $acc02,$s2,`32-24+3`,21,28
779 rlwinm $acc03,$s3,`32-24+3`,21,28
109757d2
AP
780 lwz $t0,0($key)
781 lwz $t1,4($key)
9c200f54
AP
782 rlwinm $acc04,$s3,`32-16+3`,21,28
783 rlwinm $acc05,$s0,`32-16+3`,21,28
109757d2
AP
784 lwz $t2,8($key)
785 lwz $t3,12($key)
96d13fe6
AP
786 rlwinm $acc06,$s1,`32-16+3`,21,28
787 rlwinm $acc07,$s2,`32-16+3`,21,28
109757d2
AP
788 lwzx $acc00,$Tbl0,$acc00
789 lwzx $acc01,$Tbl0,$acc01
9c200f54
AP
790 rlwinm $acc08,$s2,`32-8+3`,21,28
791 rlwinm $acc09,$s3,`32-8+3`,21,28
109757d2
AP
792 lwzx $acc02,$Tbl0,$acc02
793 lwzx $acc03,$Tbl0,$acc03
96d13fe6
AP
794 rlwinm $acc10,$s0,`32-8+3`,21,28
795 rlwinm $acc11,$s1,`32-8+3`,21,28
109757d2
AP
796 lwzx $acc04,$Tbl1,$acc04
797 lwzx $acc05,$Tbl1,$acc05
9c200f54
AP
798 rlwinm $acc12,$s1,`0+3`,21,28
799 rlwinm $acc13,$s2,`0+3`,21,28
109757d2
AP
800 lwzx $acc06,$Tbl1,$acc06
801 lwzx $acc07,$Tbl1,$acc07
96d13fe6
AP
802 rlwinm $acc14,$s3,`0+3`,21,28
803 rlwinm $acc15,$s0,`0+3`,21,28
109757d2
AP
804 lwzx $acc08,$Tbl2,$acc08
805 lwzx $acc09,$Tbl2,$acc09
9c200f54
AP
806 xor $t0,$t0,$acc00
807 xor $t1,$t1,$acc01
109757d2
AP
808 lwzx $acc10,$Tbl2,$acc10
809 lwzx $acc11,$Tbl2,$acc11
96d13fe6
AP
810 xor $t2,$t2,$acc02
811 xor $t3,$t3,$acc03
109757d2
AP
812 lwzx $acc12,$Tbl3,$acc12
813 lwzx $acc13,$Tbl3,$acc13
9c200f54
AP
814 xor $t0,$t0,$acc04
815 xor $t1,$t1,$acc05
109757d2
AP
816 lwzx $acc14,$Tbl3,$acc14
817 lwzx $acc15,$Tbl3,$acc15
9c200f54
AP
818 xor $t2,$t2,$acc06
819 xor $t3,$t3,$acc07
820 xor $t0,$t0,$acc08
821 xor $t1,$t1,$acc09
822 xor $t2,$t2,$acc10
823 xor $t3,$t3,$acc11
824 xor $s0,$t0,$acc12
825 xor $s1,$t1,$acc13
826 xor $s2,$t2,$acc14
827 xor $s3,$t3,$acc15
828 addi $key,$key,16
829 bdnz- Ldec_loop
830
831 addi $Tbl2,$Tbl0,2048
832 nop
9c200f54
AP
833 lwz $t0,0($key)
834 lwz $t1,4($key)
109757d2
AP
835 rlwinm $acc00,$s0,`32-24`,24,31
836 rlwinm $acc01,$s1,`32-24`,24,31
9c200f54
AP
837 lwz $t2,8($key)
838 lwz $t3,12($key)
109757d2
AP
839 rlwinm $acc02,$s2,`32-24`,24,31
840 rlwinm $acc03,$s3,`32-24`,24,31
841 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
842 lwz $acc09,`2048+32`($Tbl0)
9c200f54
AP
843 rlwinm $acc04,$s3,`32-16`,24,31
844 rlwinm $acc05,$s0,`32-16`,24,31
109757d2
AP
845 lwz $acc10,`2048+64`($Tbl0)
846 lwz $acc11,`2048+96`($Tbl0)
9c200f54
AP
847 lbzx $acc00,$Tbl2,$acc00
848 lbzx $acc01,$Tbl2,$acc01
109757d2
AP
849 lwz $acc12,`2048+128`($Tbl0)
850 lwz $acc13,`2048+160`($Tbl0)
96d13fe6
AP
851 rlwinm $acc06,$s1,`32-16`,24,31
852 rlwinm $acc07,$s2,`32-16`,24,31
109757d2
AP
853 lwz $acc14,`2048+192`($Tbl0)
854 lwz $acc15,`2048+224`($Tbl0)
9c200f54
AP
855 rlwinm $acc08,$s2,`32-8`,24,31
856 rlwinm $acc09,$s3,`32-8`,24,31
109757d2
AP
857 lbzx $acc02,$Tbl2,$acc02
858 lbzx $acc03,$Tbl2,$acc03
96d13fe6
AP
859 rlwinm $acc10,$s0,`32-8`,24,31
860 rlwinm $acc11,$s1,`32-8`,24,31
109757d2
AP
861 lbzx $acc04,$Tbl2,$acc04
862 lbzx $acc05,$Tbl2,$acc05
9c200f54
AP
863 rlwinm $acc12,$s1,`0`,24,31
864 rlwinm $acc13,$s2,`0`,24,31
109757d2
AP
865 lbzx $acc06,$Tbl2,$acc06
866 lbzx $acc07,$Tbl2,$acc07
96d13fe6
AP
867 rlwinm $acc14,$s3,`0`,24,31
868 rlwinm $acc15,$s0,`0`,24,31
109757d2
AP
869 lbzx $acc08,$Tbl2,$acc08
870 lbzx $acc09,$Tbl2,$acc09
9c200f54
AP
871 rlwinm $s0,$acc00,24,0,7
872 rlwinm $s1,$acc01,24,0,7
109757d2
AP
873 lbzx $acc10,$Tbl2,$acc10
874 lbzx $acc11,$Tbl2,$acc11
96d13fe6
AP
875 rlwinm $s2,$acc02,24,0,7
876 rlwinm $s3,$acc03,24,0,7
109757d2
AP
877 lbzx $acc12,$Tbl2,$acc12
878 lbzx $acc13,$Tbl2,$acc13
9c200f54
AP
879 rlwimi $s0,$acc04,16,8,15
880 rlwimi $s1,$acc05,16,8,15
109757d2
AP
881 lbzx $acc14,$Tbl2,$acc14
882 lbzx $acc15,$Tbl2,$acc15
9c200f54
AP
883 rlwimi $s2,$acc06,16,8,15
884 rlwimi $s3,$acc07,16,8,15
885 rlwimi $s0,$acc08,8,16,23
886 rlwimi $s1,$acc09,8,16,23
887 rlwimi $s2,$acc10,8,16,23
888 rlwimi $s3,$acc11,8,16,23
889 or $s0,$s0,$acc12
890 or $s1,$s1,$acc13
891 or $s2,$s2,$acc14
892 or $s3,$s3,$acc15
893 xor $s0,$s0,$t0
894 xor $s1,$s1,$t1
895 xor $s2,$s2,$t2
896 xor $s3,$s3,$t3
897 blr
67150340
AP
898 .long 0
899 .byte 0,12,0x14,0,0,0,0,0
9c200f54
AP
900
901.align 4
 # Compact decryption core. On entry the four state words are in s0-s3,
 # key points at the key schedule and Tbl0 at the table base; the result
 # is left in s0-s3. Only the byte table at Tbl0+2048 is used for lookups
 # (presumably the inverse S-box - confirm against the table definition).
902Lppc_AES_decrypt_compact:
903 lwz $acc00,240($key) # loop count, moved to CTR before the loop (presumably the round count)
904 lwz $t0,0($key) # first round key, words 0..3
905 lwz $t1,4($key)
906 lwz $t2,8($key)
907 lwz $t3,12($key)
908 addi $Tbl1,$Tbl0,2048 # Tbl1 = base of the byte lookup table
909 lis $mask80,0x8080 # upper halfword of 0x80808080
910 lis $mask1b,0x1b1b # upper halfword of 0x1b1b1b1b
9c200f54
AP
911 addi $key,$key,16 # step to the next round key
912 ori $mask80,$mask80,0x8080 # mask80 = 0x80808080
913 ori $mask1b,$mask1b,0x1b1b # mask1b = 0x1b1b1b1b
d7e91561
AP
914___
# 64-bit builds only: replicate both 32-bit masks into the upper half of
# their registers, so the packed (two words per register) mixing code can
# apply them to both words at once.
915$code.=<<___ if ($SIZE_T==8);
916 insrdi $mask80,$mask80,32,0 # mask80 = 0x8080808080808080
917 insrdi $mask1b,$mask1b,32,0 # mask1b = 0x1b1b1b1b1b1b1b1b
918___
# Top of the per-round loop, common to 32- and 64-bit builds: xor in the
# round key, pick the bytes of the state in the inverse ShiftRows order,
# substitute each through the byte table and rebuild s0-s3. Table loads are
# interleaved with the rotates, and s0-s3 are overwritten only after their
# last byte has been extracted, so the statement order matters.
919$code.=<<___;
52ee3d01
AP
920 mtctr $acc00 # CTR = loop count fetched from 240(key) at entry
921.align 4
922Ldec_compact_loop:
9c200f54
AP
923 xor $s0,$s0,$t0 # xor in the current round key
924 xor $s1,$s1,$t1
925 xor $s2,$s2,$t2
926 xor $s3,$s3,$t3
9c200f54
AP
927 rlwinm $acc00,$s0,`32-24`,24,31 # acc00..03 = top byte of s0,s1,s2,s3
928 rlwinm $acc01,$s1,`32-24`,24,31
929 rlwinm $acc02,$s2,`32-24`,24,31
930 rlwinm $acc03,$s3,`32-24`,24,31
96d13fe6
AP
931 rlwinm $acc04,$s3,`32-16`,24,31 # acc04..07 = second byte of s3,s0,s1,s2
932 rlwinm $acc05,$s0,`32-16`,24,31
96d13fe6
AP
933 rlwinm $acc06,$s1,`32-16`,24,31
934 rlwinm $acc07,$s2,`32-16`,24,31
109757d2
AP
935 lbzx $acc00,$Tbl1,$acc00 # substitute: acc = Tbl1[acc]
936 lbzx $acc01,$Tbl1,$acc01
96d13fe6
AP
937 rlwinm $acc08,$s2,`32-8`,24,31 # acc08..11 = third byte of s2,s3,s0,s1
938 rlwinm $acc09,$s3,`32-8`,24,31
109757d2
AP
939 lbzx $acc02,$Tbl1,$acc02
940 lbzx $acc03,$Tbl1,$acc03
96d13fe6
AP
941 rlwinm $acc10,$s0,`32-8`,24,31
942 rlwinm $acc11,$s1,`32-8`,24,31
109757d2
AP
943 lbzx $acc04,$Tbl1,$acc04
944 lbzx $acc05,$Tbl1,$acc05
96d13fe6
AP
945 rlwinm $acc12,$s1,`0`,24,31 # acc12..15 = low byte of s1,s2,s3,s0
946 rlwinm $acc13,$s2,`0`,24,31
109757d2
AP
947 lbzx $acc06,$Tbl1,$acc06
948 lbzx $acc07,$Tbl1,$acc07
96d13fe6
AP
949 rlwinm $acc14,$s3,`0`,24,31
950 rlwinm $acc15,$s0,`0`,24,31
109757d2
AP
951 lbzx $acc08,$Tbl1,$acc08
952 lbzx $acc09,$Tbl1,$acc09
96d13fe6
AP
953 rlwinm $s0,$acc00,24,0,7 # rebuild: substituted byte into the top byte
954 rlwinm $s1,$acc01,24,0,7
109757d2
AP
955 lbzx $acc10,$Tbl1,$acc10
956 lbzx $acc11,$Tbl1,$acc11
96d13fe6
AP
957 rlwinm $s2,$acc02,24,0,7
958 rlwinm $s3,$acc03,24,0,7
109757d2
AP
959 lbzx $acc12,$Tbl1,$acc12
960 lbzx $acc13,$Tbl1,$acc13
9c200f54
AP
961 rlwimi $s0,$acc04,16,8,15 # insert the second byte
962 rlwimi $s1,$acc05,16,8,15
109757d2
AP
963 lbzx $acc14,$Tbl1,$acc14
964 lbzx $acc15,$Tbl1,$acc15
9c200f54
AP
965 rlwimi $s2,$acc06,16,8,15
966 rlwimi $s3,$acc07,16,8,15
967 rlwimi $s0,$acc08,8,16,23 # insert the third byte
968 rlwimi $s1,$acc09,8,16,23
969 rlwimi $s2,$acc10,8,16,23
970 rlwimi $s3,$acc11,8,16,23
96d13fe6
AP
971 lwz $t0,0($key) # fetch the next round key early
972 lwz $t1,4($key)
9c200f54
AP
973 or $s0,$s0,$acc12 # merge the low byte
974 or $s1,$s1,$acc13
96d13fe6
AP
975 lwz $t2,8($key)
976 lwz $t3,12($key)
9c200f54
AP
977 or $s2,$s2,$acc14
978 or $s3,$s3,$acc15
979
52ee3d01
AP
980 addi $key,$key,16 # step to the following round key
981 bdz Ldec_compact_done # CTR-1 == 0: last round, skip the column mixing
d7e91561
AP
982___
# 64-bit builds: pack two state words per register and compute r2, r4 and r8
# (each byte of r0 multiplied by 2, 4 and 8 in GF(2^8), reduction constant
# 0x1b) for all four words with half the instructions, then split the upper
# halves back out so the shared 32-bit combination code can follow.
983$code.=<<___ if ($SIZE_T==8);
984 # vectorized permutation improves decrypt performance by 10%
985 insrdi $s0,$s1,32,0 # s0 = s1:s0 (s1 in the upper 32 bits)
986 insrdi $s2,$s3,32,0 # s2 = s3:s2
52ee3d01 987
d7e91561
AP
988 and $acc00,$s0,$mask80 # r1=r0&0x80808080
989 and $acc02,$s2,$mask80
990 srdi $acc04,$acc00,7 # r1>>7
991 srdi $acc06,$acc02,7
992 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
993 andc $acc10,$s2,$mask80
994 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
995 sub $acc02,$acc02,$acc06
996 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
997 add $acc10,$acc10,$acc10
998 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
999 and $acc02,$acc02,$mask1b
1000 xor $acc00,$acc00,$acc08 # r2
1001 xor $acc02,$acc02,$acc10
1002
1003 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1004 and $acc06,$acc02,$mask80
1005 srdi $acc08,$acc04,7 # r1>>7
1006 srdi $acc10,$acc06,7
1007 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1008 andc $acc14,$acc02,$mask80
1009 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1010 sub $acc06,$acc06,$acc10
1011 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1012 add $acc14,$acc14,$acc14
1013 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1014 and $acc06,$acc06,$mask1b
1015 xor $acc04,$acc04,$acc12 # r4
1016 xor $acc06,$acc06,$acc14
1017
1018 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1019 and $acc10,$acc06,$mask80
1020 srdi $acc12,$acc08,7 # r1>>7
1021 srdi $acc14,$acc10,7
1022 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1023 sub $acc10,$acc10,$acc14
1024 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1025 andc $acc14,$acc06,$mask80
1026 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1027 add $acc14,$acc14,$acc14
1028 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1029 and $acc10,$acc10,$mask1b
1030 xor $acc08,$acc08,$acc12 # r8
1031 xor $acc10,$acc10,$acc14
1032
1033 xor $acc00,$acc00,$s0 # r2^r0
1034 xor $acc02,$acc02,$s2
1035 xor $acc04,$acc04,$s0 # r4^r0
1036 xor $acc06,$acc06,$s2
1037
96d13fe6
AP
1038 extrdi $acc01,$acc00,32,0 # unpack: upper word of r2^r0 (the s1 lane)
1039 extrdi $acc03,$acc02,32,0 # ... the s3 lane
1040 extrdi $acc05,$acc04,32,0 # upper words of r4^r0
1041 extrdi $acc07,$acc06,32,0
1042 extrdi $acc09,$acc08,32,0 # upper words of r8
1043 extrdi $acc11,$acc10,32,0
d7e91561
AP
1044___
# 32-bit builds: the same computation as the 64-bit variant, one state word
# per register. For each of s0-s3 (r0) this derives r2, r4 and r8, i.e. every
# byte multiplied by 2, 4 and 8 in GF(2^8) with reduction constant 0x1b, and
# leaves r2^r0 in acc00-03, r4^r0 in acc04-07 and r8 in acc08-11.
1045$code.=<<___ if ($SIZE_T==4);
9c200f54
AP
1046 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1047 and $acc01,$s1,$mask80
1048 and $acc02,$s2,$mask80
1049 and $acc03,$s3,$mask80
1050 srwi $acc04,$acc00,7 # r1>>7
1051 srwi $acc05,$acc01,7
1052 srwi $acc06,$acc02,7
1053 srwi $acc07,$acc03,7
1054 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1055 andc $acc09,$s1,$mask80
1056 andc $acc10,$s2,$mask80
1057 andc $acc11,$s3,$mask80
1058 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1059 sub $acc01,$acc01,$acc05
1060 sub $acc02,$acc02,$acc06
1061 sub $acc03,$acc03,$acc07
1062 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1063 add $acc09,$acc09,$acc09
1064 add $acc10,$acc10,$acc10
1065 add $acc11,$acc11,$acc11
1066 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1067 and $acc01,$acc01,$mask1b
1068 and $acc02,$acc02,$mask1b
1069 and $acc03,$acc03,$mask1b
1070 xor $acc00,$acc00,$acc08 # r2
1071 xor $acc01,$acc01,$acc09
1072 xor $acc02,$acc02,$acc10
1073 xor $acc03,$acc03,$acc11
1074
1075 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1076 and $acc05,$acc01,$mask80
1077 and $acc06,$acc02,$mask80
1078 and $acc07,$acc03,$mask80
1079 srwi $acc08,$acc04,7 # r1>>7
1080 srwi $acc09,$acc05,7
1081 srwi $acc10,$acc06,7
1082 srwi $acc11,$acc07,7
1083 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1084 andc $acc13,$acc01,$mask80
1085 andc $acc14,$acc02,$mask80
1086 andc $acc15,$acc03,$mask80
1087 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1088 sub $acc05,$acc05,$acc09
1089 sub $acc06,$acc06,$acc10
1090 sub $acc07,$acc07,$acc11
1091 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1092 add $acc13,$acc13,$acc13
1093 add $acc14,$acc14,$acc14
1094 add $acc15,$acc15,$acc15
1095 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1096 and $acc05,$acc05,$mask1b
1097 and $acc06,$acc06,$mask1b
1098 and $acc07,$acc07,$mask1b
1099 xor $acc04,$acc04,$acc12 # r4
1100 xor $acc05,$acc05,$acc13
1101 xor $acc06,$acc06,$acc14
1102 xor $acc07,$acc07,$acc15
1103
1104 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1105 and $acc09,$acc05,$mask80
1106 and $acc10,$acc06,$mask80
1107 and $acc11,$acc07,$mask80
1108 srwi $acc12,$acc08,7 # r1>>7
1109 srwi $acc13,$acc09,7
1110 srwi $acc14,$acc10,7
1111 srwi $acc15,$acc11,7
1112 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1113 sub $acc09,$acc09,$acc13
1114 sub $acc10,$acc10,$acc14
1115 sub $acc11,$acc11,$acc15
1116 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1117 andc $acc13,$acc05,$mask80
1118 andc $acc14,$acc06,$mask80
1119 andc $acc15,$acc07,$mask80
1120 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1121 add $acc13,$acc13,$acc13
1122 add $acc14,$acc14,$acc14
1123 add $acc15,$acc15,$acc15
1124 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1125 and $acc09,$acc09,$mask1b
1126 and $acc10,$acc10,$mask1b
1127 and $acc11,$acc11,$mask1b
1128 xor $acc08,$acc08,$acc12 # r8
1129 xor $acc09,$acc09,$acc13
1130 xor $acc10,$acc10,$acc14
1131 xor $acc11,$acc11,$acc15
1132
1133 xor $acc00,$acc00,$s0 # r2^r0
1134 xor $acc01,$acc01,$s1
1135 xor $acc02,$acc02,$s2
1136 xor $acc03,$acc03,$s3
1137 xor $acc04,$acc04,$s0 # r4^r0
1138 xor $acc05,$acc05,$s1
1139 xor $acc06,$acc06,$s2
1140 xor $acc07,$acc07,$s3
d7e91561
AP
1141___
# Shared tail of the round: combine r0, r2^r0, r4^r0 and r8 (prepared by the
# width-specific code) into the column-mixed state
#   r8^r4^r2 ^ ROTATE(r8^r2^r0,24) ^ ROTATE(r8^r4^r0,16) ^ ROTATE(r8^r0,8),
# i.e. multiplication by 0x0e, 0x0b, 0x0d and 0x09 as in AES InvMixColumns,
# then loop. Ldec_compact_done applies the last round key and returns.
1142$code.=<<___;
9c200f54
AP
1143 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1144 rotrwi $s1,$s1,8
1145 rotrwi $s2,$s2,8
1146 rotrwi $s3,$s3,8
1147 xor $s0,$s0,$acc00 # ^= r2^r0
1148 xor $s1,$s1,$acc01
1149 xor $s2,$s2,$acc02
1150 xor $s3,$s3,$acc03
1151 xor $acc00,$acc00,$acc08 # acc00..03 = r8^r2^r0
1152 xor $acc01,$acc01,$acc09
1153 xor $acc02,$acc02,$acc10
1154 xor $acc03,$acc03,$acc11
1155 xor $s0,$s0,$acc04 # ^= r4^r0
1156 xor $s1,$s1,$acc05
1157 xor $s2,$s2,$acc06
1158 xor $s3,$s3,$acc07
1159 rotrwi $acc00,$acc00,24 # ROTATE(r8^r2^r0,24)
1160 rotrwi $acc01,$acc01,24
1161 rotrwi $acc02,$acc02,24
1162 rotrwi $acc03,$acc03,24
1163 xor $acc04,$acc04,$acc08 # acc04..07 = r8^r4^r0
1164 xor $acc05,$acc05,$acc09
1165 xor $acc06,$acc06,$acc10
1166 xor $acc07,$acc07,$acc11
1167 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1168 xor $s1,$s1,$acc09
1169 xor $s2,$s2,$acc10
1170 xor $s3,$s3,$acc11
1171 rotrwi $acc04,$acc04,16 # ROTATE(r8^r4^r0,16)
1172 rotrwi $acc05,$acc05,16
1173 rotrwi $acc06,$acc06,16
1174 rotrwi $acc07,$acc07,16
1175 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1176 xor $s1,$s1,$acc01
1177 xor $s2,$s2,$acc02
1178 xor $s3,$s3,$acc03
1179 rotrwi $acc08,$acc08,8 # ROTATE(r8,8)
1180 rotrwi $acc09,$acc09,8
1181 rotrwi $acc10,$acc10,8
1182 rotrwi $acc11,$acc11,8
1183 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1184 xor $s1,$s1,$acc05
1185 xor $s2,$s2,$acc06
1186 xor $s3,$s3,$acc07
1187 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1188 xor $s1,$s1,$acc09
1189 xor $s2,$s2,$acc10
1190 xor $s3,$s3,$acc11
1191
52ee3d01
AP
1192 b Ldec_compact_loop # next round
1193.align 4
1194Ldec_compact_done:
9c200f54
AP
1195 xor $s0,$s0,$t0 # xor in the last round key loaded in the loop
1196 xor $s1,$s1,$t1
1197 xor $s2,$s2,$t2
1198 xor $s3,$s3,$t3
1199 blr # return with the result in s0-s3
67150340
AP
1200 .long 0 # traceback data following the routine
1201 .byte 0,12,0x14,0,0,0,0,0 # (presumably a PowerOpen traceback table - confirm field meanings)
1202
9c200f54
AP
1203.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1204.align 7
1205___
1206
# Fold every backtick-quoted arithmetic expression in the generated
# assembly (e.g. `32-24`, `2048+64`) into its numeric value. The string
# eval only ever sees text written in this file, never external input.
1207$code =~ s/\`([^\`]*)\`/eval $1/gem;
# Emit the finished assembly on STDOUT.
1208print $code;
# Output is buffered, so a write failure (full disk, closed pipe) only
# surfaces at close time; fail loudly instead of leaving a truncated
# assembly file behind for the build to pick up.
1209close STDOUT or die "error closing STDOUT: $!";