]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/aes/asm/aes-ppc.pl
AIX build updates.
[thirdparty/openssl.git] / crypto / aes / asm / aes-ppc.pl
CommitLineData
9c200f54
AP
1#!/usr/bin/env perl
2
3# ====================================================================
4# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
5# project. The module is, however, dual licensed under OpenSSL and
6# CRYPTOGAMS licenses depending on where you obtain it. For further
7# details see http://www.openssl.org/~appro/cryptogams/.
8# ====================================================================
9
# Needs more work: key setup, page boundaries, CBC routine...
#
# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
# 128-bit key, which is ~40% better than 64-bit code generated by gcc
# 4.0. But these are not the ones currently used! Their "compact"
# counterparts are, for security reasons. ppc_AES_encrypt_compact runs
# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
# at 1/3 of ppc_AES_decrypt.
9c200f54 18
# The first command-line argument names the target flavour.  It is
# matched against /64/ and then /32/ to pick the pointer size in bytes
# ($SIZE_T) and the mnemonics substituted into the code templates:
#   $STU  - store-with-update, used to push the stack frame
#   $POP  - pointer-sized load
#   $PUSH - pointer-sized store
# /64/ is tested first, so a flavour matching both patterns is treated
# as 64-bit.  Anything matching neither pattern aborts the script.
$flavour = shift;

($SIZE_T, $STU, $POP, $PUSH) =
      $flavour =~ /64/ ? (8, "stdu", "ld",  "std")
    : $flavour =~ /32/ ? (4, "stwu", "lwz", "stw")
    :                    die "nonsense $flavour";
9c200f54
AP
32
# Locate the ppc-xlate.pl translator relative to this script: first in
# the script's own directory, then in ../../perlasm/ below it.  $dir is
# the directory part of $0 including the trailing separator (left
# undefined, hence treated as "", when $0 contains no separator).
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator, which receives the flavour and the next command-line
# argument.  The low-precedence "or" is required: with "||" the test
# would apply to the (always true) command string instead of to the
# result of open(), and a failure would go unnoticed.
open STDOUT,"| $^X $xlate $flavour ".shift(@ARGV) or die "can't call $xlate: $!";
9c200f54
AP
39
# Size in bytes of the stack frame pushed by the generated entry points:
# 32 slots of $SIZE_T bytes each.
$FRAME = $SIZE_T * 32;
41
# Append one ".long" directive per argument to the global $code buffer.
# Each value is formatted as an 8-digit hexadecimal word and emitted
# twice on the same line.  Processing stops at the first undefined
# argument.
#
# No prototype is declared: the sub takes an arbitrary list, and an
# empty "()" prototype would reject every call not written with the
# prototype-bypassing "&" sigil.
sub _data_word
{
    my @words = @_;
    while (defined(my $word = shift @words)) {
        $code .= sprintf "\t.long\t0x%08x,0x%08x\n", $word, $word;
    }
}
46
# Register allocation.  Each variable holds the name of the general
# purpose register that the code templates below substitute for it.

# Stack pointer, TOC register, and the input, output and key pointers.
($sp, $toc, $inp, $out, $key) = map { "r$_" } 1 .. 5;

# Table pointers.  $Tbl0 shares r3 with $inp and $Tbl3 shares r2 with $toc.
($Tbl0, $Tbl1, $Tbl2, $Tbl3) = ("r3", "r6", "r7", "r2");

# The four 32-bit words loaded from $inp and stored back to $out.
($s0, $s1, $s2, $s3) = map { "r$_" } 8 .. 11;

# Four words loaded from $key.
($t0, $t1, $t2, $t3) = map { "r$_" } 12 .. 15;

# Sixteen scratch registers, r16 through r31.
($acc00, $acc01, $acc02, $acc03,
 $acc04, $acc05, $acc06, $acc07,
 $acc08, $acc09, $acc10, $acc11,
 $acc12, $acc13, $acc14, $acc15) = map { "r$_" } 16 .. 31;
87
# Stay away from the TLS pointer.  When $SIZE_T is 8, r13 is taken out
# of use by moving $t1 to r0; otherwise r2 is taken out of use by giving
# $Tbl3 the register previously held by $t0 and moving $t0 to r0.  The
# bare die calls are sanity checks that the allocation being patched is
# the one expected.
if ($SIZE_T != 8) {
    $Tbl3 eq "r2" or die;
    ($Tbl3, $t0) = ($t0, "r0");
} else {
    $t1 eq "r13" or die;
    $t1 = "r0";
}

# The 0x80.. and 0x1b.. mask constants reuse the $Tbl2 and $Tbl3 registers.
($mask80, $mask1b) = ($Tbl2, $Tbl3);
93
94$code.=<<___;
492279f6 95.machine "any"
9c200f54
AP
96.text
97
98.align 7
99LAES_Te:
100 mflr r0
101 bcl 20,31,\$+4
102 mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
103 addi $Tbl0,$Tbl0,`128-8`
104 mtlr r0
105 blr
106 .space `32-24`
107LAES_Td:
108 mflr r0
109 bcl 20,31,\$+4
110 mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
111 addi $Tbl0,$Tbl0,`128-8-32+2048+256`
112 mtlr r0
113 blr
114 .space `128-32-24`
115___
116&_data_word(
117 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
118 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
119 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
120 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
121 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
122 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
123 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
124 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
125 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
126 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
127 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
128 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
129 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
130 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
131 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
132 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
133 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
134 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
135 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
136 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
137 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
138 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
139 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
140 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
141 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
142 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
143 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
144 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
145 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
146 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
147 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
148 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
149 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
150 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
151 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
152 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
153 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
154 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
155 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
156 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
157 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
158 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
159 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
160 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
161 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
162 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
163 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
164 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
165 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
166 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
167 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
168 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
169 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
170 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
171 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
172 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
173 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
174 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
175 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
176 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
177 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
178 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
179 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
180 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
181$code.=<<___;
182.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
183.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
184.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
185.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
186.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
187.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
188.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
189.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
190.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
191.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
192.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
193.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
194.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
195.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
196.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
197.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
198.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
199.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
200.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
201.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
202.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
203.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
204.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
205.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
206.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
207.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
208.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
209.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
210.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
211.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
212.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
213.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
214___
215&_data_word(
216 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
217 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
218 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
219 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
220 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
221 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
222 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
223 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
224 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
225 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
226 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
227 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
228 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
229 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
230 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
231 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
232 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
233 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
234 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
235 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
236 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
237 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
238 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
239 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
240 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
241 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
242 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
243 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
244 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
245 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
246 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
247 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
248 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
249 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
250 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
251 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
252 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
253 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
254 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
255 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
256 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
257 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
258 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
259 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
260 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
261 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
262 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
263 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
264 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
265 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
266 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
267 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
268 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
269 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
270 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
271 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
272 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
273 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
274 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
275 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
276 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
277 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
278 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
279 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
280$code.=<<___;
281.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
282.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
283.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
284.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
285.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
286.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
287.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
288.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
289.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
290.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
291.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
292.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
293.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
294.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
295.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
296.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
297.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
298.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
299.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
300.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
301.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
302.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
303.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
304.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
305.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
306.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
307.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
308.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
309.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
310.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
311.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
312.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
313
314
315.globl .AES_encrypt
316.align 7
317.AES_encrypt:
318 mflr r0
319 $STU $sp,-$FRAME($sp)
320
321 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
322 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
323 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
324 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
325 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
326 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
327 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
328 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
329 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
330 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
331 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
332 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
333 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
334 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
335 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
336 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
337 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
338 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
339 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
340 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
341 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
342
343 lwz $s0,0($inp)
344 lwz $s1,4($inp)
345 lwz $s2,8($inp)
346 lwz $s3,12($inp)
347 bl LAES_Te
348 bl Lppc_AES_encrypt_compact
349 stw $s0,0($out)
350 stw $s1,4($out)
351 stw $s2,8($out)
352 stw $s3,12($out)
353
354 $POP r0,`$FRAME-$SIZE_T*21`($sp)
355 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
356 $POP r13,`$FRAME-$SIZE_T*19`($sp)
357 $POP r14,`$FRAME-$SIZE_T*18`($sp)
358 $POP r15,`$FRAME-$SIZE_T*17`($sp)
359 $POP r16,`$FRAME-$SIZE_T*16`($sp)
360 $POP r17,`$FRAME-$SIZE_T*15`($sp)
361 $POP r18,`$FRAME-$SIZE_T*14`($sp)
362 $POP r19,`$FRAME-$SIZE_T*13`($sp)
363 $POP r20,`$FRAME-$SIZE_T*12`($sp)
364 $POP r21,`$FRAME-$SIZE_T*11`($sp)
365 $POP r22,`$FRAME-$SIZE_T*10`($sp)
366 $POP r23,`$FRAME-$SIZE_T*9`($sp)
367 $POP r24,`$FRAME-$SIZE_T*8`($sp)
368 $POP r25,`$FRAME-$SIZE_T*7`($sp)
369 $POP r26,`$FRAME-$SIZE_T*6`($sp)
370 $POP r27,`$FRAME-$SIZE_T*5`($sp)
371 $POP r28,`$FRAME-$SIZE_T*4`($sp)
372 $POP r29,`$FRAME-$SIZE_T*3`($sp)
373 $POP r30,`$FRAME-$SIZE_T*2`($sp)
374 $POP r31,`$FRAME-$SIZE_T*1`($sp)
375 mtlr r0
376 addi $sp,$sp,$FRAME
377 blr
378
379.align 4
380Lppc_AES_encrypt:
381 lwz $acc00,240($key)
382 lwz $t0,0($key)
383 lwz $t1,4($key)
384 lwz $t2,8($key)
385 lwz $t3,12($key)
386 addi $Tbl1,$Tbl0,3
387 addi $Tbl2,$Tbl0,2
388 addi $Tbl3,$Tbl0,1
389 addi $acc00,$acc00,-1
390 addi $key,$key,16
391 xor $s0,$s0,$t0
392 xor $s1,$s1,$t1
393 xor $s2,$s2,$t2
394 xor $s3,$s3,$t3
395 mtctr $acc00
396.align 4
397Lenc_loop:
398 rlwinm $acc00,$s0,`32-24+3`,21,28
399 rlwinm $acc01,$s1,`32-24+3`,21,28
9c200f54
AP
400 lwz $t0,0($key)
401 lwz $t1,4($key)
96d13fe6
AP
402 rlwinm $acc02,$s2,`32-24+3`,21,28
403 rlwinm $acc03,$s3,`32-24+3`,21,28
9c200f54
AP
404 lwz $t2,8($key)
405 lwz $t3,12($key)
406 rlwinm $acc04,$s1,`32-16+3`,21,28
407 rlwinm $acc05,$s2,`32-16+3`,21,28
9c200f54
AP
408 lwzx $acc00,$Tbl0,$acc00
409 lwzx $acc01,$Tbl0,$acc01
96d13fe6
AP
410 rlwinm $acc06,$s3,`32-16+3`,21,28
411 rlwinm $acc07,$s0,`32-16+3`,21,28
9c200f54
AP
412 lwzx $acc02,$Tbl0,$acc02
413 lwzx $acc03,$Tbl0,$acc03
414 rlwinm $acc08,$s2,`32-8+3`,21,28
415 rlwinm $acc09,$s3,`32-8+3`,21,28
9c200f54
AP
416 lwzx $acc04,$Tbl1,$acc04
417 lwzx $acc05,$Tbl1,$acc05
96d13fe6
AP
418 rlwinm $acc10,$s0,`32-8+3`,21,28
419 rlwinm $acc11,$s1,`32-8+3`,21,28
9c200f54
AP
420 lwzx $acc06,$Tbl1,$acc06
421 lwzx $acc07,$Tbl1,$acc07
422 rlwinm $acc12,$s3,`0+3`,21,28
423 rlwinm $acc13,$s0,`0+3`,21,28
9c200f54
AP
424 lwzx $acc08,$Tbl2,$acc08
425 lwzx $acc09,$Tbl2,$acc09
96d13fe6
AP
426 rlwinm $acc14,$s1,`0+3`,21,28
427 rlwinm $acc15,$s2,`0+3`,21,28
9c200f54
AP
428 lwzx $acc10,$Tbl2,$acc10
429 lwzx $acc11,$Tbl2,$acc11
430 xor $t0,$t0,$acc00
431 xor $t1,$t1,$acc01
9c200f54
AP
432 lwzx $acc12,$Tbl3,$acc12
433 lwzx $acc13,$Tbl3,$acc13
96d13fe6
AP
434 xor $t2,$t2,$acc02
435 xor $t3,$t3,$acc03
9c200f54
AP
436 lwzx $acc14,$Tbl3,$acc14
437 lwzx $acc15,$Tbl3,$acc15
438 xor $t0,$t0,$acc04
439 xor $t1,$t1,$acc05
440 xor $t2,$t2,$acc06
441 xor $t3,$t3,$acc07
442 xor $t0,$t0,$acc08
443 xor $t1,$t1,$acc09
444 xor $t2,$t2,$acc10
445 xor $t3,$t3,$acc11
446 xor $s0,$t0,$acc12
447 xor $s1,$t1,$acc13
448 xor $s2,$t2,$acc14
449 xor $s3,$t3,$acc15
450 addi $key,$key,16
451 bdnz- Lenc_loop
452
453 addi $Tbl2,$Tbl0,2048
454 nop
455 lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
456 lwz $acc09,`2048+32`($Tbl0)
457 lwz $acc10,`2048+64`($Tbl0)
458 lwz $acc11,`2048+96`($Tbl0)
459 lwz $acc08,`2048+128`($Tbl0)
460 lwz $acc09,`2048+160`($Tbl0)
461 lwz $acc10,`2048+192`($Tbl0)
462 lwz $acc11,`2048+224`($Tbl0)
463 rlwinm $acc00,$s0,`32-24`,24,31
464 rlwinm $acc01,$s1,`32-24`,24,31
9c200f54
AP
465 lwz $t0,0($key)
466 lwz $t1,4($key)
96d13fe6
AP
467 rlwinm $acc02,$s2,`32-24`,24,31
468 rlwinm $acc03,$s3,`32-24`,24,31
9c200f54
AP
469 lwz $t2,8($key)
470 lwz $t3,12($key)
471 rlwinm $acc04,$s1,`32-16`,24,31
472 rlwinm $acc05,$s2,`32-16`,24,31
9c200f54
AP
473 lbzx $acc00,$Tbl2,$acc00
474 lbzx $acc01,$Tbl2,$acc01
96d13fe6
AP
475 rlwinm $acc06,$s3,`32-16`,24,31
476 rlwinm $acc07,$s0,`32-16`,24,31
9c200f54
AP
477 lbzx $acc02,$Tbl2,$acc02
478 lbzx $acc03,$Tbl2,$acc03
479 rlwinm $acc08,$s2,`32-8`,24,31
480 rlwinm $acc09,$s3,`32-8`,24,31
9c200f54
AP
481 lbzx $acc04,$Tbl2,$acc04
482 lbzx $acc05,$Tbl2,$acc05
96d13fe6
AP
483 rlwinm $acc10,$s0,`32-8`,24,31
484 rlwinm $acc11,$s1,`32-8`,24,31
9c200f54
AP
485 lbzx $acc06,$Tbl2,$acc06
486 lbzx $acc07,$Tbl2,$acc07
487 rlwinm $acc12,$s3,`0`,24,31
488 rlwinm $acc13,$s0,`0`,24,31
9c200f54
AP
489 lbzx $acc08,$Tbl2,$acc08
490 lbzx $acc09,$Tbl2,$acc09
96d13fe6
AP
491 rlwinm $acc14,$s1,`0`,24,31
492 rlwinm $acc15,$s2,`0`,24,31
9c200f54
AP
493 lbzx $acc10,$Tbl2,$acc10
494 lbzx $acc11,$Tbl2,$acc11
495 rlwinm $s0,$acc00,24,0,7
496 rlwinm $s1,$acc01,24,0,7
9c200f54
AP
497 lbzx $acc12,$Tbl2,$acc12
498 lbzx $acc13,$Tbl2,$acc13
96d13fe6
AP
499 rlwinm $s2,$acc02,24,0,7
500 rlwinm $s3,$acc03,24,0,7
9c200f54
AP
501 lbzx $acc14,$Tbl2,$acc14
502 lbzx $acc15,$Tbl2,$acc15
503 rlwimi $s0,$acc04,16,8,15
504 rlwimi $s1,$acc05,16,8,15
505 rlwimi $s2,$acc06,16,8,15
506 rlwimi $s3,$acc07,16,8,15
507 rlwimi $s0,$acc08,8,16,23
508 rlwimi $s1,$acc09,8,16,23
509 rlwimi $s2,$acc10,8,16,23
510 rlwimi $s3,$acc11,8,16,23
511 or $s0,$s0,$acc12
512 or $s1,$s1,$acc13
513 or $s2,$s2,$acc14
514 or $s3,$s3,$acc15
515 xor $s0,$s0,$t0
516 xor $s1,$s1,$t1
517 xor $s2,$s2,$t2
518 xor $s3,$s3,$t3
519 blr
520
521.align 4
522Lppc_AES_encrypt_compact:
523 lwz $acc00,240($key)
524 lwz $t0,0($key)
525 lwz $t1,4($key)
526 lwz $t2,8($key)
527 lwz $t3,12($key)
528 addi $Tbl1,$Tbl0,2048
529 lis $mask80,0x8080
530 lis $mask1b,0x1b1b
9c200f54
AP
531 addi $key,$key,16
532 ori $mask80,$mask80,0x8080
533 ori $mask1b,$mask1b,0x1b1b
52ee3d01
AP
534 mtctr $acc00
535.align 4
536Lenc_compact_loop:
9c200f54
AP
537 xor $s0,$s0,$t0
538 xor $s1,$s1,$t1
539 xor $s2,$s2,$t2
540 xor $s3,$s3,$t3
9c200f54
AP
541 rlwinm $acc00,$s0,`32-24`,24,31
542 rlwinm $acc01,$s1,`32-24`,24,31
543 rlwinm $acc02,$s2,`32-24`,24,31
544 rlwinm $acc03,$s3,`32-24`,24,31
9c200f54
AP
545 lbzx $acc00,$Tbl1,$acc00
546 lbzx $acc01,$Tbl1,$acc01
96d13fe6
AP
547 rlwinm $acc04,$s1,`32-16`,24,31
548 rlwinm $acc05,$s2,`32-16`,24,31
9c200f54
AP
549 lbzx $acc02,$Tbl1,$acc02
550 lbzx $acc03,$Tbl1,$acc03
96d13fe6
AP
551 rlwinm $acc06,$s3,`32-16`,24,31
552 rlwinm $acc07,$s0,`32-16`,24,31
9c200f54
AP
553 lbzx $acc04,$Tbl1,$acc04
554 lbzx $acc05,$Tbl1,$acc05
96d13fe6
AP
555 rlwinm $acc08,$s2,`32-8`,24,31
556 rlwinm $acc09,$s3,`32-8`,24,31
9c200f54
AP
557 lbzx $acc06,$Tbl1,$acc06
558 lbzx $acc07,$Tbl1,$acc07
96d13fe6
AP
559 rlwinm $acc10,$s0,`32-8`,24,31
560 rlwinm $acc11,$s1,`32-8`,24,31
9c200f54
AP
561 lbzx $acc08,$Tbl1,$acc08
562 lbzx $acc09,$Tbl1,$acc09
96d13fe6
AP
563 rlwinm $acc12,$s3,`0`,24,31
564 rlwinm $acc13,$s0,`0`,24,31
9c200f54
AP
565 lbzx $acc10,$Tbl1,$acc10
566 lbzx $acc11,$Tbl1,$acc11
96d13fe6
AP
567 rlwinm $acc14,$s1,`0`,24,31
568 rlwinm $acc15,$s2,`0`,24,31
9c200f54
AP
569 lbzx $acc12,$Tbl1,$acc12
570 lbzx $acc13,$Tbl1,$acc13
96d13fe6
AP
571 rlwinm $s0,$acc00,24,0,7
572 rlwinm $s1,$acc01,24,0,7
9c200f54
AP
573 lbzx $acc14,$Tbl1,$acc14
574 lbzx $acc15,$Tbl1,$acc15
96d13fe6
AP
575 rlwinm $s2,$acc02,24,0,7
576 rlwinm $s3,$acc03,24,0,7
9c200f54
AP
577 rlwimi $s0,$acc04,16,8,15
578 rlwimi $s1,$acc05,16,8,15
579 rlwimi $s2,$acc06,16,8,15
580 rlwimi $s3,$acc07,16,8,15
581 rlwimi $s0,$acc08,8,16,23
582 rlwimi $s1,$acc09,8,16,23
583 rlwimi $s2,$acc10,8,16,23
584 rlwimi $s3,$acc11,8,16,23
96d13fe6
AP
585 lwz $t0,0($key)
586 lwz $t1,4($key)
9c200f54
AP
587 or $s0,$s0,$acc12
588 or $s1,$s1,$acc13
96d13fe6
AP
589 lwz $t2,8($key)
590 lwz $t3,12($key)
9c200f54
AP
591 or $s2,$s2,$acc14
592 or $s3,$s3,$acc15
593
52ee3d01
AP
594 addi $key,$key,16
595 bdz Lenc_compact_done
596
9c200f54
AP
597 and $acc00,$s0,$mask80 # r1=r0&0x80808080
598 and $acc01,$s1,$mask80
599 and $acc02,$s2,$mask80
600 and $acc03,$s3,$mask80
601 srwi $acc04,$acc00,7 # r1>>7
602 srwi $acc05,$acc01,7
603 srwi $acc06,$acc02,7
604 srwi $acc07,$acc03,7
605 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
606 andc $acc09,$s1,$mask80
607 andc $acc10,$s2,$mask80
608 andc $acc11,$s3,$mask80
609 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
610 sub $acc01,$acc01,$acc05
611 sub $acc02,$acc02,$acc06
612 sub $acc03,$acc03,$acc07
613 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
614 add $acc09,$acc09,$acc09
615 add $acc10,$acc10,$acc10
616 add $acc11,$acc11,$acc11
617 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
618 and $acc01,$acc01,$mask1b
619 and $acc02,$acc02,$mask1b
620 and $acc03,$acc03,$mask1b
621 xor $acc00,$acc00,$acc08 # r2
622 xor $acc01,$acc01,$acc09
623 xor $acc02,$acc02,$acc10
624 xor $acc03,$acc03,$acc11
625
626 rotlwi $acc12,$s0,16 # ROTATE(r0,16)
627 rotlwi $acc13,$s1,16
628 rotlwi $acc14,$s2,16
629 rotlwi $acc15,$s3,16
630 xor $s0,$s0,$acc00 # r0^r2
631 xor $s1,$s1,$acc01
632 xor $s2,$s2,$acc02
633 xor $s3,$s3,$acc03
634 rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
635 rotrwi $s1,$s1,24
636 rotrwi $s2,$s2,24
637 rotrwi $s3,$s3,24
638 xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
639 xor $s1,$s1,$acc01
640 xor $s2,$s2,$acc02
641 xor $s3,$s3,$acc03
642 rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
643 rotlwi $acc09,$acc13,8
644 rotlwi $acc10,$acc14,8
645 rotlwi $acc11,$acc15,8
646 xor $s0,$s0,$acc12 #
647 xor $s1,$s1,$acc13
648 xor $s2,$s2,$acc14
649 xor $s3,$s3,$acc15
650 xor $s0,$s0,$acc08 #
651 xor $s1,$s1,$acc09
652 xor $s2,$s2,$acc10
653 xor $s3,$s3,$acc11
654
52ee3d01
AP
655 b Lenc_compact_loop
656.align 4
657Lenc_compact_done:
9c200f54
AP
658 xor $s0,$s0,$t0
659 xor $s1,$s1,$t1
660 xor $s2,$s2,$t2
661 xor $s3,$s3,$t3
662 blr
663
664.globl .AES_decrypt
665.align 7
666.AES_decrypt:
667 mflr r0
668 $STU $sp,-$FRAME($sp)
669
670 $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
671 $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
672 $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
673 $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
674 $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
675 $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
676 $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
677 $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
678 $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
679 $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
680 $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
681 $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
682 $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
683 $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
684 $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
685 $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
686 $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
687 $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
688 $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
689 $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
690 $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
691
692 lwz $s0,0($inp)
693 lwz $s1,4($inp)
694 lwz $s2,8($inp)
695 lwz $s3,12($inp)
696 bl LAES_Td
697 bl Lppc_AES_decrypt_compact
698 stw $s0,0($out)
699 stw $s1,4($out)
700 stw $s2,8($out)
701 stw $s3,12($out)
702
703 $POP r0,`$FRAME-$SIZE_T*21`($sp)
704 $POP $toc,`$FRAME-$SIZE_T*20`($sp)
705 $POP r13,`$FRAME-$SIZE_T*19`($sp)
706 $POP r14,`$FRAME-$SIZE_T*18`($sp)
707 $POP r15,`$FRAME-$SIZE_T*17`($sp)
708 $POP r16,`$FRAME-$SIZE_T*16`($sp)
709 $POP r17,`$FRAME-$SIZE_T*15`($sp)
710 $POP r18,`$FRAME-$SIZE_T*14`($sp)
711 $POP r19,`$FRAME-$SIZE_T*13`($sp)
712 $POP r20,`$FRAME-$SIZE_T*12`($sp)
713 $POP r21,`$FRAME-$SIZE_T*11`($sp)
714 $POP r22,`$FRAME-$SIZE_T*10`($sp)
715 $POP r23,`$FRAME-$SIZE_T*9`($sp)
716 $POP r24,`$FRAME-$SIZE_T*8`($sp)
717 $POP r25,`$FRAME-$SIZE_T*7`($sp)
718 $POP r26,`$FRAME-$SIZE_T*6`($sp)
719 $POP r27,`$FRAME-$SIZE_T*5`($sp)
720 $POP r28,`$FRAME-$SIZE_T*4`($sp)
721 $POP r29,`$FRAME-$SIZE_T*3`($sp)
722 $POP r30,`$FRAME-$SIZE_T*2`($sp)
723 $POP r31,`$FRAME-$SIZE_T*1`($sp)
724 mtlr r0
725 addi $sp,$sp,$FRAME
726 blr
727
728.align 4
729Lppc_AES_decrypt:
730 lwz $acc00,240($key)
731 lwz $t0,0($key)
732 lwz $t1,4($key)
733 lwz $t2,8($key)
734 lwz $t3,12($key)
735 addi $Tbl1,$Tbl0,3
736 addi $Tbl2,$Tbl0,2
737 addi $Tbl3,$Tbl0,1
738 addi $acc00,$acc00,-1
739 addi $key,$key,16
740 xor $s0,$s0,$t0
741 xor $s1,$s1,$t1
742 xor $s2,$s2,$t2
743 xor $s3,$s3,$t3
744 mtctr $acc00
745.align 4
746Ldec_loop:
747 rlwinm $acc00,$s0,`32-24+3`,21,28
748 rlwinm $acc01,$s1,`32-24+3`,21,28
9c200f54
AP
749 lwz $t0,0($key)
750 lwz $t1,4($key)
96d13fe6
AP
751 rlwinm $acc02,$s2,`32-24+3`,21,28
752 rlwinm $acc03,$s3,`32-24+3`,21,28
9c200f54
AP
753 lwz $t2,8($key)
754 lwz $t3,12($key)
755 rlwinm $acc04,$s3,`32-16+3`,21,28
756 rlwinm $acc05,$s0,`32-16+3`,21,28
9c200f54
AP
757 lwzx $acc00,$Tbl0,$acc00
758 lwzx $acc01,$Tbl0,$acc01
96d13fe6
AP
759 rlwinm $acc06,$s1,`32-16+3`,21,28
760 rlwinm $acc07,$s2,`32-16+3`,21,28
9c200f54
AP
761 lwzx $acc02,$Tbl0,$acc02
762 lwzx $acc03,$Tbl0,$acc03
763 rlwinm $acc08,$s2,`32-8+3`,21,28
764 rlwinm $acc09,$s3,`32-8+3`,21,28
9c200f54
AP
765 lwzx $acc04,$Tbl1,$acc04
766 lwzx $acc05,$Tbl1,$acc05
96d13fe6
AP
767 rlwinm $acc10,$s0,`32-8+3`,21,28
768 rlwinm $acc11,$s1,`32-8+3`,21,28
9c200f54
AP
769 lwzx $acc06,$Tbl1,$acc06
770 lwzx $acc07,$Tbl1,$acc07
771 rlwinm $acc12,$s1,`0+3`,21,28
772 rlwinm $acc13,$s2,`0+3`,21,28
9c200f54
AP
773 lwzx $acc08,$Tbl2,$acc08
774 lwzx $acc09,$Tbl2,$acc09
96d13fe6
AP
775 rlwinm $acc14,$s3,`0+3`,21,28
776 rlwinm $acc15,$s0,`0+3`,21,28
9c200f54
AP
777 lwzx $acc10,$Tbl2,$acc10
778 lwzx $acc11,$Tbl2,$acc11
779 xor $t0,$t0,$acc00
780 xor $t1,$t1,$acc01
9c200f54
AP
781 lwzx $acc12,$Tbl3,$acc12
782 lwzx $acc13,$Tbl3,$acc13
96d13fe6
AP
783 xor $t2,$t2,$acc02
784 xor $t3,$t3,$acc03
9c200f54
AP
785 lwzx $acc14,$Tbl3,$acc14
786 lwzx $acc15,$Tbl3,$acc15
787 xor $t0,$t0,$acc04
788 xor $t1,$t1,$acc05
789 xor $t2,$t2,$acc06
790 xor $t3,$t3,$acc07
791 xor $t0,$t0,$acc08
792 xor $t1,$t1,$acc09
793 xor $t2,$t2,$acc10
794 xor $t3,$t3,$acc11
795 xor $s0,$t0,$acc12
796 xor $s1,$t1,$acc13
797 xor $s2,$t2,$acc14
798 xor $s3,$t3,$acc15
799 addi $key,$key,16
800 bdnz- Ldec_loop
801
802 addi $Tbl2,$Tbl0,2048
803 nop
804 lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
805 lwz $acc09,`2048+32`($Tbl0)
806 lwz $acc10,`2048+64`($Tbl0)
807 lwz $acc11,`2048+96`($Tbl0)
808 lwz $acc08,`2048+128`($Tbl0)
809 lwz $acc09,`2048+160`($Tbl0)
810 lwz $acc10,`2048+192`($Tbl0)
811 lwz $acc11,`2048+224`($Tbl0)
812 rlwinm $acc00,$s0,`32-24`,24,31
813 rlwinm $acc01,$s1,`32-24`,24,31
9c200f54
AP
814 lwz $t0,0($key)
815 lwz $t1,4($key)
96d13fe6
AP
816 rlwinm $acc02,$s2,`32-24`,24,31
817 rlwinm $acc03,$s3,`32-24`,24,31
9c200f54
AP
818 lwz $t2,8($key)
819 lwz $t3,12($key)
820 rlwinm $acc04,$s3,`32-16`,24,31
821 rlwinm $acc05,$s0,`32-16`,24,31
9c200f54
AP
822 lbzx $acc00,$Tbl2,$acc00
823 lbzx $acc01,$Tbl2,$acc01
96d13fe6
AP
824 rlwinm $acc06,$s1,`32-16`,24,31
825 rlwinm $acc07,$s2,`32-16`,24,31
9c200f54
AP
826 lbzx $acc02,$Tbl2,$acc02
827 lbzx $acc03,$Tbl2,$acc03
828 rlwinm $acc08,$s2,`32-8`,24,31
829 rlwinm $acc09,$s3,`32-8`,24,31
9c200f54
AP
830 lbzx $acc04,$Tbl2,$acc04
831 lbzx $acc05,$Tbl2,$acc05
96d13fe6
AP
832 rlwinm $acc10,$s0,`32-8`,24,31
833 rlwinm $acc11,$s1,`32-8`,24,31
9c200f54
AP
834 lbzx $acc06,$Tbl2,$acc06
835 lbzx $acc07,$Tbl2,$acc07
836 rlwinm $acc12,$s1,`0`,24,31
837 rlwinm $acc13,$s2,`0`,24,31
9c200f54
AP
838 lbzx $acc08,$Tbl2,$acc08
839 lbzx $acc09,$Tbl2,$acc09
96d13fe6
AP
840 rlwinm $acc14,$s3,`0`,24,31
841 rlwinm $acc15,$s0,`0`,24,31
9c200f54
AP
842 lbzx $acc10,$Tbl2,$acc10
843 lbzx $acc11,$Tbl2,$acc11
844 rlwinm $s0,$acc00,24,0,7
845 rlwinm $s1,$acc01,24,0,7
9c200f54
AP
846 lbzx $acc12,$Tbl2,$acc12
847 lbzx $acc13,$Tbl2,$acc13
96d13fe6
AP
848 rlwinm $s2,$acc02,24,0,7
849 rlwinm $s3,$acc03,24,0,7
9c200f54
AP
850 lbzx $acc14,$Tbl2,$acc14
851 lbzx $acc15,$Tbl2,$acc15
852 rlwimi $s0,$acc04,16,8,15
853 rlwimi $s1,$acc05,16,8,15
854 rlwimi $s2,$acc06,16,8,15
855 rlwimi $s3,$acc07,16,8,15
856 rlwimi $s0,$acc08,8,16,23
857 rlwimi $s1,$acc09,8,16,23
858 rlwimi $s2,$acc10,8,16,23
859 rlwimi $s3,$acc11,8,16,23
860 or $s0,$s0,$acc12
861 or $s1,$s1,$acc13
862 or $s2,$s2,$acc14
863 or $s3,$s3,$acc15
864 xor $s0,$s0,$t0
865 xor $s1,$s1,$t1
866 xor $s2,$s2,$t2
867 xor $s3,$s3,$t3
868 blr
869
870.align 4
871Lppc_AES_decrypt_compact:
872 lwz $acc00,240($key)
873 lwz $t0,0($key)
874 lwz $t1,4($key)
875 lwz $t2,8($key)
876 lwz $t3,12($key)
877 addi $Tbl1,$Tbl0,2048
878 lis $mask80,0x8080
879 lis $mask1b,0x1b1b
9c200f54
AP
880 addi $key,$key,16
881 ori $mask80,$mask80,0x8080
882 ori $mask1b,$mask1b,0x1b1b
d7e91561
AP
883___
884$code.=<<___ if ($SIZE_T==8);
885 insrdi $mask80,$mask80,32,0
886 insrdi $mask1b,$mask1b,32,0
887___
888$code.=<<___;
52ee3d01
AP
889 mtctr $acc00
890.align 4
891Ldec_compact_loop:
9c200f54
AP
892 xor $s0,$s0,$t0
893 xor $s1,$s1,$t1
894 xor $s2,$s2,$t2
895 xor $s3,$s3,$t3
9c200f54
AP
896 rlwinm $acc00,$s0,`32-24`,24,31
897 rlwinm $acc01,$s1,`32-24`,24,31
898 rlwinm $acc02,$s2,`32-24`,24,31
899 rlwinm $acc03,$s3,`32-24`,24,31
9c200f54
AP
900 lbzx $acc00,$Tbl1,$acc00
901 lbzx $acc01,$Tbl1,$acc01
96d13fe6
AP
902 rlwinm $acc04,$s3,`32-16`,24,31
903 rlwinm $acc05,$s0,`32-16`,24,31
9c200f54
AP
904 lbzx $acc02,$Tbl1,$acc02
905 lbzx $acc03,$Tbl1,$acc03
96d13fe6
AP
906 rlwinm $acc06,$s1,`32-16`,24,31
907 rlwinm $acc07,$s2,`32-16`,24,31
9c200f54
AP
908 lbzx $acc04,$Tbl1,$acc04
909 lbzx $acc05,$Tbl1,$acc05
96d13fe6
AP
910 rlwinm $acc08,$s2,`32-8`,24,31
911 rlwinm $acc09,$s3,`32-8`,24,31
9c200f54
AP
912 lbzx $acc06,$Tbl1,$acc06
913 lbzx $acc07,$Tbl1,$acc07
96d13fe6
AP
914 rlwinm $acc10,$s0,`32-8`,24,31
915 rlwinm $acc11,$s1,`32-8`,24,31
9c200f54
AP
916 lbzx $acc08,$Tbl1,$acc08
917 lbzx $acc09,$Tbl1,$acc09
96d13fe6
AP
918 rlwinm $acc12,$s1,`0`,24,31
919 rlwinm $acc13,$s2,`0`,24,31
9c200f54
AP
920 lbzx $acc10,$Tbl1,$acc10
921 lbzx $acc11,$Tbl1,$acc11
96d13fe6
AP
922 rlwinm $acc14,$s3,`0`,24,31
923 rlwinm $acc15,$s0,`0`,24,31
9c200f54
AP
924 lbzx $acc12,$Tbl1,$acc12
925 lbzx $acc13,$Tbl1,$acc13
96d13fe6
AP
926 rlwinm $s0,$acc00,24,0,7
927 rlwinm $s1,$acc01,24,0,7
9c200f54
AP
928 lbzx $acc14,$Tbl1,$acc14
929 lbzx $acc15,$Tbl1,$acc15
96d13fe6
AP
930 rlwinm $s2,$acc02,24,0,7
931 rlwinm $s3,$acc03,24,0,7
9c200f54
AP
932 rlwimi $s0,$acc04,16,8,15
933 rlwimi $s1,$acc05,16,8,15
934 rlwimi $s2,$acc06,16,8,15
935 rlwimi $s3,$acc07,16,8,15
936 rlwimi $s0,$acc08,8,16,23
937 rlwimi $s1,$acc09,8,16,23
938 rlwimi $s2,$acc10,8,16,23
939 rlwimi $s3,$acc11,8,16,23
96d13fe6
AP
940 lwz $t0,0($key)
941 lwz $t1,4($key)
9c200f54
AP
942 or $s0,$s0,$acc12
943 or $s1,$s1,$acc13
96d13fe6
AP
944 lwz $t2,8($key)
945 lwz $t3,12($key)
9c200f54
AP
946 or $s2,$s2,$acc14
947 or $s3,$s3,$acc15
948
52ee3d01
AP
949 addi $key,$key,16
950 bdz Ldec_compact_done
d7e91561
AP
951___
952$code.=<<___ if ($SIZE_T==8);
953 # vectorized permutation improves decrypt performance by 10%
954 insrdi $s0,$s1,32,0
955 insrdi $s2,$s3,32,0
52ee3d01 956
d7e91561
AP
957 and $acc00,$s0,$mask80 # r1=r0&0x80808080
958 and $acc02,$s2,$mask80
959 srdi $acc04,$acc00,7 # r1>>7
960 srdi $acc06,$acc02,7
961 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
962 andc $acc10,$s2,$mask80
963 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
964 sub $acc02,$acc02,$acc06
965 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
966 add $acc10,$acc10,$acc10
967 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
968 and $acc02,$acc02,$mask1b
969 xor $acc00,$acc00,$acc08 # r2
970 xor $acc02,$acc02,$acc10
971
972 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
973 and $acc06,$acc02,$mask80
974 srdi $acc08,$acc04,7 # r1>>7
975 srdi $acc10,$acc06,7
976 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
977 andc $acc14,$acc02,$mask80
978 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
979 sub $acc06,$acc06,$acc10
980 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
981 add $acc14,$acc14,$acc14
982 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
983 and $acc06,$acc06,$mask1b
984 xor $acc04,$acc04,$acc12 # r4
985 xor $acc06,$acc06,$acc14
986
987 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
988 and $acc10,$acc06,$mask80
989 srdi $acc12,$acc08,7 # r1>>7
990 srdi $acc14,$acc10,7
991 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
992 sub $acc10,$acc10,$acc14
993 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
994 andc $acc14,$acc06,$mask80
995 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
996 add $acc14,$acc14,$acc14
997 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
998 and $acc10,$acc10,$mask1b
999 xor $acc08,$acc08,$acc12 # r8
1000 xor $acc10,$acc10,$acc14
1001
1002 xor $acc00,$acc00,$s0 # r2^r0
1003 xor $acc02,$acc02,$s2
1004 xor $acc04,$acc04,$s0 # r4^r0
1005 xor $acc06,$acc06,$s2
1006
96d13fe6
AP
1007 extrdi $acc01,$acc00,32,0
1008 extrdi $acc03,$acc02,32,0
1009 extrdi $acc05,$acc04,32,0
1010 extrdi $acc07,$acc06,32,0
1011 extrdi $acc09,$acc08,32,0
1012 extrdi $acc11,$acc10,32,0
d7e91561
AP
1013___
1014$code.=<<___ if ($SIZE_T==4);
9c200f54
AP
1015 and $acc00,$s0,$mask80 # r1=r0&0x80808080
1016 and $acc01,$s1,$mask80
1017 and $acc02,$s2,$mask80
1018 and $acc03,$s3,$mask80
1019 srwi $acc04,$acc00,7 # r1>>7
1020 srwi $acc05,$acc01,7
1021 srwi $acc06,$acc02,7
1022 srwi $acc07,$acc03,7
1023 andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
1024 andc $acc09,$s1,$mask80
1025 andc $acc10,$s2,$mask80
1026 andc $acc11,$s3,$mask80
1027 sub $acc00,$acc00,$acc04 # r1-(r1>>7)
1028 sub $acc01,$acc01,$acc05
1029 sub $acc02,$acc02,$acc06
1030 sub $acc03,$acc03,$acc07
1031 add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
1032 add $acc09,$acc09,$acc09
1033 add $acc10,$acc10,$acc10
1034 add $acc11,$acc11,$acc11
1035 and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1036 and $acc01,$acc01,$mask1b
1037 and $acc02,$acc02,$mask1b
1038 and $acc03,$acc03,$mask1b
1039 xor $acc00,$acc00,$acc08 # r2
1040 xor $acc01,$acc01,$acc09
1041 xor $acc02,$acc02,$acc10
1042 xor $acc03,$acc03,$acc11
1043
1044 and $acc04,$acc00,$mask80 # r1=r2&0x80808080
1045 and $acc05,$acc01,$mask80
1046 and $acc06,$acc02,$mask80
1047 and $acc07,$acc03,$mask80
1048 srwi $acc08,$acc04,7 # r1>>7
1049 srwi $acc09,$acc05,7
1050 srwi $acc10,$acc06,7
1051 srwi $acc11,$acc07,7
1052 andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
1053 andc $acc13,$acc01,$mask80
1054 andc $acc14,$acc02,$mask80
1055 andc $acc15,$acc03,$mask80
1056 sub $acc04,$acc04,$acc08 # r1-(r1>>7)
1057 sub $acc05,$acc05,$acc09
1058 sub $acc06,$acc06,$acc10
1059 sub $acc07,$acc07,$acc11
1060 add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
1061 add $acc13,$acc13,$acc13
1062 add $acc14,$acc14,$acc14
1063 add $acc15,$acc15,$acc15
1064 and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1065 and $acc05,$acc05,$mask1b
1066 and $acc06,$acc06,$mask1b
1067 and $acc07,$acc07,$mask1b
1068 xor $acc04,$acc04,$acc12 # r4
1069 xor $acc05,$acc05,$acc13
1070 xor $acc06,$acc06,$acc14
1071 xor $acc07,$acc07,$acc15
1072
1073 and $acc08,$acc04,$mask80 # r1=r4&0x80808080
1074 and $acc09,$acc05,$mask80
1075 and $acc10,$acc06,$mask80
1076 and $acc11,$acc07,$mask80
1077 srwi $acc12,$acc08,7 # r1>>7
1078 srwi $acc13,$acc09,7
1079 srwi $acc14,$acc10,7
1080 srwi $acc15,$acc11,7
1081 sub $acc08,$acc08,$acc12 # r1-(r1>>7)
1082 sub $acc09,$acc09,$acc13
1083 sub $acc10,$acc10,$acc14
1084 sub $acc11,$acc11,$acc15
1085 andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
1086 andc $acc13,$acc05,$mask80
1087 andc $acc14,$acc06,$mask80
1088 andc $acc15,$acc07,$mask80
1089 add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
1090 add $acc13,$acc13,$acc13
1091 add $acc14,$acc14,$acc14
1092 add $acc15,$acc15,$acc15
1093 and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
1094 and $acc09,$acc09,$mask1b
1095 and $acc10,$acc10,$mask1b
1096 and $acc11,$acc11,$mask1b
1097 xor $acc08,$acc08,$acc12 # r8
1098 xor $acc09,$acc09,$acc13
1099 xor $acc10,$acc10,$acc14
1100 xor $acc11,$acc11,$acc15
1101
1102 xor $acc00,$acc00,$s0 # r2^r0
1103 xor $acc01,$acc01,$s1
1104 xor $acc02,$acc02,$s2
1105 xor $acc03,$acc03,$s3
1106 xor $acc04,$acc04,$s0 # r4^r0
1107 xor $acc05,$acc05,$s1
1108 xor $acc06,$acc06,$s2
1109 xor $acc07,$acc07,$s3
d7e91561
AP
1110___
1111$code.=<<___;
9c200f54
AP
1112 rotrwi $s0,$s0,8 # = ROTATE(r0,8)
1113 rotrwi $s1,$s1,8
1114 rotrwi $s2,$s2,8
1115 rotrwi $s3,$s3,8
1116 xor $s0,$s0,$acc00 # ^= r2^r0
1117 xor $s1,$s1,$acc01
1118 xor $s2,$s2,$acc02
1119 xor $s3,$s3,$acc03
1120 xor $acc00,$acc00,$acc08
1121 xor $acc01,$acc01,$acc09
1122 xor $acc02,$acc02,$acc10
1123 xor $acc03,$acc03,$acc11
1124 xor $s0,$s0,$acc04 # ^= r4^r0
1125 xor $s1,$s1,$acc05
1126 xor $s2,$s2,$acc06
1127 xor $s3,$s3,$acc07
1128 rotrwi $acc00,$acc00,24
1129 rotrwi $acc01,$acc01,24
1130 rotrwi $acc02,$acc02,24
1131 rotrwi $acc03,$acc03,24
1132 xor $acc04,$acc04,$acc08
1133 xor $acc05,$acc05,$acc09
1134 xor $acc06,$acc06,$acc10
1135 xor $acc07,$acc07,$acc11
1136 xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
1137 xor $s1,$s1,$acc09
1138 xor $s2,$s2,$acc10
1139 xor $s3,$s3,$acc11
1140 rotrwi $acc04,$acc04,16
1141 rotrwi $acc05,$acc05,16
1142 rotrwi $acc06,$acc06,16
1143 rotrwi $acc07,$acc07,16
1144 xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
1145 xor $s1,$s1,$acc01
1146 xor $s2,$s2,$acc02
1147 xor $s3,$s3,$acc03
1148 rotrwi $acc08,$acc08,8
1149 rotrwi $acc09,$acc09,8
1150 rotrwi $acc10,$acc10,8
1151 rotrwi $acc11,$acc11,8
1152 xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
1153 xor $s1,$s1,$acc05
1154 xor $s2,$s2,$acc06
1155 xor $s3,$s3,$acc07
1156 xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
1157 xor $s1,$s1,$acc09
1158 xor $s2,$s2,$acc10
1159 xor $s3,$s3,$acc11
1160
52ee3d01
AP
1161 b Ldec_compact_loop
1162.align 4
1163Ldec_compact_done:
9c200f54
AP
1164 xor $s0,$s0,$t0
1165 xor $s1,$s1,$t1
1166 xor $s2,$s2,$t2
1167 xor $s3,$s3,$t3
1168 blr
1169.long 0
1170.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
1171.align 7
1172___
1173
# Script epilogue: finalize the generated assembly text and hand it to the
# translator that STDOUT was piped into at the top of this file.

# Expand every `...` span embedded in the generated text (for example the
# rotate counts written as `32-24`) by evaluating it as a Perl expression
# and substituting the result.
$code =~ s/\`([^\`]*)\`/eval $1/gem;

print $code;

# STDOUT is a pipe, so close() is where buffered write errors and a failing
# child surface.  close() returns false in both cases; $! is 0 when the only
# problem was a non-zero exit status of the child, which is then in $?.
close STDOUT
    or die( $! ? "error closing STDOUT: $!"
               : "$xlate exited with wait status $?" );