]>
Commit | Line | Data |
---|---|---|
a2a54ffc AP |
1 | #!/usr/bin/env perl |
2 | ||
3 | # ==================================================================== | |
4 | # Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | |
5 | # project. The module is, however, dual licensed under OpenSSL and | |
6 | # CRYPTOGAMS licenses depending on where you obtain it. For further | |
7 | # details see http://www.openssl.org/~appro/cryptogams/. | |
8 | # ==================================================================== | |
9 | ||
10 | # AES for s390x. | |
11 | ||
12 | # April 2007. | |
13 | # | |
14 | # Software performance improvement over gcc-generated code is ~70% and | |
15 | # in absolute terms is ~73 cycles per byte processed with 128-bit key. | |
16 | # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are | |
17 | # *strictly* in-order execution and issued instruction [in this case | |
18 | # load value from memory is critical] has to complete before execution | |
19 | # flow proceeds. S-boxes are compressed to 2KB. | |
20 | # | |
21 | # As for hardware acceleration support, it's basically a "teaser," as | |
22 | # it can and should be improved in several ways. Most notably support | |
23 | # for CBC is not utilized, nor are multiple blocks ever processed. | |
24 | # Then software key schedule can be postponed till hardware support | |
25 | # detection... Performance improvement over assembler is reportedly | |
251718e4 | 26 | # ~2.5x, but can reach >8x [naturally on larger chunks] if proper |
a2a54ffc AP |
27 | # support is implemented. |
28 | ||
# Symbolic names for the s390x registers referenced by the assembly
# templates below; Perl interpolates them into the heredocs.
#   $t1..$t3     - temporaries holding table look-ups within a round
#   $inp/$out    - input and output block pointers
#   $mask        - byte-index mask applied to the shifted state words
#   $key         - round-key pointer, advanced by 16 every round
#   $i1..$i3     - scratch table indices
#   $s0..$s3     - the four 32-bit words of the cipher state
#   $tbl         - base address of AES_Te / AES_Td
#   $rounds      - loop counter loaded from 240($key)
#   $ra, $sp     - return address (%r14) and stack pointer (%r15)
# Note the deliberate aliasing: %r2 serves as both $inp and $t3, and %r3 as
# both $out and $mask. The entry points save %r3 with stmg and reload $out
# from 24($sp) after the rounds have finished.
29 | $t1="%r0"; | |
30 | $t2="%r1"; | |
31 | $t3="%r2"; $inp="%r2"; | |
32 | $out="%r3"; $mask="%r3"; | |
33 | $key="%r4"; | |
34 | $i1="%r5"; | |
35 | $i2="%r6"; | |
36 | $i3="%r7"; | |
37 | $s0="%r8"; | |
38 | $s1="%r9"; | |
39 | $s2="%r10"; | |
40 | $s3="%r11"; | |
41 | $tbl="%r12"; | |
42 | $rounds="%r13"; | |
43 | $ra="%r14"; | |
44 | $sp="%r15"; | |
45 | ||
# Append one ".long" directive per argument to the global $code buffer.
#
# Each argument is a 32-bit table word and is emitted twice on the same
# line, so every table entry occupies 8 bytes in the generated assembly.
# Processing stops at the first undefined argument. The return value is
# not meaningful; the sub is called for its side effect on $code.
#
# The sub is declared without a prototype. The former "()" prototype
# claimed it takes no arguments and only worked because every caller uses
# the &-call syntax, which bypasses prototype checking.
sub _data_word
{
    for my $word (@_) {
        last unless defined $word;
        $code.=sprintf(".long\t0x%08x,0x%08x\n",$word,$word);
    }
}
50 | ||
# Start the generated assembly: open the .text section, declare the 64-byte
# aligned AES_Te object, then emit its 256 forward-table words through
# _data_word. _data_word writes every word twice, so each entry is 8 bytes
# (2KB in total); the round code fetches words at byte offsets 0..3 of an
# entry and scales its indices by 8 to match.
51 | $code=<<___; | |
52 | .text | |
53 | ||
54 | .type AES_Te,\@object | |
55 | .align 64 | |
56 | AES_Te: | |
57 | ___ | |
58 | &_data_word( | |
59 | 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, | |
60 | 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554, | |
61 | 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, | |
62 | 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, | |
63 | 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87, | |
64 | 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, | |
65 | 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, | |
66 | 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b, | |
67 | 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, | |
68 | 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, | |
69 | 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108, | |
70 | 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, | |
71 | 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, | |
72 | 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5, | |
73 | 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, | |
74 | 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, | |
75 | 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e, | |
76 | 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, | |
77 | 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, | |
78 | 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497, | |
79 | 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, | |
80 | 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, | |
81 | 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b, | |
82 | 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, | |
83 | 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, | |
84 | 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594, | |
85 | 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, | |
86 | 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, | |
87 | 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a, | |
88 | 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, | |
89 | 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, | |
90 | 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d, | |
91 | 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, | |
92 | 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, | |
93 | 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47, | |
94 | 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, | |
95 | 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, | |
96 | 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883, | |
97 | 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, | |
98 | 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, | |
99 | 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e, | |
100 | 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, | |
101 | 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, | |
102 | 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b, | |
103 | 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, | |
104 | 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, | |
105 | 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25, | |
106 | 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, | |
107 | 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, | |
108 | 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651, | |
109 | 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, | |
110 | 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, | |
111 | 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa, | |
112 | 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, | |
113 | 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, | |
114 | 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9, | |
115 | 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, | |
116 | 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, | |
117 | 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920, | |
118 | 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, | |
119 | 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, | |
120 | 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8, | |
121 | 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, | |
122 | 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a); | |
# Second chunk of generated assembly. It closes the AES_Te object with its
# .size directive and then defines two routines:
#
#   AES_encrypt         Public entry point (in=$inp, out=$out, key=$key).
#                       When the round count stored at 240($key) equals 10
#                       it queries the KM instruction (function code 0) and
#                       tests the returned capability vector; if the tested
#                       bit is set, one 16-byte block is encrypted with KM
#                       function code 0x12 and the routine returns through
#                       "bcr 8,%r14". In every other case execution reaches
#                       .Lesoft, which saves %r3-%r15, computes the address
#                       of AES_Te relative to .Lepic, loads the four state
#                       words, calls _s390x_AES_encrypt and stores the
#                       result through the saved output pointer.
#
#   _s390x_AES_encrypt  Table-driven rounds. XORs the first round key into
#                       $s0..$s3, runs the .Lenc_loop body (rounds-1) times,
#                       then builds the final round from single bytes read
#                       at offset 2 of the table entries (labelled Te4 in
#                       the inline comments) and XORs in the last round key.
#
# Backquoted expressions such as `0xff<<3` are not assembly: they are Perl
# expressions evaluated by the substitution at the end of this script.
123 | $code.=<<___; | |
124 | .size AES_Te,.-AES_Te | |
125 | ||
126 | # void AES_encrypt(const unsigned char *in, unsigned char *out, | |
127 | # const AES_KEY *key) { | |
128 | .globl AES_encrypt | |
129 | .type AES_encrypt,\@function | |
130 | AES_encrypt: | |
131 | lghi %r0,10 | |
132 | c %r0,240($key) | |
133 | jne .Lesoft | |
134 | lghi %r0,0 # query capability vector | |
135 | la %r1,16($sp) | |
136 | .long 0xb92e0042 # km %r4,%r2 | |
137 | lg %r0,16($sp) | |
138 | tmhl %r0,`0x8000>>2` | |
139 | jz .Lesoft | |
140 | lghi %r0,`0x00|0x12` # encrypt AES-128 | |
141 | la %r1,0($key) | |
3f6916cf | 142 | #la %r2,0($inp) |
a2a54ffc AP |
143 | la %r4,0($out) |
144 | lghi %r3,16 # single block length | |
145 | .long 0xb92e0042 # km %r4,%r2 | |
a4470ae7 | 146 | bcr 8,%r14 |
3f6916cf AP |
147 | la $out,0(%r4) # restore arguments |
148 | la $key,0(%r1) | |
a2a54ffc AP |
149 | .Lesoft: |
150 | stmg %r3,%r15,24($sp) | |
151 | ||
152 | bras $tbl,.Lepic | |
153 | .Lepic: aghi $tbl,AES_Te-.Lepic | |
154 | ||
155 | llgf $s0,0($inp) | |
156 | llgf $s1,4($inp) | |
157 | llgf $s2,8($inp) | |
158 | llgf $s3,12($inp) | |
159 | ||
160 | llill $mask,`0xff<<3` | |
161 | bras $ra,_s390x_AES_encrypt | |
162 | ||
163 | lg $out,24($sp) | |
164 | st $s0,0($out) | |
165 | st $s1,4($out) | |
166 | st $s2,8($out) | |
167 | st $s3,12($out) | |
168 | ||
169 | lmg %r6,%r15,48($sp) | |
170 | br %r14 | |
171 | .size AES_encrypt,.-AES_encrypt | |
172 | ||
173 | .type _s390x_AES_encrypt,\@function | |
174 | .align 16 | |
175 | _s390x_AES_encrypt: | |
176 | x $s0,0($key) | |
177 | x $s1,4($key) | |
178 | x $s2,8($key) | |
179 | x $s3,12($key) | |
180 | l $rounds,240($key) | |
181 | aghi $rounds,-1 | |
182 | ||
183 | .Lenc_loop: | |
184 | sllg $i1,$s0,`0+3` | |
185 | srlg $i2,$s0,`8-3` | |
186 | srlg $i3,$s0,`16-3` | |
187 | srl $s0,`24-3` | |
188 | nr $s0,$mask | |
189 | ngr $i1,$mask | |
190 | nr $i2,$mask | |
191 | nr $i3,$mask | |
192 | l $s0,0($s0,$tbl) # Te0[s0>>24] | |
193 | l $t1,1($i1,$tbl) # Te3[s0>>0] | |
194 | l $t2,2($i2,$tbl) # Te2[s0>>8] | |
195 | l $t3,3($i3,$tbl) # Te1[s0>>16] | |
196 | ||
197 | srlg $i1,$s1,`16-3` # i0 | |
198 | sllg $i2,$s1,`0+3` | |
199 | srlg $i3,$s1,`8-3` | |
200 | srl $s1,`24-3` | |
201 | nr $i1,$mask | |
202 | nr $s1,$mask | |
203 | ngr $i2,$mask | |
204 | nr $i3,$mask | |
205 | x $s0,3($i1,$tbl) # Te1[s1>>16] | |
206 | l $s1,0($s1,$tbl) # Te0[s1>>24] | |
207 | x $t2,1($i2,$tbl) # Te3[s1>>0] | |
208 | x $t3,2($i3,$tbl) # Te2[s1>>8] | |
209 | xr $s1,$t1 | |
210 | ||
211 | srlg $i1,$s2,`8-3` # i0 | |
212 | srlg $i2,$s2,`16-3` # i1 | |
213 | sllg $i3,$s2,`0+3` | |
214 | srl $s2,`24-3` | |
215 | nr $i1,$mask | |
216 | nr $i2,$mask | |
217 | nr $s2,$mask | |
218 | ngr $i3,$mask | |
219 | x $s0,2($i1,$tbl) # Te2[s2>>8] | |
220 | x $s1,3($i2,$tbl) # Te1[s2>>16] | |
221 | l $s2,0($s2,$tbl) # Te0[s2>>24] | |
222 | x $t3,1($i3,$tbl) # Te3[s2>>0] | |
223 | xr $s2,$t2 | |
224 | ||
225 | sllg $i1,$s3,`0+3` # i0 | |
226 | srlg $i2,$s3,`8-3` # i1 | |
227 | srlg $i3,$s3,`16-3` # i2 | |
228 | srl $s3,`24-3` | |
229 | ngr $i1,$mask | |
230 | nr $i2,$mask | |
231 | nr $i3,$mask | |
232 | nr $s3,$mask | |
233 | x $s0,1($i1,$tbl) # Te3[s3>>0] | |
234 | x $s1,2($i2,$tbl) # Te2[s3>>8] | |
235 | x $s2,3($i3,$tbl) # Te1[s3>>16] | |
236 | l $s3,0($s3,$tbl) # Te0[s3>>24] | |
237 | xr $s3,$t3 | |
238 | ||
239 | la $key,16($key) | |
240 | x $s0,0($key) | |
241 | x $s1,4($key) | |
242 | x $s2,8($key) | |
243 | x $s3,12($key) | |
244 | ||
245 | brct $rounds,.Lenc_loop | |
246 | ||
247 | sllg $i1,$s0,`0+3` | |
248 | srlg $i2,$s0,`8-3` | |
249 | srlg $i3,$s0,`16-3` | |
250 | srl $s0,`24-3` | |
251 | nr $s0,$mask | |
252 | ngr $i1,$mask | |
253 | nr $i2,$mask | |
254 | nr $i3,$mask | |
255 | llgc $s0,2($s0,$tbl) # Te4[s0>>24] | |
256 | llgc $t1,2($i1,$tbl) # Te4[s0>>0] | |
257 | llgc $t2,2($i2,$tbl) # Te4[s0>>8] | |
258 | llgc $t3,2($i3,$tbl) # Te4[s0>>16] | |
259 | sll $s0,24 | |
260 | sll $t2,8 | |
261 | sll $t3,16 | |
262 | ||
263 | srlg $i1,$s1,`16-3` # i0 | |
264 | sllg $i2,$s1,`0+3` | |
265 | srlg $i3,$s1,`8-3` | |
266 | srl $s1,`24-3` | |
267 | nr $i1,$mask | |
268 | nr $s1,$mask | |
269 | ngr $i2,$mask | |
270 | nr $i3,$mask | |
271 | llgc $i1,2($i1,$tbl) # Te4[s1>>16] | |
272 | llgc $s1,2($s1,$tbl) # Te4[s1>>24] | |
273 | llgc $i2,2($i2,$tbl) # Te4[s1>>0] | |
274 | llgc $i3,2($i3,$tbl) # Te4[s1>>8] | |
275 | sll $i1,16 | |
276 | sll $s1,24 | |
277 | sll $i3,8 | |
278 | or $s0,$i1 | |
279 | or $s1,$t1 | |
280 | or $t2,$i2 | |
281 | or $t3,$i3 | |
282 | ||
283 | srlg $i1,$s2,`8-3` # i0 | |
284 | srlg $i2,$s2,`16-3` # i1 | |
285 | sllg $i3,$s2,`0+3` | |
286 | srl $s2,`24-3` | |
287 | nr $i1,$mask | |
288 | nr $i2,$mask | |
289 | nr $s2,$mask | |
290 | ngr $i3,$mask | |
291 | llgc $i1,2($i1,$tbl) # Te4[s2>>8] | |
292 | llgc $i2,2($i2,$tbl) # Te4[s2>>16] | |
293 | llgc $s2,2($s2,$tbl) # Te4[s2>>24] | |
294 | llgc $i3,2($i3,$tbl) # Te4[s2>>0] | |
295 | sll $i1,8 | |
296 | sll $i2,16 | |
297 | sll $s2,24 | |
298 | or $s0,$i1 | |
299 | or $s1,$i2 | |
300 | or $s2,$t2 | |
301 | or $t3,$i3 | |
302 | ||
303 | sllg $i1,$s3,`0+3` # i0 | |
304 | srlg $i2,$s3,`8-3` # i1 | |
305 | srlg $i3,$s3,`16-3` # i2 | |
306 | srl $s3,`24-3` | |
307 | ngr $i1,$mask | |
308 | nr $i2,$mask | |
309 | nr $i3,$mask | |
310 | nr $s3,$mask | |
311 | llgc $i1,2($i1,$tbl) # Te4[s3>>0] | |
312 | llgc $i2,2($i2,$tbl) # Te4[s3>>8] | |
313 | llgc $i3,2($i3,$tbl) # Te4[s3>>16] | |
314 | llgc $s3,2($s3,$tbl) # Te4[s3>>24] | |
315 | sll $i2,8 | |
316 | sll $i3,16 | |
317 | sll $s3,24 | |
318 | or $s0,$i1 | |
319 | or $s1,$i2 | |
320 | or $s2,$i3 | |
321 | or $s3,$t3 | |
322 | ||
323 | x $s0,16($key) | |
324 | x $s1,20($key) | |
325 | x $s2,24($key) | |
326 | x $s3,28($key) | |
327 | ||
328 | br $ra | |
329 | .size _s390x_AES_encrypt,.-_s390x_AES_encrypt | |
330 | ___ | |
331 | ||
# Declare the 64-byte aligned AES_Td object and emit its 256 inverse-table
# words through _data_word. As with AES_Te, every word is written twice, so
# the word table spans 2048 bytes; the statement that follows appends a
# 256-byte table right behind it, which the decrypt code addresses as
# 2048($tbl) and labels Td4.
332 | $code.=<<___; | |
333 | .type AES_Td,\@object | |
334 | .align 64 | |
335 | AES_Td: | |
336 | ___ | |
337 | &_data_word( | |
338 | 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, | |
339 | 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393, | |
340 | 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, | |
341 | 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, | |
342 | 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1, | |
343 | 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, | |
344 | 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, | |
345 | 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844, | |
346 | 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, | |
347 | 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, | |
348 | 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45, | |
349 | 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, | |
350 | 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, | |
351 | 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a, | |
352 | 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, | |
353 | 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, | |
354 | 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1, | |
355 | 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, | |
356 | 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, | |
357 | 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051, | |
358 | 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, | |
359 | 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, | |
360 | 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77, | |
361 | 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, | |
362 | 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, | |
363 | 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e, | |
364 | 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, | |
365 | 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, | |
366 | 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e, | |
367 | 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, | |
368 | 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, | |
369 | 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8, | |
370 | 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, | |
371 | 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, | |
372 | 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163, | |
373 | 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, | |
374 | 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, | |
375 | 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0, | |
376 | 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, | |
377 | 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, | |
378 | 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36, | |
379 | 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, | |
380 | 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, | |
381 | 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5, | |
382 | 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, | |
383 | 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, | |
384 | 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8, | |
385 | 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, | |
386 | 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, | |
387 | 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0, | |
388 | 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, | |
389 | 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, | |
390 | 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df, | |
391 | 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, | |
392 | 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, | |
393 | 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713, | |
394 | 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, | |
395 | 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, | |
396 | 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf, | |
397 | 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, | |
398 | 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, | |
399 | 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541, | |
400 | 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, | |
401 | 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742); | |
# Final chunk of generated assembly. It first appends 256 single bytes to
# the AES_Td object (the table the decrypt code reads at 2048($tbl) and
# labels Td4) and closes the object with .size. It then defines two routines:
#
#   AES_decrypt         Public entry point (in=$inp, out=$out, key=$key).
#                       Mirrors AES_encrypt: for a round count of 10 it
#                       queries KM and, if the tested capability bit is set,
#                       decrypts one 16-byte block with function code
#                       0x80|0x12, passing 160($key) as the key address, and
#                       returns through "bcr 8,%r14". Otherwise $out and
#                       $key are restored and .Ldsoft runs the software
#                       path using AES_Td.
#
#   _s390x_AES_decrypt  Table-driven rounds. XORs in the first round key,
#                       runs the .Ldec_loop body (rounds-1) times, touches
#                       the byte table at 32-byte strides ("prefetch Td4"),
#                       switches $mask to 0xff and assembles the final
#                       round from Td4 bytes before XORing in the last
#                       round key.
#
# Backquoted expressions such as `2048+32` are Perl expressions evaluated by
# the substitution at the end of this script.
402 | $code.=<<___; | |
403 | .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38 | |
404 | .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb | |
405 | .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87 | |
406 | .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb | |
407 | .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d | |
408 | .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e | |
409 | .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2 | |
410 | .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 | |
411 | .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16 | |
412 | .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 | |
413 | .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda | |
414 | .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 | |
415 | .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a | |
416 | .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 | |
417 | .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02 | |
418 | .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b | |
419 | .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea | |
420 | .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 | |
421 | .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85 | |
422 | .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e | |
423 | .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89 | |
424 | .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b | |
425 | .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20 | |
426 | .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 | |
427 | .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31 | |
428 | .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f | |
429 | .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d | |
430 | .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef | |
431 | .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0 | |
432 | .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 | |
433 | .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26 | |
434 | .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d | |
435 | .size AES_Td,.-AES_Td | |
436 | ||
437 | # void AES_decrypt(const unsigned char *in, unsigned char *out, | |
438 | # const AES_KEY *key) { | |
439 | .globl AES_decrypt | |
440 | .type AES_decrypt,\@function | |
441 | AES_decrypt: | |
442 | lghi %r0,10 | |
443 | c %r0,240($key) | |
444 | jne .Ldsoft | |
445 | lghi %r0,0 # query capability vector | |
446 | la %r1,16($sp) | |
447 | .long 0xb92e0042 # km %r4,%r2 | |
448 | lg %r0,16($sp) | |
449 | tmhl %r0,`0x8000>>2` | |
450 | jz .Ldsoft | |
451 | lghi %r0,`0x80|0x12` # decrypt AES-128 | |
452 | la %r1,160($key) | |
3f6916cf | 453 | #la %r2,0($inp) |
a2a54ffc AP |
454 | la %r4,0($out) |
455 | lghi %r3,16 # single block length | |
456 | .long 0xb92e0042 # km %r4,%r2 | |
a4470ae7 | 457 | bcr 8,%r14 |
3f6916cf AP |
458 | la $out,0(%r4) # restore arguments |
459 | lghi $key,-160 | |
460 | la $key,0($key,%r1) | |
a2a54ffc AP |
461 | .Ldsoft: |
462 | stmg %r3,%r15,24($sp) | |
463 | ||
464 | bras $tbl,.Ldpic | |
465 | .Ldpic: aghi $tbl,AES_Td-.Ldpic | |
466 | ||
467 | llgf $s0,0($inp) | |
468 | llgf $s1,4($inp) | |
469 | llgf $s2,8($inp) | |
470 | llgf $s3,12($inp) | |
471 | ||
472 | llill $mask,`0xff<<3` | |
473 | bras $ra,_s390x_AES_decrypt | |
474 | ||
475 | lg $out,24($sp) | |
476 | st $s0,0($out) | |
477 | st $s1,4($out) | |
478 | st $s2,8($out) | |
479 | st $s3,12($out) | |
480 | ||
481 | lmg %r6,%r15,48($sp) | |
482 | br %r14 | |
483 | .size AES_decrypt,.-AES_decrypt | |
484 | ||
485 | .type _s390x_AES_decrypt,\@function | |
486 | .align 16 | |
487 | _s390x_AES_decrypt: | |
488 | x $s0,0($key) | |
489 | x $s1,4($key) | |
490 | x $s2,8($key) | |
491 | x $s3,12($key) | |
492 | l $rounds,240($key) | |
493 | aghi $rounds,-1 | |
494 | ||
495 | .Ldec_loop: | |
496 | srlg $i1,$s0,`16-3` | |
497 | srlg $i2,$s0,`8-3` | |
498 | sllg $i3,$s0,`0+3` | |
499 | srl $s0,`24-3` | |
500 | nr $s0,$mask | |
501 | nr $i1,$mask | |
502 | nr $i2,$mask | |
503 | ngr $i3,$mask | |
504 | l $s0,0($s0,$tbl) # Td0[s0>>24] | |
505 | l $t1,3($i1,$tbl) # Td1[s0>>16] | |
506 | l $t2,2($i2,$tbl) # Td2[s0>>8] | |
507 | l $t3,1($i3,$tbl) # Td3[s0>>0] | |
508 | ||
509 | sllg $i1,$s1,`0+3` # i0 | |
510 | srlg $i2,$s1,`16-3` | |
511 | srlg $i3,$s1,`8-3` | |
512 | srl $s1,`24-3` | |
513 | ngr $i1,$mask | |
514 | nr $s1,$mask | |
515 | nr $i2,$mask | |
516 | nr $i3,$mask | |
517 | x $s0,1($i1,$tbl) # Td3[s1>>0] | |
518 | l $s1,0($s1,$tbl) # Td0[s1>>24] | |
519 | x $t2,3($i2,$tbl) # Td1[s1>>16] | |
520 | x $t3,2($i3,$tbl) # Td2[s1>>8] | |
521 | xr $s1,$t1 | |
522 | ||
523 | srlg $i1,$s2,`8-3` # i0 | |
524 | sllg $i2,$s2,`0+3` # i1 | |
525 | srlg $i3,$s2,`16-3` | |
526 | srl $s2,`24-3` | |
527 | nr $i1,$mask | |
528 | ngr $i2,$mask | |
529 | nr $s2,$mask | |
530 | nr $i3,$mask | |
531 | x $s0,2($i1,$tbl) # Td2[s2>>8] | |
532 | x $s1,1($i2,$tbl) # Td3[s2>>0] | |
533 | l $s2,0($s2,$tbl) # Td0[s2>>24] | |
534 | x $t3,3($i3,$tbl) # Td1[s2>>16] | |
535 | xr $s2,$t2 | |
536 | ||
537 | srlg $i1,$s3,`16-3` # i0 | |
538 | srlg $i2,$s3,`8-3` # i1 | |
539 | sllg $i3,$s3,`0+3` # i2 | |
540 | srl $s3,`24-3` | |
541 | nr $i1,$mask | |
542 | nr $i2,$mask | |
543 | ngr $i3,$mask | |
544 | nr $s3,$mask | |
545 | x $s0,3($i1,$tbl) # Td1[s3>>16] | |
546 | x $s1,2($i2,$tbl) # Td2[s3>>8] | |
547 | x $s2,1($i3,$tbl) # Td3[s3>>0] | |
548 | l $s3,0($s3,$tbl) # Td0[s3>>24] | |
549 | xr $s3,$t3 | |
550 | ||
551 | la $key,16($key) | |
552 | x $s0,0($key) | |
553 | x $s1,4($key) | |
554 | x $s2,8($key) | |
555 | x $s3,12($key) | |
556 | ||
557 | brct $rounds,.Ldec_loop | |
558 | ||
559 | l $t1,`2048+0`($tbl) # prefetch Td4 | |
560 | l $t2,`2048+32`($tbl) | |
561 | l $t3,`2048+64`($tbl) | |
562 | l $i1,`2048+96`($tbl) | |
563 | l $i2,`2048+128`($tbl) | |
564 | l $i3,`2048+160`($tbl) | |
565 | l $t1,`2048+192`($tbl) | |
566 | l $t2,`2048+224`($tbl) | |
567 | llill $mask,0xff | |
568 | ||
569 | srlg $i3,$s0,24 # i0 | |
570 | srlg $i1,$s0,16 | |
571 | srlg $i2,$s0,8 | |
572 | nr $s0,$mask # i3 | |
573 | nr $i1,$mask | |
574 | nr $i2,$mask | |
575 | llgc $i3,2048($i3,$tbl) # Td4[s0>>24] | |
576 | llgc $t1,2048($i1,$tbl) # Td4[s0>>16] | |
577 | llgc $t2,2048($i2,$tbl) # Td4[s0>>8] | |
578 | llgc $t3,2048($s0,$tbl) # Td4[s0>>0] | |
579 | sllg $s0,$i3,24 | |
580 | sll $t1,16 | |
581 | sll $t2,8 | |
582 | ||
583 | srlg $i1,$s1,24 | |
584 | srlg $i2,$s1,16 | |
585 | srlg $i3,$s1,8 | |
586 | nr $s1,$mask # i0 | |
587 | nr $i2,$mask | |
588 | nr $i3,$mask | |
589 | llgc $s1,2048($s1,$tbl) # Td4[s1>>0] | |
590 | llgc $i1,2048($i1,$tbl) # Td4[s1>>24] | |
591 | llgc $i2,2048($i2,$tbl) # Td4[s1>>16] | |
592 | llgc $i3,2048($i3,$tbl) # Td4[s1>>8] | |
593 | sll $i1,24 | |
594 | sll $i2,16 | |
595 | sll $i3,8 | |
596 | or $s0,$s1 | |
597 | or $t1,$i1 | |
598 | or $t2,$i2 | |
599 | or $t3,$i3 | |
600 | ||
601 | srlg $i1,$s2,8 # i0 | |
602 | srlg $i2,$s2,24 | |
603 | srlg $i3,$s2,16 | |
604 | nr $s2,$mask # i1 | |
605 | nr $i1,$mask | |
606 | nr $i3,$mask | |
607 | llgc $i1,2048($i1,$tbl) # Td4[s2>>8] | |
608 | llgc $s1,2048($s2,$tbl) # Td4[s2>>0] | |
609 | llgc $i2,2048($i2,$tbl) # Td4[s2>>24] | |
610 | llgc $i3,2048($i3,$tbl) # Td4[s2>>16] | |
611 | sll $i1,8 | |
612 | sll $i2,24 | |
613 | sll $i3,16 | |
614 | or $s0,$i1 | |
615 | or $s1,$t1 | |
616 | or $t2,$i2 | |
617 | or $t3,$i3 | |
618 | ||
619 | srlg $i1,$s3,16 # i0 | |
620 | srlg $i2,$s3,8 # i1 | |
621 | srlg $i3,$s3,24 | |
622 | nr $s3,$mask # i2 | |
623 | nr $i1,$mask | |
624 | nr $i2,$mask | |
625 | llgc $i1,2048($i1,$tbl) # Td4[s3>>16] | |
626 | llgc $i2,2048($i2,$tbl) # Td4[s3>>8] | |
627 | llgc $s2,2048($s3,$tbl) # Td4[s3>>0] | |
628 | llgc $s3,2048($i3,$tbl) # Td4[s3>>24] | |
629 | sll $i1,16 | |
630 | sll $i2,8 | |
631 | sll $s3,24 | |
632 | or $s0,$i1 | |
633 | or $s1,$i2 | |
634 | or $s2,$t2 | |
635 | or $s3,$t3 | |
636 | ||
637 | x $s0,16($key) | |
638 | x $s1,20($key) | |
639 | x $s2,24($key) | |
640 | x $s3,28($key) | |
641 | ||
642 | br $ra | |
643 | .size _s390x_AES_decrypt,.-_s390x_AES_decrypt | |
644 | .string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>" | |
645 | ___ | |
646 | ||
# Post-process and emit the accumulated assembly: every backquoted span in
# $code is replaced by the result of evaluating its contents as a Perl
# expression (for example `0xff<<3` or `2048+32`), and the finished text is
# written to standard output.
647 | $code =~ s/\`([^\`]*)\`/eval $1/gem; | |
648 | print $code; |