# Source: git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/aes/asm/aes-s390x.pl
# Blob:   1495917d261cc28d6b1b095b9fed2e331ca57b13
# Path:   [thirdparty/openssl.git] / crypto / aes / asm / aes-s390x.pl
1 #! /usr/bin/env perl
2 # Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
3 #
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
8
9
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
16
17 # AES for s390x.
18
19 # April 2007.
20 #
21 # Software performance improvement over gcc-generated code is ~70% and
22 # in absolute terms is ~73 cycles per byte processed with 128-bit key.
23 # You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
24 # *strictly* in-order execution and issued instruction [in this case
25 # load value from memory is critical] has to complete before execution
26 # flow proceeds. S-boxes are compressed to 2KB[+256B].
27 #
28 # As for hardware acceleration support. It's basically a "teaser," as
29 # it can and should be improved in several ways. Most notably support
30 # for CBC is not utilized, nor multiple blocks are ever processed.
31 # Then software key schedule can be postponed till hardware support
32 # detection... Performance improvement over assembler is reportedly
33 # ~2.5x, but can reach >8x [naturally on larger chunks] if proper
34 # support is implemented.
35
36 # May 2007.
37 #
38 # Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
39 # for 128-bit keys, if hardware support is detected.
40
41 # January 2009.
42 #
43 # Add support for hardware AES192/256 and reschedule instructions to
44 # minimize/avoid Address Generation Interlock hazard and to favour
45 # dual-issue z10 pipeline. This gave ~25% improvement on z10 and
46 # almost 50% on z9. The gain is smaller on z10, because being dual-
47 # issue z10 makes it impossible to eliminate the interlock condition:
48 # critical path is not long enough. Yet it spends ~24 cycles per byte
49 # processed with 128-bit key.
50 #
51 # Unlike previous version hardware support detection takes place only
52 # at the moment of key schedule setup, which is denoted in key->rounds.
53 # This is done, because deferred key setup can't be made MT-safe, not
54 # for keys longer than 128 bits.
55 #
56 # Add AES_cbc_encrypt, which gives incredible performance improvement,
57 # it was measured to be ~6.6x. It's less than previously mentioned 8x,
58 # because software implementation was optimized.
59
60 # May 2010.
61 #
62 # Add AES_ctr32_encrypt. If hardware-assisted, it provides up to 4.3x
63 # performance improvement over "generic" counter mode routine relying
64 # on single-block, also hardware-assisted, AES_encrypt. "Up to" refers
65 # to the fact that exact throughput value depends on current stack
66 # frame alignment within 4KB page. In worst case you get ~75% of the
67 # maximum, but *on average* it would be as much as ~98%. Meaning that
68 # worst case is unlikely, it's like hitting a ravine on a plateau.
69
70 # November 2010.
71 #
72 # Adapt for -m31 build. If kernel supports what's called "highgprs"
73 # feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
74 # instructions and achieve "64-bit" performance even in 31-bit legacy
75 # application context. The feature is not specific to any particular
76 # processor, as long as it's "z-CPU". Latter implies that the code
77 # remains z/Architecture specific. On z990 it was measured to perform
78 # 2x better than code generated by gcc 4.3.
79
80 # December 2010.
81 #
82 # Add support for z196 "cipher message with counter" instruction.
83 # Note however that it's disengaged, because it was measured to
84 # perform ~12% worse than vanilla km-based code...
85
86 # February 2011.
87 #
88 # Add AES_xts_[en|de]crypt. This includes support for z196 km-xts-aes
89 # instructions, which deliver ~70% improvement at 8KB block size over
90 # vanilla km-based code, 37% - at most like 512-bytes block size.
91
92 $flavour = shift;
93
94 if ($flavour =~ /3[12]/) {
95 $SIZE_T=4;
96 $g="";
97 } else {
98 $SIZE_T=8;
99 $g="g";
100 }
101
102 while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
103 open STDOUT,">$output";
104
105 $softonly=0; # allow hardware support
106
107 $t0="%r0"; $mask="%r0";
108 $t1="%r1";
109 $t2="%r2"; $inp="%r2";
110 $t3="%r3"; $out="%r3"; $bits="%r3";
111 $key="%r4";
112 $i1="%r5";
113 $i2="%r6";
114 $i3="%r7";
115 $s0="%r8";
116 $s1="%r9";
117 $s2="%r10";
118 $s3="%r11";
119 $tbl="%r12";
120 $rounds="%r13";
121 $ra="%r14";
122 $sp="%r15";
123
124 $stdframe=16*$SIZE_T+4*8;
125
126 sub _data_word()
127 { my $i;
128 while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
129 }
130
131 $code=<<___;
132 #include "s390x_arch.h"
133
134 .text
135
136 .type AES_Te,\@object
137 .align 256
138 AES_Te:
139 ___
140 &_data_word(
141 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
142 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
143 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
144 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
145 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
146 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
147 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
148 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
149 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
150 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
151 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
152 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
153 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
154 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
155 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
156 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
157 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
158 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
159 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
160 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
161 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
162 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
163 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
164 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
165 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
166 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
167 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
168 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
169 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
170 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
171 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
172 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
173 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
174 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
175 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
176 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
177 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
178 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
179 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
180 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
181 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
182 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
183 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
184 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
185 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
186 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
187 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
188 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
189 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
190 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
191 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
192 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
193 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
194 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
195 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
196 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
197 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
198 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
199 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
200 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
201 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
202 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
203 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
204 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
205 $code.=<<___;
206 # Te4[256]
207 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
208 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
209 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
210 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
211 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
212 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
213 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
214 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
215 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
216 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
217 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
218 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
219 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
220 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
221 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
222 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
223 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
224 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
225 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
226 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
227 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
228 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
229 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
230 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
231 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
232 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
233 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
234 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
235 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
236 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
237 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
238 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
239 # rcon[]
240 .long 0x01000000, 0x02000000, 0x04000000, 0x08000000
241 .long 0x10000000, 0x20000000, 0x40000000, 0x80000000
242 .long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
243 .align 256
244 .size AES_Te,.-AES_Te
245
246 # void AES_encrypt(const unsigned char *inp, unsigned char *out,
247 # const AES_KEY *key) {
248 .globl AES_encrypt
249 .type AES_encrypt,\@function
250 AES_encrypt:
251 ___
252 $code.=<<___ if (!$softonly);
253 l %r0,240($key)
254 lhi %r1,16
255 clr %r0,%r1
256 jl .Lesoft
257
258 la %r1,0($key)
259 #la %r2,0($inp)
260 la %r4,0($out)
261 lghi %r3,16 # single block length
262 .long 0xb92e0042 # km %r4,%r2
263 brc 1,.-4 # can this happen?
264 br %r14
265 .align 64
266 .Lesoft:
267 ___
268 $code.=<<___;
269 stm${g} %r3,$ra,3*$SIZE_T($sp)
270
271 llgf $s0,0($inp)
272 llgf $s1,4($inp)
273 llgf $s2,8($inp)
274 llgf $s3,12($inp)
275
276 larl $tbl,AES_Te
277 bras $ra,_s390x_AES_encrypt
278
279 l${g} $out,3*$SIZE_T($sp)
280 st $s0,0($out)
281 st $s1,4($out)
282 st $s2,8($out)
283 st $s3,12($out)
284
285 lm${g} %r6,$ra,6*$SIZE_T($sp)
286 br $ra
287 .size AES_encrypt,.-AES_encrypt
288
289 .type _s390x_AES_encrypt,\@function
290 .align 16
291 _s390x_AES_encrypt:
292 st${g} $ra,15*$SIZE_T($sp)
293 x $s0,0($key)
294 x $s1,4($key)
295 x $s2,8($key)
296 x $s3,12($key)
297 l $rounds,240($key)
298 llill $mask,`0xff<<3`
299 aghi $rounds,-1
300 j .Lenc_loop
301 .align 16
302 .Lenc_loop:
303 sllg $t1,$s0,`0+3`
304 srlg $t2,$s0,`8-3`
305 srlg $t3,$s0,`16-3`
306 srl $s0,`24-3`
307 nr $s0,$mask
308 ngr $t1,$mask
309 nr $t2,$mask
310 nr $t3,$mask
311
312 srlg $i1,$s1,`16-3` # i0
313 sllg $i2,$s1,`0+3`
314 srlg $i3,$s1,`8-3`
315 srl $s1,`24-3`
316 nr $i1,$mask
317 nr $s1,$mask
318 ngr $i2,$mask
319 nr $i3,$mask
320
321 l $s0,0($s0,$tbl) # Te0[s0>>24]
322 l $t1,1($t1,$tbl) # Te3[s0>>0]
323 l $t2,2($t2,$tbl) # Te2[s0>>8]
324 l $t3,3($t3,$tbl) # Te1[s0>>16]
325
326 x $s0,3($i1,$tbl) # Te1[s1>>16]
327 l $s1,0($s1,$tbl) # Te0[s1>>24]
328 x $t2,1($i2,$tbl) # Te3[s1>>0]
329 x $t3,2($i3,$tbl) # Te2[s1>>8]
330
331 srlg $i1,$s2,`8-3` # i0
332 srlg $i2,$s2,`16-3` # i1
333 nr $i1,$mask
334 nr $i2,$mask
335 sllg $i3,$s2,`0+3`
336 srl $s2,`24-3`
337 nr $s2,$mask
338 ngr $i3,$mask
339
340 xr $s1,$t1
341 srlg $ra,$s3,`8-3` # i1
342 sllg $t1,$s3,`0+3` # i0
343 nr $ra,$mask
344 la $key,16($key)
345 ngr $t1,$mask
346
347 x $s0,2($i1,$tbl) # Te2[s2>>8]
348 x $s1,3($i2,$tbl) # Te1[s2>>16]
349 l $s2,0($s2,$tbl) # Te0[s2>>24]
350 x $t3,1($i3,$tbl) # Te3[s2>>0]
351
352 srlg $i3,$s3,`16-3` # i2
353 xr $s2,$t2
354 srl $s3,`24-3`
355 nr $i3,$mask
356 nr $s3,$mask
357
358 x $s0,0($key)
359 x $s1,4($key)
360 x $s2,8($key)
361 x $t3,12($key)
362
363 x $s0,1($t1,$tbl) # Te3[s3>>0]
364 x $s1,2($ra,$tbl) # Te2[s3>>8]
365 x $s2,3($i3,$tbl) # Te1[s3>>16]
366 l $s3,0($s3,$tbl) # Te0[s3>>24]
367 xr $s3,$t3
368
369 brct $rounds,.Lenc_loop
370 .align 16
371
372 sllg $t1,$s0,`0+3`
373 srlg $t2,$s0,`8-3`
374 ngr $t1,$mask
375 srlg $t3,$s0,`16-3`
376 srl $s0,`24-3`
377 nr $s0,$mask
378 nr $t2,$mask
379 nr $t3,$mask
380
381 srlg $i1,$s1,`16-3` # i0
382 sllg $i2,$s1,`0+3`
383 ngr $i2,$mask
384 srlg $i3,$s1,`8-3`
385 srl $s1,`24-3`
386 nr $i1,$mask
387 nr $s1,$mask
388 nr $i3,$mask
389
390 llgc $s0,2($s0,$tbl) # Te4[s0>>24]
391 llgc $t1,2($t1,$tbl) # Te4[s0>>0]
392 sll $s0,24
393 llgc $t2,2($t2,$tbl) # Te4[s0>>8]
394 llgc $t3,2($t3,$tbl) # Te4[s0>>16]
395 sll $t2,8
396 sll $t3,16
397
398 llgc $i1,2($i1,$tbl) # Te4[s1>>16]
399 llgc $s1,2($s1,$tbl) # Te4[s1>>24]
400 llgc $i2,2($i2,$tbl) # Te4[s1>>0]
401 llgc $i3,2($i3,$tbl) # Te4[s1>>8]
402 sll $i1,16
403 sll $s1,24
404 sll $i3,8
405 or $s0,$i1
406 or $s1,$t1
407 or $t2,$i2
408 or $t3,$i3
409
410 srlg $i1,$s2,`8-3` # i0
411 srlg $i2,$s2,`16-3` # i1
412 nr $i1,$mask
413 nr $i2,$mask
414 sllg $i3,$s2,`0+3`
415 srl $s2,`24-3`
416 ngr $i3,$mask
417 nr $s2,$mask
418
419 sllg $t1,$s3,`0+3` # i0
420 srlg $ra,$s3,`8-3` # i1
421 ngr $t1,$mask
422
423 llgc $i1,2($i1,$tbl) # Te4[s2>>8]
424 llgc $i2,2($i2,$tbl) # Te4[s2>>16]
425 sll $i1,8
426 llgc $s2,2($s2,$tbl) # Te4[s2>>24]
427 llgc $i3,2($i3,$tbl) # Te4[s2>>0]
428 sll $i2,16
429 nr $ra,$mask
430 sll $s2,24
431 or $s0,$i1
432 or $s1,$i2
433 or $s2,$t2
434 or $t3,$i3
435
436 srlg $i3,$s3,`16-3` # i2
437 srl $s3,`24-3`
438 nr $i3,$mask
439 nr $s3,$mask
440
441 l $t0,16($key)
442 l $t2,20($key)
443
444 llgc $i1,2($t1,$tbl) # Te4[s3>>0]
445 llgc $i2,2($ra,$tbl) # Te4[s3>>8]
446 llgc $i3,2($i3,$tbl) # Te4[s3>>16]
447 llgc $s3,2($s3,$tbl) # Te4[s3>>24]
448 sll $i2,8
449 sll $i3,16
450 sll $s3,24
451 or $s0,$i1
452 or $s1,$i2
453 or $s2,$i3
454 or $s3,$t3
455
456 l${g} $ra,15*$SIZE_T($sp)
457 xr $s0,$t0
458 xr $s1,$t2
459 x $s2,24($key)
460 x $s3,28($key)
461
462 br $ra
463 .size _s390x_AES_encrypt,.-_s390x_AES_encrypt
464 ___
465
466 $code.=<<___;
467 .type AES_Td,\@object
468 .align 256
469 AES_Td:
470 ___
471 &_data_word(
472 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
473 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
474 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
475 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
476 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
477 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
478 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
479 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
480 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
481 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
482 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
483 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
484 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
485 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
486 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
487 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
488 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
489 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
490 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
491 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
492 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
493 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
494 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
495 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
496 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
497 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
498 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
499 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
500 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
501 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
502 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
503 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
504 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
505 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
506 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
507 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
508 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
509 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
510 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
511 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
512 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
513 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
514 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
515 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
516 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
517 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
518 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
519 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
520 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
521 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
522 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
523 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
524 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
525 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
526 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
527 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
528 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
529 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
530 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
531 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
532 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
533 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
534 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
535 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
536 $code.=<<___;
537 # Td4[256]
538 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
539 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
540 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
541 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
542 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
543 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
544 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
545 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
546 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
547 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
548 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
549 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
550 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
551 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
552 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
553 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
554 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
555 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
556 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
557 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
558 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
559 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
560 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
561 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
562 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
563 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
564 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
565 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
566 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
567 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
568 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
569 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
570 .size AES_Td,.-AES_Td
571
572 # void AES_decrypt(const unsigned char *inp, unsigned char *out,
573 # const AES_KEY *key) {
574 .globl AES_decrypt
575 .type AES_decrypt,\@function
576 AES_decrypt:
577 ___
578 $code.=<<___ if (!$softonly);
579 l %r0,240($key)
580 lhi %r1,16
581 clr %r0,%r1
582 jl .Ldsoft
583
584 la %r1,0($key)
585 #la %r2,0($inp)
586 la %r4,0($out)
587 lghi %r3,16 # single block length
588 .long 0xb92e0042 # km %r4,%r2
589 brc 1,.-4 # can this happen?
590 br %r14
591 .align 64
592 .Ldsoft:
593 ___
594 $code.=<<___;
595 stm${g} %r3,$ra,3*$SIZE_T($sp)
596
597 llgf $s0,0($inp)
598 llgf $s1,4($inp)
599 llgf $s2,8($inp)
600 llgf $s3,12($inp)
601
602 larl $tbl,AES_Td
603 bras $ra,_s390x_AES_decrypt
604
605 l${g} $out,3*$SIZE_T($sp)
606 st $s0,0($out)
607 st $s1,4($out)
608 st $s2,8($out)
609 st $s3,12($out)
610
611 lm${g} %r6,$ra,6*$SIZE_T($sp)
612 br $ra
613 .size AES_decrypt,.-AES_decrypt
614
615 .type _s390x_AES_decrypt,\@function
616 .align 16
617 _s390x_AES_decrypt:
618 st${g} $ra,15*$SIZE_T($sp)
619 x $s0,0($key)
620 x $s1,4($key)
621 x $s2,8($key)
622 x $s3,12($key)
623 l $rounds,240($key)
624 llill $mask,`0xff<<3`
625 aghi $rounds,-1
626 j .Ldec_loop
627 .align 16
628 .Ldec_loop:
629 srlg $t1,$s0,`16-3`
630 srlg $t2,$s0,`8-3`
631 sllg $t3,$s0,`0+3`
632 srl $s0,`24-3`
633 nr $s0,$mask
634 nr $t1,$mask
635 nr $t2,$mask
636 ngr $t3,$mask
637
638 sllg $i1,$s1,`0+3` # i0
639 srlg $i2,$s1,`16-3`
640 srlg $i3,$s1,`8-3`
641 srl $s1,`24-3`
642 ngr $i1,$mask
643 nr $s1,$mask
644 nr $i2,$mask
645 nr $i3,$mask
646
647 l $s0,0($s0,$tbl) # Td0[s0>>24]
648 l $t1,3($t1,$tbl) # Td1[s0>>16]
649 l $t2,2($t2,$tbl) # Td2[s0>>8]
650 l $t3,1($t3,$tbl) # Td3[s0>>0]
651
652 x $s0,1($i1,$tbl) # Td3[s1>>0]
653 l $s1,0($s1,$tbl) # Td0[s1>>24]
654 x $t2,3($i2,$tbl) # Td1[s1>>16]
655 x $t3,2($i3,$tbl) # Td2[s1>>8]
656
657 srlg $i1,$s2,`8-3` # i0
658 sllg $i2,$s2,`0+3` # i1
659 srlg $i3,$s2,`16-3`
660 srl $s2,`24-3`
661 nr $i1,$mask
662 ngr $i2,$mask
663 nr $s2,$mask
664 nr $i3,$mask
665
666 xr $s1,$t1
667 srlg $ra,$s3,`8-3` # i1
668 srlg $t1,$s3,`16-3` # i0
669 nr $ra,$mask
670 la $key,16($key)
671 nr $t1,$mask
672
673 x $s0,2($i1,$tbl) # Td2[s2>>8]
674 x $s1,1($i2,$tbl) # Td3[s2>>0]
675 l $s2,0($s2,$tbl) # Td0[s2>>24]
676 x $t3,3($i3,$tbl) # Td1[s2>>16]
677
678 sllg $i3,$s3,`0+3` # i2
679 srl $s3,`24-3`
680 ngr $i3,$mask
681 nr $s3,$mask
682
683 xr $s2,$t2
684 x $s0,0($key)
685 x $s1,4($key)
686 x $s2,8($key)
687 x $t3,12($key)
688
689 x $s0,3($t1,$tbl) # Td1[s3>>16]
690 x $s1,2($ra,$tbl) # Td2[s3>>8]
691 x $s2,1($i3,$tbl) # Td3[s3>>0]
692 l $s3,0($s3,$tbl) # Td0[s3>>24]
693 xr $s3,$t3
694
695 brct $rounds,.Ldec_loop
696 .align 16
697
698 l $t1,`2048+0`($tbl) # prefetch Td4
699 l $t2,`2048+64`($tbl)
700 l $t3,`2048+128`($tbl)
701 l $i1,`2048+192`($tbl)
702 llill $mask,0xff
703
704 srlg $i3,$s0,24 # i0
705 srlg $t1,$s0,16
706 srlg $t2,$s0,8
707 nr $s0,$mask # i3
708 nr $t1,$mask
709
710 srlg $i1,$s1,24
711 nr $t2,$mask
712 srlg $i2,$s1,16
713 srlg $ra,$s1,8
714 nr $s1,$mask # i0
715 nr $i2,$mask
716 nr $ra,$mask
717
718 llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
719 llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
720 llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
721 sll $t1,16
722 llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
723 sllg $s0,$i3,24
724 sll $t2,8
725
726 llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
727 llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
728 llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
729 sll $i1,24
730 llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
731 sll $i2,16
732 sll $i3,8
733 or $s0,$s1
734 or $t1,$i1
735 or $t2,$i2
736 or $t3,$i3
737
738 srlg $i1,$s2,8 # i0
739 srlg $i2,$s2,24
740 srlg $i3,$s2,16
741 nr $s2,$mask # i1
742 nr $i1,$mask
743 nr $i3,$mask
744 llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
745 llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
746 llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
747 llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
748 sll $i1,8
749 sll $i2,24
750 or $s0,$i1
751 sll $i3,16
752 or $t2,$i2
753 or $t3,$i3
754
755 srlg $i1,$s3,16 # i0
756 srlg $i2,$s3,8 # i1
757 srlg $i3,$s3,24
758 nr $s3,$mask # i2
759 nr $i1,$mask
760 nr $i2,$mask
761
762 l${g} $ra,15*$SIZE_T($sp)
763 or $s1,$t1
764 l $t0,16($key)
765 l $t1,20($key)
766
767 llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
768 llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
769 sll $i1,16
770 llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
771 llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
772 sll $i2,8
773 sll $s3,24
774 or $s0,$i1
775 or $s1,$i2
776 or $s2,$t2
777 or $s3,$t3
778
779 xr $s0,$t0
780 xr $s1,$t1
781 x $s2,24($key)
782 x $s3,28($key)
783
784 br $ra
785 .size _s390x_AES_decrypt,.-_s390x_AES_decrypt
786 ___
787
788 $code.=<<___;
789 # void AES_set_encrypt_key(const unsigned char *in, int bits,
790 # AES_KEY *key) {
791 .globl AES_set_encrypt_key
792 .type AES_set_encrypt_key,\@function
793 .align 16
794 AES_set_encrypt_key:
795 _s390x_AES_set_encrypt_key:
796 lghi $t0,0
797 cl${g}r $inp,$t0
798 je .Lminus1
799 cl${g}r $key,$t0
800 je .Lminus1
801
802 lghi $t0,128
803 clr $bits,$t0
804 je .Lproceed
805 lghi $t0,192
806 clr $bits,$t0
807 je .Lproceed
808 lghi $t0,256
809 clr $bits,$t0
810 je .Lproceed
811 lghi %r2,-2
812 br %r14
813
814 .align 16
815 .Lproceed:
816 ___
817 $code.=<<___ if (!$softonly);
818 # convert bits to km(c) code, [128,192,256]->[18,19,20]
819 lhi %r5,-128
820 lhi %r0,18
821 ar %r5,$bits
822 srl %r5,6
823 ar %r5,%r0
824
825 larl %r1,OPENSSL_s390xcap_P
826 llihh %r0,0x8000
827 srlg %r0,%r0,0(%r5)
828 ng %r0,S390X_KM(%r1) # check availability of both km...
829 ng %r0,S390X_KMC(%r1) # ...and kmc support for given key length
830 jz .Lekey_internal
831
832 lmg %r0,%r1,0($inp) # just copy 128 bits...
833 stmg %r0,%r1,0($key)
834 lhi %r0,192
835 cr $bits,%r0
836 jl 1f
837 lg %r1,16($inp)
838 stg %r1,16($key)
839 je 1f
840 lg %r1,24($inp)
841 stg %r1,24($key)
842 1: st $bits,236($key) # save bits [for debugging purposes]
843 lgr $t0,%r5
844 st %r5,240($key) # save km(c) code
845 lghi %r2,0
846 br %r14
847 ___
848 $code.=<<___;
849 .align 16
850 .Lekey_internal:
851 stm${g} %r4,%r13,4*$SIZE_T($sp) # all non-volatile regs and $key
852
853 larl $tbl,AES_Te+2048
854
855 llgf $s0,0($inp)
856 llgf $s1,4($inp)
857 llgf $s2,8($inp)
858 llgf $s3,12($inp)
859 st $s0,0($key)
860 st $s1,4($key)
861 st $s2,8($key)
862 st $s3,12($key)
863 lghi $t0,128
864 cr $bits,$t0
865 jne .Lnot128
866
867 llill $mask,0xff
868 lghi $t3,0 # i=0
869 lghi $rounds,10
870 st $rounds,240($key)
871
872 llgfr $t2,$s3 # temp=rk[3]
873 srlg $i1,$s3,8
874 srlg $i2,$s3,16
875 srlg $i3,$s3,24
876 nr $t2,$mask
877 nr $i1,$mask
878 nr $i2,$mask
879
880 .align 16
881 .L128_loop:
882 la $t2,0($t2,$tbl)
883 la $i1,0($i1,$tbl)
884 la $i2,0($i2,$tbl)
885 la $i3,0($i3,$tbl)
886 icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
887 icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
888 icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
889 icm $t2,1,0($i3) # Te4[rk[3]>>24]
890 x $t2,256($t3,$tbl) # rcon[i]
891 xr $s0,$t2 # rk[4]=rk[0]^...
892 xr $s1,$s0 # rk[5]=rk[1]^rk[4]
893 xr $s2,$s1 # rk[6]=rk[2]^rk[5]
894 xr $s3,$s2 # rk[7]=rk[3]^rk[6]
895
896 llgfr $t2,$s3 # temp=rk[3]
897 srlg $i1,$s3,8
898 srlg $i2,$s3,16
899 nr $t2,$mask
900 nr $i1,$mask
901 srlg $i3,$s3,24
902 nr $i2,$mask
903
904 st $s0,16($key)
905 st $s1,20($key)
906 st $s2,24($key)
907 st $s3,28($key)
908 la $key,16($key) # key+=4
909 la $t3,4($t3) # i++
910 brct $rounds,.L128_loop
911 lghi $t0,10
912 lghi %r2,0
913 lm${g} %r4,%r13,4*$SIZE_T($sp)
914 br $ra
915
916 .align 16
917 .Lnot128:
918 llgf $t0,16($inp)
919 llgf $t1,20($inp)
920 st $t0,16($key)
921 st $t1,20($key)
922 lghi $t0,192
923 cr $bits,$t0
924 jne .Lnot192
925
926 llill $mask,0xff
927 lghi $t3,0 # i=0
928 lghi $rounds,12
929 st $rounds,240($key)
930 lghi $rounds,8
931
932 srlg $i1,$t1,8
933 srlg $i2,$t1,16
934 srlg $i3,$t1,24
935 nr $t1,$mask
936 nr $i1,$mask
937 nr $i2,$mask
938
939 .align 16
940 .L192_loop:
941 la $t1,0($t1,$tbl)
942 la $i1,0($i1,$tbl)
943 la $i2,0($i2,$tbl)
944 la $i3,0($i3,$tbl)
945 icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
946 icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
947 icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
948 icm $t1,1,0($i3) # Te4[rk[5]>>24]
949 x $t1,256($t3,$tbl) # rcon[i]
950 xr $s0,$t1 # rk[6]=rk[0]^...
951 xr $s1,$s0 # rk[7]=rk[1]^rk[6]
952 xr $s2,$s1 # rk[8]=rk[2]^rk[7]
953 xr $s3,$s2 # rk[9]=rk[3]^rk[8]
954
955 st $s0,24($key)
956 st $s1,28($key)
957 st $s2,32($key)
958 st $s3,36($key)
959 brct $rounds,.L192_continue
960 lghi $t0,12
961 lghi %r2,0
962 lm${g} %r4,%r13,4*$SIZE_T($sp)
963 br $ra
964
965 .align 16
966 .L192_continue:
967 lgr $t1,$s3
968 x $t1,16($key) # rk[10]=rk[4]^rk[9]
969 st $t1,40($key)
970 x $t1,20($key) # rk[11]=rk[5]^rk[10]
971 st $t1,44($key)
972
973 srlg $i1,$t1,8
974 srlg $i2,$t1,16
975 srlg $i3,$t1,24
976 nr $t1,$mask
977 nr $i1,$mask
978 nr $i2,$mask
979
980 la $key,24($key) # key+=6
981 la $t3,4($t3) # i++
982 j .L192_loop
983
984 .align 16
985 .Lnot192:
986 llgf $t0,24($inp)
987 llgf $t1,28($inp)
988 st $t0,24($key)
989 st $t1,28($key)
990 llill $mask,0xff
991 lghi $t3,0 # i=0
992 lghi $rounds,14
993 st $rounds,240($key)
994 lghi $rounds,7
995
996 srlg $i1,$t1,8
997 srlg $i2,$t1,16
998 srlg $i3,$t1,24
999 nr $t1,$mask
1000 nr $i1,$mask
1001 nr $i2,$mask
1002
1003 .align 16
1004 .L256_loop:
1005 la $t1,0($t1,$tbl)
1006 la $i1,0($i1,$tbl)
1007 la $i2,0($i2,$tbl)
1008 la $i3,0($i3,$tbl)
1009 icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
1010 icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
1011 icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
1012 icm $t1,1,0($i3) # Te4[rk[7]>>24]
1013 x $t1,256($t3,$tbl) # rcon[i]
1014 xr $s0,$t1 # rk[8]=rk[0]^...
1015 xr $s1,$s0 # rk[9]=rk[1]^rk[8]
1016 xr $s2,$s1 # rk[10]=rk[2]^rk[9]
1017 xr $s3,$s2 # rk[11]=rk[3]^rk[10]
1018 st $s0,32($key)
1019 st $s1,36($key)
1020 st $s2,40($key)
1021 st $s3,44($key)
1022 brct $rounds,.L256_continue
1023 lghi $t0,14
1024 lghi %r2,0
1025 lm${g} %r4,%r13,4*$SIZE_T($sp)
1026 br $ra
1027
1028 .align 16
1029 .L256_continue:
1030 lgr $t1,$s3 # temp=rk[11]
1031 srlg $i1,$s3,8
1032 srlg $i2,$s3,16
1033 srlg $i3,$s3,24
1034 nr $t1,$mask
1035 nr $i1,$mask
1036 nr $i2,$mask
1037 la $t1,0($t1,$tbl)
1038 la $i1,0($i1,$tbl)
1039 la $i2,0($i2,$tbl)
1040 la $i3,0($i3,$tbl)
1041 llgc $t1,0($t1) # Te4[rk[11]>>0]
1042 icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
1043 icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
1044 icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
1045 x $t1,16($key) # rk[12]=rk[4]^...
1046 st $t1,48($key)
1047 x $t1,20($key) # rk[13]=rk[5]^rk[12]
1048 st $t1,52($key)
1049 x $t1,24($key) # rk[14]=rk[6]^rk[13]
1050 st $t1,56($key)
1051 x $t1,28($key) # rk[15]=rk[7]^rk[14]
1052 st $t1,60($key)
1053
1054 srlg $i1,$t1,8
1055 srlg $i2,$t1,16
1056 srlg $i3,$t1,24
1057 nr $t1,$mask
1058 nr $i1,$mask
1059 nr $i2,$mask
1060
1061 la $key,32($key) # key+=8
1062 la $t3,4($t3) # i++
1063 j .L256_loop
1064
1065 .Lminus1:
1066 lghi %r2,-1
1067 br $ra
1068 .size AES_set_encrypt_key,.-AES_set_encrypt_key
1069
1070 # void AES_set_decrypt_key(const unsigned char *in, int bits,
1071 # AES_KEY *key) {
1072 .globl AES_set_decrypt_key
1073 .type AES_set_decrypt_key,\@function
1074 .align 16
1075 AES_set_decrypt_key:
1076 #st${g} $key,4*$SIZE_T($sp) # I rely on AES_set_encrypt_key to
1077 st${g} $ra,14*$SIZE_T($sp) # save non-volatile registers and $key!
1078 bras $ra,_s390x_AES_set_encrypt_key
1079 #l${g} $key,4*$SIZE_T($sp)
1080 l${g} $ra,14*$SIZE_T($sp)
1081 ltgr %r2,%r2
1082 bnzr $ra
1083 ___
1084 $code.=<<___ if (!$softonly);
1085 #l	$t0,240($key)		# $t0 still holds 240($key) as left by AES_set_encrypt_key
1086 	lhi	$t1,16
1087 	cr	$t0,$t1		# <16 looks like a round count (software schedule);
1088 	jl	.Lgo		# otherwise it is a km function code (hardware)
1089 	oill	$t0,0x80	# set "decrypt" bit
1090 	st	$t0,240($key)
1091 	br	$ra		# hardware path needs no key-schedule inversion
1092 ___
1093 $code.=<<___;
1094 .align	16
1095 .Lgo:	lgr	$rounds,$t0	#llgf	$rounds,240($key)
1096 	la	$i1,0($key)
1097 	sllg	$i2,$rounds,4
1098 	la	$i2,0($i2,$key)	# &rk[4*rounds], last round key
1099 	srl	$rounds,1	# rounds/2 swaps suffice
1100 	lghi	$t1,-16
1101
1102 .align	16
1103 .Linv:	lmg	$s0,$s1,0($i1)	# swap 16-byte round keys end-for-end:
1104 	lmg	$s2,$s3,0($i2)	# first<->last, second<->next-to-last, ...
1105 	stmg	$s0,$s1,0($i2)
1106 	stmg	$s2,$s3,0($i1)
1107 	la	$i1,16($i1)
1108 	la	$i2,0($t1,$i2)	# $i2-=16
1109 	brct	$rounds,.Linv
1110 ___
1111 $mask80=$i1;	# reuse scratch registers for the xtime() constants below
1112 $mask1b=$i2;
1113 $maskfe=$i3;
1114 $code.=<<___;
1115 	llgf	$rounds,240($key)
1116 	aghi	$rounds,-1
1117 	sll	$rounds,2	# (rounds-1)*4
1118 	llilh	$mask80,0x8080
1119 	llilh	$mask1b,0x1b1b
1120 	llilh	$maskfe,0xfefe
1121 	oill	$mask80,0x8080	# 0x80808080
1122 	oill	$mask1b,0x1b1b	# 0x1b1b1b1b, AES xtime reduction constant
1123 	oill	$maskfe,0xfefe	# 0xfefe fefe
1124
1125 .align	16
1126 .Lmix:	l	$s0,16($key)	# tp1; InvMixColumns of every inner round key
1127 	lr	$s1,$s0
1128 	ngr	$s1,$mask80
1129 	srlg	$t1,$s1,7
1130 	slr	$s1,$t1
1131 	nr	$s1,$mask1b
1132 	sllg	$t1,$s0,1
1133 	nr	$t1,$maskfe
1134 	xr	$s1,$t1		# tp2
1135
1136 	lr	$s2,$s1
1137 	ngr	$s2,$mask80
1138 	srlg	$t1,$s2,7
1139 	slr	$s2,$t1
1140 	nr	$s2,$mask1b
1141 	sllg	$t1,$s1,1
1142 	nr	$t1,$maskfe
1143 	xr	$s2,$t1		# tp4
1144
1145 	lr	$s3,$s2
1146 	ngr	$s3,$mask80
1147 	srlg	$t1,$s3,7
1148 	slr	$s3,$t1
1149 	nr	$s3,$mask1b
1150 	sllg	$t1,$s2,1
1151 	nr	$t1,$maskfe
1152 	xr	$s3,$t1		# tp8
1153
1154 	xr	$s1,$s0		# tp2^tp1
1155 	xr	$s2,$s0		# tp4^tp1
1156 	rll	$s0,$s0,24	# = ROTATE(tp1,8)
1157 	xr	$s2,$s3		# ^=tp8
1158 	xr	$s0,$s1		# ^=tp2^tp1
1159 	xr	$s1,$s3		# tp2^tp1^tp8
1160 	xr	$s0,$s2		# ^=tp4^tp1^tp8
1161 	rll	$s1,$s1,8
1162 	rll	$s2,$s2,16
1163 	xr	$s0,$s1		# ^= ROTATE(tp8^tp2^tp1,24)
1164 	rll	$s3,$s3,24
1165 	xr	$s0,$s2		# ^= ROTATE(tp8^tp4^tp1,16)
1166 	xr	$s0,$s3		# ^= ROTATE(tp8,8)
1167
1168 	st	$s0,16($key)
1169 	la	$key,4($key)	# advance one 32-bit word per iteration
1170 	brct	$rounds,.Lmix
1171
1172 	lm${g}	%r6,%r13,6*$SIZE_T($sp)# as was saved by AES_set_encrypt_key!
1173 	lghi	%r2,0
1174 	br	$ra
1175 .size	AES_set_decrypt_key,.-AES_set_decrypt_key
1176 ___
1177
1178 ########################################################################
1179 # void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
1180 # size_t length, const AES_KEY *key,
1181 # unsigned char *ivec, const int enc)
1182 {
1183 my $inp="%r2";
1184 my $out="%r4";	# length and out are swapped
1185 my $len="%r3";
1186 my $key="%r5";
1187 my $ivp="%r6";
1188
1189 $code.=<<___;
1190 .globl	AES_cbc_encrypt
1191 .type	AES_cbc_encrypt,\@function
1192 .align	16
1193 AES_cbc_encrypt:
1194 	xgr	%r3,%r4		# flip %r3 and %r4, out and len
1195 	xgr	%r4,%r3		# (three-XOR register swap, no scratch register)
1196 	xgr	%r3,%r4
1197 ___
1198 $code.=<<___ if (!$softonly);
1199 lhi %r0,16
1200 cl %r0,240($key)
1201 jh .Lcbc_software
1202
1203 lg %r0,0($ivp) # copy ivec
1204 lg %r1,8($ivp)
1205 stmg %r0,%r1,16($sp)
1206 lmg %r0,%r1,0($key) # copy key, cover 256 bit
1207 stmg %r0,%r1,32($sp)
1208 lmg %r0,%r1,16($key)
1209 stmg %r0,%r1,48($sp)
1210 l %r0,240($key) # load kmc code
1211 lghi $key,15 # res=len%16, len-=res;
1212 ngr $key,$len
1213 sl${g}r $len,$key
1214 la %r1,16($sp) # parameter block - ivec || key
1215 jz .Lkmc_truncated
1216 .long 0xb92f0042 # kmc %r4,%r2
1217 brc 1,.-4 # pay attention to "partial completion"
1218 ltr $key,$key
1219 jnz .Lkmc_truncated
1220 .Lkmc_done:
1221 lmg %r0,%r1,16($sp) # copy ivec to caller
1222 stg %r0,0($ivp)
1223 stg %r1,8($ivp)
1224 br $ra
1225 .align 16
1226 .Lkmc_truncated:
1227 ahi $key,-1 # it's the way it's encoded in mvc
1228 tmll %r0,0x80
1229 jnz .Lkmc_truncated_dec
1230 lghi %r1,0
1231 stg %r1,16*$SIZE_T($sp)
1232 stg %r1,16*$SIZE_T+8($sp)
1233 bras %r1,1f
1234 mvc 16*$SIZE_T(1,$sp),0($inp)
1235 1: ex $key,0(%r1)
1236 la %r1,16($sp) # restore parameter block
1237 la $inp,16*$SIZE_T($sp)
1238 lghi $len,16
1239 .long 0xb92f0042 # kmc %r4,%r2
1240 j .Lkmc_done
1241 .align 16
1242 .Lkmc_truncated_dec:
1243 st${g} $out,4*$SIZE_T($sp)
1244 la $out,16*$SIZE_T($sp)
1245 lghi $len,16
1246 .long 0xb92f0042 # kmc %r4,%r2
1247 l${g} $out,4*$SIZE_T($sp)
1248 bras %r1,2f
1249 mvc 0(1,$out),16*$SIZE_T($sp)
1250 2: ex $key,0(%r1)
1251 j .Lkmc_done
1252 .align 16
1253 .Lcbc_software:
1254 ___
1255 $code.=<<___;
1256 stm${g} $key,$ra,5*$SIZE_T($sp)
1257 lhi %r0,0
1258 cl %r0,`$stdframe+$SIZE_T-4`($sp)
1259 je .Lcbc_decrypt
1260
1261 larl $tbl,AES_Te
1262
1263 llgf $s0,0($ivp)
1264 llgf $s1,4($ivp)
1265 llgf $s2,8($ivp)
1266 llgf $s3,12($ivp)
1267
1268 lghi $t0,16
1269 sl${g}r $len,$t0
1270 brc 4,.Lcbc_enc_tail # if borrow
1271 .Lcbc_enc_loop:
1272 stm${g} $inp,$out,2*$SIZE_T($sp)
1273 x $s0,0($inp)
1274 x $s1,4($inp)
1275 x $s2,8($inp)
1276 x $s3,12($inp)
1277 lgr %r4,$key
1278
1279 bras $ra,_s390x_AES_encrypt
1280
1281 lm${g} $inp,$key,2*$SIZE_T($sp)
1282 st $s0,0($out)
1283 st $s1,4($out)
1284 st $s2,8($out)
1285 st $s3,12($out)
1286
1287 la $inp,16($inp)
1288 la $out,16($out)
1289 lghi $t0,16
1290 lt${g}r $len,$len
1291 jz .Lcbc_enc_done
1292 sl${g}r $len,$t0
1293 brc 4,.Lcbc_enc_tail # if borrow
1294 j .Lcbc_enc_loop
1295 .align 16
1296 .Lcbc_enc_done:
1297 l${g} $ivp,6*$SIZE_T($sp)
1298 st $s0,0($ivp)
1299 st $s1,4($ivp)
1300 st $s2,8($ivp)
1301 st $s3,12($ivp)
1302
1303 lm${g} %r7,$ra,7*$SIZE_T($sp)
1304 br $ra
1305
1306 .align 16
1307 .Lcbc_enc_tail:
1308 aghi $len,15
1309 lghi $t0,0
1310 stg $t0,16*$SIZE_T($sp)
1311 stg $t0,16*$SIZE_T+8($sp)
1312 bras $t1,3f
1313 mvc 16*$SIZE_T(1,$sp),0($inp)
1314 3: ex $len,0($t1)
1315 lghi $len,0
1316 la $inp,16*$SIZE_T($sp)
1317 j .Lcbc_enc_loop
1318
1319 .align 16
1320 .Lcbc_decrypt:
1321 larl $tbl,AES_Td
1322
1323 lg $t0,0($ivp)
1324 lg $t1,8($ivp)
1325 stmg $t0,$t1,16*$SIZE_T($sp)
1326
1327 .Lcbc_dec_loop:
1328 stm${g} $inp,$out,2*$SIZE_T($sp)
1329 llgf $s0,0($inp)
1330 llgf $s1,4($inp)
1331 llgf $s2,8($inp)
1332 llgf $s3,12($inp)
1333 lgr %r4,$key
1334
1335 bras $ra,_s390x_AES_decrypt
1336
1337 lm${g} $inp,$key,2*$SIZE_T($sp)
1338 sllg $s0,$s0,32
1339 sllg $s2,$s2,32
1340 lr $s0,$s1
1341 lr $s2,$s3
1342
1343 lg $t0,0($inp)
1344 lg $t1,8($inp)
1345 xg $s0,16*$SIZE_T($sp)
1346 xg $s2,16*$SIZE_T+8($sp)
1347 lghi $s1,16
1348 sl${g}r $len,$s1
1349 brc 4,.Lcbc_dec_tail # if borrow
1350 brc 2,.Lcbc_dec_done # if zero
1351 stg $s0,0($out)
1352 stg $s2,8($out)
1353 stmg $t0,$t1,16*$SIZE_T($sp)
1354
1355 la $inp,16($inp)
1356 la $out,16($out)
1357 j .Lcbc_dec_loop
1358
1359 .Lcbc_dec_done:
1360 stg $s0,0($out)
1361 stg $s2,8($out)
1362 .Lcbc_dec_exit:
1363 lm${g} %r6,$ra,6*$SIZE_T($sp)
1364 stmg $t0,$t1,0($ivp)
1365
1366 br $ra
1367
1368 .align 16
1369 .Lcbc_dec_tail:
1370 aghi $len,15
1371 stg $s0,16*$SIZE_T($sp)
1372 stg $s2,16*$SIZE_T+8($sp)
1373 bras $s1,4f
1374 mvc 0(1,$out),16*$SIZE_T($sp)
1375 4: ex $len,0($s1)
1376 j .Lcbc_dec_exit
1377 .size AES_cbc_encrypt,.-AES_cbc_encrypt
1378 ___
1379 }
1380 ########################################################################
1381 # void AES_ctr32_encrypt(const unsigned char *in, unsigned char *out,
1382 # size_t blocks, const AES_KEY *key,
1383 # const unsigned char *ivec)
1384 {
1385 my $inp="%r2";
1386 my $out="%r4"; # blocks and out are swapped
1387 my $len="%r3";
1388 my $key="%r5"; my $iv0="%r5";
1389 my $ivp="%r6";
1390 my $fp ="%r7";
1391
1392 $code.=<<___;
1393 .globl AES_ctr32_encrypt
1394 .type AES_ctr32_encrypt,\@function
1395 .align 16
1396 AES_ctr32_encrypt:
1397 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1398 xgr %r4,%r3
1399 xgr %r3,%r4
1400 llgfr $len,$len # safe in ctr32 subroutine even in 64-bit case
1401 ___
1402 $code.=<<___ if (!$softonly);
1403 l %r0,240($key)
1404 lhi %r1,16
1405 clr %r0,%r1
1406 jl .Lctr32_software
1407
1408 stm${g} %r6,$s3,6*$SIZE_T($sp)
1409
1410 slgr $out,$inp
1411 la %r1,0($key) # %r1 is permanent copy of $key
1412 lg $iv0,0($ivp) # load ivec
1413 lg $ivp,8($ivp)
1414
1415 # prepare and allocate stack frame at the top of 4K page
1416 # with 1K reserved for eventual signal handling
1417 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1418 lghi $s1,-4096
1419 algr $s0,$sp
1420 lgr $fp,$sp
1421 ngr $s0,$s1 # align at page boundary
1422 slgr $fp,$s0 # total buffer size
1423 lgr $s2,$sp
1424 	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
1425 	slgr	$fp,$s1		# deduct reservation to get usable buffer size
1426 	# buffer size is at least 256 and at most 3072+256-16
1427
1428 la $sp,1024($s0) # alloca
1429 srlg $fp,$fp,4 # convert bytes to blocks, minimum 16
1430 st${g} $s2,0($sp) # back-chain
1431 st${g} $fp,$SIZE_T($sp)
1432
1433 slgr $len,$fp
1434 brc 1,.Lctr32_hw_switch # not zero, no borrow
1435 algr $fp,$len # input is shorter than allocated buffer
1436 lghi $len,0
1437 st${g} $fp,$SIZE_T($sp)
1438
1439 .Lctr32_hw_switch:
1440 ___
1441 $code.=<<___ if (!$softonly && 0);# kmctr code was measured to be ~12% slower
1442 llgfr $s0,%r0
1443 lgr $s1,%r1
1444 larl %r1,OPENSSL_s390xcap_P
1445 llihh %r0,0x8000 # check if kmctr supports the function code
1446 srlg %r0,%r0,0($s0)
1447 ng %r0,S390X_KMCTR(%r1) # check kmctr capability vector
1448 lgr %r0,$s0
1449 lgr %r1,$s1
1450 jz .Lctr32_km_loop
1451
1452 ####### kmctr code
1453 algr $out,$inp # restore $out
1454 lgr $s1,$len # $s1 undertakes $len
1455 j .Lctr32_kmctr_loop
1456 .align 16
1457 .Lctr32_kmctr_loop:
1458 la $s2,16($sp)
1459 lgr $s3,$fp
1460 .Lctr32_kmctr_prepare:
1461 stg $iv0,0($s2)
1462 stg $ivp,8($s2)
1463 la $s2,16($s2)
1464 ahi $ivp,1 # 32-bit increment, preserves upper half
1465 brct $s3,.Lctr32_kmctr_prepare
1466
1467 #la $inp,0($inp) # inp
1468 sllg $len,$fp,4 # len
1469 #la $out,0($out) # out
1470 la $s2,16($sp) # iv
1471 .long 0xb92da042 # kmctr $out,$s2,$inp
1472 brc 1,.-4 # pay attention to "partial completion"
1473
1474 slgr $s1,$fp
1475 brc 1,.Lctr32_kmctr_loop # not zero, no borrow
1476 algr $fp,$s1
1477 lghi $s1,0
1478 brc 4+1,.Lctr32_kmctr_loop # not zero
1479
1480 l${g} $sp,0($sp)
1481 lm${g} %r6,$s3,6*$SIZE_T($sp)
1482 br $ra
1483 .align 16
1484 ___
1485 $code.=<<___ if (!$softonly);
1486 .Lctr32_km_loop:
1487 la $s2,16($sp)
1488 lgr $s3,$fp
1489 .Lctr32_km_prepare:
1490 stg $iv0,0($s2)
1491 stg $ivp,8($s2)
1492 la $s2,16($s2)
1493 ahi $ivp,1 # 32-bit increment, preserves upper half
1494 brct $s3,.Lctr32_km_prepare
1495
1496 la $s0,16($sp) # inp
1497 sllg $s1,$fp,4 # len
1498 la $s2,16($sp) # out
1499 .long 0xb92e00a8 # km %r10,%r8
1500 brc 1,.-4 # pay attention to "partial completion"
1501
1502 la $s2,16($sp)
1503 lgr $s3,$fp
1504 slgr $s2,$inp
1505 .Lctr32_km_xor:
1506 lg $s0,0($inp)
1507 lg $s1,8($inp)
1508 xg $s0,0($s2,$inp)
1509 xg $s1,8($s2,$inp)
1510 stg $s0,0($out,$inp)
1511 stg $s1,8($out,$inp)
1512 la $inp,16($inp)
1513 brct $s3,.Lctr32_km_xor
1514
1515 slgr $len,$fp
1516 brc 1,.Lctr32_km_loop # not zero, no borrow
1517 algr $fp,$len
1518 lghi $len,0
1519 brc 4+1,.Lctr32_km_loop # not zero
1520
1521 l${g} $s0,0($sp)
1522 l${g} $s1,$SIZE_T($sp)
1523 la $s2,16($sp)
1524 .Lctr32_km_zap:
1525 stg $s0,0($s2)
1526 stg $s0,8($s2)
1527 la $s2,16($s2)
1528 brct $s1,.Lctr32_km_zap
1529
1530 la $sp,0($s0)
1531 lm${g} %r6,$s3,6*$SIZE_T($sp)
1532 br $ra
1533 .align 16
1534 .Lctr32_software:
1535 ___
1536 $code.=<<___;
1537 stm${g} $key,$ra,5*$SIZE_T($sp)
1538 sl${g}r $inp,$out
1539 larl $tbl,AES_Te
1540 llgf $t1,12($ivp)
1541
1542 .Lctr32_loop:
1543 stm${g} $inp,$out,2*$SIZE_T($sp)
1544 llgf $s0,0($ivp)
1545 llgf $s1,4($ivp)
1546 llgf $s2,8($ivp)
1547 lgr $s3,$t1
1548 st $t1,16*$SIZE_T($sp)
1549 lgr %r4,$key
1550
1551 bras $ra,_s390x_AES_encrypt
1552
1553 lm${g} $inp,$ivp,2*$SIZE_T($sp)
1554 llgf $t1,16*$SIZE_T($sp)
1555 x $s0,0($inp,$out)
1556 x $s1,4($inp,$out)
1557 x $s2,8($inp,$out)
1558 x $s3,12($inp,$out)
1559 stm $s0,$s3,0($out)
1560
1561 la $out,16($out)
1562 ahi $t1,1 # 32-bit increment
1563 brct $len,.Lctr32_loop
1564
1565 lm${g} %r6,$ra,6*$SIZE_T($sp)
1566 br $ra
1567 .size AES_ctr32_encrypt,.-AES_ctr32_encrypt
1568 ___
1569 }
1570
1571 ########################################################################
1572 # void AES_xts_encrypt(const unsigned char *inp, unsigned char *out,
1573 # size_t len, const AES_KEY *key1, const AES_KEY *key2,
1574 # const unsigned char iv[16]);
1575 #
1576 {
1577 my $inp="%r2";
1578 my $out="%r4"; # len and out are swapped
1579 my $len="%r3";
1580 my $key1="%r5"; # $i1
1581 my $key2="%r6"; # $i2
1582 my $fp="%r7"; # $i3
1583 my $tweak=16*$SIZE_T+16; # or $stdframe-16, bottom of the frame...
1584
1585 $code.=<<___;
1586 .type _s390x_xts_km,\@function
1587 .align 16
1588 _s390x_xts_km:
1589 ___
1590 $code.=<<___ if(1);
1591 llgfr $s0,%r0 # put aside the function code
1592 lghi $s1,0x7f
1593 nr $s1,%r0
1594 larl %r1,OPENSSL_s390xcap_P
1595 llihh %r0,0x8000
1596 srlg %r0,%r0,32($s1) # check for 32+function code
1597 ng %r0,S390X_KM(%r1) # check km capability vector
1598 lgr %r0,$s0 # restore the function code
1599 la %r1,0($key1) # restore $key1
1600 jz .Lxts_km_vanilla
1601
1602 lmg $i2,$i3,$tweak($sp) # put aside the tweak value
1603 algr $out,$inp
1604
1605 oill %r0,32 # switch to xts function code
1606 aghi $s1,-18 #
1607 sllg $s1,$s1,3 # (function code - 18)*8, 0 or 16
1608 la %r1,$tweak-16($sp)
1609 slgr %r1,$s1 # parameter block position
1610 lmg $s0,$s3,0($key1) # load 256 bits of key material,
1611 stmg $s0,$s3,0(%r1) # and copy it to parameter block.
1612 # yes, it contains junk and overlaps
1613 # with the tweak in 128-bit case.
1614 # it's done to avoid conditional
1615 # branch.
1616 stmg $i2,$i3,$tweak($sp) # "re-seat" the tweak value
1617
1618 .long 0xb92e0042 # km %r4,%r2
1619 brc 1,.-4 # pay attention to "partial completion"
1620
1621 lrvg $s0,$tweak+0($sp) # load the last tweak
1622 lrvg $s1,$tweak+8($sp)
1623 stmg %r0,%r3,$tweak-32($sp) # wipe copy of the key
1624
1625 nill %r0,0xffdf # switch back to original function code
1626 la %r1,0($key1) # restore pointer to $key1
1627 slgr $out,$inp
1628
1629 llgc $len,2*$SIZE_T-1($sp)
1630 nill $len,0x0f # $len%=16
1631 br $ra
1632
1633 .align 16
1634 .Lxts_km_vanilla:
1635 ___
1636 $code.=<<___;
1637 # prepare and allocate stack frame at the top of 4K page
1638 # with 1K reserved for eventual signal handling
1639 lghi $s0,-1024-256-16# guarantee at least 256-bytes buffer
1640 lghi $s1,-4096
1641 algr $s0,$sp
1642 lgr $fp,$sp
1643 ngr $s0,$s1 # align at page boundary
1644 slgr $fp,$s0 # total buffer size
1645 lgr $s2,$sp
1646 	lghi	$s1,1024+16	# sl[g]fi is extended-immediate facility
1647 	slgr	$fp,$s1		# deduct reservation to get usable buffer size
1648 	# buffer size is at least 256 and at most 3072+256-16
1649
1650 la $sp,1024($s0) # alloca
1651 nill $fp,0xfff0 # round to 16*n
1652 st${g} $s2,0($sp) # back-chain
1653 nill $len,0xfff0 # redundant
1654 st${g} $fp,$SIZE_T($sp)
1655
1656 slgr $len,$fp
1657 brc 1,.Lxts_km_go # not zero, no borrow
1658 algr $fp,$len # input is shorter than allocated buffer
1659 lghi $len,0
1660 st${g} $fp,$SIZE_T($sp)
1661
1662 .Lxts_km_go:
1663 lrvg $s0,$tweak+0($s2) # load the tweak value in little-endian
1664 lrvg $s1,$tweak+8($s2)
1665
1666 la $s2,16($sp) # vector of ascending tweak values
1667 slgr $s2,$inp
1668 srlg $s3,$fp,4
1669 j .Lxts_km_start
1670
1671 .Lxts_km_loop:
1672 la $s2,16($sp)
1673 slgr $s2,$inp
1674 srlg $s3,$fp,4
1675 .Lxts_km_prepare:
1676 lghi $i1,0x87
1677 srag $i2,$s1,63 # broadcast upper bit
1678 ngr $i1,$i2 # rem
1679 algr $s0,$s0
1680 alcgr $s1,$s1
1681 xgr $s0,$i1
1682 .Lxts_km_start:
1683 lrvgr $i1,$s0 # flip byte order
1684 lrvgr $i2,$s1
1685 stg $i1,0($s2,$inp)
1686 stg $i2,8($s2,$inp)
1687 xg $i1,0($inp)
1688 xg $i2,8($inp)
1689 stg $i1,0($out,$inp)
1690 stg $i2,8($out,$inp)
1691 la $inp,16($inp)
1692 brct $s3,.Lxts_km_prepare
1693
1694 slgr $inp,$fp # rewind $inp
1695 la $s2,0($out,$inp)
1696 lgr $s3,$fp
1697 .long 0xb92e00aa # km $s2,$s2
1698 brc 1,.-4 # pay attention to "partial completion"
1699
1700 la $s2,16($sp)
1701 slgr $s2,$inp
1702 srlg $s3,$fp,4
1703 .Lxts_km_xor:
1704 lg $i1,0($out,$inp)
1705 lg $i2,8($out,$inp)
1706 xg $i1,0($s2,$inp)
1707 xg $i2,8($s2,$inp)
1708 stg $i1,0($out,$inp)
1709 stg $i2,8($out,$inp)
1710 la $inp,16($inp)
1711 brct $s3,.Lxts_km_xor
1712
1713 slgr $len,$fp
1714 brc 1,.Lxts_km_loop # not zero, no borrow
1715 algr $fp,$len
1716 lghi $len,0
1717 brc 4+1,.Lxts_km_loop # not zero
1718
1719 l${g} $i1,0($sp) # back-chain
1720 llgf $fp,`2*$SIZE_T-4`($sp) # bytes used
1721 la $i2,16($sp)
1722 srlg $fp,$fp,4
1723 .Lxts_km_zap:
1724 stg $i1,0($i2)
1725 stg $i1,8($i2)
1726 la $i2,16($i2)
1727 brct $fp,.Lxts_km_zap
1728
1729 la $sp,0($i1)
1730 llgc $len,2*$SIZE_T-1($i1)
1731 nill $len,0x0f # $len%=16
1732 bzr $ra
1733
1734 # generate one more tweak...
1735 lghi $i1,0x87
1736 srag $i2,$s1,63 # broadcast upper bit
1737 ngr $i1,$i2 # rem
1738 algr $s0,$s0
1739 alcgr $s1,$s1
1740 xgr $s0,$i1
1741
1742 ltr $len,$len # clear zero flag
1743 br $ra
1744 .size _s390x_xts_km,.-_s390x_xts_km
1745
1746 .globl AES_xts_encrypt
1747 .type AES_xts_encrypt,\@function
1748 .align 16
1749 AES_xts_encrypt:
1750 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1751 xgr %r4,%r3
1752 xgr %r3,%r4
1753 ___
1754 $code.=<<___ if ($SIZE_T==4);
1755 llgfr $len,$len
1756 ___
1757 $code.=<<___;
1758 st${g} $len,1*$SIZE_T($sp) # save copy of $len
1759 srag $len,$len,4 # formally wrong, because it expands
1760 # sign byte, but who can afford asking
1761 # to process more than 2^63-1 bytes?
1762 # I use it, because it sets condition
1763 # code...
1764 bcr 8,$ra # abort if zero (i.e. less than 16)
1765 ___
1766 $code.=<<___ if (!$softonly);
1767 llgf %r0,240($key2)
1768 lhi %r1,16
1769 clr %r0,%r1
1770 jl .Lxts_enc_software
1771
1772 st${g} $ra,5*$SIZE_T($sp)
1773 stm${g} %r6,$s3,6*$SIZE_T($sp)
1774
1775 sllg $len,$len,4 # $len&=~15
1776 slgr $out,$inp
1777
1778 # generate the tweak value
1779 l${g} $s3,$stdframe($sp) # pointer to iv
1780 la $s2,$tweak($sp)
1781 lmg $s0,$s1,0($s3)
1782 lghi $s3,16
1783 stmg $s0,$s1,0($s2)
1784 la %r1,0($key2) # $key2 is not needed anymore
1785 .long 0xb92e00aa # km $s2,$s2, generate the tweak
1786 brc 1,.-4 # can this happen?
1787
1788 l %r0,240($key1)
1789 la %r1,0($key1) # $key1 is not needed anymore
1790 bras $ra,_s390x_xts_km
1791 jz .Lxts_enc_km_done
1792
1793 aghi $inp,-16 # take one step back
1794 la $i3,0($out,$inp) # put aside real $out
1795 .Lxts_enc_km_steal:
1796 llgc $i1,16($inp)
1797 llgc $i2,0($out,$inp)
1798 stc $i1,0($out,$inp)
1799 stc $i2,16($out,$inp)
1800 la $inp,1($inp)
1801 brct $len,.Lxts_enc_km_steal
1802
1803 la $s2,0($i3)
1804 lghi $s3,16
1805 lrvgr $i1,$s0 # flip byte order
1806 lrvgr $i2,$s1
1807 xg $i1,0($s2)
1808 xg $i2,8($s2)
1809 stg $i1,0($s2)
1810 stg $i2,8($s2)
1811 .long 0xb92e00aa # km $s2,$s2
1812 brc 1,.-4 # can this happen?
1813 lrvgr $i1,$s0 # flip byte order
1814 lrvgr $i2,$s1
1815 xg $i1,0($i3)
1816 xg $i2,8($i3)
1817 stg $i1,0($i3)
1818 stg $i2,8($i3)
1819
1820 .Lxts_enc_km_done:
1821 stg $sp,$tweak+0($sp) # wipe tweak
1822 stg $sp,$tweak+8($sp)
1823 l${g} $ra,5*$SIZE_T($sp)
1824 lm${g} %r6,$s3,6*$SIZE_T($sp)
1825 br $ra
1826 .align 16
1827 .Lxts_enc_software:
1828 ___
1829 $code.=<<___;
1830 stm${g} %r6,$ra,6*$SIZE_T($sp)
1831
1832 slgr $out,$inp
1833
1834 l${g} $s3,$stdframe($sp) # ivp
1835 llgf $s0,0($s3) # load iv
1836 llgf $s1,4($s3)
1837 llgf $s2,8($s3)
1838 llgf $s3,12($s3)
1839 stm${g} %r2,%r5,2*$SIZE_T($sp)
1840 la $key,0($key2)
1841 larl $tbl,AES_Te
1842 bras $ra,_s390x_AES_encrypt # generate the tweak
1843 lm${g} %r2,%r5,2*$SIZE_T($sp)
1844 stm $s0,$s3,$tweak($sp) # save the tweak
1845 j .Lxts_enc_enter
1846
1847 .align 16
1848 .Lxts_enc_loop:
1849 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
1850 lrvg $s3,$tweak+8($sp)
1851 lghi %r1,0x87
1852 srag %r0,$s3,63 # broadcast upper bit
1853 ngr %r1,%r0 # rem
1854 algr $s1,$s1
1855 alcgr $s3,$s3
1856 xgr $s1,%r1
1857 lrvgr $s1,$s1 # flip byte order
1858 lrvgr $s3,$s3
1859 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
1860 stg $s1,$tweak+0($sp) # save the tweak
1861 llgfr $s1,$s1
1862 srlg $s2,$s3,32
1863 stg $s3,$tweak+8($sp)
1864 llgfr $s3,$s3
1865 la $inp,16($inp) # $inp+=16
1866 .Lxts_enc_enter:
1867 x $s0,0($inp) # ^=*($inp)
1868 x $s1,4($inp)
1869 x $s2,8($inp)
1870 x $s3,12($inp)
1871 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
1872 la $key,0($key1)
1873 bras $ra,_s390x_AES_encrypt
1874 lm${g} %r2,%r5,2*$SIZE_T($sp)
1875 x $s0,$tweak+0($sp) # ^=tweak
1876 x $s1,$tweak+4($sp)
1877 x $s2,$tweak+8($sp)
1878 x $s3,$tweak+12($sp)
1879 st $s0,0($out,$inp)
1880 st $s1,4($out,$inp)
1881 st $s2,8($out,$inp)
1882 st $s3,12($out,$inp)
1883 brct${g} $len,.Lxts_enc_loop
1884
1885 llgc $len,`2*$SIZE_T-1`($sp)
1886 nill $len,0x0f # $len%16
1887 jz .Lxts_enc_done
1888
1889 la $i3,0($inp,$out) # put aside real $out
1890 .Lxts_enc_steal:
1891 llgc %r0,16($inp)
1892 llgc %r1,0($out,$inp)
1893 stc %r0,0($out,$inp)
1894 stc %r1,16($out,$inp)
1895 la $inp,1($inp)
1896 brct $len,.Lxts_enc_steal
1897 la $out,0($i3) # restore real $out
1898
1899 # generate last tweak...
1900 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
1901 lrvg $s3,$tweak+8($sp)
1902 lghi %r1,0x87
1903 srag %r0,$s3,63 # broadcast upper bit
1904 ngr %r1,%r0 # rem
1905 algr $s1,$s1
1906 alcgr $s3,$s3
1907 xgr $s1,%r1
1908 lrvgr $s1,$s1 # flip byte order
1909 lrvgr $s3,$s3
1910 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
1911 stg $s1,$tweak+0($sp) # save the tweak
1912 llgfr $s1,$s1
1913 srlg $s2,$s3,32
1914 stg $s3,$tweak+8($sp)
1915 llgfr $s3,$s3
1916
1917 	x	$s0,0($out)		# ^=*(inp)|stolen cipher-text
1918 	x	$s1,4($out)
1919 	x	$s2,8($out)
1920 	x	$s3,12($out)
1921 st${g} $out,4*$SIZE_T($sp)
1922 la $key,0($key1)
1923 bras $ra,_s390x_AES_encrypt
1924 l${g} $out,4*$SIZE_T($sp)
1925 x $s0,`$tweak+0`($sp) # ^=tweak
1926 x $s1,`$tweak+4`($sp)
1927 x $s2,`$tweak+8`($sp)
1928 x $s3,`$tweak+12`($sp)
1929 st $s0,0($out)
1930 st $s1,4($out)
1931 st $s2,8($out)
1932 st $s3,12($out)
1933
1934 .Lxts_enc_done:
1935 	stg	$sp,$tweak+0($sp)	# wipe tweak
1936 	stg	$sp,$tweak+8($sp)	# wipe tweak (was misspelt "twesk": the undefined
1937 	lm${g}	%r6,$ra,6*$SIZE_T($sp)	# Perl var interpolated to "", corrupting the operand)
1938 	br	$ra
1939 .size	AES_xts_encrypt,.-AES_xts_encrypt
1940 ___
1941 # void AES_xts_decrypt(const unsigned char *inp, unsigned char *out,
1942 # size_t len, const AES_KEY *key1, const AES_KEY *key2,
1943 # const unsigned char iv[16]);
1944 #
1945 $code.=<<___;
1946 .globl AES_xts_decrypt
1947 .type AES_xts_decrypt,\@function
1948 .align 16
1949 AES_xts_decrypt:
1950 xgr %r3,%r4 # flip %r3 and %r4, $out and $len
1951 xgr %r4,%r3
1952 xgr %r3,%r4
1953 ___
1954 $code.=<<___ if ($SIZE_T==4);
1955 llgfr $len,$len
1956 ___
1957 $code.=<<___;
1958 st${g} $len,1*$SIZE_T($sp) # save copy of $len
1959 aghi $len,-16
1960 bcr 4,$ra # abort if less than zero. formally
1961 # wrong, because $len is unsigned,
1962 # but who can afford asking to
1963 # process more than 2^63-1 bytes?
1964 tmll $len,0x0f
1965 jnz .Lxts_dec_proceed
1966 aghi $len,16
1967 .Lxts_dec_proceed:
1968 ___
1969 $code.=<<___ if (!$softonly);
1970 llgf %r0,240($key2)
1971 lhi %r1,16
1972 clr %r0,%r1
1973 jl .Lxts_dec_software
1974
1975 st${g} $ra,5*$SIZE_T($sp)
1976 stm${g} %r6,$s3,6*$SIZE_T($sp)
1977
1978 nill $len,0xfff0 # $len&=~15
1979 slgr $out,$inp
1980
1981 # generate the tweak value
1982 l${g} $s3,$stdframe($sp) # pointer to iv
1983 la $s2,$tweak($sp)
1984 lmg $s0,$s1,0($s3)
1985 lghi $s3,16
1986 stmg $s0,$s1,0($s2)
1987 la %r1,0($key2) # $key2 is not needed past this point
1988 .long 0xb92e00aa # km $s2,$s2, generate the tweak
1989 brc 1,.-4 # can this happen?
1990
1991 l %r0,240($key1)
1992 la %r1,0($key1) # $key1 is not needed anymore
1993
1994 ltgr $len,$len
1995 jz .Lxts_dec_km_short
1996 bras $ra,_s390x_xts_km
1997 jz .Lxts_dec_km_done
1998
1999 lrvgr $s2,$s0 # make copy in reverse byte order
2000 lrvgr $s3,$s1
2001 j .Lxts_dec_km_2ndtweak
2002
2003 .Lxts_dec_km_short:
2004 llgc $len,`2*$SIZE_T-1`($sp)
2005 nill $len,0x0f # $len%=16
2006 lrvg $s0,$tweak+0($sp) # load the tweak
2007 lrvg $s1,$tweak+8($sp)
2008 lrvgr $s2,$s0 # make copy in reverse byte order
2009 lrvgr $s3,$s1
2010
2011 .Lxts_dec_km_2ndtweak:
2012 lghi $i1,0x87
2013 srag $i2,$s1,63 # broadcast upper bit
2014 ngr $i1,$i2 # rem
2015 algr $s0,$s0
2016 alcgr $s1,$s1
2017 xgr $s0,$i1
2018 lrvgr $i1,$s0 # flip byte order
2019 lrvgr $i2,$s1
2020
2021 xg $i1,0($inp)
2022 xg $i2,8($inp)
2023 stg $i1,0($out,$inp)
2024 stg $i2,8($out,$inp)
2025 la $i2,0($out,$inp)
2026 lghi $i3,16
2027 .long 0xb92e0066 # km $i2,$i2
2028 brc 1,.-4 # can this happen?
2029 lrvgr $i1,$s0
2030 lrvgr $i2,$s1
2031 xg $i1,0($out,$inp)
2032 xg $i2,8($out,$inp)
2033 stg $i1,0($out,$inp)
2034 stg $i2,8($out,$inp)
2035
2036 la $i3,0($out,$inp) # put aside real $out
2037 .Lxts_dec_km_steal:
2038 llgc $i1,16($inp)
2039 llgc $i2,0($out,$inp)
2040 stc $i1,0($out,$inp)
2041 stc $i2,16($out,$inp)
2042 la $inp,1($inp)
2043 brct $len,.Lxts_dec_km_steal
2044
2045 lgr $s0,$s2
2046 lgr $s1,$s3
2047 xg $s0,0($i3)
2048 xg $s1,8($i3)
2049 stg $s0,0($i3)
2050 stg $s1,8($i3)
2051 la $s0,0($i3)
2052 lghi $s1,16
2053 .long 0xb92e0088 # km $s0,$s0
2054 brc 1,.-4 # can this happen?
2055 xg $s2,0($i3)
2056 xg $s3,8($i3)
2057 stg $s2,0($i3)
2058 stg $s3,8($i3)
2059 .Lxts_dec_km_done:
2060 stg $sp,$tweak+0($sp) # wipe tweak
2061 stg $sp,$tweak+8($sp)
2062 l${g} $ra,5*$SIZE_T($sp)
2063 lm${g} %r6,$s3,6*$SIZE_T($sp)
2064 br $ra
2065 .align 16
2066 .Lxts_dec_software:
2067 ___
2068 $code.=<<___;
2069 stm${g} %r6,$ra,6*$SIZE_T($sp)
2070
2071 srlg $len,$len,4
2072 slgr $out,$inp
2073
2074 l${g} $s3,$stdframe($sp) # ivp
2075 llgf $s0,0($s3) # load iv
2076 llgf $s1,4($s3)
2077 llgf $s2,8($s3)
2078 llgf $s3,12($s3)
2079 stm${g} %r2,%r5,2*$SIZE_T($sp)
2080 la $key,0($key2)
2081 larl $tbl,AES_Te
2082 bras $ra,_s390x_AES_encrypt # generate the tweak
2083 lm${g} %r2,%r5,2*$SIZE_T($sp)
2084 larl $tbl,AES_Td
2085 lt${g}r $len,$len
2086 stm $s0,$s3,$tweak($sp) # save the tweak
2087 jz .Lxts_dec_short
2088 j .Lxts_dec_enter
2089
2090 .align 16
2091 .Lxts_dec_loop:
2092 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
2093 lrvg $s3,$tweak+8($sp)
2094 lghi %r1,0x87
2095 srag %r0,$s3,63 # broadcast upper bit
2096 ngr %r1,%r0 # rem
2097 algr $s1,$s1
2098 alcgr $s3,$s3
2099 xgr $s1,%r1
2100 lrvgr $s1,$s1 # flip byte order
2101 lrvgr $s3,$s3
2102 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
2103 stg $s1,$tweak+0($sp) # save the tweak
2104 llgfr $s1,$s1
2105 srlg $s2,$s3,32
2106 stg $s3,$tweak+8($sp)
2107 llgfr $s3,$s3
2108 .Lxts_dec_enter:
2109 x $s0,0($inp) # tweak^=*(inp)
2110 x $s1,4($inp)
2111 x $s2,8($inp)
2112 x $s3,12($inp)
2113 stm${g} %r2,%r3,2*$SIZE_T($sp) # only two registers are changing
2114 la $key,0($key1)
2115 bras $ra,_s390x_AES_decrypt
2116 lm${g} %r2,%r5,2*$SIZE_T($sp)
2117 x $s0,$tweak+0($sp) # ^=tweak
2118 x $s1,$tweak+4($sp)
2119 x $s2,$tweak+8($sp)
2120 x $s3,$tweak+12($sp)
2121 st $s0,0($out,$inp)
2122 st $s1,4($out,$inp)
2123 st $s2,8($out,$inp)
2124 st $s3,12($out,$inp)
2125 la $inp,16($inp)
2126 brct${g} $len,.Lxts_dec_loop
2127
2128 llgc $len,`2*$SIZE_T-1`($sp)
2129 nill $len,0x0f # $len%16
2130 jz .Lxts_dec_done
2131
2132 # generate pair of tweaks...
2133 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
2134 lrvg $s3,$tweak+8($sp)
2135 lghi %r1,0x87
2136 srag %r0,$s3,63 # broadcast upper bit
2137 ngr %r1,%r0 # rem
2138 algr $s1,$s1
2139 alcgr $s3,$s3
2140 xgr $s1,%r1
2141 lrvgr $i2,$s1 # flip byte order
2142 lrvgr $i3,$s3
2143 stmg $i2,$i3,$tweak($sp) # save the 1st tweak
2144 j .Lxts_dec_2ndtweak
2145
2146 .align 16
2147 .Lxts_dec_short:
2148 llgc $len,`2*$SIZE_T-1`($sp)
2149 nill $len,0x0f # $len%16
2150 lrvg $s1,$tweak+0($sp) # load the tweak in little-endian
2151 lrvg $s3,$tweak+8($sp)
2152 .Lxts_dec_2ndtweak:
2153 lghi %r1,0x87
2154 srag %r0,$s3,63 # broadcast upper bit
2155 ngr %r1,%r0 # rem
2156 algr $s1,$s1
2157 alcgr $s3,$s3
2158 xgr $s1,%r1
2159 lrvgr $s1,$s1 # flip byte order
2160 lrvgr $s3,$s3
2161 srlg $s0,$s1,32 # smash the tweak to 4x32-bits
2162 stg $s1,$tweak-16+0($sp) # save the 2nd tweak
2163 llgfr $s1,$s1
2164 srlg $s2,$s3,32
2165 stg $s3,$tweak-16+8($sp)
2166 llgfr $s3,$s3
2167
2168 x $s0,0($inp) # tweak_the_2nd^=*(inp)
2169 x $s1,4($inp)
2170 x $s2,8($inp)
2171 x $s3,12($inp)
2172 stm${g} %r2,%r3,2*$SIZE_T($sp)
2173 la $key,0($key1)
2174 bras $ra,_s390x_AES_decrypt
2175 lm${g} %r2,%r5,2*$SIZE_T($sp)
2176 x $s0,$tweak-16+0($sp) # ^=tweak_the_2nd
2177 x $s1,$tweak-16+4($sp)
2178 x $s2,$tweak-16+8($sp)
2179 x $s3,$tweak-16+12($sp)
2180 st $s0,0($out,$inp)
2181 st $s1,4($out,$inp)
2182 st $s2,8($out,$inp)
2183 st $s3,12($out,$inp)
2184
2185 la $i3,0($out,$inp) # put aside real $out
2186 .Lxts_dec_steal:
2187 llgc %r0,16($inp)
2188 llgc %r1,0($out,$inp)
2189 stc %r0,0($out,$inp)
2190 stc %r1,16($out,$inp)
2191 la $inp,1($inp)
2192 brct $len,.Lxts_dec_steal
2193 la $out,0($i3) # restore real $out
2194
2195 lm $s0,$s3,$tweak($sp) # load the 1st tweak
2196 x $s0,0($out) # tweak^=*(inp)|stolen cipher-text
2197 x $s1,4($out)
2198 x $s2,8($out)
2199 x $s3,12($out)
2200 st${g} $out,4*$SIZE_T($sp)
2201 la $key,0($key1)
2202 bras $ra,_s390x_AES_decrypt
2203 l${g} $out,4*$SIZE_T($sp)
2204 x $s0,$tweak+0($sp) # ^=tweak
2205 x $s1,$tweak+4($sp)
2206 x $s2,$tweak+8($sp)
2207 x $s3,$tweak+12($sp)
2208 st $s0,0($out)
2209 st $s1,4($out)
2210 st $s2,8($out)
2211 st $s3,12($out)
2212 stg $sp,$tweak-16+0($sp) # wipe 2nd tweak
2213 stg $sp,$tweak-16+8($sp)
2214 .Lxts_dec_done:
2215 	stg	$sp,$tweak+0($sp)	# wipe tweak
2216 	stg	$sp,$tweak+8($sp)	# wipe tweak (was misspelt "twesk": the undefined
2217 	lm${g}	%r6,$ra,6*$SIZE_T($sp)	# Perl var interpolated to "", corrupting the operand)
2218 	br	$ra
2219 .size	AES_xts_decrypt,.-AES_xts_decrypt
2220 ___
2221 }
2222 $code.=<<___;
2223 .string	"AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
2224 ___
2225
2226 $code =~ s/\`([^\`]*)\`/eval $1/gem;	# expand `...` compile-time expressions
2227 print $code;
2228 close STDOUT or die "error closing STDOUT: $!";	# report buffered-write failures