]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/aes/asm/aes-armv4.pl
Add assembly support for 32-bit iOS.
[thirdparty/openssl.git] / crypto / aes / asm / aes-armv4.pl
1 #!/usr/bin/env perl
2
3 # ====================================================================
4 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
5 # project. The module is, however, dual licensed under OpenSSL and
6 # CRYPTOGAMS licenses depending on where you obtain it. For further
7 # details see http://www.openssl.org/~appro/cryptogams/.
8 # ====================================================================
9
10 # AES for ARMv4
11
12 # January 2007.
13 #
14 # The code uses a single 1K S-box and is >2 times faster than code
15 # generated by gcc-3.4.1. This is thanks to a unique feature of the
16 # ARMv4 ISA, which allows merging a logical or arithmetic operation
17 # with a shift or rotate in one instruction and emitting the combined
18 # result every cycle. The module is endian-neutral. The performance is
19 # ~42 cycles/byte for a 128-bit key [on a single-issue Xscale PXA250 core].
20
21 # May 2007.
22 #
23 # AES_set_[en|de]crypt_key is added.
24
25 # July 2010.
26 #
27 # Rescheduling for dual-issue pipeline resulted in 12% improvement on
28 # Cortex A8 core and ~25 cycles per byte processed with 128-bit key.
29
30 # February 2011.
31 #
32 # Profiler-assisted and platform-specific optimization resulted in 16%
33 # improvement on Cortex A8 core and ~21.5 cycles per byte.
34
# Command line: [flavour] output-file.  The first argument is the perlasm
# flavour unless it already looks like a file name ("name.ext"), in which
# case it is the output file and no flavour is in effect.  Otherwise the
# remaining arguments are scanned for the first one that looks like a file
# name.  The patterns are anchored at the end only, so an output path with
# leading directory components is recognised as well.
$flavour = shift;
if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }

if ($flavour && $flavour ne "void") {
    # Pipe the generated code through arm-xlate.pl, looked up next to this
    # script first and then in ../../perlasm/.
    $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
    ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
    ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
    die "can't locate arm-xlate.pl";

    # Fail loudly instead of silently producing no output.
    open STDOUT,"| \"$^X\" $xlate $flavour \"$output\""
        or die "can't call $xlate: $!";
} else {
    # No translation: write straight to the output file, or leave STDOUT
    # untouched when no output file was named.
    $output and (open STDOUT,">$output" or die "can't open $output: $!");
}

# Register allocation shared by all routines below.
$s0="r0";       # s0-s3: the four 32-bit words of the AES state
$s1="r1";
$s2="r2";
$s3="r3";
$t1="r4";       # t1-t3: temporaries
$t2="r5";
$t3="r6";
$i1="r7";       # i1-i3: table indices and further temporaries
$i2="r8";
$i3="r9";

$tbl="r10";     # lookup table base
$key="r11";     # key schedule pointer
$rounds="r12";  # round counter; doubles as input/output pointer
64
65 $code=<<___;
66 #ifndef __KERNEL__
67 # include "arm_arch.h"
68 #else
69 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
70 #endif
71
72 .text
73 #if __ARM_ARCH__<7
74 .code 32
75 #else
76 .syntax unified
77 # if defined(__thumb2__) && !defined(__APPLE__)
78 .thumb
79 # else
80 .code 32
81 # endif
82 #endif
83
84 .type AES_Te,%object
85 .align 5
86 AES_Te:
87 .word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
88 .word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
89 .word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
90 .word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
91 .word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
92 .word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
93 .word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
94 .word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
95 .word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
96 .word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
97 .word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
98 .word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
99 .word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
100 .word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
101 .word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
102 .word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
103 .word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
104 .word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
105 .word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
106 .word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
107 .word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
108 .word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
109 .word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
110 .word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
111 .word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
112 .word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
113 .word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
114 .word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
115 .word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
116 .word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
117 .word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
118 .word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
119 .word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
120 .word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
121 .word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
122 .word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
123 .word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
124 .word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
125 .word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
126 .word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
127 .word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
128 .word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
129 .word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
130 .word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
131 .word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
132 .word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
133 .word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
134 .word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
135 .word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
136 .word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
137 .word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
138 .word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
139 .word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
140 .word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
141 .word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
142 .word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
143 .word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
144 .word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
145 .word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
146 .word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
147 .word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
148 .word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
149 .word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
150 .word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
151 @ Te4[256]
152 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
153 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
154 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
155 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
156 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
157 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
158 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
159 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
160 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
161 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
162 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
163 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
164 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
165 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
166 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
167 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
168 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
169 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
170 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
171 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
172 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
173 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
174 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
175 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
176 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
177 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
178 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
179 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
180 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
181 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
182 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
183 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
184 @ rcon[]
185 .word 0x01000000, 0x02000000, 0x04000000, 0x08000000
186 .word 0x10000000, 0x20000000, 0x40000000, 0x80000000
187 .word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
188 .size AES_Te,.-AES_Te
189
@ void AES_encrypt(const unsigned char *in, unsigned char *out,
@ 		 const AES_KEY *key) {
@
@ Encrypts one 16-byte block; r0=in, r1=out, r2=key on entry.  The block
@ is assembled into four big-endian words in s0-s3 (r0-r3), handed to
@ _armv4_AES_encrypt with tbl pointing at AES_Te, and stored to out in
@ the same byte order.  r1 is pushed first so that it can be popped into
@ a scratch register while the state still occupies r0-r3.
.global AES_encrypt
.type	AES_encrypt,%function
.align	5
AES_encrypt:
#if __ARM_ARCH__<7
	sub	r3,pc,#8		@ AES_encrypt
#else
	adr	r3,.			@ address of this instruction, which is
					@ AES_encrypt; naming the function symbol
					@ here can yield the address with the
					@ Thumb bit set and skew the Te offset
#endif
	stmdb	sp!,{r1,r4-r12,lr}
#ifdef	__APPLE__
	adr	$tbl,AES_Te
#else
	sub	$tbl,r3,#AES_encrypt-AES_Te	@ Te
#endif
	mov	$rounds,r0		@ inp
	mov	$key,r2
#if __ARM_ARCH__<7
	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
	ldrb	$t1,[$rounds,#2]	@ manner...
	ldrb	$t2,[$rounds,#1]
	ldrb	$t3,[$rounds,#0]
	orr	$s0,$s0,$t1,lsl#8
	ldrb	$s1,[$rounds,#7]
	orr	$s0,$s0,$t2,lsl#16
	ldrb	$t1,[$rounds,#6]
	orr	$s0,$s0,$t3,lsl#24
	ldrb	$t2,[$rounds,#5]
	ldrb	$t3,[$rounds,#4]
	orr	$s1,$s1,$t1,lsl#8
	ldrb	$s2,[$rounds,#11]
	orr	$s1,$s1,$t2,lsl#16
	ldrb	$t1,[$rounds,#10]
	orr	$s1,$s1,$t3,lsl#24
	ldrb	$t2,[$rounds,#9]
	ldrb	$t3,[$rounds,#8]
	orr	$s2,$s2,$t1,lsl#8
	ldrb	$s3,[$rounds,#15]
	orr	$s2,$s2,$t2,lsl#16
	ldrb	$t1,[$rounds,#14]
	orr	$s2,$s2,$t3,lsl#24
	ldrb	$t2,[$rounds,#13]
	ldrb	$t3,[$rounds,#12]
	orr	$s3,$s3,$t1,lsl#8
	orr	$s3,$s3,$t2,lsl#16
	orr	$s3,$s3,$t3,lsl#24
#else
	ldr	$s0,[$rounds,#0]	@ word loads, byte-swapped below on
	ldr	$s1,[$rounds,#4]	@ little-endian builds
	ldr	$s2,[$rounds,#8]
	ldr	$s3,[$rounds,#12]
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
#endif
	bl	_armv4_AES_encrypt

	ldr	$rounds,[sp],#4		@ pop out
#if __ARM_ARCH__>=7
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
	str	$s0,[$rounds,#0]
	str	$s1,[$rounds,#4]
	str	$s2,[$rounds,#8]
	str	$s3,[$rounds,#12]
#else
	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
	mov	$t2,$s0,lsr#16		@ manner...
	mov	$t3,$s0,lsr#8
	strb	$t1,[$rounds,#0]
	strb	$t2,[$rounds,#1]
	mov	$t1,$s1,lsr#24
	strb	$t3,[$rounds,#2]
	mov	$t2,$s1,lsr#16
	strb	$s0,[$rounds,#3]
	mov	$t3,$s1,lsr#8
	strb	$t1,[$rounds,#4]
	strb	$t2,[$rounds,#5]
	mov	$t1,$s2,lsr#24
	strb	$t3,[$rounds,#6]
	mov	$t2,$s2,lsr#16
	strb	$s1,[$rounds,#7]
	mov	$t3,$s2,lsr#8
	strb	$t1,[$rounds,#8]
	strb	$t2,[$rounds,#9]
	mov	$t1,$s3,lsr#24
	strb	$t3,[$rounds,#10]
	mov	$t2,$s3,lsr#16
	strb	$s2,[$rounds,#11]
	mov	$t3,$s3,lsr#8
	strb	$t1,[$rounds,#12]
	strb	$t2,[$rounds,#13]
	strb	$t3,[$rounds,#14]
	strb	$s3,[$rounds,#15]
#endif
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	AES_encrypt,.-AES_encrypt
303
@ _armv4_AES_encrypt: block encryption core, private to this file.
@ In:  s0-s3 (r0-r3) = state words, key (r11) = key schedule,
@      tbl (r10) = AES_Te.
@ Out: s0-s3 = encrypted state.  tbl is restored; key, rounds (r12),
@      r4-r9 and lr are clobbered.
@ Only the single Te table is used; the Te1..Te3 values named in the
@ comments are obtained by rotating the loaded word (ror 8/16/24).
304 .type _armv4_AES_encrypt,%function
305 .align 2
306 _armv4_AES_encrypt:
307 str lr,[sp,#-4]! @ push lr
308 ldmia $key!,{$t1-$i1} @ first round key, key += 16
309 eor $s0,$s0,$t1
310 ldr $rounds,[$key,#240-16] @ round count, kept at offset 240 of the schedule
311 eor $s1,$s1,$t2
312 eor $s2,$s2,$t3
313 eor $s3,$s3,$i1
314 sub $rounds,$rounds,#1 @ the last round is handled after the loop
315 mov lr,#255 @ byte mask
316
317 and $i1,lr,$s0
318 and $i2,lr,$s0,lsr#8
319 and $i3,lr,$s0,lsr#16
320 mov $s0,$s0,lsr#24
321 .Lenc_loop:
322 ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0]
323 and $i1,lr,$s1,lsr#16 @ i0
324 ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8]
325 and $i2,lr,$s1
326 ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16]
327 and $i3,lr,$s1,lsr#8
328 ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24]
329 mov $s1,$s1,lsr#24
330
331 ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16]
332 ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0]
333 ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8]
334 eor $s0,$s0,$i1,ror#8
335 ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24]
336 and $i1,lr,$s2,lsr#8 @ i0
337 eor $t2,$t2,$i2,ror#8
338 and $i2,lr,$s2,lsr#16 @ i1
339 eor $t3,$t3,$i3,ror#8
340 and $i3,lr,$s2
341 ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8]
342 eor $s1,$s1,$t1,ror#24
343 ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16]
344 mov $s2,$s2,lsr#24
345
346 ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0]
347 eor $s0,$s0,$i1,ror#16
348 ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24]
349 and $i1,lr,$s3 @ i0
350 eor $s1,$s1,$i2,ror#8
351 and $i2,lr,$s3,lsr#8 @ i1
352 eor $t3,$t3,$i3,ror#16
353 and $i3,lr,$s3,lsr#16 @ i2
354 ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0]
355 eor $s2,$s2,$t2,ror#16
356 ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8]
357 mov $s3,$s3,lsr#24
358
359 ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16]
360 eor $s0,$s0,$i1,ror#24
361 ldr $i1,[$key],#16 @ next round key, word 0; key += 16
362 eor $s1,$s1,$i2,ror#16
363 ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24]
364 eor $s2,$s2,$i3,ror#8
365 ldr $t1,[$key,#-12]
366 eor $s3,$s3,$t3,ror#8
367
368 ldr $t2,[$key,#-8]
369 eor $s0,$s0,$i1
370 ldr $t3,[$key,#-4]
371 and $i1,lr,$s0
372 eor $s1,$s1,$t1
373 and $i2,lr,$s0,lsr#8
374 eor $s2,$s2,$t2
375 and $i3,lr,$s0,lsr#16
376 eor $s3,$s3,$t3
377 mov $s0,$s0,lsr#24
378
379 subs $rounds,$rounds,#1
380 bne .Lenc_loop
381
@ Last round: byte loads only.  Byte 2 of every Te word holds the plain
@ S-box value in either byte order, so tbl+2 indexed by 4*i acts as the
@ table the comments below call Te4.
382 add $tbl,$tbl,#2
383
384 ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0]
385 and $i1,lr,$s1,lsr#16 @ i0
386 ldrb $t2,[$tbl,$i2,lsl#2] @ Te4[s0>>8]
387 and $i2,lr,$s1
388 ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16]
389 and $i3,lr,$s1,lsr#8
390 ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24]
391 mov $s1,$s1,lsr#24
392
393 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16]
394 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0]
395 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8]
396 eor $s0,$i1,$s0,lsl#8
397 ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24]
398 and $i1,lr,$s2,lsr#8 @ i0
399 eor $t2,$i2,$t2,lsl#8
400 and $i2,lr,$s2,lsr#16 @ i1
401 eor $t3,$i3,$t3,lsl#8
402 and $i3,lr,$s2
403 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8]
404 eor $s1,$t1,$s1,lsl#24
405 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16]
406 mov $s2,$s2,lsr#24
407
408 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0]
409 eor $s0,$i1,$s0,lsl#8
410 ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24]
411 and $i1,lr,$s3 @ i0
412 eor $s1,$s1,$i2,lsl#16
413 and $i2,lr,$s3,lsr#8 @ i1
414 eor $t3,$i3,$t3,lsl#8
415 and $i3,lr,$s3,lsr#16 @ i2
416 ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0]
417 eor $s2,$t2,$s2,lsl#24
418 ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8]
419 mov $s3,$s3,lsr#24
420
421 ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16]
422 eor $s0,$i1,$s0,lsl#8
423 ldr $i1,[$key,#0] @ final round key
424 ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24]
425 eor $s1,$s1,$i2,lsl#8
426 ldr $t1,[$key,#4]
427 eor $s2,$s2,$i3,lsl#16
428 ldr $t2,[$key,#8]
429 eor $s3,$t3,$s3,lsl#24
430 ldr $t3,[$key,#12]
431
432 eor $s0,$s0,$i1
433 eor $s1,$s1,$t1
434 eor $s2,$s2,$t2
435 eor $s3,$s3,$t3
436
437 sub $tbl,$tbl,#2 @ undo the +2 above
438 ldr pc,[sp],#4 @ pop and return
439 .size _armv4_AES_encrypt,.-_armv4_AES_encrypt
440
@ AES_set_encrypt_key: expand a user key into an encryption key schedule.
@ In:  r0 = key bytes, r1 = key length in bits (128, 192 or 256),
@      r2 = AES_KEY to fill.
@ Out: r0 = 0 on success, -1 if r0 or r2 is zero or r1 is not one of the
@      three supported lengths.  On success r2 again points at the start
@      of the schedule and the round count (10, 12 or 14) is stored at
@      offset 240 of it.
.global AES_set_encrypt_key
.type	AES_set_encrypt_key,%function
.align	5
AES_set_encrypt_key:
_armv4_AES_set_encrypt_key:
#if __ARM_ARCH__<7
	sub	r3,pc,#8		@ AES_set_encrypt_key
#else
	adr	r3,.			@ address of this instruction, which is
					@ AES_set_encrypt_key; naming the function
					@ symbol here can yield the address with
					@ the Thumb bit set and skew the Te4
					@ offset computed below
#endif
	teq	r0,#0
#if __ARM_ARCH__>=7
	itt	eq			@ Thumb2 thing, sanity check in ARM
#endif
	moveq	r0,#-1
	beq	.Labrt
	teq	r2,#0
#if __ARM_ARCH__>=7
	itt	eq			@ Thumb2 thing, sanity check in ARM
#endif
	moveq	r0,#-1
	beq	.Labrt

	teq	r1,#128
	beq	.Lok
	teq	r1,#192
	beq	.Lok
	teq	r1,#256
#if __ARM_ARCH__>=7
	itt	ne			@ Thumb2 thing, sanity check in ARM
#endif
	movne	r0,#-1
	bne	.Labrt

.Lok:	stmdb	sp!,{r4-r12,lr}
	mov	$rounds,r0		@ inp
	mov	lr,r1			@ bits
	mov	$key,r2			@ key

#ifdef	__APPLE__
	adr	$tbl,AES_Te+1024				@ Te4
#else
	sub	$tbl,r3,#_armv4_AES_set_encrypt_key-AES_Te-1024	@ Te4
#endif

#if __ARM_ARCH__<7
	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
	ldrb	$t1,[$rounds,#2]	@ manner...
	ldrb	$t2,[$rounds,#1]
	ldrb	$t3,[$rounds,#0]
	orr	$s0,$s0,$t1,lsl#8
	ldrb	$s1,[$rounds,#7]
	orr	$s0,$s0,$t2,lsl#16
	ldrb	$t1,[$rounds,#6]
	orr	$s0,$s0,$t3,lsl#24
	ldrb	$t2,[$rounds,#5]
	ldrb	$t3,[$rounds,#4]
	orr	$s1,$s1,$t1,lsl#8
	ldrb	$s2,[$rounds,#11]
	orr	$s1,$s1,$t2,lsl#16
	ldrb	$t1,[$rounds,#10]
	orr	$s1,$s1,$t3,lsl#24
	ldrb	$t2,[$rounds,#9]
	ldrb	$t3,[$rounds,#8]
	orr	$s2,$s2,$t1,lsl#8
	ldrb	$s3,[$rounds,#15]
	orr	$s2,$s2,$t2,lsl#16
	ldrb	$t1,[$rounds,#14]
	orr	$s2,$s2,$t3,lsl#24
	ldrb	$t2,[$rounds,#13]
	ldrb	$t3,[$rounds,#12]
	orr	$s3,$s3,$t1,lsl#8
	str	$s0,[$key],#16
	orr	$s3,$s3,$t2,lsl#16
	str	$s1,[$key,#-12]
	orr	$s3,$s3,$t3,lsl#24
	str	$s2,[$key,#-8]
	str	$s3,[$key,#-4]
#else
	ldr	$s0,[$rounds,#0]
	ldr	$s1,[$rounds,#4]
	ldr	$s2,[$rounds,#8]
	ldr	$s3,[$rounds,#12]
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
	str	$s0,[$key],#16
	str	$s1,[$key,#-12]
	str	$s2,[$key,#-8]
	str	$s3,[$key,#-4]
#endif

	teq	lr,#128
	bne	.Lnot128
	mov	$rounds,#10
	str	$rounds,[$key,#240-16]
	add	$t3,$tbl,#256			@ rcon
	mov	lr,#255

.L128_loop:
	and	$t2,lr,$s3,lsr#24
	and	$i1,lr,$s3,lsr#16
	ldrb	$t2,[$tbl,$t2]
	and	$i2,lr,$s3,lsr#8
	ldrb	$i1,[$tbl,$i1]
	and	$i3,lr,$s3
	ldrb	$i2,[$tbl,$i2]
	orr	$t2,$t2,$i1,lsl#24
	ldrb	$i3,[$tbl,$i3]
	orr	$t2,$t2,$i2,lsl#16
	ldr	$t1,[$t3],#4			@ rcon[i++]
	orr	$t2,$t2,$i3,lsl#8
	eor	$t2,$t2,$t1
	eor	$s0,$s0,$t2			@ rk[4]=rk[0]^...
	eor	$s1,$s1,$s0			@ rk[5]=rk[1]^rk[4]
	str	$s0,[$key],#16
	eor	$s2,$s2,$s1			@ rk[6]=rk[2]^rk[5]
	str	$s1,[$key,#-12]
	eor	$s3,$s3,$s2			@ rk[7]=rk[3]^rk[6]
	str	$s2,[$key,#-8]
	subs	$rounds,$rounds,#1
	str	$s3,[$key,#-4]
	bne	.L128_loop
	sub	r2,$key,#176			@ back to the start of the schedule
	b	.Ldone

.Lnot128:
#if __ARM_ARCH__<7
	ldrb	$i2,[$rounds,#19]
	ldrb	$t1,[$rounds,#18]
	ldrb	$t2,[$rounds,#17]
	ldrb	$t3,[$rounds,#16]
	orr	$i2,$i2,$t1,lsl#8
	ldrb	$i3,[$rounds,#23]
	orr	$i2,$i2,$t2,lsl#16
	ldrb	$t1,[$rounds,#22]
	orr	$i2,$i2,$t3,lsl#24
	ldrb	$t2,[$rounds,#21]
	ldrb	$t3,[$rounds,#20]
	orr	$i3,$i3,$t1,lsl#8
	orr	$i3,$i3,$t2,lsl#16
	str	$i2,[$key],#8
	orr	$i3,$i3,$t3,lsl#24
	str	$i3,[$key,#-4]
#else
	ldr	$i2,[$rounds,#16]
	ldr	$i3,[$rounds,#20]
#ifdef __ARMEL__
	rev	$i2,$i2
	rev	$i3,$i3
#endif
	str	$i2,[$key],#8
	str	$i3,[$key,#-4]
#endif

	teq	lr,#192
	bne	.Lnot192
	mov	$rounds,#12
	str	$rounds,[$key,#240-24]
	add	$t3,$tbl,#256			@ rcon
	mov	lr,#255
	mov	$rounds,#8			@ loop iterations, not AES rounds

.L192_loop:
	and	$t2,lr,$i3,lsr#24
	and	$i1,lr,$i3,lsr#16
	ldrb	$t2,[$tbl,$t2]
	and	$i2,lr,$i3,lsr#8
	ldrb	$i1,[$tbl,$i1]
	and	$i3,lr,$i3
	ldrb	$i2,[$tbl,$i2]
	orr	$t2,$t2,$i1,lsl#24
	ldrb	$i3,[$tbl,$i3]
	orr	$t2,$t2,$i2,lsl#16
	ldr	$t1,[$t3],#4			@ rcon[i++]
	orr	$t2,$t2,$i3,lsl#8
	eor	$i3,$t2,$t1
	eor	$s0,$s0,$i3			@ rk[6]=rk[0]^...
	eor	$s1,$s1,$s0			@ rk[7]=rk[1]^rk[6]
	str	$s0,[$key],#24
	eor	$s2,$s2,$s1			@ rk[8]=rk[2]^rk[7]
	str	$s1,[$key,#-20]
	eor	$s3,$s3,$s2			@ rk[9]=rk[3]^rk[8]
	str	$s2,[$key,#-16]
	subs	$rounds,$rounds,#1
	str	$s3,[$key,#-12]
#if __ARM_ARCH__>=7
	itt	eq				@ Thumb2 thing, sanity check in ARM
#endif
	subeq	r2,$key,#216			@ back to the start of the schedule
	beq	.Ldone

	ldr	$i1,[$key,#-32]
	ldr	$i2,[$key,#-28]
	eor	$i1,$i1,$s3			@ rk[10]=rk[4]^rk[9]
	eor	$i3,$i2,$i1			@ rk[11]=rk[5]^rk[10]
	str	$i1,[$key,#-8]
	str	$i3,[$key,#-4]
	b	.L192_loop

.Lnot192:
#if __ARM_ARCH__<7
	ldrb	$i2,[$rounds,#27]
	ldrb	$t1,[$rounds,#26]
	ldrb	$t2,[$rounds,#25]
	ldrb	$t3,[$rounds,#24]
	orr	$i2,$i2,$t1,lsl#8
	ldrb	$i3,[$rounds,#31]
	orr	$i2,$i2,$t2,lsl#16
	ldrb	$t1,[$rounds,#30]
	orr	$i2,$i2,$t3,lsl#24
	ldrb	$t2,[$rounds,#29]
	ldrb	$t3,[$rounds,#28]
	orr	$i3,$i3,$t1,lsl#8
	orr	$i3,$i3,$t2,lsl#16
	str	$i2,[$key],#8
	orr	$i3,$i3,$t3,lsl#24
	str	$i3,[$key,#-4]
#else
	ldr	$i2,[$rounds,#24]
	ldr	$i3,[$rounds,#28]
#ifdef __ARMEL__
	rev	$i2,$i2
	rev	$i3,$i3
#endif
	str	$i2,[$key],#8
	str	$i3,[$key,#-4]
#endif

	mov	$rounds,#14
	str	$rounds,[$key,#240-32]
	add	$t3,$tbl,#256			@ rcon
	mov	lr,#255
	mov	$rounds,#7			@ loop iterations, not AES rounds

.L256_loop:
	and	$t2,lr,$i3,lsr#24
	and	$i1,lr,$i3,lsr#16
	ldrb	$t2,[$tbl,$t2]
	and	$i2,lr,$i3,lsr#8
	ldrb	$i1,[$tbl,$i1]
	and	$i3,lr,$i3
	ldrb	$i2,[$tbl,$i2]
	orr	$t2,$t2,$i1,lsl#24
	ldrb	$i3,[$tbl,$i3]
	orr	$t2,$t2,$i2,lsl#16
	ldr	$t1,[$t3],#4			@ rcon[i++]
	orr	$t2,$t2,$i3,lsl#8
	eor	$i3,$t2,$t1
	eor	$s0,$s0,$i3			@ rk[8]=rk[0]^...
	eor	$s1,$s1,$s0			@ rk[9]=rk[1]^rk[8]
	str	$s0,[$key],#32
	eor	$s2,$s2,$s1			@ rk[10]=rk[2]^rk[9]
	str	$s1,[$key,#-28]
	eor	$s3,$s3,$s2			@ rk[11]=rk[3]^rk[10]
	str	$s2,[$key,#-24]
	subs	$rounds,$rounds,#1
	str	$s3,[$key,#-20]
#if __ARM_ARCH__>=7
	itt	eq				@ Thumb2 thing, sanity check in ARM
#endif
	subeq	r2,$key,#256			@ back to the start of the schedule
	beq	.Ldone

	and	$t2,lr,$s3
	and	$i1,lr,$s3,lsr#8
	ldrb	$t2,[$tbl,$t2]
	and	$i2,lr,$s3,lsr#16
	ldrb	$i1,[$tbl,$i1]
	and	$i3,lr,$s3,lsr#24
	ldrb	$i2,[$tbl,$i2]
	orr	$t2,$t2,$i1,lsl#8
	ldrb	$i3,[$tbl,$i3]
	orr	$t2,$t2,$i2,lsl#16
	ldr	$t1,[$key,#-48]
	orr	$t2,$t2,$i3,lsl#24

	ldr	$i1,[$key,#-44]
	ldr	$i2,[$key,#-40]
	eor	$t1,$t1,$t2			@ rk[12]=rk[4]^...
	ldr	$i3,[$key,#-36]
	eor	$i1,$i1,$t1			@ rk[13]=rk[5]^rk[12]
	str	$t1,[$key,#-16]
	eor	$i2,$i2,$i1			@ rk[14]=rk[6]^rk[13]
	str	$i1,[$key,#-12]
	eor	$i3,$i3,$i2			@ rk[15]=rk[7]^rk[14]
	str	$i2,[$key,#-8]
	str	$i3,[$key,#-4]
	b	.L256_loop

.align	2
.Ldone:	mov	r0,#0
	ldmia	sp!,{r4-r12,lr}
.Labrt:
#if __ARM_ARCH__>=5
	ret				@ bx lr
#else
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	AES_set_encrypt_key,.-AES_set_encrypt_key
746
@ AES_set_decrypt_key: takes the same arguments as AES_set_encrypt_key
@ (r0, r1, r2).  Builds the encryption schedule first; if that fails the
@ error code in r0 is returned through .Labrt, otherwise the schedule is
@ converted in place by branching to _armv4_AES_set_enc2dec_key, which
@ then returns directly to our caller.
747 .global AES_set_decrypt_key
748 .type AES_set_decrypt_key,%function
749 .align 5
750 AES_set_decrypt_key:
751 str lr,[sp,#-4]! @ push lr
752 bl _armv4_AES_set_encrypt_key
753 teq r0,#0
754 ldr lr,[sp],#4 @ pop lr
755 bne .Labrt @ still testing the teq above; ldr leaves the flags alone
756
757 mov r0,r2 @ AES_set_encrypt_key preserves r2,
758 mov r1,r2 @ which is AES_KEY *key
759 b _armv4_AES_set_enc2dec_key
760 .size AES_set_decrypt_key,.-AES_set_decrypt_key
761
762 @ void AES_set_enc2dec_key(const AES_KEY *inp,AES_KEY *out)
@ Converts an encryption key schedule into the matching decryption
@ schedule.  The round keys are first written to out in reverse order;
@ every iteration loads from both ends before storing, so inp and out
@ may be the same buffer (as when entered from AES_set_decrypt_key).
@ The words of round keys 1..rounds-1 are then passed through the .Lmix
@ transform.  r0 is set to 0 on exit.
763 .global AES_set_enc2dec_key
764 .type AES_set_enc2dec_key,%function
765 .align 5
766 AES_set_enc2dec_key:
767 _armv4_AES_set_enc2dec_key:
768 stmdb sp!,{r4-r12,lr}
769
770 ldr $rounds,[r0,#240]
771 mov $i1,r0 @ input
772 add $i2,r0,$rounds,lsl#4 @ last round key of the input
773 mov $key,r1 @ output
774 add $tbl,r1,$rounds,lsl#4 @ last round key of the output
775 str $rounds,[r1,#240]
776
777 .Linv: ldr $s0,[$i1],#16
778 ldr $s1,[$i1,#-12]
779 ldr $s2,[$i1,#-8]
780 ldr $s3,[$i1,#-4]
781 ldr $t1,[$i2],#-16
782 ldr $t2,[$i2,#16+4]
783 ldr $t3,[$i2,#16+8]
784 ldr $i3,[$i2,#16+12]
785 str $s0,[$tbl],#-16
786 str $s1,[$tbl,#16+4]
787 str $s2,[$tbl,#16+8]
788 str $s3,[$tbl,#16+12]
789 str $t1,[$key],#16
790 str $t2,[$key,#-12]
791 str $t3,[$key,#-8]
792 str $i3,[$key,#-4]
793 teq $i1,$i2 @ pointers meet at the middle round key
794 bne .Linv
795
796 ldr $s0,[$i1] @ copy the middle round key
797 ldr $s1,[$i1,#4]
798 ldr $s2,[$i1,#8]
799 ldr $s3,[$i1,#12]
800 str $s0,[$key]
801 str $s1,[$key,#4]
802 str $s2,[$key,#8]
803 str $s3,[$key,#12]
804 sub $key,$key,$rounds,lsl#3 @ back to the start of the output
805 ___
806 $mask80=$i1;
807 $mask1b=$i2;
808 $mask7f=$i3;
809 $code.=<<___;
810 ldr $s0,[$key,#16]! @ prefetch tp1
811 mov $mask80,#0x80
812 mov $mask1b,#0x1b
813 orr $mask80,$mask80,#0x8000
814 orr $mask1b,$mask1b,#0x1b00
815 orr $mask80,$mask80,$mask80,lsl#16 @ 0x80808080
816 orr $mask1b,$mask1b,$mask1b,lsl#16 @ 0x1b1b1b1b
817 sub $rounds,$rounds,#1
818 mvn $mask7f,$mask80 @ 0x7f7f7f7f
819 mov $rounds,$rounds,lsl#2 @ (rounds-1)*4
820
@ One key word per iteration, four bytes processed in parallel.  tp2,
@ tp4 and tp8 are tp1 doubled one, two and three times in GF(2^8): the
@ low seven bits of each byte are shifted left and 0x1b is folded in
@ wherever the top bit was set.  tp9, tpb, tpd and tpe are tp1 times
@ 9, 0xb, 0xd and 0xe, combined below as in AES InvMixColumns.
821 .Lmix: and $t1,$s0,$mask80
822 and $s1,$s0,$mask7f
823 sub $t1,$t1,$t1,lsr#7
824 and $t1,$t1,$mask1b
825 eor $s1,$t1,$s1,lsl#1 @ tp2
826
827 and $t1,$s1,$mask80
828 and $s2,$s1,$mask7f
829 sub $t1,$t1,$t1,lsr#7
830 and $t1,$t1,$mask1b
831 eor $s2,$t1,$s2,lsl#1 @ tp4
832
833 and $t1,$s2,$mask80
834 and $s3,$s2,$mask7f
835 sub $t1,$t1,$t1,lsr#7
836 and $t1,$t1,$mask1b
837 eor $s3,$t1,$s3,lsl#1 @ tp8
838
839 eor $t1,$s1,$s2
840 eor $t2,$s0,$s3 @ tp9
841 eor $t1,$t1,$s3 @ tpe
842 eor $t1,$t1,$s1,ror#24
843 eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
844 eor $t1,$t1,$s2,ror#16
845 eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
846 eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24)
847
848 ldr $s0,[$key,#4] @ prefetch tp1
849 str $t1,[$key],#4
850 subs $rounds,$rounds,#1
851 bne .Lmix
852
853 mov r0,#0
854 #if __ARM_ARCH__>=5
855 ldmia sp!,{r4-r12,pc}
856 #else
857 ldmia sp!,{r4-r12,lr}
858 tst lr,#1
859 moveq pc,lr @ be binary compatible with V4, yet
860 bx lr @ interoperable with Thumb ISA:-)
861 #endif
862 .size AES_set_enc2dec_key,.-AES_set_enc2dec_key
863
864 .type AES_Td,%object
865 .align 5
866 AES_Td:
867 .word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
868 .word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
869 .word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
870 .word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
871 .word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
872 .word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
873 .word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
874 .word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
875 .word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
876 .word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
877 .word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
878 .word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
879 .word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
880 .word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
881 .word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
882 .word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
883 .word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
884 .word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
885 .word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
886 .word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
887 .word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
888 .word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
889 .word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
890 .word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
891 .word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
892 .word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
893 .word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
894 .word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
895 .word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
896 .word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
897 .word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
898 .word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
899 .word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
900 .word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
901 .word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
902 .word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
903 .word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
904 .word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
905 .word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
906 .word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
907 .word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
908 .word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
909 .word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
910 .word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
911 .word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
912 .word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
913 .word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
914 .word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
915 .word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
916 .word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
917 .word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
918 .word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
919 .word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
920 .word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
921 .word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
922 .word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
923 .word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
924 .word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
925 .word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
926 .word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
927 .word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
928 .word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
929 .word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
930 .word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
931 @ Td4[256]
932 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
933 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
934 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
935 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
936 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
937 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
938 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
939 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
940 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
941 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
942 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
943 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
944 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
945 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
946 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
947 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
948 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
949 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
950 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
951 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
952 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
953 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
954 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
955 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
956 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
957 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
958 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
959 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
960 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
961 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
962 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
963 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
964 .size AES_Td,.-AES_Td
965
@ void AES_decrypt(const unsigned char *in, unsigned char *out,
@ 		 const AES_KEY *key) {
@
@ Decrypts one 16-byte block; r0=in, r1=out, r2=key on entry.  The block
@ is assembled into four big-endian words in s0-s3 (r0-r3), handed to
@ _armv4_AES_decrypt with tbl pointing at AES_Td, and stored to out in
@ the same byte order.  r1 is pushed first so that it can be popped into
@ a scratch register while the state still occupies r0-r3.
.global AES_decrypt
.type	AES_decrypt,%function
.align	5
AES_decrypt:
#if __ARM_ARCH__<7
	sub	r3,pc,#8		@ AES_decrypt
#else
	adr	r3,.			@ address of this instruction, which is
					@ AES_decrypt; naming the function symbol
					@ here can yield the address with the
					@ Thumb bit set and skew the Td offset
#endif
	stmdb	sp!,{r1,r4-r12,lr}
#ifdef	__APPLE__
	adr	$tbl,AES_Td
#else
	sub	$tbl,r3,#AES_decrypt-AES_Td	@ Td
#endif
	mov	$rounds,r0		@ inp
	mov	$key,r2
#if __ARM_ARCH__<7
	ldrb	$s0,[$rounds,#3]	@ load input data in endian-neutral
	ldrb	$t1,[$rounds,#2]	@ manner...
	ldrb	$t2,[$rounds,#1]
	ldrb	$t3,[$rounds,#0]
	orr	$s0,$s0,$t1,lsl#8
	ldrb	$s1,[$rounds,#7]
	orr	$s0,$s0,$t2,lsl#16
	ldrb	$t1,[$rounds,#6]
	orr	$s0,$s0,$t3,lsl#24
	ldrb	$t2,[$rounds,#5]
	ldrb	$t3,[$rounds,#4]
	orr	$s1,$s1,$t1,lsl#8
	ldrb	$s2,[$rounds,#11]
	orr	$s1,$s1,$t2,lsl#16
	ldrb	$t1,[$rounds,#10]
	orr	$s1,$s1,$t3,lsl#24
	ldrb	$t2,[$rounds,#9]
	ldrb	$t3,[$rounds,#8]
	orr	$s2,$s2,$t1,lsl#8
	ldrb	$s3,[$rounds,#15]
	orr	$s2,$s2,$t2,lsl#16
	ldrb	$t1,[$rounds,#14]
	orr	$s2,$s2,$t3,lsl#24
	ldrb	$t2,[$rounds,#13]
	ldrb	$t3,[$rounds,#12]
	orr	$s3,$s3,$t1,lsl#8
	orr	$s3,$s3,$t2,lsl#16
	orr	$s3,$s3,$t3,lsl#24
#else
	ldr	$s0,[$rounds,#0]	@ word loads, byte-swapped below on
	ldr	$s1,[$rounds,#4]	@ little-endian builds
	ldr	$s2,[$rounds,#8]
	ldr	$s3,[$rounds,#12]
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
#endif
	bl	_armv4_AES_decrypt

	ldr	$rounds,[sp],#4		@ pop out
#if __ARM_ARCH__>=7
#ifdef __ARMEL__
	rev	$s0,$s0
	rev	$s1,$s1
	rev	$s2,$s2
	rev	$s3,$s3
#endif
	str	$s0,[$rounds,#0]
	str	$s1,[$rounds,#4]
	str	$s2,[$rounds,#8]
	str	$s3,[$rounds,#12]
#else
	mov	$t1,$s0,lsr#24		@ write output in endian-neutral
	mov	$t2,$s0,lsr#16		@ manner...
	mov	$t3,$s0,lsr#8
	strb	$t1,[$rounds,#0]
	strb	$t2,[$rounds,#1]
	mov	$t1,$s1,lsr#24
	strb	$t3,[$rounds,#2]
	mov	$t2,$s1,lsr#16
	strb	$s0,[$rounds,#3]
	mov	$t3,$s1,lsr#8
	strb	$t1,[$rounds,#4]
	strb	$t2,[$rounds,#5]
	mov	$t1,$s2,lsr#24
	strb	$t3,[$rounds,#6]
	mov	$t2,$s2,lsr#16
	strb	$s1,[$rounds,#7]
	mov	$t3,$s2,lsr#8
	strb	$t1,[$rounds,#8]
	strb	$t2,[$rounds,#9]
	mov	$t1,$s3,lsr#24
	strb	$t3,[$rounds,#10]
	mov	$t2,$s3,lsr#16
	strb	$s2,[$rounds,#11]
	mov	$t3,$s3,lsr#8
	strb	$t1,[$rounds,#12]
	strb	$t2,[$rounds,#13]
	strb	$t3,[$rounds,#14]
	strb	$s3,[$rounds,#15]
#endif
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r12,pc}
#else
	ldmia	sp!,{r4-r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	bx	lr			@ interoperable with Thumb ISA:-)
#endif
.size	AES_decrypt,.-AES_decrypt
1079
1080 .type _armv4_AES_decrypt,%function
1081 .align 2
@ Core decryption routine, reached with bl from AES_decrypt.
@ On entry: s0-s3 hold the four state words, key points at the key schedule
@ (the round count is read from byte offset 240 of it) and tbl points at
@ AES_Td.
@ On return: s0-s3 hold the result and tbl has its entry value again. key is
@ left pointing at the last round key used; t1-t3, i1-i3, rounds, lr and the
@ condition flags are overwritten.
1082 _armv4_AES_decrypt:
1083 str lr,[sp,#-4]! @ push lr
@ Load the first four key words (key advances by 16) and xor them into the
@ state. The round count load is interleaved; its offset is 240-16 because
@ key has already been advanced.
1084 ldmia $key!,{$t1-$i1}
1085 eor $s0,$s0,$t1
1086 ldr $rounds,[$key,#240-16]
1087 eor $s1,$s1,$t2
1088 eor $s2,$s2,$t3
1089 eor $s3,$s3,$i1
@ The loop below handles all rounds but the last, which is done separately
@ after the loop with byte lookups.
1090 sub $rounds,$rounds,#1
@ lr is saved on the stack, so it is reused as the 0xff byte mask.
1091 mov lr,#255
1092
@ Split s0 into its four bytes ahead of the loop; each iteration repeats
@ this split at its end for the round that follows.
1093 and $i1,lr,$s0,lsr#16
1094 and $i2,lr,$s0,lsr#8
1095 and $i3,lr,$s0
1096 mov $s0,$s0,lsr#24
@ One round per iteration. Every state byte indexes the table of 32-bit
@ words at tbl (index scaled by 4 via lsl#2). All lookups use that one
@ table; the Td1/Td2/Td3 names in the comments correspond to the ror
@ amounts applied when the loaded words are xored together.
1097 .Ldec_loop:
1098 ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16]
1099 and $i1,lr,$s1 @ i0
1100 ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8]
1101 and $i2,lr,$s1,lsr#16
1102 ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0]
1103 and $i3,lr,$s1,lsr#8
1104 ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24]
1105 mov $s1,$s1,lsr#24
1106
1107 ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0]
1108 ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16]
1109 ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8]
1110 eor $s0,$s0,$i1,ror#24
1111 ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24]
1112 and $i1,lr,$s2,lsr#8 @ i0
1113 eor $t2,$i2,$t2,ror#8
1114 and $i2,lr,$s2 @ i1
1115 eor $t3,$i3,$t3,ror#8
1116 and $i3,lr,$s2,lsr#16
1117 ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8]
1118 eor $s1,$s1,$t1,ror#8
1119 ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0]
1120 mov $s2,$s2,lsr#24
1121
1122 ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16]
1123 eor $s0,$s0,$i1,ror#16
1124 ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24]
1125 and $i1,lr,$s3,lsr#16 @ i0
1126 eor $s1,$s1,$i2,ror#24
1127 and $i2,lr,$s3,lsr#8 @ i1
1128 eor $t3,$i3,$t3,ror#8
1129 and $i3,lr,$s3 @ i2
1130 ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16]
1131 eor $s2,$s2,$t2,ror#8
1132 ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8]
1133 mov $s3,$s3,lsr#24
1134
@ The next round key is fetched from here on: word 0 with a post-increment
@ of key by 16, the remaining three words at negative offsets from the
@ advanced pointer.
1135 ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0]
1136 eor $s0,$s0,$i1,ror#8
1137 ldr $i1,[$key],#16
1138 eor $s1,$s1,$i2,ror#16
1139 ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24]
1140 eor $s2,$s2,$i3,ror#24
1141
@ Xor the round key into the state and, interleaved with that, split the
@ new s0 into bytes for the next iteration (or for the final round).
1142 ldr $t1,[$key,#-12]
1143 eor $s0,$s0,$i1
1144 ldr $t2,[$key,#-8]
1145 eor $s3,$s3,$t3,ror#8
1146 ldr $t3,[$key,#-4]
1147 and $i1,lr,$s0,lsr#16
1148 eor $s1,$s1,$t1
1149 and $i2,lr,$s0,lsr#8
1150 eor $s2,$s2,$t2
1151 and $i3,lr,$s0
1152 eor $s3,$s3,$t3
1153 mov $s0,$s0,lsr#24
1154
1155 subs $rounds,$rounds,#1
1156 bne .Ldec_loop
1157
@ Td4 sits 1024 bytes after the start of the word table. It is indexed by
@ byte (ldrb with an unscaled index) in the final round below.
1158 add $tbl,$tbl,#1024
1159
@ The eight loads below touch Td4 at 32-byte steps across 256 bytes. The
@ loaded values are not used: t1-t3 are overwritten by the ldrb lookups
@ that follow.
1160 ldr $t2,[$tbl,#0] @ prefetch Td4
1161 ldr $t3,[$tbl,#32]
1162 ldr $t1,[$tbl,#64]
1163 ldr $t2,[$tbl,#96]
1164 ldr $t3,[$tbl,#128]
1165 ldr $t1,[$tbl,#160]
1166 ldr $t2,[$tbl,#192]
1167 ldr $t3,[$tbl,#224]
1168
@ Final round: single-byte lookups in Td4, merged into the state words with
@ lsl shifts instead of the rotations used in the loop.
1169 ldrb $s0,[$tbl,$s0] @ Td4[s0>>24]
1170 ldrb $t1,[$tbl,$i1] @ Td4[s0>>16]
1171 and $i1,lr,$s1 @ i0
1172 ldrb $t2,[$tbl,$i2] @ Td4[s0>>8]
1173 and $i2,lr,$s1,lsr#16
1174 ldrb $t3,[$tbl,$i3] @ Td4[s0>>0]
1175 and $i3,lr,$s1,lsr#8
1176
@ For the top byte of s1 (and likewise s2 and s3 further down) the table
@ address is formed in the state register itself and then dereferenced.
1177 add $s1,$tbl,$s1,lsr#24
1178 ldrb $i1,[$tbl,$i1] @ Td4[s1>>0]
1179 ldrb $s1,[$s1] @ Td4[s1>>24]
1180 ldrb $i2,[$tbl,$i2] @ Td4[s1>>16]
1181 eor $s0,$i1,$s0,lsl#24
1182 ldrb $i3,[$tbl,$i3] @ Td4[s1>>8]
1183 eor $s1,$t1,$s1,lsl#8
1184 and $i1,lr,$s2,lsr#8 @ i0
1185 eor $t2,$t2,$i2,lsl#8
1186 and $i2,lr,$s2 @ i1
1187 ldrb $i1,[$tbl,$i1] @ Td4[s2>>8]
1188 eor $t3,$t3,$i3,lsl#8
1189 ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
1190 and $i3,lr,$s2,lsr#16
1191
1192 add $s2,$tbl,$s2,lsr#24
1193 ldrb $s2,[$s2] @ Td4[s2>>24]
1194 eor $s0,$s0,$i1,lsl#8
1195 ldrb $i3,[$tbl,$i3] @ Td4[s2>>16]
1196 eor $s1,$i2,$s1,lsl#16
1197 and $i1,lr,$s3,lsr#16 @ i0
1198 eor $s2,$t2,$s2,lsl#16
1199 and $i2,lr,$s3,lsr#8 @ i1
1200 ldrb $i1,[$tbl,$i1] @ Td4[s3>>16]
1201 eor $t3,$t3,$i3,lsl#16
1202 ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
1203 and $i3,lr,$s3 @ i2
1204
@ The last round key is read at offsets 0-12 from key without advancing the
@ pointer; the loads are interleaved with the last merges.
1205 add $s3,$tbl,$s3,lsr#24
1206 ldrb $i3,[$tbl,$i3] @ Td4[s3>>0]
1207 ldrb $s3,[$s3] @ Td4[s3>>24]
1208 eor $s0,$s0,$i1,lsl#16
1209 ldr $i1,[$key,#0]
1210 eor $s1,$s1,$i2,lsl#8
1211 ldr $t1,[$key,#4]
1212 eor $s2,$i3,$s2,lsl#8
1213 ldr $t2,[$key,#8]
1214 eor $s3,$t3,$s3,lsl#24
1215 ldr $t3,[$key,#12]
1216
1217 eor $s0,$s0,$i1
1218 eor $s1,$s1,$t1
1219 eor $s2,$s2,$t2
1220 eor $s3,$s3,$t3
1221
@ Undo the Td4 adjustment so that tbl points at AES_Td again, then return by
@ popping the saved lr value directly into pc.
1222 sub $tbl,$tbl,#1024
1223 ldr pc,[sp],#4 @ pop and return
1224 .size _armv4_AES_decrypt,.-_armv4_AES_decrypt
1225 .asciz "AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
1226 .align 2
1227 ___
1228
# Post-process the generated assembly held in $code, then write it out.
#
# 1. Every "bx lr" is replaced by the raw instruction word 0xe12fff1e, so the
#    output can be assembled with -march=armv4.
# 2. The "ret" pseudo-mnemonic is expanded to "bx lr".  This runs after
#    step 1, so the "bx lr" produced here is left as a mnemonic.
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm;	# make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx\tlr/gm;

# Copy this script's leading comment block to the output as assembler
# comments: the '#' that starts each line becomes '@', the "#!" line is
# skipped, and copying stops at the first line that is neither a '#' comment
# nor empty.
#
# The script is reopened with three-argument open on a lexical handle so the
# path in $0 is never interpreted as an open mode or a pipe.  Copying the
# header is best-effort: if the script cannot be reopened, a warning is
# issued and the assembly is still emitted.
if (open(my $self, '<', $0)) {
    while (my $line = <$self>) {
        next if ($line =~ /^#!/);
        last if (!($line =~ s/^#/@/) and $line !~ /^$/);
        print $line;
    }
    close $self;
} else {
    warn "$0: cannot reopen script to copy header comment: $!\n";
}

print $code;

# STDOUT is either the output file or a pipe into arm-xlate.pl.  close()
# flushes it and reports a failed write or a failing child process, so a
# truncated output file is not silently accepted.
close STDOUT or die "error closing STDOUT: $!";	# enforce flush