]> git.ipfire.org Git - people/ms/strongswan.git/blob - lib/libcrypto/libaes/asm/aes-i586.S
- import of strongswan-2.7.0
[people/ms/strongswan.git] / lib / libcrypto / libaes / asm / aes-i586.S
1 //
2 // Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
3 // All rights reserved.
4 //
5 // TERMS
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted subject to the following conditions:
9 //
10 // 1. Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 //
13 // 2. Redistributions in binary form must reproduce the above copyright
14 // notice, this list of conditions and the following disclaimer in the
15 // documentation and/or other materials provided with the distribution.
16 //
17 // 3. The copyright holder's name must not be used to endorse or promote
18 // any products derived from this software without his specific prior
19 // written permission.
20 //
21 // This software is provided 'as is' with no express or implied warranties
22 // of correctness or fitness for purpose.
23
24 // Modified by Jari Ruusu, December 24 2001
25 // - Converted syntax to GNU CPP/assembler syntax
26 // - C programming interface converted back to "old" API
27 // - Minor portability cleanups and speed optimizations
28
29 // An AES (Rijndael) implementation for the Pentium. This version only
30 // implements the standard AES block length (128 bits, 16 bytes). This code
31 // does not preserve the eax, ecx or edx registers or the arithmetic status
32 // flags. However, the ebx, esi, edi, and ebp registers are preserved across
33 // calls.
34
35 // void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
36 // void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
37 // void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
38
// Optional symbol decoration: when USE_UNDERLINE is defined, the three
// exported entry points are renamed so that they carry a leading underscore.
39 #if defined(USE_UNDERLINE)
40 # define aes_set_key _aes_set_key
41 # define aes_encrypt _aes_encrypt
42 # define aes_decrypt _aes_decrypt
43 #endif
// ALIGN32BYTES is the operand given to every .align directive in this file.
// It defaults to 32 unless the build has already defined it.
44 #if !defined(ALIGN32BYTES)
45 # define ALIGN32BYTES 32
46 #endif
47
// Object file name and the three exported entry points.
48 .file "aes-i586.S"
49 .globl aes_set_key
50 .globl aes_encrypt
51 .globl aes_decrypt
52
53 #define tlen 1024 // byte length of each of the 4 'xor' sub-tables (256 32-bit words)
54
55 // Offsets from %esp to the arguments of aes_encrypt/aes_decrypt once one
// register has been pushed (4 bytes saved register + 4 bytes return address).
56
57 #define ctx 8 // AES context structure
58 #define in_blk 12 // input byte array address parameter
59 #define out_blk 16 // output byte array address parameter
60
61 // Byte offsets of the fields in the context structure
62
63 #define nkey 0 // key length in 32-bit words (aes_set_key stores 4, 6 or 8), size 4
64 #define nrnd 4 // number of rounds (aes_set_key stores nkey + 6), size 4
65 #define ekey 8 // encryption key schedule base address, size 256
66 #define dkey 264 // decryption key schedule base address, size 256
67
68 // This macro performs one forward (encryption) round. It is entered with
69 // the previous round's four column values in %eax, %ebx, %esi and %edi and
70 // exits with the new column values in the same registers.
//
// p1 - label of the table to use; four sub-tables of tlen bytes each are
//      indexed at p1, p1+tlen, p1+2*tlen and p1+3*tlen
// p2 - byte offset from %ebp of the 16-byte round key that is xored in
//
// With the columns numbered c0..c3 (= %eax, %ebx, %esi, %edi) each output is
//   c[j]' = key[j] ^ T0[byte0(c[j])]   ^ T1[byte1(c[j+1])]
//                  ^ T2[byte2(c[j+2])] ^ T3[byte3(c[j+3])]   (indices mod 4)
// where byte0 is the least significant byte and Tn is the sub-table at
// p1+n*tlen.
//
// (%esp) and 4(%esp) are used as spill slots for the incoming %ebx and %edi,
// so the caller must have reserved 8 bytes at the top of the stack.
// %ecx and %edx are used as scratch and are not preserved.
71
72 #define fwd_rnd(p1,p2) \
73 mov %ebx,(%esp) ;\
74 movzbl %al,%edx ;\
75 mov %eax,%ecx ;\
76 mov p2(%ebp),%eax ;\
77 mov %edi,4(%esp) ;\
78 mov p2+12(%ebp),%edi ;\
79 xor p1(,%edx,4),%eax ;\
80 movzbl %ch,%edx ;\
81 shr $16,%ecx ;\
82 mov p2+4(%ebp),%ebx ;\
83 xor p1+tlen(,%edx,4),%edi ;\
84 movzbl %cl,%edx ;\
85 movzbl %ch,%ecx ;\
86 xor p1+3*tlen(,%ecx,4),%ebx ;\
87 mov %esi,%ecx ;\
88 mov p1+2*tlen(,%edx,4),%esi ;\
89 movzbl %cl,%edx ;\
90 xor p1(,%edx,4),%esi ;\
91 movzbl %ch,%edx ;\
92 shr $16,%ecx ;\
93 xor p1+tlen(,%edx,4),%ebx ;\
94 movzbl %cl,%edx ;\
95 movzbl %ch,%ecx ;\
96 xor p1+2*tlen(,%edx,4),%eax ;\
97 mov (%esp),%edx ;\
98 xor p1+3*tlen(,%ecx,4),%edi ;\
99 movzbl %dl,%ecx ;\
100 xor p2+8(%ebp),%esi ;\
101 xor p1(,%ecx,4),%ebx ;\
102 movzbl %dh,%ecx ;\
103 shr $16,%edx ;\
104 xor p1+tlen(,%ecx,4),%eax ;\
105 movzbl %dl,%ecx ;\
106 movzbl %dh,%edx ;\
107 xor p1+2*tlen(,%ecx,4),%edi ;\
108 mov 4(%esp),%ecx ;\
109 xor p1+3*tlen(,%edx,4),%esi ;\
110 movzbl %cl,%edx ;\
111 xor p1(,%edx,4),%edi ;\
112 movzbl %ch,%edx ;\
113 shr $16,%ecx ;\
114 xor p1+tlen(,%edx,4),%esi ;\
115 movzbl %cl,%edx ;\
116 movzbl %ch,%ecx ;\
117 xor p1+2*tlen(,%edx,4),%ebx ;\
118 xor p1+3*tlen(,%ecx,4),%eax
119
120 // This macro performs one inverse (decryption) round. It is entered with
121 // the previous round's four column values in %eax, %ebx, %esi and %edi and
122 // exits with the new column values in the same registers.
//
// p1 - label of the table to use; four sub-tables of tlen bytes each are
//      indexed at p1, p1+tlen, p1+2*tlen and p1+3*tlen
// p2 - byte offset from %ebp of the 16-byte round key that is xored in
//
// With the columns numbered c0..c3 (= %eax, %ebx, %esi, %edi) each output is
//   c[j]' = key[j] ^ T0[byte0(c[j])]   ^ T1[byte1(c[j-1])]
//                  ^ T2[byte2(c[j-2])] ^ T3[byte3(c[j-3])]   (indices mod 4)
// where byte0 is the least significant byte and Tn is the sub-table at
// p1+n*tlen.
//
// (%esp) and 4(%esp) are used as spill slots for the incoming %ebx and %edi,
// so the caller must have reserved 8 bytes at the top of the stack.
// %ecx and %edx are used as scratch and are not preserved.
123
124 #define inv_rnd(p1,p2) \
125 movzbl %al,%edx ;\
126 mov %ebx,(%esp) ;\
127 mov %eax,%ecx ;\
128 mov p2(%ebp),%eax ;\
129 mov %edi,4(%esp) ;\
130 mov p2+4(%ebp),%ebx ;\
131 xor p1(,%edx,4),%eax ;\
132 movzbl %ch,%edx ;\
133 shr $16,%ecx ;\
134 mov p2+12(%ebp),%edi ;\
135 xor p1+tlen(,%edx,4),%ebx ;\
136 movzbl %cl,%edx ;\
137 movzbl %ch,%ecx ;\
138 xor p1+3*tlen(,%ecx,4),%edi ;\
139 mov %esi,%ecx ;\
140 mov p1+2*tlen(,%edx,4),%esi ;\
141 movzbl %cl,%edx ;\
142 xor p1(,%edx,4),%esi ;\
143 movzbl %ch,%edx ;\
144 shr $16,%ecx ;\
145 xor p1+tlen(,%edx,4),%edi ;\
146 movzbl %cl,%edx ;\
147 movzbl %ch,%ecx ;\
148 xor p1+2*tlen(,%edx,4),%eax ;\
149 mov (%esp),%edx ;\
150 xor p1+3*tlen(,%ecx,4),%ebx ;\
151 movzbl %dl,%ecx ;\
152 xor p2+8(%ebp),%esi ;\
153 xor p1(,%ecx,4),%ebx ;\
154 movzbl %dh,%ecx ;\
155 shr $16,%edx ;\
156 xor p1+tlen(,%ecx,4),%esi ;\
157 movzbl %dl,%ecx ;\
158 movzbl %dh,%edx ;\
159 xor p1+2*tlen(,%ecx,4),%edi ;\
160 mov 4(%esp),%ecx ;\
161 xor p1+3*tlen(,%edx,4),%eax ;\
162 movzbl %cl,%edx ;\
163 xor p1(,%edx,4),%edi ;\
164 movzbl %ch,%edx ;\
165 shr $16,%ecx ;\
166 xor p1+tlen(,%edx,4),%eax ;\
167 movzbl %cl,%edx ;\
168 movzbl %ch,%ecx ;\
169 xor p1+2*tlen(,%edx,4),%ebx ;\
170 xor p1+3*tlen(,%ecx,4),%esi
171
172 // AES (Rijndael) Encryption Subroutine
//
// void aes_encrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
//
// All three arguments are read from the stack. The 16 bytes at in_blk are
// loaded as four 32-bit columns, run through nrnd rounds using the key
// schedule at ekey in the context, and the four result columns are stored
// to out_blk.
//
// %ebp, %ebx, %esi and %edi are pushed on entry and popped before return;
// %eax, %ecx, %edx and the flags are modified. 8 bytes of stack are reserved
// as the spill area that fwd_rnd requires.
//
// The round count selects the entry point into one unrolled sequence:
// 10 enters at aes_15, 12 enters at aes_13, and any other value falls
// through to the full 14-round path.
173
174 .text
175 .align ALIGN32BYTES
176 aes_encrypt:
177 push %ebp
178 mov ctx(%esp),%ebp // pointer to context
179 mov in_blk(%esp),%ecx // pointer to the input block
180 push %ebx
181 push %esi
182 push %edi
183 mov nrnd(%ebp),%edx // number of rounds
184 lea ekey+16(%ebp),%ebp // key pointer (second round key; the first is at -16)
185
186 // input four columns and xor in first round key
187
188 mov (%ecx),%eax
189 mov 4(%ecx),%ebx
190 mov 8(%ecx),%esi
191 mov 12(%ecx),%edi
192 xor -16(%ebp),%eax
193 xor -12(%ebp),%ebx
194 xor -8(%ebp),%esi
195 xor -4(%ebp),%edi
196
197 sub $8,%esp // space for register saves on stack
198
199 sub $10,%edx // zero when nrnd == 10
200 je aes_15
201 add $32,%ebp // two more rounds to run: move the key pointer on by two round keys
202 sub $2,%edx // zero when nrnd == 12
203 je aes_13
204 add $32,%ebp // another two rounds for the 14-round case
205
206 fwd_rnd(aes_ft_tab,-64) // 14 rounds for 256-bit key
207 fwd_rnd(aes_ft_tab,-48)
208 aes_13: fwd_rnd(aes_ft_tab,-32) // 12 rounds for 192-bit key
209 fwd_rnd(aes_ft_tab,-16)
210 aes_15: fwd_rnd(aes_ft_tab,0) // 10 rounds for 128-bit key
211 fwd_rnd(aes_ft_tab,16)
212 fwd_rnd(aes_ft_tab,32)
213 fwd_rnd(aes_ft_tab,48)
214 fwd_rnd(aes_ft_tab,64)
215 fwd_rnd(aes_ft_tab,80)
216 fwd_rnd(aes_ft_tab,96)
217 fwd_rnd(aes_ft_tab,112)
218 fwd_rnd(aes_ft_tab,128)
219 fwd_rnd(aes_fl_tab,144) // last round uses a different table
220
221 // move final values to the output array.
222
223 mov out_blk+20(%esp),%ebp // +20 = three further pushes (12) plus the 8-byte spill area
224 add $8,%esp // release the spill area
225 mov %eax,(%ebp)
226 mov %ebx,4(%ebp)
227 mov %esi,8(%ebp)
228 mov %edi,12(%ebp)
229 pop %edi
230 pop %esi
231 pop %ebx
232 pop %ebp
233 ret
234
235
236 // AES (Rijndael) Decryption Subroutine
//
// void aes_decrypt(const aes_context *cx, const unsigned char in_blk[], unsigned char out_blk[])
//
// All three arguments are read from the stack. The 16 bytes at in_blk are
// loaded as four 32-bit columns, run through nrnd rounds using the key
// schedule at dkey in the context, and the four result columns are stored
// to out_blk.
//
// %ebp, %ebx, %esi and %edi are pushed on entry and popped before return;
// %eax, %ecx, %edx and the flags are modified. 8 bytes of stack are reserved
// as the spill area that inv_rnd requires.
//
// The round count selects the entry point into one unrolled sequence:
// 10 enters at aes_25, 12 enters at aes_23, and any other value falls
// through to the full 14-round path.
237
238 .align ALIGN32BYTES
239 aes_decrypt:
240 push %ebp
241 mov ctx(%esp),%ebp // pointer to context
242 mov in_blk(%esp),%ecx // pointer to the input block
243 push %ebx
244 push %esi
245 push %edi
246 mov nrnd(%ebp),%edx // number of rounds
247 lea dkey+16(%ebp),%ebp // key pointer (second round key; the first is at -16)
248
249 // input four columns and xor in first round key
250
251 mov (%ecx),%eax
252 mov 4(%ecx),%ebx
253 mov 8(%ecx),%esi
254 mov 12(%ecx),%edi
255 xor -16(%ebp),%eax
256 xor -12(%ebp),%ebx
257 xor -8(%ebp),%esi
258 xor -4(%ebp),%edi
259
260 sub $8,%esp // space for register saves on stack
261
262 sub $10,%edx // zero when nrnd == 10
263 je aes_25
264 add $32,%ebp // two more rounds to run: move the key pointer on by two round keys
265 sub $2,%edx // zero when nrnd == 12
266 je aes_23
267 add $32,%ebp // another two rounds for the 14-round case
268
269 inv_rnd(aes_it_tab,-64) // 14 rounds for 256-bit key
270 inv_rnd(aes_it_tab,-48)
271 aes_23: inv_rnd(aes_it_tab,-32) // 12 rounds for 192-bit key
272 inv_rnd(aes_it_tab,-16)
273 aes_25: inv_rnd(aes_it_tab,0) // 10 rounds for 128-bit key
274 inv_rnd(aes_it_tab,16)
275 inv_rnd(aes_it_tab,32)
276 inv_rnd(aes_it_tab,48)
277 inv_rnd(aes_it_tab,64)
278 inv_rnd(aes_it_tab,80)
279 inv_rnd(aes_it_tab,96)
280 inv_rnd(aes_it_tab,112)
281 inv_rnd(aes_it_tab,128)
282 inv_rnd(aes_il_tab,144) // last round uses a different table
283
284 // move final values to the output array.
285
286 mov out_blk+20(%esp),%ebp // +20 = three further pushes (12) plus the 8-byte spill area
287 add $8,%esp // release the spill area
288 mov %eax,(%ebp)
289 mov %ebx,4(%ebp)
290 mov %esi,8(%ebp)
291 mov %edi,12(%ebp)
292 pop %edi
293 pop %esi
294 pop %ebx
295 pop %ebp
296 ret
297
298 // AES (Rijndael) Key Schedule Subroutine
299
300 // input/output parameters: offsets from %ebp inside aes_set_key, whose
// prologue pushes the flags and %ebp before copying %esp to %ebp (so the
// first argument sits above saved %ebp, saved flags and the return address)
301
302 #define aes_cx 12 // AES context
303 #define in_key 16 // key input array address
304 #define key_ln 20 // key length, bytes (16,24,32) or bits (128,192,256)
305 #define ed_flg 24 // 0=create both encr/decr keys, 1=create encr key only
306
307 // offsets for locals (relative to %ebp; slen bytes are reserved for them)
308
309 #define cnt -4 // round counter of the decryption-schedule loop
310 #define kpf -8 // not referenced by the code visible in this file
311 #define slen 8 // bytes of local storage subtracted from %esp
312
313 // This macro performs a column mixing operation on an input 32-bit
314 // word to give a 32-bit result. It uses each of the 4 bytes in the
315 // input column to index 4 different tables of 256 32-bit words
316 // that are xored together to form the output value.
//
// p1     - label of the first table; the others follow at tlen-byte steps
// input  - %ebx
// output - %eax = T0[byte0] ^ T1[byte1] ^ T2[byte2] ^ T3[byte3]
// %ecx is used as scratch. %ebx is left rotated by 16 bits (the macro
// contains a single ror $16 and no matching rotate back).
317
318 #define mix_col(p1) \
319 movzbl %bl,%ecx ;\
320 mov p1(,%ecx,4),%eax ;\
321 movzbl %bh,%ecx ;\
322 ror $16,%ebx ;\
323 xor p1+tlen(,%ecx,4),%eax ;\
324 movzbl %bl,%ecx ;\
325 xor p1+2*tlen(,%ecx,4),%eax ;\
326 movzbl %bh,%ecx ;\
327 xor p1+3*tlen(,%ecx,4),%eax
328
329 // Key Schedule Macros
//
// Each macro appends one group of key-schedule words (4, 6 or 8 words for
// ksc4, ksc6 and ksc8) at offset 16*p1, 24*p1 or 32*p1 from %edi.
//
// p1 - group index; it also selects the entry of aes_rcon_tab that is used
//
// On entry %esi, %ebp, %edx and %ebx hold the last four words written so far
// (oldest in %esi, newest in %ebx); on exit they hold the last four words of
// the new group. ksc6 and ksc8 read the earlier words of the previous group
// back from memory at negative offsets from the new group.
// %eax and %ecx are used as scratch.
//
// The rol $24 / mix_col / ror $8 sequence feeds mix_col with %ebx rotated
// right by 8 bits; together with the ror $16 inside mix_col the rotations
// cancel, so %ebx holds its original value again afterwards.
//
// ksc8 additionally passes the fourth new word through aes_fl_tab (with no
// rotation and no round constant) before xoring it into %esi; %ebx is kept
// across that step with push/pop.
330
331 #define ksc4(p1) \
332 rol $24,%ebx ;\
333 mix_col(aes_fl_tab) ;\
334 ror $8,%ebx ;\
335 xor 4*p1+aes_rcon_tab,%eax ;\
336 xor %eax,%esi ;\
337 xor %esi,%ebp ;\
338 mov %esi,16*p1(%edi) ;\
339 mov %ebp,16*p1+4(%edi) ;\
340 xor %ebp,%edx ;\
341 xor %edx,%ebx ;\
342 mov %edx,16*p1+8(%edi) ;\
343 mov %ebx,16*p1+12(%edi)
344
345 #define ksc6(p1) \
346 rol $24,%ebx ;\
347 mix_col(aes_fl_tab) ;\
348 ror $8,%ebx ;\
349 xor 4*p1+aes_rcon_tab,%eax ;\
350 xor 24*p1-24(%edi),%eax ;\
351 mov %eax,24*p1(%edi) ;\
352 xor 24*p1-20(%edi),%eax ;\
353 mov %eax,24*p1+4(%edi) ;\
354 xor %eax,%esi ;\
355 xor %esi,%ebp ;\
356 mov %esi,24*p1+8(%edi) ;\
357 mov %ebp,24*p1+12(%edi) ;\
358 xor %ebp,%edx ;\
359 xor %edx,%ebx ;\
360 mov %edx,24*p1+16(%edi) ;\
361 mov %ebx,24*p1+20(%edi)
362
363 #define ksc8(p1) \
364 rol $24,%ebx ;\
365 mix_col(aes_fl_tab) ;\
366 ror $8,%ebx ;\
367 xor 4*p1+aes_rcon_tab,%eax ;\
368 xor 32*p1-32(%edi),%eax ;\
369 mov %eax,32*p1(%edi) ;\
370 xor 32*p1-28(%edi),%eax ;\
371 mov %eax,32*p1+4(%edi) ;\
372 xor 32*p1-24(%edi),%eax ;\
373 mov %eax,32*p1+8(%edi) ;\
374 xor 32*p1-20(%edi),%eax ;\
375 mov %eax,32*p1+12(%edi) ;\
376 push %ebx ;\
377 mov %eax,%ebx ;\
378 mix_col(aes_fl_tab) ;\
379 pop %ebx ;\
380 xor %eax,%esi ;\
381 xor %esi,%ebp ;\
382 mov %esi,32*p1+16(%edi) ;\
383 mov %ebp,32*p1+20(%edi) ;\
384 xor %ebp,%edx ;\
385 xor %edx,%ebx ;\
386 mov %edx,32*p1+24(%edi) ;\
387 mov %ebx,32*p1+28(%edi)
388
// void aes_set_key(aes_context *cx, const unsigned char key[], const int key_len, const int f)
//
// Fills in the context: nkey (key length in 32-bit words), nrnd (nkey + 6),
// the encryption key schedule at ekey and, when f is zero, the decryption
// key schedule at dkey.
//
// key_len values below 128 are taken as a byte count, values of 128 and
// above as a bit count (divided by 8). Any resulting length other than 24
// or 32 bytes is treated as 16 bytes.
//
// The flags are saved with pushfl and restored with popfl, and %ebp, %ebx,
// %esi and %edi are saved and restored; %eax, %ecx and %edx are modified.
// %ebp doubles as a key-word register during the expansion, so the frame
// pointer is pushed before the expansion and popped again at aes_37.
389 .align ALIGN32BYTES
390 aes_set_key:
391 pushfl // saved because cld below changes the direction flag
392 push %ebp
393 mov %esp,%ebp
394 sub $slen,%esp // room for the locals (cnt, kpf)
395 push %ebx
396 push %esi
397 push %edi
398
399 mov aes_cx(%ebp),%edx // edx -> AES context
400
401 mov key_ln(%ebp),%ecx // key length
402 cmpl $128,%ecx
403 jb aes_30 // below 128: already a byte count
404 shr $3,%ecx // bits -> bytes
405 aes_30: cmpl $32,%ecx
406 je aes_32
407 cmpl $24,%ecx
408 je aes_32
409 mov $16,%ecx // anything else is handled as a 16-byte key
410 aes_32: shr $2,%ecx // bytes -> 32-bit words
411 mov %ecx,nkey(%edx)
412
413 lea 6(%ecx),%eax // 10/12/14 for 4/6/8 32-bit key length
414 mov %eax,nrnd(%edx)
415
416 mov in_key(%ebp),%esi // key input array
417 lea ekey(%edx),%edi // key position in AES context
418 cld // string instructions below must count upwards
419 push %ebp // frame pointer is reused as a key word register below
420 mov %ecx,%eax // save key length in eax
421 rep ; movsl // words in the key schedule
422 mov -4(%esi),%ebx // put some values in registers
423 mov -8(%esi),%edx // to allow faster code
424 mov -12(%esi),%ebp // (the last four words of the input key,
425 mov -16(%esi),%esi // newest in %ebx, oldest in %esi)
426
427 cmpl $4,%eax // jump on key size
428 je aes_36
429 cmpl $6,%eax
430 je aes_35
431
// 8-word key: 7 groups of 8 words
432 ksc8(0)
433 ksc8(1)
434 ksc8(2)
435 ksc8(3)
436 ksc8(4)
437 ksc8(5)
438 ksc8(6)
439 jmp aes_37
// 6-word key: 8 groups of 6 words
440 aes_35: ksc6(0)
441 ksc6(1)
442 ksc6(2)
443 ksc6(3)
444 ksc6(4)
445 ksc6(5)
446 ksc6(6)
447 ksc6(7)
448 jmp aes_37
// 4-word key: 10 groups of 4 words
449 aes_36: ksc4(0)
450 ksc4(1)
451 ksc4(2)
452 ksc4(3)
453 ksc4(4)
454 ksc4(5)
455 ksc4(6)
456 ksc4(7)
457 ksc4(8)
458 ksc4(9)
459 aes_37: pop %ebp // frame pointer back; arguments and locals are reachable again
460 mov aes_cx(%ebp),%edx // edx -> AES context
461 cmpl $0,ed_flg(%ebp)
462 jne aes_39 // non-zero flag: encryption schedule only
463
464 // compile decryption key schedule from encryption schedule - reverse
465 // order and do mix_column operation on round keys except first and last
466
467 mov nrnd(%edx),%eax // kt = cx->d_key + nc * cx->Nrnd
468 shl $2,%eax
469 lea dkey(%edx,%eax,4),%edi // edi = dkey + 16 * nrnd (last slot of the decryption schedule)
470 lea ekey(%edx),%esi // kf = cx->e_key
471
472 movsl // copy first round key (unmodified)
473 movsl
474 movsl
475 movsl
476 sub $32,%edi // back over the slot just written to the one before it
477 movl $1,cnt(%ebp) // round keys 1 .. nrnd-1 are transformed in the loop
478 aes_38: // do mix column on each column of
479 lodsl // each round key
480 mov %eax,%ebx
481 mix_col(aes_im_tab)
482 stosl
483 lodsl
484 mov %eax,%ebx
485 mix_col(aes_im_tab)
486 stosl
487 lodsl
488 mov %eax,%ebx
489 mix_col(aes_im_tab)
490 stosl
491 lodsl
492 mov %eax,%ebx
493 mix_col(aes_im_tab)
494 stosl
495 sub $32,%edi // destination walks downwards one round key per iteration
496
497 incl cnt(%ebp)
498 mov cnt(%ebp),%eax
499 cmp nrnd(%edx),%eax
500 jb aes_38 // loop while cnt < nrnd
501
502 movsl // copy last round key (unmodified)
503 movsl
504 movsl
505 movsl
506 aes_39: pop %edi
507 pop %esi
508 pop %ebx
509 mov %ebp,%esp // drop the locals
510 pop %ebp
511 popfl // restores the caller's flags, including the direction flag
512 ret
513
514
515 // finite field multiplies by {02}, {04} and {08}
// Each shifts x left and xors in 0x11b, suitably weighted, once for every
// bit that the shift moves beyond bit 7.
// None of the macros in this group parenthesise their argument; in this
// file they are only ever expanded with plain hex constants.
516
517 #define f2(x) ((x<<1)^(((x>>7)&1)*0x11b))
518 #define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b))
519 #define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b))
520
521 // finite field multiplies required in table generation
// (by {03}, {09}, {0b}, {0d} and {0e}, built by xoring the multiples above)
522
523 #define f3(x) (f2(x) ^ x)
524 #define f9(x) (f8(x) ^ x)
525 #define fb(x) (f8(x) ^ f2(x) ^ x)
526 #define fd(x) (f8(x) ^ f4(x) ^ x)
527 #define fe(x) (f8(x) ^ f4(x) ^ f2(x))
528
529 // These defines generate the forward table entries
// (u0..u3 pack the same four bytes, moved on by one byte position each time)
530
531 #define u0(x) ((f3(x) << 24) | (x << 16) | (x << 8) | f2(x))
532 #define u1(x) ((x << 24) | (x << 16) | (f2(x) << 8) | f3(x))
533 #define u2(x) ((x << 24) | (f2(x) << 16) | (f3(x) << 8) | x)
534 #define u3(x) ((f2(x) << 24) | (f3(x) << 16) | (x << 8) | x)
535
536 // These defines generate the inverse table entries
// (v0..v3 pack the same four bytes, moved on by one byte position each time)
537
538 #define v0(x) ((fb(x) << 24) | (fd(x) << 16) | (f9(x) << 8) | fe(x))
539 #define v1(x) ((fd(x) << 24) | (f9(x) << 16) | (fe(x) << 8) | fb(x))
540 #define v2(x) ((f9(x) << 24) | (fe(x) << 16) | (fb(x) << 8) | fd(x))
541 #define v3(x) ((fe(x) << 24) | (fb(x) << 16) | (fd(x) << 8) | f9(x))
542
543 // These defines generate entries for the last round tables
// (the unmodified byte placed in byte position 0, 1, 2 or 3 of the word)
544
545 #define w0(x) (x)
546 #define w1(x) (x << 8)
547 #define w2(x) (x << 16)
548 #define w3(x) (x << 24)
549
550 // macro to generate inverse mix column tables (needed for the key schedule)
//
// im_data0 .. im_data7 each emit 32 .long values, p1(n) for consecutive n;
// together they cover n = 0x00 .. 0xff in ascending order.
// p1 - name of the entry macro applied to each index
551
552 #define im_data0(p1) \
553 .long p1(0x00),p1(0x01),p1(0x02),p1(0x03),p1(0x04),p1(0x05),p1(0x06),p1(0x07) ;\
554 .long p1(0x08),p1(0x09),p1(0x0a),p1(0x0b),p1(0x0c),p1(0x0d),p1(0x0e),p1(0x0f) ;\
555 .long p1(0x10),p1(0x11),p1(0x12),p1(0x13),p1(0x14),p1(0x15),p1(0x16),p1(0x17) ;\
556 .long p1(0x18),p1(0x19),p1(0x1a),p1(0x1b),p1(0x1c),p1(0x1d),p1(0x1e),p1(0x1f)
557 #define im_data1(p1) \
558 .long p1(0x20),p1(0x21),p1(0x22),p1(0x23),p1(0x24),p1(0x25),p1(0x26),p1(0x27) ;\
559 .long p1(0x28),p1(0x29),p1(0x2a),p1(0x2b),p1(0x2c),p1(0x2d),p1(0x2e),p1(0x2f) ;\
560 .long p1(0x30),p1(0x31),p1(0x32),p1(0x33),p1(0x34),p1(0x35),p1(0x36),p1(0x37) ;\
561 .long p1(0x38),p1(0x39),p1(0x3a),p1(0x3b),p1(0x3c),p1(0x3d),p1(0x3e),p1(0x3f)
562 #define im_data2(p1) \
563 .long p1(0x40),p1(0x41),p1(0x42),p1(0x43),p1(0x44),p1(0x45),p1(0x46),p1(0x47) ;\
564 .long p1(0x48),p1(0x49),p1(0x4a),p1(0x4b),p1(0x4c),p1(0x4d),p1(0x4e),p1(0x4f) ;\
565 .long p1(0x50),p1(0x51),p1(0x52),p1(0x53),p1(0x54),p1(0x55),p1(0x56),p1(0x57) ;\
566 .long p1(0x58),p1(0x59),p1(0x5a),p1(0x5b),p1(0x5c),p1(0x5d),p1(0x5e),p1(0x5f)
567 #define im_data3(p1) \
568 .long p1(0x60),p1(0x61),p1(0x62),p1(0x63),p1(0x64),p1(0x65),p1(0x66),p1(0x67) ;\
569 .long p1(0x68),p1(0x69),p1(0x6a),p1(0x6b),p1(0x6c),p1(0x6d),p1(0x6e),p1(0x6f) ;\
570 .long p1(0x70),p1(0x71),p1(0x72),p1(0x73),p1(0x74),p1(0x75),p1(0x76),p1(0x77) ;\
571 .long p1(0x78),p1(0x79),p1(0x7a),p1(0x7b),p1(0x7c),p1(0x7d),p1(0x7e),p1(0x7f)
572 #define im_data4(p1) \
573 .long p1(0x80),p1(0x81),p1(0x82),p1(0x83),p1(0x84),p1(0x85),p1(0x86),p1(0x87) ;\
574 .long p1(0x88),p1(0x89),p1(0x8a),p1(0x8b),p1(0x8c),p1(0x8d),p1(0x8e),p1(0x8f) ;\
575 .long p1(0x90),p1(0x91),p1(0x92),p1(0x93),p1(0x94),p1(0x95),p1(0x96),p1(0x97) ;\
576 .long p1(0x98),p1(0x99),p1(0x9a),p1(0x9b),p1(0x9c),p1(0x9d),p1(0x9e),p1(0x9f)
577 #define im_data5(p1) \
578 .long p1(0xa0),p1(0xa1),p1(0xa2),p1(0xa3),p1(0xa4),p1(0xa5),p1(0xa6),p1(0xa7) ;\
579 .long p1(0xa8),p1(0xa9),p1(0xaa),p1(0xab),p1(0xac),p1(0xad),p1(0xae),p1(0xaf) ;\
580 .long p1(0xb0),p1(0xb1),p1(0xb2),p1(0xb3),p1(0xb4),p1(0xb5),p1(0xb6),p1(0xb7) ;\
581 .long p1(0xb8),p1(0xb9),p1(0xba),p1(0xbb),p1(0xbc),p1(0xbd),p1(0xbe),p1(0xbf)
582 #define im_data6(p1) \
583 .long p1(0xc0),p1(0xc1),p1(0xc2),p1(0xc3),p1(0xc4),p1(0xc5),p1(0xc6),p1(0xc7) ;\
584 .long p1(0xc8),p1(0xc9),p1(0xca),p1(0xcb),p1(0xcc),p1(0xcd),p1(0xce),p1(0xcf) ;\
585 .long p1(0xd0),p1(0xd1),p1(0xd2),p1(0xd3),p1(0xd4),p1(0xd5),p1(0xd6),p1(0xd7) ;\
586 .long p1(0xd8),p1(0xd9),p1(0xda),p1(0xdb),p1(0xdc),p1(0xdd),p1(0xde),p1(0xdf)
587 #define im_data7(p1) \
588 .long p1(0xe0),p1(0xe1),p1(0xe2),p1(0xe3),p1(0xe4),p1(0xe5),p1(0xe6),p1(0xe7) ;\
589 .long p1(0xe8),p1(0xe9),p1(0xea),p1(0xeb),p1(0xec),p1(0xed),p1(0xee),p1(0xef) ;\
590 .long p1(0xf0),p1(0xf1),p1(0xf2),p1(0xf3),p1(0xf4),p1(0xf5),p1(0xf6),p1(0xf7) ;\
591 .long p1(0xf8),p1(0xf9),p1(0xfa),p1(0xfb),p1(0xfc),p1(0xfd),p1(0xfe),p1(0xff)
592
593 // S-box data - 256 entries
//
// sb_data0 .. sb_data7 each emit 32 .long values; taken in order they list
// the S-box output for inputs 0x00 .. 0xff, each wrapped in the entry
// macro p1 so the same data can be expanded into differently packed tables.
594
595 #define sb_data0(p1) \
596 .long p1(0x63),p1(0x7c),p1(0x77),p1(0x7b),p1(0xf2),p1(0x6b),p1(0x6f),p1(0xc5) ;\
597 .long p1(0x30),p1(0x01),p1(0x67),p1(0x2b),p1(0xfe),p1(0xd7),p1(0xab),p1(0x76) ;\
598 .long p1(0xca),p1(0x82),p1(0xc9),p1(0x7d),p1(0xfa),p1(0x59),p1(0x47),p1(0xf0) ;\
599 .long p1(0xad),p1(0xd4),p1(0xa2),p1(0xaf),p1(0x9c),p1(0xa4),p1(0x72),p1(0xc0)
600 #define sb_data1(p1) \
601 .long p1(0xb7),p1(0xfd),p1(0x93),p1(0x26),p1(0x36),p1(0x3f),p1(0xf7),p1(0xcc) ;\
602 .long p1(0x34),p1(0xa5),p1(0xe5),p1(0xf1),p1(0x71),p1(0xd8),p1(0x31),p1(0x15) ;\
603 .long p1(0x04),p1(0xc7),p1(0x23),p1(0xc3),p1(0x18),p1(0x96),p1(0x05),p1(0x9a) ;\
604 .long p1(0x07),p1(0x12),p1(0x80),p1(0xe2),p1(0xeb),p1(0x27),p1(0xb2),p1(0x75)
605 #define sb_data2(p1) \
606 .long p1(0x09),p1(0x83),p1(0x2c),p1(0x1a),p1(0x1b),p1(0x6e),p1(0x5a),p1(0xa0) ;\
607 .long p1(0x52),p1(0x3b),p1(0xd6),p1(0xb3),p1(0x29),p1(0xe3),p1(0x2f),p1(0x84) ;\
608 .long p1(0x53),p1(0xd1),p1(0x00),p1(0xed),p1(0x20),p1(0xfc),p1(0xb1),p1(0x5b) ;\
609 .long p1(0x6a),p1(0xcb),p1(0xbe),p1(0x39),p1(0x4a),p1(0x4c),p1(0x58),p1(0xcf)
610 #define sb_data3(p1) \
611 .long p1(0xd0),p1(0xef),p1(0xaa),p1(0xfb),p1(0x43),p1(0x4d),p1(0x33),p1(0x85) ;\
612 .long p1(0x45),p1(0xf9),p1(0x02),p1(0x7f),p1(0x50),p1(0x3c),p1(0x9f),p1(0xa8) ;\
613 .long p1(0x51),p1(0xa3),p1(0x40),p1(0x8f),p1(0x92),p1(0x9d),p1(0x38),p1(0xf5) ;\
614 .long p1(0xbc),p1(0xb6),p1(0xda),p1(0x21),p1(0x10),p1(0xff),p1(0xf3),p1(0xd2)
615 #define sb_data4(p1) \
616 .long p1(0xcd),p1(0x0c),p1(0x13),p1(0xec),p1(0x5f),p1(0x97),p1(0x44),p1(0x17) ;\
617 .long p1(0xc4),p1(0xa7),p1(0x7e),p1(0x3d),p1(0x64),p1(0x5d),p1(0x19),p1(0x73) ;\
618 .long p1(0x60),p1(0x81),p1(0x4f),p1(0xdc),p1(0x22),p1(0x2a),p1(0x90),p1(0x88) ;\
619 .long p1(0x46),p1(0xee),p1(0xb8),p1(0x14),p1(0xde),p1(0x5e),p1(0x0b),p1(0xdb)
620 #define sb_data5(p1) \
621 .long p1(0xe0),p1(0x32),p1(0x3a),p1(0x0a),p1(0x49),p1(0x06),p1(0x24),p1(0x5c) ;\
622 .long p1(0xc2),p1(0xd3),p1(0xac),p1(0x62),p1(0x91),p1(0x95),p1(0xe4),p1(0x79) ;\
623 .long p1(0xe7),p1(0xc8),p1(0x37),p1(0x6d),p1(0x8d),p1(0xd5),p1(0x4e),p1(0xa9) ;\
624 .long p1(0x6c),p1(0x56),p1(0xf4),p1(0xea),p1(0x65),p1(0x7a),p1(0xae),p1(0x08)
625 #define sb_data6(p1) \
626 .long p1(0xba),p1(0x78),p1(0x25),p1(0x2e),p1(0x1c),p1(0xa6),p1(0xb4),p1(0xc6) ;\
627 .long p1(0xe8),p1(0xdd),p1(0x74),p1(0x1f),p1(0x4b),p1(0xbd),p1(0x8b),p1(0x8a) ;\
628 .long p1(0x70),p1(0x3e),p1(0xb5),p1(0x66),p1(0x48),p1(0x03),p1(0xf6),p1(0x0e) ;\
629 .long p1(0x61),p1(0x35),p1(0x57),p1(0xb9),p1(0x86),p1(0xc1),p1(0x1d),p1(0x9e)
630 #define sb_data7(p1) \
631 .long p1(0xe1),p1(0xf8),p1(0x98),p1(0x11),p1(0x69),p1(0xd9),p1(0x8e),p1(0x94) ;\
632 .long p1(0x9b),p1(0x1e),p1(0x87),p1(0xe9),p1(0xce),p1(0x55),p1(0x28),p1(0xdf) ;\
633 .long p1(0x8c),p1(0xa1),p1(0x89),p1(0x0d),p1(0xbf),p1(0xe6),p1(0x42),p1(0x68) ;\
634 .long p1(0x41),p1(0x99),p1(0x2d),p1(0x0f),p1(0xb0),p1(0x54),p1(0xbb),p1(0x16)
635
636 // Inverse S-box data - 256 entries
//
// ib_data0 .. ib_data7 each emit 32 .long values; taken in order they list
// the inverse S-box output for inputs 0x00 .. 0xff, each wrapped in the
// entry macro p1 so the same data can be expanded into differently packed
// tables.
637
638 #define ib_data0(p1) \
639 .long p1(0x52),p1(0x09),p1(0x6a),p1(0xd5),p1(0x30),p1(0x36),p1(0xa5),p1(0x38) ;\
640 .long p1(0xbf),p1(0x40),p1(0xa3),p1(0x9e),p1(0x81),p1(0xf3),p1(0xd7),p1(0xfb) ;\
641 .long p1(0x7c),p1(0xe3),p1(0x39),p1(0x82),p1(0x9b),p1(0x2f),p1(0xff),p1(0x87) ;\
642 .long p1(0x34),p1(0x8e),p1(0x43),p1(0x44),p1(0xc4),p1(0xde),p1(0xe9),p1(0xcb)
643 #define ib_data1(p1) \
644 .long p1(0x54),p1(0x7b),p1(0x94),p1(0x32),p1(0xa6),p1(0xc2),p1(0x23),p1(0x3d) ;\
645 .long p1(0xee),p1(0x4c),p1(0x95),p1(0x0b),p1(0x42),p1(0xfa),p1(0xc3),p1(0x4e) ;\
646 .long p1(0x08),p1(0x2e),p1(0xa1),p1(0x66),p1(0x28),p1(0xd9),p1(0x24),p1(0xb2) ;\
647 .long p1(0x76),p1(0x5b),p1(0xa2),p1(0x49),p1(0x6d),p1(0x8b),p1(0xd1),p1(0x25)
648 #define ib_data2(p1) \
649 .long p1(0x72),p1(0xf8),p1(0xf6),p1(0x64),p1(0x86),p1(0x68),p1(0x98),p1(0x16) ;\
650 .long p1(0xd4),p1(0xa4),p1(0x5c),p1(0xcc),p1(0x5d),p1(0x65),p1(0xb6),p1(0x92) ;\
651 .long p1(0x6c),p1(0x70),p1(0x48),p1(0x50),p1(0xfd),p1(0xed),p1(0xb9),p1(0xda) ;\
652 .long p1(0x5e),p1(0x15),p1(0x46),p1(0x57),p1(0xa7),p1(0x8d),p1(0x9d),p1(0x84)
653 #define ib_data3(p1) \
654 .long p1(0x90),p1(0xd8),p1(0xab),p1(0x00),p1(0x8c),p1(0xbc),p1(0xd3),p1(0x0a) ;\
655 .long p1(0xf7),p1(0xe4),p1(0x58),p1(0x05),p1(0xb8),p1(0xb3),p1(0x45),p1(0x06) ;\
656 .long p1(0xd0),p1(0x2c),p1(0x1e),p1(0x8f),p1(0xca),p1(0x3f),p1(0x0f),p1(0x02) ;\
657 .long p1(0xc1),p1(0xaf),p1(0xbd),p1(0x03),p1(0x01),p1(0x13),p1(0x8a),p1(0x6b)
658 #define ib_data4(p1) \
659 .long p1(0x3a),p1(0x91),p1(0x11),p1(0x41),p1(0x4f),p1(0x67),p1(0xdc),p1(0xea) ;\
660 .long p1(0x97),p1(0xf2),p1(0xcf),p1(0xce),p1(0xf0),p1(0xb4),p1(0xe6),p1(0x73) ;\
661 .long p1(0x96),p1(0xac),p1(0x74),p1(0x22),p1(0xe7),p1(0xad),p1(0x35),p1(0x85) ;\
662 .long p1(0xe2),p1(0xf9),p1(0x37),p1(0xe8),p1(0x1c),p1(0x75),p1(0xdf),p1(0x6e)
663 #define ib_data5(p1) \
664 .long p1(0x47),p1(0xf1),p1(0x1a),p1(0x71),p1(0x1d),p1(0x29),p1(0xc5),p1(0x89) ;\
665 .long p1(0x6f),p1(0xb7),p1(0x62),p1(0x0e),p1(0xaa),p1(0x18),p1(0xbe),p1(0x1b) ;\
666 .long p1(0xfc),p1(0x56),p1(0x3e),p1(0x4b),p1(0xc6),p1(0xd2),p1(0x79),p1(0x20) ;\
667 .long p1(0x9a),p1(0xdb),p1(0xc0),p1(0xfe),p1(0x78),p1(0xcd),p1(0x5a),p1(0xf4)
668 #define ib_data6(p1) \
669 .long p1(0x1f),p1(0xdd),p1(0xa8),p1(0x33),p1(0x88),p1(0x07),p1(0xc7),p1(0x31) ;\
670 .long p1(0xb1),p1(0x12),p1(0x10),p1(0x59),p1(0x27),p1(0x80),p1(0xec),p1(0x5f) ;\
671 .long p1(0x60),p1(0x51),p1(0x7f),p1(0xa9),p1(0x19),p1(0xb5),p1(0x4a),p1(0x0d) ;\
672 .long p1(0x2d),p1(0xe5),p1(0x7a),p1(0x9f),p1(0x93),p1(0xc9),p1(0x9c),p1(0xef)
673 #define ib_data7(p1) \
674 .long p1(0xa0),p1(0xe0),p1(0x3b),p1(0x4d),p1(0xae),p1(0x2a),p1(0xf5),p1(0xb0) ;\
675 .long p1(0xc8),p1(0xeb),p1(0xbb),p1(0x3c),p1(0x83),p1(0x53),p1(0x99),p1(0x61) ;\
676 .long p1(0x17),p1(0x2b),p1(0x04),p1(0x7e),p1(0xba),p1(0x77),p1(0xd6),p1(0x26) ;\
677 .long p1(0xe1),p1(0x69),p1(0x14),p1(0x63),p1(0x55),p1(0x21),p1(0x0c),p1(0x7d)
678
679 // The rcon_table (needed for the key schedule)
680 //
681 // Here is original Dr Brian Gladman's source code:
682 // _rcon_tab:
683 // %assign x 1
684 // %rep 29
685 // dd x
686 // %assign x f2(x)
687 // %endrep
688 //
689 // Here is precomputed output (it's more portable this way):
//
// 29 entries, one 32-bit word each. The ksc4/ksc6/ksc8 key schedule macros
// read entry p1 at byte offset 4*p1; the highest index used in this file is
// 9, by ksc4(9).
690
691 .align ALIGN32BYTES
692 aes_rcon_tab:
693 .long 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80
694 .long 0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f
695 .long 0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4
696 .long 0xb3,0x7d,0xfa,0xef,0xc5
697
698 // The forward xor tables
//
// aes_ft_tab holds four consecutive sub-tables of 256 words (tlen bytes)
// each, built from the S-box data with the u0, u1, u2 and u3 entry macros.
// It is the table used by every encryption round except the last.
699
700 .align ALIGN32BYTES
701 aes_ft_tab:
702 sb_data0(u0)
703 sb_data1(u0)
704 sb_data2(u0)
705 sb_data3(u0)
706 sb_data4(u0)
707 sb_data5(u0)
708 sb_data6(u0)
709 sb_data7(u0)
710
711 sb_data0(u1)
712 sb_data1(u1)
713 sb_data2(u1)
714 sb_data3(u1)
715 sb_data4(u1)
716 sb_data5(u1)
717 sb_data6(u1)
718 sb_data7(u1)
719
720 sb_data0(u2)
721 sb_data1(u2)
722 sb_data2(u2)
723 sb_data3(u2)
724 sb_data4(u2)
725 sb_data5(u2)
726 sb_data6(u2)
727 sb_data7(u2)
728
729 sb_data0(u3)
730 sb_data1(u3)
731 sb_data2(u3)
732 sb_data3(u3)
733 sb_data4(u3)
734 sb_data5(u3)
735 sb_data6(u3)
736 sb_data7(u3)
737
// Forward last-round table: four consecutive sub-tables of 256 words (tlen
// bytes) each, holding the S-box byte in byte position 0, 1, 2 and 3 of the
// word (entry macros w0..w3). Used by the last encryption round and by the
// key schedule macros.
738 .align ALIGN32BYTES
739 aes_fl_tab:
740 sb_data0(w0)
741 sb_data1(w0)
742 sb_data2(w0)
743 sb_data3(w0)
744 sb_data4(w0)
745 sb_data5(w0)
746 sb_data6(w0)
747 sb_data7(w0)
748
749 sb_data0(w1)
750 sb_data1(w1)
751 sb_data2(w1)
752 sb_data3(w1)
753 sb_data4(w1)
754 sb_data5(w1)
755 sb_data6(w1)
756 sb_data7(w1)
757
758 sb_data0(w2)
759 sb_data1(w2)
760 sb_data2(w2)
761 sb_data3(w2)
762 sb_data4(w2)
763 sb_data5(w2)
764 sb_data6(w2)
765 sb_data7(w2)
766
767 sb_data0(w3)
768 sb_data1(w3)
769 sb_data2(w3)
770 sb_data3(w3)
771 sb_data4(w3)
772 sb_data5(w3)
773 sb_data6(w3)
774 sb_data7(w3)
775
776 // The inverse xor tables
//
// aes_it_tab holds four consecutive sub-tables of 256 words (tlen bytes)
// each, built from the inverse S-box data with the v0, v1, v2 and v3 entry
// macros. It is the table used by every decryption round except the last.
777
778 .align ALIGN32BYTES
779 aes_it_tab:
780 ib_data0(v0)
781 ib_data1(v0)
782 ib_data2(v0)
783 ib_data3(v0)
784 ib_data4(v0)
785 ib_data5(v0)
786 ib_data6(v0)
787 ib_data7(v0)
788
789 ib_data0(v1)
790 ib_data1(v1)
791 ib_data2(v1)
792 ib_data3(v1)
793 ib_data4(v1)
794 ib_data5(v1)
795 ib_data6(v1)
796 ib_data7(v1)
797
798 ib_data0(v2)
799 ib_data1(v2)
800 ib_data2(v2)
801 ib_data3(v2)
802 ib_data4(v2)
803 ib_data5(v2)
804 ib_data6(v2)
805 ib_data7(v2)
806
807 ib_data0(v3)
808 ib_data1(v3)
809 ib_data2(v3)
810 ib_data3(v3)
811 ib_data4(v3)
812 ib_data5(v3)
813 ib_data6(v3)
814 ib_data7(v3)
815
// Inverse last-round table: four consecutive sub-tables of 256 words (tlen
// bytes) each, holding the inverse S-box byte in byte position 0, 1, 2 and
// 3 of the word (entry macros w0..w3). Used by the last decryption round.
816 .align ALIGN32BYTES
817 aes_il_tab:
818 ib_data0(w0)
819 ib_data1(w0)
820 ib_data2(w0)
821 ib_data3(w0)
822 ib_data4(w0)
823 ib_data5(w0)
824 ib_data6(w0)
825 ib_data7(w0)
826
827 ib_data0(w1)
828 ib_data1(w1)
829 ib_data2(w1)
830 ib_data3(w1)
831 ib_data4(w1)
832 ib_data5(w1)
833 ib_data6(w1)
834 ib_data7(w1)
835
836 ib_data0(w2)
837 ib_data1(w2)
838 ib_data2(w2)
839 ib_data3(w2)
840 ib_data4(w2)
841 ib_data5(w2)
842 ib_data6(w2)
843 ib_data7(w2)
844
845 ib_data0(w3)
846 ib_data1(w3)
847 ib_data2(w3)
848 ib_data3(w3)
849 ib_data4(w3)
850 ib_data5(w3)
851 ib_data6(w3)
852 ib_data7(w3)
853
854 // The inverse mix column tables
//
// aes_im_tab holds four consecutive sub-tables of 256 words (tlen bytes)
// each. Entry n is v0(n) .. v3(n) applied to the index n itself (im_data),
// not to an S-box value. aes_set_key uses it through mix_col when it builds
// the decryption key schedule.
855
856 .align ALIGN32BYTES
857 aes_im_tab:
858 im_data0(v0)
859 im_data1(v0)
860 im_data2(v0)
861 im_data3(v0)
862 im_data4(v0)
863 im_data5(v0)
864 im_data6(v0)
865 im_data7(v0)
866
867 im_data0(v1)
868 im_data1(v1)
869 im_data2(v1)
870 im_data3(v1)
871 im_data4(v1)
872 im_data5(v1)
873 im_data6(v1)
874 im_data7(v1)
875
876 im_data0(v2)
877 im_data1(v2)
878 im_data2(v2)
879 im_data3(v2)
880 im_data4(v2)
881 im_data5(v2)
882 im_data6(v2)
883 im_data7(v2)
884
885 im_data0(v3)
886 im_data1(v3)
887 im_data2(v3)
888 im_data3(v3)
889 im_data4(v3)
890 im_data5(v3)
891 im_data6(v3)
892 im_data7(v3)