git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/aes/aes_x86core.c
Experimental symbol renaming to avoid clashes with regular OpenSSL.
[thirdparty/openssl.git] / crypto / aes / aes_x86core.c
1 /* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
2 /**
3 * rijndael-alg-fst.c
4 *
5 * @version 3.0 (December 2000)
6 *
7 * Optimised ANSI C code for the Rijndael cipher (now AES)
8 *
9 * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
10 * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
11 * @author Paulo Barreto <paulo.barreto@terra.com.br>
12 *
13 * This code is hereby placed in the public domain.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
24 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
25 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * This is experimental x86[_64] derivative. It assumes little-endian
30 * byte order and expects CPU to sustain unaligned memory references.
31 * It is used as playground for cache-time attack mitigations and
32 * serves as reference C implementation for x86[_64] assembler.
33 *
34 * <appro@fy.chalmers.se>
35 */
36
37
38 #ifndef AES_DEBUG
39 # ifndef NDEBUG
40 # define NDEBUG
41 # endif
42 #endif
43 #include <assert.h>
44
45 #include <stdlib.h>
46 #include <crypto/aes.h>
47 #include <openssl/aes.h>
48 #include "aes_locl.h"
49
50 /*
51 * These two parameters control which table, 256-byte or 2KB, is
52 * referenced in outer and respectively inner rounds.
53 */
54 #define AES_COMPACT_IN_OUTER_ROUNDS
55 #ifdef AES_COMPACT_IN_OUTER_ROUNDS
56 /* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
57 * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further*
58 * by factor of ~2. */
59 # undef AES_COMPACT_IN_INNER_ROUNDS
60 #endif
61
62 #if 1
/*
 * Read one word out of every 32-byte stride of the first 256 bytes of
 * table and fold the words together.  The function has no result; the
 * volatile accesses exist only so that the loads are actually performed.
 */
static void prefetch256(const void *table)
{
    volatile unsigned long *words = (void *)table;
    volatile unsigned long sink;
    unsigned long folded = 0;
    int pos = 0;

    /* 32 bytes is the smallest cache-line size commonly encountered */
    while (pos < 256 / sizeof(words[0])) {
        folded ^= words[pos];
        pos += 32 / sizeof(words[0]);
    }

    /* park the folded value in a volatile local */
    sink = folded;
}
74 #else
75 # define prefetch256(t)
76 #endif
77
78 #undef GETU32
79 #define GETU32(p) (*((u32*)(p)))
80
81 #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
82 typedef unsigned __int64 u64;
83 #define U64(C) C##UI64
84 #elif defined(__arch64__)
85 typedef unsigned long u64;
86 #define U64(C) C##UL
87 #else
88 typedef unsigned long long u64;
89 #define U64(C) C##ULL
90 #endif
91
/*
 * ROTATE(a,n): rotate the 32-bit value a left by n bits, using a compiler
 * intrinsic or the x86 "roll" instruction.  n must be a compile-time
 * constant in the range accepted by the "I" asm constraint.
 *
 * ROTATE is deliberately left undefined for any other compiler/target;
 * the code in this file tests defined(ROTATE) and falls back to plain
 * shifts when it is not available.
 */
#undef ROTATE
#if defined(_MSC_VER)
# define ROTATE(a,n)    _lrotl(a,n)
#elif defined(__ICC)
/*
 * _lrotl() operates on unsigned long, which is 64 bits wide on LP64
 * targets, so it is not a 32-bit rotate there; _rotl() is.
 */
# define ROTATE(a,n)    _rotl(a,n)
#elif defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
/* __asm__ rather than asm, so the macro also compiles under -std=c99/c11 */
#  define ROTATE(a,n)   ({ register unsigned int ret;   \
                           __asm__ (                    \
                           "roll %1,%0"                 \
                           : "=r"(ret)                  \
                           : "I"(n), "0"(a)             \
                           : "cc");                     \
                           ret;                         \
                        })
# endif
#endif
/*
 * Te is a single table of 64-bit entries, each holding the 4-byte column
 * pattern twice:
 *
 * Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
 *
 * Te0..Te3 are views of that table read at byte offsets 0, 3, 2 and 1
 * (this file assumes little-endian byte order and unaligned loads):
 *
 * Te0[x] = S [x].[02, 01, 01, 03];
 * Te1[x] = S [x].[03, 02, 01, 01];
 * Te2[x] = S [x].[01, 03, 02, 01];
 * Te3[x] = S [x].[01, 01, 03, 02];
 *
 * TeN[x] parses as (u32)(pointer[x]): the subscript applies to the u64
 * pointer and the loaded 64-bit value is then truncated to u32.  The
 * casts keep the const qualifier of the table.
 *
 * NOTE(review): with a non-zero byte offset the 8-byte load for x == 255
 * appears to extend past the end of Te[256] -- confirm this is acceptable.
 */
#define Te0 (u32)((const u64*)((const u8*)Te+0))
#define Te1 (u32)((const u64*)((const u8*)Te+3))
#define Te2 (u32)((const u64*)((const u8*)Te+2))
#define Te3 (u32)((const u64*)((const u8*)Te+1))
/*
 * Td is a single table of 64-bit entries, each holding the 4-byte column
 * pattern twice:
 *
 * Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
 *
 * Td0..Td3 are views of that table read at byte offsets 0, 3, 2 and 1
 * (this file assumes little-endian byte order and unaligned loads):
 *
 * Td0[x] = Si[x].[0e, 09, 0d, 0b];
 * Td1[x] = Si[x].[0b, 0e, 09, 0d];
 * Td2[x] = Si[x].[0d, 0b, 0e, 09];
 * Td3[x] = Si[x].[09, 0d, 0b, 0e];
 * Td4[x] = Si[x].[01];
 *
 * TdN[x] parses as (u32)(pointer[x]): the subscript applies to the u64
 * pointer and the loaded 64-bit value is then truncated to u32.  The
 * casts keep the const qualifier of the table.
 *
 * NOTE(review): with a non-zero byte offset the 8-byte load for x == 255
 * appears to extend past the end of Td[256] -- confirm this is acceptable.
 */
#define Td0 (u32)((const u64*)((const u8*)Td+0))
#define Td1 (u32)((const u64*)((const u8*)Td+3))
#define Td2 (u32)((const u64*)((const u8*)Td+2))
#define Td3 (u32)((const u64*)((const u8*)Td+1))
130
131 static const u64 Te[256] = {
132 U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
133 U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
134 U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
135 U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
136 U64(0x5030306050303060), U64(0x0301010203010102),
137 U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
138 U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
139 U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
140 U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
141 U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
142 U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
143 U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
144 U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
145 U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
146 U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
147 U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
148 U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
149 U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
150 U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
151 U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
152 U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
153 U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
154 U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
155 U64(0x5331316253313162), U64(0x3f15152a3f15152a),
156 U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
157 U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
158 U64(0x2818183028181830), U64(0xa1969637a1969637),
159 U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
160 U64(0x0907070e0907070e), U64(0x3612122436121224),
161 U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
162 U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
163 U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
164 U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
165 U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
166 U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
167 U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
168 U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
169 U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
170 U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
171 U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
172 U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
173 U64(0x0000000000000000), U64(0x2cededc12cededc1),
174 U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
175 U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
176 U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
177 U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
178 U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
179 U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
180 U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
181 U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
182 U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
183 U64(0x5533336655333366), U64(0x9485851194858511),
184 U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
185 U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
186 U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
187 U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
188 U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
189 U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
190 U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
191 U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
192 U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
193 U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
194 U64(0x3010102030101020), U64(0x1affffe51affffe5),
195 U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
196 U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
197 U64(0x3513132635131326), U64(0x2fececc32fececc3),
198 U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
199 U64(0xcc444488cc444488), U64(0x3917172e3917172e),
200 U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
201 U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
202 U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
203 U64(0x2b1919322b191932), U64(0x957373e6957373e6),
204 U64(0xa06060c0a06060c0), U64(0x9881811998818119),
205 U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
206 U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
207 U64(0xab90903bab90903b), U64(0x8388880b8388880b),
208 U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
209 U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
210 U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
211 U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
212 U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
213 U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
214 U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
215 U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
216 U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
217 U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
218 U64(0xa8919139a8919139), U64(0xa4959531a4959531),
219 U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
220 U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
221 U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
222 U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
223 U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
224 U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
225 U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
226 U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
227 U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
228 U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
229 U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
230 U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
231 U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
232 U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
233 U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
234 U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
235 U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
236 U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
237 U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
238 U64(0xd8484890d8484890), U64(0x0503030605030306),
239 U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
240 U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
241 U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
242 U64(0x9186861791868617), U64(0x58c1c19958c1c199),
243 U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
244 U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
245 U64(0xb398982bb398982b), U64(0x3311112233111122),
246 U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
247 U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
248 U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
249 U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
250 U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
251 U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
252 U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
253 U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
254 U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
255 U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
256 U64(0xc3414182c3414182), U64(0xb0999929b0999929),
257 U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
258 U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
259 U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
260 };
261
262 static const u8 Te4[256] = {
263 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
264 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
265 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
266 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
267 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
268 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
269 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
270 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
271 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
272 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
273 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
274 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
275 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
276 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
277 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
278 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
279 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
280 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
281 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
282 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
283 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
284 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
285 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
286 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
287 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
288 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
289 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
290 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
291 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
292 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
293 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
294 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
295 };
296
297 static const u64 Td[256] = {
298 U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
299 U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
300 U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
301 U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
302 U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
303 U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
304 U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
305 U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
306 U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
307 U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
308 U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
309 U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
310 U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
311 U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
312 U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
313 U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
314 U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
315 U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
316 U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
317 U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
318 U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
319 U64(0x6033519760335197), U64(0x457f5362457f5362),
320 U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
321 U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
322 U64(0x5868487058684870), U64(0x19fd458f19fd458f),
323 U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
324 U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
325 U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
326 U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
327 U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
328 U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
329 U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
330 U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
331 U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
332 U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
333 U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
334 U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
335 U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
336 U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
337 U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
338 U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
339 U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
340 U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
341 U64(0x6fd406046fd40604), U64(0xff155060ff155060),
342 U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
343 U64(0xcc434089cc434089), U64(0x779ed967779ed967),
344 U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
345 U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
346 U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
347 U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
348 U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
349 U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
350 U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
351 U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
352 U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
353 U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
354 U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
355 U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
356 U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
357 U64(0x694b775a694b775a), U64(0x161a121c161a121c),
358 U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
359 U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
360 U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
361 U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
362 U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
363 U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
364 U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
365 U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
366 U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
367 U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
368 U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
369 U64(0x4022971340229713), U64(0x2011c6842011c684),
370 U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
371 U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
372 U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
373 U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
374 U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
375 U64(0xfa489411fa489411), U64(0x2264e9472264e947),
376 U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
377 U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
378 U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
379 U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
380 U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
381 U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
382 U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
383 U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
384 U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
385 U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
386 U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
387 U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
388 U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
389 U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
390 U64(0x097826cd097826cd), U64(0xf418596ef418596e),
391 U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
392 U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
393 U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
394 U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
395 U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
396 U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
397 U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
398 U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
399 U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
400 U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
401 U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
402 U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
403 U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
404 U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
405 U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
406 U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
407 U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
408 U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
409 U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
410 U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
411 U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
412 U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
413 U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
414 U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
415 U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
416 U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
417 U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
418 U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
419 U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
420 U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
421 U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
422 U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
423 U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
424 U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
425 U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
426 };
427 static const u8 Td4[256] = {
428 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
429 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
430 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
431 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
432 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
433 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
434 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
435 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
436 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
437 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
438 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
439 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
440 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
441 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
442 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
443 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
444 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
445 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
446 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
447 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
448 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
449 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
450 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
451 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
452 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
453 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
454 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
455 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
456 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
457 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
458 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
459 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
460 };
461
462 static const u32 rcon[] = {
463 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
464 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
465 0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
466 };
467
468 /**
469 * Expand the cipher key into the encryption key schedule.
470 */
471 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
472 AES_KEY *key) {
473
474 u32 *rk;
475 int i = 0;
476 u32 temp;
477
478 if (!userKey || !key)
479 return -1;
480 if (bits != 128 && bits != 192 && bits != 256)
481 return -2;
482
483 rk = key->rd_key;
484
485 if (bits==128)
486 key->rounds = 10;
487 else if (bits==192)
488 key->rounds = 12;
489 else
490 key->rounds = 14;
491
492 rk[0] = GETU32(userKey );
493 rk[1] = GETU32(userKey + 4);
494 rk[2] = GETU32(userKey + 8);
495 rk[3] = GETU32(userKey + 12);
496 if (bits == 128) {
497 while (1) {
498 temp = rk[3];
499 rk[4] = rk[0] ^
500 (Te4[(temp >> 8) & 0xff] ) ^
501 (Te4[(temp >> 16) & 0xff] << 8) ^
502 (Te4[(temp >> 24) ] << 16) ^
503 (Te4[(temp ) & 0xff] << 24) ^
504 rcon[i];
505 rk[5] = rk[1] ^ rk[4];
506 rk[6] = rk[2] ^ rk[5];
507 rk[7] = rk[3] ^ rk[6];
508 if (++i == 10) {
509 return 0;
510 }
511 rk += 4;
512 }
513 }
514 rk[4] = GETU32(userKey + 16);
515 rk[5] = GETU32(userKey + 20);
516 if (bits == 192) {
517 while (1) {
518 temp = rk[ 5];
519 rk[ 6] = rk[ 0] ^
520 (Te4[(temp >> 8) & 0xff] ) ^
521 (Te4[(temp >> 16) & 0xff] << 8) ^
522 (Te4[(temp >> 24) ] << 16) ^
523 (Te4[(temp ) & 0xff] << 24) ^
524 rcon[i];
525 rk[ 7] = rk[ 1] ^ rk[ 6];
526 rk[ 8] = rk[ 2] ^ rk[ 7];
527 rk[ 9] = rk[ 3] ^ rk[ 8];
528 if (++i == 8) {
529 return 0;
530 }
531 rk[10] = rk[ 4] ^ rk[ 9];
532 rk[11] = rk[ 5] ^ rk[10];
533 rk += 6;
534 }
535 }
536 rk[6] = GETU32(userKey + 24);
537 rk[7] = GETU32(userKey + 28);
538 if (bits == 256) {
539 while (1) {
540 temp = rk[ 7];
541 rk[ 8] = rk[ 0] ^
542 (Te4[(temp >> 8) & 0xff] ) ^
543 (Te4[(temp >> 16) & 0xff] << 8) ^
544 (Te4[(temp >> 24) ] << 16) ^
545 (Te4[(temp ) & 0xff] << 24) ^
546 rcon[i];
547 rk[ 9] = rk[ 1] ^ rk[ 8];
548 rk[10] = rk[ 2] ^ rk[ 9];
549 rk[11] = rk[ 3] ^ rk[10];
550 if (++i == 7) {
551 return 0;
552 }
553 temp = rk[11];
554 rk[12] = rk[ 4] ^
555 (Te4[(temp ) & 0xff] ) ^
556 (Te4[(temp >> 8) & 0xff] << 8) ^
557 (Te4[(temp >> 16) & 0xff] << 16) ^
558 (Te4[(temp >> 24) ] << 24);
559 rk[13] = rk[ 5] ^ rk[12];
560 rk[14] = rk[ 6] ^ rk[13];
561 rk[15] = rk[ 7] ^ rk[14];
562
563 rk += 8;
564 }
565 }
566 return 0;
567 }
568
569 /**
570 * Expand the cipher key into the decryption key schedule.
571 */
572 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
573 AES_KEY *key) {
574
575 u32 *rk;
576 int i, j, status;
577 u32 temp;
578
579 /* first, start with an encryption schedule */
580 status = AES_set_encrypt_key(userKey, bits, key);
581 if (status < 0)
582 return status;
583
584 rk = key->rd_key;
585
586 /* invert the order of the round keys: */
587 for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
588 temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
589 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
590 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
591 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
592 }
593 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
594 for (i = 1; i < (key->rounds); i++) {
595 rk += 4;
596 #if 1
597 for (j = 0; j < 4; j++) {
598 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
599
600 tp1 = rk[j];
601 m = tp1 & 0x80808080;
602 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
603 ((m - (m >> 7)) & 0x1b1b1b1b);
604 m = tp2 & 0x80808080;
605 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
606 ((m - (m >> 7)) & 0x1b1b1b1b);
607 m = tp4 & 0x80808080;
608 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
609 ((m - (m >> 7)) & 0x1b1b1b1b);
610 tp9 = tp8 ^ tp1;
611 tpb = tp9 ^ tp2;
612 tpd = tp9 ^ tp4;
613 tpe = tp8 ^ tp4 ^ tp2;
614 #if defined(ROTATE)
615 rk[j] = tpe ^ ROTATE(tpd,16) ^
616 ROTATE(tp9,8) ^ ROTATE(tpb,24);
617 #else
618 rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
619 (tp9 >> 24) ^ (tp9 << 8) ^
620 (tpb >> 8) ^ (tpb << 24);
621 #endif
622 }
623 #else
624 rk[0] =
625 Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^
626 Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^
627 Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
628 Td3[Te2[(rk[0] >> 24) ] & 0xff];
629 rk[1] =
630 Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^
631 Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^
632 Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
633 Td3[Te2[(rk[1] >> 24) ] & 0xff];
634 rk[2] =
635 Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^
636 Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^
637 Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
638 Td3[Te2[(rk[2] >> 24) ] & 0xff];
639 rk[3] =
640 Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^
641 Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^
642 Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
643 Td3[Te2[(rk[3] >> 24) ] & 0xff];
644 #endif
645 }
646 return 0;
647 }
648
649 /*
650 * Encrypt a single block
651 * in and out can overlap
652 */
653 void AES_encrypt(const unsigned char *in, unsigned char *out,
654 const AES_KEY *key) {
655
656 const u32 *rk;
657 u32 s0, s1, s2, s3, t[4];
658 int r;
659
660 assert(in && out && key);
661 rk = key->rd_key;
662
663 /*
664 * map byte array block to cipher state
665 * and add initial round key:
666 */
667 s0 = GETU32(in ) ^ rk[0];
668 s1 = GETU32(in + 4) ^ rk[1];
669 s2 = GETU32(in + 8) ^ rk[2];
670 s3 = GETU32(in + 12) ^ rk[3];
671
672 #if defined(AES_COMPACT_IN_OUTER_ROUNDS)
673 prefetch256(Te4);
674
675 t[0] = Te4[(s0 ) & 0xff] ^
676 Te4[(s1 >> 8) & 0xff] << 8 ^
677 Te4[(s2 >> 16) & 0xff] << 16 ^
678 Te4[(s3 >> 24) ] << 24;
679 t[1] = Te4[(s1 ) & 0xff] ^
680 Te4[(s2 >> 8) & 0xff] << 8 ^
681 Te4[(s3 >> 16) & 0xff] << 16 ^
682 Te4[(s0 >> 24) ] << 24;
683 t[2] = Te4[(s2 ) & 0xff] ^
684 Te4[(s3 >> 8) & 0xff] << 8 ^
685 Te4[(s0 >> 16) & 0xff] << 16 ^
686 Te4[(s1 >> 24) ] << 24;
687 t[3] = Te4[(s3 ) & 0xff] ^
688 Te4[(s0 >> 8) & 0xff] << 8 ^
689 Te4[(s1 >> 16) & 0xff] << 16 ^
690 Te4[(s2 >> 24) ] << 24;
691
692 /* now do the linear transform using words */
693 { int i;
694 u32 r0, r1, r2;
695
696 for (i = 0; i < 4; i++) {
697 r0 = t[i];
698 r1 = r0 & 0x80808080;
699 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
700 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
701 #if defined(ROTATE)
702 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
703 ROTATE(r0,16) ^ ROTATE(r0,8);
704 #else
705 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
706 (r0 << 16) ^ (r0 >> 16) ^
707 (r0 << 8) ^ (r0 >> 24);
708 #endif
709 t[i] ^= rk[4+i];
710 }
711 }
712 #else
713 t[0] = Te0[(s0 ) & 0xff] ^
714 Te1[(s1 >> 8) & 0xff] ^
715 Te2[(s2 >> 16) & 0xff] ^
716 Te3[(s3 >> 24) ] ^
717 rk[4];
718 t[1] = Te0[(s1 ) & 0xff] ^
719 Te1[(s2 >> 8) & 0xff] ^
720 Te2[(s3 >> 16) & 0xff] ^
721 Te3[(s0 >> 24) ] ^
722 rk[5];
723 t[2] = Te0[(s2 ) & 0xff] ^
724 Te1[(s3 >> 8) & 0xff] ^
725 Te2[(s0 >> 16) & 0xff] ^
726 Te3[(s1 >> 24) ] ^
727 rk[6];
728 t[3] = Te0[(s3 ) & 0xff] ^
729 Te1[(s0 >> 8) & 0xff] ^
730 Te2[(s1 >> 16) & 0xff] ^
731 Te3[(s2 >> 24) ] ^
732 rk[7];
733 #endif
734 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
735
736 /*
737 * Nr - 2 full rounds:
738 */
739 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
740 #if defined(AES_COMPACT_IN_INNER_ROUNDS)
741 t[0] = Te4[(s0 ) & 0xff] ^
742 Te4[(s1 >> 8) & 0xff] << 8 ^
743 Te4[(s2 >> 16) & 0xff] << 16 ^
744 Te4[(s3 >> 24) ] << 24;
745 t[1] = Te4[(s1 ) & 0xff] ^
746 Te4[(s2 >> 8) & 0xff] << 8 ^
747 Te4[(s3 >> 16) & 0xff] << 16 ^
748 Te4[(s0 >> 24) ] << 24;
749 t[2] = Te4[(s2 ) & 0xff] ^
750 Te4[(s3 >> 8) & 0xff] << 8 ^
751 Te4[(s0 >> 16) & 0xff] << 16 ^
752 Te4[(s1 >> 24) ] << 24;
753 t[3] = Te4[(s3 ) & 0xff] ^
754 Te4[(s0 >> 8) & 0xff] << 8 ^
755 Te4[(s1 >> 16) & 0xff] << 16 ^
756 Te4[(s2 >> 24) ] << 24;
757
758 /* now do the linear transform using words */
759 { int i;
760 u32 r0, r1, r2;
761
762 for (i = 0; i < 4; i++) {
763 r0 = t[i];
764 r1 = r0 & 0x80808080;
765 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
766 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
767 #if defined(ROTATE)
768 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
769 ROTATE(r0,16) ^ ROTATE(r0,8);
770 #else
771 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
772 (r0 << 16) ^ (r0 >> 16) ^
773 (r0 << 8) ^ (r0 >> 24);
774 #endif
775 t[i] ^= rk[i];
776 }
777 }
778 #else
779 t[0] = Te0[(s0 ) & 0xff] ^
780 Te1[(s1 >> 8) & 0xff] ^
781 Te2[(s2 >> 16) & 0xff] ^
782 Te3[(s3 >> 24) ] ^
783 rk[0];
784 t[1] = Te0[(s1 ) & 0xff] ^
785 Te1[(s2 >> 8) & 0xff] ^
786 Te2[(s3 >> 16) & 0xff] ^
787 Te3[(s0 >> 24) ] ^
788 rk[1];
789 t[2] = Te0[(s2 ) & 0xff] ^
790 Te1[(s3 >> 8) & 0xff] ^
791 Te2[(s0 >> 16) & 0xff] ^
792 Te3[(s1 >> 24) ] ^
793 rk[2];
794 t[3] = Te0[(s3 ) & 0xff] ^
795 Te1[(s0 >> 8) & 0xff] ^
796 Te2[(s1 >> 16) & 0xff] ^
797 Te3[(s2 >> 24) ] ^
798 rk[3];
799 #endif
800 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
801 }
802 /*
803 * apply last round and
804 * map cipher state to byte array block:
805 */
806 #if defined(AES_COMPACT_IN_OUTER_ROUNDS)
807 prefetch256(Te4);
808
809 *(u32*)(out+0) =
810 Te4[(s0 ) & 0xff] ^
811 Te4[(s1 >> 8) & 0xff] << 8 ^
812 Te4[(s2 >> 16) & 0xff] << 16 ^
813 Te4[(s3 >> 24) ] << 24 ^
814 rk[0];
815 *(u32*)(out+4) =
816 Te4[(s1 ) & 0xff] ^
817 Te4[(s2 >> 8) & 0xff] << 8 ^
818 Te4[(s3 >> 16) & 0xff] << 16 ^
819 Te4[(s0 >> 24) ] << 24 ^
820 rk[1];
821 *(u32*)(out+8) =
822 Te4[(s2 ) & 0xff] ^
823 Te4[(s3 >> 8) & 0xff] << 8 ^
824 Te4[(s0 >> 16) & 0xff] << 16 ^
825 Te4[(s1 >> 24) ] << 24 ^
826 rk[2];
827 *(u32*)(out+12) =
828 Te4[(s3 ) & 0xff] ^
829 Te4[(s0 >> 8) & 0xff] << 8 ^
830 Te4[(s1 >> 16) & 0xff] << 16 ^
831 Te4[(s2 >> 24) ] << 24 ^
832 rk[3];
833 #else
834 *(u32*)(out+0) =
835 (Te2[(s0 ) & 0xff] & 0x000000ffU) ^
836 (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^
837 (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
838 (Te1[(s3 >> 24) ] & 0xff000000U) ^
839 rk[0];
840 *(u32*)(out+4) =
841 (Te2[(s1 ) & 0xff] & 0x000000ffU) ^
842 (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^
843 (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
844 (Te1[(s0 >> 24) ] & 0xff000000U) ^
845 rk[1];
846 *(u32*)(out+8) =
847 (Te2[(s2 ) & 0xff] & 0x000000ffU) ^
848 (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^
849 (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
850 (Te1[(s1 >> 24) ] & 0xff000000U) ^
851 rk[2];
852 *(u32*)(out+12) =
853 (Te2[(s3 ) & 0xff] & 0x000000ffU) ^
854 (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^
855 (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
856 (Te1[(s2 >> 24) ] & 0xff000000U) ^
857 rk[3];
858 #endif
859 }
860
861 /*
862 * Decrypt a single block
863 * in and out can overlap
864 */
865 void AES_decrypt(const unsigned char *in, unsigned char *out,
866 const AES_KEY *key) {
867
868 const u32 *rk;
869 u32 s0, s1, s2, s3, t[4];
870 int r;
871
872 assert(in && out && key);
873 rk = key->rd_key;
874
875 /*
876 * map byte array block to cipher state
877 * and add initial round key:
878 */
879 s0 = GETU32(in ) ^ rk[0];
880 s1 = GETU32(in + 4) ^ rk[1];
881 s2 = GETU32(in + 8) ^ rk[2];
882 s3 = GETU32(in + 12) ^ rk[3];
883
884 #if defined(AES_COMPACT_IN_OUTER_ROUNDS)
885 prefetch256(Td4);
886
887 t[0] = Td4[(s0 ) & 0xff] ^
888 Td4[(s3 >> 8) & 0xff] << 8 ^
889 Td4[(s2 >> 16) & 0xff] << 16 ^
890 Td4[(s1 >> 24) ] << 24;
891 t[1] = Td4[(s1 ) & 0xff] ^
892 Td4[(s0 >> 8) & 0xff] << 8 ^
893 Td4[(s3 >> 16) & 0xff] << 16 ^
894 Td4[(s2 >> 24) ] << 24;
895 t[2] = Td4[(s2 ) & 0xff] ^
896 Td4[(s1 >> 8) & 0xff] << 8 ^
897 Td4[(s0 >> 16) & 0xff] << 16 ^
898 Td4[(s3 >> 24) ] << 24;
899 t[3] = Td4[(s3 ) & 0xff] ^
900 Td4[(s2 >> 8) & 0xff] << 8 ^
901 Td4[(s1 >> 16) & 0xff] << 16 ^
902 Td4[(s0 >> 24) ] << 24;
903
904 /* now do the linear transform using words */
905 { int i;
906 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
907
908 for (i = 0; i < 4; i++) {
909 tp1 = t[i];
910 m = tp1 & 0x80808080;
911 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
912 ((m - (m >> 7)) & 0x1b1b1b1b);
913 m = tp2 & 0x80808080;
914 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
915 ((m - (m >> 7)) & 0x1b1b1b1b);
916 m = tp4 & 0x80808080;
917 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
918 ((m - (m >> 7)) & 0x1b1b1b1b);
919 tp9 = tp8 ^ tp1;
920 tpb = tp9 ^ tp2;
921 tpd = tp9 ^ tp4;
922 tpe = tp8 ^ tp4 ^ tp2;
923 #if defined(ROTATE)
924 t[i] = tpe ^ ROTATE(tpd,16) ^
925 ROTATE(tp9,8) ^ ROTATE(tpb,24);
926 #else
927 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
928 (tp9 >> 24) ^ (tp9 << 8) ^
929 (tpb >> 8) ^ (tpb << 24);
930 #endif
931 t[i] ^= rk[4+i];
932 }
933 }
934 #else
935 t[0] = Td0[(s0 ) & 0xff] ^
936 Td1[(s3 >> 8) & 0xff] ^
937 Td2[(s2 >> 16) & 0xff] ^
938 Td3[(s1 >> 24) ] ^
939 rk[4];
940 t[1] = Td0[(s1 ) & 0xff] ^
941 Td1[(s0 >> 8) & 0xff] ^
942 Td2[(s3 >> 16) & 0xff] ^
943 Td3[(s2 >> 24) ] ^
944 rk[5];
945 t[2] = Td0[(s2 ) & 0xff] ^
946 Td1[(s1 >> 8) & 0xff] ^
947 Td2[(s0 >> 16) & 0xff] ^
948 Td3[(s3 >> 24) ] ^
949 rk[6];
950 t[3] = Td0[(s3 ) & 0xff] ^
951 Td1[(s2 >> 8) & 0xff] ^
952 Td2[(s1 >> 16) & 0xff] ^
953 Td3[(s0 >> 24) ] ^
954 rk[7];
955 #endif
956 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
957
958 /*
959 * Nr - 2 full rounds:
960 */
961 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
962 #if defined(AES_COMPACT_IN_INNER_ROUNDS)
963 t[0] = Td4[(s0 ) & 0xff] ^
964 Td4[(s3 >> 8) & 0xff] << 8 ^
965 Td4[(s2 >> 16) & 0xff] << 16 ^
966 Td4[(s1 >> 24) ] << 24;
967 t[1] = Td4[(s1 ) & 0xff] ^
968 Td4[(s0 >> 8) & 0xff] << 8 ^
969 Td4[(s3 >> 16) & 0xff] << 16 ^
970 Td4[(s2 >> 24) ] << 24;
971 t[2] = Td4[(s2 ) & 0xff] ^
972 Td4[(s1 >> 8) & 0xff] << 8 ^
973 Td4[(s0 >> 16) & 0xff] << 16 ^
974 Td4[(s3 >> 24) ] << 24;
975 t[3] = Td4[(s3 ) & 0xff] ^
976 Td4[(s2 >> 8) & 0xff] << 8 ^
977 Td4[(s1 >> 16) & 0xff] << 16 ^
978 Td4[(s0 >> 24) ] << 24;
979
980 /* now do the linear transform using words */
981 { int i;
982 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
983
984 for (i = 0; i < 4; i++) {
985 tp1 = t[i];
986 m = tp1 & 0x80808080;
987 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
988 ((m - (m >> 7)) & 0x1b1b1b1b);
989 m = tp2 & 0x80808080;
990 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
991 ((m - (m >> 7)) & 0x1b1b1b1b);
992 m = tp4 & 0x80808080;
993 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
994 ((m - (m >> 7)) & 0x1b1b1b1b);
995 tp9 = tp8 ^ tp1;
996 tpb = tp9 ^ tp2;
997 tpd = tp9 ^ tp4;
998 tpe = tp8 ^ tp4 ^ tp2;
999 #if defined(ROTATE)
1000 t[i] = tpe ^ ROTATE(tpd,16) ^
1001 ROTATE(tp9,8) ^ ROTATE(tpb,24);
1002 #else
1003 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1004 (tp9 >> 24) ^ (tp9 << 8) ^
1005 (tpb >> 8) ^ (tpb << 24);
1006 #endif
1007 t[i] ^= rk[i];
1008 }
1009 }
1010 #else
1011 t[0] = Td0[(s0 ) & 0xff] ^
1012 Td1[(s3 >> 8) & 0xff] ^
1013 Td2[(s2 >> 16) & 0xff] ^
1014 Td3[(s1 >> 24) ] ^
1015 rk[0];
1016 t[1] = Td0[(s1 ) & 0xff] ^
1017 Td1[(s0 >> 8) & 0xff] ^
1018 Td2[(s3 >> 16) & 0xff] ^
1019 Td3[(s2 >> 24) ] ^
1020 rk[1];
1021 t[2] = Td0[(s2 ) & 0xff] ^
1022 Td1[(s1 >> 8) & 0xff] ^
1023 Td2[(s0 >> 16) & 0xff] ^
1024 Td3[(s3 >> 24) ] ^
1025 rk[2];
1026 t[3] = Td0[(s3 ) & 0xff] ^
1027 Td1[(s2 >> 8) & 0xff] ^
1028 Td2[(s1 >> 16) & 0xff] ^
1029 Td3[(s0 >> 24) ] ^
1030 rk[3];
1031 #endif
1032 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
1033 }
1034 /*
1035 * apply last round and
1036 * map cipher state to byte array block:
1037 */
1038 prefetch256(Td4);
1039
1040 *(u32*)(out+0) =
1041 (Td4[(s0 ) & 0xff]) ^
1042 (Td4[(s3 >> 8) & 0xff] << 8) ^
1043 (Td4[(s2 >> 16) & 0xff] << 16) ^
1044 (Td4[(s1 >> 24) ] << 24) ^
1045 rk[0];
1046 *(u32*)(out+4) =
1047 (Td4[(s1 ) & 0xff]) ^
1048 (Td4[(s0 >> 8) & 0xff] << 8) ^
1049 (Td4[(s3 >> 16) & 0xff] << 16) ^
1050 (Td4[(s2 >> 24) ] << 24) ^
1051 rk[1];
1052 *(u32*)(out+8) =
1053 (Td4[(s2 ) & 0xff]) ^
1054 (Td4[(s1 >> 8) & 0xff] << 8) ^
1055 (Td4[(s0 >> 16) & 0xff] << 16) ^
1056 (Td4[(s3 >> 24) ] << 24) ^
1057 rk[2];
1058 *(u32*)(out+12) =
1059 (Td4[(s3 ) & 0xff]) ^
1060 (Td4[(s2 >> 8) & 0xff] << 8) ^
1061 (Td4[(s1 >> 16) & 0xff] << 16) ^
1062 (Td4[(s0 >> 24) ] << 24) ^
1063 rk[3];
1064 }