]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/aes/aes_x86core.c
Remove /* foo.c */ comments
[thirdparty/openssl.git] / crypto / aes / aes_x86core.c
CommitLineData
9c62bca1
AP
1/**
2 * rijndael-alg-fst.c
3 *
4 * @version 3.0 (December 2000)
5 *
6 * Optimised ANSI C code for the Rijndael cipher (now AES)
7 *
8 * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
9 * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
10 * @author Paulo Barreto <paulo.barreto@terra.com.br>
11 *
12 * This code is hereby placed in the public domain.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
15 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
18 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
21 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
23 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
/*
 * This is an experimental x86[_64] derivative. It assumes little-endian
 * byte order and expects the CPU to sustain unaligned memory references.
 * It is used as a playground for cache-timing attack mitigations and
 * serves as a reference C implementation for the x86[_64] assembler.
 *
 *                  <appro@fy.chalmers.se>
 */
35
36
37#ifndef AES_DEBUG
38# ifndef NDEBUG
39# define NDEBUG
40# endif
41#endif
42#include <assert.h>
43
44#include <stdlib.h>
45#include <openssl/aes.h>
46#include "aes_locl.h"
47
dff2922a
AP
/*
 * These two parameters control which table, 256-byte or 2KB, is
 * referenced in outer and respectively inner rounds.
 *
 * As configured here the outer (first and last) rounds use the compact
 * 256-byte table and the inner rounds use the 2KB table.
 */
#define AES_COMPACT_IN_OUTER_ROUNDS
#ifdef  AES_COMPACT_IN_OUTER_ROUNDS
/*
 * AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
 * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further*
 * by factor of ~2.
 */
# undef  AES_COMPACT_IN_INNER_ROUNDS
#endif
59
#if 1
/*
 * Read one byte from each 32-byte stride of the 256-byte table at
 * |table| and fold the values together.
 *
 * Judging by the name and the stride, the intent is to pull the whole
 * table into the data cache before it is indexed; the function has no
 * other observable effect.  The reads go through a volatile-qualified
 * pointer and the folded value is stored to a volatile local, so the
 * compiler is not allowed to drop the loop.
 *
 * The table is read as bytes, so no assumption is made about the
 * alignment or the declared element type of |table|, and the const
 * qualifier of the argument is preserved.
 */
static void prefetch256(const void *table)
{
    const volatile unsigned char *line = (const volatile unsigned char *)table;
    volatile unsigned long ret;
    unsigned long sum = 0;
    unsigned int off;

    /* 32 is common least cache-line size */
    for (off = 0; off < 256; off += 32)
        sum ^= line[off];

    ret = sum;
    (void)ret;
}
#else
# define prefetch256(t)
#endif
75
9c62bca1
AP
/*
 * Load a 32-bit word straight from memory at p.  No byte swapping and
 * no alignment fix-up is performed: the value is whatever a native
 * 32-bit load from that address yields.
 */
#undef GETU32
#define GETU32(p) (*((u32*)(p)))

/*
 * u64 is a 64-bit unsigned integer type and U64(C) attaches the matching
 * literal suffix to the constant C.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64 u64;
#define U64(C)  C##UI64
#elif defined(__arch64__)
typedef unsigned long u64;
#define U64(C)  C##UL
#else
typedef unsigned long long u64;
#define U64(C)  C##ULL
#endif

/*
 * ROTATE(a,n): rotate the 32-bit value a left by n bits.
 *
 * The macro is left undefined when none of the branches below match;
 * users must test defined(ROTATE) and provide a shift-based fallback.
 *
 * In the GNU C branch n has to be a compile-time constant (it is passed
 * with the "I" immediate constraint).  __asm__ is used instead of asm so
 * that the macro also compiles in strict ISO modes such as -std=c11, the
 * operand is converted to unsigned int so the matching "0" constraint
 * always sees a 32-bit value, and the temporary carries a name that is
 * unlikely to collide with a variable at the call site.
 */
#undef ROTATE
#if defined(_MSC_VER)
# define ROTATE(a,n)    _lrotl(a,n)
#elif defined(__ICC)
# define ROTATE(a,n)    _rotl(a,n)
#elif defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#  define ROTATE(a,n)   __extension__ ({ register unsigned int rotate_ret_; \
                            __asm__ (                               \
                            "roll %1,%0"                            \
                            : "=r"(rotate_ret_)                     \
                            : "I"(n), "0"((unsigned int)(a))        \
                            : "cc");                                \
                            rotate_ret_;                            \
                        })
# endif
#endif
/*-
Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
Te0[x] = S [x].[02, 01, 01, 03];
Te1[x] = S [x].[03, 02, 01, 01];
Te2[x] = S [x].[01, 03, 02, 01];
Te3[x] = S [x].[01, 01, 03, 02];
*/
/*
 * Te0..Te3 are not separate tables: each is a view of Te shifted by
 * 0, 3, 2 or 1 bytes.  Because every Te entry repeats the same four
 * bytes twice, a word read at such an offset is a byte rotation of the
 * entry (this relies on little-endian byte order).
 *
 * The macros are meant to be followed by a subscript, e.g. Te1[i].
 * Subscripting binds tighter than the cast, so this parses as
 * (u32)(((u64*)((u8*)Te+3))[i]): a 64-bit element is fetched from the
 * shifted view and then truncated to its low 32 bits.
 *
 * NOTE(review): for i == 255 and a non-zero byte offset the 64-bit
 * element nominally extends 1..3 bytes past the end of the table, even
 * though only its low 32 bits are kept.
 */
#define Te0 (u32)((u64*)((u8*)Te+0))
#define Te1 (u32)((u64*)((u8*)Te+3))
#define Te2 (u32)((u64*)((u8*)Te+2))
#define Te3 (u32)((u64*)((u8*)Te+1))
/*-
Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
Td0[x] = Si[x].[0e, 09, 0d, 0b];
Td1[x] = Si[x].[0b, 0e, 09, 0d];
Td2[x] = Si[x].[0d, 0b, 0e, 09];
Td3[x] = Si[x].[09, 0d, 0b, 0e];
Td4[x] = Si[x].[01];
*/
/*
 * Td0..Td3 are the corresponding byte-shifted views of Td; the same
 * parsing and end-of-table remarks as for Te0..Te3 apply.
 */
#define Td0 (u32)((u64*)((u8*)Td+0))
#define Td1 (u32)((u64*)((u8*)Td+3))
#define Td2 (u32)((u64*)((u8*)Td+2))
#define Td3 (u32)((u64*)((u8*)Td+1))
9c62bca1
AP
130
131static const u64 Te[256] = {
132 U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
133 U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
134 U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
135 U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
136 U64(0x5030306050303060), U64(0x0301010203010102),
137 U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
138 U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
139 U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
140 U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
141 U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
142 U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
143 U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
144 U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
145 U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
146 U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
147 U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
148 U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
149 U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
150 U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
151 U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
152 U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
153 U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
154 U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
155 U64(0x5331316253313162), U64(0x3f15152a3f15152a),
156 U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
157 U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
158 U64(0x2818183028181830), U64(0xa1969637a1969637),
159 U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
160 U64(0x0907070e0907070e), U64(0x3612122436121224),
161 U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
162 U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
163 U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
164 U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
165 U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
166 U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
167 U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
168 U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
169 U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
170 U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
171 U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
172 U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
173 U64(0x0000000000000000), U64(0x2cededc12cededc1),
174 U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
175 U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
176 U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
177 U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
178 U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
179 U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
180 U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
181 U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
182 U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
183 U64(0x5533336655333366), U64(0x9485851194858511),
184 U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
185 U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
186 U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
187 U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
188 U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
189 U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
190 U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
191 U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
192 U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
193 U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
194 U64(0x3010102030101020), U64(0x1affffe51affffe5),
195 U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
196 U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
197 U64(0x3513132635131326), U64(0x2fececc32fececc3),
198 U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
199 U64(0xcc444488cc444488), U64(0x3917172e3917172e),
200 U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
201 U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
202 U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
203 U64(0x2b1919322b191932), U64(0x957373e6957373e6),
204 U64(0xa06060c0a06060c0), U64(0x9881811998818119),
205 U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
206 U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
207 U64(0xab90903bab90903b), U64(0x8388880b8388880b),
208 U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
209 U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
210 U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
211 U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
212 U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
213 U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
214 U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
215 U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
216 U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
217 U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
218 U64(0xa8919139a8919139), U64(0xa4959531a4959531),
219 U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
220 U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
221 U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
222 U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
223 U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
224 U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
225 U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
226 U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
227 U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
228 U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
229 U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
230 U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
231 U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
232 U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
233 U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
234 U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
235 U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
236 U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
237 U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
238 U64(0xd8484890d8484890), U64(0x0503030605030306),
239 U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
240 U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
241 U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
242 U64(0x9186861791868617), U64(0x58c1c19958c1c199),
243 U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
244 U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
245 U64(0xb398982bb398982b), U64(0x3311112233111122),
246 U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
247 U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
248 U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
249 U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
250 U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
251 U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
252 U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
253 U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
254 U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
255 U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
256 U64(0xc3414182c3414182), U64(0xb0999929b0999929),
257 U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
258 U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
259 U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
260};
261
dff2922a
AP
262static const u8 Te4[256] = {
263 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
264 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
265 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
266 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
267 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
268 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
269 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
270 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
271 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
272 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
273 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
274 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
275 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
276 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
277 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
278 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
279 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
280 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
281 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
282 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
283 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
284 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
285 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
286 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
287 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
288 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
289 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
290 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
291 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
292 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
293 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
294 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
295};
296
9c62bca1
AP
297static const u64 Td[256] = {
298 U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
299 U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
300 U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
301 U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
302 U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
303 U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
304 U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
305 U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
306 U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
307 U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
308 U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
309 U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
310 U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
311 U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
312 U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
313 U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
314 U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
315 U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
316 U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
317 U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
318 U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
319 U64(0x6033519760335197), U64(0x457f5362457f5362),
320 U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
321 U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
322 U64(0x5868487058684870), U64(0x19fd458f19fd458f),
323 U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
324 U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
325 U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
326 U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
327 U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
328 U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
329 U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
330 U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
331 U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
332 U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
333 U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
334 U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
335 U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
336 U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
337 U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
338 U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
339 U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
340 U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
341 U64(0x6fd406046fd40604), U64(0xff155060ff155060),
342 U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
343 U64(0xcc434089cc434089), U64(0x779ed967779ed967),
344 U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
345 U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
346 U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
347 U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
348 U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
349 U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
350 U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
351 U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
352 U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
353 U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
354 U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
355 U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
356 U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
357 U64(0x694b775a694b775a), U64(0x161a121c161a121c),
358 U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
359 U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
360 U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
361 U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
362 U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
363 U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
364 U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
365 U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
366 U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
367 U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
368 U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
369 U64(0x4022971340229713), U64(0x2011c6842011c684),
370 U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
371 U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
372 U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
373 U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
374 U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
375 U64(0xfa489411fa489411), U64(0x2264e9472264e947),
376 U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
377 U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
378 U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
379 U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
380 U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
381 U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
382 U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
383 U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
384 U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
385 U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
386 U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
387 U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
388 U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
389 U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
390 U64(0x097826cd097826cd), U64(0xf418596ef418596e),
391 U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
392 U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
393 U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
394 U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
395 U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
396 U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
397 U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
398 U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
399 U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
400 U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
401 U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
402 U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
403 U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
404 U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
405 U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
406 U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
407 U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
408 U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
409 U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
410 U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
411 U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
412 U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
413 U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
414 U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
415 U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
416 U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
417 U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
418 U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
419 U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
420 U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
421 U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
422 U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
423 U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
424 U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
425 U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
426};
427static const u8 Td4[256] = {
428 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
429 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
430 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
431 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
432 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
433 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
434 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
435 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
436 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
437 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
438 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
439 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
440 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
441 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
442 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
443 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
444 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
445 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
446 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
447 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
448 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
449 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
450 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
451 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
452 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
453 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
454 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
455 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
456 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
457 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
458 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
459 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
460};
461
462static const u32 rcon[] = {
463 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
464 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
465 0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
466};
467
468/**
469 * Expand the cipher key into the encryption key schedule.
470 */
471int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
66186aee
MC
472 AES_KEY *key)
473{
9c62bca1 474
66186aee
MC
475 u32 *rk;
476 int i = 0;
477 u32 temp;
9c62bca1 478
66186aee
MC
479 if (!userKey || !key)
480 return -1;
481 if (bits != 128 && bits != 192 && bits != 256)
482 return -2;
9c62bca1 483
66186aee 484 rk = key->rd_key;
9c62bca1 485
66186aee
MC
486 if (bits==128)
487 key->rounds = 10;
488 else if (bits==192)
489 key->rounds = 12;
490 else
491 key->rounds = 14;
9c62bca1 492
66186aee
MC
493 rk[0] = GETU32(userKey );
494 rk[1] = GETU32(userKey + 4);
495 rk[2] = GETU32(userKey + 8);
496 rk[3] = GETU32(userKey + 12);
497 if (bits == 128) {
498 while (1) {
499 temp = rk[3];
500 rk[4] = rk[0] ^
8b37e5c1
EK
501 ((u32)Te4[(temp >> 8) & 0xff] ) ^
502 ((u32)Te4[(temp >> 16) & 0xff] << 8) ^
503 ((u32)Te4[(temp >> 24) ] << 16) ^
504 ((u32)Te4[(temp ) & 0xff] << 24) ^
66186aee
MC
505 rcon[i];
506 rk[5] = rk[1] ^ rk[4];
507 rk[6] = rk[2] ^ rk[5];
508 rk[7] = rk[3] ^ rk[6];
509 if (++i == 10) {
510 return 0;
511 }
512 rk += 4;
513 }
514 }
515 rk[4] = GETU32(userKey + 16);
516 rk[5] = GETU32(userKey + 20);
517 if (bits == 192) {
518 while (1) {
519 temp = rk[ 5];
520 rk[ 6] = rk[ 0] ^
8b37e5c1
EK
521 ((u32)Te4[(temp >> 8) & 0xff] ) ^
522 ((u32)Te4[(temp >> 16) & 0xff] << 8) ^
523 ((u32)Te4[(temp >> 24) ] << 16) ^
524 ((u32)Te4[(temp ) & 0xff] << 24) ^
66186aee
MC
525 rcon[i];
526 rk[ 7] = rk[ 1] ^ rk[ 6];
527 rk[ 8] = rk[ 2] ^ rk[ 7];
528 rk[ 9] = rk[ 3] ^ rk[ 8];
529 if (++i == 8) {
530 return 0;
531 }
532 rk[10] = rk[ 4] ^ rk[ 9];
533 rk[11] = rk[ 5] ^ rk[10];
534 rk += 6;
535 }
536 }
537 rk[6] = GETU32(userKey + 24);
538 rk[7] = GETU32(userKey + 28);
539 if (bits == 256) {
540 while (1) {
541 temp = rk[ 7];
542 rk[ 8] = rk[ 0] ^
8b37e5c1
EK
543 ((u32)Te4[(temp >> 8) & 0xff] ) ^
544 ((u32)Te4[(temp >> 16) & 0xff] << 8) ^
545 ((u32)Te4[(temp >> 24) ] << 16) ^
546 ((u32)Te4[(temp ) & 0xff] << 24) ^
66186aee
MC
547 rcon[i];
548 rk[ 9] = rk[ 1] ^ rk[ 8];
549 rk[10] = rk[ 2] ^ rk[ 9];
550 rk[11] = rk[ 3] ^ rk[10];
551 if (++i == 7) {
552 return 0;
553 }
554 temp = rk[11];
555 rk[12] = rk[ 4] ^
8b37e5c1
EK
556 ((u32)Te4[(temp ) & 0xff] ) ^
557 ((u32)Te4[(temp >> 8) & 0xff] << 8) ^
558 ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
559 ((u32)Te4[(temp >> 24) ] << 24);
66186aee
MC
560 rk[13] = rk[ 5] ^ rk[12];
561 rk[14] = rk[ 6] ^ rk[13];
562 rk[15] = rk[ 7] ^ rk[14];
9c62bca1 563
66186aee
MC
564 rk += 8;
565 }
566 }
567 return 0;
9c62bca1
AP
568}
569
570/**
571 * Expand the cipher key into the decryption key schedule.
572 */
573int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
66186aee
MC
574 AES_KEY *key)
575{
9c62bca1 576
66186aee
MC
577 u32 *rk;
578 int i, j, status;
579 u32 temp;
9c62bca1 580
66186aee
MC
581 /* first, start with an encryption schedule */
582 status = AES_set_encrypt_key(userKey, bits, key);
583 if (status < 0)
584 return status;
9c62bca1 585
66186aee 586 rk = key->rd_key;
9c62bca1 587
66186aee
MC
588 /* invert the order of the round keys: */
589 for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
590 temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
591 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
592 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
593 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
594 }
595 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
596 for (i = 1; i < (key->rounds); i++) {
597 rk += 4;
8cebec98 598#if 1
66186aee
MC
599 for (j = 0; j < 4; j++) {
600 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
8cebec98 601
66186aee
MC
602 tp1 = rk[j];
603 m = tp1 & 0x80808080;
604 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
605 ((m - (m >> 7)) & 0x1b1b1b1b);
606 m = tp2 & 0x80808080;
607 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
608 ((m - (m >> 7)) & 0x1b1b1b1b);
609 m = tp4 & 0x80808080;
610 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
611 ((m - (m >> 7)) & 0x1b1b1b1b);
612 tp9 = tp8 ^ tp1;
613 tpb = tp9 ^ tp2;
614 tpd = tp9 ^ tp4;
615 tpe = tp8 ^ tp4 ^ tp2;
8cebec98 616#if defined(ROTATE)
66186aee
MC
617 rk[j] = tpe ^ ROTATE(tpd,16) ^
618 ROTATE(tp9,8) ^ ROTATE(tpb,24);
8cebec98 619#else
66186aee
MC
620 rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
621 (tp9 >> 24) ^ (tp9 << 8) ^
622 (tpb >> 8) ^ (tpb << 24);
8cebec98 623#endif
66186aee 624 }
8cebec98 625#else
66186aee
MC
626 rk[0] =
627 Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^
628 Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^
629 Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
630 Td3[Te2[(rk[0] >> 24) ] & 0xff];
631 rk[1] =
632 Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^
633 Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^
634 Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
635 Td3[Te2[(rk[1] >> 24) ] & 0xff];
636 rk[2] =
637 Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^
638 Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^
639 Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
640 Td3[Te2[(rk[2] >> 24) ] & 0xff];
641 rk[3] =
642 Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^
643 Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^
644 Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
645 Td3[Te2[(rk[3] >> 24) ] & 0xff];
8cebec98 646#endif
66186aee
MC
647 }
648 return 0;
9c62bca1
AP
649}
650
651/*
652 * Encrypt a single block
653 * in and out can overlap
654 */
655void AES_encrypt(const unsigned char *in, unsigned char *out,
66186aee
MC
656 const AES_KEY *key)
657{
9c62bca1 658
66186aee
MC
659 const u32 *rk;
660 u32 s0, s1, s2, s3, t[4];
661 int r;
9c62bca1 662
66186aee
MC
663 assert(in && out && key);
664 rk = key->rd_key;
9c62bca1 665
66186aee
MC
666 /*
667 * map byte array block to cipher state
668 * and add initial round key:
669 */
670 s0 = GETU32(in ) ^ rk[0];
671 s1 = GETU32(in + 4) ^ rk[1];
672 s2 = GETU32(in + 8) ^ rk[2];
673 s3 = GETU32(in + 12) ^ rk[3];
9c62bca1 674
dff2922a 675#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
66186aee 676 prefetch256(Te4);
dff2922a 677
8b37e5c1
EK
678 t[0] = (u32)Te4[(s0 ) & 0xff] ^
679 (u32)Te4[(s1 >> 8) & 0xff] << 8 ^
680 (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
681 (u32)Te4[(s3 >> 24) ] << 24;
682 t[1] = (u32)Te4[(s1 ) & 0xff] ^
683 (u32)Te4[(s2 >> 8) & 0xff] << 8 ^
684 (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
685 (u32)Te4[(s0 >> 24) ] << 24;
686 t[2] = (u32)Te4[(s2 ) & 0xff] ^
687 (u32)Te4[(s3 >> 8) & 0xff] << 8 ^
688 (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
689 (u32)Te4[(s1 >> 24) ] << 24;
690 t[3] = (u32)Te4[(s3 ) & 0xff] ^
691 (u32)Te4[(s0 >> 8) & 0xff] << 8 ^
692 (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
693 (u32)Te4[(s2 >> 24) ] << 24;
dff2922a 694
66186aee
MC
695 /* now do the linear transform using words */
696 { int i;
697 u32 r0, r1, r2;
dff2922a 698
66186aee
MC
699 for (i = 0; i < 4; i++) {
700 r0 = t[i];
701 r1 = r0 & 0x80808080;
702 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
703 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
dff2922a 704#if defined(ROTATE)
66186aee
MC
705 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
706 ROTATE(r0,16) ^ ROTATE(r0,8);
dff2922a 707#else
66186aee
MC
708 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
709 (r0 << 16) ^ (r0 >> 16) ^
710 (r0 << 8) ^ (r0 >> 24);
dff2922a 711#endif
66186aee
MC
712 t[i] ^= rk[4+i];
713 }
714 }
dff2922a 715#else
66186aee
MC
716 t[0] = Te0[(s0 ) & 0xff] ^
717 Te1[(s1 >> 8) & 0xff] ^
718 Te2[(s2 >> 16) & 0xff] ^
719 Te3[(s3 >> 24) ] ^
720 rk[4];
721 t[1] = Te0[(s1 ) & 0xff] ^
722 Te1[(s2 >> 8) & 0xff] ^
723 Te2[(s3 >> 16) & 0xff] ^
724 Te3[(s0 >> 24) ] ^
725 rk[5];
726 t[2] = Te0[(s2 ) & 0xff] ^
727 Te1[(s3 >> 8) & 0xff] ^
728 Te2[(s0 >> 16) & 0xff] ^
729 Te3[(s1 >> 24) ] ^
730 rk[6];
731 t[3] = Te0[(s3 ) & 0xff] ^
732 Te1[(s0 >> 8) & 0xff] ^
733 Te2[(s1 >> 16) & 0xff] ^
734 Te3[(s2 >> 24) ] ^
735 rk[7];
dff2922a 736#endif
66186aee 737 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
738
739 /*
740 * Nr - 2 full rounds:
741 */
dff2922a
AP
742 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
743#if defined(AES_COMPACT_IN_INNER_ROUNDS)
8b37e5c1
EK
744 t[0] = (u32)Te4[(s0 ) & 0xff] ^
745 (u32)Te4[(s1 >> 8) & 0xff] << 8 ^
746 (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
747 (u32)Te4[(s3 >> 24) ] << 24;
748 t[1] = (u32)Te4[(s1 ) & 0xff] ^
749 (u32)Te4[(s2 >> 8) & 0xff] << 8 ^
750 (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
751 (u32)Te4[(s0 >> 24) ] << 24;
752 t[2] = (u32)Te4[(s2 ) & 0xff] ^
753 (u32)Te4[(s3 >> 8) & 0xff] << 8 ^
754 (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
755 (u32)Te4[(s1 >> 24) ] << 24;
756 t[3] = (u32)Te4[(s3 ) & 0xff] ^
757 (u32)Te4[(s0 >> 8) & 0xff] << 8 ^
758 (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
759 (u32)Te4[(s2 >> 24) ] << 24;
dff2922a 760
66186aee
MC
761 /* now do the linear transform using words */
762 {
763 int i;
764 u32 r0, r1, r2;
dff2922a 765
66186aee
MC
766 for (i = 0; i < 4; i++) {
767 r0 = t[i];
768 r1 = r0 & 0x80808080;
769 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
770 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
dff2922a 771#if defined(ROTATE)
66186aee
MC
772 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
773 ROTATE(r0,16) ^ ROTATE(r0,8);
dff2922a 774#else
66186aee
MC
775 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
776 (r0 << 16) ^ (r0 >> 16) ^
777 (r0 << 8) ^ (r0 >> 24);
dff2922a 778#endif
66186aee
MC
779 t[i] ^= rk[i];
780 }
781 }
dff2922a 782#else
66186aee
MC
783 t[0] = Te0[(s0 ) & 0xff] ^
784 Te1[(s1 >> 8) & 0xff] ^
785 Te2[(s2 >> 16) & 0xff] ^
786 Te3[(s3 >> 24) ] ^
787 rk[0];
788 t[1] = Te0[(s1 ) & 0xff] ^
789 Te1[(s2 >> 8) & 0xff] ^
790 Te2[(s3 >> 16) & 0xff] ^
791 Te3[(s0 >> 24) ] ^
792 rk[1];
793 t[2] = Te0[(s2 ) & 0xff] ^
794 Te1[(s3 >> 8) & 0xff] ^
795 Te2[(s0 >> 16) & 0xff] ^
796 Te3[(s1 >> 24) ] ^
797 rk[2];
798 t[3] = Te0[(s3 ) & 0xff] ^
799 Te1[(s0 >> 8) & 0xff] ^
800 Te2[(s1 >> 16) & 0xff] ^
801 Te3[(s2 >> 24) ] ^
802 rk[3];
dff2922a 803#endif
66186aee 804 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
805 }
806 /*
66186aee
MC
807 * apply last round and
808 * map cipher state to byte array block:
809 */
dff2922a 810#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
66186aee 811 prefetch256(Te4);
dff2922a 812
66186aee 813 *(u32*)(out+0) =
8b37e5c1
EK
814 (u32)Te4[(s0 ) & 0xff] ^
815 (u32)Te4[(s1 >> 8) & 0xff] << 8 ^
816 (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
817 (u32)Te4[(s3 >> 24) ] << 24 ^
66186aee
MC
818 rk[0];
819 *(u32*)(out+4) =
8b37e5c1
EK
820 (u32)Te4[(s1 ) & 0xff] ^
821 (u32)Te4[(s2 >> 8) & 0xff] << 8 ^
822 (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
823 (u32)Te4[(s0 >> 24) ] << 24 ^
66186aee
MC
824 rk[1];
825 *(u32*)(out+8) =
8b37e5c1
EK
826 (u32)Te4[(s2 ) & 0xff] ^
827 (u32)Te4[(s3 >> 8) & 0xff] << 8 ^
828 (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
829 (u32)Te4[(s1 >> 24) ] << 24 ^
66186aee
MC
830 rk[2];
831 *(u32*)(out+12) =
8b37e5c1
EK
832 (u32)Te4[(s3 ) & 0xff] ^
833 (u32)Te4[(s0 >> 8) & 0xff] << 8 ^
834 (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
835 (u32)Te4[(s2 >> 24) ] << 24 ^
66186aee 836 rk[3];
dff2922a 837#else
66186aee
MC
838 *(u32*)(out+0) =
839 (Te2[(s0 ) & 0xff] & 0x000000ffU) ^
840 (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^
841 (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
842 (Te1[(s3 >> 24) ] & 0xff000000U) ^
843 rk[0];
844 *(u32*)(out+4) =
845 (Te2[(s1 ) & 0xff] & 0x000000ffU) ^
846 (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^
847 (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
848 (Te1[(s0 >> 24) ] & 0xff000000U) ^
849 rk[1];
850 *(u32*)(out+8) =
851 (Te2[(s2 ) & 0xff] & 0x000000ffU) ^
852 (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^
853 (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
854 (Te1[(s1 >> 24) ] & 0xff000000U) ^
855 rk[2];
856 *(u32*)(out+12) =
857 (Te2[(s3 ) & 0xff] & 0x000000ffU) ^
858 (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^
859 (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
860 (Te1[(s2 >> 24) ] & 0xff000000U) ^
861 rk[3];
dff2922a 862#endif
9c62bca1
AP
863}
864
865/*
866 * Decrypt a single block
867 * in and out can overlap
868 */
869void AES_decrypt(const unsigned char *in, unsigned char *out,
66186aee
MC
870 const AES_KEY *key)
871{
9c62bca1 872
66186aee
MC
873 const u32 *rk;
874 u32 s0, s1, s2, s3, t[4];
875 int r;
9c62bca1 876
66186aee
MC
877 assert(in && out && key);
878 rk = key->rd_key;
9c62bca1 879
66186aee
MC
880 /*
881 * map byte array block to cipher state
882 * and add initial round key:
883 */
884 s0 = GETU32(in ) ^ rk[0];
885 s1 = GETU32(in + 4) ^ rk[1];
886 s2 = GETU32(in + 8) ^ rk[2];
887 s3 = GETU32(in + 12) ^ rk[3];
9c62bca1 888
dff2922a 889#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
66186aee 890 prefetch256(Td4);
dff2922a 891
8b37e5c1
EK
892 t[0] = (u32)Td4[(s0 ) & 0xff] ^
893 (u32)Td4[(s3 >> 8) & 0xff] << 8 ^
894 (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
895 (u32)Td4[(s1 >> 24) ] << 24;
896 t[1] = (u32)Td4[(s1 ) & 0xff] ^
897 (u32)Td4[(s0 >> 8) & 0xff] << 8 ^
898 (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
899 (u32)Td4[(s2 >> 24) ] << 24;
900 t[2] = (u32)Td4[(s2 ) & 0xff] ^
901 (u32)Td4[(s1 >> 8) & 0xff] << 8 ^
902 (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
903 (u32)Td4[(s3 >> 24) ] << 24;
904 t[3] = (u32)Td4[(s3 ) & 0xff] ^
905 (u32)Td4[(s2 >> 8) & 0xff] << 8 ^
906 (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
907 (u32)Td4[(s0 >> 24) ] << 24;
dff2922a 908
66186aee
MC
909 /* now do the linear transform using words */
910 {
911 int i;
912 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
dff2922a 913
66186aee
MC
914 for (i = 0; i < 4; i++) {
915 tp1 = t[i];
916 m = tp1 & 0x80808080;
917 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
918 ((m - (m >> 7)) & 0x1b1b1b1b);
919 m = tp2 & 0x80808080;
920 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
921 ((m - (m >> 7)) & 0x1b1b1b1b);
922 m = tp4 & 0x80808080;
923 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
924 ((m - (m >> 7)) & 0x1b1b1b1b);
925 tp9 = tp8 ^ tp1;
926 tpb = tp9 ^ tp2;
927 tpd = tp9 ^ tp4;
928 tpe = tp8 ^ tp4 ^ tp2;
dff2922a 929#if defined(ROTATE)
66186aee
MC
930 t[i] = tpe ^ ROTATE(tpd,16) ^
931 ROTATE(tp9,8) ^ ROTATE(tpb,24);
dff2922a 932#else
66186aee
MC
933 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
934 (tp9 >> 24) ^ (tp9 << 8) ^
935 (tpb >> 8) ^ (tpb << 24);
dff2922a 936#endif
66186aee
MC
937 t[i] ^= rk[4+i];
938 }
939 }
dff2922a 940#else
66186aee
MC
941 t[0] = Td0[(s0 ) & 0xff] ^
942 Td1[(s3 >> 8) & 0xff] ^
943 Td2[(s2 >> 16) & 0xff] ^
944 Td3[(s1 >> 24) ] ^
945 rk[4];
946 t[1] = Td0[(s1 ) & 0xff] ^
947 Td1[(s0 >> 8) & 0xff] ^
948 Td2[(s3 >> 16) & 0xff] ^
949 Td3[(s2 >> 24) ] ^
950 rk[5];
951 t[2] = Td0[(s2 ) & 0xff] ^
952 Td1[(s1 >> 8) & 0xff] ^
953 Td2[(s0 >> 16) & 0xff] ^
954 Td3[(s3 >> 24) ] ^
955 rk[6];
956 t[3] = Td0[(s3 ) & 0xff] ^
957 Td1[(s2 >> 8) & 0xff] ^
958 Td2[(s1 >> 16) & 0xff] ^
959 Td3[(s0 >> 24) ] ^
960 rk[7];
dff2922a 961#endif
66186aee 962 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
963
964 /*
965 * Nr - 2 full rounds:
966 */
dff2922a
AP
967 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
968#if defined(AES_COMPACT_IN_INNER_ROUNDS)
8b37e5c1
EK
969 t[0] = (u32)Td4[(s0 ) & 0xff] ^
970 (u32)Td4[(s3 >> 8) & 0xff] << 8 ^
971 (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
972 (u32)Td4[(s1 >> 24) ] << 24;
973 t[1] = (u32)Td4[(s1 ) & 0xff] ^
974 (u32)Td4[(s0 >> 8) & 0xff] << 8 ^
975 (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
976 (u32)Td4[(s2 >> 24) ] << 24;
977 t[2] = (u32)Td4[(s2 ) & 0xff] ^
978 (u32)Td4[(s1 >> 8) & 0xff] << 8 ^
979 (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
980 (u32)Td4[(s3 >> 24) ] << 24;
981 t[3] = (u32)Td4[(s3 ) & 0xff] ^
982 (u32)Td4[(s2 >> 8) & 0xff] << 8 ^
983 (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
984 (u32)Td4[(s0 >> 24) ] << 24;
dff2922a 985
66186aee
MC
986 /* now do the linear transform using words */
987 {
988 int i;
989 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
dff2922a 990
66186aee
MC
991 for (i = 0; i < 4; i++) {
992 tp1 = t[i];
993 m = tp1 & 0x80808080;
994 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
995 ((m - (m >> 7)) & 0x1b1b1b1b);
996 m = tp2 & 0x80808080;
997 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
998 ((m - (m >> 7)) & 0x1b1b1b1b);
999 m = tp4 & 0x80808080;
1000 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1001 ((m - (m >> 7)) & 0x1b1b1b1b);
1002 tp9 = tp8 ^ tp1;
1003 tpb = tp9 ^ tp2;
1004 tpd = tp9 ^ tp4;
1005 tpe = tp8 ^ tp4 ^ tp2;
dff2922a 1006#if defined(ROTATE)
66186aee
MC
1007 t[i] = tpe ^ ROTATE(tpd,16) ^
1008 ROTATE(tp9,8) ^ ROTATE(tpb,24);
dff2922a 1009#else
66186aee
MC
1010 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1011 (tp9 >> 24) ^ (tp9 << 8) ^
1012 (tpb >> 8) ^ (tpb << 24);
dff2922a 1013#endif
66186aee
MC
1014 t[i] ^= rk[i];
1015 }
1016 }
dff2922a 1017#else
66186aee
MC
1018 t[0] = Td0[(s0 ) & 0xff] ^
1019 Td1[(s3 >> 8) & 0xff] ^
1020 Td2[(s2 >> 16) & 0xff] ^
1021 Td3[(s1 >> 24) ] ^
1022 rk[0];
1023 t[1] = Td0[(s1 ) & 0xff] ^
1024 Td1[(s0 >> 8) & 0xff] ^
1025 Td2[(s3 >> 16) & 0xff] ^
1026 Td3[(s2 >> 24) ] ^
1027 rk[1];
1028 t[2] = Td0[(s2 ) & 0xff] ^
1029 Td1[(s1 >> 8) & 0xff] ^
1030 Td2[(s0 >> 16) & 0xff] ^
1031 Td3[(s3 >> 24) ] ^
1032 rk[2];
1033 t[3] = Td0[(s3 ) & 0xff] ^
1034 Td1[(s2 >> 8) & 0xff] ^
1035 Td2[(s1 >> 16) & 0xff] ^
1036 Td3[(s0 >> 24) ] ^
1037 rk[3];
dff2922a 1038#endif
66186aee 1039 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
1040 }
1041 /*
66186aee
MC
1042 * apply last round and
1043 * map cipher state to byte array block:
1044 */
1045 prefetch256(Td4);
dff2922a 1046
66186aee 1047 *(u32*)(out+0) =
8b37e5c1
EK
1048 ((u32)Td4[(s0 ) & 0xff]) ^
1049 ((u32)Td4[(s3 >> 8) & 0xff] << 8) ^
1050 ((u32)Td4[(s2 >> 16) & 0xff] << 16) ^
1051 ((u32)Td4[(s1 >> 24) ] << 24) ^
66186aee
MC
1052 rk[0];
1053 *(u32*)(out+4) =
8b37e5c1
EK
1054 ((u32)Td4[(s1 ) & 0xff]) ^
1055 ((u32)Td4[(s0 >> 8) & 0xff] << 8) ^
1056 ((u32)Td4[(s3 >> 16) & 0xff] << 16) ^
1057 ((u32)Td4[(s2 >> 24) ] << 24) ^
66186aee
MC
1058 rk[1];
1059 *(u32*)(out+8) =
8b37e5c1
EK
1060 ((u32)Td4[(s2 ) & 0xff]) ^
1061 ((u32)Td4[(s1 >> 8) & 0xff] << 8) ^
1062 ((u32)Td4[(s0 >> 16) & 0xff] << 16) ^
1063 ((u32)Td4[(s3 >> 24) ] << 24) ^
66186aee
MC
1064 rk[2];
1065 *(u32*)(out+12) =
8b37e5c1
EK
1066 ((u32)Td4[(s3 ) & 0xff]) ^
1067 ((u32)Td4[(s2 >> 8) & 0xff] << 8) ^
1068 ((u32)Td4[(s1 >> 16) & 0xff] << 16) ^
1069 ((u32)Td4[(s0 >> 24) ] << 24) ^
66186aee 1070 rk[3];
9c62bca1 1071}