/*
 * Extracted from the git.ipfire.org blame view of
 * thirdparty/openssl.git, crypto/aes/aes_x86core.c
 * (page heading: "Intel compiler support update from HEAD.").
 */
/* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
/**
 * rijndael-alg-fst.c
 *
 * @version 3.0 (December 2000)
 *
 * Optimised ANSI C code for the Rijndael cipher (now AES)
 *
 * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
 * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
 * @author Paulo Barreto <paulo.barreto@terra.com.br>
 *
 * This code is hereby placed in the public domain.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This is an experimental x86[_64] derivative. It assumes little-endian
 * byte order and expects the CPU to sustain unaligned memory references.
 * It is used as a playground for cache-timing attack mitigations and
 * serves as the reference C implementation for the x86[_64] assembler.
 *
 *					<appro@fy.chalmers.se>
 */


/*
 * Unless AES_DEBUG is defined, make sure NDEBUG is defined before
 * <assert.h> is included, so that the assert() calls in this file
 * expand to nothing.
 */
#ifndef AES_DEBUG
# ifndef NDEBUG
#  define NDEBUG
# endif
#endif
#include <assert.h>

45#include <stdlib.h>
46#include <openssl/aes.h>
47#include "aes_locl.h"
48
dff2922a
AP
/*
 * These two parameters control which table, the 256-byte one or the
 * 2KB one, is referenced in the outer and, respectively, the inner
 * rounds.
 */
#define AES_COMPACT_IN_OUTER_ROUNDS
#ifdef AES_COMPACT_IN_OUTER_ROUNDS
/* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
 * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further*
 * by a factor of ~2. */
# undef AES_COMPACT_IN_INNER_ROUNDS
#endif

#if 1
/*
 * Read through a 256-byte table without modifying it.
 *
 * One byte is loaded from every 32-byte stride of the table, plus the
 * very last byte, so that every cache line overlapping the table is
 * touched even when the table does not start on a 32-byte boundary.
 * The loads go through a volatile pointer and their XOR is stored to a
 * volatile sink so the compiler cannot discard them.  (Judging by the
 * name, the intent is to pull the table into the data cache ahead of
 * the data-dependent lookups.)
 *
 * table: start of a readable region of at least 256 bytes.
 */
static void prefetch256(const void *table)
{
	const volatile unsigned char *t = table;
	volatile unsigned char sink;
	unsigned char sum = 0;
	unsigned int i;

	/* 32 is common least cache-line size */
	for (i = 0; i < 256; i += 32)
		sum ^= t[i];

	/* covers the trailing line of a table that straddles a boundary */
	sum ^= t[255];

	sink = sum;
	(void)sink;
}
#else
# define prefetch256(t)
#endif

/*
 * Load a 32-bit word straight from memory in host byte order.  This
 * replaces any earlier definition of GETU32 and relies on the file's
 * stated assumptions: little-endian byte order and a CPU that
 * tolerates unaligned loads.  The pointer is only read through, hence
 * the const-qualified cast.
 */
#undef GETU32
#define GETU32(p) (*((const u32*)(p)))

/*
 * u64 is the unsigned 64-bit type used for the Te/Td table entries and
 * U64(C) attaches the matching literal suffix to a constant.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64 u64;
#define U64(C)	C##UI64
#elif defined(__arch64__)
typedef unsigned long u64;
#define U64(C)	C##UL
#else
typedef unsigned long long u64;
#define U64(C)	C##ULL
#endif

/*
 * ROTATE(a,n): rotate the 32-bit value a left by n bits using a
 * compiler intrinsic or, with GCC-compatible compilers on x86[_64],
 * a single "roll" instruction (n must then be a compile-time constant
 * in 0..31 because of the "I" constraint).  On any other compiler or
 * target ROTATE stays undefined and the code below falls back to
 * shift pairs under "#if defined(ROTATE)".
 *
 * __asm__ is used rather than asm so the macro also works in strict
 * ISO modes (-std=c99/-std=c11), and the temporary carries a
 * distinctive name so that an argument called "ret" is not captured.
 */
#undef ROTATE
#if defined(_MSC_VER)
# define ROTATE(a,n)	_lrotl(a,n)
#elif defined(__ICC)
# define ROTATE(a,n)	_rotl(a,n)
#elif defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#   define ROTATE(a,n)	({ register unsigned int rotate_ret_;	\
				__asm__ (			\
				"roll %1,%0"			\
				: "=r"(rotate_ret_)		\
				: "I"(n), "0"(a)		\
				: "cc");			\
			   rotate_ret_;				\
			})
# endif
#endif
/*
 * Layout of the encryption table.  Every 64-bit entry of Te holds the
 * 4-byte pattern for S[x] twice in a row:
 *
 * Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
 * Te0[x] = S [x].[02, 01, 01, 03];
 * Te1[x] = S [x].[03, 02, 01, 01];
 * Te2[x] = S [x].[01, 03, 02, 01];
 * Te3[x] = S [x].[01, 01, 03, 02];
 *
 * Te0..Te3 are not separate tables.  "TeN[x]" expands to a cast applied
 * to an indexed load, i.e. (u32)(((const u64*)((const u8*)Te+k))[x]):
 * an 8-byte load from byte offset k inside entry x, truncated to its
 * low 32 bits.  On a little-endian machine that yields the four byte
 * rotations of the entry.  The casts keep the const qualifier of Te.
 *
 * NOTE(review): with k = 1..3 and x = 255 the 8-byte load extends k
 * bytes beyond the end of Te[256]; confirm that whatever follows Te in
 * memory makes this harmless.
 */
#define Te0 (u32)((const u64*)((const u8*)Te+0))
#define Te1 (u32)((const u64*)((const u8*)Te+3))
#define Te2 (u32)((const u64*)((const u8*)Te+2))
#define Te3 (u32)((const u64*)((const u8*)Te+1))
/*
 * Layout of the decryption table.  Every 64-bit entry of Td holds the
 * 4-byte pattern for Si[x] twice in a row:
 *
 * Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
 * Td0[x] = Si[x].[0e, 09, 0d, 0b];
 * Td1[x] = Si[x].[0b, 0e, 09, 0d];
 * Td2[x] = Si[x].[0d, 0b, 0e, 09];
 * Td3[x] = Si[x].[09, 0d, 0b, 0e];
 * Td4[x] = Si[x].[01];
 *
 * As with Te0..Te3, "TdN[x]" is an 8-byte load from byte offset k
 * inside entry x of Td, truncated to its low 32 bits; the casts keep
 * the const qualifier of Td.
 *
 * NOTE(review): with k = 1..3 and x = 255 the 8-byte load extends k
 * bytes beyond the end of Td[256]; confirm that whatever follows Td in
 * memory makes this harmless.
 */
#define Td0 (u32)((const u64*)((const u8*)Td+0))
#define Td1 (u32)((const u64*)((const u8*)Td+3))
#define Td2 (u32)((const u64*)((const u8*)Td+2))
#define Td3 (u32)((const u64*)((const u8*)Td+1))

132static const u64 Te[256] = {
133 U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
134 U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
135 U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
136 U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
137 U64(0x5030306050303060), U64(0x0301010203010102),
138 U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
139 U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
140 U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
141 U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
142 U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
143 U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
144 U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
145 U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
146 U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
147 U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
148 U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
149 U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
150 U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
151 U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
152 U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
153 U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
154 U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
155 U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
156 U64(0x5331316253313162), U64(0x3f15152a3f15152a),
157 U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
158 U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
159 U64(0x2818183028181830), U64(0xa1969637a1969637),
160 U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
161 U64(0x0907070e0907070e), U64(0x3612122436121224),
162 U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
163 U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
164 U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
165 U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
166 U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
167 U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
168 U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
169 U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
170 U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
171 U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
172 U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
173 U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
174 U64(0x0000000000000000), U64(0x2cededc12cededc1),
175 U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
176 U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
177 U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
178 U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
179 U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
180 U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
181 U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
182 U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
183 U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
184 U64(0x5533336655333366), U64(0x9485851194858511),
185 U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
186 U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
187 U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
188 U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
189 U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
190 U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
191 U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
192 U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
193 U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
194 U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
195 U64(0x3010102030101020), U64(0x1affffe51affffe5),
196 U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
197 U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
198 U64(0x3513132635131326), U64(0x2fececc32fececc3),
199 U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
200 U64(0xcc444488cc444488), U64(0x3917172e3917172e),
201 U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
202 U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
203 U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
204 U64(0x2b1919322b191932), U64(0x957373e6957373e6),
205 U64(0xa06060c0a06060c0), U64(0x9881811998818119),
206 U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
207 U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
208 U64(0xab90903bab90903b), U64(0x8388880b8388880b),
209 U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
210 U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
211 U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
212 U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
213 U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
214 U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
215 U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
216 U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
217 U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
218 U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
219 U64(0xa8919139a8919139), U64(0xa4959531a4959531),
220 U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
221 U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
222 U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
223 U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
224 U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
225 U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
226 U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
227 U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
228 U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
229 U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
230 U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
231 U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
232 U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
233 U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
234 U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
235 U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
236 U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
237 U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
238 U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
239 U64(0xd8484890d8484890), U64(0x0503030605030306),
240 U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
241 U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
242 U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
243 U64(0x9186861791868617), U64(0x58c1c19958c1c199),
244 U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
245 U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
246 U64(0xb398982bb398982b), U64(0x3311112233111122),
247 U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
248 U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
249 U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
250 U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
251 U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
252 U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
253 U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
254 U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
255 U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
256 U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
257 U64(0xc3414182c3414182), U64(0xb0999929b0999929),
258 U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
259 U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
260 U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
261};
262
dff2922a
AP
263static const u8 Te4[256] = {
264 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
265 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
266 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
267 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
268 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
269 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
270 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
271 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
272 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
273 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
274 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
275 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
276 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
277 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
278 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
279 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
280 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
281 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
282 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
283 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
284 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
285 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
286 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
287 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
288 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
289 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
290 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
291 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
292 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
293 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
294 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
295 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
296};
297
9c62bca1
AP
298static const u64 Td[256] = {
299 U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
300 U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
301 U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
302 U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
303 U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
304 U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
305 U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
306 U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
307 U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
308 U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
309 U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
310 U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
311 U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
312 U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
313 U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
314 U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
315 U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
316 U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
317 U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
318 U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
319 U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
320 U64(0x6033519760335197), U64(0x457f5362457f5362),
321 U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
322 U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
323 U64(0x5868487058684870), U64(0x19fd458f19fd458f),
324 U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
325 U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
326 U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
327 U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
328 U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
329 U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
330 U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
331 U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
332 U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
333 U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
334 U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
335 U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
336 U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
337 U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
338 U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
339 U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
340 U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
341 U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
342 U64(0x6fd406046fd40604), U64(0xff155060ff155060),
343 U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
344 U64(0xcc434089cc434089), U64(0x779ed967779ed967),
345 U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
346 U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
347 U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
348 U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
349 U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
350 U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
351 U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
352 U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
353 U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
354 U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
355 U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
356 U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
357 U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
358 U64(0x694b775a694b775a), U64(0x161a121c161a121c),
359 U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
360 U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
361 U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
362 U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
363 U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
364 U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
365 U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
366 U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
367 U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
368 U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
369 U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
370 U64(0x4022971340229713), U64(0x2011c6842011c684),
371 U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
372 U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
373 U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
374 U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
375 U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
376 U64(0xfa489411fa489411), U64(0x2264e9472264e947),
377 U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
378 U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
379 U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
380 U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
381 U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
382 U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
383 U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
384 U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
385 U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
386 U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
387 U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
388 U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
389 U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
390 U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
391 U64(0x097826cd097826cd), U64(0xf418596ef418596e),
392 U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
393 U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
394 U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
395 U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
396 U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
397 U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
398 U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
399 U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
400 U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
401 U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
402 U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
403 U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
404 U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
405 U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
406 U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
407 U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
408 U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
409 U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
410 U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
411 U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
412 U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
413 U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
414 U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
415 U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
416 U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
417 U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
418 U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
419 U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
420 U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
421 U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
422 U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
423 U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
424 U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
425 U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
426 U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
427};
428static const u8 Td4[256] = {
429 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
430 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
431 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
432 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
433 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
434 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
435 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
436 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
437 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
438 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
439 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
440 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
441 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
442 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
443 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
444 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
445 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
446 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
447 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
448 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
449 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
450 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
451 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
452 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
453 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
454 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
455 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
456 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
457 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
458 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
459 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
460 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
461};
462
463static const u32 rcon[] = {
464 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
465 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
466 0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
467};
468
469/**
470 * Expand the cipher key into the encryption key schedule.
471 */
472int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
473 AES_KEY *key) {
474
475 u32 *rk;
476 int i = 0;
477 u32 temp;
478
479 if (!userKey || !key)
480 return -1;
481 if (bits != 128 && bits != 192 && bits != 256)
482 return -2;
483
484 rk = key->rd_key;
485
486 if (bits==128)
487 key->rounds = 10;
488 else if (bits==192)
489 key->rounds = 12;
490 else
491 key->rounds = 14;
492
493 rk[0] = GETU32(userKey );
494 rk[1] = GETU32(userKey + 4);
495 rk[2] = GETU32(userKey + 8);
496 rk[3] = GETU32(userKey + 12);
497 if (bits == 128) {
498 while (1) {
499 temp = rk[3];
500 rk[4] = rk[0] ^
8cebec98
AP
501 (Te4[(temp >> 8) & 0xff] ) ^
502 (Te4[(temp >> 16) & 0xff] << 8) ^
503 (Te4[(temp >> 24) ] << 16) ^
504 (Te4[(temp ) & 0xff] << 24) ^
9c62bca1
AP
505 rcon[i];
506 rk[5] = rk[1] ^ rk[4];
507 rk[6] = rk[2] ^ rk[5];
508 rk[7] = rk[3] ^ rk[6];
509 if (++i == 10) {
510 return 0;
511 }
512 rk += 4;
513 }
514 }
515 rk[4] = GETU32(userKey + 16);
516 rk[5] = GETU32(userKey + 20);
517 if (bits == 192) {
518 while (1) {
519 temp = rk[ 5];
520 rk[ 6] = rk[ 0] ^
8cebec98
AP
521 (Te4[(temp >> 8) & 0xff] ) ^
522 (Te4[(temp >> 16) & 0xff] << 8) ^
523 (Te4[(temp >> 24) ] << 16) ^
524 (Te4[(temp ) & 0xff] << 24) ^
9c62bca1
AP
525 rcon[i];
526 rk[ 7] = rk[ 1] ^ rk[ 6];
527 rk[ 8] = rk[ 2] ^ rk[ 7];
528 rk[ 9] = rk[ 3] ^ rk[ 8];
529 if (++i == 8) {
530 return 0;
531 }
532 rk[10] = rk[ 4] ^ rk[ 9];
533 rk[11] = rk[ 5] ^ rk[10];
534 rk += 6;
535 }
536 }
537 rk[6] = GETU32(userKey + 24);
538 rk[7] = GETU32(userKey + 28);
539 if (bits == 256) {
540 while (1) {
541 temp = rk[ 7];
542 rk[ 8] = rk[ 0] ^
8cebec98
AP
543 (Te4[(temp >> 8) & 0xff] ) ^
544 (Te4[(temp >> 16) & 0xff] << 8) ^
545 (Te4[(temp >> 24) ] << 16) ^
546 (Te4[(temp ) & 0xff] << 24) ^
9c62bca1
AP
547 rcon[i];
548 rk[ 9] = rk[ 1] ^ rk[ 8];
549 rk[10] = rk[ 2] ^ rk[ 9];
550 rk[11] = rk[ 3] ^ rk[10];
551 if (++i == 7) {
552 return 0;
553 }
554 temp = rk[11];
555 rk[12] = rk[ 4] ^
8cebec98
AP
556 (Te4[(temp ) & 0xff] ) ^
557 (Te4[(temp >> 8) & 0xff] << 8) ^
558 (Te4[(temp >> 16) & 0xff] << 16) ^
559 (Te4[(temp >> 24) ] << 24);
9c62bca1
AP
560 rk[13] = rk[ 5] ^ rk[12];
561 rk[14] = rk[ 6] ^ rk[13];
562 rk[15] = rk[ 7] ^ rk[14];
563
564 rk += 8;
565 }
566 }
567 return 0;
568}
569
570/**
571 * Expand the cipher key into the decryption key schedule.
572 */
573int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
574 AES_KEY *key) {
575
576 u32 *rk;
577 int i, j, status;
578 u32 temp;
579
580 /* first, start with an encryption schedule */
581 status = AES_set_encrypt_key(userKey, bits, key);
582 if (status < 0)
583 return status;
584
585 rk = key->rd_key;
586
587 /* invert the order of the round keys: */
588 for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
589 temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
590 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
591 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
592 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
593 }
594 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
595 for (i = 1; i < (key->rounds); i++) {
596 rk += 4;
8cebec98
AP
597#if 1
598 for (j = 0; j < 4; j++) {
599 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
600
601 tp1 = rk[j];
602 m = tp1 & 0x80808080;
603 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
604 ((m - (m >> 7)) & 0x1b1b1b1b);
605 m = tp2 & 0x80808080;
606 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
607 ((m - (m >> 7)) & 0x1b1b1b1b);
608 m = tp4 & 0x80808080;
609 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
610 ((m - (m >> 7)) & 0x1b1b1b1b);
611 tp9 = tp8 ^ tp1;
612 tpb = tp9 ^ tp2;
613 tpd = tp9 ^ tp4;
614 tpe = tp8 ^ tp4 ^ tp2;
615#if defined(ROTATE)
616 rk[j] = tpe ^ ROTATE(tpd,16) ^
617 ROTATE(tp9,8) ^ ROTATE(tpb,24);
618#else
619 rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
620 (tp9 >> 24) ^ (tp9 << 8) ^
621 (tpb >> 8) ^ (tpb << 24);
622#endif
623 }
624#else
9c62bca1
AP
625 rk[0] =
626 Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^
627 Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^
628 Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
629 Td3[Te2[(rk[0] >> 24) ] & 0xff];
630 rk[1] =
631 Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^
632 Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^
633 Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
634 Td3[Te2[(rk[1] >> 24) ] & 0xff];
635 rk[2] =
636 Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^
637 Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^
638 Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
639 Td3[Te2[(rk[2] >> 24) ] & 0xff];
640 rk[3] =
641 Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^
642 Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^
643 Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
644 Td3[Te2[(rk[3] >> 24) ] & 0xff];
8cebec98 645#endif
9c62bca1
AP
646 }
647 return 0;
648}
649
650/*
651 * Encrypt a single block
652 * in and out can overlap
653 */
654void AES_encrypt(const unsigned char *in, unsigned char *out,
655 const AES_KEY *key) {
656
657 const u32 *rk;
dff2922a 658 u32 s0, s1, s2, s3, t[4];
9c62bca1
AP
659 int r;
660
661 assert(in && out && key);
662 rk = key->rd_key;
663
664 /*
665 * map byte array block to cipher state
666 * and add initial round key:
667 */
668 s0 = GETU32(in ) ^ rk[0];
669 s1 = GETU32(in + 4) ^ rk[1];
670 s2 = GETU32(in + 8) ^ rk[2];
671 s3 = GETU32(in + 12) ^ rk[3];
672
dff2922a
AP
673#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
674 prefetch256(Te4);
675
676 t[0] = Te4[(s0 ) & 0xff] ^
677 Te4[(s1 >> 8) & 0xff] << 8 ^
678 Te4[(s2 >> 16) & 0xff] << 16 ^
679 Te4[(s3 >> 24) ] << 24;
680 t[1] = Te4[(s1 ) & 0xff] ^
681 Te4[(s2 >> 8) & 0xff] << 8 ^
682 Te4[(s3 >> 16) & 0xff] << 16 ^
683 Te4[(s0 >> 24) ] << 24;
684 t[2] = Te4[(s2 ) & 0xff] ^
685 Te4[(s3 >> 8) & 0xff] << 8 ^
686 Te4[(s0 >> 16) & 0xff] << 16 ^
687 Te4[(s1 >> 24) ] << 24;
688 t[3] = Te4[(s3 ) & 0xff] ^
689 Te4[(s0 >> 8) & 0xff] << 8 ^
690 Te4[(s1 >> 16) & 0xff] << 16 ^
691 Te4[(s2 >> 24) ] << 24;
692
693 /* now do the linear transform using words */
694 { int i;
695 u32 r0, r1, r2;
696
697 for (i = 0; i < 4; i++) {
698 r0 = t[i];
699 r1 = r0 & 0x80808080;
700 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
701 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
702#if defined(ROTATE)
703 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
704 ROTATE(r0,16) ^ ROTATE(r0,8);
705#else
706 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
707 (r0 << 16) ^ (r0 >> 16) ^
708 (r0 << 8) ^ (r0 >> 24);
709#endif
710 t[i] ^= rk[4+i];
711 }
712 }
713#else
714 t[0] = Te0[(s0 ) & 0xff] ^
715 Te1[(s1 >> 8) & 0xff] ^
716 Te2[(s2 >> 16) & 0xff] ^
717 Te3[(s3 >> 24) ] ^
718 rk[4];
719 t[1] = Te0[(s1 ) & 0xff] ^
720 Te1[(s2 >> 8) & 0xff] ^
721 Te2[(s3 >> 16) & 0xff] ^
722 Te3[(s0 >> 24) ] ^
723 rk[5];
724 t[2] = Te0[(s2 ) & 0xff] ^
725 Te1[(s3 >> 8) & 0xff] ^
726 Te2[(s0 >> 16) & 0xff] ^
727 Te3[(s1 >> 24) ] ^
728 rk[6];
729 t[3] = Te0[(s3 ) & 0xff] ^
730 Te1[(s0 >> 8) & 0xff] ^
731 Te2[(s1 >> 16) & 0xff] ^
732 Te3[(s2 >> 24) ] ^
733 rk[7];
734#endif
735 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
736
737 /*
738 * Nr - 2 full rounds:
739 */
dff2922a
AP
740 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
741#if defined(AES_COMPACT_IN_INNER_ROUNDS)
742 t[0] = Te4[(s0 ) & 0xff] ^
743 Te4[(s1 >> 8) & 0xff] << 8 ^
744 Te4[(s2 >> 16) & 0xff] << 16 ^
745 Te4[(s3 >> 24) ] << 24;
746 t[1] = Te4[(s1 ) & 0xff] ^
747 Te4[(s2 >> 8) & 0xff] << 8 ^
748 Te4[(s3 >> 16) & 0xff] << 16 ^
749 Te4[(s0 >> 24) ] << 24;
750 t[2] = Te4[(s2 ) & 0xff] ^
751 Te4[(s3 >> 8) & 0xff] << 8 ^
752 Te4[(s0 >> 16) & 0xff] << 16 ^
753 Te4[(s1 >> 24) ] << 24;
754 t[3] = Te4[(s3 ) & 0xff] ^
755 Te4[(s0 >> 8) & 0xff] << 8 ^
756 Te4[(s1 >> 16) & 0xff] << 16 ^
757 Te4[(s2 >> 24) ] << 24;
758
759 /* now do the linear transform using words */
760 { int i;
761 u32 r0, r1, r2;
762
763 for (i = 0; i < 4; i++) {
764 r0 = t[i];
765 r1 = r0 & 0x80808080;
766 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
767 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
768#if defined(ROTATE)
769 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
770 ROTATE(r0,16) ^ ROTATE(r0,8);
771#else
772 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
773 (r0 << 16) ^ (r0 >> 16) ^
774 (r0 << 8) ^ (r0 >> 24);
775#endif
776 t[i] ^= rk[i];
777 }
778 }
779#else
780 t[0] = Te0[(s0 ) & 0xff] ^
781 Te1[(s1 >> 8) & 0xff] ^
782 Te2[(s2 >> 16) & 0xff] ^
783 Te3[(s3 >> 24) ] ^
784 rk[0];
785 t[1] = Te0[(s1 ) & 0xff] ^
786 Te1[(s2 >> 8) & 0xff] ^
787 Te2[(s3 >> 16) & 0xff] ^
788 Te3[(s0 >> 24) ] ^
789 rk[1];
790 t[2] = Te0[(s2 ) & 0xff] ^
791 Te1[(s3 >> 8) & 0xff] ^
792 Te2[(s0 >> 16) & 0xff] ^
793 Te3[(s1 >> 24) ] ^
794 rk[2];
795 t[3] = Te0[(s3 ) & 0xff] ^
796 Te1[(s0 >> 8) & 0xff] ^
797 Te2[(s1 >> 16) & 0xff] ^
798 Te3[(s2 >> 24) ] ^
799 rk[3];
800#endif
801 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
802 }
803 /*
804 * apply last round and
805 * map cipher state to byte array block:
806 */
dff2922a
AP
807#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
808 prefetch256(Te4);
809
810 *(u32*)(out+0) =
811 Te4[(s0 ) & 0xff] ^
812 Te4[(s1 >> 8) & 0xff] << 8 ^
813 Te4[(s2 >> 16) & 0xff] << 16 ^
814 Te4[(s3 >> 24) ] << 24 ^
9c62bca1 815 rk[0];
dff2922a
AP
816 *(u32*)(out+4) =
817 Te4[(s1 ) & 0xff] ^
818 Te4[(s2 >> 8) & 0xff] << 8 ^
819 Te4[(s3 >> 16) & 0xff] << 16 ^
820 Te4[(s0 >> 24) ] << 24 ^
9c62bca1 821 rk[1];
dff2922a
AP
822 *(u32*)(out+8) =
823 Te4[(s2 ) & 0xff] ^
824 Te4[(s3 >> 8) & 0xff] << 8 ^
825 Te4[(s0 >> 16) & 0xff] << 16 ^
826 Te4[(s1 >> 24) ] << 24 ^
9c62bca1 827 rk[2];
dff2922a
AP
828 *(u32*)(out+12) =
829 Te4[(s3 ) & 0xff] ^
830 Te4[(s0 >> 8) & 0xff] << 8 ^
831 Te4[(s1 >> 16) & 0xff] << 16 ^
832 Te4[(s2 >> 24) ] << 24 ^
9c62bca1 833 rk[3];
dff2922a
AP
834#else
835 *(u32*)(out+0) =
836 (Te2[(s0 ) & 0xff] & 0x000000ffU) ^
837 (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^
838 (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
839 (Te1[(s3 >> 24) ] & 0xff000000U) ^
840 rk[0];
841 *(u32*)(out+4) =
842 (Te2[(s1 ) & 0xff] & 0x000000ffU) ^
843 (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^
844 (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
845 (Te1[(s0 >> 24) ] & 0xff000000U) ^
846 rk[1];
847 *(u32*)(out+8) =
848 (Te2[(s2 ) & 0xff] & 0x000000ffU) ^
849 (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^
850 (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
851 (Te1[(s1 >> 24) ] & 0xff000000U) ^
852 rk[2];
853 *(u32*)(out+12) =
854 (Te2[(s3 ) & 0xff] & 0x000000ffU) ^
855 (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^
856 (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
857 (Te1[(s2 >> 24) ] & 0xff000000U) ^
858 rk[3];
859#endif
9c62bca1
AP
860}
861
862/*
863 * Decrypt a single block
864 * in and out can overlap
865 */
866void AES_decrypt(const unsigned char *in, unsigned char *out,
867 const AES_KEY *key) {
868
869 const u32 *rk;
dff2922a 870 u32 s0, s1, s2, s3, t[4];
9c62bca1
AP
871 int r;
872
873 assert(in && out && key);
874 rk = key->rd_key;
875
876 /*
877 * map byte array block to cipher state
878 * and add initial round key:
879 */
880 s0 = GETU32(in ) ^ rk[0];
881 s1 = GETU32(in + 4) ^ rk[1];
882 s2 = GETU32(in + 8) ^ rk[2];
883 s3 = GETU32(in + 12) ^ rk[3];
884
dff2922a
AP
885#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
886 prefetch256(Td4);
887
888 t[0] = Td4[(s0 ) & 0xff] ^
889 Td4[(s3 >> 8) & 0xff] << 8 ^
890 Td4[(s2 >> 16) & 0xff] << 16 ^
891 Td4[(s1 >> 24) ] << 24;
892 t[1] = Td4[(s1 ) & 0xff] ^
893 Td4[(s0 >> 8) & 0xff] << 8 ^
894 Td4[(s3 >> 16) & 0xff] << 16 ^
895 Td4[(s2 >> 24) ] << 24;
896 t[2] = Td4[(s2 ) & 0xff] ^
897 Td4[(s1 >> 8) & 0xff] << 8 ^
898 Td4[(s0 >> 16) & 0xff] << 16 ^
899 Td4[(s3 >> 24) ] << 24;
900 t[3] = Td4[(s3 ) & 0xff] ^
901 Td4[(s2 >> 8) & 0xff] << 8 ^
902 Td4[(s1 >> 16) & 0xff] << 16 ^
903 Td4[(s0 >> 24) ] << 24;
904
905 /* now do the linear transform using words */
906 { int i;
907 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
908
909 for (i = 0; i < 4; i++) {
910 tp1 = t[i];
911 m = tp1 & 0x80808080;
912 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
913 ((m - (m >> 7)) & 0x1b1b1b1b);
914 m = tp2 & 0x80808080;
915 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
916 ((m - (m >> 7)) & 0x1b1b1b1b);
917 m = tp4 & 0x80808080;
918 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
919 ((m - (m >> 7)) & 0x1b1b1b1b);
920 tp9 = tp8 ^ tp1;
921 tpb = tp9 ^ tp2;
922 tpd = tp9 ^ tp4;
923 tpe = tp8 ^ tp4 ^ tp2;
924#if defined(ROTATE)
925 t[i] = tpe ^ ROTATE(tpd,16) ^
926 ROTATE(tp9,8) ^ ROTATE(tpb,24);
927#else
928 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
929 (tp9 >> 24) ^ (tp9 << 8) ^
930 (tpb >> 8) ^ (tpb << 24);
931#endif
932 t[i] ^= rk[4+i];
933 }
934 }
935#else
936 t[0] = Td0[(s0 ) & 0xff] ^
937 Td1[(s3 >> 8) & 0xff] ^
938 Td2[(s2 >> 16) & 0xff] ^
939 Td3[(s1 >> 24) ] ^
940 rk[4];
941 t[1] = Td0[(s1 ) & 0xff] ^
942 Td1[(s0 >> 8) & 0xff] ^
943 Td2[(s3 >> 16) & 0xff] ^
944 Td3[(s2 >> 24) ] ^
945 rk[5];
946 t[2] = Td0[(s2 ) & 0xff] ^
947 Td1[(s1 >> 8) & 0xff] ^
948 Td2[(s0 >> 16) & 0xff] ^
949 Td3[(s3 >> 24) ] ^
950 rk[6];
951 t[3] = Td0[(s3 ) & 0xff] ^
952 Td1[(s2 >> 8) & 0xff] ^
953 Td2[(s1 >> 16) & 0xff] ^
954 Td3[(s0 >> 24) ] ^
955 rk[7];
956#endif
957 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
958
959 /*
960 * Nr - 2 full rounds:
961 */
dff2922a
AP
962 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
963#if defined(AES_COMPACT_IN_INNER_ROUNDS)
964 t[0] = Td4[(s0 ) & 0xff] ^
965 Td4[(s3 >> 8) & 0xff] << 8 ^
966 Td4[(s2 >> 16) & 0xff] << 16 ^
967 Td4[(s1 >> 24) ] << 24;
968 t[1] = Td4[(s1 ) & 0xff] ^
969 Td4[(s0 >> 8) & 0xff] << 8 ^
970 Td4[(s3 >> 16) & 0xff] << 16 ^
971 Td4[(s2 >> 24) ] << 24;
972 t[2] = Td4[(s2 ) & 0xff] ^
973 Td4[(s1 >> 8) & 0xff] << 8 ^
974 Td4[(s0 >> 16) & 0xff] << 16 ^
975 Td4[(s3 >> 24) ] << 24;
976 t[3] = Td4[(s3 ) & 0xff] ^
977 Td4[(s2 >> 8) & 0xff] << 8 ^
978 Td4[(s1 >> 16) & 0xff] << 16 ^
979 Td4[(s0 >> 24) ] << 24;
980
981 /* now do the linear transform using words */
982 { int i;
983 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
984
985 for (i = 0; i < 4; i++) {
986 tp1 = t[i];
987 m = tp1 & 0x80808080;
988 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
989 ((m - (m >> 7)) & 0x1b1b1b1b);
990 m = tp2 & 0x80808080;
991 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
992 ((m - (m >> 7)) & 0x1b1b1b1b);
993 m = tp4 & 0x80808080;
994 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
995 ((m - (m >> 7)) & 0x1b1b1b1b);
996 tp9 = tp8 ^ tp1;
997 tpb = tp9 ^ tp2;
998 tpd = tp9 ^ tp4;
999 tpe = tp8 ^ tp4 ^ tp2;
1000#if defined(ROTATE)
1001 t[i] = tpe ^ ROTATE(tpd,16) ^
1002 ROTATE(tp9,8) ^ ROTATE(tpb,24);
1003#else
1004 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1005 (tp9 >> 24) ^ (tp9 << 8) ^
1006 (tpb >> 8) ^ (tpb << 24);
1007#endif
1008 t[i] ^= rk[i];
1009 }
1010 }
1011#else
1012 t[0] = Td0[(s0 ) & 0xff] ^
1013 Td1[(s3 >> 8) & 0xff] ^
1014 Td2[(s2 >> 16) & 0xff] ^
1015 Td3[(s1 >> 24) ] ^
1016 rk[0];
1017 t[1] = Td0[(s1 ) & 0xff] ^
1018 Td1[(s0 >> 8) & 0xff] ^
1019 Td2[(s3 >> 16) & 0xff] ^
1020 Td3[(s2 >> 24) ] ^
1021 rk[1];
1022 t[2] = Td0[(s2 ) & 0xff] ^
1023 Td1[(s1 >> 8) & 0xff] ^
1024 Td2[(s0 >> 16) & 0xff] ^
1025 Td3[(s3 >> 24) ] ^
1026 rk[2];
1027 t[3] = Td0[(s3 ) & 0xff] ^
1028 Td1[(s2 >> 8) & 0xff] ^
1029 Td2[(s1 >> 16) & 0xff] ^
1030 Td3[(s0 >> 24) ] ^
1031 rk[3];
1032#endif
1033 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
1034 }
1035 /*
1036 * apply last round and
1037 * map cipher state to byte array block:
1038 */
dff2922a
AP
1039 prefetch256(Td4);
1040
1041 *(u32*)(out+0) =
1042 (Td4[(s0 ) & 0xff]) ^
1043 (Td4[(s3 >> 8) & 0xff] << 8) ^
1044 (Td4[(s2 >> 16) & 0xff] << 16) ^
1045 (Td4[(s1 >> 24) ] << 24) ^
1046 rk[0];
1047 *(u32*)(out+4) =
1048 (Td4[(s1 ) & 0xff]) ^
1049 (Td4[(s0 >> 8) & 0xff] << 8) ^
1050 (Td4[(s3 >> 16) & 0xff] << 16) ^
1051 (Td4[(s2 >> 24) ] << 24) ^
1052 rk[1];
1053 *(u32*)(out+8) =
1054 (Td4[(s2 ) & 0xff]) ^
1055 (Td4[(s1 >> 8) & 0xff] << 8) ^
1056 (Td4[(s0 >> 16) & 0xff] << 16) ^
1057 (Td4[(s3 >> 24) ] << 24) ^
1058 rk[2];
1059 *(u32*)(out+12) =
1060 (Td4[(s3 ) & 0xff]) ^
1061 (Td4[(s2 >> 8) & 0xff] << 8) ^
1062 (Td4[(s1 >> 16) & 0xff] << 16) ^
1063 (Td4[(s0 >> 24) ] << 24) ^
1064 rk[3];
9c62bca1 1065}