]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/aes/aes_x86core.c
Copyright consolidation 05/10
[thirdparty/openssl.git] / crypto / aes / aes_x86core.c
CommitLineData
aa6bb135
RS
1/*
2 * Copyright 2006-2016 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the OpenSSL license (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
9c62bca1
AP
10/**
11 * rijndael-alg-fst.c
12 *
13 * @version 3.0 (December 2000)
14 *
15 * Optimised ANSI C code for the Rijndael cipher (now AES)
16 *
17 * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
18 * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
19 * @author Paulo Barreto <paulo.barreto@terra.com.br>
20 *
21 * This code is hereby placed in the public domain.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
24 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 */
35
36/*
37 * This is experimental x86[_64] derivative. It assumes little-endian
38 * byte order and expects CPU to sustain unaligned memory references.
39 * It is used as playground for cache-time attack mitigations and
40 * serves as reference C implementation for x86[_64] assembler.
41 *
66186aee 42 * <appro@fy.chalmers.se>
9c62bca1
AP
43 */
44
45
9c62bca1
AP
46#include <assert.h>
47
48#include <stdlib.h>
49#include <openssl/aes.h>
50#include "aes_locl.h"
51
dff2922a
AP
/*
 * These two switches select which table, the 256-byte one or the 2KB one,
 * is referenced in the outer rounds (AES_COMPACT_IN_OUTER_ROUNDS) and in
 * the inner rounds (AES_COMPACT_IN_INNER_ROUNDS) respectively.
 */
#define AES_COMPACT_IN_OUTER_ROUNDS
#ifdef AES_COMPACT_IN_OUTER_ROUNDS
/*
 * AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while adding
 * AES_COMPACT_IN_INNER_ROUNDS reduces the benchmark *further* by a factor
 * of ~2, so the inner-round switch is explicitly turned off here.
 */
# undef AES_COMPACT_IN_INNER_ROUNDS
#endif
63
64#if 1
/*
 * Touch a 256-byte table so that it is pulled into the data cache.
 *
 * One unsigned long is read every 32 bytes across the 256 bytes starting at
 * |table|.  The reads go through a volatile pointer and the XOR of the
 * values is stored into a volatile local, so the compiler cannot drop them.
 * The function has no other effect and returns nothing.
 */
static void prefetch256(const void *table)
{
    volatile unsigned long *words = (void *)table;
    volatile unsigned long sink;
    unsigned long acc = 0;
    int idx = 0;

    /* 32 is common least cache-line size */
    while (idx < 256 / sizeof(words[0])) {
        acc ^= words[idx];
        idx += 32 / sizeof(words[0]);
    }

    sink = acc;
}
76#else
77# define prefetch256(t)
78#endif
79
9c62bca1
AP
/*
 * GETU32(p) reads a 32-bit word directly from the address |p|.  The cast
 * merely reinterprets the pointer: no byte swapping and no alignment
 * fix-up is performed.
 */
#undef GETU32
#define GETU32(p) (*((u32*)(p)))

/*
 * u64 is the type used for the 8-byte table entries; U64(C) appends the
 * matching integer-literal suffix to the constant C.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
typedef unsigned __int64 u64;
#define U64(C)  C##UI64
#elif defined(__arch64__)
typedef unsigned long u64;
#define U64(C)  C##UL
#else
typedef unsigned long long u64;
#define U64(C)  C##ULL
#endif

/*
 * ROTATE(a,n) rotates the 32-bit value |a| left by |n| bits.  It is only
 * defined where a compiler intrinsic or x86 inline assembler is available;
 * code using it must test defined(ROTATE) and fall back to shifts otherwise.
 */
#undef ROTATE
#if defined(_MSC_VER)
# define ROTATE(a,n)    _lrotl(a,n)
#elif defined(__ICC)
# define ROTATE(a,n)    _rotl(a,n)
#elif defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
/*
 * __asm__ rather than asm: the plain keyword is not recognised when the
 * compiler runs in a strict ISO mode (-std=c99, -std=c11, -ansi).
 * The "I" constraint requires |n| to be a compile-time constant.
 */
#  define ROTATE(a,n)   ({ register unsigned int ret;   \
                           __asm__ (                    \
                                   "roll %1,%0"         \
                                   : "=r"(ret)          \
                                   : "I"(n), "0"(a)     \
                                   : "cc");             \
                           ret;                         \
                        })
# endif
#endif
/*-
Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
Te0[x] = S [x].[02, 01, 01, 03];
Te1[x] = S [x].[03, 02, 01, 01];
Te2[x] = S [x].[01, 03, 02, 01];
Te3[x] = S [x].[01, 01, 03, 02];
*/
/*
 * Te0..Te3 are not separate arrays: each one is a view of Te shifted by a
 * byte offset.  At a use site the cast binds after the subscript, i.e.
 * Te1[i] expands to (u32)(((u64*)((u8*)Te+3))[i]), which is the low 32 bits
 * of the 8 bytes found 3 bytes into entry i.  Because every entry holds the
 * same 32-bit word twice, this yields a byte-rotated copy of that word.
 *
 * NOTE(review): for i == 255 and a non-zero offset the 8-byte load nominally
 * extends past the end of the table, although only its low 4 bytes are kept.
 */
#define Te0 (u32)((u64*)((u8*)Te+0))
#define Te1 (u32)((u64*)((u8*)Te+3))
#define Te2 (u32)((u64*)((u8*)Te+2))
#define Te3 (u32)((u64*)((u8*)Te+1))
/*-
Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
Td0[x] = Si[x].[0e, 09, 0d, 0b];
Td1[x] = Si[x].[0b, 0e, 09, 0d];
Td2[x] = Si[x].[0d, 0b, 0e, 09];
Td3[x] = Si[x].[09, 0d, 0b, 0e];
Td4[x] = Si[x].[01];
*/
/* Td0..Td3 are byte-offset views of Td, built exactly like Te0..Te3. */
#define Td0 (u32)((u64*)((u8*)Td+0))
#define Td1 (u32)((u64*)((u8*)Td+3))
#define Td2 (u32)((u64*)((u8*)Td+2))
#define Td3 (u32)((u64*)((u8*)Td+1))
9c62bca1
AP
134
135static const u64 Te[256] = {
136 U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
137 U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
138 U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
139 U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
140 U64(0x5030306050303060), U64(0x0301010203010102),
141 U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
142 U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
143 U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
144 U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
145 U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
146 U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
147 U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
148 U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
149 U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
150 U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
151 U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
152 U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
153 U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
154 U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
155 U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
156 U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
157 U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
158 U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
159 U64(0x5331316253313162), U64(0x3f15152a3f15152a),
160 U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
161 U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
162 U64(0x2818183028181830), U64(0xa1969637a1969637),
163 U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
164 U64(0x0907070e0907070e), U64(0x3612122436121224),
165 U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
166 U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
167 U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
168 U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
169 U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
170 U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
171 U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
172 U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
173 U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
174 U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
175 U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
176 U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
177 U64(0x0000000000000000), U64(0x2cededc12cededc1),
178 U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
179 U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
180 U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
181 U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
182 U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
183 U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
184 U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
185 U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
186 U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
187 U64(0x5533336655333366), U64(0x9485851194858511),
188 U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
189 U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
190 U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
191 U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
192 U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
193 U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
194 U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
195 U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
196 U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
197 U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
198 U64(0x3010102030101020), U64(0x1affffe51affffe5),
199 U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
200 U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
201 U64(0x3513132635131326), U64(0x2fececc32fececc3),
202 U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
203 U64(0xcc444488cc444488), U64(0x3917172e3917172e),
204 U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
205 U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
206 U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
207 U64(0x2b1919322b191932), U64(0x957373e6957373e6),
208 U64(0xa06060c0a06060c0), U64(0x9881811998818119),
209 U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
210 U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
211 U64(0xab90903bab90903b), U64(0x8388880b8388880b),
212 U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
213 U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
214 U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
215 U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
216 U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
217 U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
218 U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
219 U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
220 U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
221 U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
222 U64(0xa8919139a8919139), U64(0xa4959531a4959531),
223 U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
224 U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
225 U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
226 U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
227 U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
228 U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
229 U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
230 U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
231 U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
232 U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
233 U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
234 U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
235 U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
236 U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
237 U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
238 U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
239 U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
240 U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
241 U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
242 U64(0xd8484890d8484890), U64(0x0503030605030306),
243 U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
244 U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
245 U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
246 U64(0x9186861791868617), U64(0x58c1c19958c1c199),
247 U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
248 U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
249 U64(0xb398982bb398982b), U64(0x3311112233111122),
250 U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
251 U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
252 U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
253 U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
254 U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
255 U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
256 U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
257 U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
258 U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
259 U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
260 U64(0xc3414182c3414182), U64(0xb0999929b0999929),
261 U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
262 U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
263 U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
264};
265
dff2922a
AP
266static const u8 Te4[256] = {
267 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
268 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
269 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
270 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
271 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
272 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
273 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
274 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
275 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
276 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
277 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
278 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
279 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
280 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
281 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
282 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
283 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
284 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
285 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
286 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
287 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
288 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
289 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
290 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
291 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
292 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
293 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
294 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
295 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
296 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
297 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
298 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
299};
300
9c62bca1
AP
301static const u64 Td[256] = {
302 U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
303 U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
304 U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
305 U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
306 U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
307 U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
308 U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
309 U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
310 U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
311 U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
312 U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
313 U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
314 U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
315 U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
316 U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
317 U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
318 U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
319 U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
320 U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
321 U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
322 U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
323 U64(0x6033519760335197), U64(0x457f5362457f5362),
324 U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
325 U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
326 U64(0x5868487058684870), U64(0x19fd458f19fd458f),
327 U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
328 U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
329 U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
330 U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
331 U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
332 U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
333 U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
334 U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
335 U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
336 U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
337 U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
338 U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
339 U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
340 U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
341 U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
342 U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
343 U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
344 U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
345 U64(0x6fd406046fd40604), U64(0xff155060ff155060),
346 U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
347 U64(0xcc434089cc434089), U64(0x779ed967779ed967),
348 U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
349 U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
350 U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
351 U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
352 U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
353 U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
354 U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
355 U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
356 U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
357 U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
358 U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
359 U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
360 U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
361 U64(0x694b775a694b775a), U64(0x161a121c161a121c),
362 U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
363 U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
364 U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
365 U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
366 U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
367 U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
368 U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
369 U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
370 U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
371 U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
372 U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
373 U64(0x4022971340229713), U64(0x2011c6842011c684),
374 U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
375 U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
376 U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
377 U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
378 U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
379 U64(0xfa489411fa489411), U64(0x2264e9472264e947),
380 U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
381 U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
382 U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
383 U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
384 U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
385 U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
386 U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
387 U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
388 U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
389 U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
390 U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
391 U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
392 U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
393 U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
394 U64(0x097826cd097826cd), U64(0xf418596ef418596e),
395 U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
396 U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
397 U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
398 U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
399 U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
400 U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
401 U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
402 U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
403 U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
404 U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
405 U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
406 U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
407 U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
408 U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
409 U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
410 U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
411 U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
412 U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
413 U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
414 U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
415 U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
416 U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
417 U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
418 U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
419 U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
420 U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
421 U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
422 U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
423 U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
424 U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
425 U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
426 U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
427 U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
428 U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
429 U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
430};
431static const u8 Td4[256] = {
432 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
433 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
434 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
435 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
436 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
437 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
438 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
439 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
440 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
441 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
442 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
443 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
444 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
445 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
446 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
447 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
448 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
449 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
450 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
451 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
452 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
453 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
454 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
455 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
456 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
457 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
458 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
459 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
460 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
461 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
462 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
463 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
464};
465
466static const u32 rcon[] = {
467 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
468 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
469 0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
470};
471
472/**
473 * Expand the cipher key into the encryption key schedule.
474 */
475int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
66186aee
MC
476 AES_KEY *key)
477{
9c62bca1 478
66186aee
MC
479 u32 *rk;
480 int i = 0;
481 u32 temp;
9c62bca1 482
66186aee
MC
483 if (!userKey || !key)
484 return -1;
485 if (bits != 128 && bits != 192 && bits != 256)
486 return -2;
9c62bca1 487
66186aee 488 rk = key->rd_key;
9c62bca1 489
66186aee
MC
490 if (bits==128)
491 key->rounds = 10;
492 else if (bits==192)
493 key->rounds = 12;
494 else
495 key->rounds = 14;
9c62bca1 496
66186aee
MC
497 rk[0] = GETU32(userKey );
498 rk[1] = GETU32(userKey + 4);
499 rk[2] = GETU32(userKey + 8);
500 rk[3] = GETU32(userKey + 12);
501 if (bits == 128) {
502 while (1) {
503 temp = rk[3];
504 rk[4] = rk[0] ^
8b37e5c1
EK
505 ((u32)Te4[(temp >> 8) & 0xff] ) ^
506 ((u32)Te4[(temp >> 16) & 0xff] << 8) ^
507 ((u32)Te4[(temp >> 24) ] << 16) ^
508 ((u32)Te4[(temp ) & 0xff] << 24) ^
66186aee
MC
509 rcon[i];
510 rk[5] = rk[1] ^ rk[4];
511 rk[6] = rk[2] ^ rk[5];
512 rk[7] = rk[3] ^ rk[6];
513 if (++i == 10) {
514 return 0;
515 }
516 rk += 4;
517 }
518 }
519 rk[4] = GETU32(userKey + 16);
520 rk[5] = GETU32(userKey + 20);
521 if (bits == 192) {
522 while (1) {
523 temp = rk[ 5];
524 rk[ 6] = rk[ 0] ^
8b37e5c1
EK
525 ((u32)Te4[(temp >> 8) & 0xff] ) ^
526 ((u32)Te4[(temp >> 16) & 0xff] << 8) ^
527 ((u32)Te4[(temp >> 24) ] << 16) ^
528 ((u32)Te4[(temp ) & 0xff] << 24) ^
66186aee
MC
529 rcon[i];
530 rk[ 7] = rk[ 1] ^ rk[ 6];
531 rk[ 8] = rk[ 2] ^ rk[ 7];
532 rk[ 9] = rk[ 3] ^ rk[ 8];
533 if (++i == 8) {
534 return 0;
535 }
536 rk[10] = rk[ 4] ^ rk[ 9];
537 rk[11] = rk[ 5] ^ rk[10];
538 rk += 6;
539 }
540 }
541 rk[6] = GETU32(userKey + 24);
542 rk[7] = GETU32(userKey + 28);
543 if (bits == 256) {
544 while (1) {
545 temp = rk[ 7];
546 rk[ 8] = rk[ 0] ^
8b37e5c1
EK
547 ((u32)Te4[(temp >> 8) & 0xff] ) ^
548 ((u32)Te4[(temp >> 16) & 0xff] << 8) ^
549 ((u32)Te4[(temp >> 24) ] << 16) ^
550 ((u32)Te4[(temp ) & 0xff] << 24) ^
66186aee
MC
551 rcon[i];
552 rk[ 9] = rk[ 1] ^ rk[ 8];
553 rk[10] = rk[ 2] ^ rk[ 9];
554 rk[11] = rk[ 3] ^ rk[10];
555 if (++i == 7) {
556 return 0;
557 }
558 temp = rk[11];
559 rk[12] = rk[ 4] ^
8b37e5c1
EK
560 ((u32)Te4[(temp ) & 0xff] ) ^
561 ((u32)Te4[(temp >> 8) & 0xff] << 8) ^
562 ((u32)Te4[(temp >> 16) & 0xff] << 16) ^
563 ((u32)Te4[(temp >> 24) ] << 24);
66186aee
MC
564 rk[13] = rk[ 5] ^ rk[12];
565 rk[14] = rk[ 6] ^ rk[13];
566 rk[15] = rk[ 7] ^ rk[14];
9c62bca1 567
66186aee
MC
568 rk += 8;
569 }
570 }
571 return 0;
9c62bca1
AP
572}
573
574/**
575 * Expand the cipher key into the decryption key schedule.
576 */
577int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
66186aee
MC
578 AES_KEY *key)
579{
9c62bca1 580
66186aee
MC
581 u32 *rk;
582 int i, j, status;
583 u32 temp;
9c62bca1 584
66186aee
MC
585 /* first, start with an encryption schedule */
586 status = AES_set_encrypt_key(userKey, bits, key);
587 if (status < 0)
588 return status;
9c62bca1 589
66186aee 590 rk = key->rd_key;
9c62bca1 591
66186aee
MC
592 /* invert the order of the round keys: */
593 for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
594 temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
595 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
596 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
597 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
598 }
599 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
600 for (i = 1; i < (key->rounds); i++) {
601 rk += 4;
8cebec98 602#if 1
66186aee
MC
603 for (j = 0; j < 4; j++) {
604 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
8cebec98 605
66186aee
MC
606 tp1 = rk[j];
607 m = tp1 & 0x80808080;
608 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
609 ((m - (m >> 7)) & 0x1b1b1b1b);
610 m = tp2 & 0x80808080;
611 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
612 ((m - (m >> 7)) & 0x1b1b1b1b);
613 m = tp4 & 0x80808080;
614 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
615 ((m - (m >> 7)) & 0x1b1b1b1b);
616 tp9 = tp8 ^ tp1;
617 tpb = tp9 ^ tp2;
618 tpd = tp9 ^ tp4;
619 tpe = tp8 ^ tp4 ^ tp2;
8cebec98 620#if defined(ROTATE)
66186aee
MC
621 rk[j] = tpe ^ ROTATE(tpd,16) ^
622 ROTATE(tp9,8) ^ ROTATE(tpb,24);
8cebec98 623#else
66186aee
MC
624 rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
625 (tp9 >> 24) ^ (tp9 << 8) ^
626 (tpb >> 8) ^ (tpb << 24);
8cebec98 627#endif
66186aee 628 }
8cebec98 629#else
66186aee
MC
630 rk[0] =
631 Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^
632 Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^
633 Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
634 Td3[Te2[(rk[0] >> 24) ] & 0xff];
635 rk[1] =
636 Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^
637 Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^
638 Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
639 Td3[Te2[(rk[1] >> 24) ] & 0xff];
640 rk[2] =
641 Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^
642 Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^
643 Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
644 Td3[Te2[(rk[2] >> 24) ] & 0xff];
645 rk[3] =
646 Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^
647 Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^
648 Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
649 Td3[Te2[(rk[3] >> 24) ] & 0xff];
8cebec98 650#endif
66186aee
MC
651 }
652 return 0;
9c62bca1
AP
653}
654
655/*
656 * Encrypt a single block
657 * in and out can overlap
658 */
659void AES_encrypt(const unsigned char *in, unsigned char *out,
66186aee
MC
660 const AES_KEY *key)
661{
9c62bca1 662
66186aee
MC
663 const u32 *rk;
664 u32 s0, s1, s2, s3, t[4];
665 int r;
9c62bca1 666
66186aee
MC
667 assert(in && out && key);
668 rk = key->rd_key;
9c62bca1 669
66186aee
MC
670 /*
671 * map byte array block to cipher state
672 * and add initial round key:
673 */
674 s0 = GETU32(in ) ^ rk[0];
675 s1 = GETU32(in + 4) ^ rk[1];
676 s2 = GETU32(in + 8) ^ rk[2];
677 s3 = GETU32(in + 12) ^ rk[3];
9c62bca1 678
dff2922a 679#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
66186aee 680 prefetch256(Te4);
dff2922a 681
8b37e5c1
EK
682 t[0] = (u32)Te4[(s0 ) & 0xff] ^
683 (u32)Te4[(s1 >> 8) & 0xff] << 8 ^
684 (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
685 (u32)Te4[(s3 >> 24) ] << 24;
686 t[1] = (u32)Te4[(s1 ) & 0xff] ^
687 (u32)Te4[(s2 >> 8) & 0xff] << 8 ^
688 (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
689 (u32)Te4[(s0 >> 24) ] << 24;
690 t[2] = (u32)Te4[(s2 ) & 0xff] ^
691 (u32)Te4[(s3 >> 8) & 0xff] << 8 ^
692 (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
693 (u32)Te4[(s1 >> 24) ] << 24;
694 t[3] = (u32)Te4[(s3 ) & 0xff] ^
695 (u32)Te4[(s0 >> 8) & 0xff] << 8 ^
696 (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
697 (u32)Te4[(s2 >> 24) ] << 24;
dff2922a 698
66186aee
MC
699 /* now do the linear transform using words */
700 { int i;
701 u32 r0, r1, r2;
dff2922a 702
66186aee
MC
703 for (i = 0; i < 4; i++) {
704 r0 = t[i];
705 r1 = r0 & 0x80808080;
706 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
707 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
dff2922a 708#if defined(ROTATE)
66186aee
MC
709 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
710 ROTATE(r0,16) ^ ROTATE(r0,8);
dff2922a 711#else
66186aee
MC
712 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
713 (r0 << 16) ^ (r0 >> 16) ^
714 (r0 << 8) ^ (r0 >> 24);
dff2922a 715#endif
66186aee
MC
716 t[i] ^= rk[4+i];
717 }
718 }
dff2922a 719#else
66186aee
MC
720 t[0] = Te0[(s0 ) & 0xff] ^
721 Te1[(s1 >> 8) & 0xff] ^
722 Te2[(s2 >> 16) & 0xff] ^
723 Te3[(s3 >> 24) ] ^
724 rk[4];
725 t[1] = Te0[(s1 ) & 0xff] ^
726 Te1[(s2 >> 8) & 0xff] ^
727 Te2[(s3 >> 16) & 0xff] ^
728 Te3[(s0 >> 24) ] ^
729 rk[5];
730 t[2] = Te0[(s2 ) & 0xff] ^
731 Te1[(s3 >> 8) & 0xff] ^
732 Te2[(s0 >> 16) & 0xff] ^
733 Te3[(s1 >> 24) ] ^
734 rk[6];
735 t[3] = Te0[(s3 ) & 0xff] ^
736 Te1[(s0 >> 8) & 0xff] ^
737 Te2[(s1 >> 16) & 0xff] ^
738 Te3[(s2 >> 24) ] ^
739 rk[7];
dff2922a 740#endif
66186aee 741 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
742
743 /*
744 * Nr - 2 full rounds:
745 */
dff2922a
AP
746 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
747#if defined(AES_COMPACT_IN_INNER_ROUNDS)
8b37e5c1
EK
748 t[0] = (u32)Te4[(s0 ) & 0xff] ^
749 (u32)Te4[(s1 >> 8) & 0xff] << 8 ^
750 (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
751 (u32)Te4[(s3 >> 24) ] << 24;
752 t[1] = (u32)Te4[(s1 ) & 0xff] ^
753 (u32)Te4[(s2 >> 8) & 0xff] << 8 ^
754 (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
755 (u32)Te4[(s0 >> 24) ] << 24;
756 t[2] = (u32)Te4[(s2 ) & 0xff] ^
757 (u32)Te4[(s3 >> 8) & 0xff] << 8 ^
758 (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
759 (u32)Te4[(s1 >> 24) ] << 24;
760 t[3] = (u32)Te4[(s3 ) & 0xff] ^
761 (u32)Te4[(s0 >> 8) & 0xff] << 8 ^
762 (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
763 (u32)Te4[(s2 >> 24) ] << 24;
dff2922a 764
66186aee
MC
765 /* now do the linear transform using words */
766 {
767 int i;
768 u32 r0, r1, r2;
dff2922a 769
66186aee
MC
770 for (i = 0; i < 4; i++) {
771 r0 = t[i];
772 r1 = r0 & 0x80808080;
773 r2 = ((r0 & 0x7f7f7f7f) << 1) ^
774 ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
dff2922a 775#if defined(ROTATE)
66186aee
MC
776 t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
777 ROTATE(r0,16) ^ ROTATE(r0,8);
dff2922a 778#else
66186aee
MC
779 t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
780 (r0 << 16) ^ (r0 >> 16) ^
781 (r0 << 8) ^ (r0 >> 24);
dff2922a 782#endif
66186aee
MC
783 t[i] ^= rk[i];
784 }
785 }
dff2922a 786#else
66186aee
MC
787 t[0] = Te0[(s0 ) & 0xff] ^
788 Te1[(s1 >> 8) & 0xff] ^
789 Te2[(s2 >> 16) & 0xff] ^
790 Te3[(s3 >> 24) ] ^
791 rk[0];
792 t[1] = Te0[(s1 ) & 0xff] ^
793 Te1[(s2 >> 8) & 0xff] ^
794 Te2[(s3 >> 16) & 0xff] ^
795 Te3[(s0 >> 24) ] ^
796 rk[1];
797 t[2] = Te0[(s2 ) & 0xff] ^
798 Te1[(s3 >> 8) & 0xff] ^
799 Te2[(s0 >> 16) & 0xff] ^
800 Te3[(s1 >> 24) ] ^
801 rk[2];
802 t[3] = Te0[(s3 ) & 0xff] ^
803 Te1[(s0 >> 8) & 0xff] ^
804 Te2[(s1 >> 16) & 0xff] ^
805 Te3[(s2 >> 24) ] ^
806 rk[3];
dff2922a 807#endif
66186aee 808 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
809 }
810 /*
66186aee
MC
811 * apply last round and
812 * map cipher state to byte array block:
813 */
dff2922a 814#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
66186aee 815 prefetch256(Te4);
dff2922a 816
66186aee 817 *(u32*)(out+0) =
8b37e5c1
EK
818 (u32)Te4[(s0 ) & 0xff] ^
819 (u32)Te4[(s1 >> 8) & 0xff] << 8 ^
820 (u32)Te4[(s2 >> 16) & 0xff] << 16 ^
821 (u32)Te4[(s3 >> 24) ] << 24 ^
66186aee
MC
822 rk[0];
823 *(u32*)(out+4) =
8b37e5c1
EK
824 (u32)Te4[(s1 ) & 0xff] ^
825 (u32)Te4[(s2 >> 8) & 0xff] << 8 ^
826 (u32)Te4[(s3 >> 16) & 0xff] << 16 ^
827 (u32)Te4[(s0 >> 24) ] << 24 ^
66186aee
MC
828 rk[1];
829 *(u32*)(out+8) =
8b37e5c1
EK
830 (u32)Te4[(s2 ) & 0xff] ^
831 (u32)Te4[(s3 >> 8) & 0xff] << 8 ^
832 (u32)Te4[(s0 >> 16) & 0xff] << 16 ^
833 (u32)Te4[(s1 >> 24) ] << 24 ^
66186aee
MC
834 rk[2];
835 *(u32*)(out+12) =
8b37e5c1
EK
836 (u32)Te4[(s3 ) & 0xff] ^
837 (u32)Te4[(s0 >> 8) & 0xff] << 8 ^
838 (u32)Te4[(s1 >> 16) & 0xff] << 16 ^
839 (u32)Te4[(s2 >> 24) ] << 24 ^
66186aee 840 rk[3];
dff2922a 841#else
66186aee
MC
842 *(u32*)(out+0) =
843 (Te2[(s0 ) & 0xff] & 0x000000ffU) ^
844 (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^
845 (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
846 (Te1[(s3 >> 24) ] & 0xff000000U) ^
847 rk[0];
848 *(u32*)(out+4) =
849 (Te2[(s1 ) & 0xff] & 0x000000ffU) ^
850 (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^
851 (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
852 (Te1[(s0 >> 24) ] & 0xff000000U) ^
853 rk[1];
854 *(u32*)(out+8) =
855 (Te2[(s2 ) & 0xff] & 0x000000ffU) ^
856 (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^
857 (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
858 (Te1[(s1 >> 24) ] & 0xff000000U) ^
859 rk[2];
860 *(u32*)(out+12) =
861 (Te2[(s3 ) & 0xff] & 0x000000ffU) ^
862 (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^
863 (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
864 (Te1[(s2 >> 24) ] & 0xff000000U) ^
865 rk[3];
dff2922a 866#endif
9c62bca1
AP
867}
868
869/*
870 * Decrypt a single block
871 * in and out can overlap
 *
 * Loads four 32-bit words from in (offsets 0, 4, 8 and 12), runs them
 * through the rounds below and stores four 32-bit words to out at the
 * same offsets.  The round keys come from key->rd_key and the number of
 * rounds from key->rounds.
 *
 * Overlap is safe because all four input words are loaded into s0..s3
 * before the first store to out, and out is written only in the final
 * round at the end of the function.
 *
 * in, out and key must be non-NULL; this is checked only by assert().
 *
 * Round-key usage: rk[0..3] for the initial XOR, rk[4..7] for the first
 * round, four more words per loop iteration, and the four words after
 * those for the final round.  For key->rounds >= 2 that reads
 * rd_key[0] through rd_key[4 * rounds + 3].
 *
 * Two build-time switches pick the per-round implementation:
 *   AES_COMPACT_IN_OUTER_ROUNDS - first round uses only Td4 plus
 *                                 word arithmetic instead of Td0..Td3
 *   AES_COMPACT_IN_INNER_ROUNDS - same choice for the loop rounds
 * The final round always uses Td4.
872 */
873void AES_decrypt(const unsigned char *in, unsigned char *out,
66186aee
MC
874 const AES_KEY *key)
875{
9c62bca1 876
66186aee
MC
    /*
     * rk walks the round-key words, s0..s3 hold the current state,
     * t[] receives the state produced by a round, r counts loop rounds.
     */
877 const u32 *rk;
878 u32 s0, s1, s2, s3, t[4];
879 int r;
9c62bca1 880
66186aee
MC
881 assert(in && out && key);
882 rk = key->rd_key;
9c62bca1 883
66186aee
MC
884 /*
885 * map byte array block to cipher state
886 * and add initial round key:
887 */
888 s0 = GETU32(in ) ^ rk[0];
889 s1 = GETU32(in + 4) ^ rk[1];
890 s2 = GETU32(in + 8) ^ rk[2];
891 s3 = GETU32(in + 12) ^ rk[3];
9c62bca1 892

    /*
     * First round (round keys rk[4..7]).  The compact variant indexes
     * only Td4, one state byte at a time, and computes the column
     * mixing arithmetically; the #else variant XORs Td0..Td3 lookups.
     */
dff2922a 893#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
    /*
     * NOTE(review): prefetch256 is defined elsewhere in this file;
     * presumably it touches the whole of Td4 so the data-dependent
     * lookups below hit cache - confirm against its definition.
     */
66186aee 894 prefetch256(Td4);
dff2922a 895

    /*
     * Byte k of t[i] is the Td4 lookup of byte k of s[(i - k) mod 4],
     * i.e. t[0] draws on s0, s3, s2, s1 for bytes 0..3 (inverse row
     * shift combined with the byte substitution).
     */
8b37e5c1
EK
896 t[0] = (u32)Td4[(s0 ) & 0xff] ^
897 (u32)Td4[(s3 >> 8) & 0xff] << 8 ^
898 (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
899 (u32)Td4[(s1 >> 24) ] << 24;
900 t[1] = (u32)Td4[(s1 ) & 0xff] ^
901 (u32)Td4[(s0 >> 8) & 0xff] << 8 ^
902 (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
903 (u32)Td4[(s2 >> 24) ] << 24;
904 t[2] = (u32)Td4[(s2 ) & 0xff] ^
905 (u32)Td4[(s1 >> 8) & 0xff] << 8 ^
906 (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
907 (u32)Td4[(s3 >> 24) ] << 24;
908 t[3] = (u32)Td4[(s3 ) & 0xff] ^
909 (u32)Td4[(s2 >> 8) & 0xff] << 8 ^
910 (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
911 (u32)Td4[(s0 >> 24) ] << 24;
dff2922a 912
66186aee
MC
913 /* now do the linear transform using words */
914 {
915 int i;
916 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
dff2922a 917
66186aee
MC
918 for (i = 0; i < 4; i++) {
919 tp1 = t[i];
    /*
     * Double all four bytes of the word at once: shift each byte left
     * by one bit and XOR 0x1b into every byte whose top bit was set.
     * m holds those top bits; m - (m >> 7) turns each 0x80 into 0x7f,
     * so the & 0x1b1b1b1b leaves 0x1b exactly in the affected bytes.
     * Repeating the step gives the x2, x4 and x8 multiples.
     */
920 m = tp1 & 0x80808080;
921 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
922 ((m - (m >> 7)) & 0x1b1b1b1b);
923 m = tp2 & 0x80808080;
924 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
925 ((m - (m >> 7)) & 0x1b1b1b1b);
926 m = tp4 & 0x80808080;
927 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
928 ((m - (m >> 7)) & 0x1b1b1b1b);
    /* XOR combinations: byte-wise multiples by 9, 0xb, 0xd and 0xe */
929 tp9 = tp8 ^ tp1;
930 tpb = tp9 ^ tp2;
931 tpd = tp9 ^ tp4;
932 tpe = tp8 ^ tp4 ^ tp2;
dff2922a 933#if defined(ROTATE)
66186aee
MC
934 t[i] = tpe ^ ROTATE(tpd,16) ^
935 ROTATE(tp9,8) ^ ROTATE(tpb,24);
dff2922a 936#else
66186aee
MC
    /*
     * Fallback without ROTATE: each shift pair is a rotation of a
     * 32-bit word (tpd by 16, tp9 left by 8, tpb left by 24).
     * NOTE(review): presumably identical to the ROTATE form above;
     * ROTATE is defined elsewhere - confirm its direction.
     */
937 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
938 (tp9 >> 24) ^ (tp9 << 8) ^
939 (tpb >> 8) ^ (tpb << 24);
dff2922a 940#endif
66186aee
MC
    /* add the first round's key word */
941 t[i] ^= rk[4+i];
942 }
943 }
dff2922a 944#else
66186aee
MC
    /*
     * Table variant: same byte selection as the compact path, with one
     * lookup each in Td0..Td3 XORed together with the round-key word.
     */
945 t[0] = Td0[(s0 ) & 0xff] ^
946 Td1[(s3 >> 8) & 0xff] ^
947 Td2[(s2 >> 16) & 0xff] ^
948 Td3[(s1 >> 24) ] ^
949 rk[4];
950 t[1] = Td0[(s1 ) & 0xff] ^
951 Td1[(s0 >> 8) & 0xff] ^
952 Td2[(s3 >> 16) & 0xff] ^
953 Td3[(s2 >> 24) ] ^
954 rk[5];
955 t[2] = Td0[(s2 ) & 0xff] ^
956 Td1[(s1 >> 8) & 0xff] ^
957 Td2[(s0 >> 16) & 0xff] ^
958 Td3[(s3 >> 24) ] ^
959 rk[6];
960 t[3] = Td0[(s3 ) & 0xff] ^
961 Td1[(s2 >> 8) & 0xff] ^
962 Td2[(s1 >> 16) & 0xff] ^
963 Td3[(s0 >> 24) ] ^
964 rk[7];
dff2922a 965#endif
    /* the round output becomes the current state */
66186aee 966 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
967
968 /*
969 * Nr - 2 full rounds:
970 */
dff2922a
AP
    /*
     * rk first skips the 8 words already consumed, then advances by 4
     * per iteration; the loop body runs key->rounds - 2 times (not at
     * all when that count is zero or negative).
     */
971 for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
972#if defined(AES_COMPACT_IN_INNER_ROUNDS)
    /* same compact computation as the first round, keyed with rk[i] */
8b37e5c1
EK
973 t[0] = (u32)Td4[(s0 ) & 0xff] ^
974 (u32)Td4[(s3 >> 8) & 0xff] << 8 ^
975 (u32)Td4[(s2 >> 16) & 0xff] << 16 ^
976 (u32)Td4[(s1 >> 24) ] << 24;
977 t[1] = (u32)Td4[(s1 ) & 0xff] ^
978 (u32)Td4[(s0 >> 8) & 0xff] << 8 ^
979 (u32)Td4[(s3 >> 16) & 0xff] << 16 ^
980 (u32)Td4[(s2 >> 24) ] << 24;
981 t[2] = (u32)Td4[(s2 ) & 0xff] ^
982 (u32)Td4[(s1 >> 8) & 0xff] << 8 ^
983 (u32)Td4[(s0 >> 16) & 0xff] << 16 ^
984 (u32)Td4[(s3 >> 24) ] << 24;
985 t[3] = (u32)Td4[(s3 ) & 0xff] ^
986 (u32)Td4[(s2 >> 8) & 0xff] << 8 ^
987 (u32)Td4[(s1 >> 16) & 0xff] << 16 ^
988 (u32)Td4[(s0 >> 24) ] << 24;
dff2922a 989
66186aee
MC
990 /* now do the linear transform using words */
991 {
992 int i;
993 u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
dff2922a 994
66186aee
MC
995 for (i = 0; i < 4; i++) {
996 tp1 = t[i];
    /* per-byte doubling with conditional XOR of 0x1b: x2, x4, x8 */
997 m = tp1 & 0x80808080;
998 tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
999 ((m - (m >> 7)) & 0x1b1b1b1b);
1000 m = tp2 & 0x80808080;
1001 tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
1002 ((m - (m >> 7)) & 0x1b1b1b1b);
1003 m = tp4 & 0x80808080;
1004 tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
1005 ((m - (m >> 7)) & 0x1b1b1b1b);
    /* byte-wise multiples by 9, 0xb, 0xd and 0xe */
1006 tp9 = tp8 ^ tp1;
1007 tpb = tp9 ^ tp2;
1008 tpd = tp9 ^ tp4;
1009 tpe = tp8 ^ tp4 ^ tp2;
dff2922a 1010#if defined(ROTATE)
66186aee
MC
1011 t[i] = tpe ^ ROTATE(tpd,16) ^
1012 ROTATE(tp9,8) ^ ROTATE(tpb,24);
dff2922a 1013#else
66186aee
MC
    /* shift pairs forming 32-bit rotations, used when ROTATE is absent */
1014 t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
1015 (tp9 >> 24) ^ (tp9 << 8) ^
1016 (tpb >> 8) ^ (tpb << 24);
dff2922a 1017#endif
66186aee
MC
    /* add this round's key word */
1018 t[i] ^= rk[i];
1019 }
1020 }
dff2922a 1021#else
66186aee
MC
    /* table variant: Td0..Td3 lookups XORed with this round's key words */
1022 t[0] = Td0[(s0 ) & 0xff] ^
1023 Td1[(s3 >> 8) & 0xff] ^
1024 Td2[(s2 >> 16) & 0xff] ^
1025 Td3[(s1 >> 24) ] ^
1026 rk[0];
1027 t[1] = Td0[(s1 ) & 0xff] ^
1028 Td1[(s0 >> 8) & 0xff] ^
1029 Td2[(s3 >> 16) & 0xff] ^
1030 Td3[(s2 >> 24) ] ^
1031 rk[1];
1032 t[2] = Td0[(s2 ) & 0xff] ^
1033 Td1[(s1 >> 8) & 0xff] ^
1034 Td2[(s0 >> 16) & 0xff] ^
1035 Td3[(s3 >> 24) ] ^
1036 rk[2];
1037 t[3] = Td0[(s3 ) & 0xff] ^
1038 Td1[(s2 >> 8) & 0xff] ^
1039 Td2[(s1 >> 16) & 0xff] ^
1040 Td3[(s0 >> 24) ] ^
1041 rk[3];
dff2922a 1042#endif
    /* the round output becomes the current state */
66186aee 1043 s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
9c62bca1
AP
1044 }
1045 /*
66186aee
MC
1046 * apply last round and
1047 * map cipher state to byte array block:
1048 */
    /*
     * The last round is not conditional on the AES_COMPACT_* switches:
     * it always uses Td4 lookups with the same byte selection as above,
     * applies no column mixing, and XORs in the four round-key words rk
     * points at after the loop.
     * Each result is stored as a whole u32 through a cast pointer; the
     * file header states that little-endian byte order and CPU support
     * for unaligned references are assumed.
     */
1049 prefetch256(Td4);
dff2922a 1050
66186aee 1051 *(u32*)(out+0) =
8b37e5c1
EK
1052 ((u32)Td4[(s0 ) & 0xff]) ^
1053 ((u32)Td4[(s3 >> 8) & 0xff] << 8) ^
1054 ((u32)Td4[(s2 >> 16) & 0xff] << 16) ^
1055 ((u32)Td4[(s1 >> 24) ] << 24) ^
66186aee
MC
1056 rk[0];
1057 *(u32*)(out+4) =
8b37e5c1
EK
1058 ((u32)Td4[(s1 ) & 0xff]) ^
1059 ((u32)Td4[(s0 >> 8) & 0xff] << 8) ^
1060 ((u32)Td4[(s3 >> 16) & 0xff] << 16) ^
1061 ((u32)Td4[(s2 >> 24) ] << 24) ^
66186aee
MC
1062 rk[1];
1063 *(u32*)(out+8) =
8b37e5c1
EK
1064 ((u32)Td4[(s2 ) & 0xff]) ^
1065 ((u32)Td4[(s1 >> 8) & 0xff] << 8) ^
1066 ((u32)Td4[(s0 >> 16) & 0xff] << 16) ^
1067 ((u32)Td4[(s3 >> 24) ] << 24) ^
66186aee
MC
1068 rk[2];
1069 *(u32*)(out+12) =
8b37e5c1
EK
1070 ((u32)Td4[(s3 ) & 0xff]) ^
1071 ((u32)Td4[(s2 >> 8) & 0xff] << 8) ^
1072 ((u32)Td4[(s1 >> 16) & 0xff] << 16) ^
1073 ((u32)Td4[(s0 >> 24) ] << 24) ^
66186aee 1074 rk[3];
9c62bca1 1075}