/* ====================================================================
 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    openssl-core@openssl.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 */

#include <openssl/crypto.h>
#include "modes_lcl.h"
#include <string.h>

#ifndef MODES_DEBUG
# ifndef NDEBUG
#  define NDEBUG
# endif
#endif
#include <assert.h>

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef GETU32
# define GETU32(p)      BSWAP4(*(const u32 *)(p))
# undef PUTU32
# define PUTU32(p,v)    *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
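
/*-
 * Note on REDUCE1BIT (explanatory): GCM maps the coefficient of x^0 to
 * the most significant bit of the block, so multiplying by x in
 * GF(2^128) is a one-bit right shift of the 128-bit value V, with the
 * reduction polynomial x^128 + x^7 + x^2 + x + 1 XORed in whenever the
 * shifted-out bit was set. That polynomial appears as the byte 0xE1
 * (binary 11100001) in the top byte, hence U64(0xe100000000000000).
 * The mask (0-(V.lo&1)) is all-ones exactly when the low bit is set,
 * so the conditional XOR is branch-free.
 */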

/*-
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in the GCM specification. In other words OpenSSL does not
 * cover the whole spectrum of possible table-driven implementations.
 * Why? In the non-"Shoup's" case the memory access pattern is segmented
 * in such a manner that it's trivial to see that cache-timing
 * information can reveal a fair portion of the intermediate hash value.
 * Given that ciphertext is always available to an attacker, it's
 * possible to attempt to deduce the secret parameter H, and if
 * successful, to tamper with messages [which is trivial in CTR mode].
 * In the "Shoup's" case it's not as easy, but there is no reason to
 * believe that it's resistant to cache-timing attacks either. And the
 * thing about the "8-bit" implementation is that it consumes 16
 * (sixteen) times more memory, 4KB per individual key + 1KB shared.
 * On the pro side, it should be twice as fast as the "4-bit" version.
 * For gcc-generated x86[_64] code, the "8-bit" version was observed to
 * run ~75% faster, closer to 100% for commercial compilers... Yet the
 * "4-bit" procedure is preferred, because it's believed to provide a
 * better security-performance balance and adequate all-round
 * performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free()
 *   results in VM working-set trimming, meaning that a subsequent
 *   malloc() would immediately incur working-set expansion);
 * - a larger table has a larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from the
 *   same thread in a Hyper-Threading world);
 *
 * Value of 1 is not appropriate for performance reasons.
 */
#if     TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 256; i <<= 1) {
        u128 *Hi = Htable + i, H0 = *Hi;
        for (j = 1; j < i; ++j) {
            Hi[j].hi = H0.hi ^ Htable[j].hi;
            Hi[j].lo = H0.lo ^ Htable[j].lo;
        }
    }
}
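
/*
 * Note (explanatory): after gcm_init_8bit, Htable[i] holds the GHASH
 * product of H with the 8-bit polynomial whose coefficients are the
 * bits of i (most significant bit = lowest degree). The first loop
 * fills the power-of-two slots 128, 64, ..., 1 by repeated REDUCE1BIT
 * (multiplication by x); the second fills the remaining slots by
 * linearity, since (a^b)*H = a*H xor b*H for disjoint bit masks a, b.
 */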

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi + 15;
    size_t rem, n = *xi;
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };

    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi == xi)
            break;

        n = *(--xi);

        rem = (size_t)Z.lo & 0xff;
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
        Z.hi = (Z.hi >> 8);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem] << 32;
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
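
/*
 * Note (explanatory): the loop above consumes Xi one byte at a time,
 * starting from the last byte. Each iteration looks up n*H in Htable
 * and shifts the 128-bit accumulator right by 8 bits; rem_8bit folds
 * the 8 bits shifted out of the low end back into the high end, i.e.
 * it tabulates the reduction of those bits modulo the GHASH
 * polynomial. PACK() pre-positions each 16-bit constant at the top of
 * a size_t so the XOR lands correctly on both 32- and 64-bit builds.
 */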

# define GCM_MUL(ctx,Xi)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif   TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        const union {
            long one;
            char little;
        } is_endian = {
            1
        };

        if (is_endian.little)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            } else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}
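
/*
 * Note (explanatory): the non-small-footprint branch above is simply
 * the table-build loop unrolled for 16 entries: slots 8, 4, 2, 1
 * receive H multiplied by successive powers of x, and every remaining
 * slot is the XOR of the slots for its individual bits, so Htable[i]
 * covers all 4-bit multiples of H.
 */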

# ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
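
/*
 * Note (explanatory): each byte of Xi is consumed as two 4-bit
 * nibbles, high nibble then low, so a full GHASH multiply costs 32
 * table lookups and 32 4-bit shifts of the accumulator; rem_4bit
 * plays the same reduction role as rem_8bit does in the 8-bit
 * variant, for the 4 bits shifted out per step.
 */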

# if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_ghash_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };

# if 1
    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
# else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
    };
    /*
     * This pre-processing phase slows the procedure down by
     * approximately the same amount of time as it makes each loop spin
     * faster. In other words single-block performance is approximately
     * the same as for the straightforward "4-bit" implementation, and
     * from there on it only gets faster...
     */
    for (cnt = 0; cnt < 16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
        Hshr4[cnt].hi = (Z.hi >> 4);
        Hshl4[cnt] = (u8)(Z.lo << 4);
    }

    do {
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo & 0xff;

            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
            Z.hi = (Z.hi >> 8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
        }

        nlo = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo & 0xf;

        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
# endif

        if (is_endian.little) {
# ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
# else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
# endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp += 16, len -= 16);
}
# endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

# define GCM_MUL(ctx,Xi)   gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" intended to mitigate the
 * cache-thrashing effect. In other words the idea is to hash data while
 * it's still in L1 cache after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif

#else                           /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int i, j;
    const long *xi = (const long *)Xi;
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };

    V.hi = H[0];                /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16 / sizeof(long); ++j) {
        if (is_endian.little) {
            if (sizeof(long) == 8) {
# ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
# else
                const u8 *p = (const u8 *)(xi + j);
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
# endif
            } else {
                const u8 *p = (const u8 *)(xi + j);
                X = (long)GETU32(p);
            }
        } else
            X = xi[j];

        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (is_endian.little) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
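
/*
 * Note (explanatory): the 1-bit path is classic shift-and-add
 * multiplication in GF(2^128): for each bit of Xi, M is an all-ones
 * mask when the bit is set, so V (a running multiple of H) is
 * conditionally accumulated into Z without branching, then V is
 * advanced by one multiplication by x via REDUCE1BIT. No tables and
 * minimal memory, at the cost of 128 iterations per block.
 */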

# define GCM_MUL(ctx,Xi)   gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if     TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386) || defined(__i386__) || \
         defined(__x86_64) || defined(__x86_64__) || \
         defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[2];

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx    gcm_init_clmul
#   define gcm_gmult_avx   gcm_gmult_clmul
#   define gcm_ghash_avx   gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }
#if     TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif   TABLE_BITS==4
# if defined(GHASH_ASM_X86_OR_64)
#  if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
        OPENSSL_ia32cap_P[1] & (1 << 1)) {  /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            ctx->ghash = gcm_ghash_avx;
        } else {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            ctx->ghash = gcm_ghash_clmul;
        }
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if defined(GHASH_ASM_X86)    /* x86 only */
#   if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        ctx->ghash = gcm_ghash_4bit_x86;
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
#  endif
# elif defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        ctx->ghash = gcm_ghash_v8;
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# elif defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        ctx->ghash = gcm_ghash_vis3;
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# elif defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        ctx->ghash = gcm_ghash_p8;
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
#endif
}
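
/*
 * Note (explanatory): the hash subkey is H = E_K(0^128), computed above
 * by running the block cipher over the zeroed ctx->H, exactly as
 * specified in NIST SP 800-38D; it is then byte-swapped into host order
 * and expanded into Htable for whichever GHASH implementation the
 * CPU-capability checks selected.
 */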

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->Yi.u[0] = 0;
    ctx->Yi.u[1] = 0;
    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;
    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx, Yi);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx, Yi);
        }
        len0 <<= 3;
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Yi.u[1] ^= BSWAP8(len0);
#else
            ctx->Yi.c[8] ^= (u8)(len0 >> 56);
            ctx->Yi.c[9] ^= (u8)(len0 >> 48);
            ctx->Yi.c[10] ^= (u8)(len0 >> 40);
            ctx->Yi.c[11] ^= (u8)(len0 >> 32);
            ctx->Yi.c[12] ^= (u8)(len0 >> 24);
            ctx->Yi.c[13] ^= (u8)(len0 >> 16);
            ctx->Yi.c[14] ^= (u8)(len0 >> 8);
            ctx->Yi.c[15] ^= (u8)(len0);
#endif
        } else
            ctx->Yi.u[1] ^= len0;

        GCM_MUL(ctx, Yi);

        if (is_endian.little)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Yi.d[3]);
#else
            ctr = GETU32(ctx->Yi.c + 12);
#endif
        else
            ctr = ctx->Yi.d[3];
    }

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (is_endian.little)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
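
/*
 * Note (explanatory): per the GCM spec, a 96-bit IV is used directly as
 * Y0 = IV || 0^31 || 1, while any other IV length is run through GHASH
 * together with the IV bit length (len0 above) to derive Y0. EK0, the
 * encryption of Y0, is set aside for the final tag computation.
 */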

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx, Xi);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx, Xi);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
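
/*
 * Note (explanatory): ctx->ares counts the bytes of a partially filled
 * AAD block. A call whose length is not a multiple of 16 leaves the
 * tail XORed into Xi but not yet multiplied; the next call (or the
 * first encrypt/decrypt call, which finalizes GHASH(AAD)) completes
 * it, so AAD may be supplied incrementally in arbitrary-sized pieces.
 */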

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

#if 0
    n = (unsigned int)mlen % 16;    /* alternative to ctx->mres */
#endif
    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0)
                    GCM_MUL(ctx, Xi);
                else {
                    ctx->mres = n;
                    return 0;
                }
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH) && defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx, Xi);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
            }

            ctx->mres = n;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx, Xi);
    }

    ctx->mres = n;
    return 0;
}
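
/*
 * Note (explanatory): the (2^36 - 32)-byte cap enforced above equals
 * 16*(2^32 - 2) bytes, i.e. GCM's limit of at most 2^32 - 2 counter
 * blocks per (key, IV) pair; only the low 32 bits of Yi are ever
 * incremented between blocks. ctx->mres records how many keystream
 * bytes of EKi have already been consumed, so successive calls need
 * not be block-aligned.
 */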

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0)
                    GCM_MUL(ctx, Xi);
                else {
                    ctx->mres = n;
                    return 0;
                }
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH) && defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t *out_t = (size_t *)out;
                    const size_t *in_t = (const size_t *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (is_endian.little)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx, Xi);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (is_endian.little)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
            }

            ctx->mres = n;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx, Xi);
    }

    ctx->mres = n;
    return 0;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx, Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
#if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
#else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx, Xi);
            out += 16;
        }
#endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}
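
/*
 * Note (explanatory): a ctr128_f stream function encrypts a whole
 * number of counter blocks at once (typically a hardware-assisted
 * CTR routine), starting from the counter block in ctx->Yi. The
 * caller tracks the counter separately in ctr and writes the advanced
 * value back into ctx->Yi after each batch, as done above.
 */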

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    unsigned int n, ctr;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx, Xi);
        ctx->ares = 0;
    }

    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    if (n) {
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx, Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (is_endian.little)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

#if defined(GHASH)
        GHASH(ctx, in, i);
#else
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx, Xi);
            in += 16;
        }
        j = i / 16;
        in -= i;
#endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c + 12, ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            u8 c = in[n];
            ctx->Xi.c[n] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    const union {
        long one;
        char little;
    } is_endian = {
        1
    };
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx, Xi);

    if (is_endian.little) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx, Xi);

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}
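
/*
 * Note (explanatory): the tag is GHASH(AAD || C || len(AAD) || len(C))
 * XORed with EK0 = E_K(Y0), per the spec. CRYPTO_gcm128_finish returns
 * 0 when the supplied tag matches (the comparison is a direct memcmp),
 * non-zero on mismatch, and -1 when no tag is supplied or len exceeds
 * the 16-byte Xi block.
 */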

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    if (ctx) {
        OPENSSL_cleanse(ctx, sizeof(*ctx));
        OPENSSL_free(ctx);
    }
}
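
/*
 * Minimal usage sketch (illustrative addition, not part of the original
 * file): it shows the intended call sequence with AES as the block
 * cipher, the same pairing the SELFTEST below relies on. The helper
 * name gcm_example is hypothetical; error handling is omitted.
 */
#if 0
# include <openssl/aes.h>

static void gcm_example(const unsigned char key[16],
                        const unsigned char iv[12],
                        const unsigned char *aad, size_t aad_len,
                        const unsigned char *pt, unsigned char *ct,
                        size_t len, unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT gcm;

    AES_set_encrypt_key(key, 128, &aes);        /* key schedule */
    CRYPTO_gcm128_init(&gcm, &aes, (block128_f)AES_encrypt);
    CRYPTO_gcm128_setiv(&gcm, iv, 12);          /* 96-bit IV fast path */
    CRYPTO_gcm128_aad(&gcm, aad, aad_len);      /* optional AAD */
    CRYPTO_gcm128_encrypt(&gcm, pt, ct, len);   /* CTR + GHASH */
    CRYPTO_gcm128_tag(&gcm, tag, 16);           /* final 16-byte tag */
}
#endif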
1726
1727 #if defined(SELFTEST)
1728 # include <stdio.h>
1729 # include <openssl/aes.h>
1730
1731 /* Test Case 1 */
1732 static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1733 static const u8 T1[] = {
1734 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1735 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
1736 };
1737
1738 /* Test Case 2 */
1739 # define K2 K1
1740 # define A2 A1
1741 # define IV2 IV1
1742 static const u8 P2[16];
1743 static const u8 C2[] = {
1744 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1745 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1746 };
1747
1748 static const u8 T2[] = {
1749 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1750 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
1751 };
1752
1753 /* Test Case 3 */
1754 # define A3 A2
1755 static const u8 K3[] = {
1756 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1757 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1758 };
1759
1760 static const u8 P3[] = {
1761 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1762 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1763 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1764 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1765 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1766 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1767 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1768 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1769 };
1770
1771 static const u8 IV3[] = {
1772 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1773 0xde, 0xca, 0xf8, 0x88
1774 };
1775
1776 static const u8 C3[] = {
1777 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1778 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1779 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1780 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1781 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1782 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1783 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1784 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1785 };
1786
1787 static const u8 T3[] = {
1788 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1789 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
1790 };
1791
1792 /* Test Case 4 */
1793 # define K4 K3
1794 # define IV4 IV3
1795 static const u8 P4[] = {
1796 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1797 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1798 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1799 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1800 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1801 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1802 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1803 0xba, 0x63, 0x7b, 0x39
1804 };
1805
1806 static const u8 A4[] = {
1807 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1808 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1809 0xab, 0xad, 0xda, 0xd2
1810 };
1811
1812 static const u8 C4[] = {
1813 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1814 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1815 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1816 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1817 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1818 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1819 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1820 0x3d, 0x58, 0xe0, 0x91
1821 };
1822
1823 static const u8 T4[] = {
1824 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1825 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
1826 };
1827
1828 /* Test Case 5 */
1829 # define K5 K4
1830 # define P5 P4
1831 # define A5 A4
1832 static const u8 IV5[] = {
1833 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1834 };
1835
1836 static const u8 C5[] = {
1837 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1838 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1839 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1840 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1841 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1842 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1843 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1844 0xc2, 0x3f, 0x45, 0x98
1845 };
1846
1847 static const u8 T5[] = {
1848 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1849 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
1850 };
1851
1852 /* Test Case 6 */
1853 # define K6 K5
1854 # define P6 P5
1855 # define A6 A5
1856 static const u8 IV6[] = {
1857 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1858 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1859 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1860 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1861 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1862 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1863 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1864 0xa6, 0x37, 0xb3, 0x9b
1865 };
1866
1867 static const u8 C6[] = {
1868 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1869 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1870 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1871 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1872 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1873 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1874 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1875 0x4c, 0x34, 0xae, 0xe5
1876 };
1877
1878 static const u8 T6[] = {
1879 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1880 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
1881 };
1882
1883 /* Test Case 7 */
1884 static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1885 static const u8 T7[] = {
1886 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1887 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
1888 };
1889
1890 /* Test Case 8 */
1891 # define K8 K7
1892 # define IV8 IV7
1893 # define A8 A7
1894 static const u8 P8[16];
1895 static const u8 C8[] = {
1896 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1897 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1898 };
1899
1900 static const u8 T8[] = {
1901 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1902 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
1903 };
1904
1905 /* Test Case 9 */
1906 # define A9 A8
1907 static const u8 K9[] = {
1908 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1909 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1910 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1911 };
1912
1913 static const u8 P9[] = {
1914 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1915 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1916 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1917 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1918 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1919 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1920 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1921 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1922 };
1923
1924 static const u8 IV9[] = {
1925 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1926 0xde, 0xca, 0xf8, 0x88
1927 };
1928
1929 static const u8 C9[] = {
1930 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1931 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1932 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1933 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1934 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1935 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1936 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1937 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1938 };
1939
1940 static const u8 T9[] = {
1941 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1942 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
1943 };
1944
1945 /* Test Case 10 */
1946 # define K10 K9
1947 # define IV10 IV9
1948 static const u8 P10[] = {
1949 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1950 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1951 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1952 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1953 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1954 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1955 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1956 0xba, 0x63, 0x7b, 0x39
1957 };
1958
1959 static const u8 A10[] = {
1960 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1961 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1962 0xab, 0xad, 0xda, 0xd2
1963 };
1964
1965 static const u8 C10[] = {
1966 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1967 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1968 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1969 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1970 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1971 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1972 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1973 0xcc, 0xda, 0x27, 0x10
1974 };
1975
1976 static const u8 T10[] = {
1977 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1978 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
1979 };
1980
1981 /* Test Case 11 */
1982 # define K11 K10
1983 # define P11 P10
1984 # define A11 A10
1985 static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1986
1987 static const u8 C11[] = {
1988 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1989 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1990 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1991 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
1992 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
1993 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
1994 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
1995 0xa0, 0xf0, 0x62, 0xf7
1996 };
1997
1998 static const u8 T11[] = {
1999 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
2000 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
2001 };
2002
2003 /* Test Case 12: 60-byte IV, likewise hashed down to the pre-counter block */
2004 # define K12 K11
2005 # define P12 P11
2006 # define A12 A11
2007 static const u8 IV12[] = {
2008 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2009 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2010 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2011 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2012 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2013 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2014 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2015 0xa6, 0x37, 0xb3, 0x9b
2016 };
2017
2018 static const u8 C12[] = {
2019 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
2020 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
2021 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
2022 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
2023 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
2024 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
2025 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
2026 0xe9, 0xb7, 0x37, 0x3b
2027 };
2028
2029 static const u8 T12[] = {
2030 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
2031 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
2032 };
2033
2034 /* Test Case 13: AES-256, all-zero key and IV, empty plaintext and AAD */
2035 static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
2036 static const u8 T13[] = {
2037 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
2038 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
2039 };
2040
2041 /* Test Case 14 */
2042 # define K14 K13
2043 # define A14 A13
2044 static const u8 P14[16], IV14[12];
2045 static const u8 C14[] = {
2046 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
2047 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
2048 };
2049
2050 static const u8 T14[] = {
2051 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
2052 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
2053 };
2054
2055 /* Test Case 15 */
2056 # define A15 A14
2057 static const u8 K15[] = {
2058 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2059 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2060 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2061 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2062 };
2063
2064 static const u8 P15[] = {
2065 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2066 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2067 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2068 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2069 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2070 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2071 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2072 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2073 };
2074
2075 static const u8 IV15[] = {
2076 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2077 0xde, 0xca, 0xf8, 0x88
2078 };
2079
2080 static const u8 C15[] = {
2081 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2082 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2083 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2084 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2085 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2086 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2087 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2088 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2089 };
2090
2091 static const u8 T15[] = {
2092 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2093 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
2094 };
2095
2096 /* Test Case 16 */
2097 # define K16 K15
2098 # define IV16 IV15
2099 static const u8 P16[] = {
2100 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2101 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2102 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2103 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2104 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2105 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2106 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2107 0xba, 0x63, 0x7b, 0x39
2108 };
2109
2110 static const u8 A16[] = {
2111 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2112 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2113 0xab, 0xad, 0xda, 0xd2
2114 };
2115
2116 static const u8 C16[] = {
2117 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2118 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2119 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2120 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2121 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2122 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2123 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2124 0xbc, 0xc9, 0xf6, 0x62
2125 };
2126
2127 static const u8 T16[] = {
2128 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2129 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
2130 };
2131
2132 /* Test Case 17 */
2133 # define K17 K16
2134 # define P17 P16
2135 # define A17 A16
2136 static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2137
2138 static const u8 C17[] = {
2139 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2140 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2141 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2142 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2143 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2144 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2145 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2146 0xf4, 0x7c, 0x9b, 0x1f
2147 };
2148
2149 static const u8 T17[] = {
2150 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2151 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
2152 };
2153
2154 /* Test Case 18 */
2155 # define K18 K17
2156 # define P18 P17
2157 # define A18 A17
2158 static const u8 IV18[] = {
2159 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2160 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2161 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2162 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2163 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2164 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2165 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2166 0xa6, 0x37, 0xb3, 0x9b
2167 };
2168
2169 static const u8 C18[] = {
2170 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2171 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2172 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2173 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2174 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2175 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2176 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2177 0x44, 0xae, 0x7e, 0x3f
2178 };
2179
2180 static const u8 T18[] = {
2181 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2182 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
2183 };
2184
2185 /* Test Case 19: AAD-only -- 128 bytes of AAD (P15 followed by C15), empty plaintext */
2186 # define K19 K1
2187 # define P19 P1
2188 # define IV19 IV1
2189 # define C19 C1
2190 static const u8 A19[] = {
2191 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2192 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2193 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2194 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2195 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2196 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2197 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2198 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2199 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2200 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2201 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2202 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2203 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2204 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2205 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2206 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2207 };
2208
2209 static const u8 T19[] = {
2210 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2211 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
2212 };
2213
2214 /* Test Case 20 */
2215 # define K20 K1
2216 # define A20 A1
2217 /* 64-byte IV is hashed into the initial counter block, which ends in 0xff, so the CTR increments exercise the byte-carry path */
2218 static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2219
2220 static const u8 P20[288];
2221 static const u8 C20[] = {
2222 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2223 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2224 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2225 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2226 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2227 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2228 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2229 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2230 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2231 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2232 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2233 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2234 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2235 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2236 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2237 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2238 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2239 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2240 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2241 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2242 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2243 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2244 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2245 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2246 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2247 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2248 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2249 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2250 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2251 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2252 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2253 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2254 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2255 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2256 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2257 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2258 };
2259
2260 static const u8 T20[] = {
2261 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2262 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
2263 };
2264
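/*-
 * TEST_CASE(n) round-trips vector n: it keys AES, feeds the AAD,
 * encrypts, and checks both the tag (CRYPTO_gcm128_finish) and the
 * ciphertext; it then resets the IV and repeats in the decrypt
 * direction.  NULL P/A/C pointers stand for empty plaintext, AAD or
 * ciphertext: out[] is then sized from the pointer itself, but it is
 * never read, because every memcmp() is guarded by the corresponding
 * pointer test.
 */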
2265 # define TEST_CASE(n) do { \
2266 u8 out[sizeof(P##n)]; \
2267 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
2268 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
2269 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2270 memset(out,0,sizeof(out)); \
2271 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2272 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
2273 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2274 (C##n && memcmp(out,C##n,sizeof(out)))) \
2275 ret++, printf("encrypt test#%d failed.\n", n); \
2276 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
2277 memset(out,0,sizeof(out)); \
2278 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
2279 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
2280 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
2281 (P##n && memcmp(out,P##n,sizeof(out)))) \
2282 ret++, printf("decrypt test#%d failed.\n", n); \
2283 } while(0)
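/*-
 * For reference, a minimal sketch of the same encrypt flow written out
 * by hand (the names key/iv/aad/pt/ct/tag are illustrative; error
 * handling omitted):
 *
 *      GCM128_CONTEXT gcm;
 *      AES_KEY aes;
 *      AES_set_encrypt_key(key, 128, &aes);
 *      CRYPTO_gcm128_init(&gcm, &aes, (block128_f)AES_encrypt);
 *      CRYPTO_gcm128_setiv(&gcm, iv, 12);
 *      CRYPTO_gcm128_aad(&gcm, aad, sizeof(aad));
 *      CRYPTO_gcm128_encrypt(&gcm, pt, ct, sizeof(pt));
 *      CRYPTO_gcm128_tag(&gcm, tag, 16);
 */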
2284
2285 int main(void)
2286 {
2287 GCM128_CONTEXT ctx;
2288 AES_KEY key;
2289 int ret = 0;
2290
2291 TEST_CASE(1);
2292 TEST_CASE(2);
2293 TEST_CASE(3);
2294 TEST_CASE(4);
2295 TEST_CASE(5);
2296 TEST_CASE(6);
2297 TEST_CASE(7);
2298 TEST_CASE(8);
2299 TEST_CASE(9);
2300 TEST_CASE(10);
2301 TEST_CASE(11);
2302 TEST_CASE(12);
2303 TEST_CASE(13);
2304 TEST_CASE(14);
2305 TEST_CASE(15);
2306 TEST_CASE(16);
2307 TEST_CASE(17);
2308 TEST_CASE(18);
2309 TEST_CASE(19);
2310 TEST_CASE(20);
2311
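    /*
     * Optional micro-benchmark: push one 1KB buffer through GCM, through
     * raw CTR, and through GHASH alone, reporting OPENSSL_rdtsc() ticks
     * per byte.
     */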
2312 # ifdef OPENSSL_CPUID_OBJ
2313 {
2314 size_t start, gcm_t, ctr_t, OPENSSL_rdtsc();
2315 union {
2316 u64 u;
2317 u8 c[1024];
2318 } buf;
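        /* the u64 member keeps the 1KB buffer naturally aligned */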
2319 int i;
2320
2321 AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2322 CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2323 CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2324
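        /* one untimed pass to warm up, then measure a single 1KB pass */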
2325 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2326 start = OPENSSL_rdtsc();
2327 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2328 gcm_t = OPENSSL_rdtsc() - start;
2329
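        /*
         * same buffer through plain CTR: the GCM/CTR difference below
         * isolates the per-byte cost of GHASH
         */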
2330 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2331 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2332 (block128_f) AES_encrypt);
2333 start = OPENSSL_rdtsc();
2334 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2335 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2336 (block128_f) AES_encrypt);
2337 ctr_t = OPENSSL_rdtsc() - start;
2338
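        /* ticks per byte: GCM, CTR, and their difference */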
2339 printf("%.2f-%.2f=%.2f\n",
2340 gcm_t / (double)sizeof(buf),
2341 ctr_t / (double)sizeof(buf),
2342 (gcm_t - ctr_t) / (double)sizeof(buf));
2343 # ifdef GHASH
2344 {
2345 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2346 const u8 *inp, size_t len) = ctx.ghash;
2347
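            /*
             * GHASH() dispatches through the local gcm_ghash_p, so this
             * times the selected ghash routine over 100 1KB passes
             */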
2348 GHASH((&ctx), buf.c, sizeof(buf));
2349 start = OPENSSL_rdtsc();
2350 for (i = 0; i < 100; ++i)
2351 GHASH((&ctx), buf.c, sizeof(buf));
2352 gcm_t = OPENSSL_rdtsc() - start;
2353 printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);
2354 }
2355 # endif
2356 }
2357 # endif
2358
2359 return ret;
2360 }
2361 #endif