]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/gcm128.c
gcm128.c: tidy up, minor optimization, rearrange gcm128_context.
[thirdparty/openssl.git] / crypto / modes / gcm128.c
CommitLineData
e7f5b1cd
AP
1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
f4001a0d
DSH
50#define OPENSSL_FIPSAPI
51
aa763c0f 52#include <openssl/crypto.h>
f472ec8c 53#include "modes_lcl.h"
e7f5b1cd
AP
54#include <string.h>
55
56#ifndef MODES_DEBUG
57# ifndef NDEBUG
58# define NDEBUG
59# endif
60#endif
61#include <assert.h>
62
f472ec8c
AP
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine GETU32/PUTU32 in terms of a single 32-bit access plus BSWAP4,
 * because alignment is ensured for the buffers used in this file.
 * Both expansions are fully parenthesized so they behave as ordinary
 * expressions wherever they are used.
 */
#undef  GETU32
#define GETU32(p)	(BSWAP4(*(const u32 *)(p)))
#undef  PUTU32
#define PUTU32(p,v)	(*(u32 *)(p) = BSWAP4((v)))
#endif
70
c1f092d1
AP
/*
 * PACK(s) places the 16-bit constant s in the top 16 bits of a size_t.
 */
#define PACK(s)		((size_t)(s)<<(sizeof(size_t)*8-16))

/*
 * REDUCE1BIT(V) shifts the 128-bit value V (fields .hi/.lo) right by one
 * bit and, when the bit shifted out of V.lo was set, xors 0xe1 into the
 * most significant byte of V.hi.  The two branches compute the same
 * result; the one not matching sizeof(size_t) builds the constant from
 * a 32-bit quantity.  V is parenthesized so that any lvalue expression
 * (e.g. *p or tbl[i]) may be passed; note that V is evaluated several
 * times, so it must not have side effects.
 */
#define REDUCE1BIT(V)	do { \
	if (sizeof(size_t)==8) { \
		u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
		(V).lo = ((V).hi<<63)|((V).lo>>1); \
		(V).hi = ((V).hi>>1 )^T; \
	} \
	else { \
		u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
		(V).lo = ((V).hi<<63)|((V).lo>>1); \
		(V).hi = ((V).hi>>1 )^((u64)T<<32); \
	} \
} while(0)
84
d8d95832
AP
85/*
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
106 *
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
116 *
117 * Value of 1 is not appropriate for performance reasons.
118 */
a595baff
AP
119#if TABLE_BITS==8
120
e7f5b1cd
AP
121static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122{
123 int i, j;
124 u128 V;
125
126 Htable[0].hi = 0;
127 Htable[0].lo = 0;
128 V.hi = H[0];
129 V.lo = H[1];
130
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
c1f092d1 132 REDUCE1BIT(V);
e7f5b1cd
AP
133 Htable[i] = V;
134 }
135
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
141 }
142 }
143}
144
d8d95832 145static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
e7f5b1cd
AP
146{
147 u128 Z = { 0, 0};
148 const u8 *xi = (const u8 *)Xi+15;
149 size_t rem, n = *xi;
150 const union { long one; char little; } is_endian = {1};
151 static const size_t rem_8bit[256] = {
152 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
153 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
154 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
155 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
156 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
157 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
158 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
159 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
160 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
161 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
162 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
163 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
164 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
165 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
166 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
167 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
168 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
169 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
170 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
171 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
172 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
173 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
174 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
175 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
176 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
177 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
178 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
179 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
180 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
181 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
182 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
183 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
184 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
185 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
186 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
187 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
188 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
189 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
190 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
191 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
192 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
193 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
194 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
195 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
196 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
197 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
198 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
199 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
200 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
201 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
202 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
203 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
204 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
205 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
206 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
207 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
208 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
209 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
210 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
211 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
212 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
213 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
214 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
215 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
216
217 while (1) {
218 Z.hi ^= Htable[n].hi;
219 Z.lo ^= Htable[n].lo;
220
221 if ((u8 *)Xi==xi) break;
222
223 n = *(--xi);
224
225 rem = (size_t)Z.lo&0xff;
226 Z.lo = (Z.hi<<56)|(Z.lo>>8);
227 Z.hi = (Z.hi>>8);
228 if (sizeof(size_t)==8)
229 Z.hi ^= rem_8bit[rem];
230 else
231 Z.hi ^= (u64)rem_8bit[rem]<<32;
232 }
233
234 if (is_endian.little) {
235#ifdef BSWAP8
236 Xi[0] = BSWAP8(Z.hi);
237 Xi[1] = BSWAP8(Z.lo);
238#else
239 u8 *p = (u8 *)Xi;
240 u32 v;
241 v = (u32)(Z.hi>>32); PUTU32(p,v);
242 v = (u32)(Z.hi); PUTU32(p+4,v);
243 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
244 v = (u32)(Z.lo); PUTU32(p+12,v);
245#endif
246 }
247 else {
248 Xi[0] = Z.hi;
249 Xi[1] = Z.lo;
250 }
251}
/*
 * GCM_MUL(ctx,Xi): multiply the named 128-bit context member by the hash
 * key, in place.  The second argument is a member NAME (Xi or Yi), which
 * is pasted into the field access; ctx may be any pointer expression.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit((ctx)->Xi.u,(ctx)->Htable)
e7f5b1cd 253
a595baff 254#elif TABLE_BITS==4
2262beef 255
e7f5b1cd
AP
256static void gcm_init_4bit(u128 Htable[16], u64 H[2])
257{
e7f5b1cd 258 u128 V;
f472ec8c
AP
259#if defined(OPENSSL_SMALL_FOOTPRINT)
260 int i;
261#endif
e7f5b1cd
AP
262
263 Htable[0].hi = 0;
264 Htable[0].lo = 0;
265 V.hi = H[0];
266 V.lo = H[1];
267
f472ec8c 268#if defined(OPENSSL_SMALL_FOOTPRINT)
e7f5b1cd 269 for (Htable[8]=V, i=4; i>0; i>>=1) {
c1f092d1 270 REDUCE1BIT(V);
e7f5b1cd
AP
271 Htable[i] = V;
272 }
273
274 for (i=2; i<16; i<<=1) {
2262beef
AP
275 u128 *Hi = Htable+i;
276 int j;
277 for (V=*Hi, j=1; j<i; ++j) {
278 Hi[j].hi = V.hi^Htable[j].hi;
279 Hi[j].lo = V.lo^Htable[j].lo;
e7f5b1cd
AP
280 }
281 }
2262beef 282#else
f472ec8c 283 Htable[8] = V;
c1f092d1 284 REDUCE1BIT(V);
f472ec8c 285 Htable[4] = V;
c1f092d1 286 REDUCE1BIT(V);
f472ec8c 287 Htable[2] = V;
c1f092d1 288 REDUCE1BIT(V);
f472ec8c 289 Htable[1] = V;
2262beef
AP
290 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
291 V=Htable[4];
292 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
293 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
294 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
295 V=Htable[8];
296 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
297 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
298 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
299 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
300 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
301 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
302 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
303#endif
f472ec8c
AP
304#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
305 /*
306 * ARM assembler expects specific dword order in Htable.
307 */
308 {
309 int j;
310 const union { long one; char little; } is_endian = {1};
311
312 if (is_endian.little)
313 for (j=0;j<16;++j) {
314 V = Htable[j];
315 Htable[j].hi = V.lo;
316 Htable[j].lo = V.hi;
317 }
318 else
319 for (j=0;j<16;++j) {
320 V = Htable[j];
321 Htable[j].hi = V.lo<<32|V.lo>>32;
322 Htable[j].lo = V.hi<<32|V.hi>>32;
323 }
324 }
325#endif
e7f5b1cd
AP
326}
327
a595baff 328#ifndef GHASH_ASM
2262beef
AP
329static const size_t rem_4bit[16] = {
330 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
331 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
332 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
333 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
334
4f39edbf 335static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
e7f5b1cd 336{
2262beef
AP
337 u128 Z;
338 int cnt = 15;
339 size_t rem, nlo, nhi;
e7f5b1cd 340 const union { long one; char little; } is_endian = {1};
2262beef
AP
341
342 nlo = ((const u8 *)Xi)[15];
343 nhi = nlo>>4;
344 nlo &= 0xf;
345
346 Z.hi = Htable[nlo].hi;
347 Z.lo = Htable[nlo].lo;
e7f5b1cd
AP
348
349 while (1) {
2262beef
AP
350 rem = (size_t)Z.lo&0xf;
351 Z.lo = (Z.hi<<60)|(Z.lo>>4);
352 Z.hi = (Z.hi>>4);
353 if (sizeof(size_t)==8)
354 Z.hi ^= rem_4bit[rem];
355 else
356 Z.hi ^= (u64)rem_4bit[rem]<<32;
357
358 Z.hi ^= Htable[nhi].hi;
359 Z.lo ^= Htable[nhi].lo;
360
361 if (--cnt<0) break;
362
363 nlo = ((const u8 *)Xi)[cnt];
e7f5b1cd
AP
364 nhi = nlo>>4;
365 nlo &= 0xf;
366
2262beef
AP
367 rem = (size_t)Z.lo&0xf;
368 Z.lo = (Z.hi<<60)|(Z.lo>>4);
369 Z.hi = (Z.hi>>4);
370 if (sizeof(size_t)==8)
371 Z.hi ^= rem_4bit[rem];
372 else
373 Z.hi ^= (u64)rem_4bit[rem]<<32;
374
e7f5b1cd
AP
375 Z.hi ^= Htable[nlo].hi;
376 Z.lo ^= Htable[nlo].lo;
2262beef 377 }
e7f5b1cd 378
2262beef
AP
379 if (is_endian.little) {
380#ifdef BSWAP8
381 Xi[0] = BSWAP8(Z.hi);
382 Xi[1] = BSWAP8(Z.lo);
383#else
384 u8 *p = (u8 *)Xi;
385 u32 v;
386 v = (u32)(Z.hi>>32); PUTU32(p,v);
387 v = (u32)(Z.hi); PUTU32(p+4,v);
388 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
389 v = (u32)(Z.lo); PUTU32(p+12,v);
390#endif
391 }
392 else {
393 Xi[0] = Z.hi;
394 Xi[1] = Z.lo;
395 }
396}
397
398#if !defined(OPENSSL_SMALL_FOOTPRINT)
399/*
400 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
a595baff
AP
401 * details... Compiler-generated code doesn't seem to give any
402 * performance improvement, at least not on x86[_64]. It's here
403 * mostly as reference and a placeholder for possible future
404 * non-trivial optimization[s]...
2262beef 405 */
4f39edbf
AP
406static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
407 const u8 *inp,size_t len)
2262beef
AP
408{
409 u128 Z;
410 int cnt;
411 size_t rem, nlo, nhi;
412 const union { long one; char little; } is_endian = {1};
413
e747f4d4 414#if 1
2262beef
AP
415 do {
416 cnt = 15;
417 nlo = ((const u8 *)Xi)[15];
418 nlo ^= inp[15];
419 nhi = nlo>>4;
420 nlo &= 0xf;
421
422 Z.hi = Htable[nlo].hi;
423 Z.lo = Htable[nlo].lo;
424
425 while (1) {
e7f5b1cd
AP
426 rem = (size_t)Z.lo&0xf;
427 Z.lo = (Z.hi<<60)|(Z.lo>>4);
428 Z.hi = (Z.hi>>4);
429 if (sizeof(size_t)==8)
430 Z.hi ^= rem_4bit[rem];
431 else
432 Z.hi ^= (u64)rem_4bit[rem]<<32;
433
434 Z.hi ^= Htable[nhi].hi;
435 Z.lo ^= Htable[nhi].lo;
436
2262beef 437 if (--cnt<0) break;
e7f5b1cd 438
2262beef
AP
439 nlo = ((const u8 *)Xi)[cnt];
440 nlo ^= inp[cnt];
441 nhi = nlo>>4;
442 nlo &= 0xf;
e7f5b1cd
AP
443
444 rem = (size_t)Z.lo&0xf;
445 Z.lo = (Z.hi<<60)|(Z.lo>>4);
446 Z.hi = (Z.hi>>4);
447 if (sizeof(size_t)==8)
448 Z.hi ^= rem_4bit[rem];
449 else
450 Z.hi ^= (u64)rem_4bit[rem]<<32;
2262beef
AP
451
452 Z.hi ^= Htable[nlo].hi;
453 Z.lo ^= Htable[nlo].lo;
e7f5b1cd 454 }
e747f4d4
AP
455#else
456 /*
457 * Extra 256+16 bytes per-key plus 512 bytes shared tables
458 * [should] give ~50% improvement... One could have PACK()-ed
6acb4ff3
AP
459 * the rem_8bit even here, but the priority is to minimize
460 * cache footprint...
e747f4d4
AP
461 */
462 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
463 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
464 static const unsigned short rem_8bit[256] = {
465 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
466 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
467 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
468 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
469 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
470 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
471 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
472 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
473 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
474 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
475 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
476 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
477 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
478 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
479 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
480 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
481 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
482 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
483 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
484 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
485 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
486 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
487 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
488 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
489 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
490 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
491 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
492 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
493 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
494 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
495 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
496 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
e747f4d4
AP
497 /*
498 * This pre-processing phase slows down procedure by approximately
499 * same time as it makes each loop spin faster. In other words
500 * single block performance is approximately same as straightforward
501 * "4-bit" implementation, and then it goes only faster...
502 */
503 for (cnt=0; cnt<16; ++cnt) {
504 Z.hi = Htable[cnt].hi;
505 Z.lo = Htable[cnt].lo;
506 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
507 Hshr4[cnt].hi = (Z.hi>>4);
508 Hshl4[cnt] = (u8)(Z.lo<<4);
509 }
510
511 do {
6acb4ff3 512 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
e747f4d4
AP
513 nlo = ((const u8 *)Xi)[cnt];
514 nlo ^= inp[cnt];
515 nhi = nlo>>4;
516 nlo &= 0xf;
517
518 Z.hi ^= Htable[nlo].hi;
519 Z.lo ^= Htable[nlo].lo;
520
521 rem = (size_t)Z.lo&0xff;
522
523 Z.lo = (Z.hi<<56)|(Z.lo>>8);
524 Z.hi = (Z.hi>>8);
525
526 Z.hi ^= Hshr4[nhi].hi;
527 Z.lo ^= Hshr4[nhi].lo;
528 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
529 }
530
531 nlo = ((const u8 *)Xi)[0];
532 nlo ^= inp[0];
533 nhi = nlo>>4;
534 nlo &= 0xf;
535
536 Z.hi ^= Htable[nlo].hi;
537 Z.lo ^= Htable[nlo].lo;
538
539 rem = (size_t)Z.lo&0xf;
540
541 Z.lo = (Z.hi<<60)|(Z.lo>>4);
542 Z.hi = (Z.hi>>4);
543
544 Z.hi ^= Htable[nhi].hi;
545 Z.lo ^= Htable[nhi].lo;
546 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
547#endif
e7f5b1cd
AP
548
549 if (is_endian.little) {
550#ifdef BSWAP8
551 Xi[0] = BSWAP8(Z.hi);
552 Xi[1] = BSWAP8(Z.lo);
553#else
554 u8 *p = (u8 *)Xi;
555 u32 v;
556 v = (u32)(Z.hi>>32); PUTU32(p,v);
557 v = (u32)(Z.hi); PUTU32(p+4,v);
558 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
559 v = (u32)(Z.lo); PUTU32(p+12,v);
560#endif
561 }
562 else {
563 Xi[0] = Z.hi;
564 Xi[1] = Z.lo;
565 }
2262beef 566 } while (inp+=16, len-=16);
e7f5b1cd 567}
2262beef
AP
568#endif
569#else
4f39edbf
AP
570void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
2262beef
AP
572#endif
573
/*
 * GCM_MUL(ctx,Xi): multiply the named 128-bit context member by the hash
 * key, in place.  The second argument is a member NAME (Xi or Yi), which
 * is pasted into the field access; ctx may be any pointer expression.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit((ctx)->Xi.u,(ctx)->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH(ctx,in,len): fold len bytes at in into ctx->Xi via gcm_ghash_4bit. */
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,(in),(len))
/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
 * thrashing effect. In other words idea is to hash data while it's
 * still in L1 cache after encryption pass... */
#define GHASH_CHUNK       (3*1024)
#endif
2262beef 582
a595baff 583#else /* TABLE_BITS */
e7f5b1cd 584
2262beef 585static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
e7f5b1cd
AP
586{
587 u128 V,Z = { 0,0 };
588 long X;
589 int i,j;
590 const long *xi = (const long *)Xi;
591 const union { long one; char little; } is_endian = {1};
592
2262beef 593 V.hi = H[0]; /* H is in host byte order, no byte swapping */
e7f5b1cd
AP
594 V.lo = H[1];
595
596 for (j=0; j<16/sizeof(long); ++j) {
597 if (is_endian.little) {
598 if (sizeof(long)==8) {
599#ifdef BSWAP8
600 X = (long)(BSWAP8(xi[j]));
601#else
602 const u8 *p = (const u8 *)(xi+j);
603 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
604#endif
605 }
606 else {
607 const u8 *p = (const u8 *)(xi+j);
608 X = (long)GETU32(p);
609 }
610 }
611 else
612 X = xi[j];
613
614 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
615 u64 M = (u64)(X>>(8*sizeof(long)-1));
616 Z.hi ^= V.hi&M;
617 Z.lo ^= V.lo&M;
618
c1f092d1 619 REDUCE1BIT(V);
e7f5b1cd
AP
620 }
621 }
622
623 if (is_endian.little) {
624#ifdef BSWAP8
625 Xi[0] = BSWAP8(Z.hi);
626 Xi[1] = BSWAP8(Z.lo);
627#else
628 u8 *p = (u8 *)Xi;
629 u32 v;
630 v = (u32)(Z.hi>>32); PUTU32(p,v);
631 v = (u32)(Z.hi); PUTU32(p+4,v);
632 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
633 v = (u32)(Z.lo); PUTU32(p+12,v);
634#endif
635 }
636 else {
637 Xi[0] = Z.hi;
638 Xi[1] = Z.lo;
639 }
640}
/*
 * GCM_MUL(ctx,Xi): multiply the named 128-bit context member by the hash
 * key, in place.  The second argument is a member NAME (Xi or Yi), which
 * is pasted into the field access; ctx may be any pointer expression.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_1bit((ctx)->Xi.u,(ctx)->H.u)
a595baff 642
e7f5b1cd
AP
643#endif
644
c1f092d1
AP
/*
 * Declarations for the x86/x86_64 assembler implementations.  They are
 * only visible for the 4-bit table variant, with GHASH_ASM defined and
 * I386_ONLY not defined.  GCM_FUNCREF_4BIT tells the functions below to
 * call the multiplication routines through the pointers stored in the
 * context instead of calling gcm_gmult_4bit/gcm_ghash_4bit directly.
 */
#if	TABLE_BITS==4 && defined(GHASH_ASM) && !defined(I386_ONLY) && \
	(defined(__i386)	|| defined(__i386__)	|| \
	 defined(__x86_64)	|| defined(__x86_64__)	|| \
	 defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64))
# define GHASH_ASM_X86_OR_64
# define GCM_FUNCREF_4BIT

/* capability words consulted by CRYPTO_gcm128_init() */
extern unsigned int OPENSSL_ia32cap_P[2];

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
			const u8 *inp, size_t len);

# if	defined(__i386) || defined(__i386__) || defined(_M_IX86)
#  define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
			const u8 *inp, size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
			const u8 *inp, size_t len);
# endif
#endif
667
e7f5b1cd
AP
668void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
669{
670 const union { long one; char little; } is_endian = {1};
671
672 memset(ctx,0,sizeof(*ctx));
673 ctx->block = block;
674 ctx->key = key;
675
676 (*block)(ctx->H.c,ctx->H.c,key);
677
678 if (is_endian.little) {
679 /* H is stored in host byte order */
680#ifdef BSWAP8
681 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
682 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
683#else
684 u8 *p = ctx->H.c;
685 u64 hi,lo;
686 hi = (u64)GETU32(p) <<32|GETU32(p+4);
687 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
688 ctx->H.u[0] = hi;
689 ctx->H.u[1] = lo;
690#endif
691 }
692
a595baff
AP
693#if TABLE_BITS==8
694 gcm_init_8bit(ctx->Htable,ctx->H.u);
695#elif TABLE_BITS==4
d8d95832 696# if defined(GHASH_ASM_X86_OR_64)
a6d915e0 697# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
c1f092d1
AP
698 if (OPENSSL_ia32cap_P[1]&(1<<1)) {
699 gcm_init_clmul(ctx->Htable,ctx->H.u);
700 ctx->gmult = gcm_gmult_clmul;
701 ctx->ghash = gcm_ghash_clmul;
702 return;
703 }
a6d915e0 704# endif
e7f5b1cd 705 gcm_init_4bit(ctx->Htable,ctx->H.u);
6acb4ff3 706# if defined(GHASH_ASM_X86) /* x86 only */
c1f092d1
AP
707 if (OPENSSL_ia32cap_P[0]&(1<<23)) {
708 ctx->gmult = gcm_gmult_4bit_mmx;
709 ctx->ghash = gcm_ghash_4bit_mmx;
710 } else {
711 ctx->gmult = gcm_gmult_4bit_x86;
712 ctx->ghash = gcm_ghash_4bit_x86;
713 }
714# else
715 ctx->gmult = gcm_gmult_4bit;
716 ctx->ghash = gcm_ghash_4bit;
717# endif
718# else
719 gcm_init_4bit(ctx->Htable,ctx->H.u);
720# endif
a595baff 721#endif
e7f5b1cd
AP
722}
723
724void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
725{
726 const union { long one; char little; } is_endian = {1};
f472ec8c 727 unsigned int ctr;
d8d95832
AP
728#ifdef GCM_FUNCREF_4BIT
729 void (*gcm_gmult_4bit)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
730#endif
e7f5b1cd
AP
731
732 ctx->Yi.u[0] = 0;
733 ctx->Yi.u[1] = 0;
734 ctx->Xi.u[0] = 0;
735 ctx->Xi.u[1] = 0;
b68c1315
AP
736 ctx->len.u[0] = 0; /* AAD length */
737 ctx->len.u[1] = 0; /* message length */
738 ctx->ares = 0;
739 ctx->mres = 0;
e7f5b1cd
AP
740
741 if (len==12) {
742 memcpy(ctx->Yi.c,iv,12);
743 ctx->Yi.c[15]=1;
f472ec8c 744 ctr=1;
e7f5b1cd
AP
745 }
746 else {
747 size_t i;
748 u64 len0 = len;
749
750 while (len>=16) {
751 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
752 GCM_MUL(ctx,Yi);
753 iv += 16;
754 len -= 16;
755 }
756 if (len) {
757 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
758 GCM_MUL(ctx,Yi);
759 }
760 len0 <<= 3;
761 if (is_endian.little) {
762#ifdef BSWAP8
763 ctx->Yi.u[1] ^= BSWAP8(len0);
764#else
765 ctx->Yi.c[8] ^= (u8)(len0>>56);
766 ctx->Yi.c[9] ^= (u8)(len0>>48);
767 ctx->Yi.c[10] ^= (u8)(len0>>40);
768 ctx->Yi.c[11] ^= (u8)(len0>>32);
769 ctx->Yi.c[12] ^= (u8)(len0>>24);
770 ctx->Yi.c[13] ^= (u8)(len0>>16);
771 ctx->Yi.c[14] ^= (u8)(len0>>8);
772 ctx->Yi.c[15] ^= (u8)(len0);
773#endif
774 }
775 else
776 ctx->Yi.u[1] ^= len0;
777
778 GCM_MUL(ctx,Yi);
779
780 if (is_endian.little)
f472ec8c 781 ctr = GETU32(ctx->Yi.c+12);
e7f5b1cd 782 else
f472ec8c 783 ctr = ctx->Yi.d[3];
e7f5b1cd
AP
784 }
785
786 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
f472ec8c 787 ++ctr;
2262beef 788 if (is_endian.little)
f472ec8c 789 PUTU32(ctx->Yi.c+12,ctr);
2262beef 790 else
f472ec8c 791 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
792}
793
1f2502eb 794int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
e7f5b1cd
AP
795{
796 size_t i;
1f2502eb
AP
797 unsigned int n;
798 u64 alen = ctx->len.u[0];
d8d95832
AP
799#ifdef GCM_FUNCREF_4BIT
800 void (*gcm_gmult_4bit)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
801# ifdef GHASH
802 void (*gcm_ghash_4bit)(u64 Xi[2],const u128 Htable[16],
803 const u8 *inp,size_t len) = ctx->ghash;
804# endif
805#endif
e7f5b1cd 806
1f2502eb
AP
807 if (ctx->len.u[1]) return -2;
808
809 alen += len;
810 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
811 return -1;
812 ctx->len.u[0] = alen;
b68c1315 813
1f2502eb 814 n = ctx->ares;
b68c1315
AP
815 if (n) {
816 while (n && len) {
817 ctx->Xi.c[n] ^= *(aad++);
818 --len;
819 n = (n+1)%16;
820 }
821 if (n==0) GCM_MUL(ctx,Xi);
822 else {
823 ctx->ares = n;
1f2502eb 824 return 0;
b68c1315
AP
825 }
826 }
e7f5b1cd 827
2262beef
AP
828#ifdef GHASH
829 if ((i = (len&(size_t)-16))) {
c1f092d1 830 GHASH(ctx,aad,i);
2262beef
AP
831 aad += i;
832 len -= i;
833 }
834#else
e7f5b1cd
AP
835 while (len>=16) {
836 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
837 GCM_MUL(ctx,Xi);
838 aad += 16;
839 len -= 16;
840 }
2262beef 841#endif
e7f5b1cd 842 if (len) {
1f2502eb 843 n = (unsigned int)len;
e7f5b1cd 844 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
e7f5b1cd 845 }
b68c1315
AP
846
847 ctx->ares = n;
1f2502eb 848 return 0;
e7f5b1cd
AP
849}
850
1f2502eb 851int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
e7f5b1cd
AP
852 const unsigned char *in, unsigned char *out,
853 size_t len)
854{
855 const union { long one; char little; } is_endian = {1};
856 unsigned int n, ctr;
857 size_t i;
1f2502eb 858 u64 mlen = ctx->len.u[1];
d8d95832
AP
859#ifdef GCM_FUNCREF_4BIT
860 void (*gcm_gmult_4bit)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
861# ifdef GHASH
862 void (*gcm_ghash_4bit)(u64 Xi[2],const u128 Htable[16],
863 const u8 *inp,size_t len) = ctx->ghash;
864# endif
865#endif
1f2502eb
AP
866
867#if 0
868 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
869#endif
870 mlen += len;
871 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
872 return -1;
873 ctx->len.u[1] = mlen;
e7f5b1cd 874
b68c1315
AP
875 if (ctx->ares) {
876 /* First call to encrypt finalizes GHASH(AAD) */
877 GCM_MUL(ctx,Xi);
878 ctx->ares = 0;
879 }
880
f472ec8c
AP
881 if (is_endian.little)
882 ctr = GETU32(ctx->Yi.c+12);
883 else
884 ctr = ctx->Yi.d[3];
e7f5b1cd 885
1f2502eb 886 n = ctx->mres;
e7f5b1cd
AP
887#if !defined(OPENSSL_SMALL_FOOTPRINT)
888 if (16%sizeof(size_t) == 0) do { /* always true actually */
889 if (n) {
890 while (n && len) {
891 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
892 --len;
893 n = (n+1)%16;
894 }
895 if (n==0) GCM_MUL(ctx,Xi);
896 else {
b68c1315 897 ctx->mres = n;
1f2502eb 898 return 0;
e7f5b1cd
AP
899 }
900 }
e7f5b1cd
AP
901#if defined(STRICT_ALIGNMENT)
902 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
903 break;
904#endif
a595baff 905#if defined(GHASH) && defined(GHASH_CHUNK)
2262beef
AP
906 while (len>=GHASH_CHUNK) {
907 size_t j=GHASH_CHUNK;
908
909 while (j) {
910 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
911 ++ctr;
912 if (is_endian.little)
913 PUTU32(ctx->Yi.c+12,ctr);
914 else
915 ctx->Yi.d[3] = ctr;
2262beef
AP
916 for (i=0; i<16; i+=sizeof(size_t))
917 *(size_t *)(out+i) =
918 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
919 out += 16;
920 in += 16;
921 j -= 16;
922 }
c1f092d1 923 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
2262beef
AP
924 len -= GHASH_CHUNK;
925 }
926 if ((i = (len&(size_t)-16))) {
927 size_t j=i;
928
929 while (len>=16) {
930 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
931 ++ctr;
932 if (is_endian.little)
933 PUTU32(ctx->Yi.c+12,ctr);
934 else
935 ctx->Yi.d[3] = ctr;
936 for (i=0; i<16; i+=sizeof(size_t))
937 *(size_t *)(out+i) =
938 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
939 out += 16;
940 in += 16;
941 len -= 16;
942 }
c1f092d1 943 GHASH(ctx,out-j,j);
2262beef
AP
944 }
945#else
946 while (len>=16) {
e7f5b1cd 947 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
2262beef
AP
948 ++ctr;
949 if (is_endian.little)
950 PUTU32(ctx->Yi.c+12,ctr);
951 else
952 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
953 for (i=0; i<16; i+=sizeof(size_t))
954 *(size_t *)(ctx->Xi.c+i) ^=
955 *(size_t *)(out+i) =
956 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
957 GCM_MUL(ctx,Xi);
958 out += 16;
959 in += 16;
960 len -= 16;
961 }
2262beef 962#endif
e7f5b1cd 963 if (len) {
2262beef 964 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
965 ++ctr;
966 if (is_endian.little)
967 PUTU32(ctx->Yi.c+12,ctr);
968 else
969 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
970 while (len--) {
971 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
972 ++n;
973 }
974 }
975
b68c1315 976 ctx->mres = n;
1f2502eb 977 return 0;
e7f5b1cd
AP
978 } while(0);
979#endif
980 for (i=0;i<len;++i) {
981 if (n==0) {
2262beef 982 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
983 ++ctr;
984 if (is_endian.little)
985 PUTU32(ctx->Yi.c+12,ctr);
986 else
987 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
988 }
989 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
990 n = (n+1)%16;
991 if (n==0)
992 GCM_MUL(ctx,Xi);
993 }
994
b68c1315 995 ctx->mres = n;
1f2502eb 996 return 0;
e7f5b1cd
AP
997}
998
1f2502eb 999int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
e7f5b1cd
AP
1000 const unsigned char *in, unsigned char *out,
1001 size_t len)
1002{
1003 const union { long one; char little; } is_endian = {1};
1004 unsigned int n, ctr;
1005 size_t i;
1f2502eb 1006 u64 mlen = ctx->len.u[1];
d8d95832
AP
1007#ifdef GCM_FUNCREF_4BIT
1008 void (*gcm_gmult_4bit)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1009# ifdef GHASH
1010 void (*gcm_ghash_4bit)(u64 Xi[2],const u128 Htable[16],
1011 const u8 *inp,size_t len) = ctx->ghash;
1012# endif
1013#endif
1f2502eb
AP
1014
1015 mlen += len;
1016 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1017 return -1;
1018 ctx->len.u[1] = mlen;
e7f5b1cd 1019
b68c1315
AP
1020 if (ctx->ares) {
1021 /* First call to decrypt finalizes GHASH(AAD) */
1022 GCM_MUL(ctx,Xi);
1023 ctx->ares = 0;
1024 }
1025
f472ec8c
AP
1026 if (is_endian.little)
1027 ctr = GETU32(ctx->Yi.c+12);
1028 else
1029 ctr = ctx->Yi.d[3];
e7f5b1cd 1030
1f2502eb 1031 n = ctx->mres;
e7f5b1cd
AP
1032#if !defined(OPENSSL_SMALL_FOOTPRINT)
1033 if (16%sizeof(size_t) == 0) do { /* always true actually */
1034 if (n) {
1035 while (n && len) {
1036 u8 c = *(in++);
1037 *(out++) = c^ctx->EKi.c[n];
1038 ctx->Xi.c[n] ^= c;
1039 --len;
1040 n = (n+1)%16;
1041 }
1042 if (n==0) GCM_MUL (ctx,Xi);
1043 else {
b68c1315 1044 ctx->mres = n;
1f2502eb 1045 return 0;
e7f5b1cd
AP
1046 }
1047 }
e7f5b1cd
AP
1048#if defined(STRICT_ALIGNMENT)
1049 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1050 break;
1051#endif
a595baff 1052#if defined(GHASH) && defined(GHASH_CHUNK)
2262beef
AP
1053 while (len>=GHASH_CHUNK) {
1054 size_t j=GHASH_CHUNK;
1055
c1f092d1 1056 GHASH(ctx,in,GHASH_CHUNK);
2262beef
AP
1057 while (j) {
1058 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
1059 ++ctr;
1060 if (is_endian.little)
1061 PUTU32(ctx->Yi.c+12,ctr);
1062 else
1063 ctx->Yi.d[3] = ctr;
2262beef
AP
1064 for (i=0; i<16; i+=sizeof(size_t))
1065 *(size_t *)(out+i) =
1066 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1067 out += 16;
1068 in += 16;
1069 j -= 16;
1070 }
1071 len -= GHASH_CHUNK;
1072 }
1073 if ((i = (len&(size_t)-16))) {
c1f092d1 1074 GHASH(ctx,in,i);
2262beef
AP
1075 while (len>=16) {
1076 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1077 ++ctr;
1078 if (is_endian.little)
1079 PUTU32(ctx->Yi.c+12,ctr);
1080 else
1081 ctx->Yi.d[3] = ctr;
1082 for (i=0; i<16; i+=sizeof(size_t))
1083 *(size_t *)(out+i) =
1084 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1085 out += 16;
1086 in += 16;
1087 len -= 16;
1088 }
1089 }
1090#else
1091 while (len>=16) {
e7f5b1cd 1092 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
2262beef
AP
1093 ++ctr;
1094 if (is_endian.little)
1095 PUTU32(ctx->Yi.c+12,ctr);
1096 else
1097 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1098 for (i=0; i<16; i+=sizeof(size_t)) {
1099 size_t c = *(size_t *)(in+i);
1100 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1101 *(size_t *)(ctx->Xi.c+i) ^= c;
1102 }
2262beef 1103 GCM_MUL(ctx,Xi);
e7f5b1cd
AP
1104 out += 16;
1105 in += 16;
1106 len -= 16;
1107 }
2262beef 1108#endif
e7f5b1cd 1109 if (len) {
2262beef 1110 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
1111 ++ctr;
1112 if (is_endian.little)
1113 PUTU32(ctx->Yi.c+12,ctr);
1114 else
1115 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1116 while (len--) {
1117 u8 c = in[n];
1118 ctx->Xi.c[n] ^= c;
1119 out[n] = c^ctx->EKi.c[n];
1120 ++n;
1121 }
1122 }
1123
b68c1315 1124 ctx->mres = n;
1f2502eb 1125 return 0;
e7f5b1cd
AP
1126 } while(0);
1127#endif
1128 for (i=0;i<len;++i) {
1129 u8 c;
1130 if (n==0) {
2262beef 1131 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
1132 ++ctr;
1133 if (is_endian.little)
1134 PUTU32(ctx->Yi.c+12,ctr);
1135 else
1136 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1137 }
1138 c = in[i];
68e2586b 1139 out[i] = c^ctx->EKi.c[n];
e7f5b1cd
AP
1140 ctx->Xi.c[n] ^= c;
1141 n = (n+1)%16;
1142 if (n==0)
1143 GCM_MUL(ctx,Xi);
1144 }
1145
b68c1315 1146 ctx->mres = n;
1f2502eb 1147 return 0;
e7f5b1cd
AP
1148}
1149
1f2502eb 1150int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
f71c6ace
AP
1151 const unsigned char *in, unsigned char *out,
1152 size_t len, ctr128_f stream)
1153{
1154 const union { long one; char little; } is_endian = {1};
1155 unsigned int n, ctr;
1156 size_t i;
1f2502eb 1157 u64 mlen = ctx->len.u[1];
d8d95832
AP
1158#ifdef GCM_FUNCREF_4BIT
1159 void (*gcm_gmult_4bit)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1160# ifdef GHASH
1161 void (*gcm_ghash_4bit)(u64 Xi[2],const u128 Htable[16],
1162 const u8 *inp,size_t len) = ctx->ghash;
1163# endif
1164#endif
1f2502eb
AP
1165
1166 mlen += len;
1167 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1168 return -1;
1169 ctx->len.u[1] = mlen;
f71c6ace 1170
b68c1315
AP
1171 if (ctx->ares) {
1172 /* First call to encrypt finalizes GHASH(AAD) */
1173 GCM_MUL(ctx,Xi);
1174 ctx->ares = 0;
1175 }
1176
f71c6ace
AP
1177 if (is_endian.little)
1178 ctr = GETU32(ctx->Yi.c+12);
1179 else
1180 ctr = ctx->Yi.d[3];
1181
1f2502eb 1182 n = ctx->mres;
f71c6ace
AP
1183 if (n) {
1184 while (n && len) {
1185 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1186 --len;
1187 n = (n+1)%16;
1188 }
1189 if (n==0) GCM_MUL(ctx,Xi);
1190 else {
b68c1315 1191 ctx->mres = n;
1f2502eb 1192 return 0;
f71c6ace
AP
1193 }
1194 }
1195#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1196 while (len>=GHASH_CHUNK) {
1197 (*stream)(in,out,GHASH_CHUNK/16,ctx->key,ctx->Yi.c);
1198 ctr += GHASH_CHUNK/16;
1199 if (is_endian.little)
1200 PUTU32(ctx->Yi.c+12,ctr);
1201 else
1202 ctx->Yi.d[3] = ctr;
1203 GHASH(ctx,out,GHASH_CHUNK);
1204 out += GHASH_CHUNK;
1205 in += GHASH_CHUNK;
1206 len -= GHASH_CHUNK;
1207 }
1208#endif
1209 if ((i = (len&(size_t)-16))) {
1210 size_t j=i/16;
1211
1212 (*stream)(in,out,j,ctx->key,ctx->Yi.c);
68e2586b 1213 ctr += (unsigned int)j;
f71c6ace
AP
1214 if (is_endian.little)
1215 PUTU32(ctx->Yi.c+12,ctr);
1216 else
1217 ctx->Yi.d[3] = ctr;
1218 in += i;
1219 len -= i;
1220#if defined(GHASH)
1221 GHASH(ctx,out,i);
1222 out += i;
1223#else
1224 while (j--) {
1225 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1226 GCM_MUL(ctx,Xi);
1227 out += 16;
1228 }
1229#endif
1230 }
1231 if (len) {
1232 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1233 ++ctr;
1234 if (is_endian.little)
1235 PUTU32(ctx->Yi.c+12,ctr);
1236 else
1237 ctx->Yi.d[3] = ctr;
1238 while (len--) {
1239 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1240 ++n;
1241 }
1242 }
1243
b68c1315 1244 ctx->mres = n;
1f2502eb 1245 return 0;
f71c6ace
AP
1246}
1247
1f2502eb 1248int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
f71c6ace
AP
1249 const unsigned char *in, unsigned char *out,
1250 size_t len,ctr128_f stream)
1251{
1252 const union { long one; char little; } is_endian = {1};
1253 unsigned int n, ctr;
1254 size_t i;
1f2502eb 1255 u64 mlen = ctx->len.u[1];
d8d95832
AP
1256#ifdef GCM_FUNCREF_4BIT
1257 void (*gcm_gmult_4bit)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1258# ifdef GHASH
1259 void (*gcm_ghash_4bit)(u64 Xi[2],const u128 Htable[16],
1260 const u8 *inp,size_t len) = ctx->ghash;
1261# endif
1262#endif
1f2502eb
AP
1263
1264 mlen += len;
1265 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1266 return -1;
1267 ctx->len.u[1] = mlen;
f71c6ace 1268
b68c1315
AP
1269 if (ctx->ares) {
1270 /* First call to decrypt finalizes GHASH(AAD) */
1271 GCM_MUL(ctx,Xi);
1272 ctx->ares = 0;
1273 }
1274
f71c6ace
AP
1275 if (is_endian.little)
1276 ctr = GETU32(ctx->Yi.c+12);
1277 else
1278 ctr = ctx->Yi.d[3];
1279
1f2502eb 1280 n = ctx->mres;
f71c6ace
AP
1281 if (n) {
1282 while (n && len) {
1283 u8 c = *(in++);
1284 *(out++) = c^ctx->EKi.c[n];
1285 ctx->Xi.c[n] ^= c;
1286 --len;
1287 n = (n+1)%16;
1288 }
1289 if (n==0) GCM_MUL (ctx,Xi);
1290 else {
b68c1315 1291 ctx->mres = n;
1f2502eb 1292 return 0;
f71c6ace
AP
1293 }
1294 }
1295#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1296 while (len>=GHASH_CHUNK) {
1297 GHASH(ctx,in,GHASH_CHUNK);
1298 (*stream)(in,out,GHASH_CHUNK/16,ctx->key,ctx->Yi.c);
1299 ctr += GHASH_CHUNK/16;
1300 if (is_endian.little)
1301 PUTU32(ctx->Yi.c+12,ctr);
1302 else
1303 ctx->Yi.d[3] = ctr;
1304 out += GHASH_CHUNK;
1305 in += GHASH_CHUNK;
1306 len -= GHASH_CHUNK;
1307 }
1308#endif
1309 if ((i = (len&(size_t)-16))) {
1310 size_t j=i/16;
1311
1312#if defined(GHASH)
1313 GHASH(ctx,in,i);
1314#else
1315 while (j--) {
1316 size_t k;
1317 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1318 GCM_MUL(ctx,Xi);
1319 in += 16;
1320 }
1321 j = i/16;
1322 in -= i;
1323#endif
1324 (*stream)(in,out,j,ctx->key,ctx->Yi.c);
68e2586b 1325 ctr += (unsigned int)j;
f71c6ace
AP
1326 if (is_endian.little)
1327 PUTU32(ctx->Yi.c+12,ctr);
1328 else
1329 ctx->Yi.d[3] = ctr;
1330 out += i;
1331 in += i;
1332 len -= i;
1333 }
1334 if (len) {
1335 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1336 ++ctr;
1337 if (is_endian.little)
1338 PUTU32(ctx->Yi.c+12,ctr);
1339 else
1340 ctx->Yi.d[3] = ctr;
1341 while (len--) {
1342 u8 c = in[n];
1343 ctx->Xi.c[n] ^= c;
1344 out[n] = c^ctx->EKi.c[n];
1345 ++n;
1346 }
1347 }
1348
b68c1315 1349 ctx->mres = n;
1f2502eb 1350 return 0;
f71c6ace
AP
1351}
1352
6acb4ff3
AP
1353int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1354 size_t len)
e7f5b1cd
AP
1355{
1356 const union { long one; char little; } is_endian = {1};
1357 u64 alen = ctx->len.u[0]<<3;
1358 u64 clen = ctx->len.u[1]<<3;
d8d95832
AP
1359#ifdef GCM_FUNCREF_4BIT
1360 void (*gcm_gmult_4bit)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
1361#endif
e7f5b1cd 1362
b68c1315 1363 if (ctx->mres)
e7f5b1cd
AP
1364 GCM_MUL(ctx,Xi);
1365
1366 if (is_endian.little) {
1367#ifdef BSWAP8
1368 alen = BSWAP8(alen);
1369 clen = BSWAP8(clen);
1370#else
1371 u8 *p = ctx->len.c;
1372
1373 ctx->len.u[0] = alen;
1374 ctx->len.u[1] = clen;
1375
1376 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1377 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1378#endif
1379 }
1380
1381 ctx->Xi.u[0] ^= alen;
1382 ctx->Xi.u[1] ^= clen;
1383 GCM_MUL(ctx,Xi);
1384
1385 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1386 ctx->Xi.u[1] ^= ctx->EK0.u[1];
6acb4ff3
AP
1387
1388 if (tag && len<=sizeof(ctx->Xi))
1389 return memcmp(ctx->Xi.c,tag,len);
1390 else
1391 return -1;
1392}
1393
fd3dbc1d
DSH
1394void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1395{
1396 CRYPTO_gcm128_finish(ctx, NULL, 0);
1f2502eb 1397 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
fd3dbc1d
DSH
1398}
1399
6acb4ff3
AP
1400GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1401{
1402 GCM128_CONTEXT *ret;
1403
1404 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1405 CRYPTO_gcm128_init(ret,key,block);
1406
1407 return ret;
1408}
1409
1410void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1411{
1412 if (ctx) {
1413 OPENSSL_cleanse(ctx,sizeof(*ctx));
1414 OPENSSL_free(ctx);
1415 }
e7f5b1cd
AP
1416}
1417
1418#if defined(SELFTEST)
1419#include <stdio.h>
1420#include <openssl/aes.h>
1421
1422/* Test Case 1 */
1423static const u8 K1[16],
1424 *P1=NULL,
1425 *A1=NULL,
1426 IV1[12],
1427 *C1=NULL,
1428 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
a595baff 1429
e7f5b1cd
AP
1430/* Test Case 2 */
1431#define K2 K1
1432#define A2 A1
1433#define IV2 IV1
1434static const u8 P2[16],
1435 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1436 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1437
1438/* Test Case 3 */
1439#define A3 A2
1440static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1441 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1442 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1443 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1444 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1445 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1446 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1447 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1448 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1449 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
fb2d5a91 1450 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
e7f5b1cd
AP
1451
1452/* Test Case 4 */
1453#define K4 K3
1454#define IV4 IV3
1455static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1456 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1457 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1458 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1459 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1460 0xab,0xad,0xda,0xd2},
1461 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1462 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1463 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1464 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1465 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1466
1467/* Test Case 5 */
1468#define K5 K4
1469#define P5 P4
d8d95832
AP
1470#define A5 A4
1471static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
e7f5b1cd
AP
1472 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1473 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1474 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1475 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1476 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
a595baff 1477
e7f5b1cd
AP
1478/* Test Case 6 */
1479#define K6 K5
1480#define P6 P5
1481#define A6 A5
1482static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1483 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1484 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1485 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1486 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1487 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1488 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1489 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1490 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1491
1492/* Test Case 7 */
1493static const u8 K7[24],
1494 *P7=NULL,
1495 *A7=NULL,
1496 IV7[12],
1497 *C7=NULL,
1498 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1499
1500/* Test Case 8 */
1501#define K8 K7
1502#define IV8 IV7
1503#define A8 A7
1504static const u8 P8[16],
1505 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1506 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1507
1508/* Test Case 9 */
1509#define A9 A8
1510static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1511 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1512 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1513 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1514 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1515 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1516 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1517 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1518 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1519 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1520 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1521 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1522
1523/* Test Case 10 */
1524#define K10 K9
1525#define IV10 IV9
1526static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1527 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1528 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1529 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1530 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1531 0xab,0xad,0xda,0xd2},
1532 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1533 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1534 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1535 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1536 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1537
1538/* Test Case 11 */
1539#define K11 K10
1540#define P11 P10
1541#define A11 A10
1542static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1543 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1544 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1545 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1546 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1547 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1548
1549/* Test Case 12 */
1550#define K12 K11
1551#define P12 P11
1552#define A12 A11
1553static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1554 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1555 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1556 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1557 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1558 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1559 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1560 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1561 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1562
1563/* Test Case 13 */
1564static const u8 K13[32],
1565 *P13=NULL,
1566 *A13=NULL,
1567 IV13[12],
1568 *C13=NULL,
1569 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1570
1571/* Test Case 14 */
1572#define K14 K13
1573#define A14 A13
1574static const u8 P14[16],
1575 IV14[12],
1576 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1577 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1578
1579/* Test Case 15 */
1580#define A15 A14
1581static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1582 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1583 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1584 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1585 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1586 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1587 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1588 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1589 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1590 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1591 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1592 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1593
1594/* Test Case 16 */
1595#define K16 K15
1596#define IV16 IV15
1597static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1598 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1599 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1600 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1601 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1602 0xab,0xad,0xda,0xd2},
1603 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1604 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1605 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1606 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1607 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1608
1609/* Test Case 17 */
1610#define K17 K16
1611#define P17 P16
1612#define A17 A16
1613static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1614 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1615 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1616 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1617 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1618 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1619
1620/* Test Case 18 */
1621#define K18 K17
1622#define P18 P17
1623#define A18 A17
1624static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1625 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1626 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1627 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1628 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1629 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1630 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1631 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1632 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1633
/*
 * Run known-answer test |n| in both directions.  Expects |ctx|, |key| and
 * the failure counter |ret| in the enclosing scope, plus the K<n>, IV<n>,
 * A<n>, P<n>, C<n> and T<n> vectors.  Each direction that produces a wrong
 * tag or wrong output increments |ret| and prints a message.
 */
#define TEST_CASE(n)	do {						\
	u8 out[sizeof(P##n)];						\
									\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);			\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);		\
									\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));			\
	memset(out,0,sizeof(out));					\
	if (A##n) {							\
		CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));		\
	}								\
	if (P##n) {							\
		CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	}								\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||			\
	    (C##n && memcmp(out,C##n,sizeof(out)))) {			\
		ret++;							\
		printf ("encrypt test#%d failed.\n",n);			\
	}								\
									\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));			\
	memset(out,0,sizeof(out));					\
	if (A##n) {							\
		CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));		\
	}								\
	if (C##n) {							\
		CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	}								\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||			\
	    (P##n && memcmp(out,P##n,sizeof(out)))) {			\
		ret++;							\
		printf ("decrypt test#%d failed.\n",n);			\
	}								\
	} while(0)
1653
1654int main()
1655{
1656 GCM128_CONTEXT ctx;
1657 AES_KEY key;
1658 int ret=0;
1659
1660 TEST_CASE(1);
1661 TEST_CASE(2);
1662 TEST_CASE(3);
1663 TEST_CASE(4);
1664 TEST_CASE(5);
1665 TEST_CASE(6);
1666 TEST_CASE(7);
1667 TEST_CASE(8);
1668 TEST_CASE(9);
1669 TEST_CASE(10);
1670 TEST_CASE(11);
1671 TEST_CASE(12);
1672 TEST_CASE(13);
1673 TEST_CASE(14);
1674 TEST_CASE(15);
1675 TEST_CASE(16);
1676 TEST_CASE(17);
1677 TEST_CASE(18);
1678
a595baff 1679#ifdef OPENSSL_CPUID_OBJ
2262beef
AP
1680 {
1681 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1682 union { u64 u; u8 c[1024]; } buf;
c1f092d1 1683 int i;
2262beef
AP
1684
1685 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1686 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1687 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1688
1689 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1690 start = OPENSSL_rdtsc();
1691 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1692 gcm_t = OPENSSL_rdtsc() - start;
1693
1694 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
b68c1315 1695 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
2262beef
AP
1696 (block128_f)AES_encrypt);
1697 start = OPENSSL_rdtsc();
1698 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
b68c1315 1699 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
a595baff 1700 (block128_f)AES_encrypt);
2262beef
AP
1701 ctr_t = OPENSSL_rdtsc() - start;
1702
1703 printf("%.2f-%.2f=%.2f\n",
1704 gcm_t/(double)sizeof(buf),
1705 ctr_t/(double)sizeof(buf),
1706 (gcm_t-ctr_t)/(double)sizeof(buf));
a595baff 1707#ifdef GHASH
c1f092d1 1708 GHASH(&ctx,buf.c,sizeof(buf));
a595baff 1709 start = OPENSSL_rdtsc();
c1f092d1 1710 for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
a595baff 1711 gcm_t = OPENSSL_rdtsc() - start;
c1f092d1 1712 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
a595baff 1713#endif
2262beef 1714 }
a595baff 1715#endif
2262beef 1716
e7f5b1cd
AP
1717 return ret;
1718}
1719#endif