]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/gcm128.c
Make "run" volatile
[thirdparty/openssl.git] / crypto / modes / gcm128.c
CommitLineData
e7f5b1cd
AP
1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
73e45b2d 50
f4001a0d 51
aa763c0f 52#include <openssl/crypto.h>
f472ec8c 53#include "modes_lcl.h"
e7f5b1cd
AP
54#include <string.h>
55
56#ifndef MODES_DEBUG
57# ifndef NDEBUG
58# define NDEBUG
59# endif
60#endif
61#include <assert.h>
62
f472ec8c
AP
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine, because alignment is ensured: the generic GETU32/PUTU32
 * assemble a word byte-by-byte for strict-alignment targets, but every
 * caller in this file passes 16-byte-aligned union members, so a single
 * byte-swapped word access is safe and faster.
 */
#undef GETU32
#define GETU32(p) BSWAP4(*(const u32 *)(p))
#undef PUTU32
#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
#endif
70
c1f092d1
AP
/*
 * PACK() positions a 16-bit reduction constant in the top 16 bits of a
 * size_t, so rem_4bit/rem_8bit entries can be XORed straight into the
 * high word of the accumulator on both 32- and 64-bit builds.
 */
#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * REDUCE1BIT(V) multiplies the 128-bit value V by x in GF(2^128):
 * shift right by one bit and, if a bit fell off the low end, fold it
 * back in with the GCM reduction polynomial (0xE1 in the top byte).
 * The sizeof(size_t) branch is resolved at compile time and lets
 * 32-bit builds avoid a 64-bit constant AND.
 */
#define REDUCE1BIT(V) do { \
 if (sizeof(size_t)==8) { \
 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
 V.lo = (V.hi<<63)|(V.lo>>1); \
 V.hi = (V.hi>>1 )^T; \
 } \
 else { \
 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
 V.lo = (V.hi<<63)|(V.lo>>1); \
 V.hi = (V.hi>>1 )^((u64)T<<32); \
 } \
} while(0)
84
d8d95832
AP
85/*
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
106 *
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
116 *
117 * Value of 1 is not appropriate for performance reasons.
118 */
a595baff
AP
119#if TABLE_BITS==8
120
e7f5b1cd
AP
121static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122{
123 int i, j;
124 u128 V;
125
126 Htable[0].hi = 0;
127 Htable[0].lo = 0;
128 V.hi = H[0];
129 V.lo = H[1];
130
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
c1f092d1 132 REDUCE1BIT(V);
e7f5b1cd
AP
133 Htable[i] = V;
134 }
135
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
141 }
142 }
143}
144
/*
 * Multiply Xi by H in GF(2^128) using the per-key 8-bit (Shoup's)
 * table. Xi holds a 128-bit big-endian value; the product replaces Xi.
 * Processes Xi one byte at a time from the least-significant end,
 * multiplying the accumulator by x^8 between bytes and folding the
 * shifted-out byte back in through the rem_8bit reduction constants.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0};
    const u8 *xi = (const u8 *)Xi+15;   /* walk Xi from last byte to first */
    size_t rem, n = *xi;
    const union { long one; char little; } is_endian = {1};
    /* rem_8bit[b] = reduction of b*x^-8 mod the GCM polynomial,
     * pre-shifted into the top 16 bits by PACK(). */
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

    while (1) {
        /* accumulate the table entry selected by the current byte */
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi==xi) break;    /* all 16 bytes consumed */

        n = *(--xi);

        /* Z *= x^8: shift right one byte, reduce the dropped byte */
        rem  = (size_t)Z.lo&0xff;
        Z.lo = (Z.hi<<56)|(Z.lo>>8);
        Z.hi = (Z.hi>>8);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem]<<32;
    }

    /* store the big-endian result back into Xi */
    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32); PUTU32(p,v);
        v = (u32)(Z.hi);     PUTU32(p+4,v);
        v = (u32)(Z.lo>>32); PUTU32(p+8,v);
        v = (u32)(Z.lo);     PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
a595baff 252#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
e7f5b1cd 253
a595baff 254#elif TABLE_BITS==4
2262beef 255
e7f5b1cd
AP
/*
 * Build the 256-byte per-key "4-bit" (Shoup's) table: Htable[i] = i*H
 * in GF(2^128) for the 16 possible 4-bit nibble values. The small-
 * footprint variant uses loops; the default variant is fully unrolled.
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
#endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
    /* power-of-two entries by repeated multiplication by x */
    for (Htable[8]=V, i=4; i>0; i>>=1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    /* composite entries as XOR of power-of-two entries */
    for (i=2; i<16; i<<=1) {
        u128 *Hi = Htable+i;
        int j;
        for (V=*Hi, j=1; j<i; ++j) {
            Hi[j].hi = V.hi^Htable[j].hi;
            Hi[j].lo = V.lo^Htable[j].lo;
        }
    }
#else
    /* same computation, unrolled: 8=H, then 4,2,1 by reduction ... */
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    /* ... and composites as XORs of the above */
    Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
    V=Htable[4];
    Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
    Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
    Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
    V=Htable[8];
    Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
    Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
    Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
    Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
    Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
    Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
    Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
    int j;
    const union { long one; char little; } is_endian = {1};

    if (is_endian.little)
        for (j=0;j<16;++j) {
            V = Htable[j];
            Htable[j].hi = V.lo;
            Htable[j].lo = V.hi;
        }
    else
        for (j=0;j<16;++j) {
            V = Htable[j];
            Htable[j].hi = V.lo<<32|V.lo>>32;
            Htable[j].lo = V.hi<<32|V.hi>>32;
        }
    }
#endif
}
327
a595baff 328#ifndef GHASH_ASM
2262beef
AP
/*
 * rem_4bit[n] = reduction constant for the nibble n shifted out of the
 * low end of the accumulator when multiplying by x^4; values are the
 * GCM polynomial partial products, pre-shifted into the top 16 bits
 * of a size_t by PACK().
 */
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
334
/*
 * Multiply Xi by H in GF(2^128) using the per-key 4-bit table.
 * Xi is a 128-bit big-endian value; the product replaces Xi. Each byte
 * of Xi is consumed as two 4-bit nibbles (high then low, walking from
 * the least-significant byte), with a multiply-by-x^4 step and table
 * reduction between nibbles.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

    /* seed the accumulator with the last (least significant) nibble */
    nlo  = ((const u8 *)Xi)[15];
    nhi  = nlo>>4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        /* Z *= x^4; fold the shifted-out nibble back in via rem_4bit */
        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem]<<32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt<0) break;

        nlo  = ((const u8 *)Xi)[cnt];
        nhi  = nlo>>4;
        nlo &= 0xf;

        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem]<<32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    /* store the big-endian result back into Xi */
    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32); PUTU32(p,v);
        v = (u32)(Z.hi);     PUTU32(p+4,v);
        v = (u32)(Z.lo>>32); PUTU32(p+8,v);
        v = (u32)(Z.lo);     PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
397
398#if !defined(OPENSSL_SMALL_FOOTPRINT)
399/*
400 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
a595baff
AP
401 * details... Compiler-generated code doesn't seem to give any
402 * performance improvement, at least not on x86[_64]. It's here
403 * mostly as reference and a placeholder for possible future
404 * non-trivial optimization[s]...
2262beef 405 */
4f39edbf
AP
/*
 * GHASH over len bytes of inp: for each 16-byte block, Xi ^= block,
 * then Xi *= H (4-bit table method). len must be a positive multiple
 * of 16. The #else branch is a disabled experimental variant that
 * trades 256+16 extra per-key bytes plus a shared 512-byte table for
 * byte-at-a-time (instead of nibble-at-a-time) processing.
 *
 * NOTE: the do{}while at the bottom closes a `do {` opened separately
 * inside EACH preprocessor arm — do not restructure one arm without
 * the other.
 */
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};

#if 1
    do {
        /* XOR the input block into Xi on the fly, nibble by nibble,
         * while performing the same Shoup-table multiply as
         * gcm_gmult_4bit */
        cnt  = 15;
        nlo  = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
            if (sizeof(size_t)==8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem]<<32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt<0) break;

            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
            if (sizeof(size_t)==8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem]<<32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];    /* Htable shifted right by 4 bits */
    u8   Hshl4[16];    /* Htable shifted left  by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
        Hshr4[cnt].hi = (Z.hi>>4);
        Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
        for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo&0xff;

            Z.lo = (Z.hi<<56)|(Z.lo>>8);
            Z.hi = (Z.hi>>8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
        }

        /* first byte: only a 4-bit final shift is needed */
        nlo  = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo&0xf;

        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

        /* store the big-endian result back into Xi */
        if (is_endian.little) {
#ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi>>32); PUTU32(p,v);
            v = (u32)(Z.hi);     PUTU32(p+4,v);
            v = (u32)(Z.lo>>32); PUTU32(p+8,v);
            v = (u32)(Z.lo);     PUTU32(p+12,v);
#endif
        }
        else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp+=16, len-=16);
}
2262beef
AP
568#endif
569#else
4f39edbf
AP
570void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
571void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
2262beef
AP
572#endif
573
574#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
a595baff 575#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
c1f092d1 576#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
a595baff
AP
577/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
578 * trashing effect. In other words idea is to hash data while it's
579 * still in L1 cache after encryption pass... */
68e2586b 580#define GHASH_CHUNK (3*1024)
a595baff 581#endif
2262beef 582
a595baff 583#else /* TABLE_BITS */
e7f5b1cd 584
/*
 * Bit-serial (TABLE_BITS==1) multiplication of Xi by H in GF(2^128):
 * no per-key table, constant memory, but 128 shift/reduce steps per
 * block. Xi is big-endian; the product replaces Xi.
 */
static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
{
    u128 V,Z = { 0,0 };
    long X;
    int i,j;
    const long *xi = (const long *)Xi;
    const union { long one; char little; } is_endian = {1};

    V.hi = H[0];    /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    /* consume Xi one machine word at a time, most significant first */
    for (j=0; j<16/sizeof(long); ++j) {
        /* load the j-th word of Xi in host order */
        if (is_endian.little) {
            if (sizeof(long)==8) {
#ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
#else
                const u8 *p = (const u8 *)(xi+j);
                X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
            }
            else {
                const u8 *p = (const u8 *)(xi+j);
                X = (long)GETU32(p);
            }
        }
        else
            X = xi[j];

        /* classic shift-and-add: for each bit of X (MSB first),
         * conditionally accumulate V, then V *= x with reduction.
         * M is an all-ones/all-zeros mask from the sign bit, so the
         * accumulate is branch-free. */
        for (i=0; i<8*sizeof(long); ++i, X<<=1) {
            u64 M = (u64)(X>>(8*sizeof(long)-1));
            Z.hi ^= V.hi&M;
            Z.lo ^= V.lo&M;

            REDUCE1BIT(V);
        }
    }

    /* store the big-endian result back into Xi */
    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32); PUTU32(p,v);
        v = (u32)(Z.hi);     PUTU32(p+4,v);
        v = (u32)(Z.lo>>32); PUTU32(p+8,v);
        v = (u32)(Z.lo);     PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
2262beef 641#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
a595baff 642
e7f5b1cd
AP
643#endif
644
82741e9c 645#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
1e863180 646# if !defined(I386_ONLY) && \
c1f092d1
AP
647 (defined(__i386) || defined(__i386__) || \
648 defined(__x86_64) || defined(__x86_64__) || \
649 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
1e863180
AP
650# define GHASH_ASM_X86_OR_64
651# define GCM_FUNCREF_4BIT
c1f092d1
AP
652extern unsigned int OPENSSL_ia32cap_P[2];
653
654void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
655void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
656void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
657
64f7e2c4 658#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
1da5d302
AP
659# define gcm_init_avx gcm_init_clmul
660# define gcm_gmult_avx gcm_gmult_clmul
661# define gcm_ghash_avx gcm_ghash_clmul
662#else
663void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
664void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
665void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
666#endif
667
1e863180
AP
668# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
669# define GHASH_ASM_X86
c1f092d1
AP
670void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
671void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
672
673void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
674void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
1e863180 675# endif
82741e9c 676# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
1e863180
AP
677# include "arm_arch.h"
678# if __ARM_ARCH__>=7
679# define GHASH_ASM_ARM
680# define GCM_FUNCREF_4BIT
82741e9c
AP
681# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
682# if defined(__arm__) || defined(__arm)
683# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
684# endif
f8cee9d0 685void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
1e863180
AP
686void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
687void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
82741e9c
AP
688void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
689void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
690void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
1e863180 691# endif
23328d4b
AP
692# elif defined(__sparc__) || defined(__sparc)
693# include "sparc_arch.h"
694# define GHASH_ASM_SPARC
695# define GCM_FUNCREF_4BIT
696extern unsigned int OPENSSL_sparcv9cap_P[];
24798c5e 697void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
23328d4b
AP
698void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
699void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
0e716d92
AP
700#elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
701# include "ppc_arch.h"
702# define GHASH_ASM_PPC
703# define GCM_FUNCREF_4BIT
704void gcm_init_p8(u128 Htable[16],const u64 Xi[2]);
705void gcm_gmult_p8(u64 Xi[2],const u128 Htable[16]);
706void gcm_ghash_p8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
c1f092d1 707# endif
c1f092d1
AP
708#endif
709
7af04002
AP
710#ifdef GCM_FUNCREF_4BIT
711# undef GCM_MUL
712# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
713# ifdef GHASH
714# undef GHASH
715# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
716# endif
717#endif
718
e7f5b1cd
AP
/*
 * Initialize a GCM context for the given block cipher and key:
 * compute the hash subkey H = E_K(0^128), convert it to host byte
 * order, build the multiplication table, and select the fastest
 * gmult/ghash implementation the CPU supports.
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
{
    const union { long one; char little; } is_endian = {1};

    memset(ctx,0,sizeof(*ctx));
    ctx->block = block;
    ctx->key   = key;

    /* H = E_K(0^128); ctx->H.c was zeroed by the memset above */
    (*block)(ctx->H.c,ctx->H.c,key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi,lo;
        hi = (u64)GETU32(p)  <<32|GETU32(p+4);
        lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

#if TABLE_BITS==8
    /* 8-bit table is reserved for testing purposes (see comment at
     * top of file); no assembler dispatch in this configuration */
    gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif TABLE_BITS==4
# if defined(GHASH_ASM_X86_OR_64)
#  if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0]&(1<<24) &&    /* check FXSR bit */
        OPENSSL_ia32cap_P[1]&(1<<1) ) {    /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) {    /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable,ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            ctx->ghash = gcm_ghash_avx;
        } else {
            gcm_init_clmul(ctx->Htable,ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            ctx->ghash = gcm_ghash_clmul;
        }
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable,ctx->H.u);
#  if defined(GHASH_ASM_X86)        /* x86 only */
#   if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0]&(1<<25)) {    /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0]&(1<<23)) {    /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        ctx->ghash = gcm_ghash_4bit_x86;
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
#  endif
# elif defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        ctx->ghash = gcm_ghash_v8;
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# elif defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        ctx->ghash = gcm_ghash_vis3;
    } else {
        gcm_init_4bit(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# elif defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        ctx->ghash = gcm_ghash_p8;
    } else {
        gcm_init_4bit(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# else
    gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}
825
/*
 * Set the IV and reset all per-message state. For a 96-bit IV the
 * initial counter block is IV||0^31||1 (NIST SP 800-38D fast path);
 * any other length is GHASHed, 16 bytes at a time, followed by the
 * 64-bit IV bit-length. Also precomputes EK0 = E_K(Y0) for the final
 * tag and leaves Yi holding the incremented counter for block 1.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->Yi.u[0]  = 0;
    ctx->Yi.u[1]  = 0;
    ctx->Xi.u[0]  = 0;
    ctx->Xi.u[1]  = 0;
    ctx->len.u[0] = 0;    /* AAD length */
    ctx->len.u[1] = 0;    /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len==12) {
        memcpy(ctx->Yi.c,iv,12);
        ctx->Yi.c[15]=1;
        ctr=1;
    }
    else {
        size_t i;
        u64 len0 = len;

        /* GHASH the IV: full blocks first ... */
        while (len>=16) {
            for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
            iv += 16;
            len -= 16;
        }
        /* ... then the zero-padded tail */
        if (len) {
            for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
        }
        /* finish with the 64-bit IV length in bits */
        len0 <<= 3;
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
            ctx->Yi.c[8]  ^= (u8)(len0>>56);
            ctx->Yi.c[9]  ^= (u8)(len0>>48);
            ctx->Yi.c[10] ^= (u8)(len0>>40);
            ctx->Yi.c[11] ^= (u8)(len0>>32);
            ctx->Yi.c[12] ^= (u8)(len0>>24);
            ctx->Yi.c[13] ^= (u8)(len0>>16);
            ctx->Yi.c[14] ^= (u8)(len0>>8);
            ctx->Yi.c[15] ^= (u8)(len0);
#endif
        }
        else
            ctx->Yi.u[1]  ^= len0;

        GCM_MUL(ctx,Yi);

        /* extract the 32-bit counter from Y0 in host order */
        if (is_endian.little)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Yi.d[3]);
#else
            ctr = GETU32(ctx->Yi.c+12);
#endif
        else
            ctr = ctx->Yi.d[3];
    }

    /* EK0 is XORed into the tag in CRYPTO_gcm128_finish */
    (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
    ++ctr;
    if (is_endian.little)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c+12,ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
903
/*
 * Feed additional authenticated data into the GHASH. May be called
 * repeatedly, but only before any en/decrypt call (returns -2 once
 * message data has been processed). Returns -1 if the cumulative AAD
 * length exceeds the GCM limit of 2^61 bytes (2^64 bits), 0 on
 * success. Partial-block residue is carried in ctx->ares.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1]) return -2;    /* AAD must precede message data */

    alen += len;
    /* enforce the spec limit; second clause catches u64 wrap-around */
    if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
        return -1;
    ctx->len.u[0] = alen;

    /* finish a partial block left over from a previous call */
    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->ares = n;
            return 0;
        }
    }

#ifdef GHASH
    /* bulk-hash all whole blocks in one call */
    if ((i = (len&(size_t)-16))) {
        GHASH(ctx,aad,i);
        aad += i;
        len -= i;
    }
#else
    while (len>=16) {
        for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx,Xi);
        aad += 16;
        len -= 16;
    }
#endif
    /* stash the tail in Xi; it is multiplied in when the block fills
     * up or when message processing begins */
    if (len) {
        n = (unsigned int)len;
        for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
960
/*
 * Encrypt len bytes from in to out in GCM (CTR) mode, folding the
 * ciphertext into the GHASH state.  May be called repeatedly; partial
 * final blocks are carried across calls in ctx->mres (number of
 * keystream bytes of EKi already consumed).  Returns 0 on success,
 * -1 if the total message length would exceed the GCM limit of
 * 2^36 - 32 bytes (2^39 - 256 bits) or wrap the 64-bit counter.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	/* runtime endianness probe: .little is non-zero on little-endian */
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64 mlen = ctx->len.u[1];
	block128_f block = ctx->block;
	void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	/* names required by the GCM_MUL/GHASH macros */
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

#if 0
	n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
	mlen += len;
	/* 2^36 - 32 bytes is the GCM plaintext limit (2^39 - 256 bits) */
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* extract the 32-bit big-endian counter from the last word of Yi */
	if (is_endian.little)
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		if (n) {
			/* finish the partial block left by the previous call */
			while (n && len) {
				ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
				--len;
				n = (n+1)%16;
			}
			if (n==0) GCM_MUL(ctx,Xi);
			else {
				ctx->mres = n;
				return 0;
			}
		}
#if defined(STRICT_ALIGNMENT)
		/* word-at-a-time path needs aligned pointers; otherwise fall
		 * through to the byte-wise loop below */
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		/* encrypt GHASH_CHUNK bytes, then hash the produced
		 * ciphertext in one GHASH call */
		while (len>=GHASH_CHUNK) {
			size_t j=GHASH_CHUNK;

			while (j) {
				size_t *out_t=(size_t *)out;
				const size_t *in_t=(const size_t *)in;

				(*block)(ctx->Yi.c,ctx->EKi.c,key);
				++ctr;
				if (is_endian.little)
#ifdef BSWAP4
					ctx->Yi.d[3] = BSWAP4(ctr);
#else
					PUTU32(ctx->Yi.c+12,ctr);
#endif
				else
					ctx->Yi.d[3] = ctr;
				for (i=0; i<16/sizeof(size_t); ++i)
					out_t[i] = in_t[i] ^ ctx->EKi.t[i];
				out += 16;
				in  += 16;
				j   -= 16;
			}
			GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
			len -= GHASH_CHUNK;
		}
		/* remaining whole blocks, hashed in one trailing GHASH call */
		if ((i = (len&(size_t)-16))) {
			size_t j=i;

			while (len>=16) {
				size_t *out_t=(size_t *)out;
				const size_t *in_t=(const size_t *)in;

				(*block)(ctx->Yi.c,ctx->EKi.c,key);
				++ctr;
				if (is_endian.little)
#ifdef BSWAP4
					ctx->Yi.d[3] = BSWAP4(ctr);
#else
					PUTU32(ctx->Yi.c+12,ctr);
#endif
				else
					ctx->Yi.d[3] = ctr;
				for (i=0; i<16/sizeof(size_t); ++i)
					out_t[i] = in_t[i] ^ ctx->EKi.t[i];
				out += 16;
				in  += 16;
				len -= 16;
			}
			GHASH(ctx,out-j,j);
		}
#else
		/* no bulk GHASH: multiply after every encrypted block */
		while (len>=16) {
			size_t *out_t=(size_t *)out;
			const size_t *in_t=(const size_t *)in;

			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16/sizeof(size_t); ++i)
				ctx->Xi.t[i] ^=
				out_t[i] = in_t[i]^ctx->EKi.t[i];
			GCM_MUL(ctx,Xi);
			out += 16;
			in  += 16;
			len -= 16;
		}
#endif
		/* trailing partial block: keystream stays in EKi for the
		 * next call (residue count recorded in mres) */
		if (len) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
			while (len--) {
				ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
				++n;
			}
		}

		ctx->mres = n;
		return 0;
	} while(0);
#endif
	/* small-footprint / unaligned fallback: strictly byte-wise */
	for (i=0;i<len;++i) {
		if (n==0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
#ifdef BSWAP4
				ctx->Yi.d[3] = BSWAP4(ctr);
#else
				PUTU32(ctx->Yi.c+12,ctr);
#endif
			else
				ctx->Yi.d[3] = ctr;
		}
		ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
		n = (n+1)%16;
		if (n==0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = n;
	return 0;
}
1140
1f2502eb 1141int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
e7f5b1cd
AP
1142 const unsigned char *in, unsigned char *out,
1143 size_t len)
1144{
1145 const union { long one; char little; } is_endian = {1};
1146 unsigned int n, ctr;
1147 size_t i;
3f0d1405
AP
1148 u64 mlen = ctx->len.u[1];
1149 block128_f block = ctx->block;
1150 void *key = ctx->key;
d8d95832 1151#ifdef GCM_FUNCREF_4BIT
7af04002 1152 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 1153# ifdef GHASH
7af04002
AP
1154 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1155 const u8 *inp,size_t len) = ctx->ghash;
d8d95832
AP
1156# endif
1157#endif
1f2502eb
AP
1158
1159 mlen += len;
1160 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1161 return -1;
1162 ctx->len.u[1] = mlen;
e7f5b1cd 1163
b68c1315
AP
1164 if (ctx->ares) {
1165 /* First call to decrypt finalizes GHASH(AAD) */
1166 GCM_MUL(ctx,Xi);
1167 ctx->ares = 0;
1168 }
1169
f472ec8c 1170 if (is_endian.little)
997d1aac
AP
1171#ifdef BSWAP4
1172 ctr = BSWAP4(ctx->Yi.d[3]);
1173#else
f472ec8c 1174 ctr = GETU32(ctx->Yi.c+12);
997d1aac 1175#endif
f472ec8c
AP
1176 else
1177 ctr = ctx->Yi.d[3];
e7f5b1cd 1178
1f2502eb 1179 n = ctx->mres;
e7f5b1cd
AP
1180#if !defined(OPENSSL_SMALL_FOOTPRINT)
1181 if (16%sizeof(size_t) == 0) do { /* always true actually */
1182 if (n) {
1183 while (n && len) {
1184 u8 c = *(in++);
1185 *(out++) = c^ctx->EKi.c[n];
1186 ctx->Xi.c[n] ^= c;
1187 --len;
1188 n = (n+1)%16;
1189 }
1190 if (n==0) GCM_MUL (ctx,Xi);
1191 else {
b68c1315 1192 ctx->mres = n;
1f2502eb 1193 return 0;
e7f5b1cd
AP
1194 }
1195 }
e7f5b1cd
AP
1196#if defined(STRICT_ALIGNMENT)
1197 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1198 break;
1199#endif
a595baff 1200#if defined(GHASH) && defined(GHASH_CHUNK)
2262beef
AP
1201 while (len>=GHASH_CHUNK) {
1202 size_t j=GHASH_CHUNK;
1203
c1f092d1 1204 GHASH(ctx,in,GHASH_CHUNK);
2262beef 1205 while (j) {
96a4cf8c
AP
1206 size_t *out_t=(size_t *)out;
1207 const size_t *in_t=(const size_t *)in;
1208
3f0d1405 1209 (*block)(ctx->Yi.c,ctx->EKi.c,key);
e7f5b1cd
AP
1210 ++ctr;
1211 if (is_endian.little)
997d1aac
AP
1212#ifdef BSWAP4
1213 ctx->Yi.d[3] = BSWAP4(ctr);
1214#else
e7f5b1cd 1215 PUTU32(ctx->Yi.c+12,ctr);
997d1aac 1216#endif
e7f5b1cd
AP
1217 else
1218 ctx->Yi.d[3] = ctr;
96a4cf8c
AP
1219 for (i=0; i<16/sizeof(size_t); ++i)
1220 out_t[i] = in_t[i]^ctx->EKi.t[i];
2262beef
AP
1221 out += 16;
1222 in += 16;
1223 j -= 16;
1224 }
1225 len -= GHASH_CHUNK;
1226 }
1227 if ((i = (len&(size_t)-16))) {
c1f092d1 1228 GHASH(ctx,in,i);
2262beef 1229 while (len>=16) {
96a4cf8c
AP
1230 size_t *out_t=(size_t *)out;
1231 const size_t *in_t=(const size_t *)in;
1232
3f0d1405 1233 (*block)(ctx->Yi.c,ctx->EKi.c,key);
2262beef
AP
1234 ++ctr;
1235 if (is_endian.little)
997d1aac
AP
1236#ifdef BSWAP4
1237 ctx->Yi.d[3] = BSWAP4(ctr);
1238#else
2262beef 1239 PUTU32(ctx->Yi.c+12,ctr);
997d1aac 1240#endif
2262beef
AP
1241 else
1242 ctx->Yi.d[3] = ctr;
96a4cf8c
AP
1243 for (i=0; i<16/sizeof(size_t); ++i)
1244 out_t[i] = in_t[i]^ctx->EKi.t[i];
2262beef
AP
1245 out += 16;
1246 in += 16;
1247 len -= 16;
1248 }
1249 }
1250#else
1251 while (len>=16) {
96a4cf8c
AP
1252 size_t *out_t=(size_t *)out;
1253 const size_t *in_t=(const size_t *)in;
1254
3f0d1405 1255 (*block)(ctx->Yi.c,ctx->EKi.c,key);
2262beef
AP
1256 ++ctr;
1257 if (is_endian.little)
997d1aac
AP
1258#ifdef BSWAP4
1259 ctx->Yi.d[3] = BSWAP4(ctr);
1260#else
2262beef 1261 PUTU32(ctx->Yi.c+12,ctr);
997d1aac 1262#endif
2262beef
AP
1263 else
1264 ctx->Yi.d[3] = ctr;
96a4cf8c
AP
1265 for (i=0; i<16/sizeof(size_t); ++i) {
1266 size_t c = in[i];
1267 out[i] = c^ctx->EKi.t[i];
1268 ctx->Xi.t[i] ^= c;
e7f5b1cd 1269 }
2262beef 1270 GCM_MUL(ctx,Xi);
e7f5b1cd
AP
1271 out += 16;
1272 in += 16;
1273 len -= 16;
1274 }
2262beef 1275#endif
e7f5b1cd 1276 if (len) {
3f0d1405 1277 (*block)(ctx->Yi.c,ctx->EKi.c,key);
e7f5b1cd
AP
1278 ++ctr;
1279 if (is_endian.little)
997d1aac
AP
1280#ifdef BSWAP4
1281 ctx->Yi.d[3] = BSWAP4(ctr);
1282#else
e7f5b1cd 1283 PUTU32(ctx->Yi.c+12,ctr);
997d1aac 1284#endif
e7f5b1cd
AP
1285 else
1286 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1287 while (len--) {
1288 u8 c = in[n];
1289 ctx->Xi.c[n] ^= c;
1290 out[n] = c^ctx->EKi.c[n];
1291 ++n;
1292 }
1293 }
1294
b68c1315 1295 ctx->mres = n;
1f2502eb 1296 return 0;
e7f5b1cd
AP
1297 } while(0);
1298#endif
1299 for (i=0;i<len;++i) {
1300 u8 c;
1301 if (n==0) {
3f0d1405 1302 (*block)(ctx->Yi.c,ctx->EKi.c,key);
e7f5b1cd
AP
1303 ++ctr;
1304 if (is_endian.little)
997d1aac
AP
1305#ifdef BSWAP4
1306 ctx->Yi.d[3] = BSWAP4(ctr);
1307#else
e7f5b1cd 1308 PUTU32(ctx->Yi.c+12,ctr);
997d1aac 1309#endif
e7f5b1cd
AP
1310 else
1311 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1312 }
1313 c = in[i];
68e2586b 1314 out[i] = c^ctx->EKi.c[n];
e7f5b1cd
AP
1315 ctx->Xi.c[n] ^= c;
1316 n = (n+1)%16;
1317 if (n==0)
1318 GCM_MUL(ctx,Xi);
1319 }
1320
b68c1315 1321 ctx->mres = n;
1f2502eb 1322 return 0;
e7f5b1cd
AP
1323}
1324
/*
 * Encrypt using a caller-supplied 32-bit-counter CTR implementation
 * (ctr128_f stream), e.g. a hardware-accelerated routine, instead of
 * calling ctx->block once per 16-byte block.  Bookkeeping (length
 * limits, ares/mres residues, GHASH of the produced ciphertext) is
 * identical to CRYPTO_gcm128_encrypt.  Returns 0 on success, -1 on
 * message-length overflow.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len, ctr128_f stream)
{
	/* runtime endianness probe: .little is non-zero on little-endian */
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64 mlen = ctx->len.u[1];
	void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	/* names required by the GCM_MUL/GHASH macros */
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

	mlen += len;
	/* 2^36 - 32 bytes is the GCM plaintext limit */
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* extract the 32-bit big-endian counter from the last word of Yi */
	if (is_endian.little)
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
	if (n) {
		/* finish the partial block left by the previous call */
		while (n && len) {
			ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL(ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* bulk path: stream-encrypt a chunk, resync Yi's counter word,
	 * then hash the produced ciphertext */
	while (len>=GHASH_CHUNK) {
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		GHASH(ctx,out,GHASH_CHUNK);
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	/* remaining whole blocks in one stream call */
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		in  += i;
		len -= i;
#if defined(GHASH)
		GHASH(ctx,out,i);
		out += i;
#else
		/* no bulk GHASH: hash the ciphertext block by block */
		while (j--) {
			for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
			GCM_MUL(ctx,Xi);
			out += 16;
		}
#endif
	}
	/* trailing partial block: generate one keystream block with the
	 * plain block function; residue count recorded in mres */
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}
1439
/*
 * Decrypt using a caller-supplied 32-bit-counter CTR implementation
 * (ctr128_f stream).  Mirror image of CRYPTO_gcm128_encrypt_ctr32:
 * the ciphertext is hashed BEFORE it is decrypted.  Returns 0 on
 * success, -1 on message-length overflow.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len,ctr128_f stream)
{
	/* runtime endianness probe: .little is non-zero on little-endian */
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64 mlen = ctx->len.u[1];
	void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	/* names required by the GCM_MUL/GHASH macros */
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

	mlen += len;
	/* 2^36 - 32 bytes is the GCM plaintext limit */
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* extract the 32-bit big-endian counter from the last word of Yi */
	if (is_endian.little)
#ifdef BSWAP4
		ctr = BSWAP4(ctx->Yi.d[3]);
#else
		ctr = GETU32(ctx->Yi.c+12);
#endif
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
	if (n) {
		/* finish the partial block left by the previous call;
		 * hash the ciphertext byte before overwriting it */
		while (n && len) {
			u8 c = *(in++);
			*(out++) = c^ctx->EKi.c[n];
			ctx->Xi.c[n] ^= c;
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL (ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* bulk path: hash the ciphertext chunk, stream-decrypt it, then
	 * resync Yi's counter word */
	while (len>=GHASH_CHUNK) {
		GHASH(ctx,in,GHASH_CHUNK);
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	/* remaining whole blocks */
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

#if defined(GHASH)
		GHASH(ctx,in,i);
#else
		/* no bulk GHASH: hash block by block, then rewind in so the
		 * stream call below sees the start of the region */
		while (j--) {
			size_t k;
			for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
			GCM_MUL(ctx,Xi);
			in += 16;
		}
		j = i/16;
		in -= i;
#endif
		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		out += i;
		in  += i;
		len -= i;
	}
	/* trailing partial block: generate one keystream block with the
	 * plain block function; residue count recorded in mres */
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
#ifdef BSWAP4
			ctx->Yi.d[3] = BSWAP4(ctr);
#else
			PUTU32(ctx->Yi.c+12,ctr);
#endif
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			u8 c = in[n];
			ctx->Xi.c[n] ^= c;
			out[n] = c^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}
1561
6acb4ff3
AP
1562int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1563 size_t len)
e7f5b1cd
AP
1564{
1565 const union { long one; char little; } is_endian = {1};
1566 u64 alen = ctx->len.u[0]<<3;
1567 u64 clen = ctx->len.u[1]<<3;
d8d95832 1568#ifdef GCM_FUNCREF_4BIT
7af04002 1569 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 1570#endif
e7f5b1cd 1571
9ddd859d 1572 if (ctx->mres || ctx->ares)
e7f5b1cd
AP
1573 GCM_MUL(ctx,Xi);
1574
1575 if (is_endian.little) {
1576#ifdef BSWAP8
1577 alen = BSWAP8(alen);
1578 clen = BSWAP8(clen);
1579#else
1580 u8 *p = ctx->len.c;
1581
1582 ctx->len.u[0] = alen;
1583 ctx->len.u[1] = clen;
1584
1585 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1586 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1587#endif
1588 }
1589
1590 ctx->Xi.u[0] ^= alen;
1591 ctx->Xi.u[1] ^= clen;
1592 GCM_MUL(ctx,Xi);
1593
1594 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1595 ctx->Xi.u[1] ^= ctx->EK0.u[1];
6acb4ff3
AP
1596
1597 if (tag && len<=sizeof(ctx->Xi))
1598 return memcmp(ctx->Xi.c,tag,len);
1599 else
1600 return -1;
1601}
1602
fd3dbc1d
DSH
1603void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1604{
1605 CRYPTO_gcm128_finish(ctx, NULL, 0);
1f2502eb 1606 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
fd3dbc1d
DSH
1607}
1608
6acb4ff3
AP
1609GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1610{
1611 GCM128_CONTEXT *ret;
1612
1613 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1614 CRYPTO_gcm128_init(ret,key,block);
1615
1616 return ret;
1617}
1618
1619void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1620{
1621 if (ctx) {
1622 OPENSSL_cleanse(ctx,sizeof(*ctx));
1623 OPENSSL_free(ctx);
1624 }
e7f5b1cd
AP
1625}
1626
1627#if defined(SELFTEST)
1628#include <stdio.h>
1629#include <openssl/aes.h>
1630
1631/* Test Case 1 */
1632static const u8 K1[16],
1633 *P1=NULL,
1634 *A1=NULL,
1635 IV1[12],
1636 *C1=NULL,
1637 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
a595baff 1638
e7f5b1cd
AP
1639/* Test Case 2 */
1640#define K2 K1
1641#define A2 A1
1642#define IV2 IV1
1643static const u8 P2[16],
1644 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1645 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1646
1647/* Test Case 3 */
1648#define A3 A2
1649static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1650 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1651 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1652 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1653 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1654 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1655 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1656 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1657 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1658 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
fb2d5a91 1659 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
e7f5b1cd
AP
1660
1661/* Test Case 4 */
1662#define K4 K3
1663#define IV4 IV3
1664static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1665 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1666 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1667 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1668 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1669 0xab,0xad,0xda,0xd2},
1670 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1671 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1672 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1673 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1674 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1675
1676/* Test Case 5 */
1677#define K5 K4
1678#define P5 P4
d8d95832
AP
1679#define A5 A4
1680static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
e7f5b1cd
AP
1681 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1682 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1683 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1684 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1685 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
a595baff 1686
e7f5b1cd
AP
1687/* Test Case 6 */
1688#define K6 K5
1689#define P6 P5
1690#define A6 A5
1691static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1692 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1693 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1694 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1695 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1696 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1697 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1698 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1699 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1700
1701/* Test Case 7 */
1702static const u8 K7[24],
1703 *P7=NULL,
1704 *A7=NULL,
1705 IV7[12],
1706 *C7=NULL,
1707 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1708
1709/* Test Case 8 */
1710#define K8 K7
1711#define IV8 IV7
1712#define A8 A7
1713static const u8 P8[16],
1714 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1715 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1716
1717/* Test Case 9 */
1718#define A9 A8
1719static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1720 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1721 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1722 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1723 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1724 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1725 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1726 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1727 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1728 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1729 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1730 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1731
1732/* Test Case 10 */
1733#define K10 K9
1734#define IV10 IV9
1735static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1736 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1737 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1738 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1739 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1740 0xab,0xad,0xda,0xd2},
1741 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1742 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1743 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1744 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1745 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1746
1747/* Test Case 11 */
1748#define K11 K10
1749#define P11 P10
1750#define A11 A10
1751static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1752 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1753 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1754 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1755 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1756 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1757
1758/* Test Case 12 */
1759#define K12 K11
1760#define P12 P11
1761#define A12 A11
1762static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1763 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1764 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1765 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1766 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1767 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1768 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1769 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1770 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1771
1772/* Test Case 13 */
1773static const u8 K13[32],
1774 *P13=NULL,
1775 *A13=NULL,
1776 IV13[12],
1777 *C13=NULL,
1778 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1779
1780/* Test Case 14 */
1781#define K14 K13
1782#define A14 A13
1783static const u8 P14[16],
1784 IV14[12],
1785 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1786 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1787
1788/* Test Case 15 */
1789#define A15 A14
1790static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1791 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1792 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1793 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1794 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1795 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1796 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1797 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1798 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1799 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1800 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1801 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1802
1803/* Test Case 16 */
1804#define K16 K15
1805#define IV16 IV15
1806static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1807 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1808 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1809 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1810 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1811 0xab,0xad,0xda,0xd2},
1812 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1813 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1814 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1815 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1816 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1817
1818/* Test Case 17 */
1819#define K17 K16
1820#define P17 P16
1821#define A17 A16
1822static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1823 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1824 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1825 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1826 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1827 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1828
1829/* Test Case 18 */
1830#define K18 K17
1831#define P18 P17
1832#define A18 A17
1833static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1834 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1835 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1836 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1837 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1838 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1839 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1840 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1841 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1842
273a8081
AP
1843/* Test Case 19 */
1844#define K19 K1
1845#define P19 P1
1846#define IV19 IV1
1847#define C19 C1
1848static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1849 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1850 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1851 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
1852 0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1853 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1854 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1855 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1856 T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};
1857
1da5d302
AP
/* Test Case 20: same key and AAD as Test Case 1.  IV20 is 64 bytes with
 * only the first four bytes set to 0xff (remaining bytes zero via static
 * initialization), chosen so the derived counter's least-significant byte
 * starts at 0xff and wraps during encryption.  P20 is 288 zero bytes;
 * C20 is the 288-byte expected ciphertext and T20 the 16-byte tag. */
/* Test Case 20 */
#define K20 K1
#define A20 A1
static const u8 IV20[64]={0xff,0xff,0xff,0xff},	/* this results in 0xff in counter LSB */
		P20[288],
		C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
			0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
			0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
			0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
			0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
			0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
			0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
			0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
			0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
			0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
			0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
			0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
			0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
			0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
			0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
			0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
			0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
			0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
		T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
1882
e7f5b1cd
AP
/* Run one known-answer test.  Token-pastes the test number n onto the
 * vector names (K##n key, IV##n, A##n AAD, P##n plaintext, C##n expected
 * ciphertext, T##n expected 16-byte tag) and performs two passes:
 *   1. encrypt P##n and compare the output against C##n and the tag;
 *   2. re-setiv, decrypt C##n and compare the output against P##n.
 * Either mismatch (or a CRYPTO_gcm128_finish tag failure) increments the
 * enclosing `ret` counter and prints a diagnostic.  Relies on `ctx` and
 * `key` declared in the invoking scope (main).  NOTE(review): the
 * `if (A##n)` / `if (P##n)` / `if (C##n)` guards test array addresses,
 * which are always non-NULL for defined arrays; they only matter for
 * vectors defined as null-equivalent macros — confirm against the
 * earlier test-case definitions. */
#define TEST_CASE(n) do {					\
	u8 out[sizeof(P##n)];					\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);		\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (C##n && memcmp(out,C##n,sizeof(out))))		\
		ret++, printf ("encrypt test#%d failed.\n",n);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (P##n && memcmp(out,P##n,sizeof(out))))		\
		ret++, printf ("decrypt test#%d failed.\n",n);	\
	} while(0)
1902
1903int main()
1904{
1905 GCM128_CONTEXT ctx;
1906 AES_KEY key;
1907 int ret=0;
1908
1909 TEST_CASE(1);
1910 TEST_CASE(2);
1911 TEST_CASE(3);
1912 TEST_CASE(4);
1913 TEST_CASE(5);
1914 TEST_CASE(6);
1915 TEST_CASE(7);
1916 TEST_CASE(8);
1917 TEST_CASE(9);
1918 TEST_CASE(10);
1919 TEST_CASE(11);
1920 TEST_CASE(12);
1921 TEST_CASE(13);
1922 TEST_CASE(14);
1923 TEST_CASE(15);
1924 TEST_CASE(16);
1925 TEST_CASE(17);
1926 TEST_CASE(18);
273a8081 1927 TEST_CASE(19);
1da5d302 1928 TEST_CASE(20);
e7f5b1cd 1929
a595baff 1930#ifdef OPENSSL_CPUID_OBJ
2262beef
AP
1931 {
1932 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1933 union { u64 u; u8 c[1024]; } buf;
c1f092d1 1934 int i;
2262beef
AP
1935
1936 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1937 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1938 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1939
1940 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1941 start = OPENSSL_rdtsc();
1942 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1943 gcm_t = OPENSSL_rdtsc() - start;
1944
1945 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
b68c1315 1946 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
2262beef
AP
1947 (block128_f)AES_encrypt);
1948 start = OPENSSL_rdtsc();
1949 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
b68c1315 1950 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
a595baff 1951 (block128_f)AES_encrypt);
2262beef
AP
1952 ctr_t = OPENSSL_rdtsc() - start;
1953
1954 printf("%.2f-%.2f=%.2f\n",
1955 gcm_t/(double)sizeof(buf),
1956 ctr_t/(double)sizeof(buf),
1957 (gcm_t-ctr_t)/(double)sizeof(buf));
a595baff 1958#ifdef GHASH
23a05fa0
AP
1959 {
1960 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1961 const u8 *inp,size_t len) = ctx.ghash;
1962
1963 GHASH((&ctx),buf.c,sizeof(buf));
a595baff 1964 start = OPENSSL_rdtsc();
8d1b199d 1965 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
a595baff 1966 gcm_t = OPENSSL_rdtsc() - start;
c1f092d1 1967 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
23a05fa0 1968 }
a595baff 1969#endif
2262beef 1970 }
a595baff 1971#endif
2262beef 1972
e7f5b1cd
AP
1973 return ret;
1974}
1975#endif