]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/gcm128.c
Add GHASH for PowerISA 2.0.7.
[thirdparty/openssl.git] / crypto / modes / gcm128.c
CommitLineData
e7f5b1cd
AP
1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
f4001a0d
DSH
50#define OPENSSL_FIPSAPI
51
aa763c0f 52#include <openssl/crypto.h>
f472ec8c 53#include "modes_lcl.h"
e7f5b1cd
AP
54#include <string.h>
55
56#ifndef MODES_DEBUG
57# ifndef NDEBUG
58# define NDEBUG
59# endif
60#endif
61#include <assert.h>
62
f472ec8c
AP
63#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64/* redefine, because alignment is ensured */
65#undef GETU32
66#define GETU32(p) BSWAP4(*(const u32 *)(p))
67#undef PUTU32
68#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
e7f5b1cd
AP
69#endif
70
c1f092d1
AP
71#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72#define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
77 } \
78 else { \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
82 } \
83} while(0)
84
d8d95832
AP
85/*
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
106 *
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
116 *
117 * Value of 1 is not appropriate for performance reasons.
118 */
a595baff
AP
119#if TABLE_BITS==8
120
e7f5b1cd
AP
121static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122{
123 int i, j;
124 u128 V;
125
126 Htable[0].hi = 0;
127 Htable[0].lo = 0;
128 V.hi = H[0];
129 V.lo = H[1];
130
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
c1f092d1 132 REDUCE1BIT(V);
e7f5b1cd
AP
133 Htable[i] = V;
134 }
135
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
141 }
142 }
143}
144
d8d95832 145static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
e7f5b1cd
AP
146{
147 u128 Z = { 0, 0};
148 const u8 *xi = (const u8 *)Xi+15;
149 size_t rem, n = *xi;
150 const union { long one; char little; } is_endian = {1};
03e389cf 151 __fips_constseg
e7f5b1cd
AP
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
217
218 while (1) {
219 Z.hi ^= Htable[n].hi;
220 Z.lo ^= Htable[n].lo;
221
222 if ((u8 *)Xi==xi) break;
223
224 n = *(--xi);
225
226 rem = (size_t)Z.lo&0xff;
227 Z.lo = (Z.hi<<56)|(Z.lo>>8);
228 Z.hi = (Z.hi>>8);
229 if (sizeof(size_t)==8)
230 Z.hi ^= rem_8bit[rem];
231 else
232 Z.hi ^= (u64)rem_8bit[rem]<<32;
233 }
234
235 if (is_endian.little) {
236#ifdef BSWAP8
237 Xi[0] = BSWAP8(Z.hi);
238 Xi[1] = BSWAP8(Z.lo);
239#else
240 u8 *p = (u8 *)Xi;
241 u32 v;
242 v = (u32)(Z.hi>>32); PUTU32(p,v);
243 v = (u32)(Z.hi); PUTU32(p+4,v);
244 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
245 v = (u32)(Z.lo); PUTU32(p+12,v);
246#endif
247 }
248 else {
249 Xi[0] = Z.hi;
250 Xi[1] = Z.lo;
251 }
252}
a595baff 253#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
e7f5b1cd 254
a595baff 255#elif TABLE_BITS==4
2262beef 256
e7f5b1cd
AP
257static void gcm_init_4bit(u128 Htable[16], u64 H[2])
258{
e7f5b1cd 259 u128 V;
f472ec8c
AP
260#if defined(OPENSSL_SMALL_FOOTPRINT)
261 int i;
262#endif
e7f5b1cd
AP
263
264 Htable[0].hi = 0;
265 Htable[0].lo = 0;
266 V.hi = H[0];
267 V.lo = H[1];
268
f472ec8c 269#if defined(OPENSSL_SMALL_FOOTPRINT)
e7f5b1cd 270 for (Htable[8]=V, i=4; i>0; i>>=1) {
c1f092d1 271 REDUCE1BIT(V);
e7f5b1cd
AP
272 Htable[i] = V;
273 }
274
275 for (i=2; i<16; i<<=1) {
2262beef
AP
276 u128 *Hi = Htable+i;
277 int j;
278 for (V=*Hi, j=1; j<i; ++j) {
279 Hi[j].hi = V.hi^Htable[j].hi;
280 Hi[j].lo = V.lo^Htable[j].lo;
e7f5b1cd
AP
281 }
282 }
2262beef 283#else
f472ec8c 284 Htable[8] = V;
c1f092d1 285 REDUCE1BIT(V);
f472ec8c 286 Htable[4] = V;
c1f092d1 287 REDUCE1BIT(V);
f472ec8c 288 Htable[2] = V;
c1f092d1 289 REDUCE1BIT(V);
f472ec8c 290 Htable[1] = V;
2262beef
AP
291 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
292 V=Htable[4];
293 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
294 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
295 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
296 V=Htable[8];
297 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
298 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
299 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
300 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
301 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
302 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
303 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
304#endif
f472ec8c
AP
305#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
306 /*
307 * ARM assembler expects specific dword order in Htable.
308 */
309 {
310 int j;
311 const union { long one; char little; } is_endian = {1};
312
313 if (is_endian.little)
314 for (j=0;j<16;++j) {
315 V = Htable[j];
316 Htable[j].hi = V.lo;
317 Htable[j].lo = V.hi;
318 }
319 else
320 for (j=0;j<16;++j) {
321 V = Htable[j];
322 Htable[j].hi = V.lo<<32|V.lo>>32;
323 Htable[j].lo = V.hi<<32|V.hi>>32;
324 }
325 }
326#endif
e7f5b1cd
AP
327}
328
a595baff 329#ifndef GHASH_ASM
03e389cf 330__fips_constseg
2262beef
AP
331static const size_t rem_4bit[16] = {
332 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
336
4f39edbf 337static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
e7f5b1cd 338{
2262beef
AP
339 u128 Z;
340 int cnt = 15;
341 size_t rem, nlo, nhi;
e7f5b1cd 342 const union { long one; char little; } is_endian = {1};
2262beef
AP
343
344 nlo = ((const u8 *)Xi)[15];
345 nhi = nlo>>4;
346 nlo &= 0xf;
347
348 Z.hi = Htable[nlo].hi;
349 Z.lo = Htable[nlo].lo;
e7f5b1cd
AP
350
351 while (1) {
2262beef
AP
352 rem = (size_t)Z.lo&0xf;
353 Z.lo = (Z.hi<<60)|(Z.lo>>4);
354 Z.hi = (Z.hi>>4);
355 if (sizeof(size_t)==8)
356 Z.hi ^= rem_4bit[rem];
357 else
358 Z.hi ^= (u64)rem_4bit[rem]<<32;
359
360 Z.hi ^= Htable[nhi].hi;
361 Z.lo ^= Htable[nhi].lo;
362
363 if (--cnt<0) break;
364
365 nlo = ((const u8 *)Xi)[cnt];
e7f5b1cd
AP
366 nhi = nlo>>4;
367 nlo &= 0xf;
368
2262beef
AP
369 rem = (size_t)Z.lo&0xf;
370 Z.lo = (Z.hi<<60)|(Z.lo>>4);
371 Z.hi = (Z.hi>>4);
372 if (sizeof(size_t)==8)
373 Z.hi ^= rem_4bit[rem];
374 else
375 Z.hi ^= (u64)rem_4bit[rem]<<32;
376
e7f5b1cd
AP
377 Z.hi ^= Htable[nlo].hi;
378 Z.lo ^= Htable[nlo].lo;
2262beef 379 }
e7f5b1cd 380
2262beef
AP
381 if (is_endian.little) {
382#ifdef BSWAP8
383 Xi[0] = BSWAP8(Z.hi);
384 Xi[1] = BSWAP8(Z.lo);
385#else
386 u8 *p = (u8 *)Xi;
387 u32 v;
388 v = (u32)(Z.hi>>32); PUTU32(p,v);
389 v = (u32)(Z.hi); PUTU32(p+4,v);
390 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
391 v = (u32)(Z.lo); PUTU32(p+12,v);
392#endif
393 }
394 else {
395 Xi[0] = Z.hi;
396 Xi[1] = Z.lo;
397 }
398}
399
400#if !defined(OPENSSL_SMALL_FOOTPRINT)
401/*
402 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
a595baff
AP
403 * details... Compiler-generated code doesn't seem to give any
404 * performance improvement, at least not on x86[_64]. It's here
405 * mostly as reference and a placeholder for possible future
406 * non-trivial optimization[s]...
2262beef 407 */
4f39edbf
AP
408static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
409 const u8 *inp,size_t len)
2262beef
AP
410{
411 u128 Z;
412 int cnt;
413 size_t rem, nlo, nhi;
414 const union { long one; char little; } is_endian = {1};
415
e747f4d4 416#if 1
2262beef
AP
417 do {
418 cnt = 15;
419 nlo = ((const u8 *)Xi)[15];
420 nlo ^= inp[15];
421 nhi = nlo>>4;
422 nlo &= 0xf;
423
424 Z.hi = Htable[nlo].hi;
425 Z.lo = Htable[nlo].lo;
426
427 while (1) {
e7f5b1cd
AP
428 rem = (size_t)Z.lo&0xf;
429 Z.lo = (Z.hi<<60)|(Z.lo>>4);
430 Z.hi = (Z.hi>>4);
431 if (sizeof(size_t)==8)
432 Z.hi ^= rem_4bit[rem];
433 else
434 Z.hi ^= (u64)rem_4bit[rem]<<32;
435
436 Z.hi ^= Htable[nhi].hi;
437 Z.lo ^= Htable[nhi].lo;
438
2262beef 439 if (--cnt<0) break;
e7f5b1cd 440
2262beef
AP
441 nlo = ((const u8 *)Xi)[cnt];
442 nlo ^= inp[cnt];
443 nhi = nlo>>4;
444 nlo &= 0xf;
e7f5b1cd
AP
445
446 rem = (size_t)Z.lo&0xf;
447 Z.lo = (Z.hi<<60)|(Z.lo>>4);
448 Z.hi = (Z.hi>>4);
449 if (sizeof(size_t)==8)
450 Z.hi ^= rem_4bit[rem];
451 else
452 Z.hi ^= (u64)rem_4bit[rem]<<32;
2262beef
AP
453
454 Z.hi ^= Htable[nlo].hi;
455 Z.lo ^= Htable[nlo].lo;
e7f5b1cd 456 }
e747f4d4
AP
457#else
458 /*
459 * Extra 256+16 bytes per-key plus 512 bytes shared tables
460 * [should] give ~50% improvement... One could have PACK()-ed
6acb4ff3
AP
461 * the rem_8bit even here, but the priority is to minimize
462 * cache footprint...
e747f4d4
AP
463 */
464 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
465 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
03e389cf 466 __fips_constseg
e747f4d4
AP
467 static const unsigned short rem_8bit[256] = {
468 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
e747f4d4
AP
500 /*
501 * This pre-processing phase slows down procedure by approximately
502 * same time as it makes each loop spin faster. In other words
503 * single block performance is approximately same as straightforward
504 * "4-bit" implementation, and then it goes only faster...
505 */
506 for (cnt=0; cnt<16; ++cnt) {
507 Z.hi = Htable[cnt].hi;
508 Z.lo = Htable[cnt].lo;
509 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510 Hshr4[cnt].hi = (Z.hi>>4);
511 Hshl4[cnt] = (u8)(Z.lo<<4);
512 }
513
514 do {
6acb4ff3 515 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
e747f4d4
AP
516 nlo = ((const u8 *)Xi)[cnt];
517 nlo ^= inp[cnt];
518 nhi = nlo>>4;
519 nlo &= 0xf;
520
521 Z.hi ^= Htable[nlo].hi;
522 Z.lo ^= Htable[nlo].lo;
523
524 rem = (size_t)Z.lo&0xff;
525
526 Z.lo = (Z.hi<<56)|(Z.lo>>8);
527 Z.hi = (Z.hi>>8);
528
529 Z.hi ^= Hshr4[nhi].hi;
530 Z.lo ^= Hshr4[nhi].lo;
531 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
532 }
533
534 nlo = ((const u8 *)Xi)[0];
535 nlo ^= inp[0];
536 nhi = nlo>>4;
537 nlo &= 0xf;
538
539 Z.hi ^= Htable[nlo].hi;
540 Z.lo ^= Htable[nlo].lo;
541
542 rem = (size_t)Z.lo&0xf;
543
544 Z.lo = (Z.hi<<60)|(Z.lo>>4);
545 Z.hi = (Z.hi>>4);
546
547 Z.hi ^= Htable[nhi].hi;
548 Z.lo ^= Htable[nhi].lo;
549 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
550#endif
e7f5b1cd
AP
551
552 if (is_endian.little) {
553#ifdef BSWAP8
554 Xi[0] = BSWAP8(Z.hi);
555 Xi[1] = BSWAP8(Z.lo);
556#else
557 u8 *p = (u8 *)Xi;
558 u32 v;
559 v = (u32)(Z.hi>>32); PUTU32(p,v);
560 v = (u32)(Z.hi); PUTU32(p+4,v);
561 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
562 v = (u32)(Z.lo); PUTU32(p+12,v);
563#endif
564 }
565 else {
566 Xi[0] = Z.hi;
567 Xi[1] = Z.lo;
568 }
2262beef 569 } while (inp+=16, len-=16);
e7f5b1cd 570}
2262beef
AP
571#endif
572#else
4f39edbf
AP
573void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
2262beef
AP
575#endif
576
577#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
a595baff 578#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
c1f092d1 579#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
a595baff
AP
580/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
581 * trashing effect. In other words idea is to hash data while it's
582 * still in L1 cache after encryption pass... */
68e2586b 583#define GHASH_CHUNK (3*1024)
a595baff 584#endif
2262beef 585
a595baff 586#else /* TABLE_BITS */
e7f5b1cd 587
2262beef 588static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
e7f5b1cd
AP
589{
590 u128 V,Z = { 0,0 };
591 long X;
592 int i,j;
593 const long *xi = (const long *)Xi;
594 const union { long one; char little; } is_endian = {1};
595
2262beef 596 V.hi = H[0]; /* H is in host byte order, no byte swapping */
e7f5b1cd
AP
597 V.lo = H[1];
598
599 for (j=0; j<16/sizeof(long); ++j) {
600 if (is_endian.little) {
601 if (sizeof(long)==8) {
602#ifdef BSWAP8
603 X = (long)(BSWAP8(xi[j]));
604#else
605 const u8 *p = (const u8 *)(xi+j);
606 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
607#endif
608 }
609 else {
610 const u8 *p = (const u8 *)(xi+j);
611 X = (long)GETU32(p);
612 }
613 }
614 else
615 X = xi[j];
616
617 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
618 u64 M = (u64)(X>>(8*sizeof(long)-1));
619 Z.hi ^= V.hi&M;
620 Z.lo ^= V.lo&M;
621
c1f092d1 622 REDUCE1BIT(V);
e7f5b1cd
AP
623 }
624 }
625
626 if (is_endian.little) {
627#ifdef BSWAP8
628 Xi[0] = BSWAP8(Z.hi);
629 Xi[1] = BSWAP8(Z.lo);
630#else
631 u8 *p = (u8 *)Xi;
632 u32 v;
633 v = (u32)(Z.hi>>32); PUTU32(p,v);
634 v = (u32)(Z.hi); PUTU32(p+4,v);
635 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
636 v = (u32)(Z.lo); PUTU32(p+12,v);
637#endif
638 }
639 else {
640 Xi[0] = Z.hi;
641 Xi[1] = Z.lo;
642 }
643}
2262beef 644#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
a595baff 645
e7f5b1cd
AP
646#endif
647
82741e9c 648#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
1e863180 649# if !defined(I386_ONLY) && \
c1f092d1
AP
650 (defined(__i386) || defined(__i386__) || \
651 defined(__x86_64) || defined(__x86_64__) || \
652 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
1e863180
AP
653# define GHASH_ASM_X86_OR_64
654# define GCM_FUNCREF_4BIT
c1f092d1
AP
655extern unsigned int OPENSSL_ia32cap_P[2];
656
657void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
660
64f7e2c4 661#if defined(__i386) || defined(__i386__) || defined(_M_IX86)
1da5d302
AP
662# define gcm_init_avx gcm_init_clmul
663# define gcm_gmult_avx gcm_gmult_clmul
664# define gcm_ghash_avx gcm_ghash_clmul
665#else
666void gcm_init_avx(u128 Htable[16],const u64 Xi[2]);
667void gcm_gmult_avx(u64 Xi[2],const u128 Htable[16]);
668void gcm_ghash_avx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
669#endif
670
1e863180
AP
671# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
672# define GHASH_ASM_X86
c1f092d1
AP
673void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
674void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
675
676void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
677void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
1e863180 678# endif
82741e9c 679# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
1e863180
AP
680# include "arm_arch.h"
681# if __ARM_ARCH__>=7
682# define GHASH_ASM_ARM
683# define GCM_FUNCREF_4BIT
82741e9c
AP
684# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
685# if defined(__arm__) || defined(__arm)
686# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
687# endif
f8cee9d0 688void gcm_init_neon(u128 Htable[16],const u64 Xi[2]);
1e863180
AP
689void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
690void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
82741e9c
AP
691void gcm_init_v8(u128 Htable[16],const u64 Xi[2]);
692void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]);
693void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
1e863180 694# endif
23328d4b
AP
695# elif defined(__sparc__) || defined(__sparc)
696# include "sparc_arch.h"
697# define GHASH_ASM_SPARC
698# define GCM_FUNCREF_4BIT
699extern unsigned int OPENSSL_sparcv9cap_P[];
24798c5e 700void gcm_init_vis3(u128 Htable[16],const u64 Xi[2]);
23328d4b
AP
701void gcm_gmult_vis3(u64 Xi[2],const u128 Htable[16]);
702void gcm_ghash_vis3(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
c1f092d1 703# endif
c1f092d1
AP
704#endif
705
7af04002
AP
706#ifdef GCM_FUNCREF_4BIT
707# undef GCM_MUL
708# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
709# ifdef GHASH
710# undef GHASH
711# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
712# endif
713#endif
714
e7f5b1cd
AP
715void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
716{
717 const union { long one; char little; } is_endian = {1};
718
719 memset(ctx,0,sizeof(*ctx));
720 ctx->block = block;
721 ctx->key = key;
722
723 (*block)(ctx->H.c,ctx->H.c,key);
724
725 if (is_endian.little) {
726 /* H is stored in host byte order */
727#ifdef BSWAP8
728 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
729 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
730#else
731 u8 *p = ctx->H.c;
732 u64 hi,lo;
733 hi = (u64)GETU32(p) <<32|GETU32(p+4);
734 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
735 ctx->H.u[0] = hi;
736 ctx->H.u[1] = lo;
737#endif
738 }
739
a595baff
AP
740#if TABLE_BITS==8
741 gcm_init_8bit(ctx->Htable,ctx->H.u);
742#elif TABLE_BITS==4
d8d95832 743# if defined(GHASH_ASM_X86_OR_64)
a6d915e0 744# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
87873f43
AP
745 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
746 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
1da5d302
AP
747 if (((OPENSSL_ia32cap_P[1]>>22)&0x41)==0x41) { /* AVX+MOVBE */
748 gcm_init_avx(ctx->Htable,ctx->H.u);
749 ctx->gmult = gcm_gmult_avx;
750 ctx->ghash = gcm_ghash_avx;
751 } else {
752 gcm_init_clmul(ctx->Htable,ctx->H.u);
753 ctx->gmult = gcm_gmult_clmul;
754 ctx->ghash = gcm_ghash_clmul;
755 }
c1f092d1
AP
756 return;
757 }
a6d915e0 758# endif
e7f5b1cd 759 gcm_init_4bit(ctx->Htable,ctx->H.u);
6acb4ff3 760# if defined(GHASH_ASM_X86) /* x86 only */
98909c1d
AP
761# if defined(OPENSSL_IA32_SSE2)
762 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
763# else
7af04002 764 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
98909c1d 765# endif
c1f092d1
AP
766 ctx->gmult = gcm_gmult_4bit_mmx;
767 ctx->ghash = gcm_ghash_4bit_mmx;
768 } else {
769 ctx->gmult = gcm_gmult_4bit_x86;
770 ctx->ghash = gcm_ghash_4bit_x86;
771 }
772# else
773 ctx->gmult = gcm_gmult_4bit;
774 ctx->ghash = gcm_ghash_4bit;
775# endif
1e863180 776# elif defined(GHASH_ASM_ARM)
82741e9c
AP
777# ifdef PMULL_CAPABLE
778 if (PMULL_CAPABLE) {
779 gcm_init_v8(ctx->Htable,ctx->H.u);
780 ctx->gmult = gcm_gmult_v8;
781 ctx->ghash = gcm_ghash_v8;
782 } else
783# endif
784# ifdef NEON_CAPABLE
785 if (NEON_CAPABLE) {
f8cee9d0 786 gcm_init_neon(ctx->Htable,ctx->H.u);
1e863180
AP
787 ctx->gmult = gcm_gmult_neon;
788 ctx->ghash = gcm_ghash_neon;
82741e9c
AP
789 } else
790# endif
791 {
1e863180
AP
792 gcm_init_4bit(ctx->Htable,ctx->H.u);
793 ctx->gmult = gcm_gmult_4bit;
794 ctx->ghash = gcm_ghash_4bit;
795 }
23328d4b
AP
796# elif defined(GHASH_ASM_SPARC)
797 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
24798c5e 798 gcm_init_vis3(ctx->Htable,ctx->H.u);
23328d4b
AP
799 ctx->gmult = gcm_gmult_vis3;
800 ctx->ghash = gcm_ghash_vis3;
801 } else {
802 gcm_init_4bit(ctx->Htable,ctx->H.u);
803 ctx->gmult = gcm_gmult_4bit;
804 ctx->ghash = gcm_ghash_4bit;
805 }
c1f092d1
AP
806# else
807 gcm_init_4bit(ctx->Htable,ctx->H.u);
808# endif
a595baff 809#endif
e7f5b1cd
AP
810}
811
812void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
813{
814 const union { long one; char little; } is_endian = {1};
f472ec8c 815 unsigned int ctr;
d8d95832 816#ifdef GCM_FUNCREF_4BIT
7af04002 817 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 818#endif
e7f5b1cd
AP
819
820 ctx->Yi.u[0] = 0;
821 ctx->Yi.u[1] = 0;
822 ctx->Xi.u[0] = 0;
823 ctx->Xi.u[1] = 0;
b68c1315
AP
824 ctx->len.u[0] = 0; /* AAD length */
825 ctx->len.u[1] = 0; /* message length */
826 ctx->ares = 0;
827 ctx->mres = 0;
e7f5b1cd
AP
828
829 if (len==12) {
830 memcpy(ctx->Yi.c,iv,12);
831 ctx->Yi.c[15]=1;
f472ec8c 832 ctr=1;
e7f5b1cd
AP
833 }
834 else {
835 size_t i;
836 u64 len0 = len;
837
838 while (len>=16) {
839 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
840 GCM_MUL(ctx,Yi);
841 iv += 16;
842 len -= 16;
843 }
844 if (len) {
845 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
846 GCM_MUL(ctx,Yi);
847 }
848 len0 <<= 3;
849 if (is_endian.little) {
850#ifdef BSWAP8
851 ctx->Yi.u[1] ^= BSWAP8(len0);
852#else
853 ctx->Yi.c[8] ^= (u8)(len0>>56);
854 ctx->Yi.c[9] ^= (u8)(len0>>48);
855 ctx->Yi.c[10] ^= (u8)(len0>>40);
856 ctx->Yi.c[11] ^= (u8)(len0>>32);
857 ctx->Yi.c[12] ^= (u8)(len0>>24);
858 ctx->Yi.c[13] ^= (u8)(len0>>16);
859 ctx->Yi.c[14] ^= (u8)(len0>>8);
860 ctx->Yi.c[15] ^= (u8)(len0);
861#endif
862 }
863 else
864 ctx->Yi.u[1] ^= len0;
865
866 GCM_MUL(ctx,Yi);
867
868 if (is_endian.little)
997d1aac
AP
869#ifdef BSWAP4
870 ctr = BSWAP4(ctx->Yi.d[3]);
871#else
f472ec8c 872 ctr = GETU32(ctx->Yi.c+12);
997d1aac 873#endif
e7f5b1cd 874 else
f472ec8c 875 ctr = ctx->Yi.d[3];
e7f5b1cd
AP
876 }
877
878 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
f472ec8c 879 ++ctr;
2262beef 880 if (is_endian.little)
997d1aac
AP
881#ifdef BSWAP4
882 ctx->Yi.d[3] = BSWAP4(ctr);
883#else
f472ec8c 884 PUTU32(ctx->Yi.c+12,ctr);
997d1aac 885#endif
2262beef 886 else
f472ec8c 887 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
888}
889
/*
 * Feed additional authenticated data (AAD) into the running GHASH.
 * May be called repeatedly; a partially filled block is carried in
 * ctx->Xi with its byte count in ctx->ares and is finalized by the
 * first encrypt/decrypt call.
 *
 * Returns 0 on success, -1 if the accumulated AAD exceeds the GCM
 * limit of 2^61 bytes (2^64 bits), -2 if called after ciphertext
 * processing has already started (ctx->len.u[1] non-zero).
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    /* GCM_MUL/GHASH macros expand to calls through these local pointers */
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1]) return -2;   /* AAD must precede the message body */

    alen += len;
    if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
        return -1;                  /* length limit exceeded or overflow */
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        /* complete the partial block left over from a previous call */
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->ares = n;          /* still partial: stash and return */
            return 0;
        }
    }

#ifdef GHASH
    /* hash all whole 16-byte blocks in one call */
    if ((i = (len&(size_t)-16))) {
        GHASH(ctx,aad,i);
        aad += i;
        len -= i;
    }
#else
    while (len>=16) {
        for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx,Xi);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        /* buffer the trailing partial block in Xi */
        n = (unsigned int)len;
        for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
946
/*
 * GCM encryption: CTR-mode encrypt |len| bytes from |in| to |out| and
 * fold the produced ciphertext into the running GHASH (ctx->Xi).
 * May be called repeatedly; a partial final block is tracked via
 * ctx->mres.  Returns 0 on success, -1 if the total plaintext length
 * exceeds the GCM limit of 2^36-32 bytes.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
        const unsigned char *in, unsigned char *out,
        size_t len)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64        mlen  = ctx->len.u[1];
    block128_f block = ctx->block;
    void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    /* GCM_MUL/GHASH macros expand to calls through these local pointers */
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

#if 0
    n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;              /* message too long or length overflow */
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    /* load the 32-bit big-endian counter from the last word of Yi */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c+12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16%sizeof(size_t) == 0) do {    /* always true actually */
        if (n) {
            /* finish the partial keystream block from a previous call */
            while (n && len) {
                ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                --len;
                n = (n+1)%16;
            }
            if (n==0) GCM_MUL(ctx,Xi);
            else {
                ctx->mres = n;
                return 0;
            }
        }
#if defined(STRICT_ALIGNMENT)
        /* word-wise XOR below requires aligned pointers on strict targets */
        if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
            break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
        /* fast path: encrypt a whole chunk, then GHASH it in one call */
        while (len>=GHASH_CHUNK) {
            size_t j=GHASH_CHUNK;

            while (j) {
                size_t *out_t=(size_t *)out;
                const size_t *in_t=(const size_t *)in;

                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#else
                    PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                out += 16;
                in  += 16;
                j   -= 16;
            }
            GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
            len -= GHASH_CHUNK;
        }
        /* remaining whole blocks, hashed in one GHASH call at the end */
        if ((i = (len&(size_t)-16))) {
            size_t j=i;

            while (len>=16) {
                size_t *out_t=(size_t *)out;
                const size_t *in_t=(const size_t *)in;

                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#else
                    PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                out += 16;
                in  += 16;
                len -= 16;
            }
            GHASH(ctx,out-j,j);
        }
#else
        /* no bulk GHASH: hash each block as it is produced */
        while (len>=16) {
            size_t *out_t=(size_t *)out;
            const size_t *in_t=(const size_t *)in;

            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
            for (i=0; i<16/sizeof(size_t); ++i)
                ctx->Xi.t[i] ^=
                out_t[i] = in_t[i]^ctx->EKi.t[i];
            GCM_MUL(ctx,Xi);
            out += 16;
            in  += 16;
            len -= 16;
        }
#endif
        if (len) {
            /* trailing partial block: generate keystream, buffer in mres */
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
            while (len--) {
                ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                ++n;
            }
        }

        ctx->mres = n;
        return 0;
    } while(0);
#endif
    /* small-footprint / unaligned fallback: strictly byte-at-a-time */
    for (i=0;i<len;++i) {
        if (n==0) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
        ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
        n = (n+1)%16;
        if (n==0)
            GCM_MUL(ctx,Xi);
    }

    ctx->mres = n;
    return 0;
}
1126
/*
 * GCM decryption: fold |len| bytes of ciphertext from |in| into the
 * running GHASH (ctx->Xi) and CTR-mode decrypt them to |out|.  Note
 * that GHASH is computed over the *ciphertext*, so hashing happens
 * before (or from) the input, unlike the encrypt path.  May be called
 * repeatedly; a partial final block is tracked via ctx->mres.
 * Returns 0 on success, -1 if the total length exceeds the GCM limit
 * of 2^36-32 bytes.
 */
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
        const unsigned char *in, unsigned char *out,
        size_t len)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64        mlen  = ctx->len.u[1];
    block128_f block = ctx->block;
    void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    /* GCM_MUL/GHASH macros expand to calls through these local pointers */
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;              /* message too long or length overflow */
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    /* load the 32-bit big-endian counter from the last word of Yi */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c+12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16%sizeof(size_t) == 0) do {    /* always true actually */
        if (n) {
            /* finish the partial keystream block from a previous call;
             * input byte must be saved before out is written (in==out
             * in-place operation is supported) */
            while (n && len) {
                u8 c = *(in++);
                *(out++) = c^ctx->EKi.c[n];
                ctx->Xi.c[n] ^= c;
                --len;
                n = (n+1)%16;
            }
            if (n==0) GCM_MUL (ctx,Xi);
            else {
                ctx->mres = n;
                return 0;
            }
        }
#if defined(STRICT_ALIGNMENT)
        /* word-wise XOR below requires aligned pointers on strict targets */
        if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
            break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
        /* fast path: GHASH the ciphertext chunk first, then decrypt it */
        while (len>=GHASH_CHUNK) {
            size_t j=GHASH_CHUNK;

            GHASH(ctx,in,GHASH_CHUNK);
            while (j) {
                size_t *out_t=(size_t *)out;
                const size_t *in_t=(const size_t *)in;

                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#else
                    PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i]^ctx->EKi.t[i];
                out += 16;
                in  += 16;
                j   -= 16;
            }
            len -= GHASH_CHUNK;
        }
        /* remaining whole blocks: hash first, then decrypt */
        if ((i = (len&(size_t)-16))) {
            GHASH(ctx,in,i);
            while (len>=16) {
                size_t *out_t=(size_t *)out;
                const size_t *in_t=(const size_t *)in;

                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
#ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#else
                    PUTU32(ctx->Yi.c+12,ctr);
#endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16/sizeof(size_t); ++i)
                    out_t[i] = in_t[i]^ctx->EKi.t[i];
                out += 16;
                in  += 16;
                len -= 16;
            }
        }
#else
        /* no bulk GHASH: hash each ciphertext block as it is consumed */
        while (len>=16) {
            size_t *out_t=(size_t *)out;
            const size_t *in_t=(const size_t *)in;

            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
            for (i=0; i<16/sizeof(size_t); ++i) {
                size_t c = in[i];
                out[i] = c^ctx->EKi.t[i];
                ctx->Xi.t[i] ^= c;
            }
            GCM_MUL(ctx,Xi);
            out += 16;
            in  += 16;
            len -= 16;
        }
#endif
        if (len) {
            /* trailing partial block: generate keystream, buffer in mres */
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
            while (len--) {
                u8 c = in[n];
                ctx->Xi.c[n] ^= c;
                out[n] = c^ctx->EKi.c[n];
                ++n;
            }
        }

        ctx->mres = n;
        return 0;
    } while(0);
#endif
    /* small-footprint / unaligned fallback: strictly byte-at-a-time */
    for (i=0;i<len;++i) {
        u8 c;
        if (n==0) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c+12,ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
        c = in[i];
        out[i] = c^ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        n = (n+1)%16;
        if (n==0)
            GCM_MUL(ctx,Xi);
    }

    ctx->mres = n;
    return 0;
}
1310
/*
 * Like CRYPTO_gcm128_encrypt, but bulk CTR keystream generation is
 * delegated to |stream| (a ctr128_f, e.g. an assembly AES-CTR routine)
 * which processes whole blocks; GHASH is then applied to the produced
 * ciphertext.  ctx->block is still used for the final partial block.
 * Returns 0 on success, -1 if the total length exceeds 2^36-32 bytes.
 */
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
        const unsigned char *in, unsigned char *out,
        size_t len, ctr128_f stream)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64   mlen = ctx->len.u[1];
    void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    /* GCM_MUL/GHASH macros expand to calls through these local pointers */
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;              /* message too long or length overflow */
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    /* load the 32-bit big-endian counter from the last word of Yi */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c+12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    if (n) {
        /* finish the partial keystream block from a previous call */
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* bulk path: stream-encrypt a chunk, then GHASH the ciphertext */
    while (len>=GHASH_CHUNK) {
        (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
        ctr += GHASH_CHUNK/16;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx,out,GHASH_CHUNK);
        out += GHASH_CHUNK;
        in  += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len&(size_t)-16))) {
        /* remaining whole blocks */
        size_t j=i/16;

        (*stream)(in,out,j,key,ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        in  += i;
        len -= i;
#if defined(GHASH)
        GHASH(ctx,out,i);
        out += i;
#else
        while (j--) {
            for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx,Xi);
            out += 16;
        }
#endif
    }
    if (len) {
        /* trailing partial block: one block-cipher call, buffer in mres */
        (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}
1425
/*
 * Like CRYPTO_gcm128_decrypt, but bulk CTR keystream generation is
 * delegated to |stream| (a ctr128_f).  GHASH is computed over the
 * *ciphertext*, so each span is hashed before it is decrypted.
 * ctx->block is still used for the final partial block.
 * Returns 0 on success, -1 if the total length exceeds 2^36-32 bytes.
 */
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
        const unsigned char *in, unsigned char *out,
        size_t len,ctr128_f stream)
{
    const union { long one; char little; } is_endian = {1};
    unsigned int n, ctr;
    size_t i;
    u64   mlen = ctx->len.u[1];
    void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    /* GCM_MUL/GHASH macros expand to calls through these local pointers */
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;              /* message too long or length overflow */
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    /* load the 32-bit big-endian counter from the last word of Yi */
    if (is_endian.little)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c+12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
    if (n) {
        /* finish the partial keystream block from a previous call */
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c^ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL (ctx,Xi);
        else {
            ctx->mres = n;
            return 0;
        }
    }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    /* bulk path: GHASH the ciphertext chunk, then stream-decrypt it */
    while (len>=GHASH_CHUNK) {
        GHASH(ctx,in,GHASH_CHUNK);
        (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
        ctr += GHASH_CHUNK/16;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in  += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#endif
    if ((i = (len&(size_t)-16))) {
        /* remaining whole blocks: hash first, then decrypt */
        size_t j=i/16;

#if defined(GHASH)
        GHASH(ctx,in,i);
#else
        while (j--) {
            size_t k;
            for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx,Xi);
            in += 16;
        }
        j = i/16;       /* restore block count and input pointer */
        in -= i;
#endif
        (*stream)(in,out,j,key,ctx->Yi.c);
        ctr += (unsigned int)j;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in  += i;
        len -= i;
    }
    if (len) {
        /* trailing partial block: one block-cipher call, buffer in mres */
        (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
        ++ctr;
        if (is_endian.little)
#ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#else
            PUTU32(ctx->Yi.c+12,ctr);
#endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            u8 c = in[n];
            ctx->Xi.c[n] ^= c;
            out[n] = c^ctx->EKi.c[n];
            ++n;
        }
    }

    ctx->mres = n;
    return 0;
}
1547
6acb4ff3
AP
1548int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1549 size_t len)
e7f5b1cd
AP
1550{
1551 const union { long one; char little; } is_endian = {1};
1552 u64 alen = ctx->len.u[0]<<3;
1553 u64 clen = ctx->len.u[1]<<3;
d8d95832 1554#ifdef GCM_FUNCREF_4BIT
7af04002 1555 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 1556#endif
e7f5b1cd 1557
9ddd859d 1558 if (ctx->mres || ctx->ares)
e7f5b1cd
AP
1559 GCM_MUL(ctx,Xi);
1560
1561 if (is_endian.little) {
1562#ifdef BSWAP8
1563 alen = BSWAP8(alen);
1564 clen = BSWAP8(clen);
1565#else
1566 u8 *p = ctx->len.c;
1567
1568 ctx->len.u[0] = alen;
1569 ctx->len.u[1] = clen;
1570
1571 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1572 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1573#endif
1574 }
1575
1576 ctx->Xi.u[0] ^= alen;
1577 ctx->Xi.u[1] ^= clen;
1578 GCM_MUL(ctx,Xi);
1579
1580 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1581 ctx->Xi.u[1] ^= ctx->EK0.u[1];
6acb4ff3
AP
1582
1583 if (tag && len<=sizeof(ctx->Xi))
1584 return memcmp(ctx->Xi.c,tag,len);
1585 else
1586 return -1;
1587}
1588
fd3dbc1d
DSH
1589void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1590{
1591 CRYPTO_gcm128_finish(ctx, NULL, 0);
1f2502eb 1592 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
fd3dbc1d
DSH
1593}
1594
6acb4ff3
AP
1595GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1596{
1597 GCM128_CONTEXT *ret;
1598
1599 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1600 CRYPTO_gcm128_init(ret,key,block);
1601
1602 return ret;
1603}
1604
1605void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1606{
1607 if (ctx) {
1608 OPENSSL_cleanse(ctx,sizeof(*ctx));
1609 OPENSSL_free(ctx);
1610 }
e7f5b1cd
AP
1611}
1612
#if defined(SELFTEST)
#include <stdio.h>
#include <openssl/aes.h>

/*
 * Known-answer test vectors, consumed by the TEST_CASE macro below.
 * Grouping by key size (visible from the K* declarations):
 *   cases 1-6:   16-byte keys (AES-128)
 *   cases 7-12:  24-byte keys (AES-192)
 *   cases 13-18: 32-byte keys (AES-256)
 *   case 19:     AAD-only input (128 bytes of AAD, empty plaintext)
 *   case 20:     64-byte IV chosen so the counter LSB starts at 0xff
 * NULL P*/C*/A* pointers mean "this case has no plaintext/ciphertext/AAD".
 */

/* Test Case 1 */
static const u8 K1[16],
        *P1=NULL,
        *A1=NULL,
        IV1[12],
        *C1=NULL,
        T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};

/* Test Case 2 */
#define K2 K1
#define A2 A1
#define IV2 IV1
static const u8 P2[16],
        C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
        T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};

/* Test Case 3 */
#define A3 A2
static const u8 K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
        P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
        IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
        C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
            0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
            0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
            0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
        T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};

/* Test Case 4 */
#define K4 K3
#define IV4 IV3
static const u8 P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
        A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
            0xab,0xad,0xda,0xd2},
        C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
            0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
            0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
            0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
        T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};

/* Test Case 5 */
#define K5 K4
#define P5 P4
#define A5 A4
static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
        C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
            0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
            0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
            0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
        T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};

/* Test Case 6 */
#define K6 K5
#define P6 P5
#define A6 A5
static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
            0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
            0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
            0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
        C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
            0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
            0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
            0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
        T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};

/* Test Case 7 */
static const u8 K7[24],
        *P7=NULL,
        *A7=NULL,
        IV7[12],
        *C7=NULL,
        T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};

/* Test Case 8 */
#define K8 K7
#define IV8 IV7
#define A8 A7
static const u8 P8[16],
        C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
        T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};

/* Test Case 9 */
#define A9 A8
static const u8 K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
            0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
        P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
        IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
        C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
            0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
            0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
            0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
        T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};

/* Test Case 10 */
#define K10 K9
#define IV10 IV9
static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
        A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
            0xab,0xad,0xda,0xd2},
        C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
            0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
            0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
            0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
        T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};

/* Test Case 11 */
#define K11 K10
#define P11 P10
#define A11 A10
static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
        C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
            0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
            0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
            0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
        T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};

/* Test Case 12 */
#define K12 K11
#define P12 P11
#define A12 A11
static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
            0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
            0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
            0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
        C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
            0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
            0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
            0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
        T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};

/* Test Case 13 */
static const u8 K13[32],
        *P13=NULL,
        *A13=NULL,
        IV13[12],
        *C13=NULL,
        T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};

/* Test Case 14 */
#define K14 K13
#define A14 A13
static const u8 P14[16],
        IV14[12],
        C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
        T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};

/* Test Case 15 */
#define A15 A14
static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
            0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
        P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
        IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
        C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
            0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
            0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
            0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
        T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};

/* Test Case 16 */
#define K16 K15
#define IV16 IV15
static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
        A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
            0xab,0xad,0xda,0xd2},
        C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
            0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
            0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
            0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
        T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};

/* Test Case 17 */
#define K17 K16
#define P17 P16
#define A17 A16
static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
        C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
            0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
            0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
            0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
        T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};

/* Test Case 18 */
#define K18 K17
#define P18 P17
#define A18 A17
static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
            0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
            0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
            0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
        C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
            0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
            0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
            0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
        T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};

/* Test Case 19 */
#define K19 K1
#define P19 P1
#define IV19 IV1
#define C19 C1
static const u8 A19[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
            0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
            0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
            0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55,
            0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
            0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
            0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
            0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
        T19[]= {0x5f,0xea,0x79,0x3a,0x2d,0x6f,0x97,0x4d,0x37,0xe6,0x8e,0x0c,0xb8,0xff,0x94,0x92};

/* Test Case 20 */
#define K20 K1
#define A20 A1
static const u8 IV20[64]={0xff,0xff,0xff,0xff},  /* this results in 0xff in counter LSB */
        P20[288],
        C20[]= {0x56,0xb3,0x37,0x3c,0xa9,0xef,0x6e,0x4a,0x2b,0x64,0xfe,0x1e,0x9a,0x17,0xb6,0x14,
            0x25,0xf1,0x0d,0x47,0xa7,0x5a,0x5f,0xce,0x13,0xef,0xc6,0xbc,0x78,0x4a,0xf2,0x4f,
            0x41,0x41,0xbd,0xd4,0x8c,0xf7,0xc7,0x70,0x88,0x7a,0xfd,0x57,0x3c,0xca,0x54,0x18,
            0xa9,0xae,0xff,0xcd,0x7c,0x5c,0xed,0xdf,0xc6,0xa7,0x83,0x97,0xb9,0xa8,0x5b,0x49,
            0x9d,0xa5,0x58,0x25,0x72,0x67,0xca,0xab,0x2a,0xd0,0xb2,0x3c,0xa4,0x76,0xa5,0x3c,
            0xb1,0x7f,0xb4,0x1c,0x4b,0x8b,0x47,0x5c,0xb4,0xf3,0xf7,0x16,0x50,0x94,0xc2,0x29,
            0xc9,0xe8,0xc4,0xdc,0x0a,0x2a,0x5f,0xf1,0x90,0x3e,0x50,0x15,0x11,0x22,0x13,0x76,
            0xa1,0xcd,0xb8,0x36,0x4c,0x50,0x61,0xa2,0x0c,0xae,0x74,0xbc,0x4a,0xcd,0x76,0xce,
            0xb0,0xab,0xc9,0xfd,0x32,0x17,0xef,0x9f,0x8c,0x90,0xbe,0x40,0x2d,0xdf,0x6d,0x86,
            0x97,0xf4,0xf8,0x80,0xdf,0xf1,0x5b,0xfb,0x7a,0x6b,0x28,0x24,0x1e,0xc8,0xfe,0x18,
            0x3c,0x2d,0x59,0xe3,0xf9,0xdf,0xff,0x65,0x3c,0x71,0x26,0xf0,0xac,0xb9,0xe6,0x42,
            0x11,0xf4,0x2b,0xae,0x12,0xaf,0x46,0x2b,0x10,0x70,0xbe,0xf1,0xab,0x5e,0x36,0x06,
            0x87,0x2c,0xa1,0x0d,0xee,0x15,0xb3,0x24,0x9b,0x1a,0x1b,0x95,0x8f,0x23,0x13,0x4c,
            0x4b,0xcc,0xb7,0xd0,0x32,0x00,0xbc,0xe4,0x20,0xa2,0xf8,0xeb,0x66,0xdc,0xf3,0x64,
            0x4d,0x14,0x23,0xc1,0xb5,0x69,0x90,0x03,0xc1,0x3e,0xce,0xf4,0xbf,0x38,0xa3,0xb6,
            0x0e,0xed,0xc3,0x40,0x33,0xba,0xc1,0x90,0x27,0x83,0xdc,0x6d,0x89,0xe2,0xe7,0x74,
            0x18,0x8a,0x43,0x9c,0x7e,0xbc,0xc0,0x67,0x2d,0xbd,0xa4,0xdd,0xcf,0xb2,0x79,0x46,
            0x13,0xb0,0xbe,0x41,0x31,0x5e,0xf7,0x78,0x70,0x8a,0x70,0xee,0x7d,0x75,0x16,0x5c},
        T20[]= {0x8b,0x30,0x7f,0x6b,0x33,0x28,0x6d,0x0a,0xb0,0x26,0xa9,0xed,0x3f,0xe1,0xe8,0x5f};
1868
/*
 * Run one GCM test vector set, number n, in both directions:
 *
 *   1. encrypt P##n with K##n/IV##n (feeding A##n as AAD when present),
 *      then verify the tag against T##n and the output against C##n;
 *   2. re-initialize the IV and decrypt C##n, verifying the tag and
 *      that the output matches P##n.
 *
 * Expects `ctx` (GCM128_CONTEXT), `key` (AES_KEY) and `ret` (int
 * failure counter) to exist in the enclosing scope.  A nonzero return
 * from CRYPTO_gcm128_finish is treated as a tag-verification failure.
 * NOTE(review): the if(A##n)/if(P##n)/if(C##n) guards suggest some
 * vector names are #defined to NULL earlier in the file for "absent"
 * fields — confirm against the preceding vector definitions.
 * (All comments are kept outside the macro body: a comment on a
 * continuation line could interfere with the trailing backslashes.)
 */
#define TEST_CASE(n) do {					\
	u8 out[sizeof(P##n)];					\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);		\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (C##n && memcmp(out,C##n,sizeof(out))))		\
		ret++, printf ("encrypt test#%d failed.\n",n);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (P##n && memcmp(out,P##n,sizeof(out))))		\
		ret++, printf ("decrypt test#%d failed.\n",n);	\
	} while(0)
1888
1889int main()
1890{
1891 GCM128_CONTEXT ctx;
1892 AES_KEY key;
1893 int ret=0;
1894
1895 TEST_CASE(1);
1896 TEST_CASE(2);
1897 TEST_CASE(3);
1898 TEST_CASE(4);
1899 TEST_CASE(5);
1900 TEST_CASE(6);
1901 TEST_CASE(7);
1902 TEST_CASE(8);
1903 TEST_CASE(9);
1904 TEST_CASE(10);
1905 TEST_CASE(11);
1906 TEST_CASE(12);
1907 TEST_CASE(13);
1908 TEST_CASE(14);
1909 TEST_CASE(15);
1910 TEST_CASE(16);
1911 TEST_CASE(17);
1912 TEST_CASE(18);
273a8081 1913 TEST_CASE(19);
1da5d302 1914 TEST_CASE(20);
e7f5b1cd 1915
a595baff 1916#ifdef OPENSSL_CPUID_OBJ
2262beef
AP
1917 {
1918 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1919 union { u64 u; u8 c[1024]; } buf;
c1f092d1 1920 int i;
2262beef
AP
1921
1922 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1923 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1924 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1925
1926 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1927 start = OPENSSL_rdtsc();
1928 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1929 gcm_t = OPENSSL_rdtsc() - start;
1930
1931 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
b68c1315 1932 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
2262beef
AP
1933 (block128_f)AES_encrypt);
1934 start = OPENSSL_rdtsc();
1935 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
b68c1315 1936 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
a595baff 1937 (block128_f)AES_encrypt);
2262beef
AP
1938 ctr_t = OPENSSL_rdtsc() - start;
1939
1940 printf("%.2f-%.2f=%.2f\n",
1941 gcm_t/(double)sizeof(buf),
1942 ctr_t/(double)sizeof(buf),
1943 (gcm_t-ctr_t)/(double)sizeof(buf));
a595baff 1944#ifdef GHASH
23a05fa0
AP
1945 {
1946 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1947 const u8 *inp,size_t len) = ctx.ghash;
1948
1949 GHASH((&ctx),buf.c,sizeof(buf));
a595baff 1950 start = OPENSSL_rdtsc();
8d1b199d 1951 for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
a595baff 1952 gcm_t = OPENSSL_rdtsc() - start;
c1f092d1 1953 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
23a05fa0 1954 }
a595baff 1955#endif
2262beef 1956 }
a595baff 1957#endif
2262beef 1958
e7f5b1cd
AP
1959 return ret;
1960}
1961#endif