]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/gcm128.c
Run util/openssl-format-source -v -c .
[thirdparty/openssl.git] / crypto / modes / gcm128.c
CommitLineData
e7f5b1cd
AP
1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
0f113f3e 9 * notice, this list of conditions and the following disclaimer.
e7f5b1cd
AP
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
aa763c0f 50#include <openssl/crypto.h>
f472ec8c 51#include "modes_lcl.h"
e7f5b1cd
AP
52#include <string.h>
53
54#ifndef MODES_DEBUG
55# ifndef NDEBUG
56# define NDEBUG
57# endif
58#endif
59#include <assert.h>
60
f472ec8c
AP
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine GETU32/PUTU32 as single byte-swapped 32-bit accesses.  Per the
 * original note this is done "because alignment is ensured" for the
 * pointers these macros are applied to in this file.
 *
 * PUTU32 is wrapped in parentheses so that the assignment it expands to
 * cannot combine with operators surrounding the macro invocation.
 */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     (*(u32 *)(p) = BSWAP4(v))
#endif
68
/* Move a 16-bit constant into the 16 most significant bits of a size_t. */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))

/*
 * Shift the 128-bit value V (an lvalue with u64 members .hi and .lo) right
 * by one bit.  If the bit shifted out of V.lo was set, 0xe1 is XORed into
 * the most significant byte of V.hi.  The mask T is derived arithmetically
 * from that bit rather than with a branch.  The two arms differ only in the
 * width of the temporary, selected by sizeof(size_t).
 *
 * V is parenthesized at every use so that an argument such as *ptr expands
 * correctly.  V is evaluated several times, so it must have no side effects.
 */
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
                (V).lo  = ((V).hi<<63)|((V).lo>>1); \
                (V).hi  = ((V).hi>>1 )^((u64)T<<32); \
        } \
} while(0)
82
1d97c843 83/*-
d8d95832
AP
84 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
85 * never be set to 8. 8 is effectively reserved for testing purposes.
86 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
87 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
88 * whole spectrum of possible table driven implementations. Why? In
89 * non-"Shoup's" case memory access pattern is segmented in such manner,
90 * that it's trivial to see that cache timing information can reveal
91 * fair portion of intermediate hash value. Given that ciphertext is
92 * always available to attacker, it's possible for him to attempt to
93 * deduce secret parameter H and if successful, tamper with messages
94 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
95 * not as trivial, but there is no reason to believe that it's resistant
96 * to cache-timing attack. And the thing about "8-bit" implementation is
97 * that it consumes 16 (sixteen) times more memory, 4KB per individual
98 * key + 1KB shared. Well, on pros side it should be twice as fast as
99 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
100 * was observed to run ~75% faster, closer to 100% for commercial
101 * compilers... Yet "4-bit" procedure is preferred, because it's
102 * believed to provide better security-performance balance and adequate
103 * all-round performance. "All-round" refers to things like:
104 *
105 * - shorter setup time effectively improves overall timing for
106 * handling short messages;
107 * - larger table allocation can become unbearable because of VM
108 * subsystem penalties (for example on Windows large enough free
109 * results in VM working set trimming, meaning that consequent
110 * malloc would immediately incur working set expansion);
111 * - larger table has larger cache footprint, which can affect
112 * performance of other code paths (not necessarily even from same
113 * thread in Hyper-Threading world);
114 *
115 * Value of 1 is not appropriate for performance reasons.
116 */
0f113f3e 117#if TABLE_BITS==8
a595baff 118
e7f5b1cd
AP
119static void gcm_init_8bit(u128 Htable[256], u64 H[2])
120{
0f113f3e
MC
121 int i, j;
122 u128 V;
123
124 Htable[0].hi = 0;
125 Htable[0].lo = 0;
126 V.hi = H[0];
127 V.lo = H[1];
128
129 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
130 REDUCE1BIT(V);
131 Htable[i] = V;
132 }
133
134 for (i = 2; i < 256; i <<= 1) {
135 u128 *Hi = Htable + i, H0 = *Hi;
136 for (j = 1; j < i; ++j) {
137 Hi[j].hi = H0.hi ^ Htable[j].hi;
138 Hi[j].lo = H0.lo ^ Htable[j].lo;
139 }
140 }
e7f5b1cd
AP
141}
142
d8d95832 143static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
e7f5b1cd 144{
0f113f3e
MC
145 u128 Z = { 0, 0 };
146 const u8 *xi = (const u8 *)Xi + 15;
147 size_t rem, n = *xi;
148 const union {
149 long one;
150 char little;
151 } is_endian = {
152 1
153 };
154 static const size_t rem_8bit[256] = {
155 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
156 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
157 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
158 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
159 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
160 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
161 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
162 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
163 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
164 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
165 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
166 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
167 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
168 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
169 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
170 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
171 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
172 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
173 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
174 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
175 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
176 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
177 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
178 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
179 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
180 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
181 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
182 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
183 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
184 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
185 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
186 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
187 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
188 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
189 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
190 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
191 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
192 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
193 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
194 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
195 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
196 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
197 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
198 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
199 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
200 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
201 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
202 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
203 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
204 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
205 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
206 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
207 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
208 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
209 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
210 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
211 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
212 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
213 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
214 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
215 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
216 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
217 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
218 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
219 };
220
221 while (1) {
222 Z.hi ^= Htable[n].hi;
223 Z.lo ^= Htable[n].lo;
224
225 if ((u8 *)Xi == xi)
226 break;
227
228 n = *(--xi);
229
230 rem = (size_t)Z.lo & 0xff;
231 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
232 Z.hi = (Z.hi >> 8);
233 if (sizeof(size_t) == 8)
234 Z.hi ^= rem_8bit[rem];
235 else
236 Z.hi ^= (u64)rem_8bit[rem] << 32;
237 }
238
239 if (is_endian.little) {
240# ifdef BSWAP8
241 Xi[0] = BSWAP8(Z.hi);
242 Xi[1] = BSWAP8(Z.lo);
243# else
244 u8 *p = (u8 *)Xi;
245 u32 v;
246 v = (u32)(Z.hi >> 32);
247 PUTU32(p, v);
248 v = (u32)(Z.hi);
249 PUTU32(p + 4, v);
250 v = (u32)(Z.lo >> 32);
251 PUTU32(p + 8, v);
252 v = (u32)(Z.lo);
253 PUTU32(p + 12, v);
254# endif
255 } else {
256 Xi[0] = Z.hi;
257 Xi[1] = Z.lo;
258 }
e7f5b1cd 259}
e7f5b1cd 260
0f113f3e
MC
261# define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
262
263#elif TABLE_BITS==4
2262beef 264
e7f5b1cd
AP
265static void gcm_init_4bit(u128 Htable[16], u64 H[2])
266{
0f113f3e
MC
267 u128 V;
268# if defined(OPENSSL_SMALL_FOOTPRINT)
269 int i;
270# endif
e7f5b1cd 271
0f113f3e
MC
272 Htable[0].hi = 0;
273 Htable[0].lo = 0;
274 V.hi = H[0];
275 V.lo = H[1];
276
277# if defined(OPENSSL_SMALL_FOOTPRINT)
278 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
279 REDUCE1BIT(V);
280 Htable[i] = V;
281 }
282
283 for (i = 2; i < 16; i <<= 1) {
284 u128 *Hi = Htable + i;
285 int j;
286 for (V = *Hi, j = 1; j < i; ++j) {
287 Hi[j].hi = V.hi ^ Htable[j].hi;
288 Hi[j].lo = V.lo ^ Htable[j].lo;
289 }
290 }
291# else
292 Htable[8] = V;
293 REDUCE1BIT(V);
294 Htable[4] = V;
295 REDUCE1BIT(V);
296 Htable[2] = V;
297 REDUCE1BIT(V);
298 Htable[1] = V;
299 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
300 V = Htable[4];
301 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
302 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
303 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
304 V = Htable[8];
305 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
306 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
307 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
308 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
309 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
310 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
311 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
312# endif
313# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
314 /*
315 * ARM assembler expects specific dword order in Htable.
316 */
317 {
318 int j;
319 const union {
320 long one;
321 char little;
322 } is_endian = {
323 1
324 };
325
326 if (is_endian.little)
327 for (j = 0; j < 16; ++j) {
328 V = Htable[j];
329 Htable[j].hi = V.lo;
330 Htable[j].lo = V.hi;
331 } else
332 for (j = 0; j < 16; ++j) {
333 V = Htable[j];
334 Htable[j].hi = V.lo << 32 | V.lo >> 32;
335 Htable[j].lo = V.hi << 32 | V.hi >> 32;
336 }
337 }
338# endif
e7f5b1cd
AP
339}
340
0f113f3e 341# ifndef GHASH_ASM
2262beef 342static const size_t rem_4bit[16] = {
0f113f3e
MC
343 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
344 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
345 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
346 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
347};
2262beef 348
4f39edbf 349static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
e7f5b1cd 350{
0f113f3e
MC
351 u128 Z;
352 int cnt = 15;
353 size_t rem, nlo, nhi;
354 const union {
355 long one;
356 char little;
357 } is_endian = {
358 1
359 };
360
361 nlo = ((const u8 *)Xi)[15];
362 nhi = nlo >> 4;
363 nlo &= 0xf;
364
365 Z.hi = Htable[nlo].hi;
366 Z.lo = Htable[nlo].lo;
367
368 while (1) {
369 rem = (size_t)Z.lo & 0xf;
370 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
371 Z.hi = (Z.hi >> 4);
372 if (sizeof(size_t) == 8)
373 Z.hi ^= rem_4bit[rem];
374 else
375 Z.hi ^= (u64)rem_4bit[rem] << 32;
376
377 Z.hi ^= Htable[nhi].hi;
378 Z.lo ^= Htable[nhi].lo;
379
380 if (--cnt < 0)
381 break;
382
383 nlo = ((const u8 *)Xi)[cnt];
384 nhi = nlo >> 4;
385 nlo &= 0xf;
386
387 rem = (size_t)Z.lo & 0xf;
388 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
389 Z.hi = (Z.hi >> 4);
390 if (sizeof(size_t) == 8)
391 Z.hi ^= rem_4bit[rem];
392 else
393 Z.hi ^= (u64)rem_4bit[rem] << 32;
394
395 Z.hi ^= Htable[nlo].hi;
396 Z.lo ^= Htable[nlo].lo;
397 }
398
399 if (is_endian.little) {
400# ifdef BSWAP8
401 Xi[0] = BSWAP8(Z.hi);
402 Xi[1] = BSWAP8(Z.lo);
403# else
404 u8 *p = (u8 *)Xi;
405 u32 v;
406 v = (u32)(Z.hi >> 32);
407 PUTU32(p, v);
408 v = (u32)(Z.hi);
409 PUTU32(p + 4, v);
410 v = (u32)(Z.lo >> 32);
411 PUTU32(p + 8, v);
412 v = (u32)(Z.lo);
413 PUTU32(p + 12, v);
414# endif
415 } else {
416 Xi[0] = Z.hi;
417 Xi[1] = Z.lo;
418 }
2262beef
AP
419}
420
0f113f3e 421# if !defined(OPENSSL_SMALL_FOOTPRINT)
2262beef
AP
422/*
423 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
a595baff
AP
424 * details... Compiler-generated code doesn't seem to give any
425 * performance improvement, at least not on x86[_64]. It's here
426 * mostly as reference and a placeholder for possible future
427 * non-trivial optimization[s]...
2262beef 428 */
0f113f3e
MC
429static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
430 const u8 *inp, size_t len)
2262beef
AP
431{
432 u128 Z;
433 int cnt;
434 size_t rem, nlo, nhi;
0f113f3e
MC
435 const union {
436 long one;
437 char little;
438 } is_endian = {
439 1
440 };
441
442# if 1
2262beef 443 do {
0f113f3e
MC
444 cnt = 15;
445 nlo = ((const u8 *)Xi)[15];
446 nlo ^= inp[15];
447 nhi = nlo >> 4;
448 nlo &= 0xf;
449
450 Z.hi = Htable[nlo].hi;
451 Z.lo = Htable[nlo].lo;
452
453 while (1) {
454 rem = (size_t)Z.lo & 0xf;
455 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
456 Z.hi = (Z.hi >> 4);
457 if (sizeof(size_t) == 8)
458 Z.hi ^= rem_4bit[rem];
459 else
460 Z.hi ^= (u64)rem_4bit[rem] << 32;
461
462 Z.hi ^= Htable[nhi].hi;
463 Z.lo ^= Htable[nhi].lo;
464
465 if (--cnt < 0)
466 break;
467
468 nlo = ((const u8 *)Xi)[cnt];
469 nlo ^= inp[cnt];
470 nhi = nlo >> 4;
471 nlo &= 0xf;
472
473 rem = (size_t)Z.lo & 0xf;
474 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
475 Z.hi = (Z.hi >> 4);
476 if (sizeof(size_t) == 8)
477 Z.hi ^= rem_4bit[rem];
478 else
479 Z.hi ^= (u64)rem_4bit[rem] << 32;
480
481 Z.hi ^= Htable[nlo].hi;
482 Z.lo ^= Htable[nlo].lo;
483 }
484# else
e747f4d4
AP
485 /*
486 * Extra 256+16 bytes per-key plus 512 bytes shared tables
487 * [should] give ~50% improvement... One could have PACK()-ed
6acb4ff3
AP
488 * the rem_8bit even here, but the priority is to minimize
489 * cache footprint...
0f113f3e
MC
490 */
491 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
492 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
e747f4d4 493 static const unsigned short rem_8bit[256] = {
0f113f3e
MC
494 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
495 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
496 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
497 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
498 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
499 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
500 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
501 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
502 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
503 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
504 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
505 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
506 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
507 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
508 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
509 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
510 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
511 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
512 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
513 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
514 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
515 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
516 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
517 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
518 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
519 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
520 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
521 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
522 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
523 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
524 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
525 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
526 };
e747f4d4
AP
527 /*
528 * This pre-processing phase slows down procedure by approximately
529 * same time as it makes each loop spin faster. In other words
530 * single block performance is approximately same as straightforward
531 * "4-bit" implementation, and then it goes only faster...
532 */
0f113f3e
MC
533 for (cnt = 0; cnt < 16; ++cnt) {
534 Z.hi = Htable[cnt].hi;
535 Z.lo = Htable[cnt].lo;
536 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
537 Hshr4[cnt].hi = (Z.hi >> 4);
538 Hshl4[cnt] = (u8)(Z.lo << 4);
e747f4d4
AP
539 }
540
541 do {
0f113f3e
MC
542 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
543 nlo = ((const u8 *)Xi)[cnt];
544 nlo ^= inp[cnt];
545 nhi = nlo >> 4;
546 nlo &= 0xf;
e747f4d4 547
0f113f3e
MC
548 Z.hi ^= Htable[nlo].hi;
549 Z.lo ^= Htable[nlo].lo;
e747f4d4 550
0f113f3e 551 rem = (size_t)Z.lo & 0xff;
e747f4d4 552
0f113f3e
MC
553 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
554 Z.hi = (Z.hi >> 8);
e747f4d4 555
0f113f3e
MC
556 Z.hi ^= Hshr4[nhi].hi;
557 Z.lo ^= Hshr4[nhi].lo;
558 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
559 }
e747f4d4 560
0f113f3e
MC
561 nlo = ((const u8 *)Xi)[0];
562 nlo ^= inp[0];
563 nhi = nlo >> 4;
564 nlo &= 0xf;
e747f4d4 565
0f113f3e
MC
566 Z.hi ^= Htable[nlo].hi;
567 Z.lo ^= Htable[nlo].lo;
e747f4d4 568
0f113f3e 569 rem = (size_t)Z.lo & 0xf;
e747f4d4 570
0f113f3e
MC
571 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
572 Z.hi = (Z.hi >> 4);
e747f4d4 573
0f113f3e
MC
574 Z.hi ^= Htable[nhi].hi;
575 Z.lo ^= Htable[nhi].lo;
576 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
577# endif
e7f5b1cd 578
0f113f3e
MC
579 if (is_endian.little) {
580# ifdef BSWAP8
581 Xi[0] = BSWAP8(Z.hi);
582 Xi[1] = BSWAP8(Z.lo);
583# else
584 u8 *p = (u8 *)Xi;
585 u32 v;
586 v = (u32)(Z.hi >> 32);
587 PUTU32(p, v);
588 v = (u32)(Z.hi);
589 PUTU32(p + 4, v);
590 v = (u32)(Z.lo >> 32);
591 PUTU32(p + 8, v);
592 v = (u32)(Z.lo);
593 PUTU32(p + 12, v);
594# endif
595 } else {
596 Xi[0] = Z.hi;
597 Xi[1] = Z.lo;
598 }
599 } while (inp += 16, len -= 16);
e7f5b1cd 600}
0f113f3e
MC
601# endif
602# else
603void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
604void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
605 size_t len);
606# endif
2262beef 607
0f113f3e
MC
/*
 * GCM_MUL(ctx,Xi): multiply the context member named by the second
 * argument (e.g. Xi or Yi) in place using the 4-bit table.  ctx is
 * parenthesized, as in GHASH below, so that any pointer expression works.
 */
# define GCM_MUL(ctx,Xi)   gcm_gmult_4bit((ctx)->Xi.u,(ctx)->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* GHASH(ctx,in,len): fold len bytes at in into ctx->Xi. */
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
 * thrashing effect. In other words, the idea is to hash data while it is
 * still in L1 cache after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
2262beef 618
0f113f3e 619#else /* TABLE_BITS */
e7f5b1cd 620
0f113f3e 621static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
e7f5b1cd 622{
0f113f3e
MC
623 u128 V, Z = { 0, 0 };
624 long X;
625 int i, j;
626 const long *xi = (const long *)Xi;
627 const union {
628 long one;
629 char little;
630 } is_endian = {
631 1
632 };
633
634 V.hi = H[0]; /* H is in host byte order, no byte swapping */
635 V.lo = H[1];
636
637 for (j = 0; j < 16 / sizeof(long); ++j) {
638 if (is_endian.little) {
639 if (sizeof(long) == 8) {
640# ifdef BSWAP8
641 X = (long)(BSWAP8(xi[j]));
642# else
643 const u8 *p = (const u8 *)(xi + j);
644 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
645# endif
646 } else {
647 const u8 *p = (const u8 *)(xi + j);
648 X = (long)GETU32(p);
649 }
650 } else
651 X = xi[j];
652
653 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
654 u64 M = (u64)(X >> (8 * sizeof(long) - 1));
655 Z.hi ^= V.hi & M;
656 Z.lo ^= V.lo & M;
657
658 REDUCE1BIT(V);
659 }
660 }
661
662 if (is_endian.little) {
663# ifdef BSWAP8
664 Xi[0] = BSWAP8(Z.hi);
665 Xi[1] = BSWAP8(Z.lo);
666# else
667 u8 *p = (u8 *)Xi;
668 u32 v;
669 v = (u32)(Z.hi >> 32);
670 PUTU32(p, v);
671 v = (u32)(Z.hi);
672 PUTU32(p + 4, v);
673 v = (u32)(Z.lo >> 32);
674 PUTU32(p + 8, v);
675 v = (u32)(Z.lo);
676 PUTU32(p + 12, v);
677# endif
678 } else {
679 Xi[0] = Z.hi;
680 Xi[1] = Z.lo;
681 }
e7f5b1cd 682}
0f113f3e
MC
683
684# define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
a595baff 685
e7f5b1cd
AP
686#endif
687
0f113f3e
MC
/*
 * Declarations for the platform-specific GHASH back ends.  Each supported
 * platform defines its own GHASH_ASM_* marker together with
 * GCM_FUNCREF_4BIT, which makes GCM_MUL/GHASH dispatch through function
 * pointers taken from the context.
 */
#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if    !defined(I386_ONLY) && \
        (defined(__i386)        || defined(__i386__)    || \
         defined(__x86_64)      || defined(__x86_64__)  || \
         defined(_M_IX86)       || defined(_M_AMD64)    || defined(_M_X64))
/* ---- x86 and x86_64 ---- */
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_ia32cap_P[2];

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16],
                     const u8 *inp, size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
/* 32-bit x86: the AVX names are aliases for the CLMUL routines */
#   define GHASH_ASM_X86
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul

void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
                        const u8 *inp, size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
                        const u8 *inp, size_t len);
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16],
                   const u8 *inp, size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
/* ---- ARM / AArch64 ---- */
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16],
                    const u8 *inp, size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16],
                  const u8 *inp, size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
/* ---- SPARC ---- */
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16],
                    const u8 *inp, size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
/* ---- PowerPC ---- */
#  include "ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16],
                  const u8 *inp, size_t len);
# endif
#endif
760
7af04002
AP
/*
 * With GCM_FUNCREF_4BIT the multiplication primitives are reached through
 * the local function pointers gcm_gmult_p / gcm_ghash_p, which each user of
 * these macros must have in scope.  GHASH is only redirected when it was
 * already defined.  ctx is parenthesized so that any pointer expression can
 * be passed.
 */
#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)        (*gcm_gmult_p)((ctx)->Xi.u,(ctx)->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)((ctx)->Xi.u,(ctx)->Htable,in,len)
# endif
#endif
769
0f113f3e 770void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
e7f5b1cd 771{
0f113f3e
MC
772 const union {
773 long one;
774 char little;
775 } is_endian = {
776 1
777 };
e7f5b1cd 778
0f113f3e
MC
779 memset(ctx, 0, sizeof(*ctx));
780 ctx->block = block;
781 ctx->key = key;
e7f5b1cd 782
0f113f3e 783 (*block) (ctx->H.c, ctx->H.c, key);
e7f5b1cd 784
0f113f3e
MC
785 if (is_endian.little) {
786 /* H is stored in host byte order */
e7f5b1cd 787#ifdef BSWAP8
0f113f3e
MC
788 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
789 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
e7f5b1cd 790#else
0f113f3e
MC
791 u8 *p = ctx->H.c;
792 u64 hi, lo;
793 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
794 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
795 ctx->H.u[0] = hi;
796 ctx->H.u[1] = lo;
e7f5b1cd 797#endif
0f113f3e
MC
798 }
799#if TABLE_BITS==8
800 gcm_init_8bit(ctx->Htable, ctx->H.u);
801#elif TABLE_BITS==4
802# if defined(GHASH_ASM_X86_OR_64)
803# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
804 if (OPENSSL_ia32cap_P[0] & (1 << 24) && /* check FXSR bit */
805 OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
806 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
807 gcm_init_avx(ctx->Htable, ctx->H.u);
808 ctx->gmult = gcm_gmult_avx;
809 ctx->ghash = gcm_ghash_avx;
810 } else {
811 gcm_init_clmul(ctx->Htable, ctx->H.u);
812 ctx->gmult = gcm_gmult_clmul;
813 ctx->ghash = gcm_ghash_clmul;
814 }
815 return;
816 }
a6d915e0 817# endif
0f113f3e
MC
818 gcm_init_4bit(ctx->Htable, ctx->H.u);
819# if defined(GHASH_ASM_X86) /* x86 only */
820# if defined(OPENSSL_IA32_SSE2)
821 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
98909c1d 822# else
0f113f3e 823 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
98909c1d 824# endif
0f113f3e
MC
825 ctx->gmult = gcm_gmult_4bit_mmx;
826 ctx->ghash = gcm_ghash_4bit_mmx;
827 } else {
828 ctx->gmult = gcm_gmult_4bit_x86;
829 ctx->ghash = gcm_ghash_4bit_x86;
830 }
c1f092d1 831# else
0f113f3e
MC
832 ctx->gmult = gcm_gmult_4bit;
833 ctx->ghash = gcm_ghash_4bit;
c1f092d1 834# endif
0f113f3e 835# elif defined(GHASH_ASM_ARM)
82741e9c 836# ifdef PMULL_CAPABLE
0f113f3e
MC
837 if (PMULL_CAPABLE) {
838 gcm_init_v8(ctx->Htable, ctx->H.u);
839 ctx->gmult = gcm_gmult_v8;
840 ctx->ghash = gcm_ghash_v8;
841 } else
82741e9c
AP
842# endif
843# ifdef NEON_CAPABLE
0f113f3e
MC
844 if (NEON_CAPABLE) {
845 gcm_init_neon(ctx->Htable, ctx->H.u);
846 ctx->gmult = gcm_gmult_neon;
847 ctx->ghash = gcm_ghash_neon;
848 } else
82741e9c 849# endif
0f113f3e
MC
850 {
851 gcm_init_4bit(ctx->Htable, ctx->H.u);
852 ctx->gmult = gcm_gmult_4bit;
853 ctx->ghash = gcm_ghash_4bit;
854 }
855# elif defined(GHASH_ASM_SPARC)
856 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
857 gcm_init_vis3(ctx->Htable, ctx->H.u);
858 ctx->gmult = gcm_gmult_vis3;
859 ctx->ghash = gcm_ghash_vis3;
860 } else {
861 gcm_init_4bit(ctx->Htable, ctx->H.u);
862 ctx->gmult = gcm_gmult_4bit;
863 ctx->ghash = gcm_ghash_4bit;
864 }
865# elif defined(GHASH_ASM_PPC)
866 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
867 gcm_init_p8(ctx->Htable, ctx->H.u);
868 ctx->gmult = gcm_gmult_p8;
869 ctx->ghash = gcm_ghash_p8;
870 } else {
871 gcm_init_4bit(ctx->Htable, ctx->H.u);
872 ctx->gmult = gcm_gmult_4bit;
873 ctx->ghash = gcm_ghash_4bit;
874 }
c1f092d1 875# else
0f113f3e 876 gcm_init_4bit(ctx->Htable, ctx->H.u);
c1f092d1 877# endif
a595baff 878#endif
e7f5b1cd
AP
879}
880
0f113f3e
MC
881void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
882 size_t len)
e7f5b1cd 883{
0f113f3e
MC
884 const union {
885 long one;
886 char little;
887 } is_endian = {
888 1
889 };
890 unsigned int ctr;
d8d95832 891#ifdef GCM_FUNCREF_4BIT
0f113f3e
MC
892 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
893#endif
894
895 ctx->Yi.u[0] = 0;
896 ctx->Yi.u[1] = 0;
897 ctx->Xi.u[0] = 0;
898 ctx->Xi.u[1] = 0;
899 ctx->len.u[0] = 0; /* AAD length */
900 ctx->len.u[1] = 0; /* message length */
901 ctx->ares = 0;
902 ctx->mres = 0;
903
904 if (len == 12) {
905 memcpy(ctx->Yi.c, iv, 12);
906 ctx->Yi.c[15] = 1;
907 ctr = 1;
908 } else {
909 size_t i;
910 u64 len0 = len;
911
912 while (len >= 16) {
913 for (i = 0; i < 16; ++i)
914 ctx->Yi.c[i] ^= iv[i];
915 GCM_MUL(ctx, Yi);
916 iv += 16;
917 len -= 16;
918 }
919 if (len) {
920 for (i = 0; i < len; ++i)
921 ctx->Yi.c[i] ^= iv[i];
922 GCM_MUL(ctx, Yi);
923 }
924 len0 <<= 3;
925 if (is_endian.little) {
e7f5b1cd 926#ifdef BSWAP8
0f113f3e 927 ctx->Yi.u[1] ^= BSWAP8(len0);
e7f5b1cd 928#else
0f113f3e
MC
929 ctx->Yi.c[8] ^= (u8)(len0 >> 56);
930 ctx->Yi.c[9] ^= (u8)(len0 >> 48);
931 ctx->Yi.c[10] ^= (u8)(len0 >> 40);
932 ctx->Yi.c[11] ^= (u8)(len0 >> 32);
933 ctx->Yi.c[12] ^= (u8)(len0 >> 24);
934 ctx->Yi.c[13] ^= (u8)(len0 >> 16);
935 ctx->Yi.c[14] ^= (u8)(len0 >> 8);
936 ctx->Yi.c[15] ^= (u8)(len0);
e7f5b1cd 937#endif
0f113f3e
MC
938 } else
939 ctx->Yi.u[1] ^= len0;
e7f5b1cd 940
0f113f3e 941 GCM_MUL(ctx, Yi);
e7f5b1cd 942
0f113f3e 943 if (is_endian.little)
997d1aac 944#ifdef BSWAP4
0f113f3e 945 ctr = BSWAP4(ctx->Yi.d[3]);
997d1aac 946#else
0f113f3e 947 ctr = GETU32(ctx->Yi.c + 12);
997d1aac 948#endif
0f113f3e
MC
949 else
950 ctr = ctx->Yi.d[3];
951 }
e7f5b1cd 952
0f113f3e
MC
953 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
954 ++ctr;
955 if (is_endian.little)
997d1aac 956#ifdef BSWAP4
0f113f3e 957 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 958#else
0f113f3e 959 PUTU32(ctx->Yi.c + 12, ctr);
997d1aac 960#endif
0f113f3e
MC
961 else
962 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
963}
964
0f113f3e
MC
965int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
966 size_t len)
e7f5b1cd 967{
0f113f3e
MC
968 size_t i;
969 unsigned int n;
970 u64 alen = ctx->len.u[0];
d8d95832 971#ifdef GCM_FUNCREF_4BIT
0f113f3e 972 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
d8d95832 973# ifdef GHASH
0f113f3e
MC
974 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
975 const u8 *inp, size_t len) = ctx->ghash;
d8d95832
AP
976# endif
977#endif
e7f5b1cd 978
0f113f3e
MC
979 if (ctx->len.u[1])
980 return -2;
981
982 alen += len;
983 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
984 return -1;
985 ctx->len.u[0] = alen;
986
987 n = ctx->ares;
988 if (n) {
989 while (n && len) {
990 ctx->Xi.c[n] ^= *(aad++);
991 --len;
992 n = (n + 1) % 16;
993 }
994 if (n == 0)
995 GCM_MUL(ctx, Xi);
996 else {
997 ctx->ares = n;
998 return 0;
999 }
1000 }
2262beef 1001#ifdef GHASH
0f113f3e
MC
1002 if ((i = (len & (size_t)-16))) {
1003 GHASH(ctx, aad, i);
1004 aad += i;
1005 len -= i;
1006 }
2262beef 1007#else
0f113f3e
MC
1008 while (len >= 16) {
1009 for (i = 0; i < 16; ++i)
1010 ctx->Xi.c[i] ^= aad[i];
1011 GCM_MUL(ctx, Xi);
1012 aad += 16;
1013 len -= 16;
1014 }
2262beef 1015#endif
0f113f3e
MC
1016 if (len) {
1017 n = (unsigned int)len;
1018 for (i = 0; i < len; ++i)
1019 ctx->Xi.c[i] ^= aad[i];
1020 }
b68c1315 1021
0f113f3e
MC
1022 ctx->ares = n;
1023 return 0;
e7f5b1cd
AP
1024}
1025
1f2502eb 1026int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
0f113f3e
MC
1027 const unsigned char *in, unsigned char *out,
1028 size_t len)
e7f5b1cd 1029{
0f113f3e
MC
1030 const union {
1031 long one;
1032 char little;
1033 } is_endian = {
1034 1
1035 };
1036 unsigned int n, ctr;
1037 size_t i;
1038 u64 mlen = ctx->len.u[1];
1039 block128_f block = ctx->block;
1040 void *key = ctx->key;
d8d95832 1041#ifdef GCM_FUNCREF_4BIT
0f113f3e 1042 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
d8d95832 1043# ifdef GHASH
0f113f3e
MC
1044 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1045 const u8 *inp, size_t len) = ctx->ghash;
d8d95832
AP
1046# endif
1047#endif
1f2502eb
AP
1048
1049#if 0
0f113f3e 1050 n = (unsigned int)mlen % 16; /* alternative to ctx->mres */
997d1aac 1051#endif
0f113f3e
MC
1052 mlen += len;
1053 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1054 return -1;
1055 ctx->len.u[1] = mlen;
e7f5b1cd 1056
0f113f3e
MC
1057 if (ctx->ares) {
1058 /* First call to encrypt finalizes GHASH(AAD) */
1059 GCM_MUL(ctx, Xi);
1060 ctx->ares = 0;
1061 }
96a4cf8c 1062
0f113f3e 1063 if (is_endian.little)
997d1aac 1064#ifdef BSWAP4
0f113f3e 1065 ctr = BSWAP4(ctx->Yi.d[3]);
997d1aac 1066#else
0f113f3e 1067 ctr = GETU32(ctx->Yi.c + 12);
997d1aac 1068#endif
0f113f3e
MC
1069 else
1070 ctr = ctx->Yi.d[3];
96a4cf8c 1071
0f113f3e
MC
1072 n = ctx->mres;
1073#if !defined(OPENSSL_SMALL_FOOTPRINT)
1074 if (16 % sizeof(size_t) == 0) { /* always true actually */
1075 do {
1076 if (n) {
1077 while (n && len) {
1078 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1079 --len;
1080 n = (n + 1) % 16;
1081 }
1082 if (n == 0)
1083 GCM_MUL(ctx, Xi);
1084 else {
1085 ctx->mres = n;
1086 return 0;
1087 }
1088 }
1089# if defined(STRICT_ALIGNMENT)
1090 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1091 break;
1092# endif
1093# if defined(GHASH) && defined(GHASH_CHUNK)
1094 while (len >= GHASH_CHUNK) {
1095 size_t j = GHASH_CHUNK;
1096
1097 while (j) {
1098 size_t *out_t = (size_t *)out;
1099 const size_t *in_t = (const size_t *)in;
1100
1101 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1102 ++ctr;
1103 if (is_endian.little)
1104# ifdef BSWAP4
1105 ctx->Yi.d[3] = BSWAP4(ctr);
1106# else
1107 PUTU32(ctx->Yi.c + 12, ctr);
1108# endif
1109 else
1110 ctx->Yi.d[3] = ctr;
1111 for (i = 0; i < 16 / sizeof(size_t); ++i)
1112 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1113 out += 16;
1114 in += 16;
1115 j -= 16;
1116 }
1117 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1118 len -= GHASH_CHUNK;
1119 }
1120 if ((i = (len & (size_t)-16))) {
1121 size_t j = i;
1122
1123 while (len >= 16) {
1124 size_t *out_t = (size_t *)out;
1125 const size_t *in_t = (const size_t *)in;
1126
1127 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1128 ++ctr;
1129 if (is_endian.little)
1130# ifdef BSWAP4
1131 ctx->Yi.d[3] = BSWAP4(ctr);
1132# else
1133 PUTU32(ctx->Yi.c + 12, ctr);
1134# endif
1135 else
1136 ctx->Yi.d[3] = ctr;
1137 for (i = 0; i < 16 / sizeof(size_t); ++i)
1138 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1139 out += 16;
1140 in += 16;
1141 len -= 16;
1142 }
1143 GHASH(ctx, out - j, j);
1144 }
1145# else
1146 while (len >= 16) {
1147 size_t *out_t = (size_t *)out;
1148 const size_t *in_t = (const size_t *)in;
1149
1150 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1151 ++ctr;
1152 if (is_endian.little)
1153# ifdef BSWAP4
1154 ctx->Yi.d[3] = BSWAP4(ctr);
1155# else
1156 PUTU32(ctx->Yi.c + 12, ctr);
1157# endif
1158 else
1159 ctx->Yi.d[3] = ctr;
1160 for (i = 0; i < 16 / sizeof(size_t); ++i)
1161 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1162 GCM_MUL(ctx, Xi);
1163 out += 16;
1164 in += 16;
1165 len -= 16;
1166 }
1167# endif
1168 if (len) {
1169 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1170 ++ctr;
1171 if (is_endian.little)
1172# ifdef BSWAP4
1173 ctx->Yi.d[3] = BSWAP4(ctr);
1174# else
1175 PUTU32(ctx->Yi.c + 12, ctr);
1176# endif
1177 else
1178 ctx->Yi.d[3] = ctr;
1179 while (len--) {
1180 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1181 ++n;
1182 }
1183 }
1184
1185 ctx->mres = n;
1186 return 0;
1187 } while (0);
1188 }
e7f5b1cd 1189#endif
0f113f3e
MC
1190 for (i = 0; i < len; ++i) {
1191 if (n == 0) {
1192 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1193 ++ctr;
1194 if (is_endian.little)
997d1aac 1195#ifdef BSWAP4
0f113f3e 1196 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 1197#else
0f113f3e
MC
1198 PUTU32(ctx->Yi.c + 12, ctr);
1199#endif
1200 else
1201 ctx->Yi.d[3] = ctr;
1202 }
1203 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1204 n = (n + 1) % 16;
1205 if (n == 0)
1206 GCM_MUL(ctx, Xi);
1207 }
1208
1209 ctx->mres = n;
1210 return 0;
e7f5b1cd
AP
1211}
1212
1f2502eb 1213int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
0f113f3e
MC
1214 const unsigned char *in, unsigned char *out,
1215 size_t len)
e7f5b1cd 1216{
0f113f3e
MC
1217 const union {
1218 long one;
1219 char little;
1220 } is_endian = {
1221 1
1222 };
1223 unsigned int n, ctr;
1224 size_t i;
1225 u64 mlen = ctx->len.u[1];
1226 block128_f block = ctx->block;
1227 void *key = ctx->key;
d8d95832 1228#ifdef GCM_FUNCREF_4BIT
0f113f3e 1229 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
d8d95832 1230# ifdef GHASH
0f113f3e
MC
1231 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1232 const u8 *inp, size_t len) = ctx->ghash;
d8d95832
AP
1233# endif
1234#endif
1f2502eb 1235
0f113f3e
MC
1236 mlen += len;
1237 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1238 return -1;
1239 ctx->len.u[1] = mlen;
e7f5b1cd 1240
0f113f3e
MC
1241 if (ctx->ares) {
1242 /* First call to decrypt finalizes GHASH(AAD) */
1243 GCM_MUL(ctx, Xi);
1244 ctx->ares = 0;
1245 }
b68c1315 1246
0f113f3e 1247 if (is_endian.little)
997d1aac 1248#ifdef BSWAP4
0f113f3e 1249 ctr = BSWAP4(ctx->Yi.d[3]);
997d1aac 1250#else
0f113f3e 1251 ctr = GETU32(ctx->Yi.c + 12);
997d1aac 1252#endif
0f113f3e
MC
1253 else
1254 ctr = ctx->Yi.d[3];
e7f5b1cd 1255
0f113f3e 1256 n = ctx->mres;
e7f5b1cd 1257#if !defined(OPENSSL_SMALL_FOOTPRINT)
0f113f3e
MC
1258 if (16 % sizeof(size_t) == 0) { /* always true actually */
1259 do {
1260 if (n) {
1261 while (n && len) {
1262 u8 c = *(in++);
1263 *(out++) = c ^ ctx->EKi.c[n];
1264 ctx->Xi.c[n] ^= c;
1265 --len;
1266 n = (n + 1) % 16;
1267 }
1268 if (n == 0)
1269 GCM_MUL(ctx, Xi);
1270 else {
1271 ctx->mres = n;
1272 return 0;
1273 }
1274 }
1275# if defined(STRICT_ALIGNMENT)
1276 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1277 break;
1278# endif
1279# if defined(GHASH) && defined(GHASH_CHUNK)
1280 while (len >= GHASH_CHUNK) {
1281 size_t j = GHASH_CHUNK;
1282
1283 GHASH(ctx, in, GHASH_CHUNK);
1284 while (j) {
1285 size_t *out_t = (size_t *)out;
1286 const size_t *in_t = (const size_t *)in;
1287
1288 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1289 ++ctr;
1290 if (is_endian.little)
1291# ifdef BSWAP4
1292 ctx->Yi.d[3] = BSWAP4(ctr);
1293# else
1294 PUTU32(ctx->Yi.c + 12, ctr);
1295# endif
1296 else
1297 ctx->Yi.d[3] = ctr;
1298 for (i = 0; i < 16 / sizeof(size_t); ++i)
1299 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1300 out += 16;
1301 in += 16;
1302 j -= 16;
1303 }
1304 len -= GHASH_CHUNK;
1305 }
1306 if ((i = (len & (size_t)-16))) {
1307 GHASH(ctx, in, i);
1308 while (len >= 16) {
1309 size_t *out_t = (size_t *)out;
1310 const size_t *in_t = (const size_t *)in;
1311
1312 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1313 ++ctr;
1314 if (is_endian.little)
1315# ifdef BSWAP4
1316 ctx->Yi.d[3] = BSWAP4(ctr);
1317# else
1318 PUTU32(ctx->Yi.c + 12, ctr);
1319# endif
1320 else
1321 ctx->Yi.d[3] = ctr;
1322 for (i = 0; i < 16 / sizeof(size_t); ++i)
1323 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1324 out += 16;
1325 in += 16;
1326 len -= 16;
1327 }
1328 }
1329# else
1330 while (len >= 16) {
1331 size_t *out_t = (size_t *)out;
1332 const size_t *in_t = (const size_t *)in;
1333
1334 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1335 ++ctr;
1336 if (is_endian.little)
1337# ifdef BSWAP4
1338 ctx->Yi.d[3] = BSWAP4(ctr);
1339# else
1340 PUTU32(ctx->Yi.c + 12, ctr);
1341# endif
1342 else
1343 ctx->Yi.d[3] = ctr;
1344 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1345 size_t c = in[i];
1346 out[i] = c ^ ctx->EKi.t[i];
1347 ctx->Xi.t[i] ^= c;
1348 }
1349 GCM_MUL(ctx, Xi);
1350 out += 16;
1351 in += 16;
1352 len -= 16;
1353 }
1354# endif
1355 if (len) {
1356 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1357 ++ctr;
1358 if (is_endian.little)
1359# ifdef BSWAP4
1360 ctx->Yi.d[3] = BSWAP4(ctr);
1361# else
1362 PUTU32(ctx->Yi.c + 12, ctr);
1363# endif
1364 else
1365 ctx->Yi.d[3] = ctr;
1366 while (len--) {
1367 u8 c = in[n];
1368 ctx->Xi.c[n] ^= c;
1369 out[n] = c ^ ctx->EKi.c[n];
1370 ++n;
1371 }
1372 }
1373
1374 ctx->mres = n;
1375 return 0;
1376 } while (0);
1377 }
997d1aac 1378#endif
0f113f3e
MC
1379 for (i = 0; i < len; ++i) {
1380 u8 c;
1381 if (n == 0) {
1382 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1383 ++ctr;
1384 if (is_endian.little)
997d1aac 1385#ifdef BSWAP4
0f113f3e 1386 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 1387#else
0f113f3e
MC
1388 PUTU32(ctx->Yi.c + 12, ctr);
1389#endif
1390 else
1391 ctx->Yi.d[3] = ctr;
1392 }
1393 c = in[i];
1394 out[i] = c ^ ctx->EKi.c[n];
1395 ctx->Xi.c[n] ^= c;
1396 n = (n + 1) % 16;
1397 if (n == 0)
1398 GCM_MUL(ctx, Xi);
1399 }
96a4cf8c 1400
0f113f3e
MC
1401 ctx->mres = n;
1402 return 0;
e7f5b1cd
AP
1403}
1404
1f2502eb 1405int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
0f113f3e
MC
1406 const unsigned char *in, unsigned char *out,
1407 size_t len, ctr128_f stream)
f71c6ace 1408{
0f113f3e
MC
1409 const union {
1410 long one;
1411 char little;
1412 } is_endian = {
1413 1
1414 };
1415 unsigned int n, ctr;
1416 size_t i;
1417 u64 mlen = ctx->len.u[1];
1418 void *key = ctx->key;
d8d95832 1419#ifdef GCM_FUNCREF_4BIT
0f113f3e 1420 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
d8d95832 1421# ifdef GHASH
0f113f3e
MC
1422 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1423 const u8 *inp, size_t len) = ctx->ghash;
d8d95832
AP
1424# endif
1425#endif
1f2502eb 1426
0f113f3e
MC
1427 mlen += len;
1428 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1429 return -1;
1430 ctx->len.u[1] = mlen;
f71c6ace 1431
0f113f3e
MC
1432 if (ctx->ares) {
1433 /* First call to encrypt finalizes GHASH(AAD) */
1434 GCM_MUL(ctx, Xi);
1435 ctx->ares = 0;
1436 }
b68c1315 1437
0f113f3e 1438 if (is_endian.little)
997d1aac 1439#ifdef BSWAP4
0f113f3e 1440 ctr = BSWAP4(ctx->Yi.d[3]);
997d1aac 1441#else
0f113f3e
MC
1442 ctr = GETU32(ctx->Yi.c + 12);
1443#endif
1444 else
1445 ctr = ctx->Yi.d[3];
1446
1447 n = ctx->mres;
1448 if (n) {
1449 while (n && len) {
1450 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1451 --len;
1452 n = (n + 1) % 16;
1453 }
1454 if (n == 0)
1455 GCM_MUL(ctx, Xi);
1456 else {
1457 ctx->mres = n;
1458 return 0;
1459 }
1460 }
f71c6ace 1461#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
0f113f3e
MC
1462 while (len >= GHASH_CHUNK) {
1463 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1464 ctr += GHASH_CHUNK / 16;
1465 if (is_endian.little)
1466# ifdef BSWAP4
1467 ctx->Yi.d[3] = BSWAP4(ctr);
1468# else
1469 PUTU32(ctx->Yi.c + 12, ctr);
1470# endif
1471 else
1472 ctx->Yi.d[3] = ctr;
1473 GHASH(ctx, out, GHASH_CHUNK);
1474 out += GHASH_CHUNK;
1475 in += GHASH_CHUNK;
1476 len -= GHASH_CHUNK;
1477 }
f71c6ace 1478#endif
0f113f3e
MC
1479 if ((i = (len & (size_t)-16))) {
1480 size_t j = i / 16;
f71c6ace 1481
0f113f3e
MC
1482 (*stream) (in, out, j, key, ctx->Yi.c);
1483 ctr += (unsigned int)j;
1484 if (is_endian.little)
997d1aac 1485#ifdef BSWAP4
0f113f3e 1486 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 1487#else
0f113f3e 1488 PUTU32(ctx->Yi.c + 12, ctr);
997d1aac 1489#endif
0f113f3e
MC
1490 else
1491 ctx->Yi.d[3] = ctr;
1492 in += i;
1493 len -= i;
f71c6ace 1494#if defined(GHASH)
0f113f3e
MC
1495 GHASH(ctx, out, i);
1496 out += i;
f71c6ace 1497#else
0f113f3e
MC
1498 while (j--) {
1499 for (i = 0; i < 16; ++i)
1500 ctx->Xi.c[i] ^= out[i];
1501 GCM_MUL(ctx, Xi);
1502 out += 16;
1503 }
f71c6ace 1504#endif
0f113f3e
MC
1505 }
1506 if (len) {
1507 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1508 ++ctr;
1509 if (is_endian.little)
997d1aac 1510#ifdef BSWAP4
0f113f3e 1511 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 1512#else
0f113f3e
MC
1513 PUTU32(ctx->Yi.c + 12, ctr);
1514#endif
1515 else
1516 ctx->Yi.d[3] = ctr;
1517 while (len--) {
1518 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1519 ++n;
1520 }
1521 }
1522
1523 ctx->mres = n;
1524 return 0;
f71c6ace
AP
1525}
1526
1f2502eb 1527int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
0f113f3e
MC
1528 const unsigned char *in, unsigned char *out,
1529 size_t len, ctr128_f stream)
f71c6ace 1530{
0f113f3e
MC
1531 const union {
1532 long one;
1533 char little;
1534 } is_endian = {
1535 1
1536 };
1537 unsigned int n, ctr;
1538 size_t i;
1539 u64 mlen = ctx->len.u[1];
1540 void *key = ctx->key;
d8d95832 1541#ifdef GCM_FUNCREF_4BIT
0f113f3e 1542 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
d8d95832 1543# ifdef GHASH
0f113f3e
MC
1544 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1545 const u8 *inp, size_t len) = ctx->ghash;
d8d95832
AP
1546# endif
1547#endif
1f2502eb 1548
0f113f3e
MC
1549 mlen += len;
1550 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1551 return -1;
1552 ctx->len.u[1] = mlen;
f71c6ace 1553
0f113f3e
MC
1554 if (ctx->ares) {
1555 /* First call to decrypt finalizes GHASH(AAD) */
1556 GCM_MUL(ctx, Xi);
1557 ctx->ares = 0;
1558 }
b68c1315 1559
0f113f3e 1560 if (is_endian.little)
997d1aac 1561#ifdef BSWAP4
0f113f3e 1562 ctr = BSWAP4(ctx->Yi.d[3]);
997d1aac 1563#else
0f113f3e
MC
1564 ctr = GETU32(ctx->Yi.c + 12);
1565#endif
1566 else
1567 ctr = ctx->Yi.d[3];
1568
1569 n = ctx->mres;
1570 if (n) {
1571 while (n && len) {
1572 u8 c = *(in++);
1573 *(out++) = c ^ ctx->EKi.c[n];
1574 ctx->Xi.c[n] ^= c;
1575 --len;
1576 n = (n + 1) % 16;
1577 }
1578 if (n == 0)
1579 GCM_MUL(ctx, Xi);
1580 else {
1581 ctx->mres = n;
1582 return 0;
1583 }
1584 }
f71c6ace 1585#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
0f113f3e
MC
1586 while (len >= GHASH_CHUNK) {
1587 GHASH(ctx, in, GHASH_CHUNK);
1588 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1589 ctr += GHASH_CHUNK / 16;
1590 if (is_endian.little)
1591# ifdef BSWAP4
1592 ctx->Yi.d[3] = BSWAP4(ctr);
1593# else
1594 PUTU32(ctx->Yi.c + 12, ctr);
1595# endif
1596 else
1597 ctx->Yi.d[3] = ctr;
1598 out += GHASH_CHUNK;
1599 in += GHASH_CHUNK;
1600 len -= GHASH_CHUNK;
1601 }
f71c6ace 1602#endif
0f113f3e
MC
1603 if ((i = (len & (size_t)-16))) {
1604 size_t j = i / 16;
f71c6ace
AP
1605
1606#if defined(GHASH)
0f113f3e 1607 GHASH(ctx, in, i);
f71c6ace 1608#else
0f113f3e
MC
1609 while (j--) {
1610 size_t k;
1611 for (k = 0; k < 16; ++k)
1612 ctx->Xi.c[k] ^= in[k];
1613 GCM_MUL(ctx, Xi);
1614 in += 16;
1615 }
1616 j = i / 16;
1617 in -= i;
1618#endif
1619 (*stream) (in, out, j, key, ctx->Yi.c);
1620 ctr += (unsigned int)j;
1621 if (is_endian.little)
997d1aac 1622#ifdef BSWAP4
0f113f3e 1623 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 1624#else
0f113f3e 1625 PUTU32(ctx->Yi.c + 12, ctr);
997d1aac 1626#endif
0f113f3e
MC
1627 else
1628 ctx->Yi.d[3] = ctr;
1629 out += i;
1630 in += i;
1631 len -= i;
1632 }
1633 if (len) {
1634 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1635 ++ctr;
1636 if (is_endian.little)
997d1aac 1637#ifdef BSWAP4
0f113f3e 1638 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 1639#else
0f113f3e
MC
1640 PUTU32(ctx->Yi.c + 12, ctr);
1641#endif
1642 else
1643 ctx->Yi.d[3] = ctr;
1644 while (len--) {
1645 u8 c = in[n];
1646 ctx->Xi.c[n] ^= c;
1647 out[n] = c ^ ctx->EKi.c[n];
1648 ++n;
1649 }
1650 }
1651
1652 ctx->mres = n;
1653 return 0;
f71c6ace
AP
1654}
1655
0f113f3e
MC
1656int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1657 size_t len)
e7f5b1cd 1658{
0f113f3e
MC
1659 const union {
1660 long one;
1661 char little;
1662 } is_endian = {
1663 1
1664 };
1665 u64 alen = ctx->len.u[0] << 3;
1666 u64 clen = ctx->len.u[1] << 3;
d8d95832 1667#ifdef GCM_FUNCREF_4BIT
0f113f3e 1668 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
d8d95832 1669#endif
e7f5b1cd 1670
0f113f3e
MC
1671 if (ctx->mres || ctx->ares)
1672 GCM_MUL(ctx, Xi);
e7f5b1cd 1673
0f113f3e 1674 if (is_endian.little) {
e7f5b1cd 1675#ifdef BSWAP8
0f113f3e
MC
1676 alen = BSWAP8(alen);
1677 clen = BSWAP8(clen);
e7f5b1cd 1678#else
0f113f3e 1679 u8 *p = ctx->len.c;
e7f5b1cd 1680
0f113f3e
MC
1681 ctx->len.u[0] = alen;
1682 ctx->len.u[1] = clen;
e7f5b1cd 1683
0f113f3e
MC
1684 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1685 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
e7f5b1cd 1686#endif
0f113f3e 1687 }
e7f5b1cd 1688
0f113f3e
MC
1689 ctx->Xi.u[0] ^= alen;
1690 ctx->Xi.u[1] ^= clen;
1691 GCM_MUL(ctx, Xi);
e7f5b1cd 1692
0f113f3e
MC
1693 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1694 ctx->Xi.u[1] ^= ctx->EK0.u[1];
6acb4ff3 1695
0f113f3e
MC
1696 if (tag && len <= sizeof(ctx->Xi))
1697 return memcmp(ctx->Xi.c, tag, len);
1698 else
1699 return -1;
6acb4ff3
AP
1700}
1701
fd3dbc1d
DSH
1702void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1703{
0f113f3e
MC
1704 CRYPTO_gcm128_finish(ctx, NULL, 0);
1705 memcpy(tag, ctx->Xi.c,
1706 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
fd3dbc1d
DSH
1707}
1708
6acb4ff3
AP
1709GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1710{
0f113f3e 1711 GCM128_CONTEXT *ret;
6acb4ff3 1712
0f113f3e
MC
1713 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1714 CRYPTO_gcm128_init(ret, key, block);
6acb4ff3 1715
0f113f3e 1716 return ret;
6acb4ff3
AP
1717}
1718
1719void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1720{
0f113f3e
MC
1721 if (ctx) {
1722 OPENSSL_cleanse(ctx, sizeof(*ctx));
1723 OPENSSL_free(ctx);
1724 }
e7f5b1cd
AP
1725}
1726
1727#if defined(SELFTEST)
0f113f3e
MC
1728# include <stdio.h>
1729# include <openssl/aes.h>
e7f5b1cd
AP
1730
1731/* Test Case 1 */
0f113f3e
MC
1732static const u8 K1[16], *P1 = NULL, *A1 = NULL, IV1[12], *C1 = NULL;
1733static const u8 T1[] = {
1734 0x58, 0xe2, 0xfc, 0xce, 0xfa, 0x7e, 0x30, 0x61,
1735 0x36, 0x7f, 0x1d, 0x57, 0xa4, 0xe7, 0x45, 0x5a
1736};
a595baff 1737
e7f5b1cd 1738/* Test Case 2 */
0f113f3e
MC
1739# define K2 K1
1740# define A2 A1
1741# define IV2 IV1
1742static const u8 P2[16];
1743static const u8 C2[] = {
1744 0x03, 0x88, 0xda, 0xce, 0x60, 0xb6, 0xa3, 0x92,
1745 0xf3, 0x28, 0xc2, 0xb9, 0x71, 0xb2, 0xfe, 0x78
1746};
1747
1748static const u8 T2[] = {
1749 0xab, 0x6e, 0x47, 0xd4, 0x2c, 0xec, 0x13, 0xbd,
1750 0xf5, 0x3a, 0x67, 0xb2, 0x12, 0x57, 0xbd, 0xdf
1751};
e7f5b1cd
AP
1752
1753/* Test Case 3 */
0f113f3e
MC
1754# define A3 A2
1755static const u8 K3[] = {
1756 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1757 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
1758};
1759
1760static const u8 P3[] = {
1761 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1762 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1763 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1764 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1765 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1766 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1767 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1768 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1769};
1770
1771static const u8 IV3[] = {
1772 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1773 0xde, 0xca, 0xf8, 0x88
1774};
1775
1776static const u8 C3[] = {
1777 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1778 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1779 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1780 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1781 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1782 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1783 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1784 0x3d, 0x58, 0xe0, 0x91, 0x47, 0x3f, 0x59, 0x85
1785};
1786
1787static const u8 T3[] = {
1788 0x4d, 0x5c, 0x2a, 0xf3, 0x27, 0xcd, 0x64, 0xa6,
1789 0x2c, 0xf3, 0x5a, 0xbd, 0x2b, 0xa6, 0xfa, 0xb4
1790};
e7f5b1cd
AP
1791
1792/* Test Case 4 */
0f113f3e
MC
1793# define K4 K3
1794# define IV4 IV3
1795static const u8 P4[] = {
1796 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1797 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1798 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1799 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1800 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1801 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1802 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1803 0xba, 0x63, 0x7b, 0x39
1804};
1805
1806static const u8 A4[] = {
1807 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1808 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1809 0xab, 0xad, 0xda, 0xd2
1810};
1811
1812static const u8 C4[] = {
1813 0x42, 0x83, 0x1e, 0xc2, 0x21, 0x77, 0x74, 0x24,
1814 0x4b, 0x72, 0x21, 0xb7, 0x84, 0xd0, 0xd4, 0x9c,
1815 0xe3, 0xaa, 0x21, 0x2f, 0x2c, 0x02, 0xa4, 0xe0,
1816 0x35, 0xc1, 0x7e, 0x23, 0x29, 0xac, 0xa1, 0x2e,
1817 0x21, 0xd5, 0x14, 0xb2, 0x54, 0x66, 0x93, 0x1c,
1818 0x7d, 0x8f, 0x6a, 0x5a, 0xac, 0x84, 0xaa, 0x05,
1819 0x1b, 0xa3, 0x0b, 0x39, 0x6a, 0x0a, 0xac, 0x97,
1820 0x3d, 0x58, 0xe0, 0x91
1821};
1822
1823static const u8 T4[] = {
1824 0x5b, 0xc9, 0x4f, 0xbc, 0x32, 0x21, 0xa5, 0xdb,
1825 0x94, 0xfa, 0xe9, 0x5a, 0xe7, 0x12, 0x1a, 0x47
1826};
e7f5b1cd
AP
1827
1828/* Test Case 5 */
0f113f3e
MC
1829# define K5 K4
1830# define P5 P4
1831# define A5 A4
1832static const u8 IV5[] = {
1833 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad
1834};
1835
1836static const u8 C5[] = {
1837 0x61, 0x35, 0x3b, 0x4c, 0x28, 0x06, 0x93, 0x4a,
1838 0x77, 0x7f, 0xf5, 0x1f, 0xa2, 0x2a, 0x47, 0x55,
1839 0x69, 0x9b, 0x2a, 0x71, 0x4f, 0xcd, 0xc6, 0xf8,
1840 0x37, 0x66, 0xe5, 0xf9, 0x7b, 0x6c, 0x74, 0x23,
1841 0x73, 0x80, 0x69, 0x00, 0xe4, 0x9f, 0x24, 0xb2,
1842 0x2b, 0x09, 0x75, 0x44, 0xd4, 0x89, 0x6b, 0x42,
1843 0x49, 0x89, 0xb5, 0xe1, 0xeb, 0xac, 0x0f, 0x07,
1844 0xc2, 0x3f, 0x45, 0x98
1845};
1846
1847static const u8 T5[] = {
1848 0x36, 0x12, 0xd2, 0xe7, 0x9e, 0x3b, 0x07, 0x85,
1849 0x56, 0x1b, 0xe1, 0x4a, 0xac, 0xa2, 0xfc, 0xcb
1850};
a595baff 1851
e7f5b1cd 1852/* Test Case 6 */
0f113f3e
MC
1853# define K6 K5
1854# define P6 P5
1855# define A6 A5
1856static const u8 IV6[] = {
1857 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
1858 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
1859 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
1860 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
1861 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
1862 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
1863 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
1864 0xa6, 0x37, 0xb3, 0x9b
1865};
1866
1867static const u8 C6[] = {
1868 0x8c, 0xe2, 0x49, 0x98, 0x62, 0x56, 0x15, 0xb6,
1869 0x03, 0xa0, 0x33, 0xac, 0xa1, 0x3f, 0xb8, 0x94,
1870 0xbe, 0x91, 0x12, 0xa5, 0xc3, 0xa2, 0x11, 0xa8,
1871 0xba, 0x26, 0x2a, 0x3c, 0xca, 0x7e, 0x2c, 0xa7,
1872 0x01, 0xe4, 0xa9, 0xa4, 0xfb, 0xa4, 0x3c, 0x90,
1873 0xcc, 0xdc, 0xb2, 0x81, 0xd4, 0x8c, 0x7c, 0x6f,
1874 0xd6, 0x28, 0x75, 0xd2, 0xac, 0xa4, 0x17, 0x03,
1875 0x4c, 0x34, 0xae, 0xe5
1876};
1877
1878static const u8 T6[] = {
1879 0x61, 0x9c, 0xc5, 0xae, 0xff, 0xfe, 0x0b, 0xfa,
1880 0x46, 0x2a, 0xf4, 0x3c, 0x16, 0x99, 0xd0, 0x50
1881};
e7f5b1cd
AP
1882
1883/* Test Case 7 */
0f113f3e
MC
1884static const u8 K7[24], *P7 = NULL, *A7 = NULL, IV7[12], *C7 = NULL;
1885static const u8 T7[] = {
1886 0xcd, 0x33, 0xb2, 0x8a, 0xc7, 0x73, 0xf7, 0x4b,
1887 0xa0, 0x0e, 0xd1, 0xf3, 0x12, 0x57, 0x24, 0x35
1888};
e7f5b1cd
AP
1889
1890/* Test Case 8 */
0f113f3e
MC
1891# define K8 K7
1892# define IV8 IV7
1893# define A8 A7
1894static const u8 P8[16];
1895static const u8 C8[] = {
1896 0x98, 0xe7, 0x24, 0x7c, 0x07, 0xf0, 0xfe, 0x41,
1897 0x1c, 0x26, 0x7e, 0x43, 0x84, 0xb0, 0xf6, 0x00
1898};
1899
1900static const u8 T8[] = {
1901 0x2f, 0xf5, 0x8d, 0x80, 0x03, 0x39, 0x27, 0xab,
1902 0x8e, 0xf4, 0xd4, 0x58, 0x75, 0x14, 0xf0, 0xfb
1903};
e7f5b1cd
AP
1904
1905/* Test Case 9 */
0f113f3e
MC
1906# define A9 A8
1907static const u8 K9[] = {
1908 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
1909 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
1910 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c
1911};
1912
1913static const u8 P9[] = {
1914 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1915 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1916 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1917 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1918 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1919 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1920 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1921 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
1922};
1923
1924static const u8 IV9[] = {
1925 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
1926 0xde, 0xca, 0xf8, 0x88
1927};
1928
1929static const u8 C9[] = {
1930 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1931 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1932 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1933 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1934 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1935 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1936 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1937 0xcc, 0xda, 0x27, 0x10, 0xac, 0xad, 0xe2, 0x56
1938};
1939
1940static const u8 T9[] = {
1941 0x99, 0x24, 0xa7, 0xc8, 0x58, 0x73, 0x36, 0xbf,
1942 0xb1, 0x18, 0x02, 0x4d, 0xb8, 0x67, 0x4a, 0x14
1943};
e7f5b1cd
AP
1944
1945/* Test Case 10 */
0f113f3e
MC
1946# define K10 K9
1947# define IV10 IV9
1948static const u8 P10[] = {
1949 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
1950 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
1951 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
1952 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
1953 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
1954 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
1955 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
1956 0xba, 0x63, 0x7b, 0x39
1957};
1958
1959static const u8 A10[] = {
1960 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1961 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
1962 0xab, 0xad, 0xda, 0xd2
1963};
1964
1965static const u8 C10[] = {
1966 0x39, 0x80, 0xca, 0x0b, 0x3c, 0x00, 0xe8, 0x41,
1967 0xeb, 0x06, 0xfa, 0xc4, 0x87, 0x2a, 0x27, 0x57,
1968 0x85, 0x9e, 0x1c, 0xea, 0xa6, 0xef, 0xd9, 0x84,
1969 0x62, 0x85, 0x93, 0xb4, 0x0c, 0xa1, 0xe1, 0x9c,
1970 0x7d, 0x77, 0x3d, 0x00, 0xc1, 0x44, 0xc5, 0x25,
1971 0xac, 0x61, 0x9d, 0x18, 0xc8, 0x4a, 0x3f, 0x47,
1972 0x18, 0xe2, 0x44, 0x8b, 0x2f, 0xe3, 0x24, 0xd9,
1973 0xcc, 0xda, 0x27, 0x10
1974};
1975
1976static const u8 T10[] = {
1977 0x25, 0x19, 0x49, 0x8e, 0x80, 0xf1, 0x47, 0x8f,
1978 0x37, 0xba, 0x55, 0xbd, 0x6d, 0x27, 0x61, 0x8c
1979};
e7f5b1cd
AP
1980
1981/* Test Case 11 */
0f113f3e
MC
1982# define K11 K10
1983# define P11 P10
1984# define A11 A10
1985static const u8 IV11[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
1986
1987static const u8 C11[] = {
1988 0x0f, 0x10, 0xf5, 0x99, 0xae, 0x14, 0xa1, 0x54,
1989 0xed, 0x24, 0xb3, 0x6e, 0x25, 0x32, 0x4d, 0xb8,
1990 0xc5, 0x66, 0x63, 0x2e, 0xf2, 0xbb, 0xb3, 0x4f,
1991 0x83, 0x47, 0x28, 0x0f, 0xc4, 0x50, 0x70, 0x57,
1992 0xfd, 0xdc, 0x29, 0xdf, 0x9a, 0x47, 0x1f, 0x75,
1993 0xc6, 0x65, 0x41, 0xd4, 0xd4, 0xda, 0xd1, 0xc9,
1994 0xe9, 0x3a, 0x19, 0xa5, 0x8e, 0x8b, 0x47, 0x3f,
1995 0xa0, 0xf0, 0x62, 0xf7
1996};
1997
1998static const u8 T11[] = {
1999 0x65, 0xdc, 0xc5, 0x7f, 0xcf, 0x62, 0x3a, 0x24,
2000 0x09, 0x4f, 0xcc, 0xa4, 0x0d, 0x35, 0x33, 0xf8
2001};
e7f5b1cd
AP
2002
2003/* Test Case 12 */
0f113f3e
MC
2004# define K12 K11
2005# define P12 P11
2006# define A12 A11
2007static const u8 IV12[] = {
2008 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2009 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2010 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2011 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2012 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2013 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2014 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2015 0xa6, 0x37, 0xb3, 0x9b
2016};
2017
2018static const u8 C12[] = {
2019 0xd2, 0x7e, 0x88, 0x68, 0x1c, 0xe3, 0x24, 0x3c,
2020 0x48, 0x30, 0x16, 0x5a, 0x8f, 0xdc, 0xf9, 0xff,
2021 0x1d, 0xe9, 0xa1, 0xd8, 0xe6, 0xb4, 0x47, 0xef,
2022 0x6e, 0xf7, 0xb7, 0x98, 0x28, 0x66, 0x6e, 0x45,
2023 0x81, 0xe7, 0x90, 0x12, 0xaf, 0x34, 0xdd, 0xd9,
2024 0xe2, 0xf0, 0x37, 0x58, 0x9b, 0x29, 0x2d, 0xb3,
2025 0xe6, 0x7c, 0x03, 0x67, 0x45, 0xfa, 0x22, 0xe7,
2026 0xe9, 0xb7, 0x37, 0x3b
2027};
2028
2029static const u8 T12[] = {
2030 0xdc, 0xf5, 0x66, 0xff, 0x29, 0x1c, 0x25, 0xbb,
2031 0xb8, 0x56, 0x8f, 0xc3, 0xd3, 0x76, 0xa6, 0xd9
2032};
e7f5b1cd
AP
2033
2034/* Test Case 13 */
0f113f3e
MC
2035static const u8 K13[32], *P13 = NULL, *A13 = NULL, IV13[12], *C13 = NULL;
2036static const u8 T13[] = {
2037 0x53, 0x0f, 0x8a, 0xfb, 0xc7, 0x45, 0x36, 0xb9,
2038 0xa9, 0x63, 0xb4, 0xf1, 0xc4, 0xcb, 0x73, 0x8b
2039};
e7f5b1cd
AP
2040
2041/* Test Case 14 */
0f113f3e
MC
2042# define K14 K13
2043# define A14 A13
2044static const u8 P14[16], IV14[12];
2045static const u8 C14[] = {
2046 0xce, 0xa7, 0x40, 0x3d, 0x4d, 0x60, 0x6b, 0x6e,
2047 0x07, 0x4e, 0xc5, 0xd3, 0xba, 0xf3, 0x9d, 0x18
2048};
2049
2050static const u8 T14[] = {
2051 0xd0, 0xd1, 0xc8, 0xa7, 0x99, 0x99, 0x6b, 0xf0,
2052 0x26, 0x5b, 0x98, 0xb5, 0xd4, 0x8a, 0xb9, 0x19
2053};
e7f5b1cd
AP
2054
2055/* Test Case 15 */
0f113f3e
MC
/*
 * Vector 15: no AAD (A15 resolves through A14 to the NULL pointer A13).
 * K15 is a 32-byte key, P15 a 64-byte plaintext, IV15 a 12-byte IV,
 * C15 the 64-byte expected ciphertext and T15 the 16-byte tag.
 */
2056# define A15 A14
2057static const u8 K15[] = {
2058 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2059 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08,
2060 0xfe, 0xff, 0xe9, 0x92, 0x86, 0x65, 0x73, 0x1c,
2061 0x6d, 0x6a, 0x8f, 0x94, 0x67, 0x30, 0x83, 0x08
2062};
2063
2064static const u8 P15[] = {
2065 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2066 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2067 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2068 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2069 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2070 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2071 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2072 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55
2073};
2074
2075static const u8 IV15[] = {
2076 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad,
2077 0xde, 0xca, 0xf8, 0x88
2078};
2079
2080static const u8 C15[] = {
2081 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2082 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2083 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2084 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2085 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2086 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2087 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2088 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2089};
2090
2091static const u8 T15[] = {
2092 0xb0, 0x94, 0xda, 0xc5, 0xd9, 0x34, 0x71, 0xbd,
2093 0xec, 0x1a, 0x50, 0x22, 0x70, 0xe3, 0xcc, 0x6c
2094};
e7f5b1cd
AP
2095
2096/* Test Case 16 */
0f113f3e
MC
/*
 * Vector 16: same key and IV as vector 15.  P16 is 60 bytes (the first 60
 * bytes of P15), A16 is 20 bytes of AAD, C16 is the 60-byte expected
 * ciphertext and T16 the 16-byte tag.
 */
2097# define K16 K15
2098# define IV16 IV15
2099static const u8 P16[] = {
2100 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2101 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2102 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2103 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2104 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2105 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2106 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2107 0xba, 0x63, 0x7b, 0x39
2108};
2109
2110static const u8 A16[] = {
2111 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2112 0xfe, 0xed, 0xfa, 0xce, 0xde, 0xad, 0xbe, 0xef,
2113 0xab, 0xad, 0xda, 0xd2
2114};
2115
2116static const u8 C16[] = {
2117 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2118 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2119 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2120 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2121 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2122 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2123 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2124 0xbc, 0xc9, 0xf6, 0x62
2125};
2126
2127static const u8 T16[] = {
2128 0x76, 0xfc, 0x6e, 0xce, 0x0f, 0x4e, 0x17, 0x68,
2129 0xcd, 0xdf, 0x88, 0x53, 0xbb, 0x2d, 0x55, 0x1b
2130};
e7f5b1cd
AP
2131
2132/* Test Case 17 */
0f113f3e
MC
/*
 * Vector 17: key, plaintext and AAD are those of vector 16; only the IV
 * changes.  IV17 is 8 bytes long (the first 8 bytes of IV15).  C17 is the
 * 60-byte expected ciphertext and T17 the 16-byte tag.
 */
2133# define K17 K16
2134# define P17 P16
2135# define A17 A16
2136static const u8 IV17[] = { 0xca, 0xfe, 0xba, 0xbe, 0xfa, 0xce, 0xdb, 0xad };
2137
2138static const u8 C17[] = {
2139 0xc3, 0x76, 0x2d, 0xf1, 0xca, 0x78, 0x7d, 0x32,
2140 0xae, 0x47, 0xc1, 0x3b, 0xf1, 0x98, 0x44, 0xcb,
2141 0xaf, 0x1a, 0xe1, 0x4d, 0x0b, 0x97, 0x6a, 0xfa,
2142 0xc5, 0x2f, 0xf7, 0xd7, 0x9b, 0xba, 0x9d, 0xe0,
2143 0xfe, 0xb5, 0x82, 0xd3, 0x39, 0x34, 0xa4, 0xf0,
2144 0x95, 0x4c, 0xc2, 0x36, 0x3b, 0xc7, 0x3f, 0x78,
2145 0x62, 0xac, 0x43, 0x0e, 0x64, 0xab, 0xe4, 0x99,
2146 0xf4, 0x7c, 0x9b, 0x1f
2147};
2148
2149static const u8 T17[] = {
2150 0x3a, 0x33, 0x7d, 0xbf, 0x46, 0xa7, 0x92, 0xc4,
2151 0x5e, 0x45, 0x49, 0x13, 0xfe, 0x2e, 0xa8, 0xf2
2152};
e7f5b1cd
AP
2153
2154/* Test Case 18 */
0f113f3e
MC
/*
 * Vector 18: key, plaintext and AAD are again those of vector 16; the IV
 * is 60 bytes long.  C18 is the 60-byte expected ciphertext and T18 the
 * 16-byte tag.
 */
2155# define K18 K17
2156# define P18 P17
2157# define A18 A17
2158static const u8 IV18[] = {
2159 0x93, 0x13, 0x22, 0x5d, 0xf8, 0x84, 0x06, 0xe5,
2160 0x55, 0x90, 0x9c, 0x5a, 0xff, 0x52, 0x69, 0xaa,
2161 0x6a, 0x7a, 0x95, 0x38, 0x53, 0x4f, 0x7d, 0xa1,
2162 0xe4, 0xc3, 0x03, 0xd2, 0xa3, 0x18, 0xa7, 0x28,
2163 0xc3, 0xc0, 0xc9, 0x51, 0x56, 0x80, 0x95, 0x39,
2164 0xfc, 0xf0, 0xe2, 0x42, 0x9a, 0x6b, 0x52, 0x54,
2165 0x16, 0xae, 0xdb, 0xf5, 0xa0, 0xde, 0x6a, 0x57,
2166 0xa6, 0x37, 0xb3, 0x9b
2167};
2168
2169static const u8 C18[] = {
2170 0x5a, 0x8d, 0xef, 0x2f, 0x0c, 0x9e, 0x53, 0xf1,
2171 0xf7, 0x5d, 0x78, 0x53, 0x65, 0x9e, 0x2a, 0x20,
2172 0xee, 0xb2, 0xb2, 0x2a, 0xaf, 0xde, 0x64, 0x19,
2173 0xa0, 0x58, 0xab, 0x4f, 0x6f, 0x74, 0x6b, 0xf4,
2174 0x0f, 0xc0, 0xc3, 0xb7, 0x80, 0xf2, 0x44, 0x45,
2175 0x2d, 0xa3, 0xeb, 0xf1, 0xc5, 0xd8, 0x2c, 0xde,
2176 0xa2, 0x41, 0x89, 0x97, 0x20, 0x0e, 0xf8, 0x2e,
2177 0x44, 0xae, 0x7e, 0x3f
2178};
2179
2180static const u8 T18[] = {
2181 0xa4, 0x4a, 0x82, 0x66, 0xee, 0x1c, 0x8e, 0xb0,
2182 0xc8, 0xb5, 0xd4, 0xcf, 0x5a, 0xe9, 0xf1, 0x9a
2183};
e7f5b1cd 2184
273a8081 2185/* Test Case 19 */
0f113f3e
MC
/*
 * Vector 19: key, plaintext, IV and ciphertext are aliases of vector 1
 * (defined outside this part of the file); only the AAD and tag differ.
 * A19 is 128 bytes: its first 64 bytes equal P15 and its last 64 bytes
 * equal C15.  T19 is the 16-byte tag.
 */
2186# define K19 K1
2187# define P19 P1
2188# define IV19 IV1
2189# define C19 C1
2190static const u8 A19[] = {
2191 0xd9, 0x31, 0x32, 0x25, 0xf8, 0x84, 0x06, 0xe5,
2192 0xa5, 0x59, 0x09, 0xc5, 0xaf, 0xf5, 0x26, 0x9a,
2193 0x86, 0xa7, 0xa9, 0x53, 0x15, 0x34, 0xf7, 0xda,
2194 0x2e, 0x4c, 0x30, 0x3d, 0x8a, 0x31, 0x8a, 0x72,
2195 0x1c, 0x3c, 0x0c, 0x95, 0x95, 0x68, 0x09, 0x53,
2196 0x2f, 0xcf, 0x0e, 0x24, 0x49, 0xa6, 0xb5, 0x25,
2197 0xb1, 0x6a, 0xed, 0xf5, 0xaa, 0x0d, 0xe6, 0x57,
2198 0xba, 0x63, 0x7b, 0x39, 0x1a, 0xaf, 0xd2, 0x55,
2199 0x52, 0x2d, 0xc1, 0xf0, 0x99, 0x56, 0x7d, 0x07,
2200 0xf4, 0x7f, 0x37, 0xa3, 0x2a, 0x84, 0x42, 0x7d,
2201 0x64, 0x3a, 0x8c, 0xdc, 0xbf, 0xe5, 0xc0, 0xc9,
2202 0x75, 0x98, 0xa2, 0xbd, 0x25, 0x55, 0xd1, 0xaa,
2203 0x8c, 0xb0, 0x8e, 0x48, 0x59, 0x0d, 0xbb, 0x3d,
2204 0xa7, 0xb0, 0x8b, 0x10, 0x56, 0x82, 0x88, 0x38,
2205 0xc5, 0xf6, 0x1e, 0x63, 0x93, 0xba, 0x7a, 0x0a,
2206 0xbc, 0xc9, 0xf6, 0x62, 0x89, 0x80, 0x15, 0xad
2207};
2208
2209static const u8 T19[] = {
2210 0x5f, 0xea, 0x79, 0x3a, 0x2d, 0x6f, 0x97, 0x4d,
2211 0x37, 0xe6, 0x8e, 0x0c, 0xb8, 0xff, 0x94, 0x92
2212};
273a8081 2213
1da5d302 2214/* Test Case 20 */
0f113f3e
MC
/* Vector 20 reuses the key and AAD of vector 1 (defined outside this part of the file). */
2215# define K20 K1
2216# define A20 A1
68d39f3c 2217/* this results in 0xff in counter LSB */
0f113f3e
MC
/*
 * IV20 is 64 bytes: the first four are 0xff, the remaining 60 are zero
 * because the initializer list is shorter than the array.  P20 is 288 zero
 * bytes (no initializer, static storage).  C20 is the 288-byte expected
 * ciphertext and T20 the 16-byte tag.
 */
2218static const u8 IV20[64] = { 0xff, 0xff, 0xff, 0xff };
2219
2220static const u8 P20[288];
2221static const u8 C20[] = {
2222 0x56, 0xb3, 0x37, 0x3c, 0xa9, 0xef, 0x6e, 0x4a,
2223 0x2b, 0x64, 0xfe, 0x1e, 0x9a, 0x17, 0xb6, 0x14,
2224 0x25, 0xf1, 0x0d, 0x47, 0xa7, 0x5a, 0x5f, 0xce,
2225 0x13, 0xef, 0xc6, 0xbc, 0x78, 0x4a, 0xf2, 0x4f,
2226 0x41, 0x41, 0xbd, 0xd4, 0x8c, 0xf7, 0xc7, 0x70,
2227 0x88, 0x7a, 0xfd, 0x57, 0x3c, 0xca, 0x54, 0x18,
2228 0xa9, 0xae, 0xff, 0xcd, 0x7c, 0x5c, 0xed, 0xdf,
2229 0xc6, 0xa7, 0x83, 0x97, 0xb9, 0xa8, 0x5b, 0x49,
2230 0x9d, 0xa5, 0x58, 0x25, 0x72, 0x67, 0xca, 0xab,
2231 0x2a, 0xd0, 0xb2, 0x3c, 0xa4, 0x76, 0xa5, 0x3c,
2232 0xb1, 0x7f, 0xb4, 0x1c, 0x4b, 0x8b, 0x47, 0x5c,
2233 0xb4, 0xf3, 0xf7, 0x16, 0x50, 0x94, 0xc2, 0x29,
2234 0xc9, 0xe8, 0xc4, 0xdc, 0x0a, 0x2a, 0x5f, 0xf1,
2235 0x90, 0x3e, 0x50, 0x15, 0x11, 0x22, 0x13, 0x76,
2236 0xa1, 0xcd, 0xb8, 0x36, 0x4c, 0x50, 0x61, 0xa2,
2237 0x0c, 0xae, 0x74, 0xbc, 0x4a, 0xcd, 0x76, 0xce,
2238 0xb0, 0xab, 0xc9, 0xfd, 0x32, 0x17, 0xef, 0x9f,
2239 0x8c, 0x90, 0xbe, 0x40, 0x2d, 0xdf, 0x6d, 0x86,
2240 0x97, 0xf4, 0xf8, 0x80, 0xdf, 0xf1, 0x5b, 0xfb,
2241 0x7a, 0x6b, 0x28, 0x24, 0x1e, 0xc8, 0xfe, 0x18,
2242 0x3c, 0x2d, 0x59, 0xe3, 0xf9, 0xdf, 0xff, 0x65,
2243 0x3c, 0x71, 0x26, 0xf0, 0xac, 0xb9, 0xe6, 0x42,
2244 0x11, 0xf4, 0x2b, 0xae, 0x12, 0xaf, 0x46, 0x2b,
2245 0x10, 0x70, 0xbe, 0xf1, 0xab, 0x5e, 0x36, 0x06,
2246 0x87, 0x2c, 0xa1, 0x0d, 0xee, 0x15, 0xb3, 0x24,
2247 0x9b, 0x1a, 0x1b, 0x95, 0x8f, 0x23, 0x13, 0x4c,
2248 0x4b, 0xcc, 0xb7, 0xd0, 0x32, 0x00, 0xbc, 0xe4,
2249 0x20, 0xa2, 0xf8, 0xeb, 0x66, 0xdc, 0xf3, 0x64,
2250 0x4d, 0x14, 0x23, 0xc1, 0xb5, 0x69, 0x90, 0x03,
2251 0xc1, 0x3e, 0xce, 0xf4, 0xbf, 0x38, 0xa3, 0xb6,
2252 0x0e, 0xed, 0xc3, 0x40, 0x33, 0xba, 0xc1, 0x90,
2253 0x27, 0x83, 0xdc, 0x6d, 0x89, 0xe2, 0xe7, 0x74,
2254 0x18, 0x8a, 0x43, 0x9c, 0x7e, 0xbc, 0xc0, 0x67,
2255 0x2d, 0xbd, 0xa4, 0xdd, 0xcf, 0xb2, 0x79, 0x46,
2256 0x13, 0xb0, 0xbe, 0x41, 0x31, 0x5e, 0xf7, 0x78,
2257 0x70, 0x8a, 0x70, 0xee, 0x7d, 0x75, 0x16, 0x5c
2258};
2259
2260static const u8 T20[] = {
2261 0x8b, 0x30, 0x7f, 0x6b, 0x33, 0x28, 0x6d, 0x0a,
2262 0xb0, 0x26, 0xa9, 0xed, 0x3f, 0xe1, 0xe8, 0x5f
2263};
2264
/*
 * TEST_CASE(n): run known-answer vector n through AES-GCM in both directions.
 *
 * Uses the file-scope objects K##n, IV##n, A##n, P##n, C##n and T##n, and
 * expects `ctx` (GCM128_CONTEXT), `key` (AES_KEY) and `ret` (int failure
 * counter) to be in scope at the expansion site.
 *
 * The key is scheduled with a bit length of sizeof(K##n) * 8.  A##n, P##n and
 * C##n may be NULL pointers, in which case the matching AAD, encrypt/decrypt
 * or comparison step is skipped.  A direction fails -- incrementing `ret` and
 * printing a message -- when CRYPTO_gcm128_finish() returns non-zero for the
 * 16-byte tag T##n or when the produced buffer differs from the expected one.
 */
# define TEST_CASE(n) do {                                                \
        u8 scratch[sizeof(P##n)];                                         \
                                                                          \
        AES_set_encrypt_key(K##n, sizeof(K##n) * 8, &key);                \
        CRYPTO_gcm128_init(&ctx, &key, (block128_f)AES_encrypt);          \
                                                                          \
        /* Encrypt direction: P -> scratch, checked against C and T. */   \
        memset(scratch, 0, sizeof(scratch));                              \
        CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n));                  \
        if (A##n)                                                         \
            CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n));                  \
        if (P##n)                                                         \
            CRYPTO_gcm128_encrypt(&ctx, P##n, scratch, sizeof(scratch));  \
        if (CRYPTO_gcm128_finish(&ctx, T##n, 16)                          \
            || (C##n && memcmp(scratch, C##n, sizeof(scratch)))) {        \
            ret++;                                                        \
            printf ("encrypt test#%d failed.\n", n);                      \
        }                                                                 \
                                                                          \
        /* Decrypt direction: C -> scratch, checked against P and T. */   \
        memset(scratch, 0, sizeof(scratch));                              \
        CRYPTO_gcm128_setiv(&ctx, IV##n, sizeof(IV##n));                  \
        if (A##n)                                                         \
            CRYPTO_gcm128_aad(&ctx, A##n, sizeof(A##n));                  \
        if (C##n)                                                         \
            CRYPTO_gcm128_decrypt(&ctx, C##n, scratch, sizeof(scratch));  \
        if (CRYPTO_gcm128_finish(&ctx, T##n, 16)                          \
            || (P##n && memcmp(scratch, P##n, sizeof(scratch)))) {        \
            ret++;                                                        \
            printf ("decrypt test#%d failed.\n", n);                      \
        }                                                                 \
    } while(0)
e7f5b1cd
AP
2284
2285int main()
2286{
0f113f3e
MC
2287 GCM128_CONTEXT ctx;
2288 AES_KEY key;
2289 int ret = 0;
2290
2291 TEST_CASE(1);
2292 TEST_CASE(2);
2293 TEST_CASE(3);
2294 TEST_CASE(4);
2295 TEST_CASE(5);
2296 TEST_CASE(6);
2297 TEST_CASE(7);
2298 TEST_CASE(8);
2299 TEST_CASE(9);
2300 TEST_CASE(10);
2301 TEST_CASE(11);
2302 TEST_CASE(12);
2303 TEST_CASE(13);
2304 TEST_CASE(14);
2305 TEST_CASE(15);
2306 TEST_CASE(16);
2307 TEST_CASE(17);
2308 TEST_CASE(18);
2309 TEST_CASE(19);
2310 TEST_CASE(20);
2311
2312# ifdef OPENSSL_CPUID_OBJ
2313 {
2314 size_t start, stop, gcm_t, ctr_t, OPENSSL_rdtsc();
2315 union {
2316 u64 u;
2317 u8 c[1024];
2318 } buf;
2319 int i;
2320
2321 AES_set_encrypt_key(K1, sizeof(K1) * 8, &key);
2322 CRYPTO_gcm128_init(&ctx, &key, (block128_f) AES_encrypt);
2323 CRYPTO_gcm128_setiv(&ctx, IV1, sizeof(IV1));
2324
2325 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2326 start = OPENSSL_rdtsc();
2327 CRYPTO_gcm128_encrypt(&ctx, buf.c, buf.c, sizeof(buf));
2328 gcm_t = OPENSSL_rdtsc() - start;
2329
2330 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2331 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2332 (block128_f) AES_encrypt);
2333 start = OPENSSL_rdtsc();
2334 CRYPTO_ctr128_encrypt(buf.c, buf.c, sizeof(buf),
2335 &key, ctx.Yi.c, ctx.EKi.c, &ctx.mres,
2336 (block128_f) AES_encrypt);
2337 ctr_t = OPENSSL_rdtsc() - start;
2338
2339 printf("%.2f-%.2f=%.2f\n",
2340 gcm_t / (double)sizeof(buf),
2341 ctr_t / (double)sizeof(buf),
2342 (gcm_t - ctr_t) / (double)sizeof(buf));
2343# ifdef GHASH
2344 {
2345 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
2346 const u8 *inp, size_t len) = ctx.ghash;
2347
2348 GHASH((&ctx), buf.c, sizeof(buf));
2349 start = OPENSSL_rdtsc();
2350 for (i = 0; i < 100; ++i)
2351 GHASH((&ctx), buf.c, sizeof(buf));
2352 gcm_t = OPENSSL_rdtsc() - start;
2353 printf("%.2f\n", gcm_t / (double)sizeof(buf) / (double)i);
2354 }
2355# endif
2356 }
2357# endif
2262beef 2358
0f113f3e 2359 return ret;
e7f5b1cd
AP
2360}
2361#endif