]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/gcm128.c
add example for DH certificate generation
[thirdparty/openssl.git] / crypto / modes / gcm128.c
CommitLineData
e7f5b1cd
AP
1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
f4001a0d
DSH
50#define OPENSSL_FIPSAPI
51
aa763c0f 52#include <openssl/crypto.h>
f472ec8c 53#include "modes_lcl.h"
e7f5b1cd
AP
54#include <string.h>
55
56#ifndef MODES_DEBUG
57# ifndef NDEBUG
58# define NDEBUG
59# endif
60#endif
61#include <assert.h>
62
f472ec8c
AP
63#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64/* redefine, because alignment is ensured */
65#undef GETU32
66#define GETU32(p) BSWAP4(*(const u32 *)(p))
67#undef PUTU32
68#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
e7f5b1cd
AP
69#endif
70
c1f092d1
AP
71#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72#define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
77 } \
78 else { \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
82 } \
83} while(0)
84
d8d95832
AP
85/*
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
106 *
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
116 *
117 * Value of 1 is not appropriate for performance reasons.
118 */
a595baff
AP
119#if TABLE_BITS==8
120
e7f5b1cd
AP
121static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122{
123 int i, j;
124 u128 V;
125
126 Htable[0].hi = 0;
127 Htable[0].lo = 0;
128 V.hi = H[0];
129 V.lo = H[1];
130
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
c1f092d1 132 REDUCE1BIT(V);
e7f5b1cd
AP
133 Htable[i] = V;
134 }
135
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
141 }
142 }
143}
144
/*
 * GHASH single-block multiplication, 8-bit table flavour: Xi = Xi * H in
 * GF(2^128), using the 256-entry Htable built by gcm_init_8bit.  Xi is
 * read and written in big-endian (wire) order.
 */
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0};
    const u8 *xi = (const u8 *)Xi+15;   /* walk Xi from last byte to first */
    size_t rem, n = *xi;
    const union { long one; char little; } is_endian = {1};  /* run-time endianness probe */
    __fips_constseg
    /* Reduction constants: rem_8bit[b] is the polynomial reduction of the
     * byte b shifted out of the low end, pre-positioned via PACK() so it
     * lands in the top 16 bits of a size_t. */
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };

    while (1) {
        /* accumulate table entry selected by the current byte of Xi */
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi==xi)   break;      /* all 16 bytes consumed */

        n = *(--xi);

        /* shift Z right by 8 bits and fold the dropped low byte back in
         * through the precomputed reduction table */
        rem  = (size_t)Z.lo&0xff;
        Z.lo = (Z.hi<<56)|(Z.lo>>8);
        Z.hi = (Z.hi>>8);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem]<<32;  /* PACK() targets a 32-bit size_t here */
    }

    /* store the result back into Xi in big-endian byte order */
    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32);    PUTU32(p,v);
        v = (u32)(Z.hi);        PUTU32(p+4,v);
        v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
        v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
a595baff 253#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
e7f5b1cd 254
a595baff 255#elif TABLE_BITS==4
2262beef 256
e7f5b1cd
AP
/*
 * Precompute the 16-entry multiplication table for the 4-bit ("Shoup's")
 * GHASH flavour: Htable[n] = n * H in GF(2^128) for every nibble value n.
 * H is supplied in host byte order (see CRYPTO_gcm128_init).
 */
static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
#if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
#endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

#if defined(OPENSSL_SMALL_FOOTPRINT)
    /* Compact variant: power-of-two entries by repeated one-bit
     * reduction, then the rest by linearity. */
    for (Htable[8]=V, i=4; i>0; i>>=1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i=2; i<16; i<<=1) {
        u128 *Hi = Htable+i;
        int   j;
        for (V=*Hi, j=1; j<i; ++j) {
            Hi[j].hi = V.hi^Htable[j].hi;
            Hi[j].lo = V.lo^Htable[j].lo;
        }
    }
#else
    /* Fully unrolled variant of the same construction. */
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi  = V.hi^Htable[2].hi, Htable[3].lo  = V.lo^Htable[2].lo;
    V=Htable[4];
    Htable[5].hi  = V.hi^Htable[1].hi, Htable[5].lo  = V.lo^Htable[1].lo;
    Htable[6].hi  = V.hi^Htable[2].hi, Htable[6].lo  = V.lo^Htable[2].lo;
    Htable[7].hi  = V.hi^Htable[3].hi, Htable[7].lo  = V.lo^Htable[3].lo;
    V=Htable[8];
    Htable[9].hi  = V.hi^Htable[1].hi, Htable[9].lo  = V.lo^Htable[1].lo;
    Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
    Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
    Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
    Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
    Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
    Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
#endif
#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
    int j;
    const union { long one; char little; } is_endian = {1};

    if (is_endian.little)
        for (j=0;j<16;++j) {
            V = Htable[j];
            Htable[j].hi = V.lo;        /* swap the two 64-bit halves */
            Htable[j].lo = V.hi;
        }
    else
        for (j=0;j<16;++j) {
            V = Htable[j];
            Htable[j].hi = V.lo<<32|V.lo>>32;   /* swap and rotate 32-bit words */
            Htable[j].lo = V.hi<<32|V.hi>>32;
        }
    }
#endif
}
328
a595baff 329#ifndef GHASH_ASM
__fips_constseg
/* Reduction constants for the 4-bit flavour: rem_4bit[n] is the polynomial
 * reduction of nibble n shifted out of the low end, PACK()-ed into the top
 * 16 bits of a size_t. */
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };

/*
 * GHASH single-block multiplication, 4-bit table flavour: Xi = Xi * H in
 * GF(2^128), processing Xi two nibbles (one byte) at a time from its last
 * byte to its first.  Xi is read and written in big-endian (wire) order.
 */
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};  /* run-time endianness probe */

    /* Seed the accumulator from the low nibble of the last byte. */
    nlo  = ((const u8 *)Xi)[15];
    nhi  = nlo>>4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        /* Shift Z right 4 bits, folding the dropped nibble back in via
         * rem_4bit, then mix in the high nibble's table entry. */
        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem]<<32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt<0)        break;      /* all 16 bytes consumed */

        nlo  = ((const u8 *)Xi)[cnt];
        nhi  = nlo>>4;
        nlo &= 0xf;

        /* Same shift-and-reduce step for the low nibble. */
        rem  = (size_t)Z.lo&0xf;
        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);
        if (sizeof(size_t)==8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem]<<32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    /* Store the result back into Xi in big-endian byte order. */
    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32);    PUTU32(p,v);
        v = (u32)(Z.hi);        PUTU32(p+4,v);
        v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
        v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
399
400#if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
/*
 * Streamed GHASH: for each 16-byte block of inp, Xi = (Xi ^ block) * H,
 * using the 4-bit Htable.  len is assumed to be a positive multiple of 16.
 * Xi is read and written in big-endian (wire) order.
 */
static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    const union { long one; char little; } is_endian = {1};  /* run-time endianness probe */

#if 1
    do {
        /* XOR the input block into Xi byte-by-byte while multiplying,
         * starting from the last byte. */
        cnt  = 15;
        nlo  = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            /* shift right 4 bits, reduce, mix in high-nibble entry */
            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
            if (sizeof(size_t)==8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem]<<32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt<0)        break;  /* whole block consumed */

            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            /* same shift-and-reduce for the low nibble */
            rem  = (size_t)Z.lo&0xf;
            Z.lo = (Z.hi<<60)|(Z.lo>>4);
            Z.hi = (Z.hi>>4);
            if (sizeof(size_t)==8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem]<<32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];     /* Htable shifted right by 4 bits */
    u8   Hshl4[16];     /* Htable shifted left by 4 bits */
    __fips_constseg
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt=0; cnt<16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
        Hshr4[cnt].hi = (Z.hi>>4);
        Hshl4[cnt]    = (u8)(Z.lo<<4);
    }

    do {
        /* process bytes 15..1, one byte (8 bits) per iteration */
        for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
            nlo  = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi  = nlo>>4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo&0xff;

            Z.lo = (Z.hi<<56)|(Z.lo>>8);
            Z.hi = (Z.hi>>8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
        }

        /* final byte gets only a 4-bit shift */
        nlo  = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi  = nlo>>4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo&0xf;

        Z.lo = (Z.hi<<60)|(Z.lo>>4);
        Z.hi = (Z.hi>>4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
#endif

        /* Store the accumulator back into Xi in big-endian order before
         * moving to the next input block. */
        if (is_endian.little) {
#ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi>>32);    PUTU32(p,v);
            v = (u32)(Z.hi);        PUTU32(p+4,v);
            v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
            v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
        }
        else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp+=16, len-=16);
}
2262beef
AP
571#endif
572#else
4f39edbf
AP
573void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
2262beef
AP
575#endif
576
577#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
a595baff 578#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
c1f092d1 579#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
a595baff
AP
580/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
581 * trashing effect. In other words idea is to hash data while it's
582 * still in L1 cache after encryption pass... */
68e2586b 583#define GHASH_CHUNK (3*1024)
a595baff 584#endif
2262beef 585
a595baff 586#else /* TABLE_BITS */
e7f5b1cd 587
/*
 * Table-free GHASH multiplication, bit-by-bit (TABLE_BITS==1 fallback):
 * Xi = Xi * H in GF(2^128).  Slowest flavour, smallest footprint; the
 * TABLE_BITS comment above notes 1 is "not appropriate for performance".
 */
static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
{
    u128 V,Z = { 0,0 };
    long X;
    int  i,j;
    const long *xi = (const long *)Xi;
    const union { long one; char little; } is_endian = {1};  /* run-time endianness probe */

    V.hi = H[0];    /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    /* Walk Xi one machine word at a time, converting each word to
     * host order so its MSB is the next coefficient of the polynomial. */
    for (j=0; j<16/sizeof(long); ++j) {
        if (is_endian.little) {
            if (sizeof(long)==8) {
#ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
#else
                const u8 *p = (const u8 *)(xi+j);
                X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
#endif
            }
            else {
                const u8 *p = (const u8 *)(xi+j);
                X = (long)GETU32(p);
            }
        }
        else
            X = xi[j];

        /* Classic shift-and-add: for each bit of X (MSB first), add V
         * into Z when the bit is set, then multiply V by x. */
        for (i=0; i<8*sizeof(long); ++i, X<<=1) {
            u64 M = (u64)(X>>(8*sizeof(long)-1));  /* all-ones mask iff top bit set */
            Z.hi ^= V.hi&M;
            Z.lo ^= V.lo&M;

            REDUCE1BIT(V);
        }
    }

    /* Store the result back into Xi in big-endian byte order. */
    if (is_endian.little) {
#ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi>>32);    PUTU32(p,v);
        v = (u32)(Z.hi);        PUTU32(p+4,v);
        v = (u32)(Z.lo>>32);    PUTU32(p+8,v);
        v = (u32)(Z.lo);        PUTU32(p+12,v);
#endif
    }
    else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
2262beef 644#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
a595baff 645
e7f5b1cd
AP
646#endif
647
1e863180
AP
648#if TABLE_BITS==4 && defined(GHASH_ASM)
649# if !defined(I386_ONLY) && \
c1f092d1
AP
650 (defined(__i386) || defined(__i386__) || \
651 defined(__x86_64) || defined(__x86_64__) || \
652 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
1e863180
AP
653# define GHASH_ASM_X86_OR_64
654# define GCM_FUNCREF_4BIT
c1f092d1
AP
655extern unsigned int OPENSSL_ia32cap_P[2];
656
657void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
660
1e863180
AP
661# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
662# define GHASH_ASM_X86
c1f092d1
AP
663void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
664void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
665
666void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
667void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
1e863180
AP
668# endif
669# elif defined(__arm__) || defined(__arm)
670# include "arm_arch.h"
671# if __ARM_ARCH__>=7
672# define GHASH_ASM_ARM
673# define GCM_FUNCREF_4BIT
1e863180
AP
674void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
675void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
676# endif
c1f092d1 677# endif
c1f092d1
AP
678#endif
679
7af04002
AP
680#ifdef GCM_FUNCREF_4BIT
681# undef GCM_MUL
682# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
683# ifdef GHASH
684# undef GHASH
685# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
686# endif
687#endif
688
e7f5b1cd
AP
/*
 * Initialize a GCM128 context: zero all state, record the block cipher and
 * key, derive the hash subkey H = E_K(0^128), and select/precompute the
 * GHASH implementation (PCLMULQDQ, MMX, NEON or generic table code,
 * depending on compile flags and run-time CPU capability bits).
 */
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
{
    const union { long one; char little; } is_endian = {1};  /* run-time endianness probe */

    memset(ctx,0,sizeof(*ctx));
    ctx->block = block;
    ctx->key   = key;

    /* H = E_K(0^128); ctx->H.c was zeroed by the memset above. */
    (*block)(ctx->H.c,ctx->H.c,key);

    if (is_endian.little) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi,lo;
        hi = (u64)GETU32(p)  <<32|GETU32(p+4);
        lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

#if TABLE_BITS==8
    gcm_init_8bit(ctx->Htable,ctx->H.u);
#elif TABLE_BITS==4
# if defined(GHASH_ASM_X86_OR_64)
#  if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
        OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
        gcm_init_clmul(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_clmul;
        ctx->ghash = gcm_ghash_clmul;
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable,ctx->H.u);
# if defined(GHASH_ASM_X86)     /* x86 only */
    if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        ctx->ghash = gcm_ghash_4bit_x86;
    }
# else
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
# endif
# elif defined(GHASH_ASM_ARM)
    if (OPENSSL_armcap_P & ARMV7_NEON) {
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    } else {
        gcm_init_4bit(ctx->Htable,ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        ctx->ghash = gcm_ghash_4bit;
    }
# else
    gcm_init_4bit(ctx->Htable,ctx->H.u);
# endif
#endif
}
754
/*
 * Set the IV/nonce for a fresh GCM operation.  Resets all per-message
 * state (Xi, lengths, residue counters), derives the initial counter block
 * Y0 (the 96-bit fast path when len==12, otherwise GHASH over the IV),
 * computes EK0 = E_K(Y0) for the final tag, and pre-increments the counter.
 */
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
{
    const union { long one; char little; } is_endian = {1};  /* run-time endianness probe */
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
#endif

    ctx->Yi.u[0]  = 0;
    ctx->Yi.u[1]  = 0;
    ctx->Xi.u[0]  = 0;
    ctx->Xi.u[1]  = 0;
    ctx->len.u[0] = 0;  /* AAD length */
    ctx->len.u[1] = 0;  /* message length */
    ctx->ares = 0;      /* AAD partial-block residue */
    ctx->mres = 0;      /* message partial-block residue */

    if (len==12) {
        /* Recommended 96-bit IV: Y0 = IV || 0^31 || 1, no hashing needed. */
        memcpy(ctx->Yi.c,iv,12);
        ctx->Yi.c[15]=1;
        ctr=1;
    }
    else {
        /* Arbitrary-length IV: Y0 = GHASH(IV || pad || 64-bit bitlen). */
        size_t i;
        u64 len0 = len;

        while (len>=16) {
            for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
            GCM_MUL(ctx,Yi);
        }
        len0 <<= 3;     /* byte count -> bit count */
        if (is_endian.little) {
#ifdef BSWAP8
            ctx->Yi.u[1]  ^= BSWAP8(len0);
#else
            ctx->Yi.c[8]  ^= (u8)(len0>>56);
            ctx->Yi.c[9]  ^= (u8)(len0>>48);
            ctx->Yi.c[10] ^= (u8)(len0>>40);
            ctx->Yi.c[11] ^= (u8)(len0>>32);
            ctx->Yi.c[12] ^= (u8)(len0>>24);
            ctx->Yi.c[13] ^= (u8)(len0>>16);
            ctx->Yi.c[14] ^= (u8)(len0>>8);
            ctx->Yi.c[15] ^= (u8)(len0);
#endif
        }
        else
            ctx->Yi.u[1]  ^= len0;

        GCM_MUL(ctx,Yi);

        if (is_endian.little)
            ctr = GETU32(ctx->Yi.c+12);
        else
            ctr = ctx->Yi.d[3];
    }

    /* EK0 = E_K(Y0) is kept for the final tag computation. */
    (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
    ++ctr;      /* first data block uses Y1 */
    if (is_endian.little)
        PUTU32(ctx->Yi.c+12,ctr);
    else
        ctx->Yi.d[3] = ctr;
}
824
/*
 * Feed additional authenticated data (AAD) into the GHASH accumulator.
 * May be called repeatedly, but only before any encrypt/decrypt call
 * (returns -2 once message data has been processed).  Returns -1 if the
 * total AAD length exceeds the 2^61-byte limit, 0 on success.
 */
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len)   = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1]) return -2;   /* message data already seen */

    alen += len;
    /* enforce AAD length limit; second clause catches 64-bit wrap */
    if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
        return -1;
    ctx->len.u[0] = alen;

    /* Complete a partial block left over from a previous call. */
    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n+1)%16;
        }
        if (n==0) GCM_MUL(ctx,Xi);
        else {
            ctx->ares = n;      /* still partial; hash deferred */
            return 0;
        }
    }

#ifdef GHASH
    /* Bulk path: hash all whole blocks in one streamed call. */
    if ((i = (len&(size_t)-16))) {
        GHASH(ctx,aad,i);
        aad += i;
        len -= i;
    }
#else
    while (len>=16) {
        for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx,Xi);
        aad += 16;
        len -= 16;
    }
#endif
    /* Stash a trailing partial block; it is hashed when completed or
     * when encryption starts (see ctx->ares handling in encrypt). */
    if (len) {
        n = (unsigned int)len;
        for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
881
/*
 * GCM encryption: CTR-mode encrypt `len` bytes from `in` to `out` while
 * folding the produced ciphertext into the GHASH accumulator Xi.
 * Callable repeatedly; partial-block state is carried in ctx->mres.
 * Returns -1 if the total message length exceeds the GCM limit
 * (2^36-32 bytes), 0 on success.  in/out may be processed word-at-a-time
 * unless STRICT_ALIGNMENT forces the byte path.
 */
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
        const unsigned char *in, unsigned char *out,
        size_t len)
{
    const union { long one; char little; } is_endian = {1};  /* run-time endianness probe */
    unsigned int n, ctr;
    size_t i;
    u64        mlen  = ctx->len.u[1];
    block128_f block = ctx->block;
    void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])    = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
                const u8 *inp,size_t len)   = ctx->ghash;
# endif
#endif

#if 0
    n = (unsigned int)mlen%16; /* alternative to ctx->mres */
#endif
    mlen += len;
    /* enforce plaintext length limit; second clause catches 64-bit wrap */
    if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
        return -1;
    ctx->len.u[1] = mlen;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
        GCM_MUL(ctx,Xi);
        ctx->ares = 0;
    }

    /* Pull the 32-bit counter out of the last word of Yi. */
    if (is_endian.little)
        ctr = GETU32(ctx->Yi.c+12);
    else
        ctr = ctx->Yi.d[3];

    n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16%sizeof(size_t) == 0) do {    /* always true actually */
        /* Finish a partial keystream block left from a previous call. */
        if (n) {
            while (n && len) {
                ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
                --len;
                n = (n+1)%16;
            }
            if (n==0) GCM_MUL(ctx,Xi);
            else {
                ctx->mres = n;
                return 0;
            }
        }
#if defined(STRICT_ALIGNMENT)
        /* Word-sized loads below require aligned pointers on this target. */
        if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
            break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
        /* Encrypt GHASH_CHUNK bytes, then hash them while they are
         * still warm in L1 (see GHASH_CHUNK comment above). */
        while (len>=GHASH_CHUNK) {
            size_t j=GHASH_CHUNK;

            while (j) {
                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
                    PUTU32(ctx->Yi.c+12,ctr);
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16; i+=sizeof(size_t))
                    *(size_t *)(out+i) =
                    *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
                out += 16;
                in  += 16;
                j   -= 16;
            }
            GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
            len -= GHASH_CHUNK;
        }
        /* Remaining whole blocks: encrypt, then one streamed hash. */
        if ((i = (len&(size_t)-16))) {
            size_t j=i;

            while (len>=16) {
                (*block)(ctx->Yi.c,ctx->EKi.c,key);
                ++ctr;
                if (is_endian.little)
                    PUTU32(ctx->Yi.c+12,ctr);
                else
                    ctx->Yi.d[3] = ctr;
                for (i=0; i<16; i+=sizeof(size_t))
                    *(size_t *)(out+i) =
                    *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
                out += 16;
                in  += 16;
                len -= 16;
            }
            GHASH(ctx,out-j,j);
        }
#else
        /* No streamed GHASH: hash each block right after encrypting it. */
        while (len>=16) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
                PUTU32(ctx->Yi.c+12,ctr);
            else
                ctx->Yi.d[3] = ctr;
            for (i=0; i<16; i+=sizeof(size_t))
                *(size_t *)(ctx->Xi.c+i) ^=
                *(size_t *)(out+i) =
                *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
            GCM_MUL(ctx,Xi);
            out += 16;
            in  += 16;
            len -= 16;
        }
#endif
        /* Trailing partial block: generate one keystream block and
         * remember how much of it was consumed in ctx->mres. */
        if (len) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
                PUTU32(ctx->Yi.c+12,ctr);
            else
                ctx->Yi.d[3] = ctr;
            while (len--) {
                ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
                ++n;
            }
        }

        ctx->mres = n;
        return 0;
    } while(0);
#endif
    /* Portable byte-at-a-time fallback (small footprint / unaligned). */
    for (i=0;i<len;++i) {
        if (n==0) {
            (*block)(ctx->Yi.c,ctx->EKi.c,key);
            ++ctr;
            if (is_endian.little)
                PUTU32(ctx->Yi.c+12,ctr);
            else
                ctx->Yi.d[3] = ctr;
        }
        ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
        n = (n+1)%16;
        if (n==0)
            GCM_MUL(ctx,Xi);
    }

    ctx->mres = n;
    return 0;
}
1031
1f2502eb 1032int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
e7f5b1cd
AP
1033 const unsigned char *in, unsigned char *out,
1034 size_t len)
1035{
1036 const union { long one; char little; } is_endian = {1};
1037 unsigned int n, ctr;
1038 size_t i;
3f0d1405
AP
1039 u64 mlen = ctx->len.u[1];
1040 block128_f block = ctx->block;
1041 void *key = ctx->key;
d8d95832 1042#ifdef GCM_FUNCREF_4BIT
7af04002 1043 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 1044# ifdef GHASH
7af04002
AP
1045 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1046 const u8 *inp,size_t len) = ctx->ghash;
d8d95832
AP
1047# endif
1048#endif
1f2502eb
AP
1049
1050 mlen += len;
1051 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1052 return -1;
1053 ctx->len.u[1] = mlen;
e7f5b1cd 1054
b68c1315
AP
1055 if (ctx->ares) {
1056 /* First call to decrypt finalizes GHASH(AAD) */
1057 GCM_MUL(ctx,Xi);
1058 ctx->ares = 0;
1059 }
1060
f472ec8c
AP
1061 if (is_endian.little)
1062 ctr = GETU32(ctx->Yi.c+12);
1063 else
1064 ctr = ctx->Yi.d[3];
e7f5b1cd 1065
1f2502eb 1066 n = ctx->mres;
e7f5b1cd
AP
1067#if !defined(OPENSSL_SMALL_FOOTPRINT)
1068 if (16%sizeof(size_t) == 0) do { /* always true actually */
1069 if (n) {
1070 while (n && len) {
1071 u8 c = *(in++);
1072 *(out++) = c^ctx->EKi.c[n];
1073 ctx->Xi.c[n] ^= c;
1074 --len;
1075 n = (n+1)%16;
1076 }
1077 if (n==0) GCM_MUL (ctx,Xi);
1078 else {
b68c1315 1079 ctx->mres = n;
1f2502eb 1080 return 0;
e7f5b1cd
AP
1081 }
1082 }
e7f5b1cd
AP
1083#if defined(STRICT_ALIGNMENT)
1084 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1085 break;
1086#endif
a595baff 1087#if defined(GHASH) && defined(GHASH_CHUNK)
2262beef
AP
1088 while (len>=GHASH_CHUNK) {
1089 size_t j=GHASH_CHUNK;
1090
c1f092d1 1091 GHASH(ctx,in,GHASH_CHUNK);
2262beef 1092 while (j) {
3f0d1405 1093 (*block)(ctx->Yi.c,ctx->EKi.c,key);
e7f5b1cd
AP
1094 ++ctr;
1095 if (is_endian.little)
1096 PUTU32(ctx->Yi.c+12,ctr);
1097 else
1098 ctx->Yi.d[3] = ctr;
2262beef
AP
1099 for (i=0; i<16; i+=sizeof(size_t))
1100 *(size_t *)(out+i) =
1101 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1102 out += 16;
1103 in += 16;
1104 j -= 16;
1105 }
1106 len -= GHASH_CHUNK;
1107 }
1108 if ((i = (len&(size_t)-16))) {
c1f092d1 1109 GHASH(ctx,in,i);
2262beef 1110 while (len>=16) {
3f0d1405 1111 (*block)(ctx->Yi.c,ctx->EKi.c,key);
2262beef
AP
1112 ++ctr;
1113 if (is_endian.little)
1114 PUTU32(ctx->Yi.c+12,ctr);
1115 else
1116 ctx->Yi.d[3] = ctr;
1117 for (i=0; i<16; i+=sizeof(size_t))
1118 *(size_t *)(out+i) =
1119 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1120 out += 16;
1121 in += 16;
1122 len -= 16;
1123 }
1124 }
1125#else
1126 while (len>=16) {
3f0d1405 1127 (*block)(ctx->Yi.c,ctx->EKi.c,key);
2262beef
AP
1128 ++ctr;
1129 if (is_endian.little)
1130 PUTU32(ctx->Yi.c+12,ctr);
1131 else
1132 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1133 for (i=0; i<16; i+=sizeof(size_t)) {
1134 size_t c = *(size_t *)(in+i);
1135 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1136 *(size_t *)(ctx->Xi.c+i) ^= c;
1137 }
2262beef 1138 GCM_MUL(ctx,Xi);
e7f5b1cd
AP
1139 out += 16;
1140 in += 16;
1141 len -= 16;
1142 }
2262beef 1143#endif
e7f5b1cd 1144 if (len) {
3f0d1405 1145 (*block)(ctx->Yi.c,ctx->EKi.c,key);
e7f5b1cd
AP
1146 ++ctr;
1147 if (is_endian.little)
1148 PUTU32(ctx->Yi.c+12,ctr);
1149 else
1150 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1151 while (len--) {
1152 u8 c = in[n];
1153 ctx->Xi.c[n] ^= c;
1154 out[n] = c^ctx->EKi.c[n];
1155 ++n;
1156 }
1157 }
1158
b68c1315 1159 ctx->mres = n;
1f2502eb 1160 return 0;
e7f5b1cd
AP
1161 } while(0);
1162#endif
1163 for (i=0;i<len;++i) {
1164 u8 c;
1165 if (n==0) {
3f0d1405 1166 (*block)(ctx->Yi.c,ctx->EKi.c,key);
e7f5b1cd
AP
1167 ++ctr;
1168 if (is_endian.little)
1169 PUTU32(ctx->Yi.c+12,ctr);
1170 else
1171 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1172 }
1173 c = in[i];
68e2586b 1174 out[i] = c^ctx->EKi.c[n];
e7f5b1cd
AP
1175 ctx->Xi.c[n] ^= c;
1176 n = (n+1)%16;
1177 if (n==0)
1178 GCM_MUL(ctx,Xi);
1179 }
1180
b68c1315 1181 ctx->mres = n;
1f2502eb 1182 return 0;
e7f5b1cd
AP
1183}
1184
1f2502eb 1185int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
f71c6ace
AP
1186 const unsigned char *in, unsigned char *out,
1187 size_t len, ctr128_f stream)
1188{
1189 const union { long one; char little; } is_endian = {1};
1190 unsigned int n, ctr;
1191 size_t i;
3f0d1405
AP
1192 u64 mlen = ctx->len.u[1];
1193 void *key = ctx->key;
d8d95832 1194#ifdef GCM_FUNCREF_4BIT
7af04002 1195 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 1196# ifdef GHASH
7af04002
AP
1197 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1198 const u8 *inp,size_t len) = ctx->ghash;
d8d95832
AP
1199# endif
1200#endif
1f2502eb
AP
1201
1202 mlen += len;
1203 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1204 return -1;
1205 ctx->len.u[1] = mlen;
f71c6ace 1206
b68c1315
AP
1207 if (ctx->ares) {
1208 /* First call to encrypt finalizes GHASH(AAD) */
1209 GCM_MUL(ctx,Xi);
1210 ctx->ares = 0;
1211 }
1212
f71c6ace
AP
1213 if (is_endian.little)
1214 ctr = GETU32(ctx->Yi.c+12);
1215 else
1216 ctr = ctx->Yi.d[3];
1217
1f2502eb 1218 n = ctx->mres;
f71c6ace
AP
1219 if (n) {
1220 while (n && len) {
1221 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1222 --len;
1223 n = (n+1)%16;
1224 }
1225 if (n==0) GCM_MUL(ctx,Xi);
1226 else {
b68c1315 1227 ctx->mres = n;
1f2502eb 1228 return 0;
f71c6ace
AP
1229 }
1230 }
1231#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1232 while (len>=GHASH_CHUNK) {
3f0d1405 1233 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
f71c6ace
AP
1234 ctr += GHASH_CHUNK/16;
1235 if (is_endian.little)
1236 PUTU32(ctx->Yi.c+12,ctr);
1237 else
1238 ctx->Yi.d[3] = ctr;
1239 GHASH(ctx,out,GHASH_CHUNK);
1240 out += GHASH_CHUNK;
1241 in += GHASH_CHUNK;
1242 len -= GHASH_CHUNK;
1243 }
1244#endif
1245 if ((i = (len&(size_t)-16))) {
1246 size_t j=i/16;
1247
3f0d1405 1248 (*stream)(in,out,j,key,ctx->Yi.c);
68e2586b 1249 ctr += (unsigned int)j;
f71c6ace
AP
1250 if (is_endian.little)
1251 PUTU32(ctx->Yi.c+12,ctr);
1252 else
1253 ctx->Yi.d[3] = ctr;
1254 in += i;
1255 len -= i;
1256#if defined(GHASH)
1257 GHASH(ctx,out,i);
1258 out += i;
1259#else
1260 while (j--) {
1261 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1262 GCM_MUL(ctx,Xi);
1263 out += 16;
1264 }
1265#endif
1266 }
1267 if (len) {
3f0d1405 1268 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
f71c6ace
AP
1269 ++ctr;
1270 if (is_endian.little)
1271 PUTU32(ctx->Yi.c+12,ctr);
1272 else
1273 ctx->Yi.d[3] = ctr;
1274 while (len--) {
1275 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1276 ++n;
1277 }
1278 }
1279
b68c1315 1280 ctx->mres = n;
1f2502eb 1281 return 0;
f71c6ace
AP
1282}
1283
1f2502eb 1284int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
f71c6ace
AP
1285 const unsigned char *in, unsigned char *out,
1286 size_t len,ctr128_f stream)
1287{
1288 const union { long one; char little; } is_endian = {1};
1289 unsigned int n, ctr;
1290 size_t i;
3f0d1405
AP
1291 u64 mlen = ctx->len.u[1];
1292 void *key = ctx->key;
d8d95832 1293#ifdef GCM_FUNCREF_4BIT
7af04002 1294 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 1295# ifdef GHASH
7af04002
AP
1296 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
1297 const u8 *inp,size_t len) = ctx->ghash;
d8d95832
AP
1298# endif
1299#endif
1f2502eb
AP
1300
1301 mlen += len;
1302 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
1303 return -1;
1304 ctx->len.u[1] = mlen;
f71c6ace 1305
b68c1315
AP
1306 if (ctx->ares) {
1307 /* First call to decrypt finalizes GHASH(AAD) */
1308 GCM_MUL(ctx,Xi);
1309 ctx->ares = 0;
1310 }
1311
f71c6ace
AP
1312 if (is_endian.little)
1313 ctr = GETU32(ctx->Yi.c+12);
1314 else
1315 ctr = ctx->Yi.d[3];
1316
1f2502eb 1317 n = ctx->mres;
f71c6ace
AP
1318 if (n) {
1319 while (n && len) {
1320 u8 c = *(in++);
1321 *(out++) = c^ctx->EKi.c[n];
1322 ctx->Xi.c[n] ^= c;
1323 --len;
1324 n = (n+1)%16;
1325 }
1326 if (n==0) GCM_MUL (ctx,Xi);
1327 else {
b68c1315 1328 ctx->mres = n;
1f2502eb 1329 return 0;
f71c6ace
AP
1330 }
1331 }
1332#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1333 while (len>=GHASH_CHUNK) {
1334 GHASH(ctx,in,GHASH_CHUNK);
3f0d1405 1335 (*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
f71c6ace
AP
1336 ctr += GHASH_CHUNK/16;
1337 if (is_endian.little)
1338 PUTU32(ctx->Yi.c+12,ctr);
1339 else
1340 ctx->Yi.d[3] = ctr;
1341 out += GHASH_CHUNK;
1342 in += GHASH_CHUNK;
1343 len -= GHASH_CHUNK;
1344 }
1345#endif
1346 if ((i = (len&(size_t)-16))) {
1347 size_t j=i/16;
1348
1349#if defined(GHASH)
1350 GHASH(ctx,in,i);
1351#else
1352 while (j--) {
1353 size_t k;
1354 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1355 GCM_MUL(ctx,Xi);
1356 in += 16;
1357 }
1358 j = i/16;
1359 in -= i;
1360#endif
3f0d1405 1361 (*stream)(in,out,j,key,ctx->Yi.c);
68e2586b 1362 ctr += (unsigned int)j;
f71c6ace
AP
1363 if (is_endian.little)
1364 PUTU32(ctx->Yi.c+12,ctr);
1365 else
1366 ctx->Yi.d[3] = ctr;
1367 out += i;
1368 in += i;
1369 len -= i;
1370 }
1371 if (len) {
3f0d1405 1372 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
f71c6ace
AP
1373 ++ctr;
1374 if (is_endian.little)
1375 PUTU32(ctx->Yi.c+12,ctr);
1376 else
1377 ctx->Yi.d[3] = ctr;
1378 while (len--) {
1379 u8 c = in[n];
1380 ctx->Xi.c[n] ^= c;
1381 out[n] = c^ctx->EKi.c[n];
1382 ++n;
1383 }
1384 }
1385
b68c1315 1386 ctx->mres = n;
1f2502eb 1387 return 0;
f71c6ace
AP
1388}
1389
6acb4ff3
AP
1390int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1391 size_t len)
e7f5b1cd
AP
1392{
1393 const union { long one; char little; } is_endian = {1};
1394 u64 alen = ctx->len.u[0]<<3;
1395 u64 clen = ctx->len.u[1]<<3;
d8d95832 1396#ifdef GCM_FUNCREF_4BIT
7af04002 1397 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 1398#endif
e7f5b1cd 1399
b68c1315 1400 if (ctx->mres)
e7f5b1cd
AP
1401 GCM_MUL(ctx,Xi);
1402
1403 if (is_endian.little) {
1404#ifdef BSWAP8
1405 alen = BSWAP8(alen);
1406 clen = BSWAP8(clen);
1407#else
1408 u8 *p = ctx->len.c;
1409
1410 ctx->len.u[0] = alen;
1411 ctx->len.u[1] = clen;
1412
1413 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1414 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1415#endif
1416 }
1417
1418 ctx->Xi.u[0] ^= alen;
1419 ctx->Xi.u[1] ^= clen;
1420 GCM_MUL(ctx,Xi);
1421
1422 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1423 ctx->Xi.u[1] ^= ctx->EK0.u[1];
6acb4ff3
AP
1424
1425 if (tag && len<=sizeof(ctx->Xi))
1426 return memcmp(ctx->Xi.c,tag,len);
1427 else
1428 return -1;
1429}
1430
fd3dbc1d
DSH
1431void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1432{
1433 CRYPTO_gcm128_finish(ctx, NULL, 0);
1f2502eb 1434 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
fd3dbc1d
DSH
1435}
1436
6acb4ff3
AP
1437GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1438{
1439 GCM128_CONTEXT *ret;
1440
1441 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1442 CRYPTO_gcm128_init(ret,key,block);
1443
1444 return ret;
1445}
1446
1447void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1448{
1449 if (ctx) {
1450 OPENSSL_cleanse(ctx,sizeof(*ctx));
1451 OPENSSL_free(ctx);
1452 }
e7f5b1cd
AP
1453}
1454
1455#if defined(SELFTEST)
1456#include <stdio.h>
1457#include <openssl/aes.h>
1458
1459/* Test Case 1 */
1460static const u8 K1[16],
1461 *P1=NULL,
1462 *A1=NULL,
1463 IV1[12],
1464 *C1=NULL,
1465 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
a595baff 1466
e7f5b1cd
AP
1467/* Test Case 2 */
1468#define K2 K1
1469#define A2 A1
1470#define IV2 IV1
1471static const u8 P2[16],
1472 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1473 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1474
1475/* Test Case 3 */
1476#define A3 A2
1477static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1478 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1479 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1480 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1481 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1482 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1483 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1484 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1485 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1486 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
fb2d5a91 1487 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
e7f5b1cd
AP
1488
1489/* Test Case 4 */
1490#define K4 K3
1491#define IV4 IV3
1492static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1493 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1494 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1495 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1496 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1497 0xab,0xad,0xda,0xd2},
1498 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1499 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1500 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1501 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1502 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1503
1504/* Test Case 5 */
1505#define K5 K4
1506#define P5 P4
d8d95832
AP
1507#define A5 A4
1508static const u8 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
e7f5b1cd
AP
1509 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1510 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1511 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1512 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1513 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
a595baff 1514
e7f5b1cd
AP
1515/* Test Case 6 */
1516#define K6 K5
1517#define P6 P5
1518#define A6 A5
1519static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1520 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1521 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1522 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1523 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1524 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1525 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1526 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1527 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1528
1529/* Test Case 7 */
1530static const u8 K7[24],
1531 *P7=NULL,
1532 *A7=NULL,
1533 IV7[12],
1534 *C7=NULL,
1535 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1536
1537/* Test Case 8 */
1538#define K8 K7
1539#define IV8 IV7
1540#define A8 A7
1541static const u8 P8[16],
1542 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1543 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1544
1545/* Test Case 9 */
1546#define A9 A8
1547static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1548 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1549 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1550 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1551 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1552 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1553 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1554 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1555 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1556 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1557 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1558 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1559
1560/* Test Case 10 */
1561#define K10 K9
1562#define IV10 IV9
1563static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1564 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1565 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1566 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1567 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1568 0xab,0xad,0xda,0xd2},
1569 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1570 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1571 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1572 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1573 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1574
1575/* Test Case 11 */
1576#define K11 K10
1577#define P11 P10
1578#define A11 A10
1579static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1580 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1581 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1582 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1583 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1584 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1585
1586/* Test Case 12 */
1587#define K12 K11
1588#define P12 P11
1589#define A12 A11
1590static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1591 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1592 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1593 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1594 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1595 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1596 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1597 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1598 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1599
1600/* Test Case 13 */
1601static const u8 K13[32],
1602 *P13=NULL,
1603 *A13=NULL,
1604 IV13[12],
1605 *C13=NULL,
1606 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1607
1608/* Test Case 14 */
1609#define K14 K13
1610#define A14 A13
1611static const u8 P14[16],
1612 IV14[12],
1613 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1614 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1615
1616/* Test Case 15 */
1617#define A15 A14
1618static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1619 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1620 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1621 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1622 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1623 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1624 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1625 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1626 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1627 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1628 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1629 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1630
1631/* Test Case 16 */
1632#define K16 K15
1633#define IV16 IV15
1634static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1635 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1636 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1637 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1638 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1639 0xab,0xad,0xda,0xd2},
1640 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1641 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1642 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1643 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1644 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1645
1646/* Test Case 17 */
1647#define K17 K16
1648#define P17 P16
1649#define A17 A16
1650static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1651 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1652 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1653 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1654 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1655 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1656
1657/* Test Case 18 */
1658#define K18 K17
1659#define P18 P17
1660#define A18 A17
1661static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1662 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1663 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1664 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1665 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1666 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1667 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1668 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1669 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1670
1671#define TEST_CASE(n) do { \
1672 u8 out[sizeof(P##n)]; \
1673 AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key); \
1674 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); \
1675 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
68e2586b 1676 memset(out,0,sizeof(out)); \
e7f5b1cd
AP
1677 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1678 if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out)); \
6acb4ff3
AP
1679 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1680 (C##n && memcmp(out,C##n,sizeof(out)))) \
68e2586b 1681 ret++, printf ("encrypt test#%d failed.\n",n); \
e7f5b1cd 1682 CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n)); \
68e2586b 1683 memset(out,0,sizeof(out)); \
e7f5b1cd
AP
1684 if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n)); \
1685 if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out)); \
6acb4ff3
AP
1686 if (CRYPTO_gcm128_finish(&ctx,T##n,16) || \
1687 (P##n && memcmp(out,P##n,sizeof(out)))) \
1688 ret++, printf ("decrypt test#%d failed.\n",n); \
e7f5b1cd
AP
1689 } while(0)
1690
1691int main()
1692{
1693 GCM128_CONTEXT ctx;
1694 AES_KEY key;
1695 int ret=0;
1696
1697 TEST_CASE(1);
1698 TEST_CASE(2);
1699 TEST_CASE(3);
1700 TEST_CASE(4);
1701 TEST_CASE(5);
1702 TEST_CASE(6);
1703 TEST_CASE(7);
1704 TEST_CASE(8);
1705 TEST_CASE(9);
1706 TEST_CASE(10);
1707 TEST_CASE(11);
1708 TEST_CASE(12);
1709 TEST_CASE(13);
1710 TEST_CASE(14);
1711 TEST_CASE(15);
1712 TEST_CASE(16);
1713 TEST_CASE(17);
1714 TEST_CASE(18);
1715
a595baff 1716#ifdef OPENSSL_CPUID_OBJ
2262beef
AP
1717 {
1718 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1719 union { u64 u; u8 c[1024]; } buf;
c1f092d1 1720 int i;
2262beef
AP
1721
1722 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1723 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1724 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1725
1726 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1727 start = OPENSSL_rdtsc();
1728 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1729 gcm_t = OPENSSL_rdtsc() - start;
1730
1731 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
b68c1315 1732 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
2262beef
AP
1733 (block128_f)AES_encrypt);
1734 start = OPENSSL_rdtsc();
1735 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
b68c1315 1736 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
a595baff 1737 (block128_f)AES_encrypt);
2262beef
AP
1738 ctr_t = OPENSSL_rdtsc() - start;
1739
1740 printf("%.2f-%.2f=%.2f\n",
1741 gcm_t/(double)sizeof(buf),
1742 ctr_t/(double)sizeof(buf),
1743 (gcm_t-ctr_t)/(double)sizeof(buf));
a595baff 1744#ifdef GHASH
c1f092d1 1745 GHASH(&ctx,buf.c,sizeof(buf));
a595baff 1746 start = OPENSSL_rdtsc();
c1f092d1 1747 for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
a595baff 1748 gcm_t = OPENSSL_rdtsc() - start;
c1f092d1 1749 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
a595baff 1750#endif
2262beef 1751 }
a595baff 1752#endif
2262beef 1753
e7f5b1cd
AP
1754 return ret;
1755}
1756#endif