]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/gcm128.c
New config module for string tables. This can be used to add new
[thirdparty/openssl.git] / crypto / modes / gcm128.c
CommitLineData
e7f5b1cd
AP
1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
f4001a0d
DSH
50#define OPENSSL_FIPSAPI
51
aa763c0f 52#include <openssl/crypto.h>
f472ec8c 53#include "modes_lcl.h"
e7f5b1cd
AP
54#include <string.h>
55
56#ifndef MODES_DEBUG
57# ifndef NDEBUG
58# define NDEBUG
59# endif
60#endif
61#include <assert.h>
62
f472ec8c
AP
63#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
64/* redefine, because alignment is ensured */
65#undef GETU32
66#define GETU32(p) BSWAP4(*(const u32 *)(p))
67#undef PUTU32
68#define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
e7f5b1cd
AP
69#endif
70
c1f092d1
AP
71#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
72#define REDUCE1BIT(V) do { \
73 if (sizeof(size_t)==8) { \
74 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
75 V.lo = (V.hi<<63)|(V.lo>>1); \
76 V.hi = (V.hi>>1 )^T; \
77 } \
78 else { \
79 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
80 V.lo = (V.hi<<63)|(V.lo>>1); \
81 V.hi = (V.hi>>1 )^((u64)T<<32); \
82 } \
83} while(0)
84
d8d95832
AP
85/*
86 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
87 * never be set to 8. 8 is effectively reserved for testing purposes.
88 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
89 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
90 * whole spectrum of possible table driven implementations. Why? In
91 * non-"Shoup's" case memory access pattern is segmented in such manner,
92 * that it's trivial to see that cache timing information can reveal
93 * fair portion of intermediate hash value. Given that ciphertext is
94 * always available to attacker, it's possible for him to attempt to
95 * deduce secret parameter H and if successful, tamper with messages
96 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
97 * not as trivial, but there is no reason to believe that it's resistant
98 * to cache-timing attack. And the thing about "8-bit" implementation is
99 * that it consumes 16 (sixteen) times more memory, 4KB per individual
100 * key + 1KB shared. Well, on pros side it should be twice as fast as
101 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
102 * was observed to run ~75% faster, closer to 100% for commercial
103 * compilers... Yet "4-bit" procedure is preferred, because it's
104 * believed to provide better security-performance balance and adequate
105 * all-round performance. "All-round" refers to things like:
106 *
107 * - shorter setup time effectively improves overall timing for
108 * handling short messages;
109 * - larger table allocation can become unbearable because of VM
110 * subsystem penalties (for example on Windows large enough free
111 * results in VM working set trimming, meaning that consequent
112 * malloc would immediately incur working set expansion);
113 * - larger table has larger cache footprint, which can affect
114 * performance of other code paths (not necessarily even from same
115 * thread in Hyper-Threading world);
116 *
117 * Value of 1 is not appropriate for performance reasons.
118 */
a595baff
AP
119#if TABLE_BITS==8
120
e7f5b1cd
AP
121static void gcm_init_8bit(u128 Htable[256], u64 H[2])
122{
123 int i, j;
124 u128 V;
125
126 Htable[0].hi = 0;
127 Htable[0].lo = 0;
128 V.hi = H[0];
129 V.lo = H[1];
130
131 for (Htable[128]=V, i=64; i>0; i>>=1) {
c1f092d1 132 REDUCE1BIT(V);
e7f5b1cd
AP
133 Htable[i] = V;
134 }
135
136 for (i=2; i<256; i<<=1) {
137 u128 *Hi = Htable+i, H0 = *Hi;
138 for (j=1; j<i; ++j) {
139 Hi[j].hi = H0.hi^Htable[j].hi;
140 Hi[j].lo = H0.lo^Htable[j].lo;
141 }
142 }
143}
144
d8d95832 145static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
e7f5b1cd
AP
146{
147 u128 Z = { 0, 0};
148 const u8 *xi = (const u8 *)Xi+15;
149 size_t rem, n = *xi;
150 const union { long one; char little; } is_endian = {1};
03e389cf 151 __fips_constseg
e7f5b1cd
AP
152 static const size_t rem_8bit[256] = {
153 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
154 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
155 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
156 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
157 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
158 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
159 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
160 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
161 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
162 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
163 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
164 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
165 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
166 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
167 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
168 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
169 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
170 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
171 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
172 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
173 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
174 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
175 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
176 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
177 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
178 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
179 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
180 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
181 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
182 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
183 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
184 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
185 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
186 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
187 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
188 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
189 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
190 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
191 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
192 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
193 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
194 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
195 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
196 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
197 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
198 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
199 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
200 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
201 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
202 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
203 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
204 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
205 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
206 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
207 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
208 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
209 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
210 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
211 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
212 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
213 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
214 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
215 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
216 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
217
218 while (1) {
219 Z.hi ^= Htable[n].hi;
220 Z.lo ^= Htable[n].lo;
221
222 if ((u8 *)Xi==xi) break;
223
224 n = *(--xi);
225
226 rem = (size_t)Z.lo&0xff;
227 Z.lo = (Z.hi<<56)|(Z.lo>>8);
228 Z.hi = (Z.hi>>8);
229 if (sizeof(size_t)==8)
230 Z.hi ^= rem_8bit[rem];
231 else
232 Z.hi ^= (u64)rem_8bit[rem]<<32;
233 }
234
235 if (is_endian.little) {
236#ifdef BSWAP8
237 Xi[0] = BSWAP8(Z.hi);
238 Xi[1] = BSWAP8(Z.lo);
239#else
240 u8 *p = (u8 *)Xi;
241 u32 v;
242 v = (u32)(Z.hi>>32); PUTU32(p,v);
243 v = (u32)(Z.hi); PUTU32(p+4,v);
244 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
245 v = (u32)(Z.lo); PUTU32(p+12,v);
246#endif
247 }
248 else {
249 Xi[0] = Z.hi;
250 Xi[1] = Z.lo;
251 }
252}
a595baff 253#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
e7f5b1cd 254
a595baff 255#elif TABLE_BITS==4
2262beef 256
e7f5b1cd
AP
257static void gcm_init_4bit(u128 Htable[16], u64 H[2])
258{
e7f5b1cd 259 u128 V;
f472ec8c
AP
260#if defined(OPENSSL_SMALL_FOOTPRINT)
261 int i;
262#endif
e7f5b1cd
AP
263
264 Htable[0].hi = 0;
265 Htable[0].lo = 0;
266 V.hi = H[0];
267 V.lo = H[1];
268
f472ec8c 269#if defined(OPENSSL_SMALL_FOOTPRINT)
e7f5b1cd 270 for (Htable[8]=V, i=4; i>0; i>>=1) {
c1f092d1 271 REDUCE1BIT(V);
e7f5b1cd
AP
272 Htable[i] = V;
273 }
274
275 for (i=2; i<16; i<<=1) {
2262beef
AP
276 u128 *Hi = Htable+i;
277 int j;
278 for (V=*Hi, j=1; j<i; ++j) {
279 Hi[j].hi = V.hi^Htable[j].hi;
280 Hi[j].lo = V.lo^Htable[j].lo;
e7f5b1cd
AP
281 }
282 }
2262beef 283#else
f472ec8c 284 Htable[8] = V;
c1f092d1 285 REDUCE1BIT(V);
f472ec8c 286 Htable[4] = V;
c1f092d1 287 REDUCE1BIT(V);
f472ec8c 288 Htable[2] = V;
c1f092d1 289 REDUCE1BIT(V);
f472ec8c 290 Htable[1] = V;
2262beef
AP
291 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
292 V=Htable[4];
293 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
294 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
295 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
296 V=Htable[8];
297 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
298 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
299 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
300 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
301 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
302 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
303 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
304#endif
f472ec8c
AP
305#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
306 /*
307 * ARM assembler expects specific dword order in Htable.
308 */
309 {
310 int j;
311 const union { long one; char little; } is_endian = {1};
312
313 if (is_endian.little)
314 for (j=0;j<16;++j) {
315 V = Htable[j];
316 Htable[j].hi = V.lo;
317 Htable[j].lo = V.hi;
318 }
319 else
320 for (j=0;j<16;++j) {
321 V = Htable[j];
322 Htable[j].hi = V.lo<<32|V.lo>>32;
323 Htable[j].lo = V.hi<<32|V.hi>>32;
324 }
325 }
326#endif
e7f5b1cd
AP
327}
328
a595baff 329#ifndef GHASH_ASM
03e389cf 330__fips_constseg
2262beef
AP
331static const size_t rem_4bit[16] = {
332 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
333 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
334 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
335 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
336
4f39edbf 337static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
e7f5b1cd 338{
2262beef
AP
339 u128 Z;
340 int cnt = 15;
341 size_t rem, nlo, nhi;
e7f5b1cd 342 const union { long one; char little; } is_endian = {1};
2262beef
AP
343
344 nlo = ((const u8 *)Xi)[15];
345 nhi = nlo>>4;
346 nlo &= 0xf;
347
348 Z.hi = Htable[nlo].hi;
349 Z.lo = Htable[nlo].lo;
e7f5b1cd
AP
350
351 while (1) {
2262beef
AP
352 rem = (size_t)Z.lo&0xf;
353 Z.lo = (Z.hi<<60)|(Z.lo>>4);
354 Z.hi = (Z.hi>>4);
355 if (sizeof(size_t)==8)
356 Z.hi ^= rem_4bit[rem];
357 else
358 Z.hi ^= (u64)rem_4bit[rem]<<32;
359
360 Z.hi ^= Htable[nhi].hi;
361 Z.lo ^= Htable[nhi].lo;
362
363 if (--cnt<0) break;
364
365 nlo = ((const u8 *)Xi)[cnt];
e7f5b1cd
AP
366 nhi = nlo>>4;
367 nlo &= 0xf;
368
2262beef
AP
369 rem = (size_t)Z.lo&0xf;
370 Z.lo = (Z.hi<<60)|(Z.lo>>4);
371 Z.hi = (Z.hi>>4);
372 if (sizeof(size_t)==8)
373 Z.hi ^= rem_4bit[rem];
374 else
375 Z.hi ^= (u64)rem_4bit[rem]<<32;
376
e7f5b1cd
AP
377 Z.hi ^= Htable[nlo].hi;
378 Z.lo ^= Htable[nlo].lo;
2262beef 379 }
e7f5b1cd 380
2262beef
AP
381 if (is_endian.little) {
382#ifdef BSWAP8
383 Xi[0] = BSWAP8(Z.hi);
384 Xi[1] = BSWAP8(Z.lo);
385#else
386 u8 *p = (u8 *)Xi;
387 u32 v;
388 v = (u32)(Z.hi>>32); PUTU32(p,v);
389 v = (u32)(Z.hi); PUTU32(p+4,v);
390 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
391 v = (u32)(Z.lo); PUTU32(p+12,v);
392#endif
393 }
394 else {
395 Xi[0] = Z.hi;
396 Xi[1] = Z.lo;
397 }
398}
399
400#if !defined(OPENSSL_SMALL_FOOTPRINT)
401/*
402 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
a595baff
AP
403 * details... Compiler-generated code doesn't seem to give any
404 * performance improvement, at least not on x86[_64]. It's here
405 * mostly as reference and a placeholder for possible future
406 * non-trivial optimization[s]...
2262beef 407 */
4f39edbf
AP
408static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
409 const u8 *inp,size_t len)
2262beef
AP
410{
411 u128 Z;
412 int cnt;
413 size_t rem, nlo, nhi;
414 const union { long one; char little; } is_endian = {1};
415
e747f4d4 416#if 1
2262beef
AP
417 do {
418 cnt = 15;
419 nlo = ((const u8 *)Xi)[15];
420 nlo ^= inp[15];
421 nhi = nlo>>4;
422 nlo &= 0xf;
423
424 Z.hi = Htable[nlo].hi;
425 Z.lo = Htable[nlo].lo;
426
427 while (1) {
e7f5b1cd
AP
428 rem = (size_t)Z.lo&0xf;
429 Z.lo = (Z.hi<<60)|(Z.lo>>4);
430 Z.hi = (Z.hi>>4);
431 if (sizeof(size_t)==8)
432 Z.hi ^= rem_4bit[rem];
433 else
434 Z.hi ^= (u64)rem_4bit[rem]<<32;
435
436 Z.hi ^= Htable[nhi].hi;
437 Z.lo ^= Htable[nhi].lo;
438
2262beef 439 if (--cnt<0) break;
e7f5b1cd 440
2262beef
AP
441 nlo = ((const u8 *)Xi)[cnt];
442 nlo ^= inp[cnt];
443 nhi = nlo>>4;
444 nlo &= 0xf;
e7f5b1cd
AP
445
446 rem = (size_t)Z.lo&0xf;
447 Z.lo = (Z.hi<<60)|(Z.lo>>4);
448 Z.hi = (Z.hi>>4);
449 if (sizeof(size_t)==8)
450 Z.hi ^= rem_4bit[rem];
451 else
452 Z.hi ^= (u64)rem_4bit[rem]<<32;
2262beef
AP
453
454 Z.hi ^= Htable[nlo].hi;
455 Z.lo ^= Htable[nlo].lo;
e7f5b1cd 456 }
e747f4d4
AP
457#else
458 /*
459 * Extra 256+16 bytes per-key plus 512 bytes shared tables
460 * [should] give ~50% improvement... One could have PACK()-ed
6acb4ff3
AP
461 * the rem_8bit even here, but the priority is to minimize
462 * cache footprint...
e747f4d4
AP
463 */
464 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
465 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
03e389cf 466 __fips_constseg
e747f4d4
AP
467 static const unsigned short rem_8bit[256] = {
468 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
e747f4d4
AP
500 /*
501 * This pre-processing phase slows down procedure by approximately
502 * same time as it makes each loop spin faster. In other words
503 * single block performance is approximately same as straightforward
504 * "4-bit" implementation, and then it goes only faster...
505 */
506 for (cnt=0; cnt<16; ++cnt) {
507 Z.hi = Htable[cnt].hi;
508 Z.lo = Htable[cnt].lo;
509 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510 Hshr4[cnt].hi = (Z.hi>>4);
511 Hshl4[cnt] = (u8)(Z.lo<<4);
512 }
513
514 do {
6acb4ff3 515 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
e747f4d4
AP
516 nlo = ((const u8 *)Xi)[cnt];
517 nlo ^= inp[cnt];
518 nhi = nlo>>4;
519 nlo &= 0xf;
520
521 Z.hi ^= Htable[nlo].hi;
522 Z.lo ^= Htable[nlo].lo;
523
524 rem = (size_t)Z.lo&0xff;
525
526 Z.lo = (Z.hi<<56)|(Z.lo>>8);
527 Z.hi = (Z.hi>>8);
528
529 Z.hi ^= Hshr4[nhi].hi;
530 Z.lo ^= Hshr4[nhi].lo;
531 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
532 }
533
534 nlo = ((const u8 *)Xi)[0];
535 nlo ^= inp[0];
536 nhi = nlo>>4;
537 nlo &= 0xf;
538
539 Z.hi ^= Htable[nlo].hi;
540 Z.lo ^= Htable[nlo].lo;
541
542 rem = (size_t)Z.lo&0xf;
543
544 Z.lo = (Z.hi<<60)|(Z.lo>>4);
545 Z.hi = (Z.hi>>4);
546
547 Z.hi ^= Htable[nhi].hi;
548 Z.lo ^= Htable[nhi].lo;
549 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
550#endif
e7f5b1cd
AP
551
552 if (is_endian.little) {
553#ifdef BSWAP8
554 Xi[0] = BSWAP8(Z.hi);
555 Xi[1] = BSWAP8(Z.lo);
556#else
557 u8 *p = (u8 *)Xi;
558 u32 v;
559 v = (u32)(Z.hi>>32); PUTU32(p,v);
560 v = (u32)(Z.hi); PUTU32(p+4,v);
561 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
562 v = (u32)(Z.lo); PUTU32(p+12,v);
563#endif
564 }
565 else {
566 Xi[0] = Z.hi;
567 Xi[1] = Z.lo;
568 }
2262beef 569 } while (inp+=16, len-=16);
e7f5b1cd 570}
2262beef
AP
571#endif
572#else
4f39edbf
AP
573void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
2262beef
AP
575#endif
576
577#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
a595baff 578#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
c1f092d1 579#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
a595baff
AP
580/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
581 * trashing effect. In other words idea is to hash data while it's
582 * still in L1 cache after encryption pass... */
68e2586b 583#define GHASH_CHUNK (3*1024)
a595baff 584#endif
2262beef 585
a595baff 586#else /* TABLE_BITS */
e7f5b1cd 587
2262beef 588static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
e7f5b1cd
AP
589{
590 u128 V,Z = { 0,0 };
591 long X;
592 int i,j;
593 const long *xi = (const long *)Xi;
594 const union { long one; char little; } is_endian = {1};
595
2262beef 596 V.hi = H[0]; /* H is in host byte order, no byte swapping */
e7f5b1cd
AP
597 V.lo = H[1];
598
599 for (j=0; j<16/sizeof(long); ++j) {
600 if (is_endian.little) {
601 if (sizeof(long)==8) {
602#ifdef BSWAP8
603 X = (long)(BSWAP8(xi[j]));
604#else
605 const u8 *p = (const u8 *)(xi+j);
606 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
607#endif
608 }
609 else {
610 const u8 *p = (const u8 *)(xi+j);
611 X = (long)GETU32(p);
612 }
613 }
614 else
615 X = xi[j];
616
617 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
618 u64 M = (u64)(X>>(8*sizeof(long)-1));
619 Z.hi ^= V.hi&M;
620 Z.lo ^= V.lo&M;
621
c1f092d1 622 REDUCE1BIT(V);
e7f5b1cd
AP
623 }
624 }
625
626 if (is_endian.little) {
627#ifdef BSWAP8
628 Xi[0] = BSWAP8(Z.hi);
629 Xi[1] = BSWAP8(Z.lo);
630#else
631 u8 *p = (u8 *)Xi;
632 u32 v;
633 v = (u32)(Z.hi>>32); PUTU32(p,v);
634 v = (u32)(Z.hi); PUTU32(p+4,v);
635 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
636 v = (u32)(Z.lo); PUTU32(p+12,v);
637#endif
638 }
639 else {
640 Xi[0] = Z.hi;
641 Xi[1] = Z.lo;
642 }
643}
2262beef 644#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
a595baff 645
e7f5b1cd
AP
646#endif
647
1e863180
AP
648#if TABLE_BITS==4 && defined(GHASH_ASM)
649# if !defined(I386_ONLY) && \
c1f092d1
AP
650 (defined(__i386) || defined(__i386__) || \
651 defined(__x86_64) || defined(__x86_64__) || \
652 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
1e863180
AP
653# define GHASH_ASM_X86_OR_64
654# define GCM_FUNCREF_4BIT
c1f092d1
AP
655extern unsigned int OPENSSL_ia32cap_P[2];
656
657void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
658void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
659void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
660
1e863180
AP
661# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
662# define GHASH_ASM_X86
c1f092d1
AP
663void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
664void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
665
666void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
667void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
1e863180
AP
668# endif
669# elif defined(__arm__) || defined(__arm)
670# include "arm_arch.h"
671# if __ARM_ARCH__>=7
672# define GHASH_ASM_ARM
673# define GCM_FUNCREF_4BIT
1e863180
AP
674void gcm_gmult_neon(u64 Xi[2],const u128 Htable[16]);
675void gcm_ghash_neon(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
676# endif
c1f092d1 677# endif
c1f092d1
AP
678#endif
679
7af04002
AP
680#ifdef GCM_FUNCREF_4BIT
681# undef GCM_MUL
682# define GCM_MUL(ctx,Xi) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
683# ifdef GHASH
684# undef GHASH
685# define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
686# endif
687#endif
688
e7f5b1cd
AP
689void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
690{
691 const union { long one; char little; } is_endian = {1};
692
693 memset(ctx,0,sizeof(*ctx));
694 ctx->block = block;
695 ctx->key = key;
696
697 (*block)(ctx->H.c,ctx->H.c,key);
698
699 if (is_endian.little) {
700 /* H is stored in host byte order */
701#ifdef BSWAP8
702 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
703 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
704#else
705 u8 *p = ctx->H.c;
706 u64 hi,lo;
707 hi = (u64)GETU32(p) <<32|GETU32(p+4);
708 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
709 ctx->H.u[0] = hi;
710 ctx->H.u[1] = lo;
711#endif
712 }
713
a595baff
AP
714#if TABLE_BITS==8
715 gcm_init_8bit(ctx->Htable,ctx->H.u);
716#elif TABLE_BITS==4
d8d95832 717# if defined(GHASH_ASM_X86_OR_64)
a6d915e0 718# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
87873f43
AP
719 if (OPENSSL_ia32cap_P[0]&(1<<24) && /* check FXSR bit */
720 OPENSSL_ia32cap_P[1]&(1<<1) ) { /* check PCLMULQDQ bit */
c1f092d1
AP
721 gcm_init_clmul(ctx->Htable,ctx->H.u);
722 ctx->gmult = gcm_gmult_clmul;
723 ctx->ghash = gcm_ghash_clmul;
724 return;
725 }
a6d915e0 726# endif
e7f5b1cd 727 gcm_init_4bit(ctx->Htable,ctx->H.u);
6acb4ff3 728# if defined(GHASH_ASM_X86) /* x86 only */
98909c1d
AP
729# if defined(OPENSSL_IA32_SSE2)
730 if (OPENSSL_ia32cap_P[0]&(1<<25)) { /* check SSE bit */
731# else
7af04002 732 if (OPENSSL_ia32cap_P[0]&(1<<23)) { /* check MMX bit */
98909c1d 733# endif
c1f092d1
AP
734 ctx->gmult = gcm_gmult_4bit_mmx;
735 ctx->ghash = gcm_ghash_4bit_mmx;
736 } else {
737 ctx->gmult = gcm_gmult_4bit_x86;
738 ctx->ghash = gcm_ghash_4bit_x86;
739 }
740# else
741 ctx->gmult = gcm_gmult_4bit;
742 ctx->ghash = gcm_ghash_4bit;
743# endif
1e863180 744# elif defined(GHASH_ASM_ARM)
87873f43 745 if (OPENSSL_armcap_P & ARMV7_NEON) {
1e863180
AP
746 ctx->gmult = gcm_gmult_neon;
747 ctx->ghash = gcm_ghash_neon;
748 } else {
749 gcm_init_4bit(ctx->Htable,ctx->H.u);
750 ctx->gmult = gcm_gmult_4bit;
751 ctx->ghash = gcm_ghash_4bit;
752 }
c1f092d1
AP
753# else
754 gcm_init_4bit(ctx->Htable,ctx->H.u);
755# endif
a595baff 756#endif
e7f5b1cd
AP
757}
758
759void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
760{
761 const union { long one; char little; } is_endian = {1};
f472ec8c 762 unsigned int ctr;
d8d95832 763#ifdef GCM_FUNCREF_4BIT
7af04002 764 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 765#endif
e7f5b1cd
AP
766
767 ctx->Yi.u[0] = 0;
768 ctx->Yi.u[1] = 0;
769 ctx->Xi.u[0] = 0;
770 ctx->Xi.u[1] = 0;
b68c1315
AP
771 ctx->len.u[0] = 0; /* AAD length */
772 ctx->len.u[1] = 0; /* message length */
773 ctx->ares = 0;
774 ctx->mres = 0;
e7f5b1cd
AP
775
776 if (len==12) {
777 memcpy(ctx->Yi.c,iv,12);
778 ctx->Yi.c[15]=1;
f472ec8c 779 ctr=1;
e7f5b1cd
AP
780 }
781 else {
782 size_t i;
783 u64 len0 = len;
784
785 while (len>=16) {
786 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
787 GCM_MUL(ctx,Yi);
788 iv += 16;
789 len -= 16;
790 }
791 if (len) {
792 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
793 GCM_MUL(ctx,Yi);
794 }
795 len0 <<= 3;
796 if (is_endian.little) {
797#ifdef BSWAP8
798 ctx->Yi.u[1] ^= BSWAP8(len0);
799#else
800 ctx->Yi.c[8] ^= (u8)(len0>>56);
801 ctx->Yi.c[9] ^= (u8)(len0>>48);
802 ctx->Yi.c[10] ^= (u8)(len0>>40);
803 ctx->Yi.c[11] ^= (u8)(len0>>32);
804 ctx->Yi.c[12] ^= (u8)(len0>>24);
805 ctx->Yi.c[13] ^= (u8)(len0>>16);
806 ctx->Yi.c[14] ^= (u8)(len0>>8);
807 ctx->Yi.c[15] ^= (u8)(len0);
808#endif
809 }
810 else
811 ctx->Yi.u[1] ^= len0;
812
813 GCM_MUL(ctx,Yi);
814
815 if (is_endian.little)
f472ec8c 816 ctr = GETU32(ctx->Yi.c+12);
e7f5b1cd 817 else
f472ec8c 818 ctr = ctx->Yi.d[3];
e7f5b1cd
AP
819 }
820
821 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
f472ec8c 822 ++ctr;
2262beef 823 if (is_endian.little)
f472ec8c 824 PUTU32(ctx->Yi.c+12,ctr);
2262beef 825 else
f472ec8c 826 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
827}
828
1f2502eb 829int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
e7f5b1cd
AP
830{
831 size_t i;
1f2502eb
AP
832 unsigned int n;
833 u64 alen = ctx->len.u[0];
d8d95832 834#ifdef GCM_FUNCREF_4BIT
7af04002 835 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 836# ifdef GHASH
7af04002
AP
837 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
838 const u8 *inp,size_t len) = ctx->ghash;
d8d95832
AP
839# endif
840#endif
e7f5b1cd 841
1f2502eb
AP
842 if (ctx->len.u[1]) return -2;
843
844 alen += len;
845 if (alen>(U64(1)<<61) || (sizeof(len)==8 && alen<len))
846 return -1;
847 ctx->len.u[0] = alen;
b68c1315 848
1f2502eb 849 n = ctx->ares;
b68c1315
AP
850 if (n) {
851 while (n && len) {
852 ctx->Xi.c[n] ^= *(aad++);
853 --len;
854 n = (n+1)%16;
855 }
856 if (n==0) GCM_MUL(ctx,Xi);
857 else {
858 ctx->ares = n;
1f2502eb 859 return 0;
b68c1315
AP
860 }
861 }
e7f5b1cd 862
2262beef
AP
863#ifdef GHASH
864 if ((i = (len&(size_t)-16))) {
c1f092d1 865 GHASH(ctx,aad,i);
2262beef
AP
866 aad += i;
867 len -= i;
868 }
869#else
e7f5b1cd
AP
870 while (len>=16) {
871 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
872 GCM_MUL(ctx,Xi);
873 aad += 16;
874 len -= 16;
875 }
2262beef 876#endif
e7f5b1cd 877 if (len) {
1f2502eb 878 n = (unsigned int)len;
e7f5b1cd 879 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
e7f5b1cd 880 }
b68c1315
AP
881
882 ctx->ares = n;
1f2502eb 883 return 0;
e7f5b1cd
AP
884}
885
1f2502eb 886int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
e7f5b1cd
AP
887 const unsigned char *in, unsigned char *out,
888 size_t len)
889{
890 const union { long one; char little; } is_endian = {1};
891 unsigned int n, ctr;
892 size_t i;
3f0d1405
AP
893 u64 mlen = ctx->len.u[1];
894 block128_f block = ctx->block;
895 void *key = ctx->key;
d8d95832 896#ifdef GCM_FUNCREF_4BIT
7af04002 897 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 898# ifdef GHASH
7af04002
AP
899 void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
900 const u8 *inp,size_t len) = ctx->ghash;
d8d95832
AP
901# endif
902#endif
1f2502eb
AP
903
904#if 0
905 n = (unsigned int)mlen%16; /* alternative to ctx->mres */
906#endif
907 mlen += len;
908 if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
909 return -1;
910 ctx->len.u[1] = mlen;
e7f5b1cd 911
b68c1315
AP
912 if (ctx->ares) {
913 /* First call to encrypt finalizes GHASH(AAD) */
914 GCM_MUL(ctx,Xi);
915 ctx->ares = 0;
916 }
917
f472ec8c
AP
918 if (is_endian.little)
919 ctr = GETU32(ctx->Yi.c+12);
920 else
921 ctr = ctx->Yi.d[3];
e7f5b1cd 922
1f2502eb 923 n = ctx->mres;
e7f5b1cd
AP
924#if !defined(OPENSSL_SMALL_FOOTPRINT)
925 if (16%sizeof(size_t) == 0) do { /* always true actually */
926 if (n) {
927 while (n && len) {
928 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
929 --len;
930 n = (n+1)%16;
931 }
932 if (n==0) GCM_MUL(ctx,Xi);
933 else {
b68c1315 934 ctx->mres = n;
1f2502eb 935 return 0;
e7f5b1cd
AP
936 }
937 }
e7f5b1cd
AP
938#if defined(STRICT_ALIGNMENT)
939 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
940 break;
941#endif
a595baff 942#if defined(GHASH) && defined(GHASH_CHUNK)
2262beef
AP
943 while (len>=GHASH_CHUNK) {
944 size_t j=GHASH_CHUNK;
945
946 while (j) {
3f0d1405 947 (*block)(ctx->Yi.c,ctx->EKi.c,key);
e7f5b1cd
AP
948 ++ctr;
949 if (is_endian.little)
950 PUTU32(ctx->Yi.c+12,ctr);
951 else
952 ctx->Yi.d[3] = ctr;
2262beef
AP
953 for (i=0; i<16; i+=sizeof(size_t))
954 *(size_t *)(out+i) =
955 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
956 out += 16;
957 in += 16;
958 j -= 16;
959 }
c1f092d1 960 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
2262beef
AP
961 len -= GHASH_CHUNK;
962 }
963 if ((i = (len&(size_t)-16))) {
964 size_t j=i;
965
966 while (len>=16) {
3f0d1405 967 (*block)(ctx->Yi.c,ctx->EKi.c,key);
2262beef
AP
968 ++ctr;
969 if (is_endian.little)
970 PUTU32(ctx->Yi.c+12,ctr);
971 else
972 ctx->Yi.d[3] = ctr;
973 for (i=0; i<16; i+=sizeof(size_t))
974 *(size_t *)(out+i) =
975 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
976 out += 16;
977 in += 16;
978 len -= 16;
979 }
c1f092d1 980 GHASH(ctx,out-j,j);
2262beef
AP
981 }
982#else
983 while (len>=16) {
3f0d1405 984 (*block)(ctx->Yi.c,ctx->EKi.c,key);
2262beef
AP
985 ++ctr;
986 if (is_endian.little)
987 PUTU32(ctx->Yi.c+12,ctr);
988 else
989 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
990 for (i=0; i<16; i+=sizeof(size_t))
991 *(size_t *)(ctx->Xi.c+i) ^=
992 *(size_t *)(out+i) =
993 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
994 GCM_MUL(ctx,Xi);
995 out += 16;
996 in += 16;
997 len -= 16;
998 }
2262beef 999#endif
e7f5b1cd 1000 if (len) {
3f0d1405 1001 (*block)(ctx->Yi.c,ctx->EKi.c,key);
e7f5b1cd
AP
1002 ++ctr;
1003 if (is_endian.little)
1004 PUTU32(ctx->Yi.c+12,ctr);
1005 else
1006 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1007 while (len--) {
1008 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1009 ++n;
1010 }
1011 }
1012
b68c1315 1013 ctx->mres = n;
1f2502eb 1014 return 0;
e7f5b1cd
AP
1015 } while(0);
1016#endif
1017 for (i=0;i<len;++i) {
1018 if (n==0) {
3f0d1405 1019 (*block)(ctx->Yi.c,ctx->EKi.c,key);
e7f5b1cd
AP
1020 ++ctr;
1021 if (is_endian.little)
1022 PUTU32(ctx->Yi.c+12,ctr);
1023 else
1024 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1025 }
1026 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
1027 n = (n+1)%16;
1028 if (n==0)
1029 GCM_MUL(ctx,Xi);
1030 }
1031
b68c1315 1032 ctx->mres = n;
1f2502eb 1033 return 0;
e7f5b1cd
AP
1034}
1035
/*
 * CRYPTO_gcm128_decrypt: decrypt |len| bytes from |in| to |out| in CTR mode
 * while folding the *ciphertext* (the input) into the running GHASH state
 * (ctx->Xi) — note the mirror image of the encrypt path, where the input
 * must be hashed before/as it is consumed (in-place operation is supported,
 * so the ciphertext may be overwritten).  Partial-block state is carried in
 * ctx->mres.
 * Returns 0 on success, -1 if the accumulated message length would exceed
 * GCM's limit of 2^36-32 bytes.
 */
int	CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len)
{
	/* run-time endianness probe: .little is nonzero on little-endian */
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64        mlen  = ctx->len.u[1];
	block128_f block = ctx->block;
	void      *key   = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	/* local copies of the function pointers used by GCM_MUL/GHASH macros */
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	/* enforce the GCM ciphertext limit and catch 64-bit wrap-around */
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* extract the 32-bit counter from the last 4 bytes of Yi */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
	if (16%sizeof(size_t) == 0) do {	/* always true actually */
		/* finish off a partial block left over from a previous call */
		if (n) {
			while (n && len) {
				u8 c = *(in++);		/* save: in/out may alias */
				*(out++) = c^ctx->EKi.c[n];
				ctx->Xi.c[n] ^= c;
				--len;
				n = (n+1)%16;
			}
			if (n==0) GCM_MUL (ctx,Xi);
			else {
				ctx->mres = n;
				return 0;
			}
		}
#if defined(STRICT_ALIGNMENT)
		/* misaligned buffers: fall through to the byte-wise loop below */
		if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
			break;
#endif
#if defined(GHASH) && defined(GHASH_CHUNK)
		/*
		 * Fast path: hash a whole chunk of ciphertext first (before it
		 * can be overwritten in-place), then decrypt it.
		 */
		while (len>=GHASH_CHUNK) {
		    size_t j=GHASH_CHUNK;

		    GHASH(ctx,in,GHASH_CHUNK);
		    while (j) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			/* XOR keystream word-at-a-time (alignment checked above) */
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(out+i) =
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
			out += 16;
			in  += 16;
			j   -= 16;
		    }
		    len -= GHASH_CHUNK;
		}
		/* remaining whole blocks: hash first, then decrypt */
		if ((i = (len&(size_t)-16))) {
		    GHASH(ctx,in,i);
		    while (len>=16) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16; i+=sizeof(size_t))
				*(size_t *)(out+i) =
				*(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
			out += 16;
			in  += 16;
			len -= 16;
		    }
		}
#else
		/* no bulk GHASH: hash each ciphertext block before overwriting */
		while (len>=16) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			for (i=0; i<16; i+=sizeof(size_t)) {
				size_t c = *(size_t *)(in+i);
				*(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
				*(size_t *)(ctx->Xi.c+i) ^= c;
			}
			GCM_MUL(ctx,Xi);
			out += 16;
			in  += 16;
			len -= 16;
		}
#endif
		/* trailing partial block: keystream cached in EKi, n < 16 left in mres */
		if (len) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
			while (len--) {
				u8 c = in[n];
				ctx->Xi.c[n] ^= c;
				out[n] = c^ctx->EKi.c[n];
				++n;
			}
		}

		ctx->mres = n;
		return 0;
	} while(0);
#endif
	/* portable byte-wise fallback (small footprint or strict alignment) */
	for (i=0;i<len;++i) {
		u8 c;
		if (n==0) {
			(*block)(ctx->Yi.c,ctx->EKi.c,key);
			++ctr;
			if (is_endian.little)
				PUTU32(ctx->Yi.c+12,ctr);
			else
				ctx->Yi.d[3] = ctr;
		}
		c = in[i];
		out[i] = c^ctx->EKi.c[n];
		ctx->Xi.c[n] ^= c;
		n = (n+1)%16;
		if (n==0)
			GCM_MUL(ctx,Xi);
	}

	ctx->mres = n;
	return 0;
}
1188
/*
 * CRYPTO_gcm128_encrypt_ctr32: like CRYPTO_gcm128_encrypt, but the bulk
 * keystream generation is delegated to |stream|, a ctr128_f routine that
 * encrypts N whole blocks with a 32-bit big-endian counter (typically an
 * assembly CTR implementation).  ctx->block is still used for the final
 * partial block.  Returns 0 on success, -1 on message-length overflow.
 */
int	CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len, ctr128_f stream)
{
	/* run-time endianness probe: .little is nonzero on little-endian */
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64   mlen = ctx->len.u[1];
	void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	/* local copies of the function pointers used by GCM_MUL/GHASH macros */
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	/* enforce the GCM plaintext limit and catch 64-bit wrap-around */
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to encrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* extract the 32-bit counter from the last 4 bytes of Yi */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
	/* finish off a partial block left over from a previous call */
	if (n) {
		while (n && len) {
			ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL(ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* bulk path: stream-encrypt a chunk, then hash the ciphertext */
	while (len>=GHASH_CHUNK) {
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		/* stream updated Yi internally; mirror the counter back */
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		GHASH(ctx,out,GHASH_CHUNK);
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	/* remaining whole blocks */
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		in  += i;
		len -= i;
#if defined(GHASH)
		GHASH(ctx,out,i);
		out += i;
#else
		/* no bulk GHASH: fold each ciphertext block into Xi by hand */
		while (j--) {
			for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
			GCM_MUL(ctx,Xi);
			out += 16;
		}
#endif
	}
	/* trailing partial block via the plain block cipher */
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}
1287
/*
 * CRYPTO_gcm128_decrypt_ctr32: like CRYPTO_gcm128_decrypt, but bulk
 * keystream generation is delegated to |stream| (a ctr128_f with a 32-bit
 * big-endian counter).  The ciphertext is hashed *before* it is decrypted
 * so that in-place operation is safe.  ctx->block is still used for the
 * final partial block.  Returns 0 on success, -1 on length overflow.
 */
int	CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
		const unsigned char *in, unsigned char *out,
		size_t len,ctr128_f stream)
{
	/* run-time endianness probe: .little is nonzero on little-endian */
	const union { long one; char little; } is_endian = {1};
	unsigned int n, ctr;
	size_t i;
	u64   mlen = ctx->len.u[1];
	void *key  = ctx->key;
#ifdef GCM_FUNCREF_4BIT
	/* local copies of the function pointers used by GCM_MUL/GHASH macros */
	void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16])	= ctx->gmult;
# ifdef GHASH
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx->ghash;
# endif
#endif

	mlen += len;
	/* enforce the GCM ciphertext limit and catch 64-bit wrap-around */
	if (mlen>((U64(1)<<36)-32) || (sizeof(len)==8 && mlen<len))
		return -1;
	ctx->len.u[1] = mlen;

	if (ctx->ares) {
		/* First call to decrypt finalizes GHASH(AAD) */
		GCM_MUL(ctx,Xi);
		ctx->ares = 0;
	}

	/* extract the 32-bit counter from the last 4 bytes of Yi */
	if (is_endian.little)
		ctr = GETU32(ctx->Yi.c+12);
	else
		ctr = ctx->Yi.d[3];

	n = ctx->mres;
	/* finish off a partial block left over from a previous call */
	if (n) {
		while (n && len) {
			u8 c = *(in++);		/* save: in/out may alias */
			*(out++) = c^ctx->EKi.c[n];
			ctx->Xi.c[n] ^= c;
			--len;
			n = (n+1)%16;
		}
		if (n==0) GCM_MUL (ctx,Xi);
		else {
			ctx->mres = n;
			return 0;
		}
	}
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
	/* bulk path: hash the ciphertext chunk first, then stream-decrypt it */
	while (len>=GHASH_CHUNK) {
		GHASH(ctx,in,GHASH_CHUNK);
		(*stream)(in,out,GHASH_CHUNK/16,key,ctx->Yi.c);
		ctr += GHASH_CHUNK/16;
		/* stream updated Yi internally; mirror the counter back */
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		out += GHASH_CHUNK;
		in  += GHASH_CHUNK;
		len -= GHASH_CHUNK;
	}
#endif
	/* remaining whole blocks */
	if ((i = (len&(size_t)-16))) {
		size_t j=i/16;

#if defined(GHASH)
		GHASH(ctx,in,i);
#else
		/* no bulk GHASH: fold each ciphertext block into Xi by hand,
		 * then rewind |in| so the stream call below sees it again */
		while (j--) {
			size_t k;
			for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
			GCM_MUL(ctx,Xi);
			in += 16;
		}
		j = i/16;
		in -= i;
#endif
		(*stream)(in,out,j,key,ctx->Yi.c);
		ctr += (unsigned int)j;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		out += i;
		in  += i;
		len -= i;
	}
	/* trailing partial block via the plain block cipher */
	if (len) {
		(*ctx->block)(ctx->Yi.c,ctx->EKi.c,key);
		++ctr;
		if (is_endian.little)
			PUTU32(ctx->Yi.c+12,ctr);
		else
			ctx->Yi.d[3] = ctr;
		while (len--) {
			u8 c = in[n];
			ctx->Xi.c[n] ^= c;
			out[n] = c^ctx->EKi.c[n];
			++n;
		}
	}

	ctx->mres = n;
	return 0;
}
1393
6acb4ff3
AP
1394int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1395 size_t len)
e7f5b1cd
AP
1396{
1397 const union { long one; char little; } is_endian = {1};
1398 u64 alen = ctx->len.u[0]<<3;
1399 u64 clen = ctx->len.u[1]<<3;
d8d95832 1400#ifdef GCM_FUNCREF_4BIT
7af04002 1401 void (*gcm_gmult_p)(u64 Xi[2],const u128 Htable[16]) = ctx->gmult;
d8d95832 1402#endif
e7f5b1cd 1403
9ddd859d 1404 if (ctx->mres || ctx->ares)
e7f5b1cd
AP
1405 GCM_MUL(ctx,Xi);
1406
1407 if (is_endian.little) {
1408#ifdef BSWAP8
1409 alen = BSWAP8(alen);
1410 clen = BSWAP8(clen);
1411#else
1412 u8 *p = ctx->len.c;
1413
1414 ctx->len.u[0] = alen;
1415 ctx->len.u[1] = clen;
1416
1417 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1418 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1419#endif
1420 }
1421
1422 ctx->Xi.u[0] ^= alen;
1423 ctx->Xi.u[1] ^= clen;
1424 GCM_MUL(ctx,Xi);
1425
1426 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1427 ctx->Xi.u[1] ^= ctx->EK0.u[1];
6acb4ff3
AP
1428
1429 if (tag && len<=sizeof(ctx->Xi))
1430 return memcmp(ctx->Xi.c,tag,len);
1431 else
1432 return -1;
1433}
1434
fd3dbc1d
DSH
1435void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1436{
1437 CRYPTO_gcm128_finish(ctx, NULL, 0);
1f2502eb 1438 memcpy(tag, ctx->Xi.c, len<=sizeof(ctx->Xi.c)?len:sizeof(ctx->Xi.c));
fd3dbc1d
DSH
1439}
1440
6acb4ff3
AP
1441GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1442{
1443 GCM128_CONTEXT *ret;
1444
1445 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1446 CRYPTO_gcm128_init(ret,key,block);
1447
1448 return ret;
1449}
1450
1451void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1452{
1453 if (ctx) {
1454 OPENSSL_cleanse(ctx,sizeof(*ctx));
1455 OPENSSL_free(ctx);
1456 }
e7f5b1cd
AP
1457}
1458
#if defined(SELFTEST)
#include <stdio.h>
#include <openssl/aes.h>

/*
 * Built-in self test: AES-GCM known-answer vectors (Test Cases 1-18,
 * covering 128/192/256-bit keys, empty/non-empty plaintext and AAD, and
 * 96-bit/64-bit/480-bit IVs), plus an optional cycle-count benchmark.
 * K=key, P=plaintext, A=AAD, IV=nonce, C=expected ciphertext, T=expected tag.
 */

/* Test Case 1 */
static const u8	K1[16],
		*P1=NULL,
		*A1=NULL,
		IV1[12],
		*C1=NULL,
		T1[]=  {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};

/* Test Case 2 */
#define K2 K1
#define A2 A1
#define IV2 IV1
static const u8	P2[16],
		C2[]=  {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
		T2[]=  {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};

/* Test Case 3 */
#define A3 A2
static const u8	K3[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
		P3[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
		IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
		C3[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
			0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
			0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
			0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
		T3[]=  {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};

/* Test Case 4 */
#define K4 K3
#define IV4 IV3
static const u8	P4[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
		A4[]=  {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
			0xab,0xad,0xda,0xd2},
		C4[]=  {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
			0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
			0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
			0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
		T4[]=  {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};

/* Test Case 5 */
#define K5 K4
#define P5 P4
#define A5 A4
static const u8	IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
		C5[]=  {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
			0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
			0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
			0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
		T5[]=  {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};

/* Test Case 6 */
#define K6 K5
#define P6 P5
#define A6 A5
static const u8	IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
			0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
			0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
			0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
		C6[]=  {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
			0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
			0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
			0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
		T6[]=  {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};

/* Test Case 7 */
static const u8 K7[24],
		*P7=NULL,
		*A7=NULL,
		IV7[12],
		*C7=NULL,
		T7[]=  {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};

/* Test Case 8 */
#define K8 K7
#define IV8 IV7
#define A8 A7
static const u8	P8[16],
		C8[]=  {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
		T8[]=  {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};

/* Test Case 9 */
#define A9 A8
static const u8	K9[]=  {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
			0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
		P9[]=  {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
		IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
		C9[]=  {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
			0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
			0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
			0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
		T9[]=  {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};

/* Test Case 10 */
#define K10 K9
#define IV10 IV9
static const u8	P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
		A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
			0xab,0xad,0xda,0xd2},
		C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
			0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
			0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
			0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
		T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};

/* Test Case 11 */
#define K11 K10
#define P11 P10
#define A11 A10
static const u8	IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
		C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
			0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
			0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
			0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
		T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};

/* Test Case 12 */
#define K12 K11
#define P12 P11
#define A12 A11
static const u8	IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
			0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
			0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
			0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
		C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
			0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
			0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
			0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
		T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};

/* Test Case 13 */
static const u8	K13[32],
		*P13=NULL,
		*A13=NULL,
		IV13[12],
		*C13=NULL,
		T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};

/* Test Case 14 */
#define K14 K13
#define A14 A13
static const u8	P14[16],
		IV14[12],
		C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
		T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};

/* Test Case 15 */
#define A15 A14
static const u8	K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
			0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
		P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
		IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
		C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
			0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
			0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
			0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
		T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};

/* Test Case 16 */
#define K16 K15
#define IV16 IV15
static const u8	P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
			0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
			0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
			0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
		A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
			0xab,0xad,0xda,0xd2},
		C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
			0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
			0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
			0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
		T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};

/* Test Case 17 */
#define K17 K16
#define P17 P16
#define A17 A16
static const u8	IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
		C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
			0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
			0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
			0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
		T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};

/* Test Case 18 */
#define K18 K17
#define P18 P17
#define A18 A17
static const u8	IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
			0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
			0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
			0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
		C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
			0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
			0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
			0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
		T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};

/*
 * Run encrypt and decrypt for one test case and compare ciphertext/
 * plaintext and tag against the expected values; bumps |ret| on failure.
 * NULL P/C/A (empty inputs) are skipped via the if() guards.
 */
#define TEST_CASE(n)	do {					\
	u8 out[sizeof(P##n)];					\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);		\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (C##n && memcmp(out,C##n,sizeof(out))))		\
		ret++, printf ("encrypt test#%d failed.\n",n);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	memset(out,0,sizeof(out));				\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (P##n && memcmp(out,P##n,sizeof(out)))) 		\
		ret++, printf ("decrypt test#%d failed.\n",n);	\
	} while(0)

int main()
{
	GCM128_CONTEXT ctx;
	AES_KEY key;
	int ret=0;

	TEST_CASE(1);
	TEST_CASE(2);
	TEST_CASE(3);
	TEST_CASE(4);
	TEST_CASE(5);
	TEST_CASE(6);
	TEST_CASE(7);
	TEST_CASE(8);
	TEST_CASE(9);
	TEST_CASE(10);
	TEST_CASE(11);
	TEST_CASE(12);
	TEST_CASE(13);
	TEST_CASE(14);
	TEST_CASE(15);
	TEST_CASE(16);
	TEST_CASE(17);
	TEST_CASE(18);

#ifdef OPENSSL_CPUID_OBJ
	/* cycle-count benchmark: GCM vs plain CTR, and raw GHASH */
	{
	/* note: OPENSSL_rdtsc() here is an (unusual) block-scope function
	 * declaration sharing the declarator list with the size_t locals */
	size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
	union { u64 u; u8 c[1024]; } buf;
	int i;

	AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
	CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));

	/* warm-up pass, then timed pass */
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;

	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
			(block128_f)AES_encrypt);
	start = OPENSSL_rdtsc();
	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
			(block128_f)AES_encrypt);
	ctr_t = OPENSSL_rdtsc() - start;

	/* cycles/byte: GCM, CTR, and the GHASH overhead (difference) */
	printf("%.2f-%.2f=%.2f\n",
			gcm_t/(double)sizeof(buf),
			ctr_t/(double)sizeof(buf),
			(gcm_t-ctr_t)/(double)sizeof(buf));
#ifdef GHASH
	{
	void (*gcm_ghash_p)(u64 Xi[2],const u128 Htable[16],
				const u8 *inp,size_t len)	= ctx.ghash;

	GHASH((&ctx),buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	for (i=0;i<100;++i) GHASH((&ctx),buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;
	printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
	}
#endif
	}
#endif

	return ret;
}
#endif