]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/gcm128.c
md32_common.h: modify MD32_REG_T pre-processing logic [triggered by clang].
[thirdparty/openssl.git] / crypto / modes / gcm128.c
CommitLineData
e7f5b1cd
AP
1/* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
aa763c0f 50#include <openssl/crypto.h>
f472ec8c 51#include "modes_lcl.h"
e7f5b1cd
AP
52#include <string.h>
53
54#ifndef MODES_DEBUG
55# ifndef NDEBUG
56# define NDEBUG
57# endif
58#endif
59#include <assert.h>
60
e7f5b1cd
AP
61typedef struct { u64 hi,lo; } u128;
62
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured by the callers in this file */
#undef	GETU32
#define	GETU32(p)	BSWAP4(*(const u32 *)(p))
#undef	PUTU32
#define	PUTU32(p,v)	*(u32 *)(p) = BSWAP4(v)
#endif
/* place a 16-bit value in the most significant 16 bits of a size_t */
#define	PACK(s)		((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * Divide V by x modulo the GCM polynomial x^128+x^7+x^2+x+1:
 * shift the 128-bit value right by one bit and, if a 1 bit was
 * shifted out, XOR the reduction constant 0xE1 into the top byte.
 */
#define REDUCE1BIT(V)	do { \
	if (sizeof(size_t)==8) { \
		u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
		V.lo  = (V.hi<<63)|(V.lo>>1); \
		V.hi  = (V.hi>>1 )^T; \
	} \
	else { \
		u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
		V.lo  = (V.hi<<63)|(V.lo>>1); \
		V.hi  = (V.hi>>1 )^((u64)T<<32); \
	} \
} while(0)
84
#ifdef	TABLE_BITS
#undef	TABLE_BITS
#endif
/*
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
 * whole spectrum of possible table driven implementations. Why? In
 * non-"Shoup's" case memory access pattern is segmented in such manner,
 * that it's trivial to see that cache timing information can reveal
 * fair portion of intermediate hash value. Given that ciphertext is
 * always available to attacker, it's possible for him to attempt to
 * deduce secret parameter H and if successful, tamper with messages
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
 * not as trivial, but there is no reason to believe that it's resistant
 * to cache-timing attack. And the thing about "8-bit" implementation is
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
 * key + 1KB shared. Well, on pros side it should be twice as fast as
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
 * was observed to run ~75% faster, closer to 100% for commercial
 * compilers... Yet "4-bit" procedure is preferred, because it's
 * believed to provide better security-performance balance and adequate
 * all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows large enough free
 *   results in VM working set trimming, meaning that consequent
 *   malloc would immediately incur working set expansion);
 * - larger table has larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from same
 *   thread in Hyper-Threading world);
 */
#define	TABLE_BITS 4
122#if TABLE_BITS==8
123
e7f5b1cd
AP
124static void gcm_init_8bit(u128 Htable[256], u64 H[2])
125{
126 int i, j;
127 u128 V;
128
129 Htable[0].hi = 0;
130 Htable[0].lo = 0;
131 V.hi = H[0];
132 V.lo = H[1];
133
134 for (Htable[128]=V, i=64; i>0; i>>=1) {
c1f092d1 135 REDUCE1BIT(V);
e7f5b1cd
AP
136 Htable[i] = V;
137 }
138
139 for (i=2; i<256; i<<=1) {
140 u128 *Hi = Htable+i, H0 = *Hi;
141 for (j=1; j<i; ++j) {
142 Hi[j].hi = H0.hi^Htable[j].hi;
143 Hi[j].lo = H0.lo^Htable[j].lo;
144 }
145 }
146}
147
2262beef 148static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
e7f5b1cd
AP
149{
150 u128 Z = { 0, 0};
151 const u8 *xi = (const u8 *)Xi+15;
152 size_t rem, n = *xi;
153 const union { long one; char little; } is_endian = {1};
154 static const size_t rem_8bit[256] = {
155 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
156 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
157 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
158 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
159 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
160 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
161 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
162 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
163 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
164 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
165 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
166 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
167 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
168 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
169 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
170 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
171 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
172 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
173 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
174 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
175 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
176 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
177 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
178 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
179 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
180 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
181 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
182 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
183 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
184 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
185 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
186 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
187 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
188 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
189 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
190 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
191 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
192 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
193 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
194 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
195 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
196 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
197 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
198 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
199 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
200 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
201 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
202 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
203 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
204 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
205 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
206 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
207 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
208 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
209 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
210 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
211 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
212 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
213 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
214 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
215 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
216 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
217 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
218 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
219
220 while (1) {
221 Z.hi ^= Htable[n].hi;
222 Z.lo ^= Htable[n].lo;
223
224 if ((u8 *)Xi==xi) break;
225
226 n = *(--xi);
227
228 rem = (size_t)Z.lo&0xff;
229 Z.lo = (Z.hi<<56)|(Z.lo>>8);
230 Z.hi = (Z.hi>>8);
231 if (sizeof(size_t)==8)
232 Z.hi ^= rem_8bit[rem];
233 else
234 Z.hi ^= (u64)rem_8bit[rem]<<32;
235 }
236
237 if (is_endian.little) {
238#ifdef BSWAP8
239 Xi[0] = BSWAP8(Z.hi);
240 Xi[1] = BSWAP8(Z.lo);
241#else
242 u8 *p = (u8 *)Xi;
243 u32 v;
244 v = (u32)(Z.hi>>32); PUTU32(p,v);
245 v = (u32)(Z.hi); PUTU32(p+4,v);
246 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
247 v = (u32)(Z.lo); PUTU32(p+12,v);
248#endif
249 }
250 else {
251 Xi[0] = Z.hi;
252 Xi[1] = Z.lo;
253 }
254}
a595baff 255#define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
e7f5b1cd 256
a595baff 257#elif TABLE_BITS==4
2262beef 258
e7f5b1cd
AP
259static void gcm_init_4bit(u128 Htable[16], u64 H[2])
260{
e7f5b1cd 261 u128 V;
f472ec8c
AP
262#if defined(OPENSSL_SMALL_FOOTPRINT)
263 int i;
264#endif
e7f5b1cd
AP
265
266 Htable[0].hi = 0;
267 Htable[0].lo = 0;
268 V.hi = H[0];
269 V.lo = H[1];
270
f472ec8c 271#if defined(OPENSSL_SMALL_FOOTPRINT)
e7f5b1cd 272 for (Htable[8]=V, i=4; i>0; i>>=1) {
c1f092d1 273 REDUCE1BIT(V);
e7f5b1cd
AP
274 Htable[i] = V;
275 }
276
277 for (i=2; i<16; i<<=1) {
2262beef
AP
278 u128 *Hi = Htable+i;
279 int j;
280 for (V=*Hi, j=1; j<i; ++j) {
281 Hi[j].hi = V.hi^Htable[j].hi;
282 Hi[j].lo = V.lo^Htable[j].lo;
e7f5b1cd
AP
283 }
284 }
2262beef 285#else
f472ec8c 286 Htable[8] = V;
c1f092d1 287 REDUCE1BIT(V);
f472ec8c 288 Htable[4] = V;
c1f092d1 289 REDUCE1BIT(V);
f472ec8c 290 Htable[2] = V;
c1f092d1 291 REDUCE1BIT(V);
f472ec8c 292 Htable[1] = V;
2262beef
AP
293 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
294 V=Htable[4];
295 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
296 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
297 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
298 V=Htable[8];
299 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
300 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
301 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
302 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
303 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
304 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
305 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
306#endif
f472ec8c
AP
307#if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
308 /*
309 * ARM assembler expects specific dword order in Htable.
310 */
311 {
312 int j;
313 const union { long one; char little; } is_endian = {1};
314
315 if (is_endian.little)
316 for (j=0;j<16;++j) {
317 V = Htable[j];
318 Htable[j].hi = V.lo;
319 Htable[j].lo = V.hi;
320 }
321 else
322 for (j=0;j<16;++j) {
323 V = Htable[j];
324 Htable[j].hi = V.lo<<32|V.lo>>32;
325 Htable[j].lo = V.hi<<32|V.hi>>32;
326 }
327 }
328#endif
e7f5b1cd
AP
329}
330
a595baff 331#ifndef GHASH_ASM
2262beef
AP
332static const size_t rem_4bit[16] = {
333 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
334 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
335 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
336 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
337
4f39edbf 338static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
e7f5b1cd 339{
2262beef
AP
340 u128 Z;
341 int cnt = 15;
342 size_t rem, nlo, nhi;
e7f5b1cd 343 const union { long one; char little; } is_endian = {1};
2262beef
AP
344
345 nlo = ((const u8 *)Xi)[15];
346 nhi = nlo>>4;
347 nlo &= 0xf;
348
349 Z.hi = Htable[nlo].hi;
350 Z.lo = Htable[nlo].lo;
e7f5b1cd
AP
351
352 while (1) {
2262beef
AP
353 rem = (size_t)Z.lo&0xf;
354 Z.lo = (Z.hi<<60)|(Z.lo>>4);
355 Z.hi = (Z.hi>>4);
356 if (sizeof(size_t)==8)
357 Z.hi ^= rem_4bit[rem];
358 else
359 Z.hi ^= (u64)rem_4bit[rem]<<32;
360
361 Z.hi ^= Htable[nhi].hi;
362 Z.lo ^= Htable[nhi].lo;
363
364 if (--cnt<0) break;
365
366 nlo = ((const u8 *)Xi)[cnt];
e7f5b1cd
AP
367 nhi = nlo>>4;
368 nlo &= 0xf;
369
2262beef
AP
370 rem = (size_t)Z.lo&0xf;
371 Z.lo = (Z.hi<<60)|(Z.lo>>4);
372 Z.hi = (Z.hi>>4);
373 if (sizeof(size_t)==8)
374 Z.hi ^= rem_4bit[rem];
375 else
376 Z.hi ^= (u64)rem_4bit[rem]<<32;
377
e7f5b1cd
AP
378 Z.hi ^= Htable[nlo].hi;
379 Z.lo ^= Htable[nlo].lo;
2262beef 380 }
e7f5b1cd 381
2262beef
AP
382 if (is_endian.little) {
383#ifdef BSWAP8
384 Xi[0] = BSWAP8(Z.hi);
385 Xi[1] = BSWAP8(Z.lo);
386#else
387 u8 *p = (u8 *)Xi;
388 u32 v;
389 v = (u32)(Z.hi>>32); PUTU32(p,v);
390 v = (u32)(Z.hi); PUTU32(p+4,v);
391 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
392 v = (u32)(Z.lo); PUTU32(p+12,v);
393#endif
394 }
395 else {
396 Xi[0] = Z.hi;
397 Xi[1] = Z.lo;
398 }
399}
400
401#if !defined(OPENSSL_SMALL_FOOTPRINT)
402/*
403 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
a595baff
AP
404 * details... Compiler-generated code doesn't seem to give any
405 * performance improvement, at least not on x86[_64]. It's here
406 * mostly as reference and a placeholder for possible future
407 * non-trivial optimization[s]...
2262beef 408 */
4f39edbf
AP
409static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
410 const u8 *inp,size_t len)
2262beef
AP
411{
412 u128 Z;
413 int cnt;
414 size_t rem, nlo, nhi;
415 const union { long one; char little; } is_endian = {1};
416
e747f4d4 417#if 1
2262beef
AP
418 do {
419 cnt = 15;
420 nlo = ((const u8 *)Xi)[15];
421 nlo ^= inp[15];
422 nhi = nlo>>4;
423 nlo &= 0xf;
424
425 Z.hi = Htable[nlo].hi;
426 Z.lo = Htable[nlo].lo;
427
428 while (1) {
e7f5b1cd
AP
429 rem = (size_t)Z.lo&0xf;
430 Z.lo = (Z.hi<<60)|(Z.lo>>4);
431 Z.hi = (Z.hi>>4);
432 if (sizeof(size_t)==8)
433 Z.hi ^= rem_4bit[rem];
434 else
435 Z.hi ^= (u64)rem_4bit[rem]<<32;
436
437 Z.hi ^= Htable[nhi].hi;
438 Z.lo ^= Htable[nhi].lo;
439
2262beef 440 if (--cnt<0) break;
e7f5b1cd 441
2262beef
AP
442 nlo = ((const u8 *)Xi)[cnt];
443 nlo ^= inp[cnt];
444 nhi = nlo>>4;
445 nlo &= 0xf;
e7f5b1cd
AP
446
447 rem = (size_t)Z.lo&0xf;
448 Z.lo = (Z.hi<<60)|(Z.lo>>4);
449 Z.hi = (Z.hi>>4);
450 if (sizeof(size_t)==8)
451 Z.hi ^= rem_4bit[rem];
452 else
453 Z.hi ^= (u64)rem_4bit[rem]<<32;
2262beef
AP
454
455 Z.hi ^= Htable[nlo].hi;
456 Z.lo ^= Htable[nlo].lo;
e7f5b1cd 457 }
e747f4d4
AP
458#else
459 /*
460 * Extra 256+16 bytes per-key plus 512 bytes shared tables
461 * [should] give ~50% improvement... One could have PACK()-ed
6acb4ff3
AP
462 * the rem_8bit even here, but the priority is to minimize
463 * cache footprint...
e747f4d4
AP
464 */
465 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
466 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
467 static const unsigned short rem_8bit[256] = {
468 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
e747f4d4
AP
500 /*
501 * This pre-processing phase slows down procedure by approximately
502 * same time as it makes each loop spin faster. In other words
503 * single block performance is approximately same as straightforward
504 * "4-bit" implementation, and then it goes only faster...
505 */
506 for (cnt=0; cnt<16; ++cnt) {
507 Z.hi = Htable[cnt].hi;
508 Z.lo = Htable[cnt].lo;
509 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510 Hshr4[cnt].hi = (Z.hi>>4);
511 Hshl4[cnt] = (u8)(Z.lo<<4);
512 }
513
514 do {
6acb4ff3 515 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
e747f4d4
AP
516 nlo = ((const u8 *)Xi)[cnt];
517 nlo ^= inp[cnt];
518 nhi = nlo>>4;
519 nlo &= 0xf;
520
521 Z.hi ^= Htable[nlo].hi;
522 Z.lo ^= Htable[nlo].lo;
523
524 rem = (size_t)Z.lo&0xff;
525
526 Z.lo = (Z.hi<<56)|(Z.lo>>8);
527 Z.hi = (Z.hi>>8);
528
529 Z.hi ^= Hshr4[nhi].hi;
530 Z.lo ^= Hshr4[nhi].lo;
531 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
532 }
533
534 nlo = ((const u8 *)Xi)[0];
535 nlo ^= inp[0];
536 nhi = nlo>>4;
537 nlo &= 0xf;
538
539 Z.hi ^= Htable[nlo].hi;
540 Z.lo ^= Htable[nlo].lo;
541
542 rem = (size_t)Z.lo&0xf;
543
544 Z.lo = (Z.hi<<60)|(Z.lo>>4);
545 Z.hi = (Z.hi>>4);
546
547 Z.hi ^= Htable[nhi].hi;
548 Z.lo ^= Htable[nhi].lo;
549 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
550#endif
e7f5b1cd
AP
551
552 if (is_endian.little) {
553#ifdef BSWAP8
554 Xi[0] = BSWAP8(Z.hi);
555 Xi[1] = BSWAP8(Z.lo);
556#else
557 u8 *p = (u8 *)Xi;
558 u32 v;
559 v = (u32)(Z.hi>>32); PUTU32(p,v);
560 v = (u32)(Z.hi); PUTU32(p+4,v);
561 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
562 v = (u32)(Z.lo); PUTU32(p+12,v);
563#endif
564 }
565 else {
566 Xi[0] = Z.hi;
567 Xi[1] = Z.lo;
568 }
2262beef 569 } while (inp+=16, len-=16);
e7f5b1cd 570}
2262beef
AP
571#endif
572#else
4f39edbf
AP
573void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
2262beef
AP
575#endif
576
577#define GCM_MUL(ctx,Xi) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
a595baff 578#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
c1f092d1 579#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
a595baff
AP
580/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
581 * trashing effect. In other words idea is to hash data while it's
582 * still in L1 cache after encryption pass... */
2262beef 583#define GHASH_CHUNK 1024
a595baff 584#endif
2262beef 585
a595baff 586#else /* TABLE_BITS */
e7f5b1cd 587
2262beef 588static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
e7f5b1cd
AP
589{
590 u128 V,Z = { 0,0 };
591 long X;
592 int i,j;
593 const long *xi = (const long *)Xi;
594 const union { long one; char little; } is_endian = {1};
595
2262beef 596 V.hi = H[0]; /* H is in host byte order, no byte swapping */
e7f5b1cd
AP
597 V.lo = H[1];
598
599 for (j=0; j<16/sizeof(long); ++j) {
600 if (is_endian.little) {
601 if (sizeof(long)==8) {
602#ifdef BSWAP8
603 X = (long)(BSWAP8(xi[j]));
604#else
605 const u8 *p = (const u8 *)(xi+j);
606 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
607#endif
608 }
609 else {
610 const u8 *p = (const u8 *)(xi+j);
611 X = (long)GETU32(p);
612 }
613 }
614 else
615 X = xi[j];
616
617 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
618 u64 M = (u64)(X>>(8*sizeof(long)-1));
619 Z.hi ^= V.hi&M;
620 Z.lo ^= V.lo&M;
621
c1f092d1 622 REDUCE1BIT(V);
e7f5b1cd
AP
623 }
624 }
625
626 if (is_endian.little) {
627#ifdef BSWAP8
628 Xi[0] = BSWAP8(Z.hi);
629 Xi[1] = BSWAP8(Z.lo);
630#else
631 u8 *p = (u8 *)Xi;
632 u32 v;
633 v = (u32)(Z.hi>>32); PUTU32(p,v);
634 v = (u32)(Z.hi); PUTU32(p+4,v);
635 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
636 v = (u32)(Z.lo); PUTU32(p+12,v);
637#endif
638 }
639 else {
640 Xi[0] = Z.hi;
641 Xi[1] = Z.lo;
642 }
643}
2262beef 644#define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
a595baff 645
e7f5b1cd
AP
646#endif
647
19f7e5e2 648struct gcm128_context {
e7f5b1cd
AP
649 /* Following 6 names follow names in GCM specification */
650 union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
f472ec8c 651 Xi,H,len;
a595baff
AP
652 /* Pre-computed table used by gcm_gmult_* */
653#if TABLE_BITS==8
654 u128 Htable[256];
655#else
e7f5b1cd 656 u128 Htable[16];
c1f092d1
AP
657 void (*gmult)(u64 Xi[2],const u128 Htable[16]);
658 void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
a595baff 659#endif
f472ec8c 660 unsigned int res, pad;
e7f5b1cd
AP
661 block128_f block;
662 void *key;
19f7e5e2 663};
e7f5b1cd 664
#if	TABLE_BITS==4 && defined(GHASH_ASM) && !defined(I386_ONLY) && \
	(defined(__i386)	|| defined(__i386__)	|| \
	 defined(__x86_64)	|| defined(__x86_64__)	|| \
	 defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64))
# define GHASH_ASM_IAX
extern unsigned int OPENSSL_ia32cap_P[2];

/* PCLMULQDQ-based assembler implementations */
void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

# if	defined(__i386) || defined(__i386__) || defined(_M_IX86)
#  define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
# endif

/* route GHASH through the per-context function pointers */
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)   (*((ctx)->gmult))(ctx->Xi.u,ctx->Htable)
# undef  GHASH
# define GHASH(ctx,in,len) (*((ctx)->ghash))((ctx)->Xi.u,(ctx)->Htable,in,len)
#endif
690
e7f5b1cd
AP
691void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
692{
693 const union { long one; char little; } is_endian = {1};
694
695 memset(ctx,0,sizeof(*ctx));
696 ctx->block = block;
697 ctx->key = key;
698
699 (*block)(ctx->H.c,ctx->H.c,key);
700
701 if (is_endian.little) {
702 /* H is stored in host byte order */
703#ifdef BSWAP8
704 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
705 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
706#else
707 u8 *p = ctx->H.c;
708 u64 hi,lo;
709 hi = (u64)GETU32(p) <<32|GETU32(p+4);
710 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
711 ctx->H.u[0] = hi;
712 ctx->H.u[1] = lo;
713#endif
714 }
715
a595baff
AP
716#if TABLE_BITS==8
717 gcm_init_8bit(ctx->Htable,ctx->H.u);
718#elif TABLE_BITS==4
6acb4ff3 719# if defined(GHASH_ASM_IAX) /* both x86 and x86_64 */
c1f092d1
AP
720 if (OPENSSL_ia32cap_P[1]&(1<<1)) {
721 gcm_init_clmul(ctx->Htable,ctx->H.u);
722 ctx->gmult = gcm_gmult_clmul;
723 ctx->ghash = gcm_ghash_clmul;
724 return;
725 }
e7f5b1cd 726 gcm_init_4bit(ctx->Htable,ctx->H.u);
6acb4ff3 727# if defined(GHASH_ASM_X86) /* x86 only */
c1f092d1
AP
728 if (OPENSSL_ia32cap_P[0]&(1<<23)) {
729 ctx->gmult = gcm_gmult_4bit_mmx;
730 ctx->ghash = gcm_ghash_4bit_mmx;
731 } else {
732 ctx->gmult = gcm_gmult_4bit_x86;
733 ctx->ghash = gcm_ghash_4bit_x86;
734 }
735# else
736 ctx->gmult = gcm_gmult_4bit;
737 ctx->ghash = gcm_ghash_4bit;
738# endif
739# else
740 gcm_init_4bit(ctx->Htable,ctx->H.u);
741# endif
a595baff 742#endif
e7f5b1cd
AP
743}
744
745void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
746{
747 const union { long one; char little; } is_endian = {1};
f472ec8c 748 unsigned int ctr;
e7f5b1cd
AP
749
750 ctx->Yi.u[0] = 0;
751 ctx->Yi.u[1] = 0;
752 ctx->Xi.u[0] = 0;
753 ctx->Xi.u[1] = 0;
754 ctx->len.u[0] = 0;
755 ctx->len.u[1] = 0;
756 ctx->res = 0;
757
758 if (len==12) {
759 memcpy(ctx->Yi.c,iv,12);
760 ctx->Yi.c[15]=1;
f472ec8c 761 ctr=1;
e7f5b1cd
AP
762 }
763 else {
764 size_t i;
765 u64 len0 = len;
766
767 while (len>=16) {
768 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
769 GCM_MUL(ctx,Yi);
770 iv += 16;
771 len -= 16;
772 }
773 if (len) {
774 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
775 GCM_MUL(ctx,Yi);
776 }
777 len0 <<= 3;
778 if (is_endian.little) {
779#ifdef BSWAP8
780 ctx->Yi.u[1] ^= BSWAP8(len0);
781#else
782 ctx->Yi.c[8] ^= (u8)(len0>>56);
783 ctx->Yi.c[9] ^= (u8)(len0>>48);
784 ctx->Yi.c[10] ^= (u8)(len0>>40);
785 ctx->Yi.c[11] ^= (u8)(len0>>32);
786 ctx->Yi.c[12] ^= (u8)(len0>>24);
787 ctx->Yi.c[13] ^= (u8)(len0>>16);
788 ctx->Yi.c[14] ^= (u8)(len0>>8);
789 ctx->Yi.c[15] ^= (u8)(len0);
790#endif
791 }
792 else
793 ctx->Yi.u[1] ^= len0;
794
795 GCM_MUL(ctx,Yi);
796
797 if (is_endian.little)
f472ec8c 798 ctr = GETU32(ctx->Yi.c+12);
e7f5b1cd 799 else
f472ec8c 800 ctr = ctx->Yi.d[3];
e7f5b1cd
AP
801 }
802
803 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
f472ec8c 804 ++ctr;
2262beef 805 if (is_endian.little)
f472ec8c 806 PUTU32(ctx->Yi.c+12,ctr);
2262beef 807 else
f472ec8c 808 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
809}
810
811void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
812{
813 size_t i;
814
815 ctx->len.u[0] += len;
816
2262beef
AP
817#ifdef GHASH
818 if ((i = (len&(size_t)-16))) {
c1f092d1 819 GHASH(ctx,aad,i);
2262beef
AP
820 aad += i;
821 len -= i;
822 }
823#else
e7f5b1cd
AP
824 while (len>=16) {
825 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
826 GCM_MUL(ctx,Xi);
827 aad += 16;
828 len -= 16;
829 }
2262beef 830#endif
e7f5b1cd
AP
831 if (len) {
832 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
833 GCM_MUL(ctx,Xi);
834 }
835}
836
837void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
838 const unsigned char *in, unsigned char *out,
839 size_t len)
840{
841 const union { long one; char little; } is_endian = {1};
842 unsigned int n, ctr;
843 size_t i;
844
845 ctx->len.u[1] += len;
846 n = ctx->res;
f472ec8c
AP
847 if (is_endian.little)
848 ctr = GETU32(ctx->Yi.c+12);
849 else
850 ctr = ctx->Yi.d[3];
e7f5b1cd
AP
851
852#if !defined(OPENSSL_SMALL_FOOTPRINT)
853 if (16%sizeof(size_t) == 0) do { /* always true actually */
854 if (n) {
855 while (n && len) {
856 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
857 --len;
858 n = (n+1)%16;
859 }
860 if (n==0) GCM_MUL(ctx,Xi);
861 else {
862 ctx->res = n;
863 return;
864 }
865 }
e7f5b1cd
AP
866#if defined(STRICT_ALIGNMENT)
867 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
868 break;
869#endif
a595baff 870#if defined(GHASH) && defined(GHASH_CHUNK)
2262beef
AP
871 while (len>=GHASH_CHUNK) {
872 size_t j=GHASH_CHUNK;
873
874 while (j) {
875 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
876 ++ctr;
877 if (is_endian.little)
878 PUTU32(ctx->Yi.c+12,ctr);
879 else
880 ctx->Yi.d[3] = ctr;
2262beef
AP
881 for (i=0; i<16; i+=sizeof(size_t))
882 *(size_t *)(out+i) =
883 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
884 out += 16;
885 in += 16;
886 j -= 16;
887 }
c1f092d1 888 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
2262beef
AP
889 len -= GHASH_CHUNK;
890 }
891 if ((i = (len&(size_t)-16))) {
892 size_t j=i;
893
894 while (len>=16) {
895 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
896 ++ctr;
897 if (is_endian.little)
898 PUTU32(ctx->Yi.c+12,ctr);
899 else
900 ctx->Yi.d[3] = ctr;
901 for (i=0; i<16; i+=sizeof(size_t))
902 *(size_t *)(out+i) =
903 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
904 out += 16;
905 in += 16;
906 len -= 16;
907 }
c1f092d1 908 GHASH(ctx,out-j,j);
2262beef
AP
909 }
910#else
911 while (len>=16) {
e7f5b1cd 912 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
2262beef
AP
913 ++ctr;
914 if (is_endian.little)
915 PUTU32(ctx->Yi.c+12,ctr);
916 else
917 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
918 for (i=0; i<16; i+=sizeof(size_t))
919 *(size_t *)(ctx->Xi.c+i) ^=
920 *(size_t *)(out+i) =
921 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
922 GCM_MUL(ctx,Xi);
923 out += 16;
924 in += 16;
925 len -= 16;
926 }
2262beef 927#endif
e7f5b1cd 928 if (len) {
2262beef 929 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
930 ++ctr;
931 if (is_endian.little)
932 PUTU32(ctx->Yi.c+12,ctr);
933 else
934 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
935 while (len--) {
936 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
937 ++n;
938 }
939 }
940
941 ctx->res = n;
e7f5b1cd
AP
942 return;
943 } while(0);
944#endif
945 for (i=0;i<len;++i) {
946 if (n==0) {
2262beef 947 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
948 ++ctr;
949 if (is_endian.little)
950 PUTU32(ctx->Yi.c+12,ctr);
951 else
952 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
953 }
954 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
955 n = (n+1)%16;
956 if (n==0)
957 GCM_MUL(ctx,Xi);
958 }
959
960 ctx->res = n;
e7f5b1cd
AP
961}
962
963void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
964 const unsigned char *in, unsigned char *out,
965 size_t len)
966{
967 const union { long one; char little; } is_endian = {1};
968 unsigned int n, ctr;
969 size_t i;
970
971 ctx->len.u[1] += len;
972 n = ctx->res;
f472ec8c
AP
973 if (is_endian.little)
974 ctr = GETU32(ctx->Yi.c+12);
975 else
976 ctr = ctx->Yi.d[3];
e7f5b1cd
AP
977
978#if !defined(OPENSSL_SMALL_FOOTPRINT)
979 if (16%sizeof(size_t) == 0) do { /* always true actually */
980 if (n) {
981 while (n && len) {
982 u8 c = *(in++);
983 *(out++) = c^ctx->EKi.c[n];
984 ctx->Xi.c[n] ^= c;
985 --len;
986 n = (n+1)%16;
987 }
988 if (n==0) GCM_MUL (ctx,Xi);
989 else {
990 ctx->res = n;
991 return;
992 }
993 }
e7f5b1cd
AP
994#if defined(STRICT_ALIGNMENT)
995 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
996 break;
997#endif
a595baff 998#if defined(GHASH) && defined(GHASH_CHUNK)
2262beef
AP
999 while (len>=GHASH_CHUNK) {
1000 size_t j=GHASH_CHUNK;
1001
c1f092d1 1002 GHASH(ctx,in,GHASH_CHUNK);
2262beef
AP
1003 while (j) {
1004 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
1005 ++ctr;
1006 if (is_endian.little)
1007 PUTU32(ctx->Yi.c+12,ctr);
1008 else
1009 ctx->Yi.d[3] = ctr;
2262beef
AP
1010 for (i=0; i<16; i+=sizeof(size_t))
1011 *(size_t *)(out+i) =
1012 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1013 out += 16;
1014 in += 16;
1015 j -= 16;
1016 }
1017 len -= GHASH_CHUNK;
1018 }
1019 if ((i = (len&(size_t)-16))) {
c1f092d1 1020 GHASH(ctx,in,i);
2262beef
AP
1021 while (len>=16) {
1022 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1023 ++ctr;
1024 if (is_endian.little)
1025 PUTU32(ctx->Yi.c+12,ctr);
1026 else
1027 ctx->Yi.d[3] = ctr;
1028 for (i=0; i<16; i+=sizeof(size_t))
1029 *(size_t *)(out+i) =
1030 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1031 out += 16;
1032 in += 16;
1033 len -= 16;
1034 }
1035 }
1036#else
1037 while (len>=16) {
e7f5b1cd 1038 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
2262beef
AP
1039 ++ctr;
1040 if (is_endian.little)
1041 PUTU32(ctx->Yi.c+12,ctr);
1042 else
1043 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1044 for (i=0; i<16; i+=sizeof(size_t)) {
1045 size_t c = *(size_t *)(in+i);
1046 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1047 *(size_t *)(ctx->Xi.c+i) ^= c;
1048 }
2262beef 1049 GCM_MUL(ctx,Xi);
e7f5b1cd
AP
1050 out += 16;
1051 in += 16;
1052 len -= 16;
1053 }
2262beef 1054#endif
e7f5b1cd 1055 if (len) {
2262beef 1056 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
1057 ++ctr;
1058 if (is_endian.little)
1059 PUTU32(ctx->Yi.c+12,ctr);
1060 else
1061 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1062 while (len--) {
1063 u8 c = in[n];
1064 ctx->Xi.c[n] ^= c;
1065 out[n] = c^ctx->EKi.c[n];
1066 ++n;
1067 }
1068 }
1069
1070 ctx->res = n;
e7f5b1cd
AP
1071 return;
1072 } while(0);
1073#endif
1074 for (i=0;i<len;++i) {
1075 u8 c;
1076 if (n==0) {
2262beef 1077 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
e7f5b1cd
AP
1078 ++ctr;
1079 if (is_endian.little)
1080 PUTU32(ctx->Yi.c+12,ctr);
1081 else
1082 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
1083 }
1084 c = in[i];
1085 out[i] ^= ctx->EKi.c[n];
1086 ctx->Xi.c[n] ^= c;
1087 n = (n+1)%16;
1088 if (n==0)
1089 GCM_MUL(ctx,Xi);
1090 }
1091
1092 ctx->res = n;
e7f5b1cd
AP
1093}
1094
6acb4ff3
AP
1095int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1096 size_t len)
e7f5b1cd
AP
1097{
1098 const union { long one; char little; } is_endian = {1};
1099 u64 alen = ctx->len.u[0]<<3;
1100 u64 clen = ctx->len.u[1]<<3;
1101
1102 if (ctx->res)
1103 GCM_MUL(ctx,Xi);
1104
1105 if (is_endian.little) {
1106#ifdef BSWAP8
1107 alen = BSWAP8(alen);
1108 clen = BSWAP8(clen);
1109#else
1110 u8 *p = ctx->len.c;
1111
1112 ctx->len.u[0] = alen;
1113 ctx->len.u[1] = clen;
1114
1115 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1116 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1117#endif
1118 }
1119
1120 ctx->Xi.u[0] ^= alen;
1121 ctx->Xi.u[1] ^= clen;
1122 GCM_MUL(ctx,Xi);
1123
1124 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1125 ctx->Xi.u[1] ^= ctx->EK0.u[1];
6acb4ff3
AP
1126
1127 if (tag && len<=sizeof(ctx->Xi))
1128 return memcmp(ctx->Xi.c,tag,len);
1129 else
1130 return -1;
1131}
1132
1133GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1134{
1135 GCM128_CONTEXT *ret;
1136
1137 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1138 CRYPTO_gcm128_init(ret,key,block);
1139
1140 return ret;
1141}
1142
1143void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1144{
1145 if (ctx) {
1146 OPENSSL_cleanse(ctx,sizeof(*ctx));
1147 OPENSSL_free(ctx);
1148 }
e7f5b1cd
AP
1149}
1150
1151#if defined(SELFTEST)
1152#include <stdio.h>
1153#include <openssl/aes.h>
1154
1155/* Test Case 1 */
1156static const u8 K1[16],
1157 *P1=NULL,
1158 *A1=NULL,
1159 IV1[12],
1160 *C1=NULL,
1161 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
a595baff 1162
e7f5b1cd
AP
1163/* Test Case 2 */
1164#define K2 K1
1165#define A2 A1
1166#define IV2 IV1
1167static const u8 P2[16],
1168 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1169 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1170
1171/* Test Case 3 */
1172#define A3 A2
1173static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1174 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1175 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1176 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1177 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1178 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1179 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1180 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1181 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1182 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
fb2d5a91 1183 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
e7f5b1cd
AP
1184
1185/* Test Case 4 */
1186#define K4 K3
1187#define IV4 IV3
1188static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1189 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1190 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1191 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1192 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1193 0xab,0xad,0xda,0xd2},
1194 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1195 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1196 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1197 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1198 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1199
1200/* Test Case 5 */
1201#define K5 K4
1202#define P5 P4
1203static const u8 A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1204 0xab,0xad,0xda,0xd2},
1205 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1206 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1207 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1208 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1209 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1210 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
a595baff 1211
e7f5b1cd
AP
1212/* Test Case 6 */
1213#define K6 K5
1214#define P6 P5
1215#define A6 A5
1216static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1217 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1218 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1219 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1220 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1221 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1222 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1223 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1224 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1225
1226/* Test Case 7 */
1227static const u8 K7[24],
1228 *P7=NULL,
1229 *A7=NULL,
1230 IV7[12],
1231 *C7=NULL,
1232 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1233
1234/* Test Case 8 */
1235#define K8 K7
1236#define IV8 IV7
1237#define A8 A7
1238static const u8 P8[16],
1239 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1240 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1241
1242/* Test Case 9 */
1243#define A9 A8
1244static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1245 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1246 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1247 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1248 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1249 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1250 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1251 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1252 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1253 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1254 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1255 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1256
1257/* Test Case 10 */
1258#define K10 K9
1259#define IV10 IV9
1260static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1261 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1262 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1263 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1264 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1265 0xab,0xad,0xda,0xd2},
1266 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1267 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1268 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1269 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1270 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1271
1272/* Test Case 11 */
1273#define K11 K10
1274#define P11 P10
1275#define A11 A10
1276static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1277 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1278 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1279 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1280 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1281 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1282
1283/* Test Case 12 */
1284#define K12 K11
1285#define P12 P11
1286#define A12 A11
1287static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1288 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1289 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1290 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1291 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1292 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1293 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1294 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1295 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1296
1297/* Test Case 13 */
1298static const u8 K13[32],
1299 *P13=NULL,
1300 *A13=NULL,
1301 IV13[12],
1302 *C13=NULL,
1303 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1304
1305/* Test Case 14 */
1306#define K14 K13
1307#define A14 A13
1308static const u8 P14[16],
1309 IV14[12],
1310 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1311 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1312
1313/* Test Case 15 */
1314#define A15 A14
1315static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1316 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1317 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1318 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1319 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1320 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1321 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1322 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1323 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1324 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1325 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1326 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1327
1328/* Test Case 16 */
1329#define K16 K15
1330#define IV16 IV15
1331static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1332 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1333 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1334 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1335 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1336 0xab,0xad,0xda,0xd2},
1337 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1338 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1339 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1340 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1341 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1342
1343/* Test Case 17 */
1344#define K17 K16
1345#define P17 P16
1346#define A17 A16
1347static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1348 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1349 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1350 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1351 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1352 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1353
1354/* Test Case 18 */
1355#define K18 K17
1356#define P18 P17
1357#define A18 A17
1358static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1359 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1360 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1361 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1362 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1363 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1364 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1365 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1366 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1367
/*
 * Run NIST GCM test case #n against the vectors K#n, IV#n, A#n, P#n,
 * C#n, T#n: encrypt P and compare ciphertext and tag, then decrypt C
 * and compare plaintext and tag.  NULL A/P/C pointers (empty-input
 * cases) are skipped via the `if (X##n)' guards.  Increments `ret' and
 * prints a diagnostic on any mismatch; relies on `ctx', `key' and
 * `ret' being in scope at the expansion site.
 */
#define TEST_CASE(n)	do {					\
	u8 out[sizeof(P##n)];					\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);		\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (P##n) CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (C##n && memcmp(out,C##n,sizeof(out))))		\
		ret++, printf ("encrypt test#%d failed.\n",n);\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));		\
	if (A##n) CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));	\
	if (C##n) CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||		\
	    (P##n && memcmp(out,P##n,sizeof(out))))		\
		ret++, printf ("decrypt test#%d failed.\n",n);	\
	} while(0)
1385
/*
 * SELFTEST driver: runs the 18 standard AES-GCM test vectors
 * (AES-128/192/256, with and without AAD, 96-bit and odd-length IVs)
 * and returns the number of failed cases.  When OPENSSL_CPUID_OBJ is
 * defined it additionally prints rough cycles-per-byte figures for GCM
 * vs. plain CTR (their difference approximates the GHASH overhead)
 * and, if a one-shot GHASH is compiled in, for GHASH alone.
 */
int main()
{
	GCM128_CONTEXT ctx;
	AES_KEY key;
	int ret=0;

	TEST_CASE(1);
	TEST_CASE(2);
	TEST_CASE(3);
	TEST_CASE(4);
	TEST_CASE(5);
	TEST_CASE(6);
	TEST_CASE(7);
	TEST_CASE(8);
	TEST_CASE(9);
	TEST_CASE(10);
	TEST_CASE(11);
	TEST_CASE(12);
	TEST_CASE(13);
	TEST_CASE(14);
	TEST_CASE(15);
	TEST_CASE(16);
	TEST_CASE(17);
	TEST_CASE(18);

#ifdef OPENSSL_CPUID_OBJ
	{
	size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
	union { u64 u; u8 c[1024]; } buf;
	int i;

	AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
	CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));

	/* first pass warms caches, second pass is timed */
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	start = OPENSSL_rdtsc();
	CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;

	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
			(block128_f)AES_encrypt);
	start = OPENSSL_rdtsc();
	CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
			&key,ctx.Yi.c,ctx.EKi.c,&ctx.res,
			(block128_f)AES_encrypt);
	ctr_t = OPENSSL_rdtsc() - start;

	/* GCM cost - CTR cost = approximate GHASH cost per byte */
	printf("%.2f-%.2f=%.2f\n",
			gcm_t/(double)sizeof(buf),
			ctr_t/(double)sizeof(buf),
			(gcm_t-ctr_t)/(double)sizeof(buf));
#ifdef GHASH
	GHASH(&ctx,buf.c,sizeof(buf));	/* warm up */
	start = OPENSSL_rdtsc();
	for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
	gcm_t = OPENSSL_rdtsc() - start;
	printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
#endif
	}
#endif

	return ret;
}
1451#endif