]> git.ipfire.org Git - thirdparty/openssl.git/blob - crypto/modes/gcm128.c
Add CRYPTO_gcm128_tag() function to retrieve the tag.
[thirdparty/openssl.git] / crypto / modes / gcm128.c
1 /* ====================================================================
2 * Copyright (c) 2010 The OpenSSL Project. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in
13 * the documentation and/or other materials provided with the
14 * distribution.
15 *
16 * 3. All advertising materials mentioning features or use of this
17 * software must display the following acknowledgment:
18 * "This product includes software developed by the OpenSSL Project
19 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
20 *
21 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
22 * endorse or promote products derived from this software without
23 * prior written permission. For written permission, please contact
24 * openssl-core@openssl.org.
25 *
26 * 5. Products derived from this software may not be called "OpenSSL"
27 * nor may "OpenSSL" appear in their names without prior written
28 * permission of the OpenSSL Project.
29 *
30 * 6. Redistributions of any form whatsoever must retain the following
31 * acknowledgment:
32 * "This product includes software developed by the OpenSSL Project
33 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
36 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
38 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
41 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
42 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
43 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
44 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
45 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
46 * OF THE POSSIBILITY OF SUCH DAMAGE.
47 * ====================================================================
48 */
49
50 #include <openssl/crypto.h>
51 #include "modes_lcl.h"
52 #include <string.h>
53
54 #ifndef MODES_DEBUG
55 # ifndef NDEBUG
56 # define NDEBUG
57 # endif
58 #endif
59 #include <assert.h>
60
61 typedef struct { u64 hi,lo; } u128;
62
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/*
 * Redefine GETU32/PUTU32 as a single 32-bit access combined with
 * BSWAP4, because alignment is ensured for the operands used in this
 * file. The PUTU32 expansion is wrapped in parentheses so that it
 * always behaves as one expression, whatever context it is used in.
 */
# undef  GETU32
# define GETU32(p)      BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)    (*(u32 *)(p) = BSWAP4(v))
#endif
70
/* Place a 16-bit constant in the top 16 bits of a size_t. */
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
/*
 * Shift the 128-bit value V (members hi and lo) right by one bit and,
 * when the bit shifted out of V.lo was set, XOR 0xe1 into the top byte
 * of V.hi. The sizeof(size_t) test only chooses between building that
 * mask with 64-bit or with 32-bit arithmetic; both arms produce the
 * same result.
 *
 * V must be a modifiable lvalue and is evaluated several times, so it
 * must not have side effects. Every use of V is parenthesized so that
 * arguments such as *ptr expand correctly.
 */
#define REDUCE1BIT(V)   do { \
    if (sizeof(size_t)==8) { \
        u64 T = U64(0xe100000000000000) & (0-((V).lo&1)); \
        (V).lo  = ((V).hi<<63)|((V).lo>>1); \
        (V).hi  = ((V).hi>>1 )^T; \
    } \
    else { \
        u32 T = 0xe1000000U & (0-(u32)((V).lo&1)); \
        (V).lo  = ((V).hi<<63)|((V).lo>>1); \
        (V).hi  = ((V).hi>>1 )^((u64)T<<32); \
    } \
} while(0)
84
#undef TABLE_BITS
/*
 * TABLE_BITS may be 8, 4 or 1, but it should never be set to 8: that
 * value is effectively reserved for testing.
 *
 * TABLE_BITS>1 selects the lookup-table-driven implementations that the
 * GCM specification calls "Shoup's". OpenSSL deliberately does not
 * cover the whole spectrum of table-driven implementations. In the
 * non-"Shoup's" case the memory access pattern is segmented in such a
 * way that cache-timing information can trivially reveal a fair portion
 * of the intermediate hash value. Since the ciphertext is always
 * available to an attacker, he can attempt to deduce the secret
 * parameter H and, if successful, tamper with messages [which is
 * trivial in CTR mode]. In the "Shoup's" case this is not as trivial,
 * but there is no reason to believe that it is resistant to
 * cache-timing attacks.
 *
 * The "8-bit" implementation consumes 16 (sixteen) times more memory:
 * 4KB per individual key plus 1KB shared. On the plus side it should
 * be twice as fast as the "4-bit" version; for gcc-generated x86[_64]
 * code it was observed to run ~75% faster, and closer to 100% with
 * commercial compilers. The "4-bit" procedure is nevertheless
 * preferred, because it is believed to provide a better
 * security-performance balance and adequate all-round performance.
 * "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example, on Windows a large enough free
 *   results in VM working set trimming, meaning that a subsequent
 *   malloc immediately incurs working set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even in the same
 *   thread in a Hyper-Threading world).
 */
#define TABLE_BITS 4
121
122 #if TABLE_BITS==8
123
124 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
125 {
126 int i, j;
127 u128 V;
128
129 Htable[0].hi = 0;
130 Htable[0].lo = 0;
131 V.hi = H[0];
132 V.lo = H[1];
133
134 for (Htable[128]=V, i=64; i>0; i>>=1) {
135 REDUCE1BIT(V);
136 Htable[i] = V;
137 }
138
139 for (i=2; i<256; i<<=1) {
140 u128 *Hi = Htable+i, H0 = *Hi;
141 for (j=1; j<i; ++j) {
142 Hi[j].hi = H0.hi^Htable[j].hi;
143 Hi[j].lo = H0.lo^Htable[j].lo;
144 }
145 }
146 }
147
148 static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256])
149 {
150 u128 Z = { 0, 0};
151 const u8 *xi = (const u8 *)Xi+15;
152 size_t rem, n = *xi;
153 const union { long one; char little; } is_endian = {1};
154 static const size_t rem_8bit[256] = {
155 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
156 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
157 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
158 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
159 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
160 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
161 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
162 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
163 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
164 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
165 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
166 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
167 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
168 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
169 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
170 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
171 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
172 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
173 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
174 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
175 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
176 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
177 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
178 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
179 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
180 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
181 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
182 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
183 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
184 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
185 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
186 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
187 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
188 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
189 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
190 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
191 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
192 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
193 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
194 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
195 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
196 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
197 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
198 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
199 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
200 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
201 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
202 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
203 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
204 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
205 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
206 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
207 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
208 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
209 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
210 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
211 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
212 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
213 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
214 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
215 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
216 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
217 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
218 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) };
219
220 while (1) {
221 Z.hi ^= Htable[n].hi;
222 Z.lo ^= Htable[n].lo;
223
224 if ((u8 *)Xi==xi) break;
225
226 n = *(--xi);
227
228 rem = (size_t)Z.lo&0xff;
229 Z.lo = (Z.hi<<56)|(Z.lo>>8);
230 Z.hi = (Z.hi>>8);
231 if (sizeof(size_t)==8)
232 Z.hi ^= rem_8bit[rem];
233 else
234 Z.hi ^= (u64)rem_8bit[rem]<<32;
235 }
236
237 if (is_endian.little) {
238 #ifdef BSWAP8
239 Xi[0] = BSWAP8(Z.hi);
240 Xi[1] = BSWAP8(Z.lo);
241 #else
242 u8 *p = (u8 *)Xi;
243 u32 v;
244 v = (u32)(Z.hi>>32); PUTU32(p,v);
245 v = (u32)(Z.hi); PUTU32(p+4,v);
246 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
247 v = (u32)(Z.lo); PUTU32(p+12,v);
248 #endif
249 }
250 else {
251 Xi[0] = Z.hi;
252 Xi[1] = Z.lo;
253 }
254 }
/*
 * Multiply the context member named by the second argument (Xi or Yi)
 * by H in place. The second argument is pasted in as a member name;
 * ctx is parenthesized so that any pointer expression may be passed.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_8bit((ctx)->Xi.u,(ctx)->Htable)
256
257 #elif TABLE_BITS==4
258
259 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
260 {
261 u128 V;
262 #if defined(OPENSSL_SMALL_FOOTPRINT)
263 int i;
264 #endif
265
266 Htable[0].hi = 0;
267 Htable[0].lo = 0;
268 V.hi = H[0];
269 V.lo = H[1];
270
271 #if defined(OPENSSL_SMALL_FOOTPRINT)
272 for (Htable[8]=V, i=4; i>0; i>>=1) {
273 REDUCE1BIT(V);
274 Htable[i] = V;
275 }
276
277 for (i=2; i<16; i<<=1) {
278 u128 *Hi = Htable+i;
279 int j;
280 for (V=*Hi, j=1; j<i; ++j) {
281 Hi[j].hi = V.hi^Htable[j].hi;
282 Hi[j].lo = V.lo^Htable[j].lo;
283 }
284 }
285 #else
286 Htable[8] = V;
287 REDUCE1BIT(V);
288 Htable[4] = V;
289 REDUCE1BIT(V);
290 Htable[2] = V;
291 REDUCE1BIT(V);
292 Htable[1] = V;
293 Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo;
294 V=Htable[4];
295 Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo;
296 Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo;
297 Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo;
298 V=Htable[8];
299 Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo;
300 Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo;
301 Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo;
302 Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo;
303 Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo;
304 Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo;
305 Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo;
306 #endif
307 #if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
308 /*
309 * ARM assembler expects specific dword order in Htable.
310 */
311 {
312 int j;
313 const union { long one; char little; } is_endian = {1};
314
315 if (is_endian.little)
316 for (j=0;j<16;++j) {
317 V = Htable[j];
318 Htable[j].hi = V.lo;
319 Htable[j].lo = V.hi;
320 }
321 else
322 for (j=0;j<16;++j) {
323 V = Htable[j];
324 Htable[j].hi = V.lo<<32|V.lo>>32;
325 Htable[j].lo = V.hi<<32|V.hi>>32;
326 }
327 }
328 #endif
329 }
330
331 #ifndef GHASH_ASM
332 static const size_t rem_4bit[16] = {
333 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
334 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
335 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
336 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) };
337
338 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
339 {
340 u128 Z;
341 int cnt = 15;
342 size_t rem, nlo, nhi;
343 const union { long one; char little; } is_endian = {1};
344
345 nlo = ((const u8 *)Xi)[15];
346 nhi = nlo>>4;
347 nlo &= 0xf;
348
349 Z.hi = Htable[nlo].hi;
350 Z.lo = Htable[nlo].lo;
351
352 while (1) {
353 rem = (size_t)Z.lo&0xf;
354 Z.lo = (Z.hi<<60)|(Z.lo>>4);
355 Z.hi = (Z.hi>>4);
356 if (sizeof(size_t)==8)
357 Z.hi ^= rem_4bit[rem];
358 else
359 Z.hi ^= (u64)rem_4bit[rem]<<32;
360
361 Z.hi ^= Htable[nhi].hi;
362 Z.lo ^= Htable[nhi].lo;
363
364 if (--cnt<0) break;
365
366 nlo = ((const u8 *)Xi)[cnt];
367 nhi = nlo>>4;
368 nlo &= 0xf;
369
370 rem = (size_t)Z.lo&0xf;
371 Z.lo = (Z.hi<<60)|(Z.lo>>4);
372 Z.hi = (Z.hi>>4);
373 if (sizeof(size_t)==8)
374 Z.hi ^= rem_4bit[rem];
375 else
376 Z.hi ^= (u64)rem_4bit[rem]<<32;
377
378 Z.hi ^= Htable[nlo].hi;
379 Z.lo ^= Htable[nlo].lo;
380 }
381
382 if (is_endian.little) {
383 #ifdef BSWAP8
384 Xi[0] = BSWAP8(Z.hi);
385 Xi[1] = BSWAP8(Z.lo);
386 #else
387 u8 *p = (u8 *)Xi;
388 u32 v;
389 v = (u32)(Z.hi>>32); PUTU32(p,v);
390 v = (u32)(Z.hi); PUTU32(p+4,v);
391 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
392 v = (u32)(Z.lo); PUTU32(p+12,v);
393 #endif
394 }
395 else {
396 Xi[0] = Z.hi;
397 Xi[1] = Z.lo;
398 }
399 }
400
401 #if !defined(OPENSSL_SMALL_FOOTPRINT)
402 /*
403 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
404 * details... Compiler-generated code doesn't seem to give any
405 * performance improvement, at least not on x86[_64]. It's here
406 * mostly as reference and a placeholder for possible future
407 * non-trivial optimization[s]...
408 */
409 static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],
410 const u8 *inp,size_t len)
411 {
412 u128 Z;
413 int cnt;
414 size_t rem, nlo, nhi;
415 const union { long one; char little; } is_endian = {1};
416
417 #if 1
418 do {
419 cnt = 15;
420 nlo = ((const u8 *)Xi)[15];
421 nlo ^= inp[15];
422 nhi = nlo>>4;
423 nlo &= 0xf;
424
425 Z.hi = Htable[nlo].hi;
426 Z.lo = Htable[nlo].lo;
427
428 while (1) {
429 rem = (size_t)Z.lo&0xf;
430 Z.lo = (Z.hi<<60)|(Z.lo>>4);
431 Z.hi = (Z.hi>>4);
432 if (sizeof(size_t)==8)
433 Z.hi ^= rem_4bit[rem];
434 else
435 Z.hi ^= (u64)rem_4bit[rem]<<32;
436
437 Z.hi ^= Htable[nhi].hi;
438 Z.lo ^= Htable[nhi].lo;
439
440 if (--cnt<0) break;
441
442 nlo = ((const u8 *)Xi)[cnt];
443 nlo ^= inp[cnt];
444 nhi = nlo>>4;
445 nlo &= 0xf;
446
447 rem = (size_t)Z.lo&0xf;
448 Z.lo = (Z.hi<<60)|(Z.lo>>4);
449 Z.hi = (Z.hi>>4);
450 if (sizeof(size_t)==8)
451 Z.hi ^= rem_4bit[rem];
452 else
453 Z.hi ^= (u64)rem_4bit[rem]<<32;
454
455 Z.hi ^= Htable[nlo].hi;
456 Z.lo ^= Htable[nlo].lo;
457 }
458 #else
459 /*
460 * Extra 256+16 bytes per-key plus 512 bytes shared tables
461 * [should] give ~50% improvement... One could have PACK()-ed
462 * the rem_8bit even here, but the priority is to minimize
463 * cache footprint...
464 */
465 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
466 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
467 static const unsigned short rem_8bit[256] = {
468 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
469 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
470 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
471 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
472 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
473 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
474 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
475 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
476 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
477 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
478 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
479 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
480 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
481 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
482 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
483 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
484 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
485 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
486 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
487 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
488 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
489 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
490 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
491 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
492 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
493 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
494 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
495 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
496 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
497 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
498 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
499 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE };
500 /*
501 * This pre-processing phase slows down procedure by approximately
502 * same time as it makes each loop spin faster. In other words
503 * single block performance is approximately same as straightforward
504 * "4-bit" implementation, and then it goes only faster...
505 */
506 for (cnt=0; cnt<16; ++cnt) {
507 Z.hi = Htable[cnt].hi;
508 Z.lo = Htable[cnt].lo;
509 Hshr4[cnt].lo = (Z.hi<<60)|(Z.lo>>4);
510 Hshr4[cnt].hi = (Z.hi>>4);
511 Hshl4[cnt] = (u8)(Z.lo<<4);
512 }
513
514 do {
515 for (Z.lo=0, Z.hi=0, cnt=15; cnt; --cnt) {
516 nlo = ((const u8 *)Xi)[cnt];
517 nlo ^= inp[cnt];
518 nhi = nlo>>4;
519 nlo &= 0xf;
520
521 Z.hi ^= Htable[nlo].hi;
522 Z.lo ^= Htable[nlo].lo;
523
524 rem = (size_t)Z.lo&0xff;
525
526 Z.lo = (Z.hi<<56)|(Z.lo>>8);
527 Z.hi = (Z.hi>>8);
528
529 Z.hi ^= Hshr4[nhi].hi;
530 Z.lo ^= Hshr4[nhi].lo;
531 Z.hi ^= (u64)rem_8bit[rem^Hshl4[nhi]]<<48;
532 }
533
534 nlo = ((const u8 *)Xi)[0];
535 nlo ^= inp[0];
536 nhi = nlo>>4;
537 nlo &= 0xf;
538
539 Z.hi ^= Htable[nlo].hi;
540 Z.lo ^= Htable[nlo].lo;
541
542 rem = (size_t)Z.lo&0xf;
543
544 Z.lo = (Z.hi<<60)|(Z.lo>>4);
545 Z.hi = (Z.hi>>4);
546
547 Z.hi ^= Htable[nhi].hi;
548 Z.lo ^= Htable[nhi].lo;
549 Z.hi ^= ((u64)rem_8bit[rem<<4])<<48;
550 #endif
551
552 if (is_endian.little) {
553 #ifdef BSWAP8
554 Xi[0] = BSWAP8(Z.hi);
555 Xi[1] = BSWAP8(Z.lo);
556 #else
557 u8 *p = (u8 *)Xi;
558 u32 v;
559 v = (u32)(Z.hi>>32); PUTU32(p,v);
560 v = (u32)(Z.hi); PUTU32(p+4,v);
561 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
562 v = (u32)(Z.lo); PUTU32(p+12,v);
563 #endif
564 }
565 else {
566 Xi[0] = Z.hi;
567 Xi[1] = Z.lo;
568 }
569 } while (inp+=16, len-=16);
570 }
571 #endif
572 #else
573 void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]);
574 void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
575 #endif
576
/*
 * Multiply the context member named by the second argument (Xi or Yi)
 * by H in place. The second argument is pasted in as a member name;
 * ctx is parenthesized so that any pointer expression may be passed.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_4bit((ctx)->Xi.u,(ctx)->Htable)
#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* Hash len bytes at in into ctx->Xi with the streamed 4-bit routine. */
#define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,(in),(len))
/* GHASH_CHUNK is "stride parameter" missioned to mitigate cache
 * trashing effect. In other words idea is to hash data while it's
 * still in L1 cache after encryption pass... */
#define GHASH_CHUNK       (3*1024)
#endif
585
586 #else /* TABLE_BITS */
587
588 static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2])
589 {
590 u128 V,Z = { 0,0 };
591 long X;
592 int i,j;
593 const long *xi = (const long *)Xi;
594 const union { long one; char little; } is_endian = {1};
595
596 V.hi = H[0]; /* H is in host byte order, no byte swapping */
597 V.lo = H[1];
598
599 for (j=0; j<16/sizeof(long); ++j) {
600 if (is_endian.little) {
601 if (sizeof(long)==8) {
602 #ifdef BSWAP8
603 X = (long)(BSWAP8(xi[j]));
604 #else
605 const u8 *p = (const u8 *)(xi+j);
606 X = (long)((u64)GETU32(p)<<32|GETU32(p+4));
607 #endif
608 }
609 else {
610 const u8 *p = (const u8 *)(xi+j);
611 X = (long)GETU32(p);
612 }
613 }
614 else
615 X = xi[j];
616
617 for (i=0; i<8*sizeof(long); ++i, X<<=1) {
618 u64 M = (u64)(X>>(8*sizeof(long)-1));
619 Z.hi ^= V.hi&M;
620 Z.lo ^= V.lo&M;
621
622 REDUCE1BIT(V);
623 }
624 }
625
626 if (is_endian.little) {
627 #ifdef BSWAP8
628 Xi[0] = BSWAP8(Z.hi);
629 Xi[1] = BSWAP8(Z.lo);
630 #else
631 u8 *p = (u8 *)Xi;
632 u32 v;
633 v = (u32)(Z.hi>>32); PUTU32(p,v);
634 v = (u32)(Z.hi); PUTU32(p+4,v);
635 v = (u32)(Z.lo>>32); PUTU32(p+8,v);
636 v = (u32)(Z.lo); PUTU32(p+12,v);
637 #endif
638 }
639 else {
640 Xi[0] = Z.hi;
641 Xi[1] = Z.lo;
642 }
643 }
/*
 * Multiply the context member named by the second argument (Xi or Yi)
 * by H in place. The second argument is pasted in as a member name;
 * ctx is parenthesized so that any pointer expression may be passed.
 */
#define GCM_MUL(ctx,Xi)   gcm_gmult_1bit((ctx)->Xi.u,(ctx)->H.u)
645
646 #endif
647
648 struct gcm128_context {
649 /* Following 6 names follow names in GCM specification */
650 union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0,
651 Xi,H,len;
652 /* Pre-computed table used by gcm_gmult_* */
653 #if TABLE_BITS==8
654 u128 Htable[256];
655 #else
656 u128 Htable[16];
657 void (*gmult)(u64 Xi[2],const u128 Htable[16]);
658 void (*ghash)(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
659 #endif
660 unsigned int mres, ares;
661 block128_f block;
662 void *key;
663 };
664
/*
 * x86 / x86_64 with assembler GHASH: the multiply and hash routines
 * are chosen at run time in CRYPTO_gcm128_init and reached through
 * the function pointers stored in the context, so GCM_MUL and GHASH
 * are redefined to call through those pointers.
 */
#if TABLE_BITS==4 && defined(GHASH_ASM) && !defined(I386_ONLY) && \
    (defined(__i386)    || defined(__i386__)    || \
     defined(__x86_64)  || defined(__x86_64__)  || \
     defined(_M_IX86)   || defined(_M_AMD64)    || defined(_M_X64))
# define GHASH_ASM_IAX
extern unsigned int OPENSSL_ia32cap_P[2];

void gcm_init_clmul(u128 Htable[16],const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#  define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2],const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len);
# endif

/* ctx and the data arguments are parenthesized at every use. */
# undef  GCM_MUL
# define GCM_MUL(ctx,Xi)   (*((ctx)->gmult))((ctx)->Xi.u,(ctx)->Htable)
# undef  GHASH
# define GHASH(ctx,in,len) (*((ctx)->ghash))((ctx)->Xi.u,(ctx)->Htable,(in),(len))
#endif
690
691 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block)
692 {
693 const union { long one; char little; } is_endian = {1};
694
695 memset(ctx,0,sizeof(*ctx));
696 ctx->block = block;
697 ctx->key = key;
698
699 (*block)(ctx->H.c,ctx->H.c,key);
700
701 if (is_endian.little) {
702 /* H is stored in host byte order */
703 #ifdef BSWAP8
704 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
705 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
706 #else
707 u8 *p = ctx->H.c;
708 u64 hi,lo;
709 hi = (u64)GETU32(p) <<32|GETU32(p+4);
710 lo = (u64)GETU32(p+8)<<32|GETU32(p+12);
711 ctx->H.u[0] = hi;
712 ctx->H.u[1] = lo;
713 #endif
714 }
715
716 #if TABLE_BITS==8
717 gcm_init_8bit(ctx->Htable,ctx->H.u);
718 #elif TABLE_BITS==4
719 # if defined(GHASH_ASM_IAX) /* both x86 and x86_64 */
720 if (OPENSSL_ia32cap_P[1]&(1<<1)) {
721 gcm_init_clmul(ctx->Htable,ctx->H.u);
722 ctx->gmult = gcm_gmult_clmul;
723 ctx->ghash = gcm_ghash_clmul;
724 return;
725 }
726 gcm_init_4bit(ctx->Htable,ctx->H.u);
727 # if defined(GHASH_ASM_X86) /* x86 only */
728 if (OPENSSL_ia32cap_P[0]&(1<<23)) {
729 ctx->gmult = gcm_gmult_4bit_mmx;
730 ctx->ghash = gcm_ghash_4bit_mmx;
731 } else {
732 ctx->gmult = gcm_gmult_4bit_x86;
733 ctx->ghash = gcm_ghash_4bit_x86;
734 }
735 # else
736 ctx->gmult = gcm_gmult_4bit;
737 ctx->ghash = gcm_ghash_4bit;
738 # endif
739 # else
740 gcm_init_4bit(ctx->Htable,ctx->H.u);
741 # endif
742 #endif
743 }
744
745 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len)
746 {
747 const union { long one; char little; } is_endian = {1};
748 unsigned int ctr;
749
750 ctx->Yi.u[0] = 0;
751 ctx->Yi.u[1] = 0;
752 ctx->Xi.u[0] = 0;
753 ctx->Xi.u[1] = 0;
754 ctx->len.u[0] = 0; /* AAD length */
755 ctx->len.u[1] = 0; /* message length */
756 ctx->ares = 0;
757 ctx->mres = 0;
758
759 if (len==12) {
760 memcpy(ctx->Yi.c,iv,12);
761 ctx->Yi.c[15]=1;
762 ctr=1;
763 }
764 else {
765 size_t i;
766 u64 len0 = len;
767
768 while (len>=16) {
769 for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i];
770 GCM_MUL(ctx,Yi);
771 iv += 16;
772 len -= 16;
773 }
774 if (len) {
775 for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i];
776 GCM_MUL(ctx,Yi);
777 }
778 len0 <<= 3;
779 if (is_endian.little) {
780 #ifdef BSWAP8
781 ctx->Yi.u[1] ^= BSWAP8(len0);
782 #else
783 ctx->Yi.c[8] ^= (u8)(len0>>56);
784 ctx->Yi.c[9] ^= (u8)(len0>>48);
785 ctx->Yi.c[10] ^= (u8)(len0>>40);
786 ctx->Yi.c[11] ^= (u8)(len0>>32);
787 ctx->Yi.c[12] ^= (u8)(len0>>24);
788 ctx->Yi.c[13] ^= (u8)(len0>>16);
789 ctx->Yi.c[14] ^= (u8)(len0>>8);
790 ctx->Yi.c[15] ^= (u8)(len0);
791 #endif
792 }
793 else
794 ctx->Yi.u[1] ^= len0;
795
796 GCM_MUL(ctx,Yi);
797
798 if (is_endian.little)
799 ctr = GETU32(ctx->Yi.c+12);
800 else
801 ctr = ctx->Yi.d[3];
802 }
803
804 (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key);
805 ++ctr;
806 if (is_endian.little)
807 PUTU32(ctx->Yi.c+12,ctr);
808 else
809 ctx->Yi.d[3] = ctr;
810 }
811
812 void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len)
813 {
814 size_t i;
815 int n;
816
817 ctx->len.u[0] += len;
818 n = ctx->ares;
819
820 if (n) {
821 while (n && len) {
822 ctx->Xi.c[n] ^= *(aad++);
823 --len;
824 n = (n+1)%16;
825 }
826 if (n==0) GCM_MUL(ctx,Xi);
827 else {
828 ctx->ares = n;
829 return;
830 }
831 }
832
833 #ifdef GHASH
834 if ((i = (len&(size_t)-16))) {
835 GHASH(ctx,aad,i);
836 aad += i;
837 len -= i;
838 }
839 #else
840 while (len>=16) {
841 for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i];
842 GCM_MUL(ctx,Xi);
843 aad += 16;
844 len -= 16;
845 }
846 #endif
847 if (len) {
848 n = (int)len;
849 for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i];
850 }
851
852 ctx->ares = n;
853 }
854
855 void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
856 const unsigned char *in, unsigned char *out,
857 size_t len)
858 {
859 const union { long one; char little; } is_endian = {1};
860 unsigned int n, ctr;
861 size_t i;
862
863 if (ctx->ares) {
864 /* First call to encrypt finalizes GHASH(AAD) */
865 GCM_MUL(ctx,Xi);
866 ctx->ares = 0;
867 }
868
869 ctx->len.u[1] += len;
870 n = ctx->mres;
871 if (is_endian.little)
872 ctr = GETU32(ctx->Yi.c+12);
873 else
874 ctr = ctx->Yi.d[3];
875
876 #if !defined(OPENSSL_SMALL_FOOTPRINT)
877 if (16%sizeof(size_t) == 0) do { /* always true actually */
878 if (n) {
879 while (n && len) {
880 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
881 --len;
882 n = (n+1)%16;
883 }
884 if (n==0) GCM_MUL(ctx,Xi);
885 else {
886 ctx->mres = n;
887 return;
888 }
889 }
890 #if defined(STRICT_ALIGNMENT)
891 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
892 break;
893 #endif
894 #if defined(GHASH) && defined(GHASH_CHUNK)
895 while (len>=GHASH_CHUNK) {
896 size_t j=GHASH_CHUNK;
897
898 while (j) {
899 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
900 ++ctr;
901 if (is_endian.little)
902 PUTU32(ctx->Yi.c+12,ctr);
903 else
904 ctx->Yi.d[3] = ctr;
905 for (i=0; i<16; i+=sizeof(size_t))
906 *(size_t *)(out+i) =
907 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
908 out += 16;
909 in += 16;
910 j -= 16;
911 }
912 GHASH(ctx,out-GHASH_CHUNK,GHASH_CHUNK);
913 len -= GHASH_CHUNK;
914 }
915 if ((i = (len&(size_t)-16))) {
916 size_t j=i;
917
918 while (len>=16) {
919 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
920 ++ctr;
921 if (is_endian.little)
922 PUTU32(ctx->Yi.c+12,ctr);
923 else
924 ctx->Yi.d[3] = ctr;
925 for (i=0; i<16; i+=sizeof(size_t))
926 *(size_t *)(out+i) =
927 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
928 out += 16;
929 in += 16;
930 len -= 16;
931 }
932 GHASH(ctx,out-j,j);
933 }
934 #else
935 while (len>=16) {
936 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
937 ++ctr;
938 if (is_endian.little)
939 PUTU32(ctx->Yi.c+12,ctr);
940 else
941 ctx->Yi.d[3] = ctr;
942 for (i=0; i<16; i+=sizeof(size_t))
943 *(size_t *)(ctx->Xi.c+i) ^=
944 *(size_t *)(out+i) =
945 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
946 GCM_MUL(ctx,Xi);
947 out += 16;
948 in += 16;
949 len -= 16;
950 }
951 #endif
952 if (len) {
953 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
954 ++ctr;
955 if (is_endian.little)
956 PUTU32(ctx->Yi.c+12,ctr);
957 else
958 ctx->Yi.d[3] = ctr;
959 while (len--) {
960 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
961 ++n;
962 }
963 }
964
965 ctx->mres = n;
966 return;
967 } while(0);
968 #endif
969 for (i=0;i<len;++i) {
970 if (n==0) {
971 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
972 ++ctr;
973 if (is_endian.little)
974 PUTU32(ctx->Yi.c+12,ctr);
975 else
976 ctx->Yi.d[3] = ctr;
977 }
978 ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n];
979 n = (n+1)%16;
980 if (n==0)
981 GCM_MUL(ctx,Xi);
982 }
983
984 ctx->mres = n;
985 }
986
987 void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
988 const unsigned char *in, unsigned char *out,
989 size_t len)
990 {
991 const union { long one; char little; } is_endian = {1};
992 unsigned int n, ctr;
993 size_t i;
994
995 if (ctx->ares) {
996 /* First call to decrypt finalizes GHASH(AAD) */
997 GCM_MUL(ctx,Xi);
998 ctx->ares = 0;
999 }
1000
1001 ctx->len.u[1] += len;
1002 n = ctx->mres;
1003 if (is_endian.little)
1004 ctr = GETU32(ctx->Yi.c+12);
1005 else
1006 ctr = ctx->Yi.d[3];
1007
1008 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1009 if (16%sizeof(size_t) == 0) do { /* always true actually */
1010 if (n) {
1011 while (n && len) {
1012 u8 c = *(in++);
1013 *(out++) = c^ctx->EKi.c[n];
1014 ctx->Xi.c[n] ^= c;
1015 --len;
1016 n = (n+1)%16;
1017 }
1018 if (n==0) GCM_MUL (ctx,Xi);
1019 else {
1020 ctx->mres = n;
1021 return;
1022 }
1023 }
1024 #if defined(STRICT_ALIGNMENT)
1025 if (((size_t)in|(size_t)out)%sizeof(size_t) != 0)
1026 break;
1027 #endif
1028 #if defined(GHASH) && defined(GHASH_CHUNK)
1029 while (len>=GHASH_CHUNK) {
1030 size_t j=GHASH_CHUNK;
1031
1032 GHASH(ctx,in,GHASH_CHUNK);
1033 while (j) {
1034 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1035 ++ctr;
1036 if (is_endian.little)
1037 PUTU32(ctx->Yi.c+12,ctr);
1038 else
1039 ctx->Yi.d[3] = ctr;
1040 for (i=0; i<16; i+=sizeof(size_t))
1041 *(size_t *)(out+i) =
1042 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1043 out += 16;
1044 in += 16;
1045 j -= 16;
1046 }
1047 len -= GHASH_CHUNK;
1048 }
1049 if ((i = (len&(size_t)-16))) {
1050 GHASH(ctx,in,i);
1051 while (len>=16) {
1052 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1053 ++ctr;
1054 if (is_endian.little)
1055 PUTU32(ctx->Yi.c+12,ctr);
1056 else
1057 ctx->Yi.d[3] = ctr;
1058 for (i=0; i<16; i+=sizeof(size_t))
1059 *(size_t *)(out+i) =
1060 *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i);
1061 out += 16;
1062 in += 16;
1063 len -= 16;
1064 }
1065 }
1066 #else
1067 while (len>=16) {
1068 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1069 ++ctr;
1070 if (is_endian.little)
1071 PUTU32(ctx->Yi.c+12,ctr);
1072 else
1073 ctx->Yi.d[3] = ctr;
1074 for (i=0; i<16; i+=sizeof(size_t)) {
1075 size_t c = *(size_t *)(in+i);
1076 *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i);
1077 *(size_t *)(ctx->Xi.c+i) ^= c;
1078 }
1079 GCM_MUL(ctx,Xi);
1080 out += 16;
1081 in += 16;
1082 len -= 16;
1083 }
1084 #endif
1085 if (len) {
1086 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1087 ++ctr;
1088 if (is_endian.little)
1089 PUTU32(ctx->Yi.c+12,ctr);
1090 else
1091 ctx->Yi.d[3] = ctr;
1092 while (len--) {
1093 u8 c = in[n];
1094 ctx->Xi.c[n] ^= c;
1095 out[n] = c^ctx->EKi.c[n];
1096 ++n;
1097 }
1098 }
1099
1100 ctx->mres = n;
1101 return;
1102 } while(0);
1103 #endif
1104 for (i=0;i<len;++i) {
1105 u8 c;
1106 if (n==0) {
1107 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1108 ++ctr;
1109 if (is_endian.little)
1110 PUTU32(ctx->Yi.c+12,ctr);
1111 else
1112 ctx->Yi.d[3] = ctr;
1113 }
1114 c = in[i];
1115 out[i] = c^ctx->EKi.c[n];
1116 ctx->Xi.c[n] ^= c;
1117 n = (n+1)%16;
1118 if (n==0)
1119 GCM_MUL(ctx,Xi);
1120 }
1121
1122 ctx->mres = n;
1123 }
1124
1125 void CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1126 const unsigned char *in, unsigned char *out,
1127 size_t len, ctr128_f stream)
1128 {
1129 const union { long one; char little; } is_endian = {1};
1130 unsigned int n, ctr;
1131 size_t i;
1132
1133 if (ctx->ares) {
1134 /* First call to encrypt finalizes GHASH(AAD) */
1135 GCM_MUL(ctx,Xi);
1136 ctx->ares = 0;
1137 }
1138
1139 ctx->len.u[1] += len;
1140 n = ctx->mres;
1141 if (is_endian.little)
1142 ctr = GETU32(ctx->Yi.c+12);
1143 else
1144 ctr = ctx->Yi.d[3];
1145
1146 if (n) {
1147 while (n && len) {
1148 ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n];
1149 --len;
1150 n = (n+1)%16;
1151 }
1152 if (n==0) GCM_MUL(ctx,Xi);
1153 else {
1154 ctx->mres = n;
1155 return;
1156 }
1157 }
1158 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1159 while (len>=GHASH_CHUNK) {
1160 (*stream)(in,out,GHASH_CHUNK/16,ctx->key,ctx->Yi.c);
1161 ctr += GHASH_CHUNK/16;
1162 if (is_endian.little)
1163 PUTU32(ctx->Yi.c+12,ctr);
1164 else
1165 ctx->Yi.d[3] = ctr;
1166 GHASH(ctx,out,GHASH_CHUNK);
1167 out += GHASH_CHUNK;
1168 in += GHASH_CHUNK;
1169 len -= GHASH_CHUNK;
1170 }
1171 #endif
1172 if ((i = (len&(size_t)-16))) {
1173 size_t j=i/16;
1174
1175 (*stream)(in,out,j,ctx->key,ctx->Yi.c);
1176 ctr += (unsigned int)j;
1177 if (is_endian.little)
1178 PUTU32(ctx->Yi.c+12,ctr);
1179 else
1180 ctx->Yi.d[3] = ctr;
1181 in += i;
1182 len -= i;
1183 #if defined(GHASH)
1184 GHASH(ctx,out,i);
1185 out += i;
1186 #else
1187 while (j--) {
1188 for (i=0;i<16;++i) ctx->Xi.c[i] ^= out[i];
1189 GCM_MUL(ctx,Xi);
1190 out += 16;
1191 }
1192 #endif
1193 }
1194 if (len) {
1195 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1196 ++ctr;
1197 if (is_endian.little)
1198 PUTU32(ctx->Yi.c+12,ctr);
1199 else
1200 ctx->Yi.d[3] = ctr;
1201 while (len--) {
1202 ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n];
1203 ++n;
1204 }
1205 }
1206
1207 ctx->mres = n;
1208 }
1209
1210 void CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1211 const unsigned char *in, unsigned char *out,
1212 size_t len,ctr128_f stream)
1213 {
1214 const union { long one; char little; } is_endian = {1};
1215 unsigned int n, ctr;
1216 size_t i;
1217
1218 if (ctx->ares) {
1219 /* First call to decrypt finalizes GHASH(AAD) */
1220 GCM_MUL(ctx,Xi);
1221 ctx->ares = 0;
1222 }
1223
1224 ctx->len.u[1] += len;
1225 n = ctx->mres;
1226 if (is_endian.little)
1227 ctr = GETU32(ctx->Yi.c+12);
1228 else
1229 ctr = ctx->Yi.d[3];
1230
1231 if (n) {
1232 while (n && len) {
1233 u8 c = *(in++);
1234 *(out++) = c^ctx->EKi.c[n];
1235 ctx->Xi.c[n] ^= c;
1236 --len;
1237 n = (n+1)%16;
1238 }
1239 if (n==0) GCM_MUL (ctx,Xi);
1240 else {
1241 ctx->mres = n;
1242 return;
1243 }
1244 }
1245 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1246 while (len>=GHASH_CHUNK) {
1247 GHASH(ctx,in,GHASH_CHUNK);
1248 (*stream)(in,out,GHASH_CHUNK/16,ctx->key,ctx->Yi.c);
1249 ctr += GHASH_CHUNK/16;
1250 if (is_endian.little)
1251 PUTU32(ctx->Yi.c+12,ctr);
1252 else
1253 ctx->Yi.d[3] = ctr;
1254 out += GHASH_CHUNK;
1255 in += GHASH_CHUNK;
1256 len -= GHASH_CHUNK;
1257 }
1258 #endif
1259 if ((i = (len&(size_t)-16))) {
1260 size_t j=i/16;
1261
1262 #if defined(GHASH)
1263 GHASH(ctx,in,i);
1264 #else
1265 while (j--) {
1266 size_t k;
1267 for (k=0;k<16;++k) ctx->Xi.c[k] ^= in[k];
1268 GCM_MUL(ctx,Xi);
1269 in += 16;
1270 }
1271 j = i/16;
1272 in -= i;
1273 #endif
1274 (*stream)(in,out,j,ctx->key,ctx->Yi.c);
1275 ctr += (unsigned int)j;
1276 if (is_endian.little)
1277 PUTU32(ctx->Yi.c+12,ctr);
1278 else
1279 ctx->Yi.d[3] = ctr;
1280 out += i;
1281 in += i;
1282 len -= i;
1283 }
1284 if (len) {
1285 (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key);
1286 ++ctr;
1287 if (is_endian.little)
1288 PUTU32(ctx->Yi.c+12,ctr);
1289 else
1290 ctx->Yi.d[3] = ctr;
1291 while (len--) {
1292 u8 c = in[n];
1293 ctx->Xi.c[n] ^= c;
1294 out[n] = c^ctx->EKi.c[n];
1295 ++n;
1296 }
1297 }
1298
1299 ctx->mres = n;
1300 }
1301
1302 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx,const unsigned char *tag,
1303 size_t len)
1304 {
1305 const union { long one; char little; } is_endian = {1};
1306 u64 alen = ctx->len.u[0]<<3;
1307 u64 clen = ctx->len.u[1]<<3;
1308
1309 if (ctx->mres)
1310 GCM_MUL(ctx,Xi);
1311
1312 if (is_endian.little) {
1313 #ifdef BSWAP8
1314 alen = BSWAP8(alen);
1315 clen = BSWAP8(clen);
1316 #else
1317 u8 *p = ctx->len.c;
1318
1319 ctx->len.u[0] = alen;
1320 ctx->len.u[1] = clen;
1321
1322 alen = (u64)GETU32(p) <<32|GETU32(p+4);
1323 clen = (u64)GETU32(p+8)<<32|GETU32(p+12);
1324 #endif
1325 }
1326
1327 ctx->Xi.u[0] ^= alen;
1328 ctx->Xi.u[1] ^= clen;
1329 GCM_MUL(ctx,Xi);
1330
1331 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1332 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1333
1334 if (tag && len<=sizeof(ctx->Xi))
1335 return memcmp(ctx->Xi.c,tag,len);
1336 else
1337 return -1;
1338 }
1339
1340 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1341 {
1342 CRYPTO_gcm128_finish(ctx, NULL, 0);
1343 memcpy(tag, ctx->Xi.c, len);
1344 }
1345
1346 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1347 {
1348 GCM128_CONTEXT *ret;
1349
1350 if ((ret = (GCM128_CONTEXT *)OPENSSL_malloc(sizeof(GCM128_CONTEXT))))
1351 CRYPTO_gcm128_init(ret,key,block);
1352
1353 return ret;
1354 }
1355
1356 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1357 {
1358 if (ctx) {
1359 OPENSSL_cleanse(ctx,sizeof(*ctx));
1360 OPENSSL_free(ctx);
1361 }
1362 }
1363
1364 #if defined(SELFTEST)
1365 #include <stdio.h>
1366 #include <openssl/aes.h>
1367
1368 /* Test Case 1 */
1369 static const u8 K1[16],
1370 *P1=NULL,
1371 *A1=NULL,
1372 IV1[12],
1373 *C1=NULL,
1374 T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a};
1375
1376 /* Test Case 2 */
1377 #define K2 K1
1378 #define A2 A1
1379 #define IV2 IV1
1380 static const u8 P2[16],
1381 C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78},
1382 T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf};
1383
1384 /* Test Case 3 */
1385 #define A3 A2
1386 static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1387 P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1388 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1389 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1390 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1391 IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1392 C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1393 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1394 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1395 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85},
1396 T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4};
1397
1398 /* Test Case 4 */
1399 #define K4 K3
1400 #define IV4 IV3
1401 static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1402 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1403 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1404 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1405 A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1406 0xab,0xad,0xda,0xd2},
1407 C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c,
1408 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e,
1409 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05,
1410 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91},
1411 T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47};
1412
1413 /* Test Case 5 */
1414 #define K5 K4
1415 #define P5 P4
1416 static const u8 A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1417 0xab,0xad,0xda,0xd2},
1418 IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1419 C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55,
1420 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23,
1421 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42,
1422 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98},
1423 T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb};
1424
1425 /* Test Case 6 */
1426 #define K6 K5
1427 #define P6 P5
1428 #define A6 A5
1429 static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1430 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1431 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1432 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1433 C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94,
1434 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7,
1435 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f,
1436 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5},
1437 T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50};
1438
1439 /* Test Case 7 */
1440 static const u8 K7[24],
1441 *P7=NULL,
1442 *A7=NULL,
1443 IV7[12],
1444 *C7=NULL,
1445 T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35};
1446
1447 /* Test Case 8 */
1448 #define K8 K7
1449 #define IV8 IV7
1450 #define A8 A7
1451 static const u8 P8[16],
1452 C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00},
1453 T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb};
1454
1455 /* Test Case 9 */
1456 #define A9 A8
1457 static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1458 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c},
1459 P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1460 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1461 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1462 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1463 IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1464 C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1465 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1466 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1467 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56},
1468 T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14};
1469
1470 /* Test Case 10 */
1471 #define K10 K9
1472 #define IV10 IV9
1473 static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1474 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1475 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1476 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1477 A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1478 0xab,0xad,0xda,0xd2},
1479 C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57,
1480 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c,
1481 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47,
1482 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10},
1483 T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c};
1484
1485 /* Test Case 11 */
1486 #define K11 K10
1487 #define P11 P10
1488 #define A11 A10
1489 static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1490 C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8,
1491 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57,
1492 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9,
1493 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7},
1494 T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8};
1495
1496 /* Test Case 12 */
1497 #define K12 K11
1498 #define P12 P11
1499 #define A12 A11
1500 static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1501 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1502 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1503 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1504 C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff,
1505 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45,
1506 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3,
1507 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b},
1508 T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9};
1509
1510 /* Test Case 13 */
1511 static const u8 K13[32],
1512 *P13=NULL,
1513 *A13=NULL,
1514 IV13[12],
1515 *C13=NULL,
1516 T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b};
1517
1518 /* Test Case 14 */
1519 #define K14 K13
1520 #define A14 A13
1521 static const u8 P14[16],
1522 IV14[12],
1523 C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18},
1524 T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19};
1525
1526 /* Test Case 15 */
1527 #define A15 A14
1528 static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08,
1529 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08},
1530 P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1531 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1532 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1533 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55},
1534 IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88},
1535 C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1536 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1537 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1538 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad},
1539 T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c};
1540
1541 /* Test Case 16 */
1542 #define K16 K15
1543 #define IV16 IV15
1544 static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a,
1545 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72,
1546 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25,
1547 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39},
1548 A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,
1549 0xab,0xad,0xda,0xd2},
1550 C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d,
1551 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa,
1552 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38,
1553 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62},
1554 T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b};
1555
1556 /* Test Case 17 */
1557 #define K17 K16
1558 #define P17 P16
1559 #define A17 A16
1560 static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad},
1561 C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb,
1562 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0,
1563 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78,
1564 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f},
1565 T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2};
1566
1567 /* Test Case 18 */
1568 #define K18 K17
1569 #define P18 P17
1570 #define A18 A17
1571 static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa,
1572 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28,
1573 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54,
1574 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b},
1575 C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20,
1576 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4,
1577 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde,
1578 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f},
1579 T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a};
1580
/*
 * Run known-answer test number n in both directions.
 *
 * Uses the vectors K<n> (key), IV<n>, A<n> (AAD, may be NULL), P<n>
 * (plaintext, may be NULL), C<n> (ciphertext, may be NULL) and T<n>
 * (16-byte tag), and the variables ctx, key and ret of the enclosing
 * function.  ret is incremented and a message printed for each failing
 * direction.
 */
#define TEST_CASE(n)    do {                                        \
    u8 out[sizeof(P##n)];                                           \
    memset(out,0,sizeof(out));                                      \
    AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);                  \
    CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);          \
    /* encrypt: P -> out, expect C and T */                         \
    CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));                  \
    if (A##n)                                                       \
        CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));                  \
    if (P##n)                                                       \
        CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));           \
    if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                       \
        (C##n && memcmp(out,C##n,sizeof(out)))) {                   \
        ret++;                                                      \
        printf ("encrypt test#%d failed.\n",n);                     \
    }                                                               \
    /* decrypt: C -> out, expect P and T */                         \
    CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));                  \
    memset(out,0,sizeof(out));                                      \
    if (A##n)                                                       \
        CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));                  \
    if (C##n)                                                       \
        CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));           \
    if (CRYPTO_gcm128_finish(&ctx,T##n,16) ||                       \
        (P##n && memcmp(out,P##n,sizeof(out)))) {                   \
        ret++;                                                      \
        printf ("decrypt test#%d failed.\n",n);                     \
    }                                                               \
} while(0)
1600
1601 int main()
1602 {
1603 GCM128_CONTEXT ctx;
1604 AES_KEY key;
1605 int ret=0;
1606
1607 TEST_CASE(1);
1608 TEST_CASE(2);
1609 TEST_CASE(3);
1610 TEST_CASE(4);
1611 TEST_CASE(5);
1612 TEST_CASE(6);
1613 TEST_CASE(7);
1614 TEST_CASE(8);
1615 TEST_CASE(9);
1616 TEST_CASE(10);
1617 TEST_CASE(11);
1618 TEST_CASE(12);
1619 TEST_CASE(13);
1620 TEST_CASE(14);
1621 TEST_CASE(15);
1622 TEST_CASE(16);
1623 TEST_CASE(17);
1624 TEST_CASE(18);
1625
1626 #ifdef OPENSSL_CPUID_OBJ
1627 {
1628 size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc();
1629 union { u64 u; u8 c[1024]; } buf;
1630 int i;
1631
1632 AES_set_encrypt_key(K1,sizeof(K1)*8,&key);
1633 CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);
1634 CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1));
1635
1636 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1637 start = OPENSSL_rdtsc();
1638 CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf));
1639 gcm_t = OPENSSL_rdtsc() - start;
1640
1641 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1642 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1643 (block128_f)AES_encrypt);
1644 start = OPENSSL_rdtsc();
1645 CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf),
1646 &key,ctx.Yi.c,ctx.EKi.c,&ctx.mres,
1647 (block128_f)AES_encrypt);
1648 ctr_t = OPENSSL_rdtsc() - start;
1649
1650 printf("%.2f-%.2f=%.2f\n",
1651 gcm_t/(double)sizeof(buf),
1652 ctr_t/(double)sizeof(buf),
1653 (gcm_t-ctr_t)/(double)sizeof(buf));
1654 #ifdef GHASH
1655 GHASH(&ctx,buf.c,sizeof(buf));
1656 start = OPENSSL_rdtsc();
1657 for (i=0;i<100;++i) GHASH(&ctx,buf.c,sizeof(buf));
1658 gcm_t = OPENSSL_rdtsc() - start;
1659 printf("%.2f\n",gcm_t/(double)sizeof(buf)/(double)i);
1660 #endif
1661 }
1662 #endif
1663
1664 return ret;
1665 }
1666 #endif