/*
 * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <string.h>
#include <openssl/crypto.h>
#include "internal/cryptlib.h"
#include "internal/endian.h"
#include "crypto/modes.h"

#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef GETU32
# define GETU32(p)  BSWAP4(*(const u32 *)(p))
# undef PUTU32
# define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
#endif

/* RISC-V uses C implementation of gmult as a fallback. */
#if defined(__riscv)
# define INCLUDE_C_GMULT_4BIT
#endif

#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V) do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo = (V.hi<<63)|(V.lo>>1); \
                V.hi = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo = (V.hi<<63)|(V.lo>>1); \
                V.hi = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
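
/*
 * REDUCE1BIT(V) multiplies the 128-bit field element V by x in the
 * bit-reflected representation used by GHASH: V is shifted right by one
 * bit and, when a bit falls off the low end, the 0xE1 pattern (the
 * reduction polynomial x^128 + x^7 + x^2 + x + 1 with the x^128 term
 * dropped, bit-reflected) is folded into the top of V.hi.
 */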

/*-
 *
 * NOTE: TABLE_BITS and all non-4-bit implementations have been removed in 3.1.
 *
 * Even though the permitted values for TABLE_BITS were 8, 4 and 1, it should
 * never be set to 8: 8 is effectively reserved for testing purposes.
 * TABLE_BITS > 1 selects the lookup-table-driven implementations referred to
 * as "Shoup's" in the GCM specification. In other words, OpenSSL does not
 * cover the whole spectrum of possible table-driven implementations. Why?
 * In the non-"Shoup's" case the memory access pattern is segmented in such
 * a manner that it is trivial to see that cache-timing information can
 * reveal a fair portion of the intermediate hash value. Given that the
 * ciphertext is always available to the attacker, it is possible to attempt
 * to deduce the secret parameter H and, if successful, tamper with messages
 * [which is nothing but trivial in CTR mode]. In the "Shoup's" case this is
 * not as trivial, but there is no reason to believe that it is resistant to
 * cache-timing attacks. The thing about the "8-bit" implementation is that
 * it consumes 16 (sixteen) times more memory, 4KB per individual key + 1KB
 * shared. On the pro side, it should be twice as fast as the "4-bit"
 * version, and for gcc-generated x86[_64] code the "8-bit" version was
 * observed to run ~75% faster, closer to 100% for commercial compilers...
 * Yet the "4-bit" procedure is preferred, because it is believed to provide
 * a better security/performance balance and adequate all-round performance.
 * "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free()
 *   results in VM working set trimming, meaning that a subsequent
 *   malloc() would immediately incur working set expansion);
 * - a larger table has a larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from the same
 *   thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */

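/*
 * gcm_init_4bit() precomputes a 16-entry table of multiples of H:
 * Htable[8] holds H itself, each REDUCE1BIT step derives the entry for the
 * next lower power-of-two index, and the remaining entries are XOR
 * combinations of those, so gcm_gmult_4bit() below can process Xi one
 * nibble at a time with a single table lookup per nibble.
 */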
static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        DECLARE_IS_ENDIAN;

        if (IS_LITTLE_ENDIAN)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            }
        else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}

# if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};
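/*
 * rem_4bit[n] is the precomputed reduction of the four bits that drop off
 * the low end of Z on each 4-bit shift: every set bit of n folds a suitably
 * shifted copy of the 0xE1 reduction pattern back into the top of Z, and
 * PACK() places the 16-bit constant in the most significant bits of a
 * size_t.
 */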

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# endif

# if !defined(GHASH_ASM)
# if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }

        if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
# else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
# endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }

        inp += 16;
        /* Block size is 128 bits so len is a multiple of 16 */
        len -= 16;
    } while (len > 0);
}
# endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

# define GCM_MUL(ctx) ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
# define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" intended to mitigate the cache
 * thrashing effect. In other words, the idea is to hash data while it is
 * still in L1 cache after the encryption pass...
 */
# define GHASH_CHUNK (3*1024)
# endif

#if (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if !defined(I386_ONLY) && \
     (defined(__i386) || defined(__i386__) || \
      defined(__x86_64) || defined(__x86_64__) || \
      defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
# define GHASH_ASM_X86_OR_64

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define gcm_init_avx  gcm_init_clmul
# define gcm_gmult_avx gcm_gmult_clmul
# define gcm_ghash_avx gcm_ghash_clmul
# else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
# endif

# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
# define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
# endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
# include "arm_arch.h"
# if __ARM_MAX_ARCH__>=7
# define GHASH_ASM_ARM
# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
# if defined(__arm__) || defined(__arm)
# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
# endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
# elif defined(__sparc__) || defined(__sparc)
# include "crypto/sparc_arch.h"
# define GHASH_ASM_SPARC
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
# include "crypto/ppc_arch.h"
# define GHASH_ASM_PPC
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
# include "crypto/riscv_arch.h"
# define GHASH_ASM_RISCV
# undef GHASH
void gcm_init_clmul_rv64i_zbb_zbc(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul_rv64i_zbb_zbc(u64 Xi[2], const u128 Htable[16]);
# endif
#endif

static void gcm_get_funcs(struct gcm_funcs_st *ctx)
{
    /* set defaults -- overridden below as needed */
    ctx->ginit = gcm_init_4bit;
#if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
    ctx->gmult = gcm_gmult_4bit;
#else
    ctx->gmult = NULL;
#endif
#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
    ctx->ghash = gcm_ghash_4bit;
#else
    ctx->ghash = NULL;
#endif

#if defined(GHASH_ASM_X86_OR_64)
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    /* x86_64 */
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            ctx->ginit = gcm_init_avx;
            ctx->gmult = gcm_gmult_avx;
            ctx->ghash = gcm_ghash_avx;
        } else {
            ctx->ginit = gcm_init_clmul;
            ctx->gmult = gcm_gmult_clmul;
            ctx->ghash = gcm_ghash_clmul;
        }
        return;
    }
# endif
# if defined(GHASH_ASM_X86)
    /* x86 only */
# if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
# else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
# endif
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
    return;
# endif
#elif defined(GHASH_ASM_ARM)
    /* ARM */
# ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        ctx->ginit = (gcm_init_fn)gcm_init_v8;
        ctx->gmult = gcm_gmult_v8;
        ctx->ghash = gcm_ghash_v8;
    }
# elif defined(NEON_CAPABLE)
    if (NEON_CAPABLE) {
        ctx->ginit = gcm_init_neon;
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    }
# endif
    return;
#elif defined(GHASH_ASM_SPARC)
    /* SPARC */
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        ctx->ginit = gcm_init_vis3;
        ctx->gmult = gcm_gmult_vis3;
        ctx->ghash = gcm_ghash_vis3;
    }
    return;
#elif defined(GHASH_ASM_PPC)
    /* PowerPC */
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        ctx->ginit = gcm_init_p8;
        ctx->gmult = gcm_gmult_p8;
        ctx->ghash = gcm_ghash_p8;
    }
    return;
#elif defined(GHASH_ASM_RISCV) && __riscv_xlen == 64
    /* RISCV */
    ctx->ghash = NULL;
    if (RISCV_HAS_ZBB() && RISCV_HAS_ZBC()) {
        ctx->ginit = gcm_init_clmul_rv64i_zbb_zbc;
        ctx->gmult = gcm_gmult_clmul_rv64i_zbb_zbc;
    }
    return;
#endif
#if defined(__s390__) || defined(__s390x__)
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
    return;
#endif
}

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

    gcm_get_funcs(&ctx->funcs);
    ctx->funcs.ginit(ctx->Htable, ctx->H.u);
}

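/*
 * CRYPTO_gcm128_setiv() computes the pre-counter block Y0 as specified in
 * NIST SP 800-38D: a 96-bit IV is used directly with a 32-bit counter of 1
 * appended, while any other IV length is absorbed through GHASH together
 * with its bit length.  EK0, the encryption of Y0, is saved for the final
 * tag computation.
 */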
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}

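/*
 * CRYPTO_gcm128_encrypt() generates the CTR keystream with ctx->block and
 * folds the resulting ciphertext into the GHASH state.  Where the GHASH()
 * macro is available, ciphertext is hashed in large batches (up to
 * GHASH_CHUNK bytes at a time, with partial blocks collected in ctx->Xn);
 * otherwise every 16-byte block is multiplied into Xi individually.
 * ctx->mres records how much data is buffered between calls.
 */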
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
# if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
# endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

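/*
 * CRYPTO_gcm128_decrypt() mirrors the encrypt path, except that GHASH is
 * applied to the incoming ciphertext (before it is XORed with the
 * keystream), so the hash input is the same on both ends.
 */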
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
# if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
# endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
# else
                        PUTU32(ctx->Yi.c + 12, ctr);
# endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

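/*
 * The *_ctr32 variants below hand bulk keystream generation to a
 * caller-supplied ctr128_f routine (typically a hardware-accelerated
 * counter-mode implementation) that increments only the low 32 bits of the
 * counter block; any leftover partial block falls back to ctx->block.
 */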
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
# if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
# endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
# if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
# endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}

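/*
 * CRYPTO_gcm128_finish() flushes any buffered data, hashes the final block
 * holding the bit lengths of the AAD and the ciphertext, XORs in EK0 and
 * compares the result against the caller's tag with CRYPTO_memcmp() in
 * constant time.
 */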
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}

void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}
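
/*-
 * A minimal usage sketch of the one-shot GCM API above, illustrative only
 * and therefore excluded from compilation.  It assumes an AES block cipher
 * supplied through the legacy AES_set_encrypt_key()/AES_encrypt() pair from
 * <openssl/aes.h>; any block128_f-compatible cipher would be driven the
 * same way.
 */
#if 0
# include <openssl/aes.h>

static int gcm128_seal_example(const unsigned char key[16],
                               const unsigned char iv[12],
                               const unsigned char *aad, size_t aad_len,
                               const unsigned char *pt, unsigned char *ct,
                               size_t len, unsigned char tag[16])
{
    AES_KEY aes;
    GCM128_CONTEXT gcm;

    if (AES_set_encrypt_key(key, 128, &aes) != 0)
        return 0;

    /* Derive H and the GHASH function table from the raw block cipher. */
    CRYPTO_gcm128_init(&gcm, &aes, (block128_f)AES_encrypt);
    /* A 96-bit IV is used directly as the pre-counter block. */
    CRYPTO_gcm128_setiv(&gcm, iv, 12);

    /* All AAD must be supplied before any plaintext. */
    if (aad_len > 0 && CRYPTO_gcm128_aad(&gcm, aad, aad_len))
        return 0;
    if (CRYPTO_gcm128_encrypt(&gcm, pt, ct, len))
        return 0;

    /* Emit the (up to) 128-bit authentication tag. */
    CRYPTO_gcm128_tag(&gcm, tag, 16);
    OPENSSL_cleanse(&gcm, sizeof(gcm));
    return 1;
}
#endif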