/*
 * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License"). You may not use
 * this file except in compliance with the License. You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
#include <string.h>
#include <openssl/crypto.h>
#include "internal/cryptlib.h"
#include "internal/endian.h"
#include "crypto/modes.h"
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif
/* RISC-V uses C implementation of gmult as a fallback. */
#if defined(__riscv)
# define INCLUDE_C_GMULT_4BIT
#endif
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
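/*
 * Illustrative sketch (not part of the build): REDUCE1BIT performs one
 * right-shift-and-reduce step in GF(2^128). The bit shifted out of V.lo
 * selects, via an all-ones/all-zeroes mask, whether the reduction
 * constant 0xe1...00 is folded back into the top bits:
 *
 *     u64 mask = 0 - (V.lo & 1);            // all ones iff low bit set
 *     V.lo = (V.hi << 63) | (V.lo >> 1);    // 128-bit shift right by 1
 *     V.hi = (V.hi >> 1) ^ (U64(0xe100000000000000) & mask);
 */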
/*
 * NOTE: TABLE_BITS and all non-4bit implementations have been removed in 3.1.
 *
 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
 * never be set to 8. 8 is effectively reserved for testing purposes.
 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
 * whole spectrum of possible table driven implementations. Why? In
 * non-"Shoup's" case memory access pattern is segmented in such manner,
 * that it's trivial to see that cache timing information can reveal
 * fair portion of intermediate hash value. Given that ciphertext is
 * always available to attacker, it's possible for him to attempt to
 * deduce secret parameter H and if successful, tamper with messages
 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
 * not as trivial, but there is no reason to believe that it's resistant
 * to cache-timing attack. And the thing about "8-bit" implementation is
 * that it consumes 16 (sixteen) times more memory, 4KB per individual
 * key + 1KB shared. Well, on pros side it should be twice as fast as
 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
 * was observed to run ~75% faster, closer to 100% for commercial
 * compilers... Yet "4-bit" procedure is preferred, because it's
 * believed to provide better security-performance balance and adequate
 * all-round performance. "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows large enough free
 *   results in VM working set trimming, meaning that consequent
 *   malloc would immediately incur working set expansion);
 * - larger table has larger cache footprint, which can affect
 *   performance of other code paths (not necessarily even from same
 *   thread in Hyper-Threading world);
 *
 * Value of 1 is not appropriate for performance reasons.
 */
static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        DECLARE_IS_ENDIAN;

        if (IS_LITTLE_ENDIAN)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            }
        else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}
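/*
 * Sketch of the invariant gcm_init_4bit establishes (an observation, not
 * code from this file): Htable[8] holds H itself, Htable[4], Htable[2] and
 * Htable[1] are successive REDUCE1BIT "halvings" of it, and every other
 * entry is built by linearity, so that
 *
 *     Htable[i ^ j] == Htable[i] ^ Htable[j]   (for disjoint bit sets)
 *
 * This is what lets gcm_gmult_4bit below process Xi one 4-bit nibble at a
 * time with a single table lookup per nibble.
 */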
# if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (IS_LITTLE_ENDIAN) {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#  else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
#  endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
# endif
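/*
 * One loop iteration above, in symbols (informal sketch): with Z the
 * 128-bit accumulator and c the next 4-bit nibble of Xi,
 *
 *     rem = Z & 0xf;                   // bits shifted out of the bottom
 *     Z   = (Z >> 4) ^ rem_4bit[rem];  // fold-back reduction (top bits)
 *     Z  ^= Htable[c];                 // accumulate c*H from the table
 *
 * i.e. a nibble-serial Shoup multiplication of Xi by H.
 */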
# if !defined(GHASH_ASM)
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }

        if (IS_LITTLE_ENDIAN) {
#   ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#   else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
#   endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }

        inp += 16;
        /* Block size is 128 bits so len is a multiple of 16 */
        len -= 16;
    } while (len > 0);
}
#  endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif
# define GCM_MUL(ctx)      ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" whose mission is to mitigate cache
 * thrashing: the idea is to hash the data while it is still in L1 cache
 * after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
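/*
 * For scale (derived from the definition above): GHASH_CHUNK = 3*1024
 * bytes is 3*1024/16 = 192 blocks, so the chunked loops below hash and
 * en/decrypt 192 counter blocks per pass, and the *_ctr32 paths advance
 * the 32-bit counter by GHASH_CHUNK/16 = 192 per stream call.
 */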
#if (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if !defined(I386_ONLY) && \
     (defined(__i386) || defined(__i386__) || \
      defined(__x86_64) || defined(__x86_64__) || \
      defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "crypto/sparc_arch.h"
#  define GHASH_ASM_SPARC
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "crypto/ppc_arch.h"
#  define GHASH_ASM_PPC
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
#  include "crypto/riscv_arch.h"
#  define GHASH_ASM_RISCV

void gcm_init_clmul_rv64i_zbb_zbc(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul_rv64i_zbb_zbc(u64 Xi[2], const u128 Htable[16]);
# endif
#endif
static void gcm_get_funcs(struct gcm_funcs_st *ctx)
{
    /* set defaults -- overridden below as needed */
    ctx->ginit = gcm_init_4bit;
#if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
    ctx->gmult = gcm_gmult_4bit;
#else
    ctx->gmult = NULL;
#endif
#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
    ctx->ghash = gcm_ghash_4bit;
#else
    ctx->ghash = NULL;
#endif

#if defined(GHASH_ASM_X86_OR_64)
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            ctx->ginit = gcm_init_avx;
            ctx->gmult = gcm_gmult_avx;
            ctx->ghash = gcm_ghash_avx;
        } else {
            ctx->ginit = gcm_init_clmul;
            ctx->gmult = gcm_gmult_clmul;
            ctx->ghash = gcm_ghash_clmul;
        }
        return;
    }
# endif
# if defined(GHASH_ASM_X86)
#  if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
#  else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
#  endif
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
    return;
# endif
    return;
#elif defined(GHASH_ASM_ARM)
# ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        ctx->ginit = (gcm_init_fn)gcm_init_v8;
        ctx->gmult = gcm_gmult_v8;
        ctx->ghash = gcm_ghash_v8;
    }
# elif defined(NEON_CAPABLE)
    if (NEON_CAPABLE) {
        ctx->ginit = gcm_init_neon;
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    }
# endif
    return;
#elif defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        ctx->ginit = gcm_init_vis3;
        ctx->gmult = gcm_gmult_vis3;
        ctx->ghash = gcm_ghash_vis3;
    }
    return;
#elif defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        ctx->ginit = gcm_init_p8;
        ctx->gmult = gcm_gmult_p8;
        ctx->ghash = gcm_ghash_p8;
    }
    return;
#elif defined(GHASH_ASM_RISCV) && __riscv_xlen == 64
    if (RISCV_HAS_ZBB() && RISCV_HAS_ZBC()) {
        ctx->ginit = gcm_init_clmul_rv64i_zbb_zbc;
        ctx->gmult = gcm_gmult_clmul_rv64i_zbb_zbc;
    }
    return;
#endif

#if defined(__s390__) || defined(__s390x__)
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
    return;
#endif
}
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

    gcm_get_funcs(&ctx->funcs);
    ctx->funcs.ginit(ctx->Htable, ctx->H.u);
}
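/*
 * Hypothetical caller sketch (not part of this file): the block cipher is
 * supplied as a raw block128_f, e.g. AES via <openssl/aes.h>:
 *
 *     AES_KEY ks;
 *     GCM128_CONTEXT gcm;
 *
 *     AES_set_encrypt_key(key, 128, &ks);
 *     CRYPTO_gcm128_init(&gcm, &ks, (block128_f)AES_encrypt);
 */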
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
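/*
 * Reference note: the two IV paths above implement NIST SP 800-38D. For
 * the common 96-bit IV, Y0 is simply IV || 0x00000001; for any other
 * length, Y0 = GHASH(IV padded with zeros || [64-bit bit length of IV]),
 * computed by borrowing ctx->Xi as scratch space.
 */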
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
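/*
 * Call-order sketch (illustrative): all AAD must be supplied before any
 * en/decryption, e.g.
 *
 *     CRYPTO_gcm128_aad(&gcm, aad, aad_len);
 *     CRYPTO_gcm128_encrypt(&gcm, pt, ct, pt_len);
 *
 * Calling CRYPTO_gcm128_aad() after data has been processed fails with
 * -2 (see the ctx->len.u[1] check above).
 */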
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}
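/*
 * End-to-end usage sketch (hypothetical caller, error handling omitted):
 * one-shot AES-128-GCM sealing with the API above:
 *
 *     GCM128_CONTEXT *g = CRYPTO_gcm128_new(&ks, (block128_f)AES_encrypt);
 *
 *     CRYPTO_gcm128_setiv(g, iv, 12);
 *     CRYPTO_gcm128_aad(g, aad, aad_len);
 *     CRYPTO_gcm128_encrypt(g, pt, ct, pt_len);
 *     CRYPTO_gcm128_tag(g, tag, 16);
 *     CRYPTO_gcm128_release(g);
 *
 * On the receiving side, CRYPTO_gcm128_finish(g, tag, 16) returns 0 iff
 * the tag verifies.
 */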