/*
 * Copyright 2010-2023 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
#include <openssl/crypto.h>
#include "internal/cryptlib.h"
#include "internal/endian.h"
#include "crypto/modes.h"
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif
/* RISC-V uses C implementation as a fallback. */
#if defined(__riscv)
# define INCLUDE_C_GMULT_4BIT
# define INCLUDE_C_GHASH_4BIT
#endif
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
            u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
            V.lo  = (V.hi<<63)|(V.lo>>1); \
            V.hi  = (V.hi>>1 )^T; \
        } else { \
            u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
            V.lo  = (V.hi<<63)|(V.lo>>1); \
            V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
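
/*
 * REDUCE1BIT(V) performs the GCM "shift right and reduce" step: V is shifted
 * right by one bit and, if the bit shifted out was set, the reduction
 * constant R = 0xe1 || 0^120 (the field polynomial x^128+x^7+x^2+x+1 in
 * GCM's reflected bit order) is folded back into the top word.
 */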
/*
 * NOTE: TABLE_BITS and all non-4-bit implementations have been removed in
 * 3.1.
 *
 * Even though the permitted values for TABLE_BITS were 8, 4 and 1, it should
 * never be set to 8: 8 is effectively reserved for testing purposes.
 * TABLE_BITS > 1 selects the lookup-table-driven implementations referred to
 * as "Shoup's" in the GCM specification; in other words OpenSSL does not
 * cover the whole spectrum of possible table-driven implementations.  Why?
 * In the non-"Shoup's" case the memory access pattern is segmented in such a
 * manner that it is trivial to see that cache-timing information can reveal
 * a fair portion of the intermediate hash value.  Given that the ciphertext
 * is always available to the attacker, it is possible to attempt to deduce
 * the secret parameter H and, if successful, to tamper with messages [which
 * is nothing but trivial in CTR mode].  In the "Shoup's" case this is not as
 * trivial, but there is no reason to believe it is resistant to cache-timing
 * attacks either.  The thing about the "8-bit" implementation is that it
 * consumes 16 (sixteen) times more memory, 4KB per individual key + 1KB
 * shared.  On the pro side, it should be about twice as fast as the "4-bit"
 * version, and for gcc-generated x86[_64] code the "8-bit" version was
 * observed to run ~75% faster, closer to 100% for commercial compilers...
 * Yet the "4-bit" procedure is preferred, because it is believed to provide
 * a better security-performance balance and adequate all-round performance.
 * "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for handling
 *   short messages;
 * - larger table allocation can become unbearable because of VM subsystem
 *   penalties (for example on Windows a large enough free results in VM
 *   working-set trimming, meaning that a consequent malloc would immediately
 *   incur working-set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even from the same
 *   thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
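
/*
 * gcm_init_4bit() precomputes the "Shoup" table for the 4-bit method:
 * Htable[i] holds the product of the 4-bit multiple i (in GCM's reflected
 * bit order) and the hash key H, so that a full 128-bit multiplication by H
 * can be assembled from one table lookup per input nibble.
 */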
static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;

        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        DECLARE_IS_ENDIAN;

        if (IS_LITTLE_ENDIAN)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            }
        else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}
# if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};
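
/*
 * rem_4bit[] holds, for each possible 4-bit value shifted out of the low end
 * of Z during the nibble-wide reduction, the constant that must be folded
 * back into the top of Z; PACK() places the 16-bit value in the most
 * significant bits of a size_t so the same table serves 32- and 64-bit
 * builds.
 */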
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;

        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
# endif
# if !defined(GHASH_ASM) || defined(INCLUDE_C_GHASH_4BIT)
# if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for details...
 * Compiler-generated code doesn't seem to give any performance improvement,
 * at least not on x86[_64].  It's here mostly as a reference and a
 * placeholder for possible future non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }

        if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
# else
            u8 *p = (u8 *)Xi;
            u32 v;

            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
# endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }

        inp += 16;
        /* Block size is 128 bits so len is a multiple of 16 */
        len -= 16;
    } while (len > 0);
}
# endif
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# define GCM_MUL(ctx)      ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" whose mission is to mitigate cache
 * trashing effects.  In other words, the idea is to hash data while it is
 * still in L1 cache after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
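
/*
 * With GHASH() available, the bulk paths below first run the counter-mode
 * cipher over up to GHASH_CHUNK bytes and only then hash that chunk in a
 * single call, rather than invoking GCM_MUL() once per 16-byte block; 3KB is
 * presumably small enough to stay resident in L1 alongside the key schedule.
 */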
#if (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if !defined(I386_ONLY) && \
     (defined(__i386) || defined(__i386__) || \
      defined(__x86_64) || defined(__x86_64__) || \
      defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif
#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__ >= 7
#   define GHASH_ASM_ARM
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "crypto/sparc_arch.h"
#  define GHASH_ASM_SPARC
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "crypto/ppc_arch.h"
#  define GHASH_ASM_PPC
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
#  include "crypto/riscv_arch.h"
#  define GHASH_ASM_RV64I
/* Zbc/Zbkc (scalar crypto with clmul) based routines. */
void gcm_init_rv64i_zbc(u128 Htable[16], const u64 Xi[2]);
void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 Xi[2]);
void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len);
void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
                               const u8 *inp, size_t len);
# endif
#endif
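
/*
 * gcm_get_funcs() selects the (ginit, gmult, ghash) implementations at run
 * time: it starts from the portable 4-bit C routines and then, per
 * architecture, upgrades to whichever assembler variant the CPU capability
 * vectors report as available.
 */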
static void gcm_get_funcs(struct gcm_funcs_st *ctx)
{
    /* set defaults -- overridden below as needed */
    ctx->ginit = gcm_init_4bit;
#if !defined(GHASH_ASM)
    ctx->gmult = gcm_gmult_4bit;
#else
    ctx->gmult = NULL;
#endif
#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
    ctx->ghash = gcm_ghash_4bit;
#else
    ctx->ghash = NULL;
#endif

#if defined(GHASH_ASM_X86_OR_64)
# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    /* x86_64 */
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            ctx->ginit = gcm_init_avx;
            ctx->gmult = gcm_gmult_avx;
            ctx->ghash = gcm_ghash_avx;
        } else {
            ctx->ginit = gcm_init_clmul;
            ctx->gmult = gcm_gmult_clmul;
            ctx->ghash = gcm_ghash_clmul;
        }
        return;
    }
# endif
# if defined(GHASH_ASM_X86)
    /* x86 only */
#  if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
#  else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
        ctx->gmult = gcm_gmult_4bit_mmx;
        ctx->ghash = gcm_ghash_4bit_mmx;
        return;
    }
#  endif
    ctx->gmult = gcm_gmult_4bit_x86;
    ctx->ghash = gcm_ghash_4bit_x86;
    return;
# else
    /* x86_64 fallback defaults */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
    return;
# endif
#elif defined(GHASH_ASM_ARM)
    /* ARM defaults */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
# ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        ctx->ginit = (gcm_init_fn)gcm_init_v8;
        ctx->gmult = gcm_gmult_v8;
        ctx->ghash = gcm_ghash_v8;
    }
# elif defined(NEON_CAPABLE)
    if (NEON_CAPABLE) {
        ctx->ginit = gcm_init_neon;
        ctx->gmult = gcm_gmult_neon;
        ctx->ghash = gcm_ghash_neon;
    }
# endif
    return;
#elif defined(GHASH_ASM_SPARC)
    /* SPARC defaults */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        ctx->ginit = gcm_init_vis3;
        ctx->gmult = gcm_gmult_vis3;
        ctx->ghash = gcm_ghash_vis3;
    }
    return;
#elif defined(GHASH_ASM_PPC)
    /* PowerPC does not define GHASH_ASM; defaults set above */
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        ctx->ginit = gcm_init_p8;
        ctx->gmult = gcm_gmult_p8;
        ctx->ghash = gcm_ghash_p8;
    }
    return;
#elif defined(GHASH_ASM_RV64I)
    /* RISC-V defaults */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;

    if (RISCV_HAS_ZBC()) {
        if (RISCV_HAS_ZBKB()) {
            ctx->ginit = gcm_init_rv64i_zbc__zbkb;
            ctx->gmult = gcm_gmult_rv64i_zbc__zbkb;
            ctx->ghash = gcm_ghash_rv64i_zbc__zbkb;
        } else if (RISCV_HAS_ZBB()) {
            ctx->ginit = gcm_init_rv64i_zbc__zbb;
            ctx->gmult = gcm_gmult_rv64i_zbc;
            ctx->ghash = gcm_ghash_rv64i_zbc;
        } else {
            ctx->ginit = gcm_init_rv64i_zbc;
            ctx->gmult = gcm_gmult_rv64i_zbc;
            ctx->ghash = gcm_ghash_rv64i_zbc;
        }
    }
    return;
#elif defined(GHASH_ASM)
    /* all other architectures use the generic names */
    ctx->gmult = gcm_gmult_4bit;
    ctx->ghash = gcm_ghash_4bit;
    return;
#endif
}
void ossl_gcm_init_4bit(u128 Htable[16], const u64 H[2])
{
    struct gcm_funcs_st funcs;

    gcm_get_funcs(&funcs);
    funcs.ginit(Htable, H);
}

void ossl_gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    struct gcm_funcs_st funcs;

    gcm_get_funcs(&funcs);
    funcs.gmult(Xi, Htable);
}

void ossl_gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len)
{
    struct gcm_funcs_st funcs;
    u64 tmp[2];
    size_t i;

    gcm_get_funcs(&funcs);
    if (funcs.ghash != NULL) {
        funcs.ghash(Xi, Htable, inp, len);
    } else {
        /* Emulate ghash if needed */
        for (i = 0; i < len; i += 16) {
            memcpy(tmp, &inp[i], sizeof(tmp));
            Xi[0] ^= tmp[0];
            Xi[1] ^= tmp[1];
            funcs.gmult(Xi, Htable);
        }
    }
}
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;

        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

    gcm_get_funcs(&ctx->funcs);
    ctx->funcs.ginit(ctx->Htable, ctx->H.u);
}
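
/*
 * CRYPTO_gcm128_setiv() derives the pre-counter block J0: a 96-bit IV is
 * used directly as IV || 0^31 || 1, while any other IV length is absorbed
 * through GHASH together with its bit length, as specified in NIST SP
 * 800-38D.  EK0 = E(K, J0) is also cached here for the final tag.
 */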
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
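
/*
 * CRYPTO_gcm128_aad() may be called repeatedly, but only before the first
 * encrypt/decrypt call; the running AAD length is capped at 2^61 bytes
 * (2^64 bits) as required by the GCM specification.
 */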
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
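
/*
 * CRYPTO_gcm128_encrypt() interleaves CTR-mode encryption with GHASH over
 * the produced ciphertext and may be called repeatedly on successive
 * plaintext fragments; the total message length is limited to 2^36 - 32
 * bytes per the GCM specification.
 */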
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
                /* finish the partial block left over from the last call */
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                    mres = n;
                }
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
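
/*
 * CRYPTO_gcm128_decrypt() mirrors the encrypt path, except that GHASH is
 * taken over the incoming ciphertext before it is XORed with the key
 * stream, which also keeps in-place operation (in == out) safe.
 */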
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
                /* finish the partial block left over from the last call */
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);

                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];

                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];

                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                    mres = n;
                }
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;

        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        ctx->Xi.c[n] ^= c;
        out[i] = c ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
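
/*
 * The *_ctr32() variants below accept a ctr128_f routine that encrypts many
 * counter blocks per call (e.g. a hardware-assisted AES-CTR kernel), so the
 * 32-bit counter in Yi is advanced in bulk here rather than one block at a
 * time.
 */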
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);

            ctx->Xi.c[n] ^= c;
            *(out++) = c ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        while (j--) {
            size_t k;

            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            u8 c = in[n];

# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = c) ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
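
/*
 * CRYPTO_gcm128_finish() folds the 64-bit bit-lengths of the AAD and the
 * ciphertext into GHASH, XORs the result with EK0 to form the tag, and, if
 * a caller-supplied tag is given, compares it in constant time via
 * CRYPTO_memcmp().
 */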
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}
void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}
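
/*
 * Illustrative sketch of the typical call sequence for the one-shot API
 * above (compiled out; assumes an AES key schedule `ks` and AES_encrypt()
 * from <openssl/aes.h>, which this file does not include).
 */
#if 0
static int gcm128_usage_sketch(const AES_KEY *ks,
                               const unsigned char iv[12],
                               const unsigned char *aad, size_t aad_len,
                               const unsigned char *pt, size_t pt_len,
                               unsigned char *ct, unsigned char tag[16])
{
    GCM128_CONTEXT gcm;

    CRYPTO_gcm128_init(&gcm, (void *)ks, (block128_f)AES_encrypt);
    CRYPTO_gcm128_setiv(&gcm, iv, 12);
    if (CRYPTO_gcm128_aad(&gcm, aad, aad_len))
        return -1;
    if (CRYPTO_gcm128_encrypt(&gcm, pt, ct, pt_len))
        return -1;
    CRYPTO_gcm128_tag(&gcm, tag, 16);
    return 0;
}
#endif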