]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/modes/gcm128.c
Fix regression from GCM mode refactoring
[thirdparty/openssl.git] / crypto / modes / gcm128.c
CommitLineData
4f22f405 1/*
999376dc 2 * Copyright 2010-2022 The OpenSSL Project Authors. All Rights Reserved.
e7f5b1cd 3 *
81cae8ce 4 * Licensed under the Apache License 2.0 (the "License"). You may not use
4f22f405
RS
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
e7f5b1cd
AP
8 */
9
e7f5b1cd 10#include <string.h>
459b15d4 11#include <openssl/crypto.h>
24fd8541 12#include "internal/cryptlib.h"
e23d850f 13#include "internal/endian.h"
25f2138b 14#include "crypto/modes.h"
e7f5b1cd 15
77286fe3
BE
16#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17typedef size_t size_t_aX __attribute((__aligned__(1)));
18#else
19typedef size_t size_t_aX;
20#endif
21
f472ec8c
AP
22#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23/* redefine, because alignment is ensured */
0f113f3e
MC
24# undef GETU32
25# define GETU32(p) BSWAP4(*(const u32 *)(p))
26# undef PUTU32
27# define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
28#endif
29
999376dc
HB
30/* RISC-V uses C implementation of gmult as a fallback. */
31#if defined(__riscv)
32# define INCLUDE_C_GMULT_4BIT
33#endif
34
0f113f3e
MC
35#define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
36#define REDUCE1BIT(V) do { \
37 if (sizeof(size_t)==8) { \
38 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
39 V.lo = (V.hi<<63)|(V.lo>>1); \
40 V.hi = (V.hi>>1 )^T; \
41 } \
42 else { \
43 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
44 V.lo = (V.hi<<63)|(V.lo>>1); \
45 V.hi = (V.hi>>1 )^((u64)T<<32); \
46 } \
c1f092d1
AP
47} while(0)
48
1d97c843 49/*-
7b6e19fc
TS
50 *
51 * NOTE: TABLE_BITS and all non-4bit implementations have been removed in 3.1.
52 *
d8d95832
AP
53 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
54 * never be set to 8. 8 is effectively reserved for testing purposes.
55 * TABLE_BITS>1 are lookup-table-driven implementations referred to as
56 * "Shoup's" in GCM specification. In other words OpenSSL does not cover
57 * whole spectrum of possible table driven implementations. Why? In
58 * non-"Shoup's" case memory access pattern is segmented in such manner,
59 * that it's trivial to see that cache timing information can reveal
60 * fair portion of intermediate hash value. Given that ciphertext is
61 * always available to attacker, it's possible for him to attempt to
62 * deduce secret parameter H and if successful, tamper with messages
63 * [which is nothing but trivial in CTR mode]. In "Shoup's" case it's
64 * not as trivial, but there is no reason to believe that it's resistant
65 * to cache-timing attack. And the thing about "8-bit" implementation is
66 * that it consumes 16 (sixteen) times more memory, 4KB per individual
67 * key + 1KB shared. Well, on pros side it should be twice as fast as
68 * "4-bit" version. And for gcc-generated x86[_64] code, "8-bit" version
69 * was observed to run ~75% faster, closer to 100% for commercial
70 * compilers... Yet "4-bit" procedure is preferred, because it's
71 * believed to provide better security-performance balance and adequate
72 * all-round performance. "All-round" refers to things like:
73 *
74 * - shorter setup time effectively improves overall timing for
75 * handling short messages;
76 * - larger table allocation can become unbearable because of VM
77 * subsystem penalties (for example on Windows large enough free
78 * results in VM working set trimming, meaning that consequent
79 * malloc would immediately incur working set expansion);
80 * - larger table has larger cache footprint, which can affect
81 * performance of other code paths (not necessarily even from same
82 * thread in Hyper-Threading world);
83 *
84 * Value of 1 is not appropriate for performance reasons.
85 */
a595baff 86
92c9086e 87static void gcm_init_4bit(u128 Htable[16], const u64 H[2])
e7f5b1cd 88{
0f113f3e
MC
89 u128 V;
90# if defined(OPENSSL_SMALL_FOOTPRINT)
91 int i;
92# endif
e7f5b1cd 93
0f113f3e
MC
94 Htable[0].hi = 0;
95 Htable[0].lo = 0;
96 V.hi = H[0];
97 V.lo = H[1];
98
99# if defined(OPENSSL_SMALL_FOOTPRINT)
100 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
101 REDUCE1BIT(V);
102 Htable[i] = V;
103 }
104
105 for (i = 2; i < 16; i <<= 1) {
106 u128 *Hi = Htable + i;
107 int j;
108 for (V = *Hi, j = 1; j < i; ++j) {
109 Hi[j].hi = V.hi ^ Htable[j].hi;
110 Hi[j].lo = V.lo ^ Htable[j].lo;
111 }
112 }
113# else
114 Htable[8] = V;
115 REDUCE1BIT(V);
116 Htable[4] = V;
117 REDUCE1BIT(V);
118 Htable[2] = V;
119 REDUCE1BIT(V);
120 Htable[1] = V;
121 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
122 V = Htable[4];
123 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
124 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
125 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
126 V = Htable[8];
127 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
128 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
129 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
130 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
131 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
132 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
133 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
134# endif
135# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
136 /*
137 * ARM assembler expects specific dword order in Htable.
138 */
139 {
140 int j;
e23d850f 141 DECLARE_IS_ENDIAN;
0f113f3e 142
e23d850f 143 if (IS_LITTLE_ENDIAN)
0f113f3e
MC
144 for (j = 0; j < 16; ++j) {
145 V = Htable[j];
146 Htable[j].hi = V.lo;
147 Htable[j].lo = V.hi;
148 } else
149 for (j = 0; j < 16; ++j) {
150 V = Htable[j];
151 Htable[j].hi = V.lo << 32 | V.lo >> 32;
152 Htable[j].lo = V.hi << 32 | V.hi >> 32;
153 }
154 }
155# endif
e7f5b1cd
AP
156}
157
999376dc 158# if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
2262beef 159static const size_t rem_4bit[16] = {
0f113f3e
MC
160 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
161 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
162 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
163 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
164};
2262beef 165
4f39edbf 166static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
e7f5b1cd 167{
0f113f3e
MC
168 u128 Z;
169 int cnt = 15;
170 size_t rem, nlo, nhi;
e23d850f 171 DECLARE_IS_ENDIAN;
0f113f3e
MC
172
173 nlo = ((const u8 *)Xi)[15];
174 nhi = nlo >> 4;
175 nlo &= 0xf;
176
177 Z.hi = Htable[nlo].hi;
178 Z.lo = Htable[nlo].lo;
179
180 while (1) {
181 rem = (size_t)Z.lo & 0xf;
182 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
183 Z.hi = (Z.hi >> 4);
184 if (sizeof(size_t) == 8)
185 Z.hi ^= rem_4bit[rem];
186 else
187 Z.hi ^= (u64)rem_4bit[rem] << 32;
188
189 Z.hi ^= Htable[nhi].hi;
190 Z.lo ^= Htable[nhi].lo;
191
192 if (--cnt < 0)
193 break;
194
195 nlo = ((const u8 *)Xi)[cnt];
196 nhi = nlo >> 4;
197 nlo &= 0xf;
198
199 rem = (size_t)Z.lo & 0xf;
200 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
201 Z.hi = (Z.hi >> 4);
202 if (sizeof(size_t) == 8)
203 Z.hi ^= rem_4bit[rem];
204 else
205 Z.hi ^= (u64)rem_4bit[rem] << 32;
206
207 Z.hi ^= Htable[nlo].hi;
208 Z.lo ^= Htable[nlo].lo;
209 }
210
e23d850f 211 if (IS_LITTLE_ENDIAN) {
0f113f3e
MC
212# ifdef BSWAP8
213 Xi[0] = BSWAP8(Z.hi);
214 Xi[1] = BSWAP8(Z.lo);
215# else
216 u8 *p = (u8 *)Xi;
217 u32 v;
218 v = (u32)(Z.hi >> 32);
219 PUTU32(p, v);
220 v = (u32)(Z.hi);
221 PUTU32(p + 4, v);
222 v = (u32)(Z.lo >> 32);
223 PUTU32(p + 8, v);
224 v = (u32)(Z.lo);
225 PUTU32(p + 12, v);
226# endif
227 } else {
228 Xi[0] = Z.hi;
229 Xi[1] = Z.lo;
230 }
2262beef
AP
231}
232
999376dc
HB
233# endif
234
235# if !defined(GHASH_ASM)
0f113f3e 236# if !defined(OPENSSL_SMALL_FOOTPRINT)
2262beef
AP
237/*
238 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
a595baff
AP
239 * details... Compiler-generated code doesn't seem to give any
240 * performance improvement, at least not on x86[_64]. It's here
241 * mostly as reference and a placeholder for possible future
242 * non-trivial optimization[s]...
2262beef 243 */
0f113f3e
MC
244static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
245 const u8 *inp, size_t len)
2262beef
AP
246{
247 u128 Z;
248 int cnt;
249 size_t rem, nlo, nhi;
e23d850f 250 DECLARE_IS_ENDIAN;
0f113f3e 251
2262beef 252 do {
0f113f3e
MC
253 cnt = 15;
254 nlo = ((const u8 *)Xi)[15];
255 nlo ^= inp[15];
256 nhi = nlo >> 4;
257 nlo &= 0xf;
258
259 Z.hi = Htable[nlo].hi;
260 Z.lo = Htable[nlo].lo;
261
262 while (1) {
263 rem = (size_t)Z.lo & 0xf;
264 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
265 Z.hi = (Z.hi >> 4);
266 if (sizeof(size_t) == 8)
267 Z.hi ^= rem_4bit[rem];
268 else
269 Z.hi ^= (u64)rem_4bit[rem] << 32;
270
271 Z.hi ^= Htable[nhi].hi;
272 Z.lo ^= Htable[nhi].lo;
273
274 if (--cnt < 0)
275 break;
276
277 nlo = ((const u8 *)Xi)[cnt];
278 nlo ^= inp[cnt];
279 nhi = nlo >> 4;
280 nlo &= 0xf;
281
282 rem = (size_t)Z.lo & 0xf;
283 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
284 Z.hi = (Z.hi >> 4);
285 if (sizeof(size_t) == 8)
286 Z.hi ^= rem_4bit[rem];
287 else
288 Z.hi ^= (u64)rem_4bit[rem] << 32;
289
290 Z.hi ^= Htable[nlo].hi;
291 Z.lo ^= Htable[nlo].lo;
292 }
e7f5b1cd 293
e23d850f 294 if (IS_LITTLE_ENDIAN) {
0f113f3e
MC
295# ifdef BSWAP8
296 Xi[0] = BSWAP8(Z.hi);
297 Xi[1] = BSWAP8(Z.lo);
298# else
299 u8 *p = (u8 *)Xi;
300 u32 v;
301 v = (u32)(Z.hi >> 32);
302 PUTU32(p, v);
303 v = (u32)(Z.hi);
304 PUTU32(p + 4, v);
305 v = (u32)(Z.lo >> 32);
306 PUTU32(p + 8, v);
307 v = (u32)(Z.lo);
308 PUTU32(p + 12, v);
309# endif
310 } else {
311 Xi[0] = Z.hi;
312 Xi[1] = Z.lo;
313 }
36c269c3
DF
314
315 inp += 16;
316 /* Block size is 128 bits so len is a multiple of 16 */
317 len -= 16;
318 } while (len > 0);
e7f5b1cd 319}
0f113f3e
MC
320# endif
321# else
322void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
323void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
324 size_t len);
325# endif
2262beef 326
d50e0934 327# define GCM_MUL(ctx) ctx->funcs.gmult(ctx->Xi.u,ctx->Htable)
0f113f3e 328# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
95201ef4 329# define GHASH(ctx,in,len) ctx->funcs.ghash((ctx)->Xi.u,(ctx)->Htable,in,len)
0f113f3e
MC
330/*
331 * GHASH_CHUNK is "stride parameter" missioned to mitigate cache trashing
332 * effect. In other words idea is to hash data while it's still in L1 cache
333 * after encryption pass...
334 */
335# define GHASH_CHUNK (3*1024)
336# endif
2262beef 337
7b6e19fc 338#if (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
0f113f3e
MC
339# if !defined(I386_ONLY) && \
340 (defined(__i386) || defined(__i386__) || \
341 defined(__x86_64) || defined(__x86_64__) || \
342 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
1e863180 343# define GHASH_ASM_X86_OR_64
c1f092d1 344
0f113f3e
MC
345void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
346void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
347void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
348 size_t len);
c1f092d1 349
0f113f3e
MC
350# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
351# define gcm_init_avx gcm_init_clmul
352# define gcm_gmult_avx gcm_gmult_clmul
353# define gcm_ghash_avx gcm_ghash_clmul
354# else
355void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
356void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
357void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
358 size_t len);
359# endif
1da5d302 360
0f113f3e 361# if defined(__i386) || defined(__i386__) || defined(_M_IX86)
1e863180 362# define GHASH_ASM_X86
0f113f3e
MC
363void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
364void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
365 size_t len);
c1f092d1 366
0f113f3e
MC
367void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
368void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
369 size_t len);
1e863180 370# endif
82741e9c 371# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
1e863180 372# include "arm_arch.h"
c1669e1c 373# if __ARM_MAX_ARCH__>=7
1e863180 374# define GHASH_ASM_ARM
0f113f3e 375# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
82741e9c 376# if defined(__arm__) || defined(__arm)
0f113f3e 377# define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
82741e9c 378# endif
0f113f3e
MC
379void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
380void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
381void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
382 size_t len);
383void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
384void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
385void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
386 size_t len);
1e863180 387# endif
23328d4b 388# elif defined(__sparc__) || defined(__sparc)
52f7e44e 389# include "crypto/sparc_arch.h"
23328d4b 390# define GHASH_ASM_SPARC
0f113f3e
MC
391void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
392void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
393void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
394 size_t len);
395# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
3d178db7 396# include "crypto/ppc_arch.h"
0e716d92 397# define GHASH_ASM_PPC
0f113f3e
MC
398void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
399void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
400void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
401 size_t len);
999376dc
HB
402# elif defined(OPENSSL_CPUID_OBJ) && defined(__riscv) && __riscv_xlen == 64
403# include "crypto/riscv_arch.h"
404# define GHASH_ASM_RISCV
999376dc
HB
405# undef GHASH
406void gcm_init_clmul_rv64i_zbb_zbc(u128 Htable[16], const u64 Xi[2]);
407void gcm_gmult_clmul_rv64i_zbb_zbc(u64 Xi[2], const u128 Htable[16]);
c1f092d1 408# endif
c1f092d1
AP
409#endif
410
92c9086e 411static void gcm_get_funcs(struct gcm_funcs_st *ctx)
e7f5b1cd 412{
92c9086e
TS
413 /* set defaults -- overridden below as needed */
414 ctx->ginit = gcm_init_4bit;
415#if !defined(GHASH_ASM) || defined(INCLUDE_C_GMULT_4BIT)
416 ctx->gmult = gcm_gmult_4bit;
e7f5b1cd 417#else
92c9086e 418 ctx->gmult = NULL;
e7f5b1cd 419#endif
92c9086e
TS
420#if !defined(GHASH_ASM) && !defined(OPENSSL_SMALL_FOOTPRINT)
421 ctx->ghash = gcm_ghash_4bit;
7b6e19fc 422#else
92c9086e 423 ctx->ghash = NULL;
7b6e19fc 424#endif
92c9086e
TS
425
426#if defined(GHASH_ASM_X86_OR_64)
427# if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
428 /* x86_64 */
6e5a853b 429 if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
0f113f3e 430 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
92c9086e 431 ctx->ginit = gcm_init_avx;
0f113f3e 432 ctx->gmult = gcm_gmult_avx;
92c9086e 433 ctx->ghash = gcm_ghash_avx;
0f113f3e 434 } else {
92c9086e 435 ctx->ginit = gcm_init_clmul;
0f113f3e 436 ctx->gmult = gcm_gmult_clmul;
92c9086e 437 ctx->ghash = gcm_ghash_clmul;
0f113f3e
MC
438 }
439 return;
440 }
7b6e19fc 441# endif
92c9086e
TS
442# if defined(GHASH_ASM_X86)
443 /* x86 only */
444# if defined(OPENSSL_IA32_SSE2)
0f113f3e 445 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
92c9086e
TS
446 ctx->gmult = gcm_gmult_4bit_mmx;
447 ctx->ghash = gcm_ghash_4bit_mmx;
448 return;
449 }
7b6e19fc 450# else
0f113f3e 451 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
0f113f3e 452 ctx->gmult = gcm_gmult_4bit_mmx;
92c9086e
TS
453 ctx->ghash = gcm_ghash_4bit_mmx;
454 return;
0f113f3e 455 }
92c9086e
TS
456# endif
457 ctx->gmult = gcm_gmult_4bit_x86;
458 ctx->ghash = gcm_ghash_4bit_x86;
459 return;
7b6e19fc 460# endif
92c9086e 461#elif defined(GHASH_ASM_ARM)
186be8ed
TM
462 /* ARM defaults */
463 ctx->gmult = gcm_gmult_4bit;
464 ctx->ghash = gcm_ghash_4bit;
7b6e19fc 465# ifdef PMULL_CAPABLE
0f113f3e 466 if (PMULL_CAPABLE) {
92c9086e 467 ctx->ginit = (gcm_init_fn)gcm_init_v8;
0f113f3e 468 ctx->gmult = gcm_gmult_v8;
92c9086e
TS
469 ctx->ghash = gcm_ghash_v8;
470 }
471# elif defined(NEON_CAPABLE)
0f113f3e 472 if (NEON_CAPABLE) {
92c9086e 473 ctx->ginit = gcm_init_neon;
0f113f3e 474 ctx->gmult = gcm_gmult_neon;
92c9086e 475 ctx->ghash = gcm_ghash_neon;
0f113f3e 476 }
92c9086e
TS
477# endif
478 return;
479#elif defined(GHASH_ASM_SPARC)
186be8ed
TM
480 /* SPARC defaults */
481 ctx->gmult = gcm_gmult_4bit;
482 ctx->ghash = gcm_ghash_4bit;
0f113f3e 483 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
92c9086e 484 ctx->ginit = gcm_init_vis3;
0f113f3e 485 ctx->gmult = gcm_gmult_vis3;
92c9086e 486 ctx->ghash = gcm_ghash_vis3;
0f113f3e 487 }
92c9086e
TS
488 return;
489#elif defined(GHASH_ASM_PPC)
186be8ed 490 /* PowerPC does not define GHASH_ASM; defaults set above */
0f113f3e 491 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
92c9086e 492 ctx->ginit = gcm_init_p8;
0f113f3e 493 ctx->gmult = gcm_gmult_p8;
92c9086e 494 ctx->ghash = gcm_ghash_p8;
0f113f3e 495 }
92c9086e
TS
496 return;
497#elif defined(GHASH_ASM_RISCV) && __riscv_xlen == 64
186be8ed 498 /* RISCV defaults; gmult already set above */
92c9086e 499 ctx->ghash = NULL;
999376dc 500 if (RISCV_HAS_ZBB() && RISCV_HAS_ZBC()) {
92c9086e 501 ctx->ginit = gcm_init_clmul_rv64i_zbb_zbc;
999376dc 502 ctx->gmult = gcm_gmult_clmul_rv64i_zbb_zbc;
999376dc 503 }
92c9086e 504 return;
186be8ed
TM
505#elif defined(GHASH_ASM)
506 /* all other architectures use the generic names */
48e35b99
JC
507 ctx->gmult = gcm_gmult_4bit;
508 ctx->ghash = gcm_ghash_4bit;
509 return;
510#endif
92c9086e
TS
511}
512
513void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
514{
515 DECLARE_IS_ENDIAN;
516
517 memset(ctx, 0, sizeof(*ctx));
518 ctx->block = block;
519 ctx->key = key;
520
521 (*block) (ctx->H.c, ctx->H.c, key);
522
523 if (IS_LITTLE_ENDIAN) {
524 /* H is stored in host byte order */
525#ifdef BSWAP8
526 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
527 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
7b6e19fc 528#else
92c9086e
TS
529 u8 *p = ctx->H.c;
530 u64 hi, lo;
531 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
532 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
533 ctx->H.u[0] = hi;
534 ctx->H.u[1] = lo;
a595baff 535#endif
92c9086e
TS
536 }
537
538 gcm_get_funcs(&ctx->funcs);
539 ctx->funcs.ginit(ctx->Htable, ctx->H.u);
e7f5b1cd
AP
540}
541
0f113f3e
MC
542void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
543 size_t len)
e7f5b1cd 544{
e23d850f 545 DECLARE_IS_ENDIAN;
0f113f3e 546 unsigned int ctr;
0f113f3e 547
0f113f3e
MC
548 ctx->len.u[0] = 0; /* AAD length */
549 ctx->len.u[1] = 0; /* message length */
550 ctx->ares = 0;
551 ctx->mres = 0;
552
553 if (len == 12) {
554 memcpy(ctx->Yi.c, iv, 12);
f5791af3
AP
555 ctx->Yi.c[12] = 0;
556 ctx->Yi.c[13] = 0;
557 ctx->Yi.c[14] = 0;
0f113f3e
MC
558 ctx->Yi.c[15] = 1;
559 ctr = 1;
560 } else {
561 size_t i;
562 u64 len0 = len;
563
f5791af3
AP
564 /* Borrow ctx->Xi to calculate initial Yi */
565 ctx->Xi.u[0] = 0;
566 ctx->Xi.u[1] = 0;
567
0f113f3e
MC
568 while (len >= 16) {
569 for (i = 0; i < 16; ++i)
f5791af3
AP
570 ctx->Xi.c[i] ^= iv[i];
571 GCM_MUL(ctx);
0f113f3e
MC
572 iv += 16;
573 len -= 16;
574 }
575 if (len) {
576 for (i = 0; i < len; ++i)
f5791af3
AP
577 ctx->Xi.c[i] ^= iv[i];
578 GCM_MUL(ctx);
0f113f3e
MC
579 }
580 len0 <<= 3;
e23d850f 581 if (IS_LITTLE_ENDIAN) {
e7f5b1cd 582#ifdef BSWAP8
f5791af3 583 ctx->Xi.u[1] ^= BSWAP8(len0);
e7f5b1cd 584#else
f5791af3
AP
585 ctx->Xi.c[8] ^= (u8)(len0 >> 56);
586 ctx->Xi.c[9] ^= (u8)(len0 >> 48);
587 ctx->Xi.c[10] ^= (u8)(len0 >> 40);
588 ctx->Xi.c[11] ^= (u8)(len0 >> 32);
589 ctx->Xi.c[12] ^= (u8)(len0 >> 24);
590 ctx->Xi.c[13] ^= (u8)(len0 >> 16);
591 ctx->Xi.c[14] ^= (u8)(len0 >> 8);
592 ctx->Xi.c[15] ^= (u8)(len0);
e7f5b1cd 593#endif
f5791af3
AP
594 } else {
595 ctx->Xi.u[1] ^= len0;
596 }
e7f5b1cd 597
f5791af3 598 GCM_MUL(ctx);
e7f5b1cd 599
e23d850f 600 if (IS_LITTLE_ENDIAN)
997d1aac 601#ifdef BSWAP4
f5791af3 602 ctr = BSWAP4(ctx->Xi.d[3]);
997d1aac 603#else
f5791af3 604 ctr = GETU32(ctx->Xi.c + 12);
997d1aac 605#endif
0f113f3e 606 else
f5791af3
AP
607 ctr = ctx->Xi.d[3];
608
609 /* Copy borrowed Xi to Yi */
610 ctx->Yi.u[0] = ctx->Xi.u[0];
611 ctx->Yi.u[1] = ctx->Xi.u[1];
0f113f3e 612 }
e7f5b1cd 613
f5791af3
AP
614 ctx->Xi.u[0] = 0;
615 ctx->Xi.u[1] = 0;
616
0f113f3e
MC
617 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
618 ++ctr;
e23d850f 619 if (IS_LITTLE_ENDIAN)
997d1aac 620#ifdef BSWAP4
0f113f3e 621 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 622#else
0f113f3e 623 PUTU32(ctx->Yi.c + 12, ctr);
997d1aac 624#endif
0f113f3e
MC
625 else
626 ctx->Yi.d[3] = ctr;
e7f5b1cd
AP
627}
628
0f113f3e
MC
629int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
630 size_t len)
e7f5b1cd 631{
0f113f3e
MC
632 size_t i;
633 unsigned int n;
634 u64 alen = ctx->len.u[0];
e7f5b1cd 635
0f113f3e
MC
636 if (ctx->len.u[1])
637 return -2;
638
639 alen += len;
640 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
641 return -1;
642 ctx->len.u[0] = alen;
643
644 n = ctx->ares;
645 if (n) {
646 while (n && len) {
647 ctx->Xi.c[n] ^= *(aad++);
648 --len;
649 n = (n + 1) % 16;
650 }
651 if (n == 0)
f5791af3 652 GCM_MUL(ctx);
0f113f3e
MC
653 else {
654 ctx->ares = n;
655 return 0;
656 }
657 }
2262beef 658#ifdef GHASH
0f113f3e
MC
659 if ((i = (len & (size_t)-16))) {
660 GHASH(ctx, aad, i);
661 aad += i;
662 len -= i;
663 }
2262beef 664#else
0f113f3e
MC
665 while (len >= 16) {
666 for (i = 0; i < 16; ++i)
667 ctx->Xi.c[i] ^= aad[i];
f5791af3 668 GCM_MUL(ctx);
0f113f3e
MC
669 aad += 16;
670 len -= 16;
671 }
2262beef 672#endif
0f113f3e
MC
673 if (len) {
674 n = (unsigned int)len;
675 for (i = 0; i < len; ++i)
676 ctx->Xi.c[i] ^= aad[i];
677 }
b68c1315 678
0f113f3e
MC
679 ctx->ares = n;
680 return 0;
e7f5b1cd
AP
681}
682
1f2502eb 683int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
0f113f3e
MC
684 const unsigned char *in, unsigned char *out,
685 size_t len)
e7f5b1cd 686{
e23d850f 687 DECLARE_IS_ENDIAN;
c1b2569d 688 unsigned int n, ctr, mres;
0f113f3e
MC
689 size_t i;
690 u64 mlen = ctx->len.u[1];
691 block128_f block = ctx->block;
692 void *key = ctx->key;
1f2502eb 693
0f113f3e
MC
694 mlen += len;
695 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
696 return -1;
697 ctx->len.u[1] = mlen;
e7f5b1cd 698
c1b2569d
AP
699 mres = ctx->mres;
700
0f113f3e
MC
701 if (ctx->ares) {
702 /* First call to encrypt finalizes GHASH(AAD) */
c1b2569d
AP
703#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
704 if (len == 0) {
705 GCM_MUL(ctx);
706 ctx->ares = 0;
707 return 0;
708 }
709 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
710 ctx->Xi.u[0] = 0;
711 ctx->Xi.u[1] = 0;
712 mres = sizeof(ctx->Xi);
713#else
f5791af3 714 GCM_MUL(ctx);
c1b2569d 715#endif
0f113f3e
MC
716 ctx->ares = 0;
717 }
96a4cf8c 718
e23d850f 719 if (IS_LITTLE_ENDIAN)
997d1aac 720#ifdef BSWAP4
0f113f3e 721 ctr = BSWAP4(ctx->Yi.d[3]);
997d1aac 722#else
0f113f3e 723 ctr = GETU32(ctx->Yi.c + 12);
997d1aac 724#endif
0f113f3e
MC
725 else
726 ctr = ctx->Yi.d[3];
96a4cf8c 727
c1b2569d 728 n = mres % 16;
0f113f3e
MC
729#if !defined(OPENSSL_SMALL_FOOTPRINT)
730 if (16 % sizeof(size_t) == 0) { /* always true actually */
731 do {
732 if (n) {
c1b2569d
AP
733# if defined(GHASH)
734 while (n && len) {
735 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
736 --len;
737 n = (n + 1) % 16;
738 }
739 if (n == 0) {
740 GHASH(ctx, ctx->Xn, mres);
741 mres = 0;
742 } else {
743 ctx->mres = mres;
744 return 0;
745 }
746# else
0f113f3e
MC
747 while (n && len) {
748 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
749 --len;
750 n = (n + 1) % 16;
751 }
c1b2569d 752 if (n == 0) {
f5791af3 753 GCM_MUL(ctx);
c1b2569d
AP
754 mres = 0;
755 } else {
0f113f3e
MC
756 ctx->mres = n;
757 return 0;
758 }
c1b2569d 759# endif
0f113f3e
MC
760 }
761# if defined(STRICT_ALIGNMENT)
762 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
763 break;
764# endif
2e635aa8 765# if defined(GHASH)
c1b2569d
AP
766 if (len >= 16 && mres) {
767 GHASH(ctx, ctx->Xn, mres);
768 mres = 0;
769 }
2e635aa8 770# if defined(GHASH_CHUNK)
0f113f3e
MC
771 while (len >= GHASH_CHUNK) {
772 size_t j = GHASH_CHUNK;
773
774 while (j) {
77286fe3
BE
775 size_t_aX *out_t = (size_t_aX *)out;
776 const size_t_aX *in_t = (const size_t_aX *)in;
0f113f3e
MC
777
778 (*block) (ctx->Yi.c, ctx->EKi.c, key);
779 ++ctr;
e23d850f 780 if (IS_LITTLE_ENDIAN)
2e635aa8 781# ifdef BSWAP4
0f113f3e 782 ctx->Yi.d[3] = BSWAP4(ctr);
2e635aa8 783# else
0f113f3e 784 PUTU32(ctx->Yi.c + 12, ctr);
2e635aa8 785# endif
0f113f3e
MC
786 else
787 ctx->Yi.d[3] = ctr;
788 for (i = 0; i < 16 / sizeof(size_t); ++i)
789 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
790 out += 16;
791 in += 16;
792 j -= 16;
793 }
794 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
795 len -= GHASH_CHUNK;
796 }
2e635aa8 797# endif
0f113f3e
MC
798 if ((i = (len & (size_t)-16))) {
799 size_t j = i;
800
801 while (len >= 16) {
77286fe3
BE
802 size_t_aX *out_t = (size_t_aX *)out;
803 const size_t_aX *in_t = (const size_t_aX *)in;
0f113f3e
MC
804
805 (*block) (ctx->Yi.c, ctx->EKi.c, key);
806 ++ctr;
e23d850f 807 if (IS_LITTLE_ENDIAN)
0f113f3e
MC
808# ifdef BSWAP4
809 ctx->Yi.d[3] = BSWAP4(ctr);
810# else
811 PUTU32(ctx->Yi.c + 12, ctr);
812# endif
813 else
814 ctx->Yi.d[3] = ctr;
815 for (i = 0; i < 16 / sizeof(size_t); ++i)
816 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
817 out += 16;
818 in += 16;
819 len -= 16;
820 }
821 GHASH(ctx, out - j, j);
822 }
823# else
824 while (len >= 16) {
825 size_t *out_t = (size_t *)out;
826 const size_t *in_t = (const size_t *)in;
827
828 (*block) (ctx->Yi.c, ctx->EKi.c, key);
829 ++ctr;
e23d850f 830 if (IS_LITTLE_ENDIAN)
0f113f3e
MC
831# ifdef BSWAP4
832 ctx->Yi.d[3] = BSWAP4(ctr);
833# else
834 PUTU32(ctx->Yi.c + 12, ctr);
835# endif
836 else
837 ctx->Yi.d[3] = ctr;
838 for (i = 0; i < 16 / sizeof(size_t); ++i)
839 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
f5791af3 840 GCM_MUL(ctx);
0f113f3e
MC
841 out += 16;
842 in += 16;
843 len -= 16;
844 }
845# endif
846 if (len) {
847 (*block) (ctx->Yi.c, ctx->EKi.c, key);
848 ++ctr;
e23d850f 849 if (IS_LITTLE_ENDIAN)
0f113f3e
MC
850# ifdef BSWAP4
851 ctx->Yi.d[3] = BSWAP4(ctr);
852# else
853 PUTU32(ctx->Yi.c + 12, ctr);
854# endif
855 else
856 ctx->Yi.d[3] = ctr;
c1b2569d
AP
857# if defined(GHASH)
858 while (len--) {
859 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
860 ++n;
861 }
862# else
0f113f3e
MC
863 while (len--) {
864 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
865 ++n;
866 }
c1b2569d
AP
867 mres = n;
868# endif
0f113f3e
MC
869 }
870
c1b2569d 871 ctx->mres = mres;
0f113f3e
MC
872 return 0;
873 } while (0);
874 }
e7f5b1cd 875#endif
0f113f3e
MC
876 for (i = 0; i < len; ++i) {
877 if (n == 0) {
878 (*block) (ctx->Yi.c, ctx->EKi.c, key);
879 ++ctr;
e23d850f 880 if (IS_LITTLE_ENDIAN)
997d1aac 881#ifdef BSWAP4
0f113f3e 882 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 883#else
0f113f3e
MC
884 PUTU32(ctx->Yi.c + 12, ctr);
885#endif
886 else
887 ctx->Yi.d[3] = ctr;
888 }
c1b2569d
AP
889#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
890 ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
0f113f3e 891 n = (n + 1) % 16;
c1b2569d
AP
892 if (mres == sizeof(ctx->Xn)) {
893 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
894 mres = 0;
895 }
896#else
897 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
898 mres = n = (n + 1) % 16;
0f113f3e 899 if (n == 0)
f5791af3 900 GCM_MUL(ctx);
c1b2569d 901#endif
0f113f3e
MC
902 }
903
c1b2569d 904 ctx->mres = mres;
0f113f3e 905 return 0;
e7f5b1cd
AP
906}
907
1f2502eb 908int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
0f113f3e
MC
909 const unsigned char *in, unsigned char *out,
910 size_t len)
e7f5b1cd 911{
e23d850f 912 DECLARE_IS_ENDIAN;
c1b2569d 913 unsigned int n, ctr, mres;
0f113f3e
MC
914 size_t i;
915 u64 mlen = ctx->len.u[1];
916 block128_f block = ctx->block;
917 void *key = ctx->key;
1f2502eb 918
0f113f3e
MC
919 mlen += len;
920 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
921 return -1;
922 ctx->len.u[1] = mlen;
e7f5b1cd 923
c1b2569d
AP
924 mres = ctx->mres;
925
0f113f3e
MC
926 if (ctx->ares) {
927 /* First call to decrypt finalizes GHASH(AAD) */
c1b2569d
AP
928#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
929 if (len == 0) {
930 GCM_MUL(ctx);
931 ctx->ares = 0;
932 return 0;
933 }
934 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
935 ctx->Xi.u[0] = 0;
936 ctx->Xi.u[1] = 0;
937 mres = sizeof(ctx->Xi);
938#else
f5791af3 939 GCM_MUL(ctx);
c1b2569d 940#endif
0f113f3e
MC
941 ctx->ares = 0;
942 }
b68c1315 943
e23d850f 944 if (IS_LITTLE_ENDIAN)
997d1aac 945#ifdef BSWAP4
0f113f3e 946 ctr = BSWAP4(ctx->Yi.d[3]);
997d1aac 947#else
0f113f3e 948 ctr = GETU32(ctx->Yi.c + 12);
997d1aac 949#endif
0f113f3e
MC
950 else
951 ctr = ctx->Yi.d[3];
e7f5b1cd 952
c1b2569d 953 n = mres % 16;
e7f5b1cd 954#if !defined(OPENSSL_SMALL_FOOTPRINT)
0f113f3e
MC
955 if (16 % sizeof(size_t) == 0) { /* always true actually */
956 do {
957 if (n) {
c1b2569d
AP
958# if defined(GHASH)
959 while (n && len) {
960 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
961 --len;
962 n = (n + 1) % 16;
963 }
964 if (n == 0) {
965 GHASH(ctx, ctx->Xn, mres);
966 mres = 0;
967 } else {
968 ctx->mres = mres;
969 return 0;
970 }
971# else
0f113f3e
MC
972 while (n && len) {
973 u8 c = *(in++);
974 *(out++) = c ^ ctx->EKi.c[n];
975 ctx->Xi.c[n] ^= c;
976 --len;
977 n = (n + 1) % 16;
978 }
c1b2569d 979 if (n == 0) {
f5791af3 980 GCM_MUL(ctx);
c1b2569d
AP
981 mres = 0;
982 } else {
0f113f3e
MC
983 ctx->mres = n;
984 return 0;
985 }
c1b2569d 986# endif
0f113f3e
MC
987 }
988# if defined(STRICT_ALIGNMENT)
989 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
990 break;
991# endif
2e635aa8 992# if defined(GHASH)
c1b2569d
AP
993 if (len >= 16 && mres) {
994 GHASH(ctx, ctx->Xn, mres);
995 mres = 0;
996 }
2e635aa8 997# if defined(GHASH_CHUNK)
0f113f3e
MC
998 while (len >= GHASH_CHUNK) {
999 size_t j = GHASH_CHUNK;
1000
1001 GHASH(ctx, in, GHASH_CHUNK);
1002 while (j) {
77286fe3
BE
1003 size_t_aX *out_t = (size_t_aX *)out;
1004 const size_t_aX *in_t = (const size_t_aX *)in;
0f113f3e
MC
1005
1006 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1007 ++ctr;
e23d850f 1008 if (IS_LITTLE_ENDIAN)
2e635aa8 1009# ifdef BSWAP4
0f113f3e 1010 ctx->Yi.d[3] = BSWAP4(ctr);
2e635aa8 1011# else
0f113f3e 1012 PUTU32(ctx->Yi.c + 12, ctr);
2e635aa8 1013# endif
0f113f3e
MC
1014 else
1015 ctx->Yi.d[3] = ctr;
1016 for (i = 0; i < 16 / sizeof(size_t); ++i)
1017 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1018 out += 16;
1019 in += 16;
1020 j -= 16;
1021 }
1022 len -= GHASH_CHUNK;
1023 }
2e635aa8 1024# endif
0f113f3e
MC
1025 if ((i = (len & (size_t)-16))) {
1026 GHASH(ctx, in, i);
1027 while (len >= 16) {
77286fe3
BE
1028 size_t_aX *out_t = (size_t_aX *)out;
1029 const size_t_aX *in_t = (const size_t_aX *)in;
0f113f3e
MC
1030
1031 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1032 ++ctr;
e23d850f 1033 if (IS_LITTLE_ENDIAN)
0f113f3e
MC
1034# ifdef BSWAP4
1035 ctx->Yi.d[3] = BSWAP4(ctr);
1036# else
1037 PUTU32(ctx->Yi.c + 12, ctr);
1038# endif
1039 else
1040 ctx->Yi.d[3] = ctr;
1041 for (i = 0; i < 16 / sizeof(size_t); ++i)
1042 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1043 out += 16;
1044 in += 16;
1045 len -= 16;
1046 }
1047 }
1048# else
1049 while (len >= 16) {
1050 size_t *out_t = (size_t *)out;
1051 const size_t *in_t = (const size_t *)in;
1052
1053 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1054 ++ctr;
e23d850f 1055 if (IS_LITTLE_ENDIAN)
0f113f3e
MC
1056# ifdef BSWAP4
1057 ctx->Yi.d[3] = BSWAP4(ctr);
1058# else
1059 PUTU32(ctx->Yi.c + 12, ctr);
1060# endif
1061 else
1062 ctx->Yi.d[3] = ctr;
1063 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1d724b5e
ZJ
1064 size_t c = in_t[i];
1065 out_t[i] = c ^ ctx->EKi.t[i];
0f113f3e
MC
1066 ctx->Xi.t[i] ^= c;
1067 }
f5791af3 1068 GCM_MUL(ctx);
0f113f3e
MC
1069 out += 16;
1070 in += 16;
1071 len -= 16;
1072 }
1073# endif
1074 if (len) {
1075 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1076 ++ctr;
e23d850f 1077 if (IS_LITTLE_ENDIAN)
0f113f3e
MC
1078# ifdef BSWAP4
1079 ctx->Yi.d[3] = BSWAP4(ctr);
1080# else
1081 PUTU32(ctx->Yi.c + 12, ctr);
1082# endif
1083 else
1084 ctx->Yi.d[3] = ctr;
c1b2569d
AP
1085# if defined(GHASH)
1086 while (len--) {
1087 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1088 ++n;
1089 }
1090# else
0f113f3e
MC
1091 while (len--) {
1092 u8 c = in[n];
1093 ctx->Xi.c[n] ^= c;
1094 out[n] = c ^ ctx->EKi.c[n];
1095 ++n;
1096 }
c1b2569d
AP
1097 mres = n;
1098# endif
0f113f3e
MC
1099 }
1100
c1b2569d 1101 ctx->mres = mres;
0f113f3e
MC
1102 return 0;
1103 } while (0);
1104 }
997d1aac 1105#endif
0f113f3e
MC
1106 for (i = 0; i < len; ++i) {
1107 u8 c;
1108 if (n == 0) {
1109 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1110 ++ctr;
e23d850f 1111 if (IS_LITTLE_ENDIAN)
997d1aac 1112#ifdef BSWAP4
0f113f3e 1113 ctx->Yi.d[3] = BSWAP4(ctr);
997d1aac 1114#else
0f113f3e
MC
1115 PUTU32(ctx->Yi.c + 12, ctr);
1116#endif
1117 else
1118 ctx->Yi.d[3] = ctr;
1119 }
c1b2569d
AP
1120#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1121 out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1122 n = (n + 1) % 16;
1123 if (mres == sizeof(ctx->Xn)) {
1124 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1125 mres = 0;
1126 }
1127#else
0f113f3e
MC
1128 c = in[i];
1129 out[i] = c ^ ctx->EKi.c[n];
1130 ctx->Xi.c[n] ^= c;
c1b2569d 1131 mres = n = (n + 1) % 16;
0f113f3e 1132 if (n == 0)
f5791af3 1133 GCM_MUL(ctx);
c1b2569d 1134#endif
0f113f3e 1135 }
96a4cf8c 1136
c1b2569d 1137 ctx->mres = mres;
0f113f3e 1138 return 0;
e7f5b1cd
AP
1139}
1140
/*
 * Encrypt |len| bytes from |in| to |out| in GCM mode. Runs of whole 16-byte
 * blocks are handed to the caller-supplied |stream| routine; a trailing
 * partial block is produced with ctx->block. The ciphertext written to |out|
 * is fed into the hash state kept in |ctx|.
 *
 * Returns 0 on success, or -1 when the accumulated message length
 * (ctx->len.u[1] plus |len|) exceeds 2^36 - 32 bytes or wraps around.
 *
 * When OPENSSL_SMALL_FOOTPRINT is defined the call is simply forwarded to
 * CRYPTO_gcm128_encrypt() and |stream| is not used.
 */
1f2502eb 1141int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
0f113f3e
MC
1142 const unsigned char *in, unsigned char *out,
1143 size_t len, ctr128_f stream)
f71c6ace 1144{
2e635aa8
AP
1145#if defined(OPENSSL_SMALL_FOOTPRINT)
1146 return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1147#else
e23d850f 1148 DECLARE_IS_ENDIAN;
c1b2569d 1149 unsigned int n, ctr, mres;
0f113f3e
MC
1150 size_t i;
1151 u64 mlen = ctx->len.u[1];
1152 void *key = ctx->key;
1f2502eb 1153
0f113f3e
MC
1154 mlen += len;
/* Refuse a running total above 2^36 - 32 bytes, or one that wrapped. */
1155 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1156 return -1;
1157 ctx->len.u[1] = mlen;
f71c6ace 1158
c1b2569d
AP
1159 mres = ctx->mres;
1160
0f113f3e
MC
1161 if (ctx->ares) {
1162 /* First call to encrypt finalizes GHASH(AAD) */
c1b2569d
AP
1163#if defined(GHASH)
1164 if (len == 0) {
1165 GCM_MUL(ctx);
1166 ctx->ares = 0;
1167 return 0;
1168 }
/*
 * Instead of multiplying right away, copy Xi to the front of Xn, zero Xi
 * and count those bytes in mres; they are hashed by a later GHASH() call
 * over Xn.
 */
1169 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1170 ctx->Xi.u[0] = 0;
1171 ctx->Xi.u[1] = 0;
1172 mres = sizeof(ctx->Xi);
1173#else
f5791af3 1174 GCM_MUL(ctx);
c1b2569d 1175#endif
0f113f3e
MC
1176 ctx->ares = 0;
1177 }
b68c1315 1178
/*
 * Fetch the 32-bit counter from the last four bytes of Yi, byte-swapping
 * on little-endian hosts.
 */
e23d850f 1179 if (IS_LITTLE_ENDIAN)
2e635aa8 1180# ifdef BSWAP4
0f113f3e 1181 ctr = BSWAP4(ctx->Yi.d[3]);
2e635aa8 1182# else
0f113f3e 1183 ctr = GETU32(ctx->Yi.c + 12);
2e635aa8 1184# endif
0f113f3e
MC
1185 else
1186 ctr = ctx->Yi.d[3];
1187
/* n is the position inside the 16-byte keystream block EKi to resume at. */
c1b2569d 1188 n = mres % 16;
0f113f3e 1189 if (n) {
c1b2569d
AP
1190# if defined(GHASH)
/* Use up the rest of EKi; each ciphertext byte is also buffered in Xn. */
1191 while (n && len) {
1192 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1193 --len;
1194 n = (n + 1) % 16;
1195 }
/*
 * n == 0: a block boundary was reached, hash everything buffered.
 * Otherwise the input ran out mid-block: save the count and return.
 */
1196 if (n == 0) {
1197 GHASH(ctx, ctx->Xn, mres);
1198 mres = 0;
1199 } else {
1200 ctx->mres = mres;
1201 return 0;
1202 }
1203# else
0f113f3e
MC
/* Same as above, but ciphertext bytes are XORed straight into Xi. */
1204 while (n && len) {
1205 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1206 --len;
1207 n = (n + 1) % 16;
1208 }
c1b2569d 1209 if (n == 0) {
f5791af3 1210 GCM_MUL(ctx);
c1b2569d
AP
1211 mres = 0;
1212 } else {
0f113f3e
MC
1213 ctx->mres = n;
1214 return 0;
1215 }
c1b2569d 1216# endif
0f113f3e 1217 }
c1b2569d
AP
1218# if defined(GHASH)
/* Flush bytes still buffered in Xn before hashing directly from |out|. */
1219 if (len >= 16 && mres) {
1220 GHASH(ctx, ctx->Xn, mres);
1221 mres = 0;
1222 }
1223# if defined(GHASH_CHUNK)
0f113f3e
MC
/*
 * Bulk path: encrypt GHASH_CHUNK bytes with |stream|, advance ctr by the
 * number of blocks and store it back into Yi, then hash the ciphertext
 * that was just written.
 */
1224 while (len >= GHASH_CHUNK) {
1225 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1226 ctr += GHASH_CHUNK / 16;
e23d850f 1227 if (IS_LITTLE_ENDIAN)
c1b2569d 1228# ifdef BSWAP4
0f113f3e 1229 ctx->Yi.d[3] = BSWAP4(ctr);
c1b2569d 1230# else
0f113f3e 1231 PUTU32(ctx->Yi.c + 12, ctr);
c1b2569d 1232# endif
0f113f3e
MC
1233 else
1234 ctx->Yi.d[3] = ctr;
1235 GHASH(ctx, out, GHASH_CHUNK);
1236 out += GHASH_CHUNK;
1237 in += GHASH_CHUNK;
1238 len -= GHASH_CHUNK;
1239 }
c1b2569d 1240# endif
2e635aa8 1241# endif
0f113f3e
MC
/* Remaining whole blocks: i is len rounded down to a multiple of 16. */
1242 if ((i = (len & (size_t)-16))) {
1243 size_t j = i / 16;
f71c6ace 1244
0f113f3e
MC
1245 (*stream) (in, out, j, key, ctx->Yi.c);
1246 ctr += (unsigned int)j;
e23d850f 1247 if (IS_LITTLE_ENDIAN)
2e635aa8 1248# ifdef BSWAP4
0f113f3e 1249 ctx->Yi.d[3] = BSWAP4(ctr);
2e635aa8 1250# else
0f113f3e 1251 PUTU32(ctx->Yi.c + 12, ctr);
2e635aa8 1252# endif
0f113f3e
MC
1253 else
1254 ctx->Yi.d[3] = ctr;
/* in and len are advanced here because i is reused as a byte index below. */
1255 in += i;
1256 len -= i;
2e635aa8 1257# if defined(GHASH)
0f113f3e
MC
1258 GHASH(ctx, out, i);
1259 out += i;
2e635aa8 1260# else
0f113f3e
MC
/* Without GHASH: fold the ciphertext into Xi one block at a time. */
1261 while (j--) {
1262 for (i = 0; i < 16; ++i)
1263 ctx->Xi.c[i] ^= out[i];
f5791af3 1264 GCM_MUL(ctx);
0f113f3e
MC
1265 out += 16;
1266 }
2e635aa8 1267# endif
0f113f3e
MC
1268 }
/*
 * Trailing partial block (len < 16 here): produce one more keystream block
 * in EKi, bump the counter, and XOR the remaining bytes. n is 0 at this
 * point, so in[n]/out[n] index from the current pointers.
 */
1269 if (len) {
1270 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1271 ++ctr;
e23d850f 1272 if (IS_LITTLE_ENDIAN)
2e635aa8 1273# ifdef BSWAP4
0f113f3e 1274 ctx->Yi.d[3] = BSWAP4(ctr);
2e635aa8 1275# else
0f113f3e 1276 PUTU32(ctx->Yi.c + 12, ctr);
2e635aa8 1277# endif
0f113f3e
MC
1278 else
1279 ctx->Yi.d[3] = ctr;
1280 while (len--) {
c1b2569d
AP
1281# if defined(GHASH)
1282 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1283# else
1284 ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1285# endif
0f113f3e
MC
1286 ++n;
1287 }
1288 }
1289
/* Save the number of not-yet-multiplied bytes for the next call. */
c1b2569d 1290 ctx->mres = mres;
0f113f3e 1291 return 0;
2e635aa8 1292#endif
f71c6ace
AP
1293}
1294
/*
 * Decrypt |len| bytes from |in| to |out| in GCM mode. Runs of whole 16-byte
 * blocks are handed to the caller-supplied |stream| routine; a trailing
 * partial block is produced with ctx->block. The ciphertext read from |in|
 * is fed into the hash state kept in |ctx|.
 *
 * Returns 0 on success, or -1 when the accumulated message length
 * (ctx->len.u[1] plus |len|) exceeds 2^36 - 32 bytes or wraps around.
 * The tag is not checked here.
 *
 * When OPENSSL_SMALL_FOOTPRINT is defined the call is simply forwarded to
 * CRYPTO_gcm128_decrypt() and |stream| is not used.
 */
1f2502eb 1295int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
0f113f3e
MC
1296 const unsigned char *in, unsigned char *out,
1297 size_t len, ctr128_f stream)
f71c6ace 1298{
2e635aa8
AP
1299#if defined(OPENSSL_SMALL_FOOTPRINT)
1300 return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1301#else
e23d850f 1302 DECLARE_IS_ENDIAN;
c1b2569d 1303 unsigned int n, ctr, mres;
0f113f3e
MC
1304 size_t i;
1305 u64 mlen = ctx->len.u[1];
1306 void *key = ctx->key;
1f2502eb 1307
0f113f3e
MC
1308 mlen += len;
/* Refuse a running total above 2^36 - 32 bytes, or one that wrapped. */
1309 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1310 return -1;
1311 ctx->len.u[1] = mlen;
f71c6ace 1312
c1b2569d
AP
1313 mres = ctx->mres;
1314
0f113f3e
MC
1315 if (ctx->ares) {
1316 /* First call to decrypt finalizes GHASH(AAD) */
c1b2569d
AP
1317# if defined(GHASH)
1318 if (len == 0) {
1319 GCM_MUL(ctx);
1320 ctx->ares = 0;
1321 return 0;
1322 }
/*
 * Instead of multiplying right away, copy Xi to the front of Xn, zero Xi
 * and count those bytes in mres; they are hashed by a later GHASH() call
 * over Xn.
 */
1323 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1324 ctx->Xi.u[0] = 0;
1325 ctx->Xi.u[1] = 0;
1326 mres = sizeof(ctx->Xi);
1327# else
f5791af3 1328 GCM_MUL(ctx);
c1b2569d 1329# endif
0f113f3e
MC
1330 ctx->ares = 0;
1331 }
b68c1315 1332
/*
 * Fetch the 32-bit counter from the last four bytes of Yi, byte-swapping
 * on little-endian hosts.
 */
e23d850f 1333 if (IS_LITTLE_ENDIAN)
2e635aa8 1334# ifdef BSWAP4
0f113f3e 1335 ctr = BSWAP4(ctx->Yi.d[3]);
2e635aa8 1336# else
0f113f3e 1337 ctr = GETU32(ctx->Yi.c + 12);
2e635aa8 1338# endif
0f113f3e
MC
1339 else
1340 ctr = ctx->Yi.d[3];
1341
/* n is the position inside the 16-byte keystream block EKi to resume at. */
c1b2569d 1342 n = mres % 16;
0f113f3e 1343 if (n) {
c1b2569d
AP
1344# if defined(GHASH)
/* Use up the rest of EKi; each ciphertext byte is buffered in Xn first. */
1345 while (n && len) {
1346 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1347 --len;
1348 n = (n + 1) % 16;
1349 }
/*
 * n == 0: a block boundary was reached, hash everything buffered.
 * Otherwise the input ran out mid-block: save the count and return.
 */
1350 if (n == 0) {
1351 GHASH(ctx, ctx->Xn, mres);
1352 mres = 0;
1353 } else {
1354 ctx->mres = mres;
1355 return 0;
1356 }
1357# else
0f113f3e
MC
/* Same as above, but ciphertext bytes are XORed straight into Xi. */
1358 while (n && len) {
1359 u8 c = *(in++);
1360 *(out++) = c ^ ctx->EKi.c[n];
1361 ctx->Xi.c[n] ^= c;
1362 --len;
1363 n = (n + 1) % 16;
1364 }
c1b2569d 1365 if (n == 0) {
f5791af3 1366 GCM_MUL(ctx);
c1b2569d
AP
1367 mres = 0;
1368 } else {
0f113f3e
MC
1369 ctx->mres = n;
1370 return 0;
1371 }
c1b2569d 1372# endif
0f113f3e 1373 }
c1b2569d
AP
1374# if defined(GHASH)
/* Flush bytes still buffered in Xn before hashing directly from |in|. */
1375 if (len >= 16 && mres) {
1376 GHASH(ctx, ctx->Xn, mres);
1377 mres = 0;
1378 }
1379# if defined(GHASH_CHUNK)
0f113f3e
MC
/*
 * Bulk path: hash GHASH_CHUNK bytes of ciphertext from |in| first, then
 * decrypt them with |stream|, advance ctr by the number of blocks and
 * store it back into Yi.
 * NOTE(review): hashing before |stream| runs presumably keeps in-place use
 * (in == out) correct - confirm against the callers.
 */
1380 while (len >= GHASH_CHUNK) {
1381 GHASH(ctx, in, GHASH_CHUNK);
1382 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1383 ctr += GHASH_CHUNK / 16;
e23d850f 1384 if (IS_LITTLE_ENDIAN)
c1b2569d 1385# ifdef BSWAP4
0f113f3e 1386 ctx->Yi.d[3] = BSWAP4(ctr);
c1b2569d 1387# else
0f113f3e 1388 PUTU32(ctx->Yi.c + 12, ctr);
c1b2569d 1389# endif
0f113f3e
MC
1390 else
1391 ctx->Yi.d[3] = ctr;
1392 out += GHASH_CHUNK;
1393 in += GHASH_CHUNK;
1394 len -= GHASH_CHUNK;
1395 }
c1b2569d 1396# endif
2e635aa8 1397# endif
0f113f3e
MC
/* Remaining whole blocks: i is len rounded down to a multiple of 16. */
1398 if ((i = (len & (size_t)-16))) {
1399 size_t j = i / 16;
f71c6ace 1400
2e635aa8 1401# if defined(GHASH)
0f113f3e 1402 GHASH(ctx, in, i);
2e635aa8 1403# else
0f113f3e
MC
/*
 * Without GHASH: fold the ciphertext into Xi one block at a time. The loop
 * consumes j and advances in, so both are restored afterwards for the
 * |stream| call.
 */
1404 while (j--) {
1405 size_t k;
1406 for (k = 0; k < 16; ++k)
1407 ctx->Xi.c[k] ^= in[k];
f5791af3 1408 GCM_MUL(ctx);
0f113f3e
MC
1409 in += 16;
1410 }
1411 j = i / 16;
1412 in -= i;
2e635aa8 1413# endif
0f113f3e
MC
1414 (*stream) (in, out, j, key, ctx->Yi.c);
1415 ctr += (unsigned int)j;
e23d850f 1416 if (IS_LITTLE_ENDIAN)
2e635aa8 1417# ifdef BSWAP4
0f113f3e 1418 ctx->Yi.d[3] = BSWAP4(ctr);
2e635aa8 1419# else
0f113f3e 1420 PUTU32(ctx->Yi.c + 12, ctr);
2e635aa8 1421# endif
0f113f3e
MC
1422 else
1423 ctx->Yi.d[3] = ctr;
1424 out += i;
1425 in += i;
1426 len -= i;
1427 }
/*
 * Trailing partial block (len < 16 here): produce one more keystream block
 * in EKi, bump the counter, and XOR the remaining bytes. n is 0 at this
 * point, so in[n]/out[n] index from the current pointers.
 */
1428 if (len) {
1429 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1430 ++ctr;
e23d850f 1431 if (IS_LITTLE_ENDIAN)
2e635aa8 1432# ifdef BSWAP4
0f113f3e 1433 ctx->Yi.d[3] = BSWAP4(ctr);
2e635aa8 1434# else
0f113f3e 1435 PUTU32(ctx->Yi.c + 12, ctr);
2e635aa8 1436# endif
0f113f3e
MC
1437 else
1438 ctx->Yi.d[3] = ctr;
1439 while (len--) {
c1b2569d
AP
1440# if defined(GHASH)
1441 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1442# else
0f113f3e 1443 u8 c = in[n];
c1b2569d 1444 ctx->Xi.c[mres++] ^= c;
0f113f3e 1445 out[n] = c ^ ctx->EKi.c[n];
c1b2569d 1446# endif
0f113f3e
MC
1447 ++n;
1448 }
1449 }
1450
/* Save the number of not-yet-multiplied bytes for the next call. */
c1b2569d 1451 ctx->mres = mres;
0f113f3e 1452 return 0;
2e635aa8 1453#endif
f71c6ace
AP
1454}
1455
0f113f3e
MC
1456int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1457 size_t len)
e7f5b1cd 1458{
e23d850f 1459 DECLARE_IS_ENDIAN;
0f113f3e
MC
1460 u64 alen = ctx->len.u[0] << 3;
1461 u64 clen = ctx->len.u[1] << 3;
e7f5b1cd 1462
c1b2569d
AP
1463#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1464 u128 bitlen;
1465 unsigned int mres = ctx->mres;
1466
1467 if (mres) {
1468 unsigned blocks = (mres + 15) & -16;
1469
1470 memset(ctx->Xn + mres, 0, blocks - mres);
1471 mres = blocks;
1472 if (mres == sizeof(ctx->Xn)) {
1473 GHASH(ctx, ctx->Xn, mres);
1474 mres = 0;
1475 }
1476 } else if (ctx->ares) {
1477 GCM_MUL(ctx);
1478 }
1479#else
0f113f3e 1480 if (ctx->mres || ctx->ares)
f5791af3 1481 GCM_MUL(ctx);
c1b2569d 1482#endif
e7f5b1cd 1483
e23d850f 1484 if (IS_LITTLE_ENDIAN) {
e7f5b1cd 1485#ifdef BSWAP8
0f113f3e
MC
1486 alen = BSWAP8(alen);
1487 clen = BSWAP8(clen);
e7f5b1cd 1488#else
0f113f3e 1489 u8 *p = ctx->len.c;
e7f5b1cd 1490
0f113f3e
MC
1491 ctx->len.u[0] = alen;
1492 ctx->len.u[1] = clen;
e7f5b1cd 1493
0f113f3e
MC
1494 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1495 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
e7f5b1cd 1496#endif
0f113f3e 1497 }
e7f5b1cd 1498
c1b2569d
AP
1499#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1500 bitlen.hi = alen;
1501 bitlen.lo = clen;
1502 memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1503 mres += sizeof(bitlen);
1504 GHASH(ctx, ctx->Xn, mres);
1505#else
0f113f3e
MC
1506 ctx->Xi.u[0] ^= alen;
1507 ctx->Xi.u[1] ^= clen;
f5791af3 1508 GCM_MUL(ctx);
c1b2569d 1509#endif
e7f5b1cd 1510
0f113f3e
MC
1511 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1512 ctx->Xi.u[1] ^= ctx->EK0.u[1];
6acb4ff3 1513
0f113f3e 1514 if (tag && len <= sizeof(ctx->Xi))
1e4a355d 1515 return CRYPTO_memcmp(ctx->Xi.c, tag, len);
0f113f3e
MC
1516 else
1517 return -1;
6acb4ff3
AP
1518}
1519
fd3dbc1d
DSH
1520void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1521{
0f113f3e
MC
1522 CRYPTO_gcm128_finish(ctx, NULL, 0);
1523 memcpy(tag, ctx->Xi.c,
1524 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
fd3dbc1d
DSH
1525}
1526
6acb4ff3
AP
1527GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1528{
0f113f3e 1529 GCM128_CONTEXT *ret;
6acb4ff3 1530
90945fa3 1531 if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
0f113f3e 1532 CRYPTO_gcm128_init(ret, key, block);
6acb4ff3 1533
0f113f3e 1534 return ret;
6acb4ff3
AP
1535}
1536
1537void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1538{
4b45c6e5 1539 OPENSSL_clear_free(ctx, sizeof(*ctx));
e7f5b1cd 1540}