/*
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <string.h>
#include <openssl/crypto.h>
#include "internal/cryptlib.h"
#include "internal/endian.h"
#include "crypto/modes.h"

#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif

#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif

#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } \
        else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
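
/*
 * A reference note on REDUCE1BIT: GHASH works in the bit-reflected
 * representation of GF(2^128), where multiplying an element V by x
 * amounts to shifting V right by one bit and, if the bit that fell off
 * V.lo was set, folding the constant 0xE1000000...00 (the reflected
 * image of x^128 = x^7 + x^2 + x + 1) into the top of V.hi.  That is
 * what the macro above computes, e.g.:
 *
 *      V.hi = H[0]; V.lo = H[1];      V = H
 *      REDUCE1BIT(V);                 V = H*x
 *      REDUCE1BIT(V);                 V = H*x^2
 *
 * which is exactly how gcm_init_4bit() below seeds its table.
 */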

/*-
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
 * should never be set to 8: 8 is effectively reserved for testing
 * purposes.  TABLE_BITS>1 selects the lookup-table-driven
 * implementations referred to as "Shoup's" in the GCM specification,
 * so OpenSSL does not cover the whole spectrum of possible table-driven
 * implementations.  Why?  In the non-"Shoup's" case the memory access
 * pattern is segmented in such a manner that cache timing information
 * can trivially reveal a fair portion of the intermediate hash value.
 * Given that the ciphertext is always available to an attacker, this
 * makes it possible to attempt to deduce the secret parameter H and,
 * if successful, tamper with messages [which is trivial in CTR mode].
 * In the "Shoup's" case the attack is not as easy, but there is no
 * reason to believe the approach is resistant to cache-timing attacks
 * either.  As for the "8-bit" implementation: it consumes 16 (sixteen)
 * times more memory, 4KB per individual key + 1KB shared, though on
 * the plus side it should run about twice as fast as the "4-bit"
 * version.  For gcc-generated x86[_64] code the "8-bit" version was
 * observed to run ~75% faster, closer to 100% for commercial
 * compilers...  Yet the "4-bit" procedure is preferred, because it is
 * believed to provide a better security-performance balance and
 * adequate all-round performance.  "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - a larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free()
 *   triggers VM working set trimming, so a subsequent malloc() would
 *   immediately incur working set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even in the same
 *   thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
#if TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 256; i <<= 1) {
        u128 *Hi = Htable + i, H0 = *Hi;
        for (j = 1; j < i; ++j) {
            Hi[j].hi = H0.hi ^ Htable[j].hi;
            Hi[j].lo = H0.lo ^ Htable[j].lo;
        }
    }
}

static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi + 15;
    size_t rem, n = *xi;
    DECLARE_IS_ENDIAN;
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };

    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi == xi)
            break;

        n = *(--xi);

        rem = (size_t)Z.lo & 0xff;
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
        Z.hi = (Z.hi >> 8);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem] << 32;
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)      gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)

#elif TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        DECLARE_IS_ENDIAN;

        if (IS_LITTLE_ENDIAN)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            }
        else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}
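
/*
 * Layout note (reference only): after gcm_init_4bit(), Htable[i] holds
 * the GF(2^128) product of H and the 4-bit polynomial whose
 * bit-reflected value is i, so Htable[8] = H, Htable[4] = H*x,
 * Htable[2] = H*x^2, Htable[1] = H*x^3, and the remaining entries are
 * XOR combinations of those.  gcm_gmult_4bit() below can therefore
 * process Xi one nibble at a time with a single table lookup per
 * nibble.
 */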

# ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};
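
/*
 * rem_4bit[rem] is the pre-computed reduction term of Shoup's 4-bit
 * method: when four bits are shifted out of the low end of Z, the
 * table entry folds their contribution back into the top of Z.  PACK()
 * places the 16-bit constant in the top bits of a size_t, so the same
 * table serves both 32- and 64-bit builds.
 */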

static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed version of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt
 * for details...  Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64].  It's here mostly
 * as a reference and a placeholder for possible future non-trivial
 * optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

# if 1
    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
# else
    /*
     * An extra 256+16 bytes per key plus 512 bytes of shared tables
     * [should] give a ~50% improvement...  One could have PACK()-ed
     * the rem_8bit table even here, but the priority is to minimize
     * the cache footprint...
     */
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
    };
    /*
     * This pre-processing phase slows the procedure down by roughly
     * the same amount of time as it makes each loop iteration spin
     * faster.  In other words, single-block performance is about the
     * same as in the straightforward "4-bit" implementation, and from
     * there it only gets faster...
     */
    for (cnt = 0; cnt < 16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
        Hshr4[cnt].hi = (Z.hi >> 4);
        Hshl4[cnt] = (u8)(Z.lo << 4);
    }

    do {
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo & 0xff;

            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
            Z.hi = (Z.hi >> 8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
        }

        nlo = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo & 0xf;

        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
# endif

        if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
# else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
# endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp += 16, len -= 16);
}
# endif
# else
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" whose mission is to mitigate
 * cache-thrashing effects.  In other words, the idea is to hash the
 * data while it is still in the L1 cache after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif

#else                           /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int i, j;
    const long *xi = (const long *)Xi;
    DECLARE_IS_ENDIAN;

    V.hi = H[0];                /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16 / sizeof(long); ++j) {
        if (IS_LITTLE_ENDIAN) {
            if (sizeof(long) == 8) {
# ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
# else
                const u8 *p = (const u8 *)(xi + j);
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
# endif
            } else {
                const u8 *p = (const u8 *)(xi + j);
                X = (long)GETU32(p);
            }
        } else
            X = xi[j];

        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif

#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if !defined(I386_ONLY) && \
     (defined(__i386)   || defined(__i386__)   || \
      defined(__x86_64) || defined(__x86_64__) || \
      defined(_M_IX86)  || defined(_M_AMD64)   || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
#endif

#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif

void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }
#if TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif TABLE_BITS==4
# if defined(GHASH)
#  define CTX__GHASH(f) (ctx->ghash = (f))
# else
#  define CTX__GHASH(f) (ctx->ghash = NULL)
# endif
# if defined(GHASH_ASM_X86_OR_64)
#  if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            CTX__GHASH(gcm_ghash_avx);
        } else {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            CTX__GHASH(gcm_ghash_clmul);
        }
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if defined(GHASH_ASM_X86)    /* x86 only */
#   if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        CTX__GHASH(gcm_ghash_4bit_mmx);
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        CTX__GHASH(gcm_ghash_4bit_x86);
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    CTX__GHASH(gcm_ghash_4bit);
#  endif
# elif defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        CTX__GHASH(gcm_ghash_v8);
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        CTX__GHASH(gcm_ghash_neon);
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        CTX__GHASH(gcm_ghash_vis3);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        CTX__GHASH(gcm_ghash_p8);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
# endif
# undef CTX__GHASH
#endif
}
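
/*
 * Usage sketch (reference only; the AES calls below are the usual
 * <openssl/aes.h> ones and are not otherwise used in this file):
 *
 *      AES_KEY aes;
 *      GCM128_CONTEXT gcm;
 *
 *      AES_set_encrypt_key(key, 128, &aes);
 *      CRYPTO_gcm128_init(&gcm, &aes, (block128_f)AES_encrypt);
 *
 * Any 128-bit block cipher works, as long as it is presented through a
 * block128_f-compatible callback.
 */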

void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
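
/*
 * IV handling above follows the GCM specification: for the recommended
 * 96-bit IV the pre-counter block is simply Y0 = IV || 0^31 || 1, while
 * any other IV length takes the slower path Y0 = GHASH(H, {}, IV),
 * i.e. the IV is hashed together with its bit length.
 */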

int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    alen += len;
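    /* GCM limits AAD to 2^64 bits, i.e. 2^61 bytes */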
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}

int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
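    /* GCM limits the message to 2^39-256 bits, i.e. 2^36-32 bytes */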
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
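
/*
 * A note on streaming (reference only): CRYPTO_gcm128_encrypt() may be
 * called repeatedly on consecutive chunks of a message; ctx->mres
 * carries the partial-block state across calls, and in the deferred-
 * GHASH path ctx->Xn additionally buffers not-yet-hashed ciphertext.
 * All AAD must be supplied through CRYPTO_gcm128_aad() before the
 * first encrypt/decrypt call.
 */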

int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}

int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}

int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
# else
            u8 c = in[n];
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}

int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}
1838
fd3dbc1d
DSH
1839void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1840{
0f113f3e
MC
1841 CRYPTO_gcm128_finish(ctx, NULL, 0);
1842 memcpy(tag, ctx->Xi.c,
1843 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
fd3dbc1d
DSH
1844}
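
/*
 * Putting it all together (reference sketch; see the keying example
 * after CRYPTO_gcm128_init() above for the assumed AES setup):
 *
 *      CRYPTO_gcm128_setiv(&gcm, iv, 12);
 *      CRYPTO_gcm128_aad(&gcm, aad, aad_len);
 *      CRYPTO_gcm128_encrypt(&gcm, plaintext, ciphertext, pt_len);
 *      CRYPTO_gcm128_tag(&gcm, tag, 16);
 *
 * Decryption mirrors the sequence with CRYPTO_gcm128_decrypt() and
 * ends with CRYPTO_gcm128_finish(&gcm, expected_tag, 16), which
 * returns 0 only if the computed tag matches (compared via the
 * constant-time CRYPTO_memcmp).
 */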

GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}