1 /*
2 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
3 *
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
8 */
9
10 #include <string.h>
11 #include <openssl/crypto.h>
12 #include "internal/cryptlib.h"
13 #include "internal/endian.h"
14 #include "crypto/modes.h"
15
16 #if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
17 typedef size_t size_t_aX __attribute((__aligned__(1)));
18 #else
19 typedef size_t size_t_aX;
20 #endif
21
22 #if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
23 /* redefine, because alignment is ensured */
24 # undef GETU32
25 # define GETU32(p) BSWAP4(*(const u32 *)(p))
26 # undef PUTU32
27 # define PUTU32(p,v) *(u32 *)(p) = BSWAP4(v)
28 #endif
29
30 #define PACK(s) ((size_t)(s)<<(sizeof(size_t)*8-16))
31 #define REDUCE1BIT(V) do { \
32 if (sizeof(size_t)==8) { \
33 u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
34 V.lo = (V.hi<<63)|(V.lo>>1); \
35 V.hi = (V.hi>>1 )^T; \
36 } \
37 else { \
38 u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
39 V.lo = (V.hi<<63)|(V.lo>>1); \
40 V.hi = (V.hi>>1 )^((u64)T<<32); \
41 } \
42 } while(0)
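/*-
 * What REDUCE1BIT computes: V := V * x in GF(2^128).  GCM uses a
 * bit-reflected representation, so multiplying by x is a one-bit right
 * shift of the 128-bit value V.hi:V.lo; if the bit shifted out of V.lo
 * was set, the reduction constant 0xE1 followed by 120 zero bits
 * (0xE1 in the top byte of V.hi) is folded back in.  That constant is
 * the bit-reflected form of the GCM polynomial x^128 + x^7 + x^2 + x + 1.
 */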
43
44 /*-
45 * Even though permitted values for TABLE_BITS are 8, 4 and 1, it should
46 * never be set to 8. 8 is effectively reserved for testing purposes.
 47 * TABLE_BITS>1 selects the lookup-table-driven implementations referred
 48 * to as "Shoup's" in the GCM specification. In other words OpenSSL does
 49 * not cover the whole spectrum of possible table-driven implementations.
 50 * Why? In the non-"Shoup's" case the memory access pattern is segmented
 51 * in such a manner that cache timing information can reveal a fair
 52 * portion of the intermediate hash value. Given that ciphertext is
 53 * always available to an attacker, it's possible to attempt to deduce
 54 * the secret parameter H and, if successful, to tamper with messages
 55 * [which is trivial in CTR mode]. In "Shoup's" case it's not as easy,
 56 * but there is no reason to believe that it's resistant to cache-timing
 57 * attacks. The thing about the "8-bit" implementation is that it
 58 * consumes 16 (sixteen) times more memory, 4KB per individual key +
 59 * 1KB shared. On the plus side it should be about twice as fast as
 60 * the "4-bit" version. For gcc-generated x86[_64] code, the "8-bit"
 61 * version was observed to run ~75% faster, closer to 100% for
 62 * commercial compilers... Yet the "4-bit" procedure is preferred,
 63 * because it's believed to provide a better security-performance
 64 * balance and adequate all-round performance. "All-round" refers to:
65 *
66 * - shorter setup time effectively improves overall timing for
67 * handling short messages;
 68 * - a larger table allocation can become unbearable because of VM
 69 * subsystem penalties (for example, on Windows a large enough free
 70 * results in VM working-set trimming, meaning that a subsequent
 71 * malloc would immediately incur working-set expansion);
 72 * - a larger table has a larger cache footprint, which can affect
 73 * the performance of other code paths (not necessarily even from
 74 * the same thread in a Hyper-Threading world);
75 *
 76 * A value of 1 is not appropriate for performance reasons.
77 */
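/*-
 * For reference, the table sizes implied above: the "4-bit" Htable is
 * 16 u128 entries = 256 bytes per key (plus the small shared rem_4bit
 * table), while the "8-bit" Htable is 256 u128 entries = 4KB per key
 * plus the 256-entry rem_8bit table shared between keys (1KB with a
 * 32-bit size_t, 2KB with a 64-bit one).
 */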
78 #if TABLE_BITS==8
79
80 static void gcm_init_8bit(u128 Htable[256], u64 H[2])
81 {
82 int i, j;
83 u128 V;
84
85 Htable[0].hi = 0;
86 Htable[0].lo = 0;
87 V.hi = H[0];
88 V.lo = H[1];
89
90 for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
91 REDUCE1BIT(V);
92 Htable[i] = V;
93 }
94
95 for (i = 2; i < 256; i <<= 1) {
96 u128 *Hi = Htable + i, H0 = *Hi;
97 for (j = 1; j < i; ++j) {
98 Hi[j].hi = H0.hi ^ Htable[j].hi;
99 Hi[j].lo = H0.lo ^ Htable[j].lo;
100 }
101 }
102 }
103
104 static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
105 {
106 u128 Z = { 0, 0 };
107 const u8 *xi = (const u8 *)Xi + 15;
108 size_t rem, n = *xi;
109 DECLARE_IS_ENDIAN;
110 static const size_t rem_8bit[256] = {
111 PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
112 PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
113 PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
114 PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
115 PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
116 PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
117 PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
118 PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
119 PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
120 PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
121 PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
122 PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
123 PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
124 PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
125 PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
126 PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
127 PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
128 PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
129 PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
130 PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
131 PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
132 PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
133 PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
134 PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
135 PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
136 PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
137 PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
138 PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
139 PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
140 PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
141 PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
142 PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
143 PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
144 PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
145 PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
146 PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
147 PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
148 PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
149 PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
150 PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
151 PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
152 PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
153 PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
154 PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
155 PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
156 PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
157 PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
158 PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
159 PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
160 PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
161 PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
162 PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
163 PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
164 PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
165 PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
166 PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
167 PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
168 PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
169 PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
170 PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
171 PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
172 PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
173 PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
174 PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
175 };
176
177 while (1) {
178 Z.hi ^= Htable[n].hi;
179 Z.lo ^= Htable[n].lo;
180
181 if ((u8 *)Xi == xi)
182 break;
183
184 n = *(--xi);
185
186 rem = (size_t)Z.lo & 0xff;
187 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
188 Z.hi = (Z.hi >> 8);
189 if (sizeof(size_t) == 8)
190 Z.hi ^= rem_8bit[rem];
191 else
192 Z.hi ^= (u64)rem_8bit[rem] << 32;
193 }
194
195 if (IS_LITTLE_ENDIAN) {
196 # ifdef BSWAP8
197 Xi[0] = BSWAP8(Z.hi);
198 Xi[1] = BSWAP8(Z.lo);
199 # else
200 u8 *p = (u8 *)Xi;
201 u32 v;
202 v = (u32)(Z.hi >> 32);
203 PUTU32(p, v);
204 v = (u32)(Z.hi);
205 PUTU32(p + 4, v);
206 v = (u32)(Z.lo >> 32);
207 PUTU32(p + 8, v);
208 v = (u32)(Z.lo);
209 PUTU32(p + 12, v);
210 # endif
211 } else {
212 Xi[0] = Z.hi;
213 Xi[1] = Z.lo;
214 }
215 }
216
217 # define GCM_MUL(ctx) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
218
219 #elif TABLE_BITS==4
220
221 static void gcm_init_4bit(u128 Htable[16], u64 H[2])
222 {
223 u128 V;
224 # if defined(OPENSSL_SMALL_FOOTPRINT)
225 int i;
226 # endif
227
228 Htable[0].hi = 0;
229 Htable[0].lo = 0;
230 V.hi = H[0];
231 V.lo = H[1];
232
233 # if defined(OPENSSL_SMALL_FOOTPRINT)
234 for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
235 REDUCE1BIT(V);
236 Htable[i] = V;
237 }
238
239 for (i = 2; i < 16; i <<= 1) {
240 u128 *Hi = Htable + i;
241 int j;
242 for (V = *Hi, j = 1; j < i; ++j) {
243 Hi[j].hi = V.hi ^ Htable[j].hi;
244 Hi[j].lo = V.lo ^ Htable[j].lo;
245 }
246 }
247 # else
248 Htable[8] = V;
249 REDUCE1BIT(V);
250 Htable[4] = V;
251 REDUCE1BIT(V);
252 Htable[2] = V;
253 REDUCE1BIT(V);
254 Htable[1] = V;
255 Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
256 V = Htable[4];
257 Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
258 Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
259 Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
260 V = Htable[8];
261 Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
262 Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
263 Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
264 Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
265 Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
266 Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
267 Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
268 # endif
269 # if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
270 /*
271 * ARM assembler expects specific dword order in Htable.
272 */
273 {
274 int j;
275 DECLARE_IS_ENDIAN;
276
277 if (IS_LITTLE_ENDIAN)
278 for (j = 0; j < 16; ++j) {
279 V = Htable[j];
280 Htable[j].hi = V.lo;
281 Htable[j].lo = V.hi;
282 } else
283 for (j = 0; j < 16; ++j) {
284 V = Htable[j];
285 Htable[j].hi = V.lo << 32 | V.lo >> 32;
286 Htable[j].lo = V.hi << 32 | V.hi >> 32;
287 }
288 }
289 # endif
290 }
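/*-
 * After gcm_init_4bit, Htable[i] holds the GF(2^128) product of H with
 * the 4-bit polynomial i, in GCM's reflected bit order (bit 3 of i is
 * the lowest-degree coefficient, so Htable[8] = H, Htable[4] = H*x,
 * Htable[2] = H*x^2, Htable[1] = H*x^3).  gcm_gmult_4bit consumes Xi
 * one nibble at a time: one table lookup plus one 4-bit shift-and-reduce
 * per nibble.
 */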
291
292 # ifndef GHASH_ASM
293 static const size_t rem_4bit[16] = {
294 PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
295 PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
296 PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
297 PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
298 };
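/*-
 * rem_4bit[r] is the constant folded into the top of Z after a 4-bit
 * right shift pushes the nibble r out of the bottom: each entry is the
 * 16-bit reduction term for that nibble, pre-shifted into the top 16
 * bits of a size_t by PACK() so that 32- and 64-bit builds can share
 * the table.
 */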
299
300 static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
301 {
302 u128 Z;
303 int cnt = 15;
304 size_t rem, nlo, nhi;
305 DECLARE_IS_ENDIAN;
306
307 nlo = ((const u8 *)Xi)[15];
308 nhi = nlo >> 4;
309 nlo &= 0xf;
310
311 Z.hi = Htable[nlo].hi;
312 Z.lo = Htable[nlo].lo;
313
314 while (1) {
315 rem = (size_t)Z.lo & 0xf;
316 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
317 Z.hi = (Z.hi >> 4);
318 if (sizeof(size_t) == 8)
319 Z.hi ^= rem_4bit[rem];
320 else
321 Z.hi ^= (u64)rem_4bit[rem] << 32;
322
323 Z.hi ^= Htable[nhi].hi;
324 Z.lo ^= Htable[nhi].lo;
325
326 if (--cnt < 0)
327 break;
328
329 nlo = ((const u8 *)Xi)[cnt];
330 nhi = nlo >> 4;
331 nlo &= 0xf;
332
333 rem = (size_t)Z.lo & 0xf;
334 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
335 Z.hi = (Z.hi >> 4);
336 if (sizeof(size_t) == 8)
337 Z.hi ^= rem_4bit[rem];
338 else
339 Z.hi ^= (u64)rem_4bit[rem] << 32;
340
341 Z.hi ^= Htable[nlo].hi;
342 Z.lo ^= Htable[nlo].lo;
343 }
344
345 if (IS_LITTLE_ENDIAN) {
346 # ifdef BSWAP8
347 Xi[0] = BSWAP8(Z.hi);
348 Xi[1] = BSWAP8(Z.lo);
349 # else
350 u8 *p = (u8 *)Xi;
351 u32 v;
352 v = (u32)(Z.hi >> 32);
353 PUTU32(p, v);
354 v = (u32)(Z.hi);
355 PUTU32(p + 4, v);
356 v = (u32)(Z.lo >> 32);
357 PUTU32(p + 8, v);
358 v = (u32)(Z.lo);
359 PUTU32(p + 12, v);
360 # endif
361 } else {
362 Xi[0] = Z.hi;
363 Xi[1] = Z.lo;
364 }
365 }
366
367 # if !defined(OPENSSL_SMALL_FOOTPRINT)
368 /*
 369 * Streamed variant of gcm_gmult_4bit; see CRYPTO_gcm128_[en|de]crypt
 370 * for details... Compiler-generated code doesn't seem to give any
 371 * performance improvement, at least not on x86[_64]. It's here
 372 * mostly as a reference and a placeholder for possible future
 373 * non-trivial optimization[s]...
374 */
375 static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
376 const u8 *inp, size_t len)
377 {
378 u128 Z;
379 int cnt;
380 size_t rem, nlo, nhi;
381 DECLARE_IS_ENDIAN;
382
383 # if 1
384 do {
385 cnt = 15;
386 nlo = ((const u8 *)Xi)[15];
387 nlo ^= inp[15];
388 nhi = nlo >> 4;
389 nlo &= 0xf;
390
391 Z.hi = Htable[nlo].hi;
392 Z.lo = Htable[nlo].lo;
393
394 while (1) {
395 rem = (size_t)Z.lo & 0xf;
396 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
397 Z.hi = (Z.hi >> 4);
398 if (sizeof(size_t) == 8)
399 Z.hi ^= rem_4bit[rem];
400 else
401 Z.hi ^= (u64)rem_4bit[rem] << 32;
402
403 Z.hi ^= Htable[nhi].hi;
404 Z.lo ^= Htable[nhi].lo;
405
406 if (--cnt < 0)
407 break;
408
409 nlo = ((const u8 *)Xi)[cnt];
410 nlo ^= inp[cnt];
411 nhi = nlo >> 4;
412 nlo &= 0xf;
413
414 rem = (size_t)Z.lo & 0xf;
415 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
416 Z.hi = (Z.hi >> 4);
417 if (sizeof(size_t) == 8)
418 Z.hi ^= rem_4bit[rem];
419 else
420 Z.hi ^= (u64)rem_4bit[rem] << 32;
421
422 Z.hi ^= Htable[nlo].hi;
423 Z.lo ^= Htable[nlo].lo;
424 }
425 # else
426 /*
 427 * An extra 256+16 bytes per key plus 512 bytes of shared tables
 428 * [should] give ~50% improvement... One could have PACK()-ed
429 * the rem_8bit even here, but the priority is to minimize
430 * cache footprint...
431 */
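        /*
         * Sizes of the data mentioned above: Hshr4 is 16 * sizeof(u128)
         * = 256 bytes and Hshl4 is 16 bytes (the "256+16 bytes per key"),
         * while rem_8bit is 256 * 2 = 512 bytes of shared table.
         */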
432 u128 Hshr4[16]; /* Htable shifted right by 4 bits */
433 u8 Hshl4[16]; /* Htable shifted left by 4 bits */
434 static const unsigned short rem_8bit[256] = {
435 0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
436 0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
437 0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
438 0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
439 0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
440 0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
441 0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
442 0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
443 0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
444 0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
445 0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
446 0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
447 0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
448 0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
449 0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
450 0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
451 0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
452 0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
453 0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
454 0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
455 0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
456 0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
457 0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
458 0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
459 0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
460 0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
461 0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
462 0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
463 0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
464 0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
465 0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
466 0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
467 };
468 /*
 469 * This pre-processing phase slows down the procedure by approximately
 470 * as much time as it makes each loop spin faster. In other words,
 471 * single-block performance is about the same as in the straightforward
 472 * "4-bit" implementation, and from there it only gets faster...
473 */
474 for (cnt = 0; cnt < 16; ++cnt) {
475 Z.hi = Htable[cnt].hi;
476 Z.lo = Htable[cnt].lo;
477 Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
478 Hshr4[cnt].hi = (Z.hi >> 4);
479 Hshl4[cnt] = (u8)(Z.lo << 4);
480 }
481
482 do {
483 for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
484 nlo = ((const u8 *)Xi)[cnt];
485 nlo ^= inp[cnt];
486 nhi = nlo >> 4;
487 nlo &= 0xf;
488
489 Z.hi ^= Htable[nlo].hi;
490 Z.lo ^= Htable[nlo].lo;
491
492 rem = (size_t)Z.lo & 0xff;
493
494 Z.lo = (Z.hi << 56) | (Z.lo >> 8);
495 Z.hi = (Z.hi >> 8);
496
497 Z.hi ^= Hshr4[nhi].hi;
498 Z.lo ^= Hshr4[nhi].lo;
499 Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
500 }
501
502 nlo = ((const u8 *)Xi)[0];
503 nlo ^= inp[0];
504 nhi = nlo >> 4;
505 nlo &= 0xf;
506
507 Z.hi ^= Htable[nlo].hi;
508 Z.lo ^= Htable[nlo].lo;
509
510 rem = (size_t)Z.lo & 0xf;
511
512 Z.lo = (Z.hi << 60) | (Z.lo >> 4);
513 Z.hi = (Z.hi >> 4);
514
515 Z.hi ^= Htable[nhi].hi;
516 Z.lo ^= Htable[nhi].lo;
517 Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
518 # endif
519
520 if (IS_LITTLE_ENDIAN) {
521 # ifdef BSWAP8
522 Xi[0] = BSWAP8(Z.hi);
523 Xi[1] = BSWAP8(Z.lo);
524 # else
525 u8 *p = (u8 *)Xi;
526 u32 v;
527 v = (u32)(Z.hi >> 32);
528 PUTU32(p, v);
529 v = (u32)(Z.hi);
530 PUTU32(p + 4, v);
531 v = (u32)(Z.lo >> 32);
532 PUTU32(p + 8, v);
533 v = (u32)(Z.lo);
534 PUTU32(p + 12, v);
535 # endif
536 } else {
537 Xi[0] = Z.hi;
538 Xi[1] = Z.lo;
539 }
540 } while (inp += 16, len -= 16);
541 }
542 # endif
543 # else
544 void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
545 void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
546 size_t len);
547 # endif
548
549 # define GCM_MUL(ctx) gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
550 # if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
551 # define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
552 /*
 553 * GHASH_CHUNK is a "stride parameter" whose purpose is to mitigate the
 554 * cache-thrashing effect. In other words the idea is to hash data while
 555 * it's still in the L1 cache after the encryption pass...
556 */
557 # define GHASH_CHUNK (3*1024)
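/* 3KB per chunk keeps the data well within a typical L1 data cache. */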
558 # endif
559
560 #else /* TABLE_BITS */
561
562 static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
563 {
564 u128 V, Z = { 0, 0 };
565 long X;
566 int i, j;
567 const long *xi = (const long *)Xi;
568 DECLARE_IS_ENDIAN;
569
570 V.hi = H[0]; /* H is in host byte order, no byte swapping */
571 V.lo = H[1];
572
573 for (j = 0; j < 16 / sizeof(long); ++j) {
574 if (IS_LITTLE_ENDIAN) {
575 if (sizeof(long) == 8) {
576 # ifdef BSWAP8
577 X = (long)(BSWAP8(xi[j]));
578 # else
579 const u8 *p = (const u8 *)(xi + j);
580 X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
581 # endif
582 } else {
583 const u8 *p = (const u8 *)(xi + j);
584 X = (long)GETU32(p);
585 }
586 } else
587 X = xi[j];
588
589 for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
590 u64 M = (u64)(X >> (8 * sizeof(long) - 1));
591 Z.hi ^= V.hi & M;
592 Z.lo ^= V.lo & M;
593
594 REDUCE1BIT(V);
595 }
596 }
597
598 if (IS_LITTLE_ENDIAN) {
599 # ifdef BSWAP8
600 Xi[0] = BSWAP8(Z.hi);
601 Xi[1] = BSWAP8(Z.lo);
602 # else
603 u8 *p = (u8 *)Xi;
604 u32 v;
605 v = (u32)(Z.hi >> 32);
606 PUTU32(p, v);
607 v = (u32)(Z.hi);
608 PUTU32(p + 4, v);
609 v = (u32)(Z.lo >> 32);
610 PUTU32(p + 8, v);
611 v = (u32)(Z.lo);
612 PUTU32(p + 12, v);
613 # endif
614 } else {
615 Xi[0] = Z.hi;
616 Xi[1] = Z.lo;
617 }
618 }
619
620 # define GCM_MUL(ctx) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)
621
622 #endif
623
624 #if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
625 # if !defined(I386_ONLY) && \
626 (defined(__i386) || defined(__i386__) || \
627 defined(__x86_64) || defined(__x86_64__) || \
628 defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
629 # define GHASH_ASM_X86_OR_64
630 # define GCM_FUNCREF_4BIT
631
632 void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
633 void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
634 void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
635 size_t len);
636
637 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
638 # define gcm_init_avx gcm_init_clmul
639 # define gcm_gmult_avx gcm_gmult_clmul
640 # define gcm_ghash_avx gcm_ghash_clmul
641 # else
642 void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
643 void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
644 void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
645 size_t len);
646 # endif
647
648 # if defined(__i386) || defined(__i386__) || defined(_M_IX86)
649 # define GHASH_ASM_X86
650 void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
651 void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
652 size_t len);
653
654 void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
655 void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
656 size_t len);
657 # endif
658 # elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
659 # include "arm_arch.h"
660 # if __ARM_MAX_ARCH__>=7
661 # define GHASH_ASM_ARM
662 # define GCM_FUNCREF_4BIT
663 # define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
664 # if defined(__arm__) || defined(__arm)
665 # define NEON_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
666 # endif
667 void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
668 void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
669 void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
670 size_t len);
671 void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
672 void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
673 void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
674 size_t len);
675 # endif
676 # elif defined(__sparc__) || defined(__sparc)
677 # include "sparc_arch.h"
678 # define GHASH_ASM_SPARC
679 # define GCM_FUNCREF_4BIT
680 extern unsigned int OPENSSL_sparcv9cap_P[];
681 void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
682 void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
683 void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
684 size_t len);
685 # elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
686 # include "crypto/ppc_arch.h"
687 # define GHASH_ASM_PPC
688 # define GCM_FUNCREF_4BIT
689 void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
690 void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
691 void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
692 size_t len);
693 # endif
694 #endif
695
696 #ifdef GCM_FUNCREF_4BIT
697 # undef GCM_MUL
698 # define GCM_MUL(ctx) (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
699 # ifdef GHASH
700 # undef GHASH
701 # define GHASH(ctx,in,len) (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
702 # endif
703 #endif
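/*-
 * When GCM_FUNCREF_4BIT is defined, GCM_MUL/GHASH dispatch through the
 * local function pointers gcm_gmult_p/gcm_ghash_p, which each public
 * entry point loads from ctx->gmult/ctx->ghash, so the CPU-specific
 * routine selected in CRYPTO_gcm128_init is the one actually used.
 */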
704
705 void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
706 {
707 DECLARE_IS_ENDIAN;
708
709 memset(ctx, 0, sizeof(*ctx));
710 ctx->block = block;
711 ctx->key = key;
712
713 (*block) (ctx->H.c, ctx->H.c, key);
714
715 if (IS_LITTLE_ENDIAN) {
716 /* H is stored in host byte order */
717 #ifdef BSWAP8
718 ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
719 ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
720 #else
721 u8 *p = ctx->H.c;
722 u64 hi, lo;
723 hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
724 lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
725 ctx->H.u[0] = hi;
726 ctx->H.u[1] = lo;
727 #endif
728 }
729 #if TABLE_BITS==8
730 gcm_init_8bit(ctx->Htable, ctx->H.u);
731 #elif TABLE_BITS==4
732 # if defined(GHASH)
733 # define CTX__GHASH(f) (ctx->ghash = (f))
734 # else
735 # define CTX__GHASH(f) (ctx->ghash = NULL)
736 # endif
737 # if defined(GHASH_ASM_X86_OR_64)
738 # if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
739 if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
740 if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
741 gcm_init_avx(ctx->Htable, ctx->H.u);
742 ctx->gmult = gcm_gmult_avx;
743 CTX__GHASH(gcm_ghash_avx);
744 } else {
745 gcm_init_clmul(ctx->Htable, ctx->H.u);
746 ctx->gmult = gcm_gmult_clmul;
747 CTX__GHASH(gcm_ghash_clmul);
748 }
749 return;
750 }
751 # endif
752 gcm_init_4bit(ctx->Htable, ctx->H.u);
753 # if defined(GHASH_ASM_X86) /* x86 only */
754 # if defined(OPENSSL_IA32_SSE2)
755 if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
756 # else
757 if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
758 # endif
759 ctx->gmult = gcm_gmult_4bit_mmx;
760 CTX__GHASH(gcm_ghash_4bit_mmx);
761 } else {
762 ctx->gmult = gcm_gmult_4bit_x86;
763 CTX__GHASH(gcm_ghash_4bit_x86);
764 }
765 # else
766 ctx->gmult = gcm_gmult_4bit;
767 CTX__GHASH(gcm_ghash_4bit);
768 # endif
769 # elif defined(GHASH_ASM_ARM)
770 # ifdef PMULL_CAPABLE
771 if (PMULL_CAPABLE) {
772 gcm_init_v8(ctx->Htable, ctx->H.u);
773 ctx->gmult = gcm_gmult_v8;
774 CTX__GHASH(gcm_ghash_v8);
775 } else
776 # endif
777 # ifdef NEON_CAPABLE
778 if (NEON_CAPABLE) {
779 gcm_init_neon(ctx->Htable, ctx->H.u);
780 ctx->gmult = gcm_gmult_neon;
781 CTX__GHASH(gcm_ghash_neon);
782 } else
783 # endif
784 {
785 gcm_init_4bit(ctx->Htable, ctx->H.u);
786 ctx->gmult = gcm_gmult_4bit;
787 CTX__GHASH(gcm_ghash_4bit);
788 }
789 # elif defined(GHASH_ASM_SPARC)
790 if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
791 gcm_init_vis3(ctx->Htable, ctx->H.u);
792 ctx->gmult = gcm_gmult_vis3;
793 CTX__GHASH(gcm_ghash_vis3);
794 } else {
795 gcm_init_4bit(ctx->Htable, ctx->H.u);
796 ctx->gmult = gcm_gmult_4bit;
797 CTX__GHASH(gcm_ghash_4bit);
798 }
799 # elif defined(GHASH_ASM_PPC)
800 if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
801 gcm_init_p8(ctx->Htable, ctx->H.u);
802 ctx->gmult = gcm_gmult_p8;
803 CTX__GHASH(gcm_ghash_p8);
804 } else {
805 gcm_init_4bit(ctx->Htable, ctx->H.u);
806 ctx->gmult = gcm_gmult_4bit;
807 CTX__GHASH(gcm_ghash_4bit);
808 }
809 # else
810 gcm_init_4bit(ctx->Htable, ctx->H.u);
811 # endif
812 # undef CTX__GHASH
813 #endif
814 }
815
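/*-
 * Derives the pre-counter block J0: for a 96-bit IV, J0 = IV || 0^31 || 1;
 * otherwise J0 = GHASH(IV zero-padded to a block boundary || 0^64 ||
 * [len(IV)]_64).  EK0 = E(K, J0) is cached for the final tag computation
 * and the counter in Yi is left at J0 + 1, the first counter block used
 * for encryption.
 */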
816 void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
817 size_t len)
818 {
819 DECLARE_IS_ENDIAN;
820 unsigned int ctr;
821 #ifdef GCM_FUNCREF_4BIT
822 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
823 #endif
824
825 ctx->len.u[0] = 0; /* AAD length */
826 ctx->len.u[1] = 0; /* message length */
827 ctx->ares = 0;
828 ctx->mres = 0;
829
830 if (len == 12) {
831 memcpy(ctx->Yi.c, iv, 12);
832 ctx->Yi.c[12] = 0;
833 ctx->Yi.c[13] = 0;
834 ctx->Yi.c[14] = 0;
835 ctx->Yi.c[15] = 1;
836 ctr = 1;
837 } else {
838 size_t i;
839 u64 len0 = len;
840
841 /* Borrow ctx->Xi to calculate initial Yi */
842 ctx->Xi.u[0] = 0;
843 ctx->Xi.u[1] = 0;
844
845 while (len >= 16) {
846 for (i = 0; i < 16; ++i)
847 ctx->Xi.c[i] ^= iv[i];
848 GCM_MUL(ctx);
849 iv += 16;
850 len -= 16;
851 }
852 if (len) {
853 for (i = 0; i < len; ++i)
854 ctx->Xi.c[i] ^= iv[i];
855 GCM_MUL(ctx);
856 }
857 len0 <<= 3;
858 if (IS_LITTLE_ENDIAN) {
859 #ifdef BSWAP8
860 ctx->Xi.u[1] ^= BSWAP8(len0);
861 #else
862 ctx->Xi.c[8] ^= (u8)(len0 >> 56);
863 ctx->Xi.c[9] ^= (u8)(len0 >> 48);
864 ctx->Xi.c[10] ^= (u8)(len0 >> 40);
865 ctx->Xi.c[11] ^= (u8)(len0 >> 32);
866 ctx->Xi.c[12] ^= (u8)(len0 >> 24);
867 ctx->Xi.c[13] ^= (u8)(len0 >> 16);
868 ctx->Xi.c[14] ^= (u8)(len0 >> 8);
869 ctx->Xi.c[15] ^= (u8)(len0);
870 #endif
871 } else {
872 ctx->Xi.u[1] ^= len0;
873 }
874
875 GCM_MUL(ctx);
876
877 if (IS_LITTLE_ENDIAN)
878 #ifdef BSWAP4
879 ctr = BSWAP4(ctx->Xi.d[3]);
880 #else
881 ctr = GETU32(ctx->Xi.c + 12);
882 #endif
883 else
884 ctr = ctx->Xi.d[3];
885
886 /* Copy borrowed Xi to Yi */
887 ctx->Yi.u[0] = ctx->Xi.u[0];
888 ctx->Yi.u[1] = ctx->Xi.u[1];
889 }
890
891 ctx->Xi.u[0] = 0;
892 ctx->Xi.u[1] = 0;
893
894 (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
895 ++ctr;
896 if (IS_LITTLE_ENDIAN)
897 #ifdef BSWAP4
898 ctx->Yi.d[3] = BSWAP4(ctr);
899 #else
900 PUTU32(ctx->Yi.c + 12, ctr);
901 #endif
902 else
903 ctx->Yi.d[3] = ctr;
904 }
905
906 int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
907 size_t len)
908 {
909 size_t i;
910 unsigned int n;
911 u64 alen = ctx->len.u[0];
912 #ifdef GCM_FUNCREF_4BIT
913 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
914 # ifdef GHASH
915 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
916 const u8 *inp, size_t len) = ctx->ghash;
917 # endif
918 #endif
919
920 if (ctx->len.u[1])
921 return -2;
922
923 alen += len;
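    /*
     * Cap the total AAD near the GCM limit from NIST SP 800-38D
     * (2^61 bytes here, i.e. 2^64 bits) and reject overflow of the
     * 64-bit running length.
     */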
924 if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
925 return -1;
926 ctx->len.u[0] = alen;
927
928 n = ctx->ares;
929 if (n) {
930 while (n && len) {
931 ctx->Xi.c[n] ^= *(aad++);
932 --len;
933 n = (n + 1) % 16;
934 }
935 if (n == 0)
936 GCM_MUL(ctx);
937 else {
938 ctx->ares = n;
939 return 0;
940 }
941 }
942 #ifdef GHASH
943 if ((i = (len & (size_t)-16))) {
944 GHASH(ctx, aad, i);
945 aad += i;
946 len -= i;
947 }
948 #else
949 while (len >= 16) {
950 for (i = 0; i < 16; ++i)
951 ctx->Xi.c[i] ^= aad[i];
952 GCM_MUL(ctx);
953 aad += 16;
954 len -= 16;
955 }
956 #endif
957 if (len) {
958 n = (unsigned int)len;
959 for (i = 0; i < len; ++i)
960 ctx->Xi.c[i] ^= aad[i];
961 }
962
963 ctx->ares = n;
964 return 0;
965 }
966
967 int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
968 const unsigned char *in, unsigned char *out,
969 size_t len)
970 {
971 DECLARE_IS_ENDIAN;
972 unsigned int n, ctr, mres;
973 size_t i;
974 u64 mlen = ctx->len.u[1];
975 block128_f block = ctx->block;
976 void *key = ctx->key;
977 #ifdef GCM_FUNCREF_4BIT
978 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
979 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
980 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
981 const u8 *inp, size_t len) = ctx->ghash;
982 # endif
983 #endif
984
985 mlen += len;
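    /*
     * The total plaintext is capped at 2^36 - 32 bytes = 2^39 - 256 bits,
     * the maximum GCM may process under NIST SP 800-38D; overflow of the
     * 64-bit running length is rejected as well.
     */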
986 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
987 return -1;
988 ctx->len.u[1] = mlen;
989
990 mres = ctx->mres;
991
992 if (ctx->ares) {
993 /* First call to encrypt finalizes GHASH(AAD) */
994 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
995 if (len == 0) {
996 GCM_MUL(ctx);
997 ctx->ares = 0;
998 return 0;
999 }
1000 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1001 ctx->Xi.u[0] = 0;
1002 ctx->Xi.u[1] = 0;
1003 mres = sizeof(ctx->Xi);
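        /*
         * Rather than multiplying immediately, the accumulated Xi is
         * queued into the Xn staging buffer (mres counts the bytes
         * waiting there) so that GHASH can later process it together
         * with ciphertext in one longer, more efficient pass.
         */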
1004 #else
1005 GCM_MUL(ctx);
1006 #endif
1007 ctx->ares = 0;
1008 }
1009
1010 if (IS_LITTLE_ENDIAN)
1011 #ifdef BSWAP4
1012 ctr = BSWAP4(ctx->Yi.d[3]);
1013 #else
1014 ctr = GETU32(ctx->Yi.c + 12);
1015 #endif
1016 else
1017 ctr = ctx->Yi.d[3];
1018
1019 n = mres % 16;
1020 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1021 if (16 % sizeof(size_t) == 0) { /* always true actually */
1022 do {
1023 if (n) {
1024 # if defined(GHASH)
1025 while (n && len) {
1026 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1027 --len;
1028 n = (n + 1) % 16;
1029 }
1030 if (n == 0) {
1031 GHASH(ctx, ctx->Xn, mres);
1032 mres = 0;
1033 } else {
1034 ctx->mres = mres;
1035 return 0;
1036 }
1037 # else
1038 while (n && len) {
1039 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1040 --len;
1041 n = (n + 1) % 16;
1042 }
1043 if (n == 0) {
1044 GCM_MUL(ctx);
1045 mres = 0;
1046 } else {
1047 ctx->mres = n;
1048 return 0;
1049 }
1050 # endif
1051 }
1052 # if defined(STRICT_ALIGNMENT)
1053 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1054 break;
1055 # endif
1056 # if defined(GHASH)
1057 if (len >= 16 && mres) {
1058 GHASH(ctx, ctx->Xn, mres);
1059 mres = 0;
1060 }
1061 # if defined(GHASH_CHUNK)
1062 while (len >= GHASH_CHUNK) {
1063 size_t j = GHASH_CHUNK;
1064
1065 while (j) {
1066 size_t_aX *out_t = (size_t_aX *)out;
1067 const size_t_aX *in_t = (const size_t_aX *)in;
1068
1069 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1070 ++ctr;
1071 if (IS_LITTLE_ENDIAN)
1072 # ifdef BSWAP4
1073 ctx->Yi.d[3] = BSWAP4(ctr);
1074 # else
1075 PUTU32(ctx->Yi.c + 12, ctr);
1076 # endif
1077 else
1078 ctx->Yi.d[3] = ctr;
1079 for (i = 0; i < 16 / sizeof(size_t); ++i)
1080 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1081 out += 16;
1082 in += 16;
1083 j -= 16;
1084 }
1085 GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
1086 len -= GHASH_CHUNK;
1087 }
1088 # endif
1089 if ((i = (len & (size_t)-16))) {
1090 size_t j = i;
1091
1092 while (len >= 16) {
1093 size_t_aX *out_t = (size_t_aX *)out;
1094 const size_t_aX *in_t = (const size_t_aX *)in;
1095
1096 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1097 ++ctr;
1098 if (IS_LITTLE_ENDIAN)
1099 # ifdef BSWAP4
1100 ctx->Yi.d[3] = BSWAP4(ctr);
1101 # else
1102 PUTU32(ctx->Yi.c + 12, ctr);
1103 # endif
1104 else
1105 ctx->Yi.d[3] = ctr;
1106 for (i = 0; i < 16 / sizeof(size_t); ++i)
1107 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1108 out += 16;
1109 in += 16;
1110 len -= 16;
1111 }
1112 GHASH(ctx, out - j, j);
1113 }
1114 # else
1115 while (len >= 16) {
1116 size_t *out_t = (size_t *)out;
1117 const size_t *in_t = (const size_t *)in;
1118
1119 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1120 ++ctr;
1121 if (IS_LITTLE_ENDIAN)
1122 # ifdef BSWAP4
1123 ctx->Yi.d[3] = BSWAP4(ctr);
1124 # else
1125 PUTU32(ctx->Yi.c + 12, ctr);
1126 # endif
1127 else
1128 ctx->Yi.d[3] = ctr;
1129 for (i = 0; i < 16 / sizeof(size_t); ++i)
1130 ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1131 GCM_MUL(ctx);
1132 out += 16;
1133 in += 16;
1134 len -= 16;
1135 }
1136 # endif
1137 if (len) {
1138 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1139 ++ctr;
1140 if (IS_LITTLE_ENDIAN)
1141 # ifdef BSWAP4
1142 ctx->Yi.d[3] = BSWAP4(ctr);
1143 # else
1144 PUTU32(ctx->Yi.c + 12, ctr);
1145 # endif
1146 else
1147 ctx->Yi.d[3] = ctr;
1148 # if defined(GHASH)
1149 while (len--) {
1150 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1151 ++n;
1152 }
1153 # else
1154 while (len--) {
1155 ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1156 ++n;
1157 }
1158 mres = n;
1159 # endif
1160 }
1161
1162 ctx->mres = mres;
1163 return 0;
1164 } while (0);
1165 }
1166 #endif
1167 for (i = 0; i < len; ++i) {
1168 if (n == 0) {
1169 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1170 ++ctr;
1171 if (IS_LITTLE_ENDIAN)
1172 #ifdef BSWAP4
1173 ctx->Yi.d[3] = BSWAP4(ctr);
1174 #else
1175 PUTU32(ctx->Yi.c + 12, ctr);
1176 #endif
1177 else
1178 ctx->Yi.d[3] = ctr;
1179 }
1180 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1181 ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
1182 n = (n + 1) % 16;
1183 if (mres == sizeof(ctx->Xn)) {
1184 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1185 mres = 0;
1186 }
1187 #else
1188 ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
1189 mres = n = (n + 1) % 16;
1190 if (n == 0)
1191 GCM_MUL(ctx);
1192 #endif
1193 }
1194
1195 ctx->mres = mres;
1196 return 0;
1197 }
1198
1199 int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
1200 const unsigned char *in, unsigned char *out,
1201 size_t len)
1202 {
1203 DECLARE_IS_ENDIAN;
1204 unsigned int n, ctr, mres;
1205 size_t i;
1206 u64 mlen = ctx->len.u[1];
1207 block128_f block = ctx->block;
1208 void *key = ctx->key;
1209 #ifdef GCM_FUNCREF_4BIT
1210 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1211 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1212 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1213 const u8 *inp, size_t len) = ctx->ghash;
1214 # endif
1215 #endif
1216
1217 mlen += len;
1218 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1219 return -1;
1220 ctx->len.u[1] = mlen;
1221
1222 mres = ctx->mres;
1223
1224 if (ctx->ares) {
1225 /* First call to decrypt finalizes GHASH(AAD) */
1226 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1227 if (len == 0) {
1228 GCM_MUL(ctx);
1229 ctx->ares = 0;
1230 return 0;
1231 }
1232 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1233 ctx->Xi.u[0] = 0;
1234 ctx->Xi.u[1] = 0;
1235 mres = sizeof(ctx->Xi);
1236 #else
1237 GCM_MUL(ctx);
1238 #endif
1239 ctx->ares = 0;
1240 }
1241
1242 if (IS_LITTLE_ENDIAN)
1243 #ifdef BSWAP4
1244 ctr = BSWAP4(ctx->Yi.d[3]);
1245 #else
1246 ctr = GETU32(ctx->Yi.c + 12);
1247 #endif
1248 else
1249 ctr = ctx->Yi.d[3];
1250
1251 n = mres % 16;
1252 #if !defined(OPENSSL_SMALL_FOOTPRINT)
1253 if (16 % sizeof(size_t) == 0) { /* always true actually */
1254 do {
1255 if (n) {
1256 # if defined(GHASH)
1257 while (n && len) {
1258 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1259 --len;
1260 n = (n + 1) % 16;
1261 }
1262 if (n == 0) {
1263 GHASH(ctx, ctx->Xn, mres);
1264 mres = 0;
1265 } else {
1266 ctx->mres = mres;
1267 return 0;
1268 }
1269 # else
1270 while (n && len) {
1271 u8 c = *(in++);
1272 *(out++) = c ^ ctx->EKi.c[n];
1273 ctx->Xi.c[n] ^= c;
1274 --len;
1275 n = (n + 1) % 16;
1276 }
1277 if (n == 0) {
1278 GCM_MUL(ctx);
1279 mres = 0;
1280 } else {
1281 ctx->mres = n;
1282 return 0;
1283 }
1284 # endif
1285 }
1286 # if defined(STRICT_ALIGNMENT)
1287 if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
1288 break;
1289 # endif
1290 # if defined(GHASH)
1291 if (len >= 16 && mres) {
1292 GHASH(ctx, ctx->Xn, mres);
1293 mres = 0;
1294 }
1295 # if defined(GHASH_CHUNK)
1296 while (len >= GHASH_CHUNK) {
1297 size_t j = GHASH_CHUNK;
1298
1299 GHASH(ctx, in, GHASH_CHUNK);
1300 while (j) {
1301 size_t_aX *out_t = (size_t_aX *)out;
1302 const size_t_aX *in_t = (const size_t_aX *)in;
1303
1304 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1305 ++ctr;
1306 if (IS_LITTLE_ENDIAN)
1307 # ifdef BSWAP4
1308 ctx->Yi.d[3] = BSWAP4(ctr);
1309 # else
1310 PUTU32(ctx->Yi.c + 12, ctr);
1311 # endif
1312 else
1313 ctx->Yi.d[3] = ctr;
1314 for (i = 0; i < 16 / sizeof(size_t); ++i)
1315 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1316 out += 16;
1317 in += 16;
1318 j -= 16;
1319 }
1320 len -= GHASH_CHUNK;
1321 }
1322 # endif
1323 if ((i = (len & (size_t)-16))) {
1324 GHASH(ctx, in, i);
1325 while (len >= 16) {
1326 size_t_aX *out_t = (size_t_aX *)out;
1327 const size_t_aX *in_t = (const size_t_aX *)in;
1328
1329 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1330 ++ctr;
1331 if (IS_LITTLE_ENDIAN)
1332 # ifdef BSWAP4
1333 ctx->Yi.d[3] = BSWAP4(ctr);
1334 # else
1335 PUTU32(ctx->Yi.c + 12, ctr);
1336 # endif
1337 else
1338 ctx->Yi.d[3] = ctr;
1339 for (i = 0; i < 16 / sizeof(size_t); ++i)
1340 out_t[i] = in_t[i] ^ ctx->EKi.t[i];
1341 out += 16;
1342 in += 16;
1343 len -= 16;
1344 }
1345 }
1346 # else
1347 while (len >= 16) {
1348 size_t *out_t = (size_t *)out;
1349 const size_t *in_t = (const size_t *)in;
1350
1351 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1352 ++ctr;
1353 if (IS_LITTLE_ENDIAN)
1354 # ifdef BSWAP4
1355 ctx->Yi.d[3] = BSWAP4(ctr);
1356 # else
1357 PUTU32(ctx->Yi.c + 12, ctr);
1358 # endif
1359 else
1360 ctx->Yi.d[3] = ctr;
1361 for (i = 0; i < 16 / sizeof(size_t); ++i) {
1362 size_t c = in_t[i];
1363 out_t[i] = c ^ ctx->EKi.t[i];
1364 ctx->Xi.t[i] ^= c;
1365 }
1366 GCM_MUL(ctx);
1367 out += 16;
1368 in += 16;
1369 len -= 16;
1370 }
1371 # endif
1372 if (len) {
1373 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1374 ++ctr;
1375 if (IS_LITTLE_ENDIAN)
1376 # ifdef BSWAP4
1377 ctx->Yi.d[3] = BSWAP4(ctr);
1378 # else
1379 PUTU32(ctx->Yi.c + 12, ctr);
1380 # endif
1381 else
1382 ctx->Yi.d[3] = ctr;
1383 # if defined(GHASH)
1384 while (len--) {
1385 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1386 ++n;
1387 }
1388 # else
1389 while (len--) {
1390 u8 c = in[n];
1391 ctx->Xi.c[n] ^= c;
1392 out[n] = c ^ ctx->EKi.c[n];
1393 ++n;
1394 }
1395 mres = n;
1396 # endif
1397 }
1398
1399 ctx->mres = mres;
1400 return 0;
1401 } while (0);
1402 }
1403 #endif
1404 for (i = 0; i < len; ++i) {
1405 u8 c;
1406 if (n == 0) {
1407 (*block) (ctx->Yi.c, ctx->EKi.c, key);
1408 ++ctr;
1409 if (IS_LITTLE_ENDIAN)
1410 #ifdef BSWAP4
1411 ctx->Yi.d[3] = BSWAP4(ctr);
1412 #else
1413 PUTU32(ctx->Yi.c + 12, ctr);
1414 #endif
1415 else
1416 ctx->Yi.d[3] = ctr;
1417 }
1418 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1419 out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
1420 n = (n + 1) % 16;
1421 if (mres == sizeof(ctx->Xn)) {
1422 GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
1423 mres = 0;
1424 }
1425 #else
1426 c = in[i];
1427 out[i] = c ^ ctx->EKi.c[n];
1428 ctx->Xi.c[n] ^= c;
1429 mres = n = (n + 1) % 16;
1430 if (n == 0)
1431 GCM_MUL(ctx);
1432 #endif
1433 }
1434
1435 ctx->mres = mres;
1436 return 0;
1437 }
1438
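/*-
 * Variant of CRYPTO_gcm128_encrypt that drives a caller-supplied ctr128_f
 * "stream" routine (typically a hardware-accelerated CTR implementation
 * that encrypts many blocks per call); the counter, GHASH and tail
 * handling otherwise mirror CRYPTO_gcm128_encrypt.
 */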
1439 int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
1440 const unsigned char *in, unsigned char *out,
1441 size_t len, ctr128_f stream)
1442 {
1443 #if defined(OPENSSL_SMALL_FOOTPRINT)
1444 return CRYPTO_gcm128_encrypt(ctx, in, out, len);
1445 #else
1446 DECLARE_IS_ENDIAN;
1447 unsigned int n, ctr, mres;
1448 size_t i;
1449 u64 mlen = ctx->len.u[1];
1450 void *key = ctx->key;
1451 # ifdef GCM_FUNCREF_4BIT
1452 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1453 # ifdef GHASH
1454 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1455 const u8 *inp, size_t len) = ctx->ghash;
1456 # endif
1457 # endif
1458
1459 mlen += len;
1460 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1461 return -1;
1462 ctx->len.u[1] = mlen;
1463
1464 mres = ctx->mres;
1465
1466 if (ctx->ares) {
1467 /* First call to encrypt finalizes GHASH(AAD) */
1468 #if defined(GHASH)
1469 if (len == 0) {
1470 GCM_MUL(ctx);
1471 ctx->ares = 0;
1472 return 0;
1473 }
1474 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1475 ctx->Xi.u[0] = 0;
1476 ctx->Xi.u[1] = 0;
1477 mres = sizeof(ctx->Xi);
1478 #else
1479 GCM_MUL(ctx);
1480 #endif
1481 ctx->ares = 0;
1482 }
1483
1484 if (IS_LITTLE_ENDIAN)
1485 # ifdef BSWAP4
1486 ctr = BSWAP4(ctx->Yi.d[3]);
1487 # else
1488 ctr = GETU32(ctx->Yi.c + 12);
1489 # endif
1490 else
1491 ctr = ctx->Yi.d[3];
1492
1493 n = mres % 16;
1494 if (n) {
1495 # if defined(GHASH)
1496 while (n && len) {
1497 ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
1498 --len;
1499 n = (n + 1) % 16;
1500 }
1501 if (n == 0) {
1502 GHASH(ctx, ctx->Xn, mres);
1503 mres = 0;
1504 } else {
1505 ctx->mres = mres;
1506 return 0;
1507 }
1508 # else
1509 while (n && len) {
1510 ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
1511 --len;
1512 n = (n + 1) % 16;
1513 }
1514 if (n == 0) {
1515 GCM_MUL(ctx);
1516 mres = 0;
1517 } else {
1518 ctx->mres = n;
1519 return 0;
1520 }
1521 # endif
1522 }
1523 # if defined(GHASH)
1524 if (len >= 16 && mres) {
1525 GHASH(ctx, ctx->Xn, mres);
1526 mres = 0;
1527 }
1528 # if defined(GHASH_CHUNK)
1529 while (len >= GHASH_CHUNK) {
1530 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1531 ctr += GHASH_CHUNK / 16;
1532 if (IS_LITTLE_ENDIAN)
1533 # ifdef BSWAP4
1534 ctx->Yi.d[3] = BSWAP4(ctr);
1535 # else
1536 PUTU32(ctx->Yi.c + 12, ctr);
1537 # endif
1538 else
1539 ctx->Yi.d[3] = ctr;
1540 GHASH(ctx, out, GHASH_CHUNK);
1541 out += GHASH_CHUNK;
1542 in += GHASH_CHUNK;
1543 len -= GHASH_CHUNK;
1544 }
1545 # endif
1546 # endif
1547 if ((i = (len & (size_t)-16))) {
1548 size_t j = i / 16;
1549
1550 (*stream) (in, out, j, key, ctx->Yi.c);
1551 ctr += (unsigned int)j;
1552 if (IS_LITTLE_ENDIAN)
1553 # ifdef BSWAP4
1554 ctx->Yi.d[3] = BSWAP4(ctr);
1555 # else
1556 PUTU32(ctx->Yi.c + 12, ctr);
1557 # endif
1558 else
1559 ctx->Yi.d[3] = ctr;
1560 in += i;
1561 len -= i;
1562 # if defined(GHASH)
1563 GHASH(ctx, out, i);
1564 out += i;
1565 # else
1566 while (j--) {
1567 for (i = 0; i < 16; ++i)
1568 ctx->Xi.c[i] ^= out[i];
1569 GCM_MUL(ctx);
1570 out += 16;
1571 }
1572 # endif
1573 }
1574 if (len) {
1575 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1576 ++ctr;
1577 if (IS_LITTLE_ENDIAN)
1578 # ifdef BSWAP4
1579 ctx->Yi.d[3] = BSWAP4(ctr);
1580 # else
1581 PUTU32(ctx->Yi.c + 12, ctr);
1582 # endif
1583 else
1584 ctx->Yi.d[3] = ctr;
1585 while (len--) {
1586 # if defined(GHASH)
1587 ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
1588 # else
1589 ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
1590 # endif
1591 ++n;
1592 }
1593 }
1594
1595 ctx->mres = mres;
1596 return 0;
1597 #endif
1598 }
1599
1600 int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
1601 const unsigned char *in, unsigned char *out,
1602 size_t len, ctr128_f stream)
1603 {
1604 #if defined(OPENSSL_SMALL_FOOTPRINT)
1605 return CRYPTO_gcm128_decrypt(ctx, in, out, len);
1606 #else
1607 DECLARE_IS_ENDIAN;
1608 unsigned int n, ctr, mres;
1609 size_t i;
1610 u64 mlen = ctx->len.u[1];
1611 void *key = ctx->key;
1612 # ifdef GCM_FUNCREF_4BIT
1613 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1614 # ifdef GHASH
1615 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1616 const u8 *inp, size_t len) = ctx->ghash;
1617 # endif
1618 # endif
1619
1620 mlen += len;
1621 if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
1622 return -1;
1623 ctx->len.u[1] = mlen;
1624
1625 mres = ctx->mres;
1626
1627 if (ctx->ares) {
1628 /* First call to decrypt finalizes GHASH(AAD) */
1629 # if defined(GHASH)
1630 if (len == 0) {
1631 GCM_MUL(ctx);
1632 ctx->ares = 0;
1633 return 0;
1634 }
1635 memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
1636 ctx->Xi.u[0] = 0;
1637 ctx->Xi.u[1] = 0;
1638 mres = sizeof(ctx->Xi);
1639 # else
1640 GCM_MUL(ctx);
1641 # endif
1642 ctx->ares = 0;
1643 }
1644
1645 if (IS_LITTLE_ENDIAN)
1646 # ifdef BSWAP4
1647 ctr = BSWAP4(ctx->Yi.d[3]);
1648 # else
1649 ctr = GETU32(ctx->Yi.c + 12);
1650 # endif
1651 else
1652 ctr = ctx->Yi.d[3];
1653
1654 n = mres % 16;
1655 if (n) {
1656 # if defined(GHASH)
1657 while (n && len) {
1658 *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
1659 --len;
1660 n = (n + 1) % 16;
1661 }
1662 if (n == 0) {
1663 GHASH(ctx, ctx->Xn, mres);
1664 mres = 0;
1665 } else {
1666 ctx->mres = mres;
1667 return 0;
1668 }
1669 # else
1670 while (n && len) {
1671 u8 c = *(in++);
1672 *(out++) = c ^ ctx->EKi.c[n];
1673 ctx->Xi.c[n] ^= c;
1674 --len;
1675 n = (n + 1) % 16;
1676 }
1677 if (n == 0) {
1678 GCM_MUL(ctx);
1679 mres = 0;
1680 } else {
1681 ctx->mres = n;
1682 return 0;
1683 }
1684 # endif
1685 }
1686 # if defined(GHASH)
1687 if (len >= 16 && mres) {
1688 GHASH(ctx, ctx->Xn, mres);
1689 mres = 0;
1690 }
1691 # if defined(GHASH_CHUNK)
1692 while (len >= GHASH_CHUNK) {
1693 GHASH(ctx, in, GHASH_CHUNK);
1694 (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
1695 ctr += GHASH_CHUNK / 16;
1696 if (IS_LITTLE_ENDIAN)
1697 # ifdef BSWAP4
1698 ctx->Yi.d[3] = BSWAP4(ctr);
1699 # else
1700 PUTU32(ctx->Yi.c + 12, ctr);
1701 # endif
1702 else
1703 ctx->Yi.d[3] = ctr;
1704 out += GHASH_CHUNK;
1705 in += GHASH_CHUNK;
1706 len -= GHASH_CHUNK;
1707 }
1708 # endif
1709 # endif
1710 if ((i = (len & (size_t)-16))) {
1711 size_t j = i / 16;
1712
1713 # if defined(GHASH)
1714 GHASH(ctx, in, i);
1715 # else
1716 while (j--) {
1717 size_t k;
1718 for (k = 0; k < 16; ++k)
1719 ctx->Xi.c[k] ^= in[k];
1720 GCM_MUL(ctx);
1721 in += 16;
1722 }
1723 j = i / 16;
1724 in -= i;
1725 # endif
1726 (*stream) (in, out, j, key, ctx->Yi.c);
1727 ctr += (unsigned int)j;
1728 if (IS_LITTLE_ENDIAN)
1729 # ifdef BSWAP4
1730 ctx->Yi.d[3] = BSWAP4(ctr);
1731 # else
1732 PUTU32(ctx->Yi.c + 12, ctr);
1733 # endif
1734 else
1735 ctx->Yi.d[3] = ctr;
1736 out += i;
1737 in += i;
1738 len -= i;
1739 }
1740 if (len) {
1741 (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
1742 ++ctr;
1743 if (IS_LITTLE_ENDIAN)
1744 # ifdef BSWAP4
1745 ctx->Yi.d[3] = BSWAP4(ctr);
1746 # else
1747 PUTU32(ctx->Yi.c + 12, ctr);
1748 # endif
1749 else
1750 ctx->Yi.d[3] = ctr;
1751 while (len--) {
1752 # if defined(GHASH)
1753 out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
1754 # else
1755 u8 c = in[n];
1756 ctx->Xi.c[mres++] ^= c;
1757 out[n] = c ^ ctx->EKi.c[n];
1758 # endif
1759 ++n;
1760 }
1761 }
1762
1763 ctx->mres = mres;
1764 return 0;
1765 #endif
1766 }
1767
1768 int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
1769 size_t len)
1770 {
1771 DECLARE_IS_ENDIAN;
1772 u64 alen = ctx->len.u[0] << 3;
1773 u64 clen = ctx->len.u[1] << 3;
1774 #ifdef GCM_FUNCREF_4BIT
1775 void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
1776 # if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1777 void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
1778 const u8 *inp, size_t len) = ctx->ghash;
1779 # endif
1780 #endif
1781
1782 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1783 u128 bitlen;
1784 unsigned int mres = ctx->mres;
1785
1786 if (mres) {
1787 unsigned blocks = (mres + 15) & -16;
1788
1789 memset(ctx->Xn + mres, 0, blocks - mres);
1790 mres = blocks;
1791 if (mres == sizeof(ctx->Xn)) {
1792 GHASH(ctx, ctx->Xn, mres);
1793 mres = 0;
1794 }
1795 } else if (ctx->ares) {
1796 GCM_MUL(ctx);
1797 }
1798 #else
1799 if (ctx->mres || ctx->ares)
1800 GCM_MUL(ctx);
1801 #endif
1802
1803 if (IS_LITTLE_ENDIAN) {
1804 #ifdef BSWAP8
1805 alen = BSWAP8(alen);
1806 clen = BSWAP8(clen);
1807 #else
1808 u8 *p = ctx->len.c;
1809
1810 ctx->len.u[0] = alen;
1811 ctx->len.u[1] = clen;
1812
1813 alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
1814 clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
1815 #endif
1816 }
1817
1818 #if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
1819 bitlen.hi = alen;
1820 bitlen.lo = clen;
1821 memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
1822 mres += sizeof(bitlen);
1823 GHASH(ctx, ctx->Xn, mres);
1824 #else
1825 ctx->Xi.u[0] ^= alen;
1826 ctx->Xi.u[1] ^= clen;
1827 GCM_MUL(ctx);
1828 #endif
1829
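    /* Xi now holds GHASH(A || C || len(A) || len(C)); XORing in EK0 =
     * E(K, J0) produces the authentication tag. */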
1830 ctx->Xi.u[0] ^= ctx->EK0.u[0];
1831 ctx->Xi.u[1] ^= ctx->EK0.u[1];
1832
1833 if (tag && len <= sizeof(ctx->Xi))
1834 return CRYPTO_memcmp(ctx->Xi.c, tag, len);
1835 else
1836 return -1;
1837 }
1838
1839 void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
1840 {
1841 CRYPTO_gcm128_finish(ctx, NULL, 0);
1842 memcpy(tag, ctx->Xi.c,
1843 len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
1844 }
1845
1846 GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
1847 {
1848 GCM128_CONTEXT *ret;
1849
1850 if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
1851 CRYPTO_gcm128_init(ret, key, block);
1852
1853 return ret;
1854 }
1855
1856 void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
1857 {
1858 OPENSSL_clear_free(ctx, sizeof(*ctx));
1859 }
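/*-
 * Minimal usage sketch (illustrative only, excluded from compilation):
 * one-shot AES-128-GCM sealing with the functions above.  It assumes the
 * legacy AES_* block API from <openssl/aes.h>; the helper names
 * aes128_block() and gcm128_seal_example() are purely illustrative, and
 * real applications should use the EVP interface instead.
 */
#if 0
# include <openssl/aes.h>

static void aes128_block(const unsigned char in[16], unsigned char out[16],
                         const void *key)
{
    /* Adapt AES_encrypt to the block128_f signature expected by GCM128 */
    AES_encrypt(in, out, (const AES_KEY *)key);
}

static int gcm128_seal_example(const unsigned char key[16],
                               const unsigned char iv[12],
                               const unsigned char *aad, size_t aad_len,
                               const unsigned char *pt, unsigned char *ct,
                               size_t len, unsigned char tag[16])
{
    GCM128_CONTEXT gcm;
    AES_KEY ks;

    if (AES_set_encrypt_key(key, 128, &ks) != 0)
        return -1;
    CRYPTO_gcm128_init(&gcm, &ks, aes128_block);
    CRYPTO_gcm128_setiv(&gcm, iv, 12);      /* 96-bit IV: J0 = IV||0^31||1 */
    if (CRYPTO_gcm128_aad(&gcm, aad, aad_len) != 0)
        return -1;
    if (CRYPTO_gcm128_encrypt(&gcm, pt, ct, len) != 0)
        return -1;
    CRYPTO_gcm128_tag(&gcm, tag, 16);       /* tag = GHASH output ^ E(K, J0) */
    return 0;
}
#endif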