/*
 * Copyright 2010-2021 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
#include <openssl/crypto.h>
#include "internal/cryptlib.h"
#include "internal/endian.h"
#include "crypto/modes.h"
#if defined(__GNUC__) && !defined(STRICT_ALIGNMENT)
typedef size_t size_t_aX __attribute((__aligned__(1)));
#else
typedef size_t size_t_aX;
#endif
#if defined(BSWAP4) && defined(STRICT_ALIGNMENT)
/* redefine, because alignment is ensured */
# undef  GETU32
# define GETU32(p)       BSWAP4(*(const u32 *)(p))
# undef  PUTU32
# define PUTU32(p,v)     *(u32 *)(p) = BSWAP4(v)
#endif
#define PACK(s)         ((size_t)(s)<<(sizeof(size_t)*8-16))
#define REDUCE1BIT(V)   do { \
        if (sizeof(size_t)==8) { \
                u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^T; \
        } else { \
                u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); \
                V.lo  = (V.hi<<63)|(V.lo>>1); \
                V.hi  = (V.hi>>1 )^((u64)T<<32); \
        } \
} while(0)
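
/*
 * Illustration (not part of the original code): REDUCE1BIT multiplies the
 * field element V by x in GF(2^128) under GCM's bit-reflected convention,
 * i.e. it shifts the 128-bit value right by one bit and, when a bit falls
 * off, folds the reduction constant 0xE1 || 0^120 back in.  A minimal
 * stand-alone sketch of the 64-bit branch, assuming a u128 struct with
 * u64 hi/lo members:
 */
#if 0
static void reduce1bit_sketch(u128 *V)
{
    /* mask is all-ones iff the low bit is set, so the fold below is
     * branch-free (constant-time) */
    u64 mask = 0 - (V->lo & 1);

    V->lo = (V->hi << 63) | (V->lo >> 1);
    V->hi = (V->hi >> 1) ^ (U64(0xe100000000000000) & mask);
}
#endif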
/*
 * Even though the permitted values for TABLE_BITS are 8, 4 and 1, it
 * should never be set to 8: 8 is effectively reserved for testing
 * purposes.  TABLE_BITS>1 selects the lookup-table-driven implementations
 * referred to as "Shoup's" in the GCM specification, so OpenSSL does not
 * cover the whole spectrum of possible table-driven implementations.
 * Why?  In the non-"Shoup's" case the memory access pattern is segmented
 * in such a manner that it is trivial to see that cache-timing
 * information can reveal a fair portion of the intermediate hash value.
 * Given that the ciphertext is always available to an attacker, it is
 * possible to attempt to deduce the secret parameter H and, if
 * successful, to tamper with messages [which is nothing but trivial in
 * CTR mode].  In the "Shoup's" case it is not as trivial, but there is
 * no reason to believe that it is resistant to cache-timing attacks
 * either.  The thing about the "8-bit" implementation is that it
 * consumes 16 (sixteen) times more memory, 4KB per individual key +
 * 1KB shared, though on the pro side it should be twice as fast as the
 * "4-bit" version.  For gcc-generated x86[_64] code the "8-bit" version
 * was observed to run ~75% faster, closer to 100% for commercial
 * compilers...  Yet the "4-bit" procedure is preferred, because it is
 * believed to provide a better security-performance balance and adequate
 * all-round performance.  "All-round" refers to things like:
 *
 * - shorter setup time effectively improves overall timing for
 *   handling short messages;
 * - larger table allocation can become unbearable because of VM
 *   subsystem penalties (for example on Windows a large enough free
 *   results in VM working-set trimming, meaning that a subsequent
 *   malloc would immediately incur working-set expansion);
 * - a larger table has a larger cache footprint, which can affect the
 *   performance of other code paths (not necessarily even from the same
 *   thread in a Hyper-Threading world);
 *
 * A value of 1 is not appropriate for performance reasons.
 */
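
/*
 * For scale (illustrative, not part of the original file): each Htable
 * entry is a u128 (16 bytes), so the per-key table is 16 entries with
 * TABLE_BITS==4 versus 256 entries with TABLE_BITS==8, plus the shared
 * rem_8bit table of 256 size_t entries (1KB on 32-bit, 2KB on 64-bit
 * builds).  A compile-time sketch of those figures:
 */
#if 0
typedef struct { u64 hi, lo; } u128_sketch;
static const size_t htable4_bytes = 16 * sizeof(u128_sketch);  /* 256B per key */
static const size_t htable8_bytes = 256 * sizeof(u128_sketch); /* 4KB per key  */
static const size_t rem8_bytes    = 256 * sizeof(size_t);      /* shared table */
#endif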
#if     TABLE_BITS==8

static void gcm_init_8bit(u128 Htable[256], u64 H[2])
{
    int i, j;
    u128 V;

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

    for (Htable[128] = V, i = 64; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 256; i <<= 1) {
        u128 *Hi = Htable + i, H0 = *Hi;
        for (j = 1; j < i; ++j) {
            Hi[j].hi = H0.hi ^ Htable[j].hi;
            Hi[j].lo = H0.lo ^ Htable[j].lo;
        }
    }
}
static void gcm_gmult_8bit(u64 Xi[2], const u128 Htable[256])
{
    u128 Z = { 0, 0 };
    const u8 *xi = (const u8 *)Xi + 15;
    size_t rem, n = *xi;
    DECLARE_IS_ENDIAN;
    static const size_t rem_8bit[256] = {
        PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246),
        PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E),
        PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56),
        PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E),
        PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66),
        PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E),
        PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076),
        PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E),
        PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06),
        PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E),
        PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416),
        PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E),
        PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626),
        PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E),
        PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836),
        PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E),
        PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6),
        PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE),
        PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6),
        PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE),
        PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6),
        PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE),
        PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6),
        PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE),
        PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86),
        PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E),
        PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496),
        PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E),
        PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6),
        PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE),
        PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6),
        PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE),
        PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346),
        PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E),
        PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56),
        PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E),
        PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66),
        PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E),
        PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176),
        PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E),
        PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06),
        PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E),
        PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516),
        PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E),
        PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726),
        PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E),
        PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936),
        PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E),
        PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6),
        PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE),
        PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6),
        PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE),
        PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6),
        PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE),
        PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6),
        PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE),
        PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86),
        PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E),
        PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596),
        PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E),
        PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6),
        PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE),
        PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6),
        PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE)
    };
    while (1) {
        Z.hi ^= Htable[n].hi;
        Z.lo ^= Htable[n].lo;

        if ((u8 *)Xi == xi)
            break;

        n = *(--xi);

        rem = (size_t)Z.lo & 0xff;
        Z.lo = (Z.hi << 56) | (Z.lo >> 8);
        Z.hi = (Z.hi >> 8);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_8bit[rem];
        else
            Z.hi ^= (u64)rem_8bit[rem] << 32;
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)   gcm_gmult_8bit(ctx->Xi.u,ctx->Htable)
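
/*
 * Illustration (not part of the original): GHASH digests 16-byte blocks
 * by XOR-ing each block into the accumulator Xi and then multiplying by
 * the hash key H in GF(2^128), i.e. Xi = (Xi ^ block) * H.  GCM_MUL
 * performs the multiplication in place, so one absorbed block looks like:
 */
#if 0
for (i = 0; i < 16; ++i)
    ctx->Xi.c[i] ^= block[i];   /* fold the block into the accumulator */
GCM_MUL(ctx);                   /* Xi *= H, via the active gmult path  */
#endif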
#elif   TABLE_BITS==4

static void gcm_init_4bit(u128 Htable[16], u64 H[2])
{
    u128 V;
# if defined(OPENSSL_SMALL_FOOTPRINT)
    int i;
# endif

    Htable[0].hi = 0;
    Htable[0].lo = 0;
    V.hi = H[0];
    V.lo = H[1];

# if defined(OPENSSL_SMALL_FOOTPRINT)
    for (Htable[8] = V, i = 4; i > 0; i >>= 1) {
        REDUCE1BIT(V);
        Htable[i] = V;
    }

    for (i = 2; i < 16; i <<= 1) {
        u128 *Hi = Htable + i;
        int j;
        for (V = *Hi, j = 1; j < i; ++j) {
            Hi[j].hi = V.hi ^ Htable[j].hi;
            Hi[j].lo = V.lo ^ Htable[j].lo;
        }
    }
# else
    Htable[8] = V;
    REDUCE1BIT(V);
    Htable[4] = V;
    REDUCE1BIT(V);
    Htable[2] = V;
    REDUCE1BIT(V);
    Htable[1] = V;
    Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
    V = Htable[4];
    Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
    Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
    Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
    V = Htable[8];
    Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
    Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
    Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
    Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
    Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
    Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
    Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
# endif
# if defined(GHASH_ASM) && (defined(__arm__) || defined(__arm))
    /*
     * ARM assembler expects specific dword order in Htable.
     */
    {
        int j;
        DECLARE_IS_ENDIAN;

        if (IS_LITTLE_ENDIAN)
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo;
                Htable[j].lo = V.hi;
            }
        else
            for (j = 0; j < 16; ++j) {
                V = Htable[j];
                Htable[j].hi = V.lo << 32 | V.lo >> 32;
                Htable[j].lo = V.hi << 32 | V.hi >> 32;
            }
    }
# endif
}
# ifndef GHASH_ASM
static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),
    PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560),
    PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0)
};
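
/*
 * Illustration (not part of the original): rem_4bit maps the four bits
 * shifted out by a 4-bit step to their reduction, i.e. each dropped bit
 * contributes a shifted copy of the GCM reduction constant 0xE1,
 * accumulated carry-lessly.  A hypothetical generator reproducing the
 * 16-bit values above (before PACK()):
 */
#if 0
static void rem_4bit_gen_sketch(unsigned int rem[16])
{
    unsigned int i, b;

    for (i = 0; i < 16; i++) {
        rem[i] = 0;
        for (b = 0; b < 4; b++)
            if (i & (1u << b))
                rem[i] ^= 0xE1u << (5 + b);  /* e.g. rem[1] == 0x1C20 */
    }
}
#endif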
static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
{
    u128 Z;
    int cnt = 15;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

    nlo = ((const u8 *)Xi)[15];
    nhi = nlo >> 4;
    nlo &= 0xf;

    Z.hi = Htable[nlo].hi;
    Z.lo = Htable[nlo].lo;

    while (1) {
        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;

        if (--cnt < 0)
            break;

        nlo = ((const u8 *)Xi)[cnt];
        nhi = nlo >> 4;
        nlo &= 0xf;

        rem = (size_t)Z.lo & 0xf;
        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);
        if (sizeof(size_t) == 8)
            Z.hi ^= rem_4bit[rem];
        else
            Z.hi ^= (u64)rem_4bit[rem] << 32;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;
    }

    if (IS_LITTLE_ENDIAN) {
#  ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
#  else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
#  endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}
#  if !defined(OPENSSL_SMALL_FOOTPRINT)
/*
 * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for
 * details... Compiler-generated code doesn't seem to give any
 * performance improvement, at least not on x86[_64]. It's here
 * mostly as reference and a placeholder for possible future
 * non-trivial optimization[s]...
 */
static void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
                           const u8 *inp, size_t len)
{
    u128 Z;
    int cnt;
    size_t rem, nlo, nhi;
    DECLARE_IS_ENDIAN;

#   if 1
    do {
        cnt = 15;
        nlo = ((const u8 *)Xi)[15];
        nlo ^= inp[15];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi = Htable[nlo].hi;
        Z.lo = Htable[nlo].lo;

        while (1) {
            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nhi].hi;
            Z.lo ^= Htable[nhi].lo;

            if (--cnt < 0)
                break;

            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            rem = (size_t)Z.lo & 0xf;
            Z.lo = (Z.hi << 60) | (Z.lo >> 4);
            Z.hi = (Z.hi >> 4);
            if (sizeof(size_t) == 8)
                Z.hi ^= rem_4bit[rem];
            else
                Z.hi ^= (u64)rem_4bit[rem] << 32;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;
        }
#   else
    /*
     * Extra 256+16 bytes per-key plus 512 bytes shared tables
     * [should] give ~50% improvement... One could have PACK()-ed
     * the rem_8bit even here, but the priority is to minimize
     * cache footprint...
     */
    u128 Hshr4[16];             /* Htable shifted right by 4 bits */
    u8 Hshl4[16];               /* Htable shifted left by 4 bits */
    static const unsigned short rem_8bit[256] = {
        0x0000, 0x01C2, 0x0384, 0x0246, 0x0708, 0x06CA, 0x048C, 0x054E,
        0x0E10, 0x0FD2, 0x0D94, 0x0C56, 0x0918, 0x08DA, 0x0A9C, 0x0B5E,
        0x1C20, 0x1DE2, 0x1FA4, 0x1E66, 0x1B28, 0x1AEA, 0x18AC, 0x196E,
        0x1230, 0x13F2, 0x11B4, 0x1076, 0x1538, 0x14FA, 0x16BC, 0x177E,
        0x3840, 0x3982, 0x3BC4, 0x3A06, 0x3F48, 0x3E8A, 0x3CCC, 0x3D0E,
        0x3650, 0x3792, 0x35D4, 0x3416, 0x3158, 0x309A, 0x32DC, 0x331E,
        0x2460, 0x25A2, 0x27E4, 0x2626, 0x2368, 0x22AA, 0x20EC, 0x212E,
        0x2A70, 0x2BB2, 0x29F4, 0x2836, 0x2D78, 0x2CBA, 0x2EFC, 0x2F3E,
        0x7080, 0x7142, 0x7304, 0x72C6, 0x7788, 0x764A, 0x740C, 0x75CE,
        0x7E90, 0x7F52, 0x7D14, 0x7CD6, 0x7998, 0x785A, 0x7A1C, 0x7BDE,
        0x6CA0, 0x6D62, 0x6F24, 0x6EE6, 0x6BA8, 0x6A6A, 0x682C, 0x69EE,
        0x62B0, 0x6372, 0x6134, 0x60F6, 0x65B8, 0x647A, 0x663C, 0x67FE,
        0x48C0, 0x4902, 0x4B44, 0x4A86, 0x4FC8, 0x4E0A, 0x4C4C, 0x4D8E,
        0x46D0, 0x4712, 0x4554, 0x4496, 0x41D8, 0x401A, 0x425C, 0x439E,
        0x54E0, 0x5522, 0x5764, 0x56A6, 0x53E8, 0x522A, 0x506C, 0x51AE,
        0x5AF0, 0x5B32, 0x5974, 0x58B6, 0x5DF8, 0x5C3A, 0x5E7C, 0x5FBE,
        0xE100, 0xE0C2, 0xE284, 0xE346, 0xE608, 0xE7CA, 0xE58C, 0xE44E,
        0xEF10, 0xEED2, 0xEC94, 0xED56, 0xE818, 0xE9DA, 0xEB9C, 0xEA5E,
        0xFD20, 0xFCE2, 0xFEA4, 0xFF66, 0xFA28, 0xFBEA, 0xF9AC, 0xF86E,
        0xF330, 0xF2F2, 0xF0B4, 0xF176, 0xF438, 0xF5FA, 0xF7BC, 0xF67E,
        0xD940, 0xD882, 0xDAC4, 0xDB06, 0xDE48, 0xDF8A, 0xDDCC, 0xDC0E,
        0xD750, 0xD692, 0xD4D4, 0xD516, 0xD058, 0xD19A, 0xD3DC, 0xD21E,
        0xC560, 0xC4A2, 0xC6E4, 0xC726, 0xC268, 0xC3AA, 0xC1EC, 0xC02E,
        0xCB70, 0xCAB2, 0xC8F4, 0xC936, 0xCC78, 0xCDBA, 0xCFFC, 0xCE3E,
        0x9180, 0x9042, 0x9204, 0x93C6, 0x9688, 0x974A, 0x950C, 0x94CE,
        0x9F90, 0x9E52, 0x9C14, 0x9DD6, 0x9898, 0x995A, 0x9B1C, 0x9ADE,
        0x8DA0, 0x8C62, 0x8E24, 0x8FE6, 0x8AA8, 0x8B6A, 0x892C, 0x88EE,
        0x83B0, 0x8272, 0x8034, 0x81F6, 0x84B8, 0x857A, 0x873C, 0x86FE,
        0xA9C0, 0xA802, 0xAA44, 0xAB86, 0xAEC8, 0xAF0A, 0xAD4C, 0xAC8E,
        0xA7D0, 0xA612, 0xA454, 0xA596, 0xA0D8, 0xA11A, 0xA35C, 0xA29E,
        0xB5E0, 0xB422, 0xB664, 0xB7A6, 0xB2E8, 0xB32A, 0xB16C, 0xB0AE,
        0xBBF0, 0xBA32, 0xB874, 0xB9B6, 0xBCF8, 0xBD3A, 0xBF7C, 0xBEBE
    };
    /*
     * This pre-processing phase slows down procedure by approximately
     * same time as it makes each loop spin faster. In other words
     * single block performance is approximately same as straightforward
     * "4-bit" implementation, and then it goes only faster...
     */
    for (cnt = 0; cnt < 16; ++cnt) {
        Z.hi = Htable[cnt].hi;
        Z.lo = Htable[cnt].lo;
        Hshr4[cnt].lo = (Z.hi << 60) | (Z.lo >> 4);
        Hshr4[cnt].hi = (Z.hi >> 4);
        Hshl4[cnt] = (u8)(Z.lo << 4);
    }

    do {
        for (Z.lo = 0, Z.hi = 0, cnt = 15; cnt; --cnt) {
            nlo = ((const u8 *)Xi)[cnt];
            nlo ^= inp[cnt];
            nhi = nlo >> 4;
            nlo &= 0xf;

            Z.hi ^= Htable[nlo].hi;
            Z.lo ^= Htable[nlo].lo;

            rem = (size_t)Z.lo & 0xff;

            Z.lo = (Z.hi << 56) | (Z.lo >> 8);
            Z.hi = (Z.hi >> 8);

            Z.hi ^= Hshr4[nhi].hi;
            Z.lo ^= Hshr4[nhi].lo;
            Z.hi ^= (u64)rem_8bit[rem ^ Hshl4[nhi]] << 48;
        }

        nlo = ((const u8 *)Xi)[0];
        nlo ^= inp[0];
        nhi = nlo >> 4;
        nlo &= 0xf;

        Z.hi ^= Htable[nlo].hi;
        Z.lo ^= Htable[nlo].lo;

        rem = (size_t)Z.lo & 0xf;

        Z.lo = (Z.hi << 60) | (Z.lo >> 4);
        Z.hi = (Z.hi >> 4);

        Z.hi ^= Htable[nhi].hi;
        Z.lo ^= Htable[nhi].lo;
        Z.hi ^= ((u64)rem_8bit[rem << 4]) << 48;
#   endif

        if (IS_LITTLE_ENDIAN) {
#   ifdef BSWAP8
            Xi[0] = BSWAP8(Z.hi);
            Xi[1] = BSWAP8(Z.lo);
#   else
            u8 *p = (u8 *)Xi;
            u32 v;
            v = (u32)(Z.hi >> 32);
            PUTU32(p, v);
            v = (u32)(Z.hi);
            PUTU32(p + 4, v);
            v = (u32)(Z.lo >> 32);
            PUTU32(p + 8, v);
            v = (u32)(Z.lo);
            PUTU32(p + 12, v);
#   endif
        } else {
            Xi[0] = Z.hi;
            Xi[1] = Z.lo;
        }
    } while (inp += 16, len -= 16);
}
#  endif
# else                          /* GHASH_ASM */
void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# endif

# define GCM_MUL(ctx)      gcm_gmult_4bit(ctx->Xi.u,ctx->Htable)
# if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
#  define GHASH(ctx,in,len) gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" meant to mitigate cache-trashing
 * effects; in other words, the idea is to hash the data while it is
 * still in the L1 cache after the encryption pass...
 */
#  define GHASH_CHUNK       (3*1024)
# endif
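
/*
 * Illustration (not part of the original): with GHASH_CHUNK the bulk
 * encrypt loop alternates between enciphering one chunk and immediately
 * hashing that same chunk, so the ciphertext is still resident in L1
 * when GHASH reads it back.  A minimal sketch of that pattern, with
 * encrypt_ctr() standing in as a hypothetical CTR-pass helper:
 */
#if 0
while (len >= GHASH_CHUNK) {
    encrypt_ctr(ctx, in, out, GHASH_CHUNK);   /* fills L1 with ciphertext */
    GHASH(ctx, out, GHASH_CHUNK);             /* hashes while still hot   */
    in += GHASH_CHUNK;
    out += GHASH_CHUNK;
    len -= GHASH_CHUNK;
}
#endif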
#else                           /* TABLE_BITS */

static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2])
{
    u128 V, Z = { 0, 0 };
    long X;
    int i, j;
    const long *xi = (const long *)Xi;
    DECLARE_IS_ENDIAN;

    V.hi = H[0];                /* H is in host byte order, no byte swapping */
    V.lo = H[1];

    for (j = 0; j < 16 / sizeof(long); ++j) {
        if (IS_LITTLE_ENDIAN) {
            if (sizeof(long) == 8) {
# ifdef BSWAP8
                X = (long)(BSWAP8(xi[j]));
# else
                const u8 *p = (const u8 *)(xi + j);
                X = (long)((u64)GETU32(p) << 32 | GETU32(p + 4));
# endif
            } else {
                const u8 *p = (const u8 *)(xi + j);
                X = (long)GETU32(p);
            }
        } else
            X = xi[j];

        for (i = 0; i < 8 * sizeof(long); ++i, X <<= 1) {
            u64 M = (u64)(X >> (8 * sizeof(long) - 1));
            Z.hi ^= V.hi & M;
            Z.lo ^= V.lo & M;

            REDUCE1BIT(V);
        }
    }

    if (IS_LITTLE_ENDIAN) {
# ifdef BSWAP8
        Xi[0] = BSWAP8(Z.hi);
        Xi[1] = BSWAP8(Z.lo);
# else
        u8 *p = (u8 *)Xi;
        u32 v;
        v = (u32)(Z.hi >> 32);
        PUTU32(p, v);
        v = (u32)(Z.hi);
        PUTU32(p + 4, v);
        v = (u32)(Z.lo >> 32);
        PUTU32(p + 8, v);
        v = (u32)(Z.lo);
        PUTU32(p + 12, v);
# endif
    } else {
        Xi[0] = Z.hi;
        Xi[1] = Z.lo;
    }
}

# define GCM_MUL(ctx)      gcm_gmult_1bit(ctx->Xi.u,ctx->H.u)

#endif                          /* TABLE_BITS */
#if TABLE_BITS==4 && (defined(GHASH_ASM) || defined(OPENSSL_CPUID_OBJ))
# if !defined(I386_ONLY) && \
    (defined(__i386) || defined(__i386__) || \
     defined(__x86_64) || defined(__x86_64__) || \
     defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
#  define GHASH_ASM_X86_OR_64
#  define GCM_FUNCREF_4BIT

void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                     size_t len);

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define gcm_init_avx   gcm_init_clmul
#   define gcm_gmult_avx  gcm_gmult_clmul
#   define gcm_ghash_avx  gcm_ghash_clmul
#  else
void gcm_init_avx(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_avx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                   size_t len);
#  endif

#  if defined(__i386) || defined(__i386__) || defined(_M_IX86)
#   define GHASH_ASM_X86
void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);

void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                        size_t len);
#  endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
#  include "arm_arch.h"
#  if __ARM_MAX_ARCH__>=7
#   define GHASH_ASM_ARM
#   define GCM_FUNCREF_4BIT
#   define PMULL_CAPABLE        (OPENSSL_armcap_P & ARMV8_PMULL)
#   if defined(__arm__) || defined(__arm)
#    define NEON_CAPABLE        (OPENSSL_armcap_P & ARMV7_NEON)
#   endif
void gcm_init_neon(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_neon(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
void gcm_init_v8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_v8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
#  endif
# elif defined(__sparc__) || defined(__sparc)
#  include "sparc_arch.h"
#  define GHASH_ASM_SPARC
#  define GCM_FUNCREF_4BIT
extern unsigned int OPENSSL_sparcv9cap_P[];
void gcm_init_vis3(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_vis3(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_vis3(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                    size_t len);
# elif defined(OPENSSL_CPUID_OBJ) && (defined(__powerpc__) || defined(__ppc__) || defined(_ARCH_PPC))
#  include "crypto/ppc_arch.h"
#  define GHASH_ASM_PPC
#  define GCM_FUNCREF_4BIT
void gcm_init_p8(u128 Htable[16], const u64 Xi[2]);
void gcm_gmult_p8(u64 Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(u64 Xi[2], const u128 Htable[16], const u8 *inp,
                  size_t len);
# endif
#endif
#ifdef GCM_FUNCREF_4BIT
# undef  GCM_MUL
# define GCM_MUL(ctx)           (*gcm_gmult_p)(ctx->Xi.u,ctx->Htable)
# ifdef GHASH
#  undef  GHASH
#  define GHASH(ctx,in,len)     (*gcm_ghash_p)(ctx->Xi.u,ctx->Htable,in,len)
# endif
#endif
void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx, void *key, block128_f block)
{
    DECLARE_IS_ENDIAN;

    memset(ctx, 0, sizeof(*ctx));
    ctx->block = block;
    ctx->key = key;

    (*block) (ctx->H.c, ctx->H.c, key);

    if (IS_LITTLE_ENDIAN) {
        /* H is stored in host byte order */
#ifdef BSWAP8
        ctx->H.u[0] = BSWAP8(ctx->H.u[0]);
        ctx->H.u[1] = BSWAP8(ctx->H.u[1]);
#else
        u8 *p = ctx->H.c;
        u64 hi, lo;
        hi = (u64)GETU32(p) << 32 | GETU32(p + 4);
        lo = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
        ctx->H.u[0] = hi;
        ctx->H.u[1] = lo;
#endif
    }

#if TABLE_BITS==8
    gcm_init_8bit(ctx->Htable, ctx->H.u);
#elif TABLE_BITS==4
# if defined(GHASH)
#  define CTX__GHASH(f) (ctx->ghash = (f))
# else
#  define CTX__GHASH(f) (ctx->ghash = NULL)
# endif
# if defined(GHASH_ASM_X86_OR_64)
#  if !defined(GHASH_ASM_X86) || defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[1] & (1 << 1)) { /* check PCLMULQDQ bit */
        if (((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41) { /* AVX+MOVBE */
            gcm_init_avx(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_avx;
            CTX__GHASH(gcm_ghash_avx);
        } else {
            gcm_init_clmul(ctx->Htable, ctx->H.u);
            ctx->gmult = gcm_gmult_clmul;
            CTX__GHASH(gcm_ghash_clmul);
        }
        return;
    }
#  endif
    gcm_init_4bit(ctx->Htable, ctx->H.u);
#  if defined(GHASH_ASM_X86)    /* x86 only */
#   if defined(OPENSSL_IA32_SSE2)
    if (OPENSSL_ia32cap_P[0] & (1 << 25)) { /* check SSE bit */
#   else
    if (OPENSSL_ia32cap_P[0] & (1 << 23)) { /* check MMX bit */
#   endif
        ctx->gmult = gcm_gmult_4bit_mmx;
        CTX__GHASH(gcm_ghash_4bit_mmx);
    } else {
        ctx->gmult = gcm_gmult_4bit_x86;
        CTX__GHASH(gcm_ghash_4bit_x86);
    }
#  else
    ctx->gmult = gcm_gmult_4bit;
    CTX__GHASH(gcm_ghash_4bit);
#  endif
# elif defined(GHASH_ASM_ARM)
#  ifdef PMULL_CAPABLE
    if (PMULL_CAPABLE) {
        gcm_init_v8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_v8;
        CTX__GHASH(gcm_ghash_v8);
    } else
#  endif
#  ifdef NEON_CAPABLE
    if (NEON_CAPABLE) {
        gcm_init_neon(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_neon;
        CTX__GHASH(gcm_ghash_neon);
    } else
#  endif
    {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif defined(GHASH_ASM_SPARC)
    if (OPENSSL_sparcv9cap_P[0] & SPARCV9_VIS3) {
        gcm_init_vis3(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_vis3;
        CTX__GHASH(gcm_ghash_vis3);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# elif defined(GHASH_ASM_PPC)
    if (OPENSSL_ppccap_P & PPC_CRYPTO207) {
        gcm_init_p8(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_p8;
        CTX__GHASH(gcm_ghash_p8);
    } else {
        gcm_init_4bit(ctx->Htable, ctx->H.u);
        ctx->gmult = gcm_gmult_4bit;
        CTX__GHASH(gcm_ghash_4bit);
    }
# else
    gcm_init_4bit(ctx->Htable, ctx->H.u);
    ctx->gmult = gcm_gmult_4bit;
    CTX__GHASH(gcm_ghash_4bit);
# endif
# undef CTX__GHASH
#endif
}
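
/*
 * Usage illustration (not part of the original file): any 128-bit block
 * cipher can drive this context through the block128_f callback.  A
 * minimal sketch with AES, assuming the caller links against libcrypto's
 * AES_set_encrypt_key()/AES_encrypt(); gcm_init_with_aes() is a
 * hypothetical helper name:
 */
#if 0
#include <openssl/aes.h>

static void gcm_init_with_aes(GCM128_CONTEXT *gcm, AES_KEY *aes,
                              const unsigned char key[16])
{
    AES_set_encrypt_key(key, 128, aes);
    /* AES_encrypt matches block128_f: 16-byte in, 16-byte out, opaque key */
    CRYPTO_gcm128_init(gcm, aes, (block128_f)AES_encrypt);
}
#endif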
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const unsigned char *iv,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int ctr;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#endif

    ctx->len.u[0] = 0;          /* AAD length */
    ctx->len.u[1] = 0;          /* message length */
    ctx->ares = 0;
    ctx->mres = 0;

    if (len == 12) {
        memcpy(ctx->Yi.c, iv, 12);
        ctx->Yi.c[12] = 0;
        ctx->Yi.c[13] = 0;
        ctx->Yi.c[14] = 0;
        ctx->Yi.c[15] = 1;
        ctr = 1;
    } else {
        size_t i;
        u64 len0 = len;

        /* Borrow ctx->Xi to calculate initial Yi */
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;

        while (len >= 16) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
            iv += 16;
            len -= 16;
        }
        if (len) {
            for (i = 0; i < len; ++i)
                ctx->Xi.c[i] ^= iv[i];
            GCM_MUL(ctx);
        }
        len0 <<= 3;
        if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
            ctx->Xi.u[1] ^= BSWAP8(len0);
#else
            ctx->Xi.c[8] ^= (u8)(len0 >> 56);
            ctx->Xi.c[9] ^= (u8)(len0 >> 48);
            ctx->Xi.c[10] ^= (u8)(len0 >> 40);
            ctx->Xi.c[11] ^= (u8)(len0 >> 32);
            ctx->Xi.c[12] ^= (u8)(len0 >> 24);
            ctx->Xi.c[13] ^= (u8)(len0 >> 16);
            ctx->Xi.c[14] ^= (u8)(len0 >> 8);
            ctx->Xi.c[15] ^= (u8)(len0);
#endif
        } else {
            ctx->Xi.u[1] ^= len0;
        }

        GCM_MUL(ctx);

        if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
            ctr = BSWAP4(ctx->Xi.d[3]);
#else
            ctr = GETU32(ctx->Xi.c + 12);
#endif
        else
            ctr = ctx->Xi.d[3];

        /* Copy borrowed Xi to Yi */
        ctx->Yi.u[0] = ctx->Xi.u[0];
        ctx->Yi.u[1] = ctx->Xi.u[1];
    }

    ctx->Xi.u[0] = 0;
    ctx->Xi.u[1] = 0;

    (*ctx->block) (ctx->Yi.c, ctx->EK0.c, ctx->key);
    ++ctr;
    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctx->Yi.d[3] = BSWAP4(ctr);
#else
        PUTU32(ctx->Yi.c + 12, ctr);
#endif
    else
        ctx->Yi.d[3] = ctr;
}
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const unsigned char *aad,
                      size_t len)
{
    size_t i;
    unsigned int n;
    u64 alen = ctx->len.u[0];
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    if (ctx->len.u[1])
        return -2;

    alen += len;
    if (alen > (U64(1) << 61) || (sizeof(len) == 8 && alen < len))
        return -1;
    ctx->len.u[0] = alen;

    n = ctx->ares;
    if (n) {
        while (n && len) {
            ctx->Xi.c[n] ^= *(aad++);
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0)
            GCM_MUL(ctx);
        else {
            ctx->ares = n;
            return 0;
        }
    }
#ifdef GHASH
    if ((i = (len & (size_t)-16))) {
        GHASH(ctx, aad, i);
        aad += i;
        len -= i;
    }
#else
    while (len >= 16) {
        for (i = 0; i < 16; ++i)
            ctx->Xi.c[i] ^= aad[i];
        GCM_MUL(ctx);
        aad += 16;
        len -= 16;
    }
#endif
    if (len) {
        n = (unsigned int)len;
        for (i = 0; i < len; ++i)
            ctx->Xi.c[i] ^= aad[i];
    }

    ctx->ares = n;
    return 0;
}
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                size_t j = i;

                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
                GHASH(ctx, out - j, j);
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i)
                    ctx->Xi.t[i] ^= out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                          const unsigned char *in, unsigned char *out,
                          size_t len)
{
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    block128_f block = ctx->block;
    void *key = ctx->key;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
#else
        GCM_MUL(ctx);
#endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
#else
        ctr = GETU32(ctx->Yi.c + 12);
#endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
#if !defined(OPENSSL_SMALL_FOOTPRINT)
    if (16 % sizeof(size_t) == 0) { /* always true actually */
        do {
            if (n) {
# if defined(GHASH)
                while (n && len) {
                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GHASH(ctx, ctx->Xn, mres);
                    mres = 0;
                } else {
                    ctx->mres = mres;
                    return 0;
                }
# else
                while (n && len) {
                    u8 c = *(in++);
                    *(out++) = c ^ ctx->EKi.c[n];
                    ctx->Xi.c[n] ^= c;
                    --len;
                    n = (n + 1) % 16;
                }
                if (n == 0) {
                    GCM_MUL(ctx);
                    mres = 0;
                } else {
                    ctx->mres = n;
                    return 0;
                }
# endif
            }
# if defined(STRICT_ALIGNMENT)
            if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                break;
# endif
# if defined(GHASH)
            if (len >= 16 && mres) {
                GHASH(ctx, ctx->Xn, mres);
                mres = 0;
            }
#  if defined(GHASH_CHUNK)
            while (len >= GHASH_CHUNK) {
                size_t j = GHASH_CHUNK;

                GHASH(ctx, in, GHASH_CHUNK);
                while (j) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#   else
                        PUTU32(ctx->Yi.c + 12, ctr);
#   endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    j -= 16;
                }
                len -= GHASH_CHUNK;
            }
#  endif
            if ((i = (len & (size_t)-16))) {
                GHASH(ctx, in, i);
                while (len >= 16) {
                    size_t_aX *out_t = (size_t_aX *)out;
                    const size_t_aX *in_t = (const size_t_aX *)in;

                    (*block) (ctx->Yi.c, ctx->EKi.c, key);
                    ++ctr;
                    if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                        ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                        PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                    else
                        ctx->Yi.d[3] = ctr;
                    for (i = 0; i < 16 / sizeof(size_t); ++i)
                        out_t[i] = in_t[i] ^ ctx->EKi.t[i];
                    out += 16;
                    in += 16;
                    len -= 16;
                }
            }
# else
            while (len >= 16) {
                size_t *out_t = (size_t *)out;
                const size_t *in_t = (const size_t *)in;

                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
#  ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
#  else
                    PUTU32(ctx->Yi.c + 12, ctr);
#  endif
                else
                    ctx->Yi.d[3] = ctr;
                for (i = 0; i < 16 / sizeof(size_t); ++i) {
                    size_t c = in_t[i];
                    out_t[i] = c ^ ctx->EKi.t[i];
                    ctx->Xi.t[i] ^= c;
                }
                GCM_MUL(ctx);
                out += 16;
                in += 16;
                len -= 16;
            }
# endif
            if (len) {
                (*block) (ctx->Yi.c, ctx->EKi.c, key);
                ++ctr;
                if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
                    ctx->Yi.d[3] = BSWAP4(ctr);
# else
                    PUTU32(ctx->Yi.c + 12, ctr);
# endif
                else
                    ctx->Yi.d[3] = ctr;
# if defined(GHASH)
                while (len--) {
                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
                    ++n;
                }
# else
                while (len--) {
                    u8 c = in[n];
                    ctx->Xi.c[n] ^= c;
                    out[n] = c ^ ctx->EKi.c[n];
                    ++n;
                }
                mres = n;
# endif
            }

            ctx->mres = mres;
            return 0;
        } while (0);
    }
#endif
    for (i = 0; i < len; ++i) {
        u8 c;
        if (n == 0) {
            (*block) (ctx->Yi.c, ctx->EKi.c, key);
            ++ctr;
            if (IS_LITTLE_ENDIAN)
#ifdef BSWAP4
                ctx->Yi.d[3] = BSWAP4(ctr);
#else
                PUTU32(ctx->Yi.c + 12, ctr);
#endif
            else
                ctx->Yi.d[3] = ctr;
        }
#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
        n = (n + 1) % 16;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
            mres = 0;
        }
#else
        c = in[i];
        out[i] = c ^ ctx->EKi.c[n];
        ctx->Xi.c[n] ^= c;
        mres = n = (n + 1) % 16;
        if (n == 0)
            GCM_MUL(ctx);
#endif
    }

    ctx->mres = mres;
    return 0;
}
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_encrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to encrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        GHASH(ctx, out, GHASH_CHUNK);
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        in += i;
        len -= i;
# if defined(GHASH)
        GHASH(ctx, out, i);
        out += i;
# else
        while (j--) {
            for (i = 0; i < 16; ++i)
                ctx->Xi.c[i] ^= out[i];
            GCM_MUL(ctx);
            out += 16;
        }
# endif
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
# if defined(GHASH)
            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
                                const unsigned char *in, unsigned char *out,
                                size_t len, ctr128_f stream)
{
#if defined(OPENSSL_SMALL_FOOTPRINT)
    return CRYPTO_gcm128_decrypt(ctx, in, out, len);
#else
    DECLARE_IS_ENDIAN;
    unsigned int n, ctr, mres;
    size_t i;
    u64 mlen = ctx->len.u[1];
    void *key = ctx->key;
# ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
#  ifdef GHASH
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
#  endif
# endif

    mlen += len;
    if (mlen > ((U64(1) << 36) - 32) || (sizeof(len) == 8 && mlen < len))
        return -1;
    ctx->len.u[1] = mlen;

    mres = ctx->mres;

    if (ctx->ares) {
        /* First call to decrypt finalizes GHASH(AAD) */
# if defined(GHASH)
        if (len == 0) {
            GCM_MUL(ctx);
            ctx->ares = 0;
            return 0;
        }
        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
        ctx->Xi.u[0] = 0;
        ctx->Xi.u[1] = 0;
        mres = sizeof(ctx->Xi);
# else
        GCM_MUL(ctx);
# endif
        ctx->ares = 0;
    }

    if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
        ctr = BSWAP4(ctx->Yi.d[3]);
# else
        ctr = GETU32(ctx->Yi.c + 12);
# endif
    else
        ctr = ctx->Yi.d[3];

    n = mres % 16;
    if (n) {
# if defined(GHASH)
        while (n && len) {
            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        } else {
            ctx->mres = mres;
            return 0;
        }
# else
        while (n && len) {
            u8 c = *(in++);
            *(out++) = c ^ ctx->EKi.c[n];
            ctx->Xi.c[n] ^= c;
            --len;
            n = (n + 1) % 16;
        }
        if (n == 0) {
            GCM_MUL(ctx);
            mres = 0;
        } else {
            ctx->mres = n;
            return 0;
        }
# endif
    }
# if defined(GHASH)
    if (len >= 16 && mres) {
        GHASH(ctx, ctx->Xn, mres);
        mres = 0;
    }
#  if defined(GHASH_CHUNK)
    while (len >= GHASH_CHUNK) {
        GHASH(ctx, in, GHASH_CHUNK);
        (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
        ctr += GHASH_CHUNK / 16;
        if (IS_LITTLE_ENDIAN)
#   ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
#   else
            PUTU32(ctx->Yi.c + 12, ctr);
#   endif
        else
            ctx->Yi.d[3] = ctr;
        out += GHASH_CHUNK;
        in += GHASH_CHUNK;
        len -= GHASH_CHUNK;
    }
#  endif
# endif
    if ((i = (len & (size_t)-16))) {
        size_t j = i / 16;

# if defined(GHASH)
        GHASH(ctx, in, i);
# else
        while (j--) {
            size_t k;
            for (k = 0; k < 16; ++k)
                ctx->Xi.c[k] ^= in[k];
            GCM_MUL(ctx);
            in += 16;
        }
        j = i / 16;
        in -= i;
# endif
        (*stream) (in, out, j, key, ctx->Yi.c);
        ctr += (unsigned int)j;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        out += i;
        in += i;
        len -= i;
    }
    if (len) {
        (*ctx->block) (ctx->Yi.c, ctx->EKi.c, key);
        ++ctr;
        if (IS_LITTLE_ENDIAN)
# ifdef BSWAP4
            ctx->Yi.d[3] = BSWAP4(ctr);
# else
            PUTU32(ctx->Yi.c + 12, ctr);
# endif
        else
            ctx->Yi.d[3] = ctr;
        while (len--) {
            u8 c = in[n];
# if defined(GHASH)
            out[n] = (ctx->Xn[mres++] = c) ^ ctx->EKi.c[n];
# else
            ctx->Xi.c[mres++] ^= c;
            out[n] = c ^ ctx->EKi.c[n];
# endif
            ++n;
        }
    }

    ctx->mres = mres;
    return 0;
#endif
}
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
                         size_t len)
{
    DECLARE_IS_ENDIAN;
    u64 alen = ctx->len.u[0] << 3;
    u64 clen = ctx->len.u[1] << 3;
#ifdef GCM_FUNCREF_4BIT
    void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
                         const u8 *inp, size_t len) = ctx->ghash;
# endif
#endif

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    u128 bitlen;
    unsigned int mres = ctx->mres;

    if (mres) {
        unsigned blocks = (mres + 15) & -16;

        memset(ctx->Xn + mres, 0, blocks - mres);
        mres = blocks;
        if (mres == sizeof(ctx->Xn)) {
            GHASH(ctx, ctx->Xn, mres);
            mres = 0;
        }
    } else if (ctx->ares) {
        GCM_MUL(ctx);
    }
#else
    if (ctx->mres || ctx->ares)
        GCM_MUL(ctx);
#endif

    if (IS_LITTLE_ENDIAN) {
#ifdef BSWAP8
        alen = BSWAP8(alen);
        clen = BSWAP8(clen);
#else
        u8 *p = ctx->len.c;

        ctx->len.u[0] = alen;
        ctx->len.u[1] = clen;

        alen = (u64)GETU32(p) << 32 | GETU32(p + 4);
        clen = (u64)GETU32(p + 8) << 32 | GETU32(p + 12);
#endif
    }

#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
    bitlen.hi = alen;
    bitlen.lo = clen;
    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
    mres += sizeof(bitlen);
    GHASH(ctx, ctx->Xn, mres);
#else
    ctx->Xi.u[0] ^= alen;
    ctx->Xi.u[1] ^= clen;
    GCM_MUL(ctx);
#endif

    ctx->Xi.u[0] ^= ctx->EK0.u[0];
    ctx->Xi.u[1] ^= ctx->EK0.u[1];

    if (tag && len <= sizeof(ctx->Xi))
        return CRYPTO_memcmp(ctx->Xi.c, tag, len);
    else
        return -1;
}
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len)
{
    CRYPTO_gcm128_finish(ctx, NULL, 0);
    memcpy(tag, ctx->Xi.c,
           len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}
GCM128_CONTEXT *CRYPTO_gcm128_new(void *key, block128_f block)
{
    GCM128_CONTEXT *ret;

    if ((ret = OPENSSL_malloc(sizeof(*ret))) != NULL)
        CRYPTO_gcm128_init(ret, key, block);

    return ret;
}

void CRYPTO_gcm128_release(GCM128_CONTEXT *ctx)
{
    OPENSSL_clear_free(ctx, sizeof(*ctx));
}