]>
Commit | Line | Data |
---|---|---|
e7f5b1cd AP |
1 | /* ==================================================================== |
2 | * Copyright (c) 2010 The OpenSSL Project. All rights reserved. | |
3 | * | |
4 | * Redistribution and use in source and binary forms, with or without | |
5 | * modification, are permitted provided that the following conditions | |
6 | * are met: | |
7 | * | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * | |
11 | * 2. Redistributions in binary form must reproduce the above copyright | |
12 | * notice, this list of conditions and the following disclaimer in | |
13 | * the documentation and/or other materials provided with the | |
14 | * distribution. | |
15 | * | |
16 | * 3. All advertising materials mentioning features or use of this | |
17 | * software must display the following acknowledgment: | |
18 | * "This product includes software developed by the OpenSSL Project | |
19 | * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" | |
20 | * | |
21 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
22 | * endorse or promote products derived from this software without | |
23 | * prior written permission. For written permission, please contact | |
24 | * openssl-core@openssl.org. | |
25 | * | |
26 | * 5. Products derived from this software may not be called "OpenSSL" | |
27 | * nor may "OpenSSL" appear in their names without prior written | |
28 | * permission of the OpenSSL Project. | |
29 | * | |
30 | * 6. Redistributions of any form whatsoever must retain the following | |
31 | * acknowledgment: | |
32 | * "This product includes software developed by the OpenSSL Project | |
33 | * for use in the OpenSSL Toolkit (http://www.openssl.org/)" | |
34 | * | |
35 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
36 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
37 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
38 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
39 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
40 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
41 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
42 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
43 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
44 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
45 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
46 | * OF THE POSSIBILITY OF SUCH DAMAGE. | |
47 | * ==================================================================== | |
48 | */ | |
49 | ||
50 | #include "modes.h" | |
51 | #include <string.h> | |
52 | ||
53 | #ifndef MODES_DEBUG | |
54 | # ifndef NDEBUG | |
55 | # define NDEBUG | |
56 | # endif | |
57 | #endif | |
58 | #include <assert.h> | |
59 | ||
/*
 * Integer helper types.  Each branch below supplies a signed and an
 * unsigned 64-bit type together with U64(), which appends the matching
 * unsigned 64-bit literal suffix to a constant.
 */
#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
/* Windows compilers other than MinGW: __int64 and the UI64 suffix. */
# define U64(C)	C##UI64
typedef unsigned __int64	u64;
typedef __int64			i64;
#elif defined(__arch64__)
/* Targets that define __arch64__: plain long is used. */
# define U64(C)	C##UL
typedef unsigned long		u64;
typedef long			i64;
#else
/* Everything else: long long. */
# define U64(C)	C##ULL
typedef unsigned long long	u64;
typedef long long		i64;
#endif

typedef unsigned char	u8;
typedef unsigned int	u32;

/* A 128-bit quantity kept as two 64-bit halves. */
typedef struct {
	u64 hi;
	u64 lo;
} u128;

/*
 * STRICT_ALIGNMENT is defined by default and withdrawn again on the
 * architectures listed here (x86, x86_64, s390, s390x).  While it is
 * defined, the bulk encrypt/decrypt code refuses to use word-sized
 * loads and stores on buffers that are not size_t-aligned.
 */
#define STRICT_ALIGNMENT
#if defined(__i386)	|| defined(__i386__)	|| \
    defined(__x86_64)	|| defined(__x86_64__)	|| \
    defined(_M_IX86)	|| defined(_M_AMD64)	|| defined(_M_X64) || \
    defined(__s390__)	|| defined(__s390x__)
# undef STRICT_ALIGNMENT
#endif
85 | ||
/*
 * Byte-order helpers.
 *
 * BSWAP8(x) / BSWAP4(x) reverse the byte order of a 64- / 32-bit value.
 * They are provided only where a dedicated instruction or intrinsic is
 * at hand (GCC-style inline assembly on x86 and x86_64, MSVC 7.0+
 * intrinsics); everywhere else they stay undefined and users test for
 * them with #ifdef.
 *
 * Notes on the GCC flavours:
 *  - the alternate keywords __asm__/__volatile__ are used because plain
 *    "asm" is not a keyword in strict ISO modes (-std=c99, -std=c11);
 *  - the temporaries end in an underscore so that an argument expression
 *    mentioning "ret", "hi" or "lo" is not captured by the macro body;
 *  - every macro evaluates its argument exactly once.
 */
#if defined(__GNUC__) && __GNUC__>=2 && !defined(PEDANTIC)
# if defined(__x86_64) || defined(__x86_64__)
#  define BSWAP8(x) ({	u64 ret_=(x);				\
			__asm__ __volatile__ ("bswapq %0"	\
			: "+r"(ret_));	ret_;			})
#  define BSWAP4(x) ({	u32 ret_=(x);				\
			__asm__ __volatile__ ("bswapl %0"	\
			: "+r"(ret_));	ret_;			})
# elif defined(__i386) || defined(__i386__)
/* Swap each 32-bit half, then let the halves trade places. */
#  define BSWAP8(x) ({	u64 x_=(x);				\
			u32 lo_=(u32)(x_>>32),hi_=(u32)x_;	\
			__asm__ __volatile__ ("bswapl %0; bswapl %1" \
			: "+r"(hi_),"+r"(lo_));			\
			(u64)hi_<<32|lo_;			})
#  define BSWAP4(x) ({	u32 ret_=(x);				\
			__asm__ __volatile__ ("bswapl %0"	\
			: "+r"(ret_));	ret_;			})
# endif
#elif defined(_MSC_VER)
# if _MSC_VER>=1300
#  pragma intrinsic(_byteswap_uint64,_byteswap_ulong)
#  define BSWAP8(x)	_byteswap_uint64((u64)(x))
#  define BSWAP4(x)	_byteswap_ulong((u32)(x))
# elif defined(_M_IX86)
   /* older 32-bit MSVC: no implementation supplied */
# endif
#endif

/*
 * GETU32(p) loads and PUTU32(p,v) stores a 32-bit value kept in
 * big-endian byte order at p.  With BSWAP4 available this is a single
 * word access plus a byte swap; otherwise it is done byte by byte, in
 * which case p has to be a pointer to u8.  Both PUTU32 flavours are
 * complete parenthesised expressions.
 */
#ifdef BSWAP4
# define GETU32(p)	BSWAP4(*(const u32 *)(p))
# define PUTU32(p,v)	(*(u32 *)(p) = BSWAP4(v))
#else
# define GETU32(p)	((u32)(p)[0]<<24|(u32)(p)[1]<<16|(u32)(p)[2]<<8|(u32)(p)[3])
# define PUTU32(p,v)	((p)[0]=(u8)((v)>>24),(p)[1]=(u8)((v)>>16),(p)[2]=(u8)((v)>>8),(p)[3]=(u8)(v))
#endif
119 | ||
a595baff AP |
/* Move a 16-bit constant into the 16 most significant bits of a size_t. */
#define PACK(s)		((size_t)(s) << (8*sizeof(size_t) - 16))

/*
 * TABLE_BITS selects the multiplication routine compiled below; any
 * definition supplied from outside is discarded first.
 *
 * Permitted values are 8, 4 and 1, but 8 should never be used: it is
 * effectively reserved for testing.  Under ideal conditions the "8-bit"
 * version would be twice as fast as the "4-bit" one, yet for
 * gcc-generated x86 code it was observed to run only ~50% faster, and
 * on x86_64 ~75% faster.  The shortfall shows that references to the
 * pre-computed tables end up on the critical path, and because those
 * tables are large (4KB per key plus 1KB shared) execution time becomes
 * sensitive to cache timing.  It is not actually proven, but the 4-bit
 * procedure is believed to provide adequate all-round performance.
 */
#undef TABLE_BITS
#define TABLE_BITS	4
138 | ||
139 | #if TABLE_BITS==8 | |
140 | ||
e7f5b1cd AP |
141 | static void gcm_init_8bit(u128 Htable[256], u64 H[2]) |
142 | { | |
143 | int i, j; | |
144 | u128 V; | |
145 | ||
146 | Htable[0].hi = 0; | |
147 | Htable[0].lo = 0; | |
148 | V.hi = H[0]; | |
149 | V.lo = H[1]; | |
150 | ||
151 | for (Htable[128]=V, i=64; i>0; i>>=1) { | |
152 | if (sizeof(size_t)==8) { | |
153 | u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); | |
154 | V.lo = (V.hi<<63)|(V.lo>>1); | |
155 | V.hi = (V.hi>>1 )^T; | |
156 | } | |
157 | else { | |
158 | u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); | |
159 | V.lo = (V.hi<<63)|(V.lo>>1); | |
a595baff | 160 | V.hi = (V.hi>>1 )^((u64)T<<32); |
e7f5b1cd AP |
161 | } |
162 | Htable[i] = V; | |
163 | } | |
164 | ||
165 | for (i=2; i<256; i<<=1) { | |
166 | u128 *Hi = Htable+i, H0 = *Hi; | |
167 | for (j=1; j<i; ++j) { | |
168 | Hi[j].hi = H0.hi^Htable[j].hi; | |
169 | Hi[j].lo = H0.lo^Htable[j].lo; | |
170 | } | |
171 | } | |
172 | } | |
173 | ||
2262beef | 174 | static void gcm_gmult_8bit(u64 Xi[2], u128 Htable[256]) |
e7f5b1cd AP |
175 | { |
176 | u128 Z = { 0, 0}; | |
177 | const u8 *xi = (const u8 *)Xi+15; | |
178 | size_t rem, n = *xi; | |
179 | const union { long one; char little; } is_endian = {1}; | |
180 | static const size_t rem_8bit[256] = { | |
181 | PACK(0x0000), PACK(0x01C2), PACK(0x0384), PACK(0x0246), | |
182 | PACK(0x0708), PACK(0x06CA), PACK(0x048C), PACK(0x054E), | |
183 | PACK(0x0E10), PACK(0x0FD2), PACK(0x0D94), PACK(0x0C56), | |
184 | PACK(0x0918), PACK(0x08DA), PACK(0x0A9C), PACK(0x0B5E), | |
185 | PACK(0x1C20), PACK(0x1DE2), PACK(0x1FA4), PACK(0x1E66), | |
186 | PACK(0x1B28), PACK(0x1AEA), PACK(0x18AC), PACK(0x196E), | |
187 | PACK(0x1230), PACK(0x13F2), PACK(0x11B4), PACK(0x1076), | |
188 | PACK(0x1538), PACK(0x14FA), PACK(0x16BC), PACK(0x177E), | |
189 | PACK(0x3840), PACK(0x3982), PACK(0x3BC4), PACK(0x3A06), | |
190 | PACK(0x3F48), PACK(0x3E8A), PACK(0x3CCC), PACK(0x3D0E), | |
191 | PACK(0x3650), PACK(0x3792), PACK(0x35D4), PACK(0x3416), | |
192 | PACK(0x3158), PACK(0x309A), PACK(0x32DC), PACK(0x331E), | |
193 | PACK(0x2460), PACK(0x25A2), PACK(0x27E4), PACK(0x2626), | |
194 | PACK(0x2368), PACK(0x22AA), PACK(0x20EC), PACK(0x212E), | |
195 | PACK(0x2A70), PACK(0x2BB2), PACK(0x29F4), PACK(0x2836), | |
196 | PACK(0x2D78), PACK(0x2CBA), PACK(0x2EFC), PACK(0x2F3E), | |
197 | PACK(0x7080), PACK(0x7142), PACK(0x7304), PACK(0x72C6), | |
198 | PACK(0x7788), PACK(0x764A), PACK(0x740C), PACK(0x75CE), | |
199 | PACK(0x7E90), PACK(0x7F52), PACK(0x7D14), PACK(0x7CD6), | |
200 | PACK(0x7998), PACK(0x785A), PACK(0x7A1C), PACK(0x7BDE), | |
201 | PACK(0x6CA0), PACK(0x6D62), PACK(0x6F24), PACK(0x6EE6), | |
202 | PACK(0x6BA8), PACK(0x6A6A), PACK(0x682C), PACK(0x69EE), | |
203 | PACK(0x62B0), PACK(0x6372), PACK(0x6134), PACK(0x60F6), | |
204 | PACK(0x65B8), PACK(0x647A), PACK(0x663C), PACK(0x67FE), | |
205 | PACK(0x48C0), PACK(0x4902), PACK(0x4B44), PACK(0x4A86), | |
206 | PACK(0x4FC8), PACK(0x4E0A), PACK(0x4C4C), PACK(0x4D8E), | |
207 | PACK(0x46D0), PACK(0x4712), PACK(0x4554), PACK(0x4496), | |
208 | PACK(0x41D8), PACK(0x401A), PACK(0x425C), PACK(0x439E), | |
209 | PACK(0x54E0), PACK(0x5522), PACK(0x5764), PACK(0x56A6), | |
210 | PACK(0x53E8), PACK(0x522A), PACK(0x506C), PACK(0x51AE), | |
211 | PACK(0x5AF0), PACK(0x5B32), PACK(0x5974), PACK(0x58B6), | |
212 | PACK(0x5DF8), PACK(0x5C3A), PACK(0x5E7C), PACK(0x5FBE), | |
213 | PACK(0xE100), PACK(0xE0C2), PACK(0xE284), PACK(0xE346), | |
214 | PACK(0xE608), PACK(0xE7CA), PACK(0xE58C), PACK(0xE44E), | |
215 | PACK(0xEF10), PACK(0xEED2), PACK(0xEC94), PACK(0xED56), | |
216 | PACK(0xE818), PACK(0xE9DA), PACK(0xEB9C), PACK(0xEA5E), | |
217 | PACK(0xFD20), PACK(0xFCE2), PACK(0xFEA4), PACK(0xFF66), | |
218 | PACK(0xFA28), PACK(0xFBEA), PACK(0xF9AC), PACK(0xF86E), | |
219 | PACK(0xF330), PACK(0xF2F2), PACK(0xF0B4), PACK(0xF176), | |
220 | PACK(0xF438), PACK(0xF5FA), PACK(0xF7BC), PACK(0xF67E), | |
221 | PACK(0xD940), PACK(0xD882), PACK(0xDAC4), PACK(0xDB06), | |
222 | PACK(0xDE48), PACK(0xDF8A), PACK(0xDDCC), PACK(0xDC0E), | |
223 | PACK(0xD750), PACK(0xD692), PACK(0xD4D4), PACK(0xD516), | |
224 | PACK(0xD058), PACK(0xD19A), PACK(0xD3DC), PACK(0xD21E), | |
225 | PACK(0xC560), PACK(0xC4A2), PACK(0xC6E4), PACK(0xC726), | |
226 | PACK(0xC268), PACK(0xC3AA), PACK(0xC1EC), PACK(0xC02E), | |
227 | PACK(0xCB70), PACK(0xCAB2), PACK(0xC8F4), PACK(0xC936), | |
228 | PACK(0xCC78), PACK(0xCDBA), PACK(0xCFFC), PACK(0xCE3E), | |
229 | PACK(0x9180), PACK(0x9042), PACK(0x9204), PACK(0x93C6), | |
230 | PACK(0x9688), PACK(0x974A), PACK(0x950C), PACK(0x94CE), | |
231 | PACK(0x9F90), PACK(0x9E52), PACK(0x9C14), PACK(0x9DD6), | |
232 | PACK(0x9898), PACK(0x995A), PACK(0x9B1C), PACK(0x9ADE), | |
233 | PACK(0x8DA0), PACK(0x8C62), PACK(0x8E24), PACK(0x8FE6), | |
234 | PACK(0x8AA8), PACK(0x8B6A), PACK(0x892C), PACK(0x88EE), | |
235 | PACK(0x83B0), PACK(0x8272), PACK(0x8034), PACK(0x81F6), | |
236 | PACK(0x84B8), PACK(0x857A), PACK(0x873C), PACK(0x86FE), | |
237 | PACK(0xA9C0), PACK(0xA802), PACK(0xAA44), PACK(0xAB86), | |
238 | PACK(0xAEC8), PACK(0xAF0A), PACK(0xAD4C), PACK(0xAC8E), | |
239 | PACK(0xA7D0), PACK(0xA612), PACK(0xA454), PACK(0xA596), | |
240 | PACK(0xA0D8), PACK(0xA11A), PACK(0xA35C), PACK(0xA29E), | |
241 | PACK(0xB5E0), PACK(0xB422), PACK(0xB664), PACK(0xB7A6), | |
242 | PACK(0xB2E8), PACK(0xB32A), PACK(0xB16C), PACK(0xB0AE), | |
243 | PACK(0xBBF0), PACK(0xBA32), PACK(0xB874), PACK(0xB9B6), | |
244 | PACK(0xBCF8), PACK(0xBD3A), PACK(0xBF7C), PACK(0xBEBE) }; | |
245 | ||
246 | while (1) { | |
247 | Z.hi ^= Htable[n].hi; | |
248 | Z.lo ^= Htable[n].lo; | |
249 | ||
250 | if ((u8 *)Xi==xi) break; | |
251 | ||
252 | n = *(--xi); | |
253 | ||
254 | rem = (size_t)Z.lo&0xff; | |
255 | Z.lo = (Z.hi<<56)|(Z.lo>>8); | |
256 | Z.hi = (Z.hi>>8); | |
257 | if (sizeof(size_t)==8) | |
258 | Z.hi ^= rem_8bit[rem]; | |
259 | else | |
260 | Z.hi ^= (u64)rem_8bit[rem]<<32; | |
261 | } | |
262 | ||
263 | if (is_endian.little) { | |
264 | #ifdef BSWAP8 | |
265 | Xi[0] = BSWAP8(Z.hi); | |
266 | Xi[1] = BSWAP8(Z.lo); | |
267 | #else | |
268 | u8 *p = (u8 *)Xi; | |
269 | u32 v; | |
270 | v = (u32)(Z.hi>>32); PUTU32(p,v); | |
271 | v = (u32)(Z.hi); PUTU32(p+4,v); | |
272 | v = (u32)(Z.lo>>32); PUTU32(p+8,v); | |
273 | v = (u32)(Z.lo); PUTU32(p+12,v); | |
274 | #endif | |
275 | } | |
276 | else { | |
277 | Xi[0] = Z.hi; | |
278 | Xi[1] = Z.lo; | |
279 | } | |
280 | } | |
a595baff | 281 | #define GCM_MUL(ctx,Xi) gcm_gmult_8bit(ctx->Xi.u,ctx->Htable) |
e7f5b1cd | 282 | |
a595baff | 283 | #elif TABLE_BITS==4 |
2262beef | 284 | |
e7f5b1cd AP |
285 | static void gcm_init_4bit(u128 Htable[16], u64 H[2]) |
286 | { | |
2262beef | 287 | int i; |
e7f5b1cd AP |
288 | u128 V; |
289 | ||
290 | Htable[0].hi = 0; | |
291 | Htable[0].lo = 0; | |
292 | V.hi = H[0]; | |
293 | V.lo = H[1]; | |
294 | ||
295 | for (Htable[8]=V, i=4; i>0; i>>=1) { | |
296 | if (sizeof(size_t)==8) { | |
297 | u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); | |
298 | V.lo = (V.hi<<63)|(V.lo>>1); | |
299 | V.hi = (V.hi>>1 )^T; | |
300 | } | |
301 | else { | |
302 | u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); | |
303 | V.lo = (V.hi<<63)|(V.lo>>1); | |
304 | V.hi = (V.hi>>1 )^((u64)T<<32); | |
305 | } | |
306 | Htable[i] = V; | |
307 | } | |
308 | ||
2262beef | 309 | #if defined(OPENSSL_SMALL_FOOTPRINT) |
e7f5b1cd | 310 | for (i=2; i<16; i<<=1) { |
2262beef AP |
311 | u128 *Hi = Htable+i; |
312 | int j; | |
313 | for (V=*Hi, j=1; j<i; ++j) { | |
314 | Hi[j].hi = V.hi^Htable[j].hi; | |
315 | Hi[j].lo = V.lo^Htable[j].lo; | |
e7f5b1cd AP |
316 | } |
317 | } | |
2262beef AP |
318 | #else |
319 | Htable[3].hi = V.hi^Htable[2].hi, Htable[3].lo = V.lo^Htable[2].lo; | |
320 | V=Htable[4]; | |
321 | Htable[5].hi = V.hi^Htable[1].hi, Htable[5].lo = V.lo^Htable[1].lo; | |
322 | Htable[6].hi = V.hi^Htable[2].hi, Htable[6].lo = V.lo^Htable[2].lo; | |
323 | Htable[7].hi = V.hi^Htable[3].hi, Htable[7].lo = V.lo^Htable[3].lo; | |
324 | V=Htable[8]; | |
325 | Htable[9].hi = V.hi^Htable[1].hi, Htable[9].lo = V.lo^Htable[1].lo; | |
326 | Htable[10].hi = V.hi^Htable[2].hi, Htable[10].lo = V.lo^Htable[2].lo; | |
327 | Htable[11].hi = V.hi^Htable[3].hi, Htable[11].lo = V.lo^Htable[3].lo; | |
328 | Htable[12].hi = V.hi^Htable[4].hi, Htable[12].lo = V.lo^Htable[4].lo; | |
329 | Htable[13].hi = V.hi^Htable[5].hi, Htable[13].lo = V.lo^Htable[5].lo; | |
330 | Htable[14].hi = V.hi^Htable[6].hi, Htable[14].lo = V.lo^Htable[6].lo; | |
331 | Htable[15].hi = V.hi^Htable[7].hi, Htable[15].lo = V.lo^Htable[7].lo; | |
332 | #endif | |
e7f5b1cd AP |
333 | } |
334 | ||
a595baff | 335 | #ifndef GHASH_ASM |
2262beef AP |
336 | static const size_t rem_4bit[16] = { |
337 | PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460), | |
338 | PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0), | |
339 | PACK(0xE100), PACK(0xFD20), PACK(0xD940), PACK(0xC560), | |
340 | PACK(0x9180), PACK(0x8DA0), PACK(0xA9C0), PACK(0xB5E0) }; | |
341 | ||
4f39edbf | 342 | static void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]) |
e7f5b1cd | 343 | { |
2262beef AP |
344 | u128 Z; |
345 | int cnt = 15; | |
346 | size_t rem, nlo, nhi; | |
e7f5b1cd | 347 | const union { long one; char little; } is_endian = {1}; |
2262beef AP |
348 | |
349 | nlo = ((const u8 *)Xi)[15]; | |
350 | nhi = nlo>>4; | |
351 | nlo &= 0xf; | |
352 | ||
353 | Z.hi = Htable[nlo].hi; | |
354 | Z.lo = Htable[nlo].lo; | |
e7f5b1cd AP |
355 | |
356 | while (1) { | |
2262beef AP |
357 | rem = (size_t)Z.lo&0xf; |
358 | Z.lo = (Z.hi<<60)|(Z.lo>>4); | |
359 | Z.hi = (Z.hi>>4); | |
360 | if (sizeof(size_t)==8) | |
361 | Z.hi ^= rem_4bit[rem]; | |
362 | else | |
363 | Z.hi ^= (u64)rem_4bit[rem]<<32; | |
364 | ||
365 | Z.hi ^= Htable[nhi].hi; | |
366 | Z.lo ^= Htable[nhi].lo; | |
367 | ||
368 | if (--cnt<0) break; | |
369 | ||
370 | nlo = ((const u8 *)Xi)[cnt]; | |
e7f5b1cd AP |
371 | nhi = nlo>>4; |
372 | nlo &= 0xf; | |
373 | ||
2262beef AP |
374 | rem = (size_t)Z.lo&0xf; |
375 | Z.lo = (Z.hi<<60)|(Z.lo>>4); | |
376 | Z.hi = (Z.hi>>4); | |
377 | if (sizeof(size_t)==8) | |
378 | Z.hi ^= rem_4bit[rem]; | |
379 | else | |
380 | Z.hi ^= (u64)rem_4bit[rem]<<32; | |
381 | ||
e7f5b1cd AP |
382 | Z.hi ^= Htable[nlo].hi; |
383 | Z.lo ^= Htable[nlo].lo; | |
2262beef | 384 | } |
e7f5b1cd | 385 | |
2262beef AP |
386 | if (is_endian.little) { |
387 | #ifdef BSWAP8 | |
388 | Xi[0] = BSWAP8(Z.hi); | |
389 | Xi[1] = BSWAP8(Z.lo); | |
390 | #else | |
391 | u8 *p = (u8 *)Xi; | |
392 | u32 v; | |
393 | v = (u32)(Z.hi>>32); PUTU32(p,v); | |
394 | v = (u32)(Z.hi); PUTU32(p+4,v); | |
395 | v = (u32)(Z.lo>>32); PUTU32(p+8,v); | |
396 | v = (u32)(Z.lo); PUTU32(p+12,v); | |
397 | #endif | |
398 | } | |
399 | else { | |
400 | Xi[0] = Z.hi; | |
401 | Xi[1] = Z.lo; | |
402 | } | |
403 | } | |
404 | ||
405 | #if !defined(OPENSSL_SMALL_FOOTPRINT) | |
406 | /* | |
407 | * Streamed gcm_mult_4bit, see CRYPTO_gcm128_[en|de]crypt for | |
a595baff AP |
408 | * details... Compiler-generated code doesn't seem to give any |
409 | * performance improvement, at least not on x86[_64]. It's here | |
410 | * mostly as reference and a placeholder for possible future | |
411 | * non-trivial optimization[s]... | |
2262beef | 412 | */ |
4f39edbf AP |
413 | static void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16], |
414 | const u8 *inp,size_t len) | |
2262beef AP |
415 | { |
416 | u128 Z; | |
417 | int cnt; | |
418 | size_t rem, nlo, nhi; | |
419 | const union { long one; char little; } is_endian = {1}; | |
420 | ||
421 | do { | |
422 | cnt = 15; | |
423 | nlo = ((const u8 *)Xi)[15]; | |
424 | nlo ^= inp[15]; | |
425 | nhi = nlo>>4; | |
426 | nlo &= 0xf; | |
427 | ||
428 | Z.hi = Htable[nlo].hi; | |
429 | Z.lo = Htable[nlo].lo; | |
430 | ||
431 | while (1) { | |
e7f5b1cd AP |
432 | rem = (size_t)Z.lo&0xf; |
433 | Z.lo = (Z.hi<<60)|(Z.lo>>4); | |
434 | Z.hi = (Z.hi>>4); | |
435 | if (sizeof(size_t)==8) | |
436 | Z.hi ^= rem_4bit[rem]; | |
437 | else | |
438 | Z.hi ^= (u64)rem_4bit[rem]<<32; | |
439 | ||
440 | Z.hi ^= Htable[nhi].hi; | |
441 | Z.lo ^= Htable[nhi].lo; | |
442 | ||
2262beef | 443 | if (--cnt<0) break; |
e7f5b1cd | 444 | |
2262beef AP |
445 | nlo = ((const u8 *)Xi)[cnt]; |
446 | nlo ^= inp[cnt]; | |
447 | nhi = nlo>>4; | |
448 | nlo &= 0xf; | |
e7f5b1cd AP |
449 | |
450 | rem = (size_t)Z.lo&0xf; | |
451 | Z.lo = (Z.hi<<60)|(Z.lo>>4); | |
452 | Z.hi = (Z.hi>>4); | |
453 | if (sizeof(size_t)==8) | |
454 | Z.hi ^= rem_4bit[rem]; | |
455 | else | |
456 | Z.hi ^= (u64)rem_4bit[rem]<<32; | |
2262beef AP |
457 | |
458 | Z.hi ^= Htable[nlo].hi; | |
459 | Z.lo ^= Htable[nlo].lo; | |
e7f5b1cd AP |
460 | } |
461 | ||
462 | if (is_endian.little) { | |
463 | #ifdef BSWAP8 | |
464 | Xi[0] = BSWAP8(Z.hi); | |
465 | Xi[1] = BSWAP8(Z.lo); | |
466 | #else | |
467 | u8 *p = (u8 *)Xi; | |
468 | u32 v; | |
469 | v = (u32)(Z.hi>>32); PUTU32(p,v); | |
470 | v = (u32)(Z.hi); PUTU32(p+4,v); | |
471 | v = (u32)(Z.lo>>32); PUTU32(p+8,v); | |
472 | v = (u32)(Z.lo); PUTU32(p+12,v); | |
473 | #endif | |
474 | } | |
475 | else { | |
476 | Xi[0] = Z.hi; | |
477 | Xi[1] = Z.lo; | |
478 | } | |
2262beef | 479 | } while (inp+=16, len-=16); |
e7f5b1cd | 480 | } |
2262beef AP |
481 | #endif |
482 | #else | |
4f39edbf AP |
483 | void gcm_gmult_4bit(u64 Xi[2],const u128 Htable[16]); |
484 | void gcm_ghash_4bit(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); | |
2262beef AP |
485 | #endif |
486 | ||
/*
 * The second macro parameter names the context member to multiply:
 * GCM_MUL(ctx,Yi) operates on ctx->Yi, GCM_MUL(ctx,Xi) on ctx->Xi.
 */
#define GCM_MUL(ctx,Xi)		gcm_gmult_4bit((ctx)->Xi.u,(ctx)->Htable)

#if defined(GHASH_ASM) || !defined(OPENSSL_SMALL_FOOTPRINT)
/* Hash len bytes at in into the running state of ctx. */
# define GHASH(in,len,ctx)	gcm_ghash_4bit((ctx)->Xi.u,(ctx)->Htable,in,len)
/*
 * GHASH_CHUNK is a "stride parameter" meant to mitigate the cache
 * thrashing effect.  In other words, the idea is to hash data while
 * it is still in L1 cache after the encryption pass.
 */
# define GHASH_CHUNK		1024
#endif
2262beef | 495 | |
a595baff | 496 | #else /* TABLE_BITS */ |
e7f5b1cd | 497 | |
2262beef | 498 | static void gcm_gmult_1bit(u64 Xi[2],const u64 H[2]) |
e7f5b1cd AP |
499 | { |
500 | u128 V,Z = { 0,0 }; | |
501 | long X; | |
502 | int i,j; | |
503 | const long *xi = (const long *)Xi; | |
504 | const union { long one; char little; } is_endian = {1}; | |
505 | ||
2262beef | 506 | V.hi = H[0]; /* H is in host byte order, no byte swapping */ |
e7f5b1cd AP |
507 | V.lo = H[1]; |
508 | ||
509 | for (j=0; j<16/sizeof(long); ++j) { | |
510 | if (is_endian.little) { | |
511 | if (sizeof(long)==8) { | |
512 | #ifdef BSWAP8 | |
513 | X = (long)(BSWAP8(xi[j])); | |
514 | #else | |
515 | const u8 *p = (const u8 *)(xi+j); | |
516 | X = (long)((u64)GETU32(p)<<32|GETU32(p+4)); | |
517 | #endif | |
518 | } | |
519 | else { | |
520 | const u8 *p = (const u8 *)(xi+j); | |
521 | X = (long)GETU32(p); | |
522 | } | |
523 | } | |
524 | else | |
525 | X = xi[j]; | |
526 | ||
527 | for (i=0; i<8*sizeof(long); ++i, X<<=1) { | |
528 | u64 M = (u64)(X>>(8*sizeof(long)-1)); | |
529 | Z.hi ^= V.hi&M; | |
530 | Z.lo ^= V.lo&M; | |
531 | ||
532 | if (sizeof(size_t)==8) { | |
533 | u64 T = U64(0xe100000000000000) & (0-(V.lo&1)); | |
534 | V.lo = (V.hi<<63)|(V.lo>>1); | |
535 | V.hi = (V.hi>>1 )^T; | |
536 | } | |
537 | else { | |
538 | u32 T = 0xe1000000U & (0-(u32)(V.lo&1)); | |
539 | V.lo = (V.hi<<63)|(V.lo>>1); | |
540 | V.hi = (V.hi>>1 )^((u64)T<<32); | |
541 | } | |
542 | ||
543 | } | |
544 | } | |
545 | ||
546 | if (is_endian.little) { | |
547 | #ifdef BSWAP8 | |
548 | Xi[0] = BSWAP8(Z.hi); | |
549 | Xi[1] = BSWAP8(Z.lo); | |
550 | #else | |
551 | u8 *p = (u8 *)Xi; | |
552 | u32 v; | |
553 | v = (u32)(Z.hi>>32); PUTU32(p,v); | |
554 | v = (u32)(Z.hi); PUTU32(p+4,v); | |
555 | v = (u32)(Z.lo>>32); PUTU32(p+8,v); | |
556 | v = (u32)(Z.lo); PUTU32(p+12,v); | |
557 | #endif | |
558 | } | |
559 | else { | |
560 | Xi[0] = Z.hi; | |
561 | Xi[1] = Z.lo; | |
562 | } | |
563 | } | |
2262beef | 564 | #define GCM_MUL(ctx,Xi) gcm_gmult_1bit(ctx->Xi.u,ctx->H.u) |
a595baff | 565 | |
e7f5b1cd AP |
566 | #endif |
567 | ||
19f7e5e2 | 568 | struct gcm128_context { |
e7f5b1cd AP |
569 | /* Following 6 names follow names in GCM specification */ |
570 | union { u64 u[2]; u32 d[4]; u8 c[16]; } Yi,EKi,EK0, | |
571 | Xi,H, | |
572 | len; | |
a595baff AP |
573 | /* Pre-computed table used by gcm_gmult_* */ |
574 | #if TABLE_BITS==8 | |
575 | u128 Htable[256]; | |
576 | #else | |
e7f5b1cd | 577 | u128 Htable[16]; |
a595baff | 578 | #endif |
e7f5b1cd AP |
579 | unsigned int res, ctr; |
580 | block128_f block; | |
581 | void *key; | |
19f7e5e2 | 582 | }; |
e7f5b1cd AP |
583 | |
584 | void CRYPTO_gcm128_init(GCM128_CONTEXT *ctx,void *key,block128_f block) | |
585 | { | |
586 | const union { long one; char little; } is_endian = {1}; | |
587 | ||
588 | memset(ctx,0,sizeof(*ctx)); | |
589 | ctx->block = block; | |
590 | ctx->key = key; | |
591 | ||
592 | (*block)(ctx->H.c,ctx->H.c,key); | |
593 | ||
594 | if (is_endian.little) { | |
595 | /* H is stored in host byte order */ | |
596 | #ifdef BSWAP8 | |
597 | ctx->H.u[0] = BSWAP8(ctx->H.u[0]); | |
598 | ctx->H.u[1] = BSWAP8(ctx->H.u[1]); | |
599 | #else | |
600 | u8 *p = ctx->H.c; | |
601 | u64 hi,lo; | |
602 | hi = (u64)GETU32(p) <<32|GETU32(p+4); | |
603 | lo = (u64)GETU32(p+8)<<32|GETU32(p+12); | |
604 | ctx->H.u[0] = hi; | |
605 | ctx->H.u[1] = lo; | |
606 | #endif | |
607 | } | |
608 | ||
a595baff AP |
609 | #if TABLE_BITS==8 |
610 | gcm_init_8bit(ctx->Htable,ctx->H.u); | |
611 | #elif TABLE_BITS==4 | |
e7f5b1cd | 612 | gcm_init_4bit(ctx->Htable,ctx->H.u); |
a595baff | 613 | #endif |
e7f5b1cd AP |
614 | } |
615 | ||
616 | void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx,const unsigned char *iv,size_t len) | |
617 | { | |
618 | const union { long one; char little; } is_endian = {1}; | |
619 | ||
620 | ctx->Yi.u[0] = 0; | |
621 | ctx->Yi.u[1] = 0; | |
622 | ctx->Xi.u[0] = 0; | |
623 | ctx->Xi.u[1] = 0; | |
624 | ctx->len.u[0] = 0; | |
625 | ctx->len.u[1] = 0; | |
626 | ctx->res = 0; | |
627 | ||
628 | if (len==12) { | |
629 | memcpy(ctx->Yi.c,iv,12); | |
630 | ctx->Yi.c[15]=1; | |
631 | ctx->ctr=1; | |
632 | } | |
633 | else { | |
634 | size_t i; | |
635 | u64 len0 = len; | |
636 | ||
637 | while (len>=16) { | |
638 | for (i=0; i<16; ++i) ctx->Yi.c[i] ^= iv[i]; | |
639 | GCM_MUL(ctx,Yi); | |
640 | iv += 16; | |
641 | len -= 16; | |
642 | } | |
643 | if (len) { | |
644 | for (i=0; i<len; ++i) ctx->Yi.c[i] ^= iv[i]; | |
645 | GCM_MUL(ctx,Yi); | |
646 | } | |
647 | len0 <<= 3; | |
648 | if (is_endian.little) { | |
649 | #ifdef BSWAP8 | |
650 | ctx->Yi.u[1] ^= BSWAP8(len0); | |
651 | #else | |
652 | ctx->Yi.c[8] ^= (u8)(len0>>56); | |
653 | ctx->Yi.c[9] ^= (u8)(len0>>48); | |
654 | ctx->Yi.c[10] ^= (u8)(len0>>40); | |
655 | ctx->Yi.c[11] ^= (u8)(len0>>32); | |
656 | ctx->Yi.c[12] ^= (u8)(len0>>24); | |
657 | ctx->Yi.c[13] ^= (u8)(len0>>16); | |
658 | ctx->Yi.c[14] ^= (u8)(len0>>8); | |
659 | ctx->Yi.c[15] ^= (u8)(len0); | |
660 | #endif | |
661 | } | |
662 | else | |
663 | ctx->Yi.u[1] ^= len0; | |
664 | ||
665 | GCM_MUL(ctx,Yi); | |
666 | ||
667 | if (is_endian.little) | |
668 | ctx->ctr = GETU32(ctx->Yi.c+12); | |
669 | else | |
670 | ctx->ctr = ctx->Yi.d[3]; | |
671 | } | |
672 | ||
673 | (*ctx->block)(ctx->Yi.c,ctx->EK0.c,ctx->key); | |
2262beef AP |
674 | ++ctx->ctr; |
675 | if (is_endian.little) | |
676 | PUTU32(ctx->Yi.c+12,ctx->ctr); | |
677 | else | |
678 | ctx->Yi.d[3] = ctx->ctr; | |
e7f5b1cd AP |
679 | } |
680 | ||
681 | void CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx,const unsigned char *aad,size_t len) | |
682 | { | |
683 | size_t i; | |
684 | ||
685 | ctx->len.u[0] += len; | |
686 | ||
2262beef AP |
687 | #ifdef GHASH |
688 | if ((i = (len&(size_t)-16))) { | |
689 | GHASH(aad,i,ctx); | |
690 | aad += i; | |
691 | len -= i; | |
692 | } | |
693 | #else | |
e7f5b1cd AP |
694 | while (len>=16) { |
695 | for (i=0; i<16; ++i) ctx->Xi.c[i] ^= aad[i]; | |
696 | GCM_MUL(ctx,Xi); | |
697 | aad += 16; | |
698 | len -= 16; | |
699 | } | |
2262beef | 700 | #endif |
e7f5b1cd AP |
701 | if (len) { |
702 | for (i=0; i<len; ++i) ctx->Xi.c[i] ^= aad[i]; | |
703 | GCM_MUL(ctx,Xi); | |
704 | } | |
705 | } | |
706 | ||
707 | void CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, | |
708 | const unsigned char *in, unsigned char *out, | |
709 | size_t len) | |
710 | { | |
711 | const union { long one; char little; } is_endian = {1}; | |
712 | unsigned int n, ctr; | |
713 | size_t i; | |
714 | ||
715 | ctx->len.u[1] += len; | |
716 | n = ctx->res; | |
717 | ctr = ctx->ctr; | |
718 | ||
719 | #if !defined(OPENSSL_SMALL_FOOTPRINT) | |
720 | if (16%sizeof(size_t) == 0) do { /* always true actually */ | |
721 | if (n) { | |
722 | while (n && len) { | |
723 | ctx->Xi.c[n] ^= *(out++) = *(in++)^ctx->EKi.c[n]; | |
724 | --len; | |
725 | n = (n+1)%16; | |
726 | } | |
727 | if (n==0) GCM_MUL(ctx,Xi); | |
728 | else { | |
729 | ctx->res = n; | |
730 | return; | |
731 | } | |
732 | } | |
e7f5b1cd AP |
733 | #if defined(STRICT_ALIGNMENT) |
734 | if (((size_t)in|(size_t)out)%sizeof(size_t) != 0) | |
735 | break; | |
736 | #endif | |
a595baff | 737 | #if defined(GHASH) && defined(GHASH_CHUNK) |
2262beef AP |
738 | while (len>=GHASH_CHUNK) { |
739 | size_t j=GHASH_CHUNK; | |
740 | ||
741 | while (j) { | |
742 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); | |
e7f5b1cd AP |
743 | ++ctr; |
744 | if (is_endian.little) | |
745 | PUTU32(ctx->Yi.c+12,ctr); | |
746 | else | |
747 | ctx->Yi.d[3] = ctr; | |
2262beef AP |
748 | for (i=0; i<16; i+=sizeof(size_t)) |
749 | *(size_t *)(out+i) = | |
750 | *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i); | |
751 | out += 16; | |
752 | in += 16; | |
753 | j -= 16; | |
754 | } | |
755 | GHASH(out-GHASH_CHUNK,GHASH_CHUNK,ctx); | |
756 | len -= GHASH_CHUNK; | |
757 | } | |
758 | if ((i = (len&(size_t)-16))) { | |
759 | size_t j=i; | |
760 | ||
761 | while (len>=16) { | |
762 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); | |
763 | ++ctr; | |
764 | if (is_endian.little) | |
765 | PUTU32(ctx->Yi.c+12,ctr); | |
766 | else | |
767 | ctx->Yi.d[3] = ctr; | |
768 | for (i=0; i<16; i+=sizeof(size_t)) | |
769 | *(size_t *)(out+i) = | |
770 | *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i); | |
771 | out += 16; | |
772 | in += 16; | |
773 | len -= 16; | |
774 | } | |
775 | GHASH(out-j,j,ctx); | |
776 | } | |
777 | #else | |
778 | while (len>=16) { | |
e7f5b1cd | 779 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); |
2262beef AP |
780 | ++ctr; |
781 | if (is_endian.little) | |
782 | PUTU32(ctx->Yi.c+12,ctr); | |
783 | else | |
784 | ctx->Yi.d[3] = ctr; | |
e7f5b1cd AP |
785 | for (i=0; i<16; i+=sizeof(size_t)) |
786 | *(size_t *)(ctx->Xi.c+i) ^= | |
787 | *(size_t *)(out+i) = | |
788 | *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i); | |
789 | GCM_MUL(ctx,Xi); | |
790 | out += 16; | |
791 | in += 16; | |
792 | len -= 16; | |
793 | } | |
2262beef | 794 | #endif |
e7f5b1cd | 795 | if (len) { |
2262beef | 796 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); |
e7f5b1cd AP |
797 | ++ctr; |
798 | if (is_endian.little) | |
799 | PUTU32(ctx->Yi.c+12,ctr); | |
800 | else | |
801 | ctx->Yi.d[3] = ctr; | |
e7f5b1cd AP |
802 | while (len--) { |
803 | ctx->Xi.c[n] ^= out[n] = in[n]^ctx->EKi.c[n]; | |
804 | ++n; | |
805 | } | |
806 | } | |
807 | ||
808 | ctx->res = n; | |
809 | ctx->ctr = ctr; | |
810 | return; | |
811 | } while(0); | |
812 | #endif | |
813 | for (i=0;i<len;++i) { | |
814 | if (n==0) { | |
2262beef | 815 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); |
e7f5b1cd AP |
816 | ++ctr; |
817 | if (is_endian.little) | |
818 | PUTU32(ctx->Yi.c+12,ctr); | |
819 | else | |
820 | ctx->Yi.d[3] = ctr; | |
e7f5b1cd AP |
821 | } |
822 | ctx->Xi.c[n] ^= out[i] = in[i]^ctx->EKi.c[n]; | |
823 | n = (n+1)%16; | |
824 | if (n==0) | |
825 | GCM_MUL(ctx,Xi); | |
826 | } | |
827 | ||
828 | ctx->res = n; | |
829 | ctx->ctr = ctr; | |
830 | } | |
831 | ||
832 | void CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, | |
833 | const unsigned char *in, unsigned char *out, | |
834 | size_t len) | |
835 | { | |
836 | const union { long one; char little; } is_endian = {1}; | |
837 | unsigned int n, ctr; | |
838 | size_t i; | |
839 | ||
840 | ctx->len.u[1] += len; | |
841 | n = ctx->res; | |
842 | ctr = ctx->ctr; | |
843 | ||
844 | #if !defined(OPENSSL_SMALL_FOOTPRINT) | |
845 | if (16%sizeof(size_t) == 0) do { /* always true actually */ | |
846 | if (n) { | |
847 | while (n && len) { | |
848 | u8 c = *(in++); | |
849 | *(out++) = c^ctx->EKi.c[n]; | |
850 | ctx->Xi.c[n] ^= c; | |
851 | --len; | |
852 | n = (n+1)%16; | |
853 | } | |
854 | if (n==0) GCM_MUL (ctx,Xi); | |
855 | else { | |
856 | ctx->res = n; | |
857 | return; | |
858 | } | |
859 | } | |
e7f5b1cd AP |
860 | #if defined(STRICT_ALIGNMENT) |
861 | if (((size_t)in|(size_t)out)%sizeof(size_t) != 0) | |
862 | break; | |
863 | #endif | |
a595baff | 864 | #if defined(GHASH) && defined(GHASH_CHUNK) |
2262beef AP |
865 | while (len>=GHASH_CHUNK) { |
866 | size_t j=GHASH_CHUNK; | |
867 | ||
868 | GHASH(in,GHASH_CHUNK,ctx); | |
869 | while (j) { | |
870 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); | |
e7f5b1cd AP |
871 | ++ctr; |
872 | if (is_endian.little) | |
873 | PUTU32(ctx->Yi.c+12,ctr); | |
874 | else | |
875 | ctx->Yi.d[3] = ctr; | |
2262beef AP |
876 | for (i=0; i<16; i+=sizeof(size_t)) |
877 | *(size_t *)(out+i) = | |
878 | *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i); | |
879 | out += 16; | |
880 | in += 16; | |
881 | j -= 16; | |
882 | } | |
883 | len -= GHASH_CHUNK; | |
884 | } | |
885 | if ((i = (len&(size_t)-16))) { | |
886 | GHASH(in,i,ctx); | |
887 | while (len>=16) { | |
888 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); | |
889 | ++ctr; | |
890 | if (is_endian.little) | |
891 | PUTU32(ctx->Yi.c+12,ctr); | |
892 | else | |
893 | ctx->Yi.d[3] = ctr; | |
894 | for (i=0; i<16; i+=sizeof(size_t)) | |
895 | *(size_t *)(out+i) = | |
896 | *(size_t *)(in+i)^*(size_t *)(ctx->EKi.c+i); | |
897 | out += 16; | |
898 | in += 16; | |
899 | len -= 16; | |
900 | } | |
901 | } | |
902 | #else | |
903 | while (len>=16) { | |
e7f5b1cd | 904 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); |
2262beef AP |
905 | ++ctr; |
906 | if (is_endian.little) | |
907 | PUTU32(ctx->Yi.c+12,ctr); | |
908 | else | |
909 | ctx->Yi.d[3] = ctr; | |
e7f5b1cd AP |
910 | for (i=0; i<16; i+=sizeof(size_t)) { |
911 | size_t c = *(size_t *)(in+i); | |
912 | *(size_t *)(out+i) = c^*(size_t *)(ctx->EKi.c+i); | |
913 | *(size_t *)(ctx->Xi.c+i) ^= c; | |
914 | } | |
2262beef | 915 | GCM_MUL(ctx,Xi); |
e7f5b1cd AP |
916 | out += 16; |
917 | in += 16; | |
918 | len -= 16; | |
919 | } | |
2262beef | 920 | #endif |
e7f5b1cd | 921 | if (len) { |
2262beef | 922 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); |
e7f5b1cd AP |
923 | ++ctr; |
924 | if (is_endian.little) | |
925 | PUTU32(ctx->Yi.c+12,ctr); | |
926 | else | |
927 | ctx->Yi.d[3] = ctr; | |
e7f5b1cd AP |
928 | while (len--) { |
929 | u8 c = in[n]; | |
930 | ctx->Xi.c[n] ^= c; | |
931 | out[n] = c^ctx->EKi.c[n]; | |
932 | ++n; | |
933 | } | |
934 | } | |
935 | ||
936 | ctx->res = n; | |
937 | ctx->ctr = ctr; | |
938 | return; | |
939 | } while(0); | |
940 | #endif | |
941 | for (i=0;i<len;++i) { | |
942 | u8 c; | |
943 | if (n==0) { | |
2262beef | 944 | (*ctx->block)(ctx->Yi.c,ctx->EKi.c,ctx->key); |
e7f5b1cd AP |
945 | ++ctr; |
946 | if (is_endian.little) | |
947 | PUTU32(ctx->Yi.c+12,ctr); | |
948 | else | |
949 | ctx->Yi.d[3] = ctr; | |
e7f5b1cd AP |
950 | } |
951 | c = in[i]; | |
952 | out[i] ^= ctx->EKi.c[n]; | |
953 | ctx->Xi.c[n] ^= c; | |
954 | n = (n+1)%16; | |
955 | if (n==0) | |
956 | GCM_MUL(ctx,Xi); | |
957 | } | |
958 | ||
959 | ctx->res = n; | |
960 | ctx->ctr = ctr; | |
961 | } | |
962 | ||
963 | void CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx) | |
964 | { | |
965 | const union { long one; char little; } is_endian = {1}; | |
966 | u64 alen = ctx->len.u[0]<<3; | |
967 | u64 clen = ctx->len.u[1]<<3; | |
968 | ||
969 | if (ctx->res) | |
970 | GCM_MUL(ctx,Xi); | |
971 | ||
972 | if (is_endian.little) { | |
973 | #ifdef BSWAP8 | |
974 | alen = BSWAP8(alen); | |
975 | clen = BSWAP8(clen); | |
976 | #else | |
977 | u8 *p = ctx->len.c; | |
978 | ||
979 | ctx->len.u[0] = alen; | |
980 | ctx->len.u[1] = clen; | |
981 | ||
982 | alen = (u64)GETU32(p) <<32|GETU32(p+4); | |
983 | clen = (u64)GETU32(p+8)<<32|GETU32(p+12); | |
984 | #endif | |
985 | } | |
986 | ||
987 | ctx->Xi.u[0] ^= alen; | |
988 | ctx->Xi.u[1] ^= clen; | |
989 | GCM_MUL(ctx,Xi); | |
990 | ||
991 | ctx->Xi.u[0] ^= ctx->EK0.u[0]; | |
992 | ctx->Xi.u[1] ^= ctx->EK0.u[1]; | |
993 | } | |
994 | ||
995 | #if defined(SELFTEST) | |
996 | #include <stdio.h> | |
997 | #include <openssl/aes.h> | |
998 | ||
999 | /* Test Case 1 */ | |
1000 | static const u8 K1[16], | |
1001 | *P1=NULL, | |
1002 | *A1=NULL, | |
1003 | IV1[12], | |
1004 | *C1=NULL, | |
1005 | T1[]= {0x58,0xe2,0xfc,0xce,0xfa,0x7e,0x30,0x61,0x36,0x7f,0x1d,0x57,0xa4,0xe7,0x45,0x5a}; | |
a595baff | 1006 | |
e7f5b1cd AP |
1007 | /* Test Case 2 */ |
1008 | #define K2 K1 | |
1009 | #define A2 A1 | |
1010 | #define IV2 IV1 | |
1011 | static const u8 P2[16], | |
1012 | C2[]= {0x03,0x88,0xda,0xce,0x60,0xb6,0xa3,0x92,0xf3,0x28,0xc2,0xb9,0x71,0xb2,0xfe,0x78}, | |
1013 | T2[]= {0xab,0x6e,0x47,0xd4,0x2c,0xec,0x13,0xbd,0xf5,0x3a,0x67,0xb2,0x12,0x57,0xbd,0xdf}; | |
1014 | ||
1015 | /* Test Case 3 */ | |
1016 | #define A3 A2 | |
1017 | static const u8 K3[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08}, | |
1018 | P3[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, | |
1019 | 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, | |
1020 | 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, | |
1021 | 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55}, | |
1022 | IV3[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88}, | |
1023 | C3[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c, | |
1024 | 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e, | |
1025 | 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05, | |
1026 | 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91,0x47,0x3f,0x59,0x85}, | |
1027 | T3[]= {0x4d,0x5c,0x2a,0xf3,0x27,0xcd,0x64,0xa6,0x2c,0xf3,0x5a,0xbd,0x2b,0xa6,0xfa,0xb4,}; | |
1028 | ||
1029 | /* Test Case 4 */ | |
1030 | #define K4 K3 | |
1031 | #define IV4 IV3 | |
1032 | static const u8 P4[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, | |
1033 | 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, | |
1034 | 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, | |
1035 | 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39}, | |
1036 | A4[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, | |
1037 | 0xab,0xad,0xda,0xd2}, | |
1038 | C4[]= {0x42,0x83,0x1e,0xc2,0x21,0x77,0x74,0x24,0x4b,0x72,0x21,0xb7,0x84,0xd0,0xd4,0x9c, | |
1039 | 0xe3,0xaa,0x21,0x2f,0x2c,0x02,0xa4,0xe0,0x35,0xc1,0x7e,0x23,0x29,0xac,0xa1,0x2e, | |
1040 | 0x21,0xd5,0x14,0xb2,0x54,0x66,0x93,0x1c,0x7d,0x8f,0x6a,0x5a,0xac,0x84,0xaa,0x05, | |
1041 | 0x1b,0xa3,0x0b,0x39,0x6a,0x0a,0xac,0x97,0x3d,0x58,0xe0,0x91}, | |
1042 | T4[]= {0x5b,0xc9,0x4f,0xbc,0x32,0x21,0xa5,0xdb,0x94,0xfa,0xe9,0x5a,0xe7,0x12,0x1a,0x47}; | |
1043 | ||
1044 | /* Test Case 5 */ | |
1045 | #define K5 K4 | |
1046 | #define P5 P4 | |
1047 | static const u8 A5[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, | |
1048 | 0xab,0xad,0xda,0xd2}, | |
1049 | IV5[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad}, | |
1050 | C5[]= {0x61,0x35,0x3b,0x4c,0x28,0x06,0x93,0x4a,0x77,0x7f,0xf5,0x1f,0xa2,0x2a,0x47,0x55, | |
1051 | 0x69,0x9b,0x2a,0x71,0x4f,0xcd,0xc6,0xf8,0x37,0x66,0xe5,0xf9,0x7b,0x6c,0x74,0x23, | |
1052 | 0x73,0x80,0x69,0x00,0xe4,0x9f,0x24,0xb2,0x2b,0x09,0x75,0x44,0xd4,0x89,0x6b,0x42, | |
1053 | 0x49,0x89,0xb5,0xe1,0xeb,0xac,0x0f,0x07,0xc2,0x3f,0x45,0x98}, | |
1054 | T5[]= {0x36,0x12,0xd2,0xe7,0x9e,0x3b,0x07,0x85,0x56,0x1b,0xe1,0x4a,0xac,0xa2,0xfc,0xcb}; | |
a595baff | 1055 | |
e7f5b1cd AP |
1056 | /* Test Case 6 */ |
1057 | #define K6 K5 | |
1058 | #define P6 P5 | |
1059 | #define A6 A5 | |
1060 | static const u8 IV6[]= {0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa, | |
1061 | 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28, | |
1062 | 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54, | |
1063 | 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b}, | |
1064 | C6[]= {0x8c,0xe2,0x49,0x98,0x62,0x56,0x15,0xb6,0x03,0xa0,0x33,0xac,0xa1,0x3f,0xb8,0x94, | |
1065 | 0xbe,0x91,0x12,0xa5,0xc3,0xa2,0x11,0xa8,0xba,0x26,0x2a,0x3c,0xca,0x7e,0x2c,0xa7, | |
1066 | 0x01,0xe4,0xa9,0xa4,0xfb,0xa4,0x3c,0x90,0xcc,0xdc,0xb2,0x81,0xd4,0x8c,0x7c,0x6f, | |
1067 | 0xd6,0x28,0x75,0xd2,0xac,0xa4,0x17,0x03,0x4c,0x34,0xae,0xe5}, | |
1068 | T6[]= {0x61,0x9c,0xc5,0xae,0xff,0xfe,0x0b,0xfa,0x46,0x2a,0xf4,0x3c,0x16,0x99,0xd0,0x50}; | |
1069 | ||
1070 | /* Test Case 7 */ | |
1071 | static const u8 K7[24], | |
1072 | *P7=NULL, | |
1073 | *A7=NULL, | |
1074 | IV7[12], | |
1075 | *C7=NULL, | |
1076 | T7[]= {0xcd,0x33,0xb2,0x8a,0xc7,0x73,0xf7,0x4b,0xa0,0x0e,0xd1,0xf3,0x12,0x57,0x24,0x35}; | |
1077 | ||
1078 | /* Test Case 8 */ | |
1079 | #define K8 K7 | |
1080 | #define IV8 IV7 | |
1081 | #define A8 A7 | |
1082 | static const u8 P8[16], | |
1083 | C8[]= {0x98,0xe7,0x24,0x7c,0x07,0xf0,0xfe,0x41,0x1c,0x26,0x7e,0x43,0x84,0xb0,0xf6,0x00}, | |
1084 | T8[]= {0x2f,0xf5,0x8d,0x80,0x03,0x39,0x27,0xab,0x8e,0xf4,0xd4,0x58,0x75,0x14,0xf0,0xfb}; | |
1085 | ||
1086 | /* Test Case 9 */ | |
1087 | #define A9 A8 | |
1088 | static const u8 K9[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08, | |
1089 | 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c}, | |
1090 | P9[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, | |
1091 | 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, | |
1092 | 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, | |
1093 | 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55}, | |
1094 | IV9[]= {0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88}, | |
1095 | C9[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57, | |
1096 | 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c, | |
1097 | 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47, | |
1098 | 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10,0xac,0xad,0xe2,0x56}, | |
1099 | T9[]= {0x99,0x24,0xa7,0xc8,0x58,0x73,0x36,0xbf,0xb1,0x18,0x02,0x4d,0xb8,0x67,0x4a,0x14}; | |
1100 | ||
1101 | /* Test Case 10 */ | |
1102 | #define K10 K9 | |
1103 | #define IV10 IV9 | |
1104 | static const u8 P10[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, | |
1105 | 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, | |
1106 | 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, | |
1107 | 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39}, | |
1108 | A10[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, | |
1109 | 0xab,0xad,0xda,0xd2}, | |
1110 | C10[]= {0x39,0x80,0xca,0x0b,0x3c,0x00,0xe8,0x41,0xeb,0x06,0xfa,0xc4,0x87,0x2a,0x27,0x57, | |
1111 | 0x85,0x9e,0x1c,0xea,0xa6,0xef,0xd9,0x84,0x62,0x85,0x93,0xb4,0x0c,0xa1,0xe1,0x9c, | |
1112 | 0x7d,0x77,0x3d,0x00,0xc1,0x44,0xc5,0x25,0xac,0x61,0x9d,0x18,0xc8,0x4a,0x3f,0x47, | |
1113 | 0x18,0xe2,0x44,0x8b,0x2f,0xe3,0x24,0xd9,0xcc,0xda,0x27,0x10}, | |
1114 | T10[]= {0x25,0x19,0x49,0x8e,0x80,0xf1,0x47,0x8f,0x37,0xba,0x55,0xbd,0x6d,0x27,0x61,0x8c}; | |
1115 | ||
1116 | /* Test Case 11 */ | |
1117 | #define K11 K10 | |
1118 | #define P11 P10 | |
1119 | #define A11 A10 | |
1120 | static const u8 IV11[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad}, | |
1121 | C11[]= {0x0f,0x10,0xf5,0x99,0xae,0x14,0xa1,0x54,0xed,0x24,0xb3,0x6e,0x25,0x32,0x4d,0xb8, | |
1122 | 0xc5,0x66,0x63,0x2e,0xf2,0xbb,0xb3,0x4f,0x83,0x47,0x28,0x0f,0xc4,0x50,0x70,0x57, | |
1123 | 0xfd,0xdc,0x29,0xdf,0x9a,0x47,0x1f,0x75,0xc6,0x65,0x41,0xd4,0xd4,0xda,0xd1,0xc9, | |
1124 | 0xe9,0x3a,0x19,0xa5,0x8e,0x8b,0x47,0x3f,0xa0,0xf0,0x62,0xf7}, | |
1125 | T11[]= {0x65,0xdc,0xc5,0x7f,0xcf,0x62,0x3a,0x24,0x09,0x4f,0xcc,0xa4,0x0d,0x35,0x33,0xf8}; | |
1126 | ||
1127 | /* Test Case 12 */ | |
1128 | #define K12 K11 | |
1129 | #define P12 P11 | |
1130 | #define A12 A11 | |
1131 | static const u8 IV12[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa, | |
1132 | 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28, | |
1133 | 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54, | |
1134 | 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b}, | |
1135 | C12[]= {0xd2,0x7e,0x88,0x68,0x1c,0xe3,0x24,0x3c,0x48,0x30,0x16,0x5a,0x8f,0xdc,0xf9,0xff, | |
1136 | 0x1d,0xe9,0xa1,0xd8,0xe6,0xb4,0x47,0xef,0x6e,0xf7,0xb7,0x98,0x28,0x66,0x6e,0x45, | |
1137 | 0x81,0xe7,0x90,0x12,0xaf,0x34,0xdd,0xd9,0xe2,0xf0,0x37,0x58,0x9b,0x29,0x2d,0xb3, | |
1138 | 0xe6,0x7c,0x03,0x67,0x45,0xfa,0x22,0xe7,0xe9,0xb7,0x37,0x3b}, | |
1139 | T12[]= {0xdc,0xf5,0x66,0xff,0x29,0x1c,0x25,0xbb,0xb8,0x56,0x8f,0xc3,0xd3,0x76,0xa6,0xd9}; | |
1140 | ||
1141 | /* Test Case 13 */ | |
1142 | static const u8 K13[32], | |
1143 | *P13=NULL, | |
1144 | *A13=NULL, | |
1145 | IV13[12], | |
1146 | *C13=NULL, | |
1147 | T13[]={0x53,0x0f,0x8a,0xfb,0xc7,0x45,0x36,0xb9,0xa9,0x63,0xb4,0xf1,0xc4,0xcb,0x73,0x8b}; | |
1148 | ||
1149 | /* Test Case 14 */ | |
1150 | #define K14 K13 | |
1151 | #define A14 A13 | |
1152 | static const u8 P14[16], | |
1153 | IV14[12], | |
1154 | C14[]= {0xce,0xa7,0x40,0x3d,0x4d,0x60,0x6b,0x6e,0x07,0x4e,0xc5,0xd3,0xba,0xf3,0x9d,0x18}, | |
1155 | T14[]= {0xd0,0xd1,0xc8,0xa7,0x99,0x99,0x6b,0xf0,0x26,0x5b,0x98,0xb5,0xd4,0x8a,0xb9,0x19}; | |
1156 | ||
1157 | /* Test Case 15 */ | |
1158 | #define A15 A14 | |
1159 | static const u8 K15[]= {0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08, | |
1160 | 0xfe,0xff,0xe9,0x92,0x86,0x65,0x73,0x1c,0x6d,0x6a,0x8f,0x94,0x67,0x30,0x83,0x08}, | |
1161 | P15[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, | |
1162 | 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, | |
1163 | 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, | |
1164 | 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39,0x1a,0xaf,0xd2,0x55}, | |
1165 | IV15[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad,0xde,0xca,0xf8,0x88}, | |
1166 | C15[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d, | |
1167 | 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa, | |
1168 | 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38, | |
1169 | 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62,0x89,0x80,0x15,0xad}, | |
1170 | T15[]= {0xb0,0x94,0xda,0xc5,0xd9,0x34,0x71,0xbd,0xec,0x1a,0x50,0x22,0x70,0xe3,0xcc,0x6c}; | |
1171 | ||
1172 | /* Test Case 16 */ | |
1173 | #define K16 K15 | |
1174 | #define IV16 IV15 | |
1175 | static const u8 P16[]= {0xd9,0x31,0x32,0x25,0xf8,0x84,0x06,0xe5,0xa5,0x59,0x09,0xc5,0xaf,0xf5,0x26,0x9a, | |
1176 | 0x86,0xa7,0xa9,0x53,0x15,0x34,0xf7,0xda,0x2e,0x4c,0x30,0x3d,0x8a,0x31,0x8a,0x72, | |
1177 | 0x1c,0x3c,0x0c,0x95,0x95,0x68,0x09,0x53,0x2f,0xcf,0x0e,0x24,0x49,0xa6,0xb5,0x25, | |
1178 | 0xb1,0x6a,0xed,0xf5,0xaa,0x0d,0xe6,0x57,0xba,0x63,0x7b,0x39}, | |
1179 | A16[]= {0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef,0xfe,0xed,0xfa,0xce,0xde,0xad,0xbe,0xef, | |
1180 | 0xab,0xad,0xda,0xd2}, | |
1181 | C16[]= {0x52,0x2d,0xc1,0xf0,0x99,0x56,0x7d,0x07,0xf4,0x7f,0x37,0xa3,0x2a,0x84,0x42,0x7d, | |
1182 | 0x64,0x3a,0x8c,0xdc,0xbf,0xe5,0xc0,0xc9,0x75,0x98,0xa2,0xbd,0x25,0x55,0xd1,0xaa, | |
1183 | 0x8c,0xb0,0x8e,0x48,0x59,0x0d,0xbb,0x3d,0xa7,0xb0,0x8b,0x10,0x56,0x82,0x88,0x38, | |
1184 | 0xc5,0xf6,0x1e,0x63,0x93,0xba,0x7a,0x0a,0xbc,0xc9,0xf6,0x62}, | |
1185 | T16[]= {0x76,0xfc,0x6e,0xce,0x0f,0x4e,0x17,0x68,0xcd,0xdf,0x88,0x53,0xbb,0x2d,0x55,0x1b}; | |
1186 | ||
1187 | /* Test Case 17 */ | |
1188 | #define K17 K16 | |
1189 | #define P17 P16 | |
1190 | #define A17 A16 | |
1191 | static const u8 IV17[]={0xca,0xfe,0xba,0xbe,0xfa,0xce,0xdb,0xad}, | |
1192 | C17[]= {0xc3,0x76,0x2d,0xf1,0xca,0x78,0x7d,0x32,0xae,0x47,0xc1,0x3b,0xf1,0x98,0x44,0xcb, | |
1193 | 0xaf,0x1a,0xe1,0x4d,0x0b,0x97,0x6a,0xfa,0xc5,0x2f,0xf7,0xd7,0x9b,0xba,0x9d,0xe0, | |
1194 | 0xfe,0xb5,0x82,0xd3,0x39,0x34,0xa4,0xf0,0x95,0x4c,0xc2,0x36,0x3b,0xc7,0x3f,0x78, | |
1195 | 0x62,0xac,0x43,0x0e,0x64,0xab,0xe4,0x99,0xf4,0x7c,0x9b,0x1f}, | |
1196 | T17[]= {0x3a,0x33,0x7d,0xbf,0x46,0xa7,0x92,0xc4,0x5e,0x45,0x49,0x13,0xfe,0x2e,0xa8,0xf2}; | |
1197 | ||
1198 | /* Test Case 18 */ | |
1199 | #define K18 K17 | |
1200 | #define P18 P17 | |
1201 | #define A18 A17 | |
1202 | static const u8 IV18[]={0x93,0x13,0x22,0x5d,0xf8,0x84,0x06,0xe5,0x55,0x90,0x9c,0x5a,0xff,0x52,0x69,0xaa, | |
1203 | 0x6a,0x7a,0x95,0x38,0x53,0x4f,0x7d,0xa1,0xe4,0xc3,0x03,0xd2,0xa3,0x18,0xa7,0x28, | |
1204 | 0xc3,0xc0,0xc9,0x51,0x56,0x80,0x95,0x39,0xfc,0xf0,0xe2,0x42,0x9a,0x6b,0x52,0x54, | |
1205 | 0x16,0xae,0xdb,0xf5,0xa0,0xde,0x6a,0x57,0xa6,0x37,0xb3,0x9b}, | |
1206 | C18[]= {0x5a,0x8d,0xef,0x2f,0x0c,0x9e,0x53,0xf1,0xf7,0x5d,0x78,0x53,0x65,0x9e,0x2a,0x20, | |
1207 | 0xee,0xb2,0xb2,0x2a,0xaf,0xde,0x64,0x19,0xa0,0x58,0xab,0x4f,0x6f,0x74,0x6b,0xf4, | |
1208 | 0x0f,0xc0,0xc3,0xb7,0x80,0xf2,0x44,0x45,0x2d,0xa3,0xeb,0xf1,0xc5,0xd8,0x2c,0xde, | |
1209 | 0xa2,0x41,0x89,0x97,0x20,0x0e,0xf8,0x2e,0x44,0xae,0x7e,0x3f}, | |
1210 | T18[]= {0xa4,0x4a,0x82,0x66,0xee,0x1c,0x8e,0xb0,0xc8,0xb5,0xd4,0xcf,0x5a,0xe9,0xf1,0x9a}; | |
1211 | ||
/*
 * TEST_CASE(n) - run known-answer test number n in both directions.
 *
 * Expects K<n>, IV<n>, A<n>, P<n>, C<n> and T<n> to be defined, and the
 * locals `ctx' (GCM128_CONTEXT), `key' (AES_KEY) and `ret' (int failure
 * counter) to be in scope at the expansion site.  A<n>, P<n> and C<n> may
 * be NULL pointers, in which case the corresponding step is skipped.
 * After each direction the 16 bytes in ctx.Xi.c are compared with T<n> and
 * the produced text with C<n> (encrypt) or P<n> (decrypt); every mismatch
 * increments `ret' and prints a message.
 */
#define TEST_CASE(n)	do {						\
	u8 out[sizeof(P##n)];						\
	AES_set_encrypt_key(K##n,sizeof(K##n)*8,&key);			\
	CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt);		\
	/* encrypt direction */						\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));			\
	if (A##n) {							\
		CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));		\
	}								\
	if (P##n) {							\
		CRYPTO_gcm128_encrypt(&ctx,P##n,out,sizeof(out));	\
	}								\
	CRYPTO_gcm128_finish(&ctx);					\
	if (memcmp(ctx.Xi.c,T##n,16) ||					\
	    (C##n && memcmp(out,C##n,sizeof(out)))) {			\
		ret++;							\
		printf ("encrypt test#%d failed.\n",n);			\
	}								\
	/* decrypt direction, same key schedule, IV set again */	\
	CRYPTO_gcm128_setiv(&ctx,IV##n,sizeof(IV##n));			\
	if (A##n) {							\
		CRYPTO_gcm128_aad(&ctx,A##n,sizeof(A##n));		\
	}								\
	if (C##n) {							\
		CRYPTO_gcm128_decrypt(&ctx,C##n,out,sizeof(out));	\
	}								\
	CRYPTO_gcm128_finish(&ctx);					\
	if (memcmp(ctx.Xi.c,T##n,16) ||					\
	    (P##n && memcmp(out,P##n,sizeof(out)))) {			\
		ret++;							\
		printf ("decrypt test#%d failed.\n",n);			\
	}								\
} while(0)
1229 | ||
1230 | int main() | |
1231 | { | |
1232 | GCM128_CONTEXT ctx; | |
1233 | AES_KEY key; | |
1234 | int ret=0; | |
1235 | ||
1236 | TEST_CASE(1); | |
1237 | TEST_CASE(2); | |
1238 | TEST_CASE(3); | |
1239 | TEST_CASE(4); | |
1240 | TEST_CASE(5); | |
1241 | TEST_CASE(6); | |
1242 | TEST_CASE(7); | |
1243 | TEST_CASE(8); | |
1244 | TEST_CASE(9); | |
1245 | TEST_CASE(10); | |
1246 | TEST_CASE(11); | |
1247 | TEST_CASE(12); | |
1248 | TEST_CASE(13); | |
1249 | TEST_CASE(14); | |
1250 | TEST_CASE(15); | |
1251 | TEST_CASE(16); | |
1252 | TEST_CASE(17); | |
1253 | TEST_CASE(18); | |
1254 | ||
a595baff | 1255 | #ifdef OPENSSL_CPUID_OBJ |
2262beef AP |
1256 | { |
1257 | size_t start,stop,gcm_t,ctr_t,OPENSSL_rdtsc(); | |
1258 | union { u64 u; u8 c[1024]; } buf; | |
2262beef AP |
1259 | |
1260 | AES_set_encrypt_key(K1,sizeof(K1)*8,&key); | |
1261 | CRYPTO_gcm128_init(&ctx,&key,(block128_f)AES_encrypt); | |
1262 | CRYPTO_gcm128_setiv(&ctx,IV1,sizeof(IV1)); | |
1263 | ||
1264 | CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf)); | |
1265 | start = OPENSSL_rdtsc(); | |
1266 | CRYPTO_gcm128_encrypt(&ctx,buf.c,buf.c,sizeof(buf)); | |
1267 | gcm_t = OPENSSL_rdtsc() - start; | |
1268 | ||
1269 | CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf), | |
1270 | &key,ctx.Yi.c,ctx.EKi.c,&ctx.res, | |
1271 | (block128_f)AES_encrypt); | |
1272 | start = OPENSSL_rdtsc(); | |
1273 | CRYPTO_ctr128_encrypt(buf.c,buf.c,sizeof(buf), | |
a595baff AP |
1274 | &key,ctx.Yi.c,ctx.EKi.c,&ctx.res, |
1275 | (block128_f)AES_encrypt); | |
2262beef AP |
1276 | ctr_t = OPENSSL_rdtsc() - start; |
1277 | ||
1278 | printf("%.2f-%.2f=%.2f\n", | |
1279 | gcm_t/(double)sizeof(buf), | |
1280 | ctr_t/(double)sizeof(buf), | |
1281 | (gcm_t-ctr_t)/(double)sizeof(buf)); | |
a595baff AP |
1282 | #ifdef GHASH |
1283 | GHASH(buf.c,sizeof(buf),&ctx); | |
1284 | start = OPENSSL_rdtsc(); | |
1285 | GHASH(buf.c,sizeof(buf),&ctx); | |
1286 | gcm_t = OPENSSL_rdtsc() - start; | |
1287 | printf("%.2f\n",gcm_t/(double)sizeof(buf)); | |
1288 | #endif | |
2262beef | 1289 | } |
a595baff | 1290 | #endif |
2262beef | 1291 | |
e7f5b1cd AP |
1292 | return ret; |
1293 | } | |
1294 | #endif |