]>
Commit | Line | Data |
---|---|---|
313811b7 MW |
1 | /* |
2 | * Copyright (C) 2015 Martin Willi | |
3 | * Copyright (C) 2015 revosec AG | |
4 | * | |
5 | * This program is free software; you can redistribute it and/or modify it | |
6 | * under the terms of the GNU General Public License as published by the | |
7 | * Free Software Foundation; either version 2 of the License, or (at your | |
8 | * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
12 | * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
13 | * for more details. | |
14 | */ | |
15 | ||
16 | #include "aesni_gcm.h" | |
17 | #include "aesni_key.h" | |
18 | ||
19 | #include <crypto/iv/iv_gen_seq.h> | |
20 | ||
21 | #include <tmmintrin.h> | |
22 | ||
23 | #define NONCE_SIZE 12 | |
24 | #define IV_SIZE 8 | |
25 | #define SALT_SIZE (NONCE_SIZE - IV_SIZE) | |
26 | ||
58c44cdd MW |
27 | /** |
28 | * Parallel pipelining | |
29 | */ | |
30 | #define GCM_CRYPT_PARALLELISM 4 | |
31 | ||
313811b7 MW |
typedef struct private_aesni_gcm_t private_aesni_gcm_t;

/**
 * GCM en/decryption method type
 *
 * Arguments are: this, data length, input, output, IV, associated data
 * length, associated data, ICV output buffer.
 */
typedef void (*aesni_gcm_fn_t)(private_aesni_gcm_t*, size_t, u_char*, u_char*,
							   u_char*, size_t, u_char*, u_char*);

/**
 * Private data of an aesni_gcm_t object.
 */
struct private_aesni_gcm_t {

	/**
	 * Public aesni_gcm_t interface.
	 */
	aesni_gcm_t public;

	/**
	 * Encryption key schedule
	 */
	aesni_key_t *key;

	/**
	 * IV generator.
	 */
	iv_gen_t *iv_gen;

	/**
	 * Length of the integrity check value
	 */
	size_t icv_size;

	/**
	 * Length of the key in bytes
	 */
	size_t key_size;

	/**
	 * GCM encryption function (selected per key size)
	 */
	aesni_gcm_fn_t encrypt;

	/**
	 * GCM decryption function (selected per key size)
	 */
	aesni_gcm_fn_t decrypt;

	/**
	 * salt to add to nonce (implicit part of the GCM nonce)
	 */
	u_char salt[SALT_SIZE];

	/**
	 * GHASH subkey H, big-endian
	 */
	__m128i h;

	/**
	 * GHASH key H^2, big-endian (for 4-way aggregated reduction)
	 */
	__m128i hh;

	/**
	 * GHASH key H^3, big-endian (for 4-way aggregated reduction)
	 */
	__m128i hhh;

	/**
	 * GHASH key H^4, big-endian (for 4-way aggregated reduction)
	 */
	__m128i hhhh;
};
105 | ||
/**
 * Reverse the byte order of a 128-bit value (big- <-> little-endian)
 */
static inline __m128i swap128(__m128i x)
{
	__m128i reverse;

	/* byte i of the result is byte 15-i of the input */
	reverse = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8,
							7, 6, 5, 4, 3, 2, 1, 0);
	return _mm_shuffle_epi8(x, reverse);
}
114 | ||
/**
 * Multiply two blocks in GF128
 *
 * y is a block in big-endian (wire) representation; h is the GHASH key,
 * expected already in the order required by the carry-less multiply
 * (prepared during key setup — not visible in this file chunk).
 * Returns y * h reduced modulo the GCM polynomial, big-endian again.
 */
static __m128i mult_block(__m128i h, __m128i y)
{
	__m128i t1, t2, t3, t4, t5, t6;

	/* operate on the native (byte-swapped) representation */
	y = swap128(y);

	/* schoolbook 64x64 carry-less multiplies: lo*lo, two cross terms, hi*hi */
	t1 = _mm_clmulepi64_si128(h, y, 0x00);
	t2 = _mm_clmulepi64_si128(h, y, 0x01);
	t3 = _mm_clmulepi64_si128(h, y, 0x10);
	t4 = _mm_clmulepi64_si128(h, y, 0x11);

	/* fold the combined middle terms into the 256-bit product t4:t1 */
	t2 = _mm_xor_si128(t2, t3);
	t3 = _mm_slli_si128(t2, 8);
	t2 = _mm_srli_si128(t2, 8);
	t1 = _mm_xor_si128(t1, t3);
	t4 = _mm_xor_si128(t4, t2);

	/* shift the whole 256-bit product left by one bit, carrying bit 31 of
	 * each 32-bit lane into the next lane */
	t5 = _mm_srli_epi32(t1, 31);
	t1 = _mm_slli_epi32(t1, 1);
	t6 = _mm_srli_epi32(t4, 31);
	t4 = _mm_slli_epi32(t4, 1);

	t3 = _mm_srli_si128(t5, 12);
	t6 = _mm_slli_si128(t6, 4);
	t5 = _mm_slli_si128(t5, 4);
	t1 = _mm_or_si128(t1, t5);
	t4 = _mm_or_si128(t4, t6);
	t4 = _mm_or_si128(t4, t3);

	/* first reduction phase: multiply the low half by x^31 + x^30 + x^25 */
	t5 = _mm_slli_epi32(t1, 31);
	t6 = _mm_slli_epi32(t1, 30);
	t3 = _mm_slli_epi32(t1, 25);

	t5 = _mm_xor_si128(t5, t6);
	t5 = _mm_xor_si128(t5, t3);
	t6 = _mm_srli_si128(t5, 4);
	t4 = _mm_xor_si128(t4, t6);
	t5 = _mm_slli_si128(t5, 12);
	t1 = _mm_xor_si128(t1, t5);
	t4 = _mm_xor_si128(t4, t1);

	/* second reduction phase: fold in x^-1, x^-2, x^-7 terms */
	t5 = _mm_srli_epi32(t1, 1);
	t2 = _mm_srli_epi32(t1, 2);
	t3 = _mm_srli_epi32(t1, 7);
	t4 = _mm_xor_si128(t4, t2);
	t4 = _mm_xor_si128(t4, t3);
	t4 = _mm_xor_si128(t4, t5);

	/* back to big-endian representation */
	return swap128(t4);
}
168 | ||
0eb593b0 MW |
/**
 * Multiply four consecutive blocks by their respective GHASH key, XOR
 *
 * Aggregated reduction: computes d1*h1 ^ d2*h2 ^ d3*h3 ^ d4*h4 with a
 * single modular reduction at the end, instead of four mult_block() calls.
 * The data blocks d1..d4 are in big-endian representation; the result is
 * returned big-endian as well.
 */
static inline __m128i mult4xor(__m128i h1, __m128i h2, __m128i h3, __m128i h4,
							   __m128i d1, __m128i d2, __m128i d3, __m128i d4)
{
	__m128i t0, t1, t2, t3, t4, t5, t6, t7, t8, t9;

	/* operate on the native (byte-swapped) representation */
	d1 = swap128(d1);
	d2 = swap128(d2);
	d3 = swap128(d3);
	d4 = swap128(d4);

	/* low halves of all four products, XORed together in t8 */
	t0 = _mm_clmulepi64_si128(h1, d1, 0x00);
	t1 = _mm_clmulepi64_si128(h2, d2, 0x00);
	t2 = _mm_clmulepi64_si128(h3, d3, 0x00);
	t3 = _mm_clmulepi64_si128(h4, d4, 0x00);
	t8 = _mm_xor_si128(t0, t1);
	t8 = _mm_xor_si128(t8, t2);
	t8 = _mm_xor_si128(t8, t3);

	/* high halves of all four products, XORed together in t9 */
	t4 = _mm_clmulepi64_si128(h1, d1, 0x11);
	t5 = _mm_clmulepi64_si128(h2, d2, 0x11);
	t6 = _mm_clmulepi64_si128(h3, d3, 0x11);
	t7 = _mm_clmulepi64_si128(h4, d4, 0x11);
	t9 = _mm_xor_si128(t4, t5);
	t9 = _mm_xor_si128(t9, t6);
	t9 = _mm_xor_si128(t9, t7);

	/* Karatsuba-style middle terms: (h.lo ^ h.hi) * (d.lo ^ d.hi) per pair;
	 * _mm_shuffle_epi32(x, 78) swaps the two 64-bit halves */
	t0 = _mm_shuffle_epi32(h1, 78);
	t4 = _mm_shuffle_epi32(d1, 78);
	t0 = _mm_xor_si128(t0, h1);
	t4 = _mm_xor_si128(t4, d1);
	t1 = _mm_shuffle_epi32(h2, 78);
	t5 = _mm_shuffle_epi32(d2, 78);
	t1 = _mm_xor_si128(t1, h2);
	t5 = _mm_xor_si128(t5, d2);
	t2 = _mm_shuffle_epi32(h3, 78);
	t6 = _mm_shuffle_epi32(d3, 78);
	t2 = _mm_xor_si128(t2, h3);
	t6 = _mm_xor_si128(t6, d3);
	t3 = _mm_shuffle_epi32(h4, 78);
	t7 = _mm_shuffle_epi32(d4, 78);
	t3 = _mm_xor_si128(t3, h4);
	t7 = _mm_xor_si128(t7, d4);

	/* accumulate all middle products into t0 */
	t0 = _mm_clmulepi64_si128(t0, t4, 0x00);
	t1 = _mm_clmulepi64_si128(t1, t5, 0x00);
	t2 = _mm_clmulepi64_si128(t2, t6, 0x00);
	t3 = _mm_clmulepi64_si128(t3, t7, 0x00);
	t0 = _mm_xor_si128(t0, t8);
	t0 = _mm_xor_si128(t0, t9);
	t0 = _mm_xor_si128(t1, t0);
	t0 = _mm_xor_si128(t2, t0);

	/* distribute the middle term over the 256-bit sum t6:t3 */
	t0 = _mm_xor_si128(t3, t0);
	t4 = _mm_slli_si128(t0, 8);
	t0 = _mm_srli_si128(t0, 8);
	t3 = _mm_xor_si128(t4, t8);
	t6 = _mm_xor_si128(t0, t9);
	/* shift left by one bit, then reduce modulo the GCM polynomial
	 * (same two-phase reduction as in mult_block) */
	t7 = _mm_srli_epi32(t3, 31);
	t8 = _mm_srli_epi32(t6, 31);
	t3 = _mm_slli_epi32(t3, 1);
	t6 = _mm_slli_epi32(t6, 1);
	t9 = _mm_srli_si128(t7, 12);
	t8 = _mm_slli_si128(t8, 4);
	t7 = _mm_slli_si128(t7, 4);
	t3 = _mm_or_si128(t3, t7);
	t6 = _mm_or_si128(t6, t8);
	t6 = _mm_or_si128(t6, t9);
	t7 = _mm_slli_epi32(t3, 31);
	t8 = _mm_slli_epi32(t3, 30);
	t9 = _mm_slli_epi32(t3, 25);
	t7 = _mm_xor_si128(t7, t8);
	t7 = _mm_xor_si128(t7, t9);
	t8 = _mm_srli_si128(t7, 4);
	t7 = _mm_slli_si128(t7, 12);
	t3 = _mm_xor_si128(t3, t7);
	t2 = _mm_srli_epi32(t3, 1);
	t4 = _mm_srli_epi32(t3, 2);
	t5 = _mm_srli_epi32(t3, 7);
	t2 = _mm_xor_si128(t2, t4);
	t2 = _mm_xor_si128(t2, t5);
	t2 = _mm_xor_si128(t2, t8);
	t3 = _mm_xor_si128(t3, t2);
	t6 = _mm_xor_si128(t6, t3);

	/* back to big-endian representation */
	return swap128(t6);
}
258 | ||
313811b7 MW |
259 | /** |
260 | * GHASH on a single block | |
261 | */ | |
262 | static __m128i ghash(__m128i h, __m128i y, __m128i x) | |
263 | { | |
264 | return mult_block(h, _mm_xor_si128(y, x)); | |
265 | } | |
266 | ||
/**
 * Start constructing the ICV for the associated data
 *
 * Returns the GHASH state Y after absorbing all associated data, starting
 * from Y = 0. A trailing partial block is zero-padded before hashing.
 */
static __m128i icv_header(private_aesni_gcm_t *this, void *assoc, size_t alen)
{
	u_int blocks, pblocks, rem, i;
	__m128i h1, h2, h3, h4, d1, d2, d3, d4;
	__m128i y, last, *ab;

	/* powers of H, highest power first: block i of a 4-block group is
	 * multiplied by H^(4-i) for aggregated reduction */
	h1 = this->hhhh;
	h2 = this->hhh;
	h3 = this->hh;
	h4 = this->h;

	y = _mm_setzero_si128();
	ab = assoc;
	blocks = alen / AES_BLOCK_SIZE;
	/* number of blocks handled by the 4-way parallel loop */
	pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
	rem = alen % AES_BLOCK_SIZE;
	for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
	{
		d1 = _mm_loadu_si128(ab + i + 0);
		d2 = _mm_loadu_si128(ab + i + 1);
		d3 = _mm_loadu_si128(ab + i + 2);
		d4 = _mm_loadu_si128(ab + i + 3);
		/* the running state is folded into the first block only */
		y = _mm_xor_si128(y, d1);
		y = mult4xor(h1, h2, h3, h4, y, d2, d3, d4);
	}
	/* remaining full blocks, one at a time */
	for (i = pblocks; i < blocks; i++)
	{
		y = ghash(this->h, y, _mm_loadu_si128(ab + i));
	}
	if (rem)
	{
		/* zero-pad the final partial block */
		last = _mm_setzero_si128();
		memcpy(&last, ab + blocks, rem);

		y = ghash(this->h, y, last);
	}

	return y;
}
309 | ||
310 | /** | |
311 | * Complete the ICV by hashing a assoc/data length block | |
312 | */ | |
313 | static __m128i icv_tailer(private_aesni_gcm_t *this, __m128i y, | |
314 | size_t alen, size_t dlen) | |
315 | { | |
316 | __m128i b; | |
317 | ||
318 | htoun64(&b, alen * 8); | |
319 | htoun64((u_char*)&b + sizeof(u_int64_t), dlen * 8); | |
320 | ||
321 | return ghash(this->h, y, b); | |
322 | } | |
323 | ||
324 | /** | |
325 | * En-/Decrypt the ICV, trim and store it | |
326 | */ | |
327 | static void icv_crypt(private_aesni_gcm_t *this, __m128i y, __m128i j, | |
328 | u_char *icv) | |
329 | { | |
37794878 | 330 | __m128i *ks, t, b; |
313811b7 MW |
331 | u_int round; |
332 | ||
37794878 MW |
333 | ks = this->key->schedule; |
334 | t = _mm_xor_si128(j, ks[0]); | |
313811b7 MW |
335 | for (round = 1; round < this->key->rounds; round++) |
336 | { | |
37794878 | 337 | t = _mm_aesenc_si128(t, ks[round]); |
313811b7 | 338 | } |
37794878 | 339 | t = _mm_aesenclast_si128(t, ks[this->key->rounds]); |
313811b7 MW |
340 | |
341 | t = _mm_xor_si128(y, t); | |
342 | ||
343 | _mm_storeu_si128(&b, t); | |
344 | memcpy(icv, &b, this->icv_size); | |
345 | } | |
346 | ||
347 | /** | |
348 | * Do big-endian increment on x | |
349 | */ | |
350 | static inline __m128i increment_be(__m128i x) | |
351 | { | |
352 | x = swap128(x); | |
353 | x = _mm_add_epi64(x, _mm_set_epi32(0, 0, 0, 1)); | |
354 | x = swap128(x); | |
355 | ||
356 | return x; | |
357 | } | |
358 | ||
359 | /** | |
360 | * Generate the block J0 | |
361 | */ | |
362 | static inline __m128i create_j(private_aesni_gcm_t *this, u_char *iv) | |
363 | { | |
364 | u_char j[AES_BLOCK_SIZE]; | |
365 | ||
366 | memcpy(j, this->salt, SALT_SIZE); | |
367 | memcpy(j + SALT_SIZE, iv, IV_SIZE); | |
368 | htoun32(j + SALT_SIZE + IV_SIZE, 1); | |
369 | ||
370 | return _mm_loadu_si128((__m128i*)j); | |
371 | } | |
372 | ||
/**
 * Encrypt a remaining incomplete block, return updated Y
 *
 * Encrypts the final rem (< AES_BLOCK_SIZE) bytes in counter mode and
 * folds the resulting ciphertext, zero-padded, into the GHASH state.
 */
static __m128i encrypt_gcm_rem(private_aesni_gcm_t *this, u_int rem,
							   void *in, void *out, __m128i cb, __m128i y)
{
	__m128i *ks, t, b;
	u_int round;

	/* zero-padded copy of the trailing plaintext bytes */
	memset(&b, 0, sizeof(b));
	memcpy(&b, in, rem);

	/* encrypt the counter block cb */
	ks = this->key->schedule;
	t = _mm_xor_si128(cb, ks[0]);
	for (round = 1; round < this->key->rounds; round++)
	{
		t = _mm_aesenc_si128(t, ks[round]);
	}
	t = _mm_aesenclast_si128(t, ks[this->key->rounds]);
	b = _mm_xor_si128(t, b);

	/* emit only the rem ciphertext bytes */
	memcpy(out, &b, rem);

	/* re-zero the keystream tail so GHASH sees ciphertext || 0-padding */
	memset((u_char*)&b + rem, 0, AES_BLOCK_SIZE - rem);
	return ghash(this->h, y, b);
}
399 | ||
/**
 * Decrypt a remaining incomplete block, return updated Y
 *
 * Folds the zero-padded trailing ciphertext into the GHASH state first
 * (GCM authenticates ciphertext), then decrypts it in counter mode.
 */
static __m128i decrypt_gcm_rem(private_aesni_gcm_t *this, u_int rem,
							   void *in, void *out, __m128i cb, __m128i y)
{
	__m128i *ks, t, b;
	u_int round;

	/* zero-padded copy of the trailing ciphertext bytes */
	memset(&b, 0, sizeof(b));
	memcpy(&b, in, rem);

	/* hash the ciphertext before decrypting */
	y = ghash(this->h, y, b);

	/* encrypt the counter block cb, XOR to recover the plaintext */
	ks = this->key->schedule;
	t = _mm_xor_si128(cb, ks[0]);
	for (round = 1; round < this->key->rounds; round++)
	{
		t = _mm_aesenc_si128(t, ks[round]);
	}
	t = _mm_aesenclast_si128(t, ks[this->key->rounds]);
	b = _mm_xor_si128(t, b);

	/* emit only the rem plaintext bytes */
	memcpy(out, &b, rem);

	return y;
}
427 | ||
/**
 * AES-128 GCM encryption/ICV generation
 *
 * Encrypts len bytes from in to out using the given 8-byte IV, hashes
 * alen bytes of associated data, and writes the (truncated) ICV to icv.
 * The AES-128 round count (10) is hard-coded so the round loop can be
 * fully unrolled, four blocks in flight to hide aesenc latency.
 */
static void encrypt_gcm128(private_aesni_gcm_t *this,
						   size_t len, u_char *in, u_char *out, u_char *iv,
						   size_t alen, u_char *assoc, u_char *icv)
{
	__m128i d1, d2, d3, d4, t1, t2, t3, t4;
	__m128i *ks, y, j, cb, *bi, *bo;
	u_int blocks, pblocks, rem, i;

	j = create_j(this, iv);
	/* data encryption starts at counter value 2 (J0 + 1) */
	cb = increment_be(j);
	y = icv_header(this, assoc, alen);
	blocks = len / AES_BLOCK_SIZE;
	pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
	rem = len % AES_BLOCK_SIZE;
	bi = (__m128i*)in;
	bo = (__m128i*)out;

	ks = this->key->schedule;

	/* 4-way pipelined loop: encrypt four counter blocks, XOR with the
	 * plaintext, then hash the four ciphertext blocks with one
	 * aggregated reduction */
	for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
	{
		d1 = _mm_loadu_si128(bi + i + 0);
		d2 = _mm_loadu_si128(bi + i + 1);
		d3 = _mm_loadu_si128(bi + i + 2);
		d4 = _mm_loadu_si128(bi + i + 3);

		t1 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);
		t2 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);
		t3 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);
		t4 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t2 = _mm_aesenc_si128(t2, ks[1]);
		t3 = _mm_aesenc_si128(t3, ks[1]);
		t4 = _mm_aesenc_si128(t4, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t2 = _mm_aesenc_si128(t2, ks[2]);
		t3 = _mm_aesenc_si128(t3, ks[2]);
		t4 = _mm_aesenc_si128(t4, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t2 = _mm_aesenc_si128(t2, ks[3]);
		t3 = _mm_aesenc_si128(t3, ks[3]);
		t4 = _mm_aesenc_si128(t4, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t2 = _mm_aesenc_si128(t2, ks[4]);
		t3 = _mm_aesenc_si128(t3, ks[4]);
		t4 = _mm_aesenc_si128(t4, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t2 = _mm_aesenc_si128(t2, ks[5]);
		t3 = _mm_aesenc_si128(t3, ks[5]);
		t4 = _mm_aesenc_si128(t4, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t2 = _mm_aesenc_si128(t2, ks[6]);
		t3 = _mm_aesenc_si128(t3, ks[6]);
		t4 = _mm_aesenc_si128(t4, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t2 = _mm_aesenc_si128(t2, ks[7]);
		t3 = _mm_aesenc_si128(t3, ks[7]);
		t4 = _mm_aesenc_si128(t4, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t2 = _mm_aesenc_si128(t2, ks[8]);
		t3 = _mm_aesenc_si128(t3, ks[8]);
		t4 = _mm_aesenc_si128(t4, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t2 = _mm_aesenc_si128(t2, ks[9]);
		t3 = _mm_aesenc_si128(t3, ks[9]);
		t4 = _mm_aesenc_si128(t4, ks[9]);

		t1 = _mm_aesenclast_si128(t1, ks[10]);
		t2 = _mm_aesenclast_si128(t2, ks[10]);
		t3 = _mm_aesenclast_si128(t3, ks[10]);
		t4 = _mm_aesenclast_si128(t4, ks[10]);

		t1 = _mm_xor_si128(t1, d1);
		t2 = _mm_xor_si128(t2, d2);
		t3 = _mm_xor_si128(t3, d3);
		t4 = _mm_xor_si128(t4, d4);

		/* hash the four ciphertext blocks; running state folds into t1 */
		y = _mm_xor_si128(y, t1);
		y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, t2, t3, t4);

		_mm_storeu_si128(bo + i + 0, t1);
		_mm_storeu_si128(bo + i + 1, t2);
		_mm_storeu_si128(bo + i + 2, t3);
		_mm_storeu_si128(bo + i + 3, t4);
	}

	/* remaining full blocks, one at a time */
	for (i = pblocks; i < blocks; i++)
	{
		d1 = _mm_loadu_si128(bi + i);

		t1 = _mm_xor_si128(cb, ks[0]);
		t1 = _mm_aesenc_si128(t1, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t1 = _mm_aesenclast_si128(t1, ks[10]);

		t1 = _mm_xor_si128(t1, d1);
		_mm_storeu_si128(bo + i, t1);

		/* hash the ciphertext block */
		y = ghash(this->h, y, t1);

		cb = increment_be(cb);
	}

	if (rem)
	{
		y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
	}
	/* finalize: length block, then encrypt the tag with E(K, J0) */
	y = icv_tailer(this, y, alen, len);
	icv_crypt(this, y, j, icv);
}
553 | ||
/**
 * AES-128 GCM decryption/ICV generation
 *
 * Decrypts len bytes from in to out and computes the ICV over the
 * ciphertext and associated data; the caller compares the ICV.
 * Mirrors encrypt_gcm128(), but GHASH runs over the input (ciphertext)
 * before decryption.
 */
static void decrypt_gcm128(private_aesni_gcm_t *this,
						   size_t len, u_char *in, u_char *out, u_char *iv,
						   size_t alen, u_char *assoc, u_char *icv)
{
	__m128i d1, d2, d3, d4, t1, t2, t3, t4;
	__m128i *ks, y, j, cb, *bi, *bo;
	u_int blocks, pblocks, rem, i;

	j = create_j(this, iv);
	/* data decryption starts at counter value 2 (J0 + 1) */
	cb = increment_be(j);
	y = icv_header(this, assoc, alen);
	blocks = len / AES_BLOCK_SIZE;
	pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
	rem = len % AES_BLOCK_SIZE;
	bi = (__m128i*)in;
	bo = (__m128i*)out;

	ks = this->key->schedule;

	/* 4-way pipelined loop: hash four ciphertext blocks with one
	 * aggregated reduction, then decrypt them in counter mode */
	for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
	{
		d1 = _mm_loadu_si128(bi + i + 0);
		d2 = _mm_loadu_si128(bi + i + 1);
		d3 = _mm_loadu_si128(bi + i + 2);
		d4 = _mm_loadu_si128(bi + i + 3);

		/* hash the ciphertext first; running state folds into d1 */
		y = _mm_xor_si128(y, d1);
		y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, d2, d3, d4);

		t1 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);
		t2 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);
		t3 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);
		t4 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t2 = _mm_aesenc_si128(t2, ks[1]);
		t3 = _mm_aesenc_si128(t3, ks[1]);
		t4 = _mm_aesenc_si128(t4, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t2 = _mm_aesenc_si128(t2, ks[2]);
		t3 = _mm_aesenc_si128(t3, ks[2]);
		t4 = _mm_aesenc_si128(t4, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t2 = _mm_aesenc_si128(t2, ks[3]);
		t3 = _mm_aesenc_si128(t3, ks[3]);
		t4 = _mm_aesenc_si128(t4, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t2 = _mm_aesenc_si128(t2, ks[4]);
		t3 = _mm_aesenc_si128(t3, ks[4]);
		t4 = _mm_aesenc_si128(t4, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t2 = _mm_aesenc_si128(t2, ks[5]);
		t3 = _mm_aesenc_si128(t3, ks[5]);
		t4 = _mm_aesenc_si128(t4, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t2 = _mm_aesenc_si128(t2, ks[6]);
		t3 = _mm_aesenc_si128(t3, ks[6]);
		t4 = _mm_aesenc_si128(t4, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t2 = _mm_aesenc_si128(t2, ks[7]);
		t3 = _mm_aesenc_si128(t3, ks[7]);
		t4 = _mm_aesenc_si128(t4, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t2 = _mm_aesenc_si128(t2, ks[8]);
		t3 = _mm_aesenc_si128(t3, ks[8]);
		t4 = _mm_aesenc_si128(t4, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t2 = _mm_aesenc_si128(t2, ks[9]);
		t3 = _mm_aesenc_si128(t3, ks[9]);
		t4 = _mm_aesenc_si128(t4, ks[9]);

		t1 = _mm_aesenclast_si128(t1, ks[10]);
		t2 = _mm_aesenclast_si128(t2, ks[10]);
		t3 = _mm_aesenclast_si128(t3, ks[10]);
		t4 = _mm_aesenclast_si128(t4, ks[10]);

		t1 = _mm_xor_si128(t1, d1);
		t2 = _mm_xor_si128(t2, d2);
		t3 = _mm_xor_si128(t3, d3);
		t4 = _mm_xor_si128(t4, d4);

		_mm_storeu_si128(bo + i + 0, t1);
		_mm_storeu_si128(bo + i + 1, t2);
		_mm_storeu_si128(bo + i + 2, t3);
		_mm_storeu_si128(bo + i + 3, t4);
	}

	/* remaining full blocks, one at a time */
	for (i = pblocks; i < blocks; i++)
	{
		d1 = _mm_loadu_si128(bi + i);

		/* hash the ciphertext block before decrypting it */
		y = ghash(this->h, y, d1);

		t1 = _mm_xor_si128(cb, ks[0]);
		t1 = _mm_aesenc_si128(t1, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t1 = _mm_aesenclast_si128(t1, ks[10]);

		t1 = _mm_xor_si128(t1, d1);
		_mm_storeu_si128(bo + i, t1);

		cb = increment_be(cb);
	}

	if (rem)
	{
		y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
	}
	/* finalize: length block, then encrypt the tag with E(K, J0) */
	y = icv_tailer(this, y, alen, len);
	icv_crypt(this, y, j, icv);
}
679 | ||
/**
 * AES-192 GCM encryption/ICV generation
 *
 * Identical structure to encrypt_gcm128(), but with the AES-192 round
 * count (12) unrolled: aesenc with ks[1]..ks[11], aesenclast with ks[12].
 */
static void encrypt_gcm192(private_aesni_gcm_t *this,
						   size_t len, u_char *in, u_char *out, u_char *iv,
						   size_t alen, u_char *assoc, u_char *icv)
{
	__m128i d1, d2, d3, d4, t1, t2, t3, t4;
	__m128i *ks, y, j, cb, *bi, *bo;
	u_int blocks, pblocks, rem, i;

	j = create_j(this, iv);
	/* data encryption starts at counter value 2 (J0 + 1) */
	cb = increment_be(j);
	y = icv_header(this, assoc, alen);
	blocks = len / AES_BLOCK_SIZE;
	pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM);
	rem = len % AES_BLOCK_SIZE;
	bi = (__m128i*)in;
	bo = (__m128i*)out;

	ks = this->key->schedule;

	/* 4-way pipelined loop: encrypt four counter blocks, XOR with the
	 * plaintext, then hash the ciphertext with one aggregated reduction */
	for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM)
	{
		d1 = _mm_loadu_si128(bi + i + 0);
		d2 = _mm_loadu_si128(bi + i + 1);
		d3 = _mm_loadu_si128(bi + i + 2);
		d4 = _mm_loadu_si128(bi + i + 3);

		t1 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);
		t2 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);
		t3 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);
		t4 = _mm_xor_si128(cb, ks[0]);
		cb = increment_be(cb);

		t1 = _mm_aesenc_si128(t1, ks[1]);
		t2 = _mm_aesenc_si128(t2, ks[1]);
		t3 = _mm_aesenc_si128(t3, ks[1]);
		t4 = _mm_aesenc_si128(t4, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t2 = _mm_aesenc_si128(t2, ks[2]);
		t3 = _mm_aesenc_si128(t3, ks[2]);
		t4 = _mm_aesenc_si128(t4, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t2 = _mm_aesenc_si128(t2, ks[3]);
		t3 = _mm_aesenc_si128(t3, ks[3]);
		t4 = _mm_aesenc_si128(t4, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t2 = _mm_aesenc_si128(t2, ks[4]);
		t3 = _mm_aesenc_si128(t3, ks[4]);
		t4 = _mm_aesenc_si128(t4, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t2 = _mm_aesenc_si128(t2, ks[5]);
		t3 = _mm_aesenc_si128(t3, ks[5]);
		t4 = _mm_aesenc_si128(t4, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t2 = _mm_aesenc_si128(t2, ks[6]);
		t3 = _mm_aesenc_si128(t3, ks[6]);
		t4 = _mm_aesenc_si128(t4, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t2 = _mm_aesenc_si128(t2, ks[7]);
		t3 = _mm_aesenc_si128(t3, ks[7]);
		t4 = _mm_aesenc_si128(t4, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t2 = _mm_aesenc_si128(t2, ks[8]);
		t3 = _mm_aesenc_si128(t3, ks[8]);
		t4 = _mm_aesenc_si128(t4, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t2 = _mm_aesenc_si128(t2, ks[9]);
		t3 = _mm_aesenc_si128(t3, ks[9]);
		t4 = _mm_aesenc_si128(t4, ks[9]);
		t1 = _mm_aesenc_si128(t1, ks[10]);
		t2 = _mm_aesenc_si128(t2, ks[10]);
		t3 = _mm_aesenc_si128(t3, ks[10]);
		t4 = _mm_aesenc_si128(t4, ks[10]);
		t1 = _mm_aesenc_si128(t1, ks[11]);
		t2 = _mm_aesenc_si128(t2, ks[11]);
		t3 = _mm_aesenc_si128(t3, ks[11]);
		t4 = _mm_aesenc_si128(t4, ks[11]);

		t1 = _mm_aesenclast_si128(t1, ks[12]);
		t2 = _mm_aesenclast_si128(t2, ks[12]);
		t3 = _mm_aesenclast_si128(t3, ks[12]);
		t4 = _mm_aesenclast_si128(t4, ks[12]);

		t1 = _mm_xor_si128(t1, d1);
		t2 = _mm_xor_si128(t2, d2);
		t3 = _mm_xor_si128(t3, d3);
		t4 = _mm_xor_si128(t4, d4);

		/* hash the four ciphertext blocks; running state folds into t1 */
		y = _mm_xor_si128(y, t1);
		y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, t2, t3, t4);

		_mm_storeu_si128(bo + i + 0, t1);
		_mm_storeu_si128(bo + i + 1, t2);
		_mm_storeu_si128(bo + i + 2, t3);
		_mm_storeu_si128(bo + i + 3, t4);
	}

	/* remaining full blocks, one at a time */
	for (i = pblocks; i < blocks; i++)
	{
		d1 = _mm_loadu_si128(bi + i);

		t1 = _mm_xor_si128(cb, ks[0]);
		t1 = _mm_aesenc_si128(t1, ks[1]);
		t1 = _mm_aesenc_si128(t1, ks[2]);
		t1 = _mm_aesenc_si128(t1, ks[3]);
		t1 = _mm_aesenc_si128(t1, ks[4]);
		t1 = _mm_aesenc_si128(t1, ks[5]);
		t1 = _mm_aesenc_si128(t1, ks[6]);
		t1 = _mm_aesenc_si128(t1, ks[7]);
		t1 = _mm_aesenc_si128(t1, ks[8]);
		t1 = _mm_aesenc_si128(t1, ks[9]);
		t1 = _mm_aesenc_si128(t1, ks[10]);
		t1 = _mm_aesenc_si128(t1, ks[11]);
		t1 = _mm_aesenclast_si128(t1, ks[12]);

		t1 = _mm_xor_si128(t1, d1);
		_mm_storeu_si128(bo + i, t1);

		/* hash the ciphertext block */
		y = ghash(this->h, y, t1);

		cb = increment_be(cb);
	}

	if (rem)
	{
		y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y);
	}
	/* finalize: length block, then encrypt the tag with E(K, J0) */
	y = icv_tailer(this, y, alen, len);
	icv_crypt(this, y, j, icv);
}
815 | ||
816 | /** | |
817 | * AES-192 GCM decryption/ICV generation | |
818 | */ | |
819 | static void decrypt_gcm192(private_aesni_gcm_t *this, | |
820 | size_t len, u_char *in, u_char *out, u_char *iv, | |
821 | size_t alen, u_char *assoc, u_char *icv) | |
822 | { | |
37794878 MW |
823 | __m128i d1, d2, d3, d4, t1, t2, t3, t4; |
824 | __m128i *ks, y, j, cb, *bi, *bo; | |
58c44cdd | 825 | u_int blocks, pblocks, rem, i; |
677649cf MW |
826 | |
827 | j = create_j(this, iv); | |
58c44cdd | 828 | cb = increment_be(j); |
677649cf MW |
829 | y = icv_header(this, assoc, alen); |
830 | blocks = len / AES_BLOCK_SIZE; | |
58c44cdd | 831 | pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); |
677649cf MW |
832 | rem = len % AES_BLOCK_SIZE; |
833 | bi = (__m128i*)in; | |
834 | bo = (__m128i*)out; | |
835 | ||
37794878 | 836 | ks = this->key->schedule; |
677649cf | 837 | |
58c44cdd | 838 | for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) |
677649cf | 839 | { |
58c44cdd MW |
840 | d1 = _mm_loadu_si128(bi + i + 0); |
841 | d2 = _mm_loadu_si128(bi + i + 1); | |
842 | d3 = _mm_loadu_si128(bi + i + 2); | |
843 | d4 = _mm_loadu_si128(bi + i + 3); | |
844 | ||
0eb593b0 | 845 | y = _mm_xor_si128(y, d1); |
37794878 | 846 | y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, d2, d3, d4); |
58c44cdd | 847 | |
37794878 | 848 | t1 = _mm_xor_si128(cb, ks[0]); |
58c44cdd | 849 | cb = increment_be(cb); |
37794878 | 850 | t2 = _mm_xor_si128(cb, ks[0]); |
58c44cdd | 851 | cb = increment_be(cb); |
37794878 | 852 | t3 = _mm_xor_si128(cb, ks[0]); |
58c44cdd | 853 | cb = increment_be(cb); |
37794878 | 854 | t4 = _mm_xor_si128(cb, ks[0]); |
58c44cdd MW |
855 | cb = increment_be(cb); |
856 | ||
37794878 MW |
857 | t1 = _mm_aesenc_si128(t1, ks[1]); |
858 | t2 = _mm_aesenc_si128(t2, ks[1]); | |
859 | t3 = _mm_aesenc_si128(t3, ks[1]); | |
860 | t4 = _mm_aesenc_si128(t4, ks[1]); | |
861 | t1 = _mm_aesenc_si128(t1, ks[2]); | |
862 | t2 = _mm_aesenc_si128(t2, ks[2]); | |
863 | t3 = _mm_aesenc_si128(t3, ks[2]); | |
864 | t4 = _mm_aesenc_si128(t4, ks[2]); | |
865 | t1 = _mm_aesenc_si128(t1, ks[3]); | |
866 | t2 = _mm_aesenc_si128(t2, ks[3]); | |
867 | t3 = _mm_aesenc_si128(t3, ks[3]); | |
868 | t4 = _mm_aesenc_si128(t4, ks[3]); | |
869 | t1 = _mm_aesenc_si128(t1, ks[4]); | |
870 | t2 = _mm_aesenc_si128(t2, ks[4]); | |
871 | t3 = _mm_aesenc_si128(t3, ks[4]); | |
872 | t4 = _mm_aesenc_si128(t4, ks[4]); | |
873 | t1 = _mm_aesenc_si128(t1, ks[5]); | |
874 | t2 = _mm_aesenc_si128(t2, ks[5]); | |
875 | t3 = _mm_aesenc_si128(t3, ks[5]); | |
876 | t4 = _mm_aesenc_si128(t4, ks[5]); | |
877 | t1 = _mm_aesenc_si128(t1, ks[6]); | |
878 | t2 = _mm_aesenc_si128(t2, ks[6]); | |
879 | t3 = _mm_aesenc_si128(t3, ks[6]); | |
880 | t4 = _mm_aesenc_si128(t4, ks[6]); | |
881 | t1 = _mm_aesenc_si128(t1, ks[7]); | |
882 | t2 = _mm_aesenc_si128(t2, ks[7]); | |
883 | t3 = _mm_aesenc_si128(t3, ks[7]); | |
884 | t4 = _mm_aesenc_si128(t4, ks[7]); | |
885 | t1 = _mm_aesenc_si128(t1, ks[8]); | |
886 | t2 = _mm_aesenc_si128(t2, ks[8]); | |
887 | t3 = _mm_aesenc_si128(t3, ks[8]); | |
888 | t4 = _mm_aesenc_si128(t4, ks[8]); | |
889 | t1 = _mm_aesenc_si128(t1, ks[9]); | |
890 | t2 = _mm_aesenc_si128(t2, ks[9]); | |
891 | t3 = _mm_aesenc_si128(t3, ks[9]); | |
892 | t4 = _mm_aesenc_si128(t4, ks[9]); | |
893 | t1 = _mm_aesenc_si128(t1, ks[10]); | |
894 | t2 = _mm_aesenc_si128(t2, ks[10]); | |
895 | t3 = _mm_aesenc_si128(t3, ks[10]); | |
896 | t4 = _mm_aesenc_si128(t4, ks[10]); | |
897 | t1 = _mm_aesenc_si128(t1, ks[11]); | |
898 | t2 = _mm_aesenc_si128(t2, ks[11]); | |
899 | t3 = _mm_aesenc_si128(t3, ks[11]); | |
900 | t4 = _mm_aesenc_si128(t4, ks[11]); | |
901 | ||
902 | t1 = _mm_aesenclast_si128(t1, ks[12]); | |
903 | t2 = _mm_aesenclast_si128(t2, ks[12]); | |
904 | t3 = _mm_aesenclast_si128(t3, ks[12]); | |
905 | t4 = _mm_aesenclast_si128(t4, ks[12]); | |
58c44cdd MW |
906 | |
907 | t1 = _mm_xor_si128(t1, d1); | |
908 | t2 = _mm_xor_si128(t2, d2); | |
909 | t3 = _mm_xor_si128(t3, d3); | |
910 | t4 = _mm_xor_si128(t4, d4); | |
0eb593b0 | 911 | |
58c44cdd MW |
912 | _mm_storeu_si128(bo + i + 0, t1); |
913 | _mm_storeu_si128(bo + i + 1, t2); | |
914 | _mm_storeu_si128(bo + i + 2, t3); | |
915 | _mm_storeu_si128(bo + i + 3, t4); | |
916 | } | |
917 | ||
918 | for (i = pblocks; i < blocks; i++) | |
919 | { | |
920 | d1 = _mm_loadu_si128(bi + i); | |
921 | ||
37794878 MW |
922 | y = ghash(this->h, y, d1); |
923 | ||
924 | t1 = _mm_xor_si128(cb, ks[0]); | |
925 | t1 = _mm_aesenc_si128(t1, ks[1]); | |
926 | t1 = _mm_aesenc_si128(t1, ks[2]); | |
927 | t1 = _mm_aesenc_si128(t1, ks[3]); | |
928 | t1 = _mm_aesenc_si128(t1, ks[4]); | |
929 | t1 = _mm_aesenc_si128(t1, ks[5]); | |
930 | t1 = _mm_aesenc_si128(t1, ks[6]); | |
931 | t1 = _mm_aesenc_si128(t1, ks[7]); | |
932 | t1 = _mm_aesenc_si128(t1, ks[8]); | |
933 | t1 = _mm_aesenc_si128(t1, ks[9]); | |
934 | t1 = _mm_aesenc_si128(t1, ks[10]); | |
935 | t1 = _mm_aesenc_si128(t1, ks[11]); | |
936 | t1 = _mm_aesenclast_si128(t1, ks[12]); | |
58c44cdd MW |
937 | |
938 | t1 = _mm_xor_si128(t1, d1); | |
939 | _mm_storeu_si128(bo + i, t1); | |
677649cf MW |
940 | |
941 | cb = increment_be(cb); | |
942 | } | |
943 | ||
944 | if (rem) | |
945 | { | |
946 | y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y); | |
947 | } | |
948 | y = icv_tailer(this, y, alen, len); | |
949 | icv_crypt(this, y, j, icv); | |
950 | } | |
951 | ||
952 | /** | |
953 | * AES-256 GCM encryption/ICV generation | |
954 | */ | |
955 | static void encrypt_gcm256(private_aesni_gcm_t *this, | |
956 | size_t len, u_char *in, u_char *out, u_char *iv, | |
957 | size_t alen, u_char *assoc, u_char *icv) | |
958 | { | |
37794878 MW |
959 | __m128i d1, d2, d3, d4, t1, t2, t3, t4; |
960 | __m128i *ks, y, j, cb, *bi, *bo; | |
58c44cdd | 961 | u_int blocks, pblocks, rem, i; |
677649cf MW |
962 | |
963 | j = create_j(this, iv); | |
58c44cdd | 964 | cb = increment_be(j); |
677649cf MW |
965 | y = icv_header(this, assoc, alen); |
966 | blocks = len / AES_BLOCK_SIZE; | |
58c44cdd | 967 | pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); |
677649cf MW |
968 | rem = len % AES_BLOCK_SIZE; |
969 | bi = (__m128i*)in; | |
970 | bo = (__m128i*)out; | |
971 | ||
37794878 | 972 | ks = this->key->schedule; |
677649cf | 973 | |
58c44cdd MW |
974 | for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) |
975 | { | |
976 | d1 = _mm_loadu_si128(bi + i + 0); | |
977 | d2 = _mm_loadu_si128(bi + i + 1); | |
978 | d3 = _mm_loadu_si128(bi + i + 2); | |
979 | d4 = _mm_loadu_si128(bi + i + 3); | |
980 | ||
37794878 | 981 | t1 = _mm_xor_si128(cb, ks[0]); |
58c44cdd | 982 | cb = increment_be(cb); |
37794878 | 983 | t2 = _mm_xor_si128(cb, ks[0]); |
58c44cdd | 984 | cb = increment_be(cb); |
37794878 | 985 | t3 = _mm_xor_si128(cb, ks[0]); |
58c44cdd | 986 | cb = increment_be(cb); |
37794878 | 987 | t4 = _mm_xor_si128(cb, ks[0]); |
58c44cdd MW |
988 | cb = increment_be(cb); |
989 | ||
37794878 MW |
990 | t1 = _mm_aesenc_si128(t1, ks[1]); |
991 | t2 = _mm_aesenc_si128(t2, ks[1]); | |
992 | t3 = _mm_aesenc_si128(t3, ks[1]); | |
993 | t4 = _mm_aesenc_si128(t4, ks[1]); | |
994 | t1 = _mm_aesenc_si128(t1, ks[2]); | |
995 | t2 = _mm_aesenc_si128(t2, ks[2]); | |
996 | t3 = _mm_aesenc_si128(t3, ks[2]); | |
997 | t4 = _mm_aesenc_si128(t4, ks[2]); | |
998 | t1 = _mm_aesenc_si128(t1, ks[3]); | |
999 | t2 = _mm_aesenc_si128(t2, ks[3]); | |
1000 | t3 = _mm_aesenc_si128(t3, ks[3]); | |
1001 | t4 = _mm_aesenc_si128(t4, ks[3]); | |
1002 | t1 = _mm_aesenc_si128(t1, ks[4]); | |
1003 | t2 = _mm_aesenc_si128(t2, ks[4]); | |
1004 | t3 = _mm_aesenc_si128(t3, ks[4]); | |
1005 | t4 = _mm_aesenc_si128(t4, ks[4]); | |
1006 | t1 = _mm_aesenc_si128(t1, ks[5]); | |
1007 | t2 = _mm_aesenc_si128(t2, ks[5]); | |
1008 | t3 = _mm_aesenc_si128(t3, ks[5]); | |
1009 | t4 = _mm_aesenc_si128(t4, ks[5]); | |
1010 | t1 = _mm_aesenc_si128(t1, ks[6]); | |
1011 | t2 = _mm_aesenc_si128(t2, ks[6]); | |
1012 | t3 = _mm_aesenc_si128(t3, ks[6]); | |
1013 | t4 = _mm_aesenc_si128(t4, ks[6]); | |
1014 | t1 = _mm_aesenc_si128(t1, ks[7]); | |
1015 | t2 = _mm_aesenc_si128(t2, ks[7]); | |
1016 | t3 = _mm_aesenc_si128(t3, ks[7]); | |
1017 | t4 = _mm_aesenc_si128(t4, ks[7]); | |
1018 | t1 = _mm_aesenc_si128(t1, ks[8]); | |
1019 | t2 = _mm_aesenc_si128(t2, ks[8]); | |
1020 | t3 = _mm_aesenc_si128(t3, ks[8]); | |
1021 | t4 = _mm_aesenc_si128(t4, ks[8]); | |
1022 | t1 = _mm_aesenc_si128(t1, ks[9]); | |
1023 | t2 = _mm_aesenc_si128(t2, ks[9]); | |
1024 | t3 = _mm_aesenc_si128(t3, ks[9]); | |
1025 | t4 = _mm_aesenc_si128(t4, ks[9]); | |
1026 | t1 = _mm_aesenc_si128(t1, ks[10]); | |
1027 | t2 = _mm_aesenc_si128(t2, ks[10]); | |
1028 | t3 = _mm_aesenc_si128(t3, ks[10]); | |
1029 | t4 = _mm_aesenc_si128(t4, ks[10]); | |
1030 | t1 = _mm_aesenc_si128(t1, ks[11]); | |
1031 | t2 = _mm_aesenc_si128(t2, ks[11]); | |
1032 | t3 = _mm_aesenc_si128(t3, ks[11]); | |
1033 | t4 = _mm_aesenc_si128(t4, ks[11]); | |
1034 | t1 = _mm_aesenc_si128(t1, ks[12]); | |
1035 | t2 = _mm_aesenc_si128(t2, ks[12]); | |
1036 | t3 = _mm_aesenc_si128(t3, ks[12]); | |
1037 | t4 = _mm_aesenc_si128(t4, ks[12]); | |
1038 | t1 = _mm_aesenc_si128(t1, ks[13]); | |
1039 | t2 = _mm_aesenc_si128(t2, ks[13]); | |
1040 | t3 = _mm_aesenc_si128(t3, ks[13]); | |
1041 | t4 = _mm_aesenc_si128(t4, ks[13]); | |
1042 | ||
1043 | t1 = _mm_aesenclast_si128(t1, ks[14]); | |
1044 | t2 = _mm_aesenclast_si128(t2, ks[14]); | |
1045 | t3 = _mm_aesenclast_si128(t3, ks[14]); | |
1046 | t4 = _mm_aesenclast_si128(t4, ks[14]); | |
58c44cdd MW |
1047 | |
1048 | t1 = _mm_xor_si128(t1, d1); | |
1049 | t2 = _mm_xor_si128(t2, d2); | |
1050 | t3 = _mm_xor_si128(t3, d3); | |
1051 | t4 = _mm_xor_si128(t4, d4); | |
0eb593b0 MW |
1052 | |
1053 | y = _mm_xor_si128(y, t1); | |
37794878 | 1054 | y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, t2, t3, t4); |
0eb593b0 | 1055 | |
58c44cdd MW |
1056 | _mm_storeu_si128(bo + i + 0, t1); |
1057 | _mm_storeu_si128(bo + i + 1, t2); | |
1058 | _mm_storeu_si128(bo + i + 2, t3); | |
1059 | _mm_storeu_si128(bo + i + 3, t4); | |
58c44cdd MW |
1060 | } |
1061 | ||
1062 | for (i = pblocks; i < blocks; i++) | |
677649cf | 1063 | { |
58c44cdd MW |
1064 | d1 = _mm_loadu_si128(bi + i); |
1065 | ||
37794878 MW |
1066 | t1 = _mm_xor_si128(cb, ks[0]); |
1067 | t1 = _mm_aesenc_si128(t1, ks[1]); | |
1068 | t1 = _mm_aesenc_si128(t1, ks[2]); | |
1069 | t1 = _mm_aesenc_si128(t1, ks[3]); | |
1070 | t1 = _mm_aesenc_si128(t1, ks[4]); | |
1071 | t1 = _mm_aesenc_si128(t1, ks[5]); | |
1072 | t1 = _mm_aesenc_si128(t1, ks[6]); | |
1073 | t1 = _mm_aesenc_si128(t1, ks[7]); | |
1074 | t1 = _mm_aesenc_si128(t1, ks[8]); | |
1075 | t1 = _mm_aesenc_si128(t1, ks[9]); | |
1076 | t1 = _mm_aesenc_si128(t1, ks[10]); | |
1077 | t1 = _mm_aesenc_si128(t1, ks[11]); | |
1078 | t1 = _mm_aesenc_si128(t1, ks[12]); | |
1079 | t1 = _mm_aesenc_si128(t1, ks[13]); | |
1080 | t1 = _mm_aesenclast_si128(t1, ks[14]); | |
58c44cdd MW |
1081 | |
1082 | t1 = _mm_xor_si128(t1, d1); | |
1083 | _mm_storeu_si128(bo + i, t1); | |
1084 | ||
1085 | y = ghash(this->h, y, t1); | |
677649cf MW |
1086 | |
1087 | cb = increment_be(cb); | |
1088 | } | |
1089 | ||
1090 | if (rem) | |
1091 | { | |
1092 | y = encrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y); | |
1093 | } | |
1094 | y = icv_tailer(this, y, alen, len); | |
1095 | icv_crypt(this, y, j, icv); | |
1096 | } | |
1097 | ||
1098 | /** | |
1099 | * AES-256 GCM decryption/ICV generation | |
1100 | */ | |
1101 | static void decrypt_gcm256(private_aesni_gcm_t *this, | |
1102 | size_t len, u_char *in, u_char *out, u_char *iv, | |
1103 | size_t alen, u_char *assoc, u_char *icv) | |
1104 | { | |
37794878 MW |
1105 | __m128i d1, d2, d3, d4, t1, t2, t3, t4; |
1106 | __m128i *ks, y, j, cb, *bi, *bo; | |
58c44cdd | 1107 | u_int blocks, pblocks, rem, i; |
677649cf MW |
1108 | |
1109 | j = create_j(this, iv); | |
58c44cdd | 1110 | cb = increment_be(j); |
677649cf MW |
1111 | y = icv_header(this, assoc, alen); |
1112 | blocks = len / AES_BLOCK_SIZE; | |
58c44cdd | 1113 | pblocks = blocks - (blocks % GCM_CRYPT_PARALLELISM); |
677649cf MW |
1114 | rem = len % AES_BLOCK_SIZE; |
1115 | bi = (__m128i*)in; | |
1116 | bo = (__m128i*)out; | |
1117 | ||
37794878 | 1118 | ks = this->key->schedule; |
677649cf | 1119 | |
58c44cdd MW |
1120 | for (i = 0; i < pblocks; i += GCM_CRYPT_PARALLELISM) |
1121 | { | |
1122 | d1 = _mm_loadu_si128(bi + i + 0); | |
1123 | d2 = _mm_loadu_si128(bi + i + 1); | |
1124 | d3 = _mm_loadu_si128(bi + i + 2); | |
1125 | d4 = _mm_loadu_si128(bi + i + 3); | |
1126 | ||
0eb593b0 | 1127 | y = _mm_xor_si128(y, d1); |
37794878 | 1128 | y = mult4xor(this->hhhh, this->hhh, this->hh, this->h, y, d2, d3, d4); |
58c44cdd | 1129 | |
37794878 | 1130 | t1 = _mm_xor_si128(cb, ks[0]); |
58c44cdd | 1131 | cb = increment_be(cb); |
37794878 | 1132 | t2 = _mm_xor_si128(cb, ks[0]); |
58c44cdd | 1133 | cb = increment_be(cb); |
37794878 | 1134 | t3 = _mm_xor_si128(cb, ks[0]); |
58c44cdd | 1135 | cb = increment_be(cb); |
37794878 | 1136 | t4 = _mm_xor_si128(cb, ks[0]); |
58c44cdd MW |
1137 | cb = increment_be(cb); |
1138 | ||
37794878 MW |
1139 | t1 = _mm_aesenc_si128(t1, ks[1]); |
1140 | t2 = _mm_aesenc_si128(t2, ks[1]); | |
1141 | t3 = _mm_aesenc_si128(t3, ks[1]); | |
1142 | t4 = _mm_aesenc_si128(t4, ks[1]); | |
1143 | t1 = _mm_aesenc_si128(t1, ks[2]); | |
1144 | t2 = _mm_aesenc_si128(t2, ks[2]); | |
1145 | t3 = _mm_aesenc_si128(t3, ks[2]); | |
1146 | t4 = _mm_aesenc_si128(t4, ks[2]); | |
1147 | t1 = _mm_aesenc_si128(t1, ks[3]); | |
1148 | t2 = _mm_aesenc_si128(t2, ks[3]); | |
1149 | t3 = _mm_aesenc_si128(t3, ks[3]); | |
1150 | t4 = _mm_aesenc_si128(t4, ks[3]); | |
1151 | t1 = _mm_aesenc_si128(t1, ks[4]); | |
1152 | t2 = _mm_aesenc_si128(t2, ks[4]); | |
1153 | t3 = _mm_aesenc_si128(t3, ks[4]); | |
1154 | t4 = _mm_aesenc_si128(t4, ks[4]); | |
1155 | t1 = _mm_aesenc_si128(t1, ks[5]); | |
1156 | t2 = _mm_aesenc_si128(t2, ks[5]); | |
1157 | t3 = _mm_aesenc_si128(t3, ks[5]); | |
1158 | t4 = _mm_aesenc_si128(t4, ks[5]); | |
1159 | t1 = _mm_aesenc_si128(t1, ks[6]); | |
1160 | t2 = _mm_aesenc_si128(t2, ks[6]); | |
1161 | t3 = _mm_aesenc_si128(t3, ks[6]); | |
1162 | t4 = _mm_aesenc_si128(t4, ks[6]); | |
1163 | t1 = _mm_aesenc_si128(t1, ks[7]); | |
1164 | t2 = _mm_aesenc_si128(t2, ks[7]); | |
1165 | t3 = _mm_aesenc_si128(t3, ks[7]); | |
1166 | t4 = _mm_aesenc_si128(t4, ks[7]); | |
1167 | t1 = _mm_aesenc_si128(t1, ks[8]); | |
1168 | t2 = _mm_aesenc_si128(t2, ks[8]); | |
1169 | t3 = _mm_aesenc_si128(t3, ks[8]); | |
1170 | t4 = _mm_aesenc_si128(t4, ks[8]); | |
1171 | t1 = _mm_aesenc_si128(t1, ks[9]); | |
1172 | t2 = _mm_aesenc_si128(t2, ks[9]); | |
1173 | t3 = _mm_aesenc_si128(t3, ks[9]); | |
1174 | t4 = _mm_aesenc_si128(t4, ks[9]); | |
1175 | t1 = _mm_aesenc_si128(t1, ks[10]); | |
1176 | t2 = _mm_aesenc_si128(t2, ks[10]); | |
1177 | t3 = _mm_aesenc_si128(t3, ks[10]); | |
1178 | t4 = _mm_aesenc_si128(t4, ks[10]); | |
1179 | t1 = _mm_aesenc_si128(t1, ks[11]); | |
1180 | t2 = _mm_aesenc_si128(t2, ks[11]); | |
1181 | t3 = _mm_aesenc_si128(t3, ks[11]); | |
1182 | t4 = _mm_aesenc_si128(t4, ks[11]); | |
1183 | t1 = _mm_aesenc_si128(t1, ks[12]); | |
1184 | t2 = _mm_aesenc_si128(t2, ks[12]); | |
1185 | t3 = _mm_aesenc_si128(t3, ks[12]); | |
1186 | t4 = _mm_aesenc_si128(t4, ks[12]); | |
1187 | t1 = _mm_aesenc_si128(t1, ks[13]); | |
1188 | t2 = _mm_aesenc_si128(t2, ks[13]); | |
1189 | t3 = _mm_aesenc_si128(t3, ks[13]); | |
1190 | t4 = _mm_aesenc_si128(t4, ks[13]); | |
1191 | ||
1192 | t1 = _mm_aesenclast_si128(t1, ks[14]); | |
1193 | t2 = _mm_aesenclast_si128(t2, ks[14]); | |
1194 | t3 = _mm_aesenclast_si128(t3, ks[14]); | |
1195 | t4 = _mm_aesenclast_si128(t4, ks[14]); | |
58c44cdd MW |
1196 | |
1197 | t1 = _mm_xor_si128(t1, d1); | |
1198 | t2 = _mm_xor_si128(t2, d2); | |
1199 | t3 = _mm_xor_si128(t3, d3); | |
1200 | t4 = _mm_xor_si128(t4, d4); | |
0eb593b0 | 1201 | |
58c44cdd MW |
1202 | _mm_storeu_si128(bo + i + 0, t1); |
1203 | _mm_storeu_si128(bo + i + 1, t2); | |
1204 | _mm_storeu_si128(bo + i + 2, t3); | |
1205 | _mm_storeu_si128(bo + i + 3, t4); | |
1206 | } | |
1207 | ||
1208 | for (i = pblocks; i < blocks; i++) | |
677649cf | 1209 | { |
58c44cdd MW |
1210 | d1 = _mm_loadu_si128(bi + i); |
1211 | ||
37794878 MW |
1212 | y = ghash(this->h, y, d1); |
1213 | ||
1214 | t1 = _mm_xor_si128(cb, ks[0]); | |
1215 | t1 = _mm_aesenc_si128(t1, ks[1]); | |
1216 | t1 = _mm_aesenc_si128(t1, ks[2]); | |
1217 | t1 = _mm_aesenc_si128(t1, ks[3]); | |
1218 | t1 = _mm_aesenc_si128(t1, ks[4]); | |
1219 | t1 = _mm_aesenc_si128(t1, ks[5]); | |
1220 | t1 = _mm_aesenc_si128(t1, ks[6]); | |
1221 | t1 = _mm_aesenc_si128(t1, ks[7]); | |
1222 | t1 = _mm_aesenc_si128(t1, ks[8]); | |
1223 | t1 = _mm_aesenc_si128(t1, ks[9]); | |
1224 | t1 = _mm_aesenc_si128(t1, ks[10]); | |
1225 | t1 = _mm_aesenc_si128(t1, ks[11]); | |
1226 | t1 = _mm_aesenc_si128(t1, ks[12]); | |
1227 | t1 = _mm_aesenc_si128(t1, ks[13]); | |
1228 | t1 = _mm_aesenclast_si128(t1, ks[14]); | |
58c44cdd MW |
1229 | |
1230 | t1 = _mm_xor_si128(t1, d1); | |
1231 | _mm_storeu_si128(bo + i, t1); | |
313811b7 MW |
1232 | |
1233 | cb = increment_be(cb); | |
1234 | } | |
1235 | ||
1236 | if (rem) | |
1237 | { | |
1238 | y = decrypt_gcm_rem(this, rem, bi + blocks, bo + blocks, cb, y); | |
1239 | } | |
1240 | y = icv_tailer(this, y, alen, len); | |
1241 | icv_crypt(this, y, j, icv); | |
1242 | } | |
1243 | ||
1244 | METHOD(aead_t, encrypt, bool, | |
1245 | private_aesni_gcm_t *this, chunk_t plain, chunk_t assoc, chunk_t iv, | |
1246 | chunk_t *encr) | |
1247 | { | |
1248 | u_char *out; | |
1249 | ||
1250 | if (!this->key || iv.len != IV_SIZE) | |
1251 | { | |
1252 | return FALSE; | |
1253 | } | |
1254 | out = plain.ptr; | |
1255 | if (encr) | |
1256 | { | |
1257 | *encr = chunk_alloc(plain.len + this->icv_size); | |
1258 | out = encr->ptr; | |
1259 | } | |
1260 | this->encrypt(this, plain.len, plain.ptr, out, iv.ptr, | |
1261 | assoc.len, assoc.ptr, out + plain.len); | |
1262 | return TRUE; | |
1263 | } | |
1264 | ||
1265 | METHOD(aead_t, decrypt, bool, | |
1266 | private_aesni_gcm_t *this, chunk_t encr, chunk_t assoc, chunk_t iv, | |
1267 | chunk_t *plain) | |
1268 | { | |
1269 | u_char *out, icv[this->icv_size]; | |
1270 | ||
1271 | if (!this->key || iv.len != IV_SIZE || encr.len < this->icv_size) | |
1272 | { | |
1273 | return FALSE; | |
1274 | } | |
1275 | encr.len -= this->icv_size; | |
1276 | out = encr.ptr; | |
1277 | if (plain) | |
1278 | { | |
1279 | *plain = chunk_alloc(encr.len); | |
1280 | out = plain->ptr; | |
1281 | } | |
1282 | this->decrypt(this, encr.len, encr.ptr, out, iv.ptr, | |
1283 | assoc.len, assoc.ptr, icv); | |
1284 | return memeq_const(icv, encr.ptr + encr.len, this->icv_size); | |
1285 | } | |
1286 | ||
1287 | METHOD(aead_t, get_block_size, size_t, | |
1288 | private_aesni_gcm_t *this) | |
1289 | { | |
1290 | return 1; | |
1291 | } | |
1292 | ||
1293 | METHOD(aead_t, get_icv_size, size_t, | |
1294 | private_aesni_gcm_t *this) | |
1295 | { | |
1296 | return this->icv_size; | |
1297 | } | |
1298 | ||
1299 | METHOD(aead_t, get_iv_size, size_t, | |
1300 | private_aesni_gcm_t *this) | |
1301 | { | |
1302 | return IV_SIZE; | |
1303 | } | |
1304 | ||
1305 | METHOD(aead_t, get_iv_gen, iv_gen_t*, | |
1306 | private_aesni_gcm_t *this) | |
1307 | { | |
1308 | return this->iv_gen; | |
1309 | } | |
1310 | ||
1311 | METHOD(aead_t, get_key_size, size_t, | |
1312 | private_aesni_gcm_t *this) | |
1313 | { | |
1314 | return this->key_size + SALT_SIZE; | |
1315 | } | |
1316 | ||
1317 | METHOD(aead_t, set_key, bool, | |
1318 | private_aesni_gcm_t *this, chunk_t key) | |
1319 | { | |
1320 | u_int round; | |
37794878 | 1321 | __m128i *ks, h; |
313811b7 MW |
1322 | |
1323 | if (key.len != this->key_size + SALT_SIZE) | |
1324 | { | |
1325 | return FALSE; | |
1326 | } | |
1327 | ||
1328 | memcpy(this->salt, key.ptr + key.len - SALT_SIZE, SALT_SIZE); | |
1329 | key.len -= SALT_SIZE; | |
1330 | ||
1331 | DESTROY_IF(this->key); | |
1332 | this->key = aesni_key_create(TRUE, key); | |
1333 | ||
37794878 MW |
1334 | ks = this->key->schedule; |
1335 | h = _mm_xor_si128(_mm_setzero_si128(), ks[0]); | |
313811b7 MW |
1336 | for (round = 1; round < this->key->rounds; round++) |
1337 | { | |
37794878 | 1338 | h = _mm_aesenc_si128(h, ks[round]); |
313811b7 | 1339 | } |
37794878 | 1340 | h = _mm_aesenclast_si128(h, ks[this->key->rounds]); |
313811b7 | 1341 | |
0eb593b0 MW |
1342 | this->h = h; |
1343 | h = swap128(h); | |
1344 | this->hh = mult_block(h, this->h); | |
1345 | this->hhh = mult_block(h, this->hh); | |
1346 | this->hhhh = mult_block(h, this->hhh); | |
1347 | this->h = swap128(this->h); | |
1348 | this->hh = swap128(this->hh); | |
1349 | this->hhh = swap128(this->hhh); | |
1350 | this->hhhh = swap128(this->hhhh); | |
313811b7 MW |
1351 | |
1352 | return TRUE; | |
1353 | } | |
1354 | ||
1355 | METHOD(aead_t, destroy, void, | |
1356 | private_aesni_gcm_t *this) | |
1357 | { | |
1358 | DESTROY_IF(this->key); | |
1359 | memwipe(&this->h, sizeof(this->h)); | |
0eb593b0 MW |
1360 | memwipe(&this->hh, sizeof(this->hh)); |
1361 | memwipe(&this->hhh, sizeof(this->hhh)); | |
1362 | memwipe(&this->hhhh, sizeof(this->hhhh)); | |
313811b7 | 1363 | this->iv_gen->destroy(this->iv_gen); |
93f00802 | 1364 | free_align(this); |
313811b7 MW |
1365 | } |
1366 | ||
1367 | /** | |
1368 | * See header | |
1369 | */ | |
1370 | aesni_gcm_t *aesni_gcm_create(encryption_algorithm_t algo, | |
1371 | size_t key_size, size_t salt_size) | |
1372 | { | |
1373 | private_aesni_gcm_t *this; | |
1374 | size_t icv_size; | |
1375 | ||
1376 | switch (key_size) | |
1377 | { | |
1378 | case 0: | |
1379 | key_size = 16; | |
1380 | break; | |
1381 | case 16: | |
1382 | case 24: | |
1383 | case 32: | |
1384 | break; | |
1385 | default: | |
1386 | return NULL; | |
1387 | } | |
1388 | if (salt_size && salt_size != SALT_SIZE) | |
1389 | { | |
1390 | /* currently not supported */ | |
1391 | return NULL; | |
1392 | } | |
1393 | switch (algo) | |
1394 | { | |
1395 | case ENCR_AES_GCM_ICV8: | |
1396 | algo = ENCR_AES_CBC; | |
1397 | icv_size = 8; | |
1398 | break; | |
1399 | case ENCR_AES_GCM_ICV12: | |
1400 | algo = ENCR_AES_CBC; | |
1401 | icv_size = 12; | |
1402 | break; | |
1403 | case ENCR_AES_GCM_ICV16: | |
1404 | algo = ENCR_AES_CBC; | |
1405 | icv_size = 16; | |
1406 | break; | |
1407 | default: | |
1408 | return NULL; | |
1409 | } | |
1410 | ||
93f00802 | 1411 | INIT_ALIGN(this, sizeof(__m128i), |
313811b7 MW |
1412 | .public = { |
1413 | .aead = { | |
1414 | .encrypt = _encrypt, | |
1415 | .decrypt = _decrypt, | |
1416 | .get_block_size = _get_block_size, | |
1417 | .get_icv_size = _get_icv_size, | |
1418 | .get_iv_size = _get_iv_size, | |
1419 | .get_iv_gen = _get_iv_gen, | |
1420 | .get_key_size = _get_key_size, | |
1421 | .set_key = _set_key, | |
1422 | .destroy = _destroy, | |
1423 | }, | |
1424 | }, | |
1425 | .key_size = key_size, | |
1426 | .iv_gen = iv_gen_seq_create(), | |
1427 | .icv_size = icv_size, | |
313811b7 MW |
1428 | ); |
1429 | ||
677649cf MW |
1430 | switch (key_size) |
1431 | { | |
1432 | case 16: | |
1433 | this->encrypt = encrypt_gcm128; | |
1434 | this->decrypt = decrypt_gcm128; | |
1435 | break; | |
1436 | case 24: | |
1437 | this->encrypt = encrypt_gcm192; | |
1438 | this->decrypt = decrypt_gcm192; | |
1439 | break; | |
1440 | case 32: | |
1441 | this->encrypt = encrypt_gcm256; | |
1442 | this->decrypt = decrypt_gcm256; | |
1443 | break; | |
1444 | } | |
1445 | ||
313811b7 MW |
1446 | return &this->public; |
1447 | } |