2 * Copyright (C) 2015 Martin Willi
3 * Copyright (C) 2015 revosec AG
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the
7 * Free Software Foundation; either version 2 of the License, or (at your
8 * option) any later version. See <http://www.fsf.org/copyleft/gpl.txt>.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 #include "aesni_cbc.h"
17 #include "aesni_key.h"
/**
 * Pipeline parallelism we use for CBC decryption.
 *
 * Number of independent blocks the decryption routines push through the AES
 * rounds per loop iteration; the unrolled loop bodies are written for exactly
 * this many blocks.
 */
#define CBC_DECRYPT_PARALLELISM 4
/* Forward declaration of the private object type defined further down. */
typedef struct private_aesni_cbc_t private_aesni_cbc_t;
27 * CBC en/decryption method type
29 typedef void (*aesni_cbc_fn_t
)(aesni_key_t
*, u_int
, u_char
*, u_char
*, u_char
*);
32 * Private data of an aesni_cbc_t object.
34 struct private_aesni_cbc_t
{
37 * Public aesni_cbc_t interface.
47 * Encryption key schedule
52 * Decryption key schedule
59 aesni_cbc_fn_t encrypt
;
64 aesni_cbc_fn_t decrypt
;
68 * AES-128 CBC encryption
70 static void encrypt_cbc128(aesni_key_t
*key
, u_int blocks
, u_char
*in
,
71 u_char
*iv
, u_char
*out
)
73 __m128i k0
, k1
, k2
, k3
, k4
, k5
, k6
, k7
, k8
, k9
, k10
;
74 __m128i t
, fb
, *bi
, *bo
;
77 k0
= key
->schedule
[0];
78 k1
= key
->schedule
[1];
79 k2
= key
->schedule
[2];
80 k3
= key
->schedule
[3];
81 k4
= key
->schedule
[4];
82 k5
= key
->schedule
[5];
83 k6
= key
->schedule
[6];
84 k7
= key
->schedule
[7];
85 k8
= key
->schedule
[8];
86 k9
= key
->schedule
[9];
87 k10
= key
->schedule
[10];
92 fb
= _mm_loadu_si128((__m128i
*)iv
);
93 for (i
= 0; i
< blocks
; i
++)
95 t
= _mm_loadu_si128(bi
+ i
);
96 fb
= _mm_xor_si128(t
, fb
);
97 fb
= _mm_xor_si128(fb
, k0
);
99 fb
= _mm_aesenc_si128(fb
, k1
);
100 fb
= _mm_aesenc_si128(fb
, k2
);
101 fb
= _mm_aesenc_si128(fb
, k3
);
102 fb
= _mm_aesenc_si128(fb
, k4
);
103 fb
= _mm_aesenc_si128(fb
, k5
);
104 fb
= _mm_aesenc_si128(fb
, k6
);
105 fb
= _mm_aesenc_si128(fb
, k7
);
106 fb
= _mm_aesenc_si128(fb
, k8
);
107 fb
= _mm_aesenc_si128(fb
, k9
);
109 fb
= _mm_aesenclast_si128(fb
, k10
);
110 _mm_storeu_si128(bo
+ i
, fb
);
115 * AES-128 CBC decryption
117 static void decrypt_cbc128(aesni_key_t
*key
, u_int blocks
, u_char
*in
,
118 u_char
*iv
, u_char
*out
)
120 __m128i k0
, k1
, k2
, k3
, k4
, k5
, k6
, k7
, k8
, k9
, k10
;
121 __m128i last
, *bi
, *bo
;
122 __m128i t1
, t2
, t3
, t4
;
123 __m128i f1
, f2
, f3
, f4
;
126 k0
= key
->schedule
[0];
127 k1
= key
->schedule
[1];
128 k2
= key
->schedule
[2];
129 k3
= key
->schedule
[3];
130 k4
= key
->schedule
[4];
131 k5
= key
->schedule
[5];
132 k6
= key
->schedule
[6];
133 k7
= key
->schedule
[7];
134 k8
= key
->schedule
[8];
135 k9
= key
->schedule
[9];
136 k10
= key
->schedule
[10];
140 pblocks
= blocks
- (blocks
% CBC_DECRYPT_PARALLELISM
);
142 f1
= _mm_loadu_si128((__m128i
*)iv
);
144 for (i
= 0; i
< pblocks
; i
+= CBC_DECRYPT_PARALLELISM
)
146 t1
= _mm_loadu_si128(bi
+ i
+ 0);
147 t2
= _mm_loadu_si128(bi
+ i
+ 1);
148 t3
= _mm_loadu_si128(bi
+ i
+ 2);
149 t4
= _mm_loadu_si128(bi
+ i
+ 3);
156 t1
= _mm_xor_si128(t1
, k0
);
157 t2
= _mm_xor_si128(t2
, k0
);
158 t3
= _mm_xor_si128(t3
, k0
);
159 t4
= _mm_xor_si128(t4
, k0
);
161 t1
= _mm_aesdec_si128(t1
, k1
);
162 t2
= _mm_aesdec_si128(t2
, k1
);
163 t3
= _mm_aesdec_si128(t3
, k1
);
164 t4
= _mm_aesdec_si128(t4
, k1
);
165 t1
= _mm_aesdec_si128(t1
, k2
);
166 t2
= _mm_aesdec_si128(t2
, k2
);
167 t3
= _mm_aesdec_si128(t3
, k2
);
168 t4
= _mm_aesdec_si128(t4
, k2
);
169 t1
= _mm_aesdec_si128(t1
, k3
);
170 t2
= _mm_aesdec_si128(t2
, k3
);
171 t3
= _mm_aesdec_si128(t3
, k3
);
172 t4
= _mm_aesdec_si128(t4
, k3
);
173 t1
= _mm_aesdec_si128(t1
, k4
);
174 t2
= _mm_aesdec_si128(t2
, k4
);
175 t3
= _mm_aesdec_si128(t3
, k4
);
176 t4
= _mm_aesdec_si128(t4
, k4
);
177 t1
= _mm_aesdec_si128(t1
, k5
);
178 t2
= _mm_aesdec_si128(t2
, k5
);
179 t3
= _mm_aesdec_si128(t3
, k5
);
180 t4
= _mm_aesdec_si128(t4
, k5
);
181 t1
= _mm_aesdec_si128(t1
, k6
);
182 t2
= _mm_aesdec_si128(t2
, k6
);
183 t3
= _mm_aesdec_si128(t3
, k6
);
184 t4
= _mm_aesdec_si128(t4
, k6
);
185 t1
= _mm_aesdec_si128(t1
, k7
);
186 t2
= _mm_aesdec_si128(t2
, k7
);
187 t3
= _mm_aesdec_si128(t3
, k7
);
188 t4
= _mm_aesdec_si128(t4
, k7
);
189 t1
= _mm_aesdec_si128(t1
, k8
);
190 t2
= _mm_aesdec_si128(t2
, k8
);
191 t3
= _mm_aesdec_si128(t3
, k8
);
192 t4
= _mm_aesdec_si128(t4
, k8
);
193 t1
= _mm_aesdec_si128(t1
, k9
);
194 t2
= _mm_aesdec_si128(t2
, k9
);
195 t3
= _mm_aesdec_si128(t3
, k9
);
196 t4
= _mm_aesdec_si128(t4
, k9
);
198 t1
= _mm_aesdeclast_si128(t1
, k10
);
199 t2
= _mm_aesdeclast_si128(t2
, k10
);
200 t3
= _mm_aesdeclast_si128(t3
, k10
);
201 t4
= _mm_aesdeclast_si128(t4
, k10
);
202 t1
= _mm_xor_si128(t1
, f1
);
203 t2
= _mm_xor_si128(t2
, f2
);
204 t3
= _mm_xor_si128(t3
, f3
);
205 t4
= _mm_xor_si128(t4
, f4
);
206 _mm_storeu_si128(bo
+ i
+ 0, t1
);
207 _mm_storeu_si128(bo
+ i
+ 1, t2
);
208 _mm_storeu_si128(bo
+ i
+ 2, t3
);
209 _mm_storeu_si128(bo
+ i
+ 3, t4
);
213 for (i
= pblocks
; i
< blocks
; i
++)
215 last
= _mm_loadu_si128(bi
+ i
);
216 t1
= _mm_xor_si128(last
, k0
);
218 t1
= _mm_aesdec_si128(t1
, k1
);
219 t1
= _mm_aesdec_si128(t1
, k2
);
220 t1
= _mm_aesdec_si128(t1
, k3
);
221 t1
= _mm_aesdec_si128(t1
, k4
);
222 t1
= _mm_aesdec_si128(t1
, k5
);
223 t1
= _mm_aesdec_si128(t1
, k6
);
224 t1
= _mm_aesdec_si128(t1
, k7
);
225 t1
= _mm_aesdec_si128(t1
, k8
);
226 t1
= _mm_aesdec_si128(t1
, k9
);
228 t1
= _mm_aesdeclast_si128(t1
, k10
);
229 t1
= _mm_xor_si128(t1
, f1
);
230 _mm_storeu_si128(bo
+ i
, t1
);
236 * AES-192 CBC encryption
238 static void encrypt_cbc192(aesni_key_t
*key
, u_int blocks
, u_char
*in
,
239 u_char
*iv
, u_char
*out
)
241 __m128i k0
, k1
, k2
, k3
, k4
, k5
, k6
, k7
, k8
, k9
, k10
, k11
, k12
;
242 __m128i t
, fb
, *bi
, *bo
;
245 k0
= key
->schedule
[0];
246 k1
= key
->schedule
[1];
247 k2
= key
->schedule
[2];
248 k3
= key
->schedule
[3];
249 k4
= key
->schedule
[4];
250 k5
= key
->schedule
[5];
251 k6
= key
->schedule
[6];
252 k7
= key
->schedule
[7];
253 k8
= key
->schedule
[8];
254 k9
= key
->schedule
[9];
255 k10
= key
->schedule
[10];
256 k11
= key
->schedule
[11];
257 k12
= key
->schedule
[12];
262 fb
= _mm_loadu_si128((__m128i
*)iv
);
263 for (i
= 0; i
< blocks
; i
++)
265 t
= _mm_loadu_si128(bi
+ i
);
266 fb
= _mm_xor_si128(t
, fb
);
267 fb
= _mm_xor_si128(fb
, k0
);
269 fb
= _mm_aesenc_si128(fb
, k1
);
270 fb
= _mm_aesenc_si128(fb
, k2
);
271 fb
= _mm_aesenc_si128(fb
, k3
);
272 fb
= _mm_aesenc_si128(fb
, k4
);
273 fb
= _mm_aesenc_si128(fb
, k5
);
274 fb
= _mm_aesenc_si128(fb
, k6
);
275 fb
= _mm_aesenc_si128(fb
, k7
);
276 fb
= _mm_aesenc_si128(fb
, k8
);
277 fb
= _mm_aesenc_si128(fb
, k9
);
278 fb
= _mm_aesenc_si128(fb
, k10
);
279 fb
= _mm_aesenc_si128(fb
, k11
);
281 fb
= _mm_aesenclast_si128(fb
, k12
);
282 _mm_storeu_si128(bo
+ i
, fb
);
287 * AES-192 CBC decryption
289 static void decrypt_cbc192(aesni_key_t
*key
, u_int blocks
, u_char
*in
,
290 u_char
*iv
, u_char
*out
)
292 __m128i k0
, k1
, k2
, k3
, k4
, k5
, k6
, k7
, k8
, k9
, k10
, k11
, k12
;
293 __m128i last
, *bi
, *bo
;
294 __m128i t1
, t2
, t3
, t4
;
295 __m128i f1
, f2
, f3
, f4
;
298 k0
= key
->schedule
[0];
299 k1
= key
->schedule
[1];
300 k2
= key
->schedule
[2];
301 k3
= key
->schedule
[3];
302 k4
= key
->schedule
[4];
303 k5
= key
->schedule
[5];
304 k6
= key
->schedule
[6];
305 k7
= key
->schedule
[7];
306 k8
= key
->schedule
[8];
307 k9
= key
->schedule
[9];
308 k10
= key
->schedule
[10];
309 k11
= key
->schedule
[11];
310 k12
= key
->schedule
[12];
314 pblocks
= blocks
- (blocks
% CBC_DECRYPT_PARALLELISM
);
316 f1
= _mm_loadu_si128((__m128i
*)iv
);
318 for (i
= 0; i
< pblocks
; i
+= CBC_DECRYPT_PARALLELISM
)
320 t1
= _mm_loadu_si128(bi
+ i
+ 0);
321 t2
= _mm_loadu_si128(bi
+ i
+ 1);
322 t3
= _mm_loadu_si128(bi
+ i
+ 2);
323 t4
= _mm_loadu_si128(bi
+ i
+ 3);
330 t1
= _mm_xor_si128(t1
, k0
);
331 t2
= _mm_xor_si128(t2
, k0
);
332 t3
= _mm_xor_si128(t3
, k0
);
333 t4
= _mm_xor_si128(t4
, k0
);
335 t1
= _mm_aesdec_si128(t1
, k1
);
336 t2
= _mm_aesdec_si128(t2
, k1
);
337 t3
= _mm_aesdec_si128(t3
, k1
);
338 t4
= _mm_aesdec_si128(t4
, k1
);
339 t1
= _mm_aesdec_si128(t1
, k2
);
340 t2
= _mm_aesdec_si128(t2
, k2
);
341 t3
= _mm_aesdec_si128(t3
, k2
);
342 t4
= _mm_aesdec_si128(t4
, k2
);
343 t1
= _mm_aesdec_si128(t1
, k3
);
344 t2
= _mm_aesdec_si128(t2
, k3
);
345 t3
= _mm_aesdec_si128(t3
, k3
);
346 t4
= _mm_aesdec_si128(t4
, k3
);
347 t1
= _mm_aesdec_si128(t1
, k4
);
348 t2
= _mm_aesdec_si128(t2
, k4
);
349 t3
= _mm_aesdec_si128(t3
, k4
);
350 t4
= _mm_aesdec_si128(t4
, k4
);
351 t1
= _mm_aesdec_si128(t1
, k5
);
352 t2
= _mm_aesdec_si128(t2
, k5
);
353 t3
= _mm_aesdec_si128(t3
, k5
);
354 t4
= _mm_aesdec_si128(t4
, k5
);
355 t1
= _mm_aesdec_si128(t1
, k6
);
356 t2
= _mm_aesdec_si128(t2
, k6
);
357 t3
= _mm_aesdec_si128(t3
, k6
);
358 t4
= _mm_aesdec_si128(t4
, k6
);
359 t1
= _mm_aesdec_si128(t1
, k7
);
360 t2
= _mm_aesdec_si128(t2
, k7
);
361 t3
= _mm_aesdec_si128(t3
, k7
);
362 t4
= _mm_aesdec_si128(t4
, k7
);
363 t1
= _mm_aesdec_si128(t1
, k8
);
364 t2
= _mm_aesdec_si128(t2
, k8
);
365 t3
= _mm_aesdec_si128(t3
, k8
);
366 t4
= _mm_aesdec_si128(t4
, k8
);
367 t1
= _mm_aesdec_si128(t1
, k9
);
368 t2
= _mm_aesdec_si128(t2
, k9
);
369 t3
= _mm_aesdec_si128(t3
, k9
);
370 t4
= _mm_aesdec_si128(t4
, k9
);
371 t1
= _mm_aesdec_si128(t1
, k10
);
372 t2
= _mm_aesdec_si128(t2
, k10
);
373 t3
= _mm_aesdec_si128(t3
, k10
);
374 t4
= _mm_aesdec_si128(t4
, k10
);
375 t1
= _mm_aesdec_si128(t1
, k11
);
376 t2
= _mm_aesdec_si128(t2
, k11
);
377 t3
= _mm_aesdec_si128(t3
, k11
);
378 t4
= _mm_aesdec_si128(t4
, k11
);
380 t1
= _mm_aesdeclast_si128(t1
, k12
);
381 t2
= _mm_aesdeclast_si128(t2
, k12
);
382 t3
= _mm_aesdeclast_si128(t3
, k12
);
383 t4
= _mm_aesdeclast_si128(t4
, k12
);
384 t1
= _mm_xor_si128(t1
, f1
);
385 t2
= _mm_xor_si128(t2
, f2
);
386 t3
= _mm_xor_si128(t3
, f3
);
387 t4
= _mm_xor_si128(t4
, f4
);
388 _mm_storeu_si128(bo
+ i
+ 0, t1
);
389 _mm_storeu_si128(bo
+ i
+ 1, t2
);
390 _mm_storeu_si128(bo
+ i
+ 2, t3
);
391 _mm_storeu_si128(bo
+ i
+ 3, t4
);
395 for (i
= pblocks
; i
< blocks
; i
++)
397 last
= _mm_loadu_si128(bi
+ i
);
398 t1
= _mm_xor_si128(last
, k0
);
400 t1
= _mm_aesdec_si128(t1
, k1
);
401 t1
= _mm_aesdec_si128(t1
, k2
);
402 t1
= _mm_aesdec_si128(t1
, k3
);
403 t1
= _mm_aesdec_si128(t1
, k4
);
404 t1
= _mm_aesdec_si128(t1
, k5
);
405 t1
= _mm_aesdec_si128(t1
, k6
);
406 t1
= _mm_aesdec_si128(t1
, k7
);
407 t1
= _mm_aesdec_si128(t1
, k8
);
408 t1
= _mm_aesdec_si128(t1
, k9
);
409 t1
= _mm_aesdec_si128(t1
, k10
);
410 t1
= _mm_aesdec_si128(t1
, k11
);
412 t1
= _mm_aesdeclast_si128(t1
, k12
);
413 t1
= _mm_xor_si128(t1
, f1
);
414 _mm_storeu_si128(bo
+ i
, t1
);
420 * AES-256 CBC encryption
422 static void encrypt_cbc256(aesni_key_t
*key
, u_int blocks
, u_char
*in
,
423 u_char
*iv
, u_char
*out
)
425 __m128i k0
, k1
, k2
, k3
, k4
, k5
, k6
, k7
, k8
, k9
, k10
, k11
, k12
, k13
, k14
;
426 __m128i t
, fb
, *bi
, *bo
;
429 k0
= key
->schedule
[0];
430 k1
= key
->schedule
[1];
431 k2
= key
->schedule
[2];
432 k3
= key
->schedule
[3];
433 k4
= key
->schedule
[4];
434 k5
= key
->schedule
[5];
435 k6
= key
->schedule
[6];
436 k7
= key
->schedule
[7];
437 k8
= key
->schedule
[8];
438 k9
= key
->schedule
[9];
439 k10
= key
->schedule
[10];
440 k11
= key
->schedule
[11];
441 k12
= key
->schedule
[12];
442 k13
= key
->schedule
[13];
443 k14
= key
->schedule
[14];
448 fb
= _mm_loadu_si128((__m128i
*)iv
);
449 for (i
= 0; i
< blocks
; i
++)
451 t
= _mm_loadu_si128(bi
+ i
);
452 fb
= _mm_xor_si128(t
, fb
);
453 fb
= _mm_xor_si128(fb
, k0
);
455 fb
= _mm_aesenc_si128(fb
, k1
);
456 fb
= _mm_aesenc_si128(fb
, k2
);
457 fb
= _mm_aesenc_si128(fb
, k3
);
458 fb
= _mm_aesenc_si128(fb
, k4
);
459 fb
= _mm_aesenc_si128(fb
, k5
);
460 fb
= _mm_aesenc_si128(fb
, k6
);
461 fb
= _mm_aesenc_si128(fb
, k7
);
462 fb
= _mm_aesenc_si128(fb
, k8
);
463 fb
= _mm_aesenc_si128(fb
, k9
);
464 fb
= _mm_aesenc_si128(fb
, k10
);
465 fb
= _mm_aesenc_si128(fb
, k11
);
466 fb
= _mm_aesenc_si128(fb
, k12
);
467 fb
= _mm_aesenc_si128(fb
, k13
);
469 fb
= _mm_aesenclast_si128(fb
, k14
);
470 _mm_storeu_si128(bo
+ i
, fb
);
475 * AES-256 CBC decryption
477 static void decrypt_cbc256(aesni_key_t
*key
, u_int blocks
, u_char
*in
,
478 u_char
*iv
, u_char
*out
)
480 __m128i k0
, k1
, k2
, k3
, k4
, k5
, k6
, k7
, k8
, k9
, k10
, k11
, k12
, k13
, k14
;
481 __m128i last
, *bi
, *bo
;
482 __m128i t1
, t2
, t3
, t4
;
483 __m128i f1
, f2
, f3
, f4
;
486 k0
= key
->schedule
[0];
487 k1
= key
->schedule
[1];
488 k2
= key
->schedule
[2];
489 k3
= key
->schedule
[3];
490 k4
= key
->schedule
[4];
491 k5
= key
->schedule
[5];
492 k6
= key
->schedule
[6];
493 k7
= key
->schedule
[7];
494 k8
= key
->schedule
[8];
495 k9
= key
->schedule
[9];
496 k10
= key
->schedule
[10];
497 k11
= key
->schedule
[11];
498 k12
= key
->schedule
[12];
499 k13
= key
->schedule
[13];
500 k14
= key
->schedule
[14];
504 pblocks
= blocks
- (blocks
% CBC_DECRYPT_PARALLELISM
);
506 f1
= _mm_loadu_si128((__m128i
*)iv
);
508 for (i
= 0; i
< pblocks
; i
+= CBC_DECRYPT_PARALLELISM
)
510 t1
= _mm_loadu_si128(bi
+ i
+ 0);
511 t2
= _mm_loadu_si128(bi
+ i
+ 1);
512 t3
= _mm_loadu_si128(bi
+ i
+ 2);
513 t4
= _mm_loadu_si128(bi
+ i
+ 3);
520 t1
= _mm_xor_si128(t1
, k0
);
521 t2
= _mm_xor_si128(t2
, k0
);
522 t3
= _mm_xor_si128(t3
, k0
);
523 t4
= _mm_xor_si128(t4
, k0
);
525 t1
= _mm_aesdec_si128(t1
, k1
);
526 t2
= _mm_aesdec_si128(t2
, k1
);
527 t3
= _mm_aesdec_si128(t3
, k1
);
528 t4
= _mm_aesdec_si128(t4
, k1
);
529 t1
= _mm_aesdec_si128(t1
, k2
);
530 t2
= _mm_aesdec_si128(t2
, k2
);
531 t3
= _mm_aesdec_si128(t3
, k2
);
532 t4
= _mm_aesdec_si128(t4
, k2
);
533 t1
= _mm_aesdec_si128(t1
, k3
);
534 t2
= _mm_aesdec_si128(t2
, k3
);
535 t3
= _mm_aesdec_si128(t3
, k3
);
536 t4
= _mm_aesdec_si128(t4
, k3
);
537 t1
= _mm_aesdec_si128(t1
, k4
);
538 t2
= _mm_aesdec_si128(t2
, k4
);
539 t3
= _mm_aesdec_si128(t3
, k4
);
540 t4
= _mm_aesdec_si128(t4
, k4
);
541 t1
= _mm_aesdec_si128(t1
, k5
);
542 t2
= _mm_aesdec_si128(t2
, k5
);
543 t3
= _mm_aesdec_si128(t3
, k5
);
544 t4
= _mm_aesdec_si128(t4
, k5
);
545 t1
= _mm_aesdec_si128(t1
, k6
);
546 t2
= _mm_aesdec_si128(t2
, k6
);
547 t3
= _mm_aesdec_si128(t3
, k6
);
548 t4
= _mm_aesdec_si128(t4
, k6
);
549 t1
= _mm_aesdec_si128(t1
, k7
);
550 t2
= _mm_aesdec_si128(t2
, k7
);
551 t3
= _mm_aesdec_si128(t3
, k7
);
552 t4
= _mm_aesdec_si128(t4
, k7
);
553 t1
= _mm_aesdec_si128(t1
, k8
);
554 t2
= _mm_aesdec_si128(t2
, k8
);
555 t3
= _mm_aesdec_si128(t3
, k8
);
556 t4
= _mm_aesdec_si128(t4
, k8
);
557 t1
= _mm_aesdec_si128(t1
, k9
);
558 t2
= _mm_aesdec_si128(t2
, k9
);
559 t3
= _mm_aesdec_si128(t3
, k9
);
560 t4
= _mm_aesdec_si128(t4
, k9
);
561 t1
= _mm_aesdec_si128(t1
, k10
);
562 t2
= _mm_aesdec_si128(t2
, k10
);
563 t3
= _mm_aesdec_si128(t3
, k10
);
564 t4
= _mm_aesdec_si128(t4
, k10
);
565 t1
= _mm_aesdec_si128(t1
, k11
);
566 t2
= _mm_aesdec_si128(t2
, k11
);
567 t3
= _mm_aesdec_si128(t3
, k11
);
568 t4
= _mm_aesdec_si128(t4
, k11
);
569 t1
= _mm_aesdec_si128(t1
, k12
);
570 t2
= _mm_aesdec_si128(t2
, k12
);
571 t3
= _mm_aesdec_si128(t3
, k12
);
572 t4
= _mm_aesdec_si128(t4
, k12
);
573 t1
= _mm_aesdec_si128(t1
, k13
);
574 t2
= _mm_aesdec_si128(t2
, k13
);
575 t3
= _mm_aesdec_si128(t3
, k13
);
576 t4
= _mm_aesdec_si128(t4
, k13
);
578 t1
= _mm_aesdeclast_si128(t1
, k14
);
579 t2
= _mm_aesdeclast_si128(t2
, k14
);
580 t3
= _mm_aesdeclast_si128(t3
, k14
);
581 t4
= _mm_aesdeclast_si128(t4
, k14
);
582 t1
= _mm_xor_si128(t1
, f1
);
583 t2
= _mm_xor_si128(t2
, f2
);
584 t3
= _mm_xor_si128(t3
, f3
);
585 t4
= _mm_xor_si128(t4
, f4
);
586 _mm_storeu_si128(bo
+ i
+ 0, t1
);
587 _mm_storeu_si128(bo
+ i
+ 1, t2
);
588 _mm_storeu_si128(bo
+ i
+ 2, t3
);
589 _mm_storeu_si128(bo
+ i
+ 3, t4
);
593 for (i
= pblocks
; i
< blocks
; i
++)
595 last
= _mm_loadu_si128(bi
+ i
);
596 t1
= _mm_xor_si128(last
, k0
);
598 t1
= _mm_aesdec_si128(t1
, k1
);
599 t1
= _mm_aesdec_si128(t1
, k2
);
600 t1
= _mm_aesdec_si128(t1
, k3
);
601 t1
= _mm_aesdec_si128(t1
, k4
);
602 t1
= _mm_aesdec_si128(t1
, k5
);
603 t1
= _mm_aesdec_si128(t1
, k6
);
604 t1
= _mm_aesdec_si128(t1
, k7
);
605 t1
= _mm_aesdec_si128(t1
, k8
);
606 t1
= _mm_aesdec_si128(t1
, k9
);
607 t1
= _mm_aesdec_si128(t1
, k10
);
608 t1
= _mm_aesdec_si128(t1
, k11
);
609 t1
= _mm_aesdec_si128(t1
, k12
);
610 t1
= _mm_aesdec_si128(t1
, k13
);
612 t1
= _mm_aesdeclast_si128(t1
, k14
);
613 t1
= _mm_xor_si128(t1
, f1
);
614 _mm_storeu_si128(bo
+ i
, t1
);
620 * Do inline or allocated de/encryption using key schedule
622 static bool crypt(aesni_cbc_fn_t fn
, aesni_key_t
*key
,
623 chunk_t data
, chunk_t iv
, chunk_t
*out
)
627 if (!key
|| iv
.len
!= AES_BLOCK_SIZE
|| data
.len
% AES_BLOCK_SIZE
)
633 *out
= chunk_alloc(data
.len
);
640 fn(key
, data
.len
/ AES_BLOCK_SIZE
, data
.ptr
, iv
.ptr
, buf
);
644 METHOD(crypter_t
, encrypt
, bool,
645 private_aesni_cbc_t
*this, chunk_t data
, chunk_t iv
, chunk_t
*encrypted
)
647 return crypt(this->encrypt
, this->ekey
, data
, iv
, encrypted
);
650 METHOD(crypter_t
, decrypt
, bool,
651 private_aesni_cbc_t
*this, chunk_t data
, chunk_t iv
, chunk_t
*decrypted
)
653 return crypt(this->decrypt
, this->dkey
, data
, iv
, decrypted
);
656 METHOD(crypter_t
, get_block_size
, size_t,
657 private_aesni_cbc_t
*this)
659 return AES_BLOCK_SIZE
;
662 METHOD(crypter_t
, get_iv_size
, size_t,
663 private_aesni_cbc_t
*this)
665 return AES_BLOCK_SIZE
;
668 METHOD(crypter_t
, get_key_size
, size_t,
669 private_aesni_cbc_t
*this)
671 return this->key_size
;
674 METHOD(crypter_t
, set_key
, bool,
675 private_aesni_cbc_t
*this, chunk_t key
)
677 if (key
.len
!= this->key_size
)
682 DESTROY_IF(this->ekey
);
683 DESTROY_IF(this->dkey
);
685 this->ekey
= aesni_key_create(TRUE
, key
);
686 this->dkey
= aesni_key_create(FALSE
, key
);
688 return this->ekey
&& this->dkey
;
691 METHOD(crypter_t
, destroy
, void,
692 private_aesni_cbc_t
*this)
694 DESTROY_IF(this->ekey
);
695 DESTROY_IF(this->dkey
);
702 aesni_cbc_t
*aesni_cbc_create(encryption_algorithm_t algo
, size_t key_size
)
704 private_aesni_cbc_t
*this;
706 if (algo
!= ENCR_AES_CBC
)
728 .get_block_size
= _get_block_size
,
729 .get_iv_size
= _get_iv_size
,
730 .get_key_size
= _get_key_size
,
735 .key_size
= key_size
,
741 this->encrypt
= encrypt_cbc128
;
742 this->decrypt
= decrypt_cbc128
;
745 this->encrypt
= encrypt_cbc192
;
746 this->decrypt
= decrypt_cbc192
;
749 this->encrypt
= encrypt_cbc256
;
750 this->decrypt
= decrypt_cbc256
;
754 return &this->public;