1 From bcdbd313c0e6fd630a8945fd58dc5383631dc6dd Mon Sep 17 00:00:00 2001
2 From: Timothy McCaffrey <timothy.mccaffrey@unisys.com>
3 Date: Tue, 13 Jan 2015 13:16:43 -0500
4 Subject: [PATCH] crypto: aesni - Add support for 192 & 256 bit keys to AESNI
7 These patches fix the RFC4106 implementation in the aesni-intel
8 module so it supports 192 & 256 bit keys.
10 Since the AVX support that was added to this module also only
11 supports 128 bit keys, and this patch only affects the SSE
12 implementation, changes were also made to use the SSE version
13 if key sizes other than 128 are specified.
15 RFC4106 specifies that 192 & 256 bit keys must be supported (section
16 8.4).
18 Also, this should fix Strongswan issue 341 where the aesni module
19 needs to be unloaded if 256 bit keys are used:
21 http://wiki.strongswan.org/issues/341
23 This patch has been tested with Sandy Bridge and Haswell processors.
24 With 128 bit keys and input buffers > 512 bytes a slight performance
25 degradation was noticed (~1%). For input buffers of less than 512
26 bytes there was no performance impact. Compared to 128 bit keys,
27 256 bit key size performance is approx. .5 cycles per byte slower
28 on Sandy Bridge, and .37 cycles per byte slower on Haswell (vs.
29 128 bit keys).
31 This patch has also been tested with StrongSwan IPSec connections
32 where it worked correctly.
34 I created this diff from a git clone of crypto-2.6.git.
36 Any questions, please feel free to contact me.
38 Signed-off-by: Timothy McCaffrey <timothy.mccaffrey@unisys.com>
39 Signed-off-by: Jarod Wilson <jarod@redhat.com>
40 Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
42 arch/x86/crypto/aesni-intel_asm.S | 342 +++++++++++++++++++------------------
43 arch/x86/crypto/aesni-intel_glue.c | 31 +++-
44 2 files changed, 202 insertions(+), 171 deletions(-)
46 diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
47 index c92c7d8..f5cdfbf 100644
48 --- a/arch/x86/crypto/aesni-intel_asm.S
49 +++ b/arch/x86/crypto/aesni-intel_asm.S
52 #include <asm/alternative-asm.h>
55 + * The following macros are used to move an (un)aligned 16 byte value to/from
56 + * an XMM register. This can done for either FP or integer values, for FP use
57 + * movaps (move aligned packed single) or integer use movdqa (move double quad
58 + * aligned). It doesn't make a performance difference which instruction is used
59 + * since Nehalem (original Core i7) was released. However, the movaps is a byte
60 + * shorter, so that is the one we'll use for now. (same for unaligned).
62 +#define MOVADQ movaps
63 +#define MOVUDQ movups
69 .Lgf128mul_x_ble_mask:
70 .octa 0x00000000000000010000000000000087
72 POLY: .octa 0xC2000000000000000000000000000001
73 TWOONE: .octa 0x00000001000000000000000000000001
75 @@ -90,6 +101,7 @@ enc: .octa 0x2
76 #define arg8 STACK_OFFSET+16(%r14)
77 #define arg9 STACK_OFFSET+24(%r14)
78 #define arg10 STACK_OFFSET+32(%r14)
79 +#define keysize 2*15*16(%arg1)
83 @@ -214,10 +226,12 @@ enc: .octa 0x2
85 .macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
86 XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
87 + MOVADQ SHUF_MASK(%rip), %xmm14
88 mov arg7, %r10 # %r10 = AAD
89 mov arg8, %r15 # %r15 = aadLen
93 _get_AAD_loop\num_initial_blocks\operation:
96 @@ -226,6 +240,7 @@ _get_AAD_loop\num_initial_blocks\operation:
99 jne _get_AAD_loop\num_initial_blocks\operation
102 je _get_AAD_loop2_done\num_initial_blocks\operation
104 @@ -234,8 +249,8 @@ _get_AAD_loop2\num_initial_blocks\operation:
107 jne _get_AAD_loop2\num_initial_blocks\operation
109 _get_AAD_loop2_done\num_initial_blocks\operation:
110 - movdqa SHUF_MASK(%rip), %xmm14
111 PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
113 xor %r11, %r11 # initialise the data pointer offset as zero
114 @@ -244,59 +259,34 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
116 mov %arg5, %rax # %rax = *Y0
117 movdqu (%rax), \XMM0 # XMM0 = Y0
118 - movdqa SHUF_MASK(%rip), %xmm14
119 PSHUFB_XMM %xmm14, \XMM0
121 .if (\i == 5) || (\i == 6) || (\i == 7)
122 + MOVADQ ONE(%RIP),\TMP1
123 + MOVADQ (%arg1),\TMP2
125 - paddd ONE(%rip), \XMM0 # INCR Y0
126 + paddd \TMP1, \XMM0 # INCR Y0
127 movdqa \XMM0, %xmm\index
128 - movdqa SHUF_MASK(%rip), %xmm14
129 PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
133 - pxor 16*0(%arg1), %xmm\index
136 - movaps 0x10(%rdi), \TMP1
137 - AESENC \TMP1, %xmm\index # Round 1
140 - movaps 0x20(%arg1), \TMP1
141 - AESENC \TMP1, %xmm\index # Round 2
142 + pxor \TMP2, %xmm\index
145 - movaps 0x30(%arg1), \TMP1
146 - AESENC \TMP1, %xmm\index # Round 2
149 - movaps 0x40(%arg1), \TMP1
150 - AESENC \TMP1, %xmm\index # Round 2
153 - movaps 0x50(%arg1), \TMP1
154 - AESENC \TMP1, %xmm\index # Round 2
157 - movaps 0x60(%arg1), \TMP1
158 - AESENC \TMP1, %xmm\index # Round 2
161 - movaps 0x70(%arg1), \TMP1
162 - AESENC \TMP1, %xmm\index # Round 2
165 - movaps 0x80(%arg1), \TMP1
166 - AESENC \TMP1, %xmm\index # Round 2
169 - movaps 0x90(%arg1), \TMP1
170 - AESENC \TMP1, %xmm\index # Round 2
171 + lea 0x10(%arg1),%r10
173 + shr $2,%eax # 128->4, 192->6, 256->8
174 + add $5,%eax # 128->9, 192->11, 256->13
176 +aes_loop_initial_dec\num_initial_blocks:
177 + MOVADQ (%r10),\TMP1
179 + AESENC \TMP1, %xmm\index
183 + jnz aes_loop_initial_dec\num_initial_blocks
185 + MOVADQ (%r10), \TMP1
187 - movaps 0xa0(%arg1), \TMP1
188 - AESENCLAST \TMP1, %xmm\index # Round 10
189 + AESENCLAST \TMP1, %xmm\index # Last Round
192 movdqu (%arg3 , %r11, 1), \TMP1
193 @@ -306,10 +296,8 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
196 movdqa \TMP1, %xmm\index
197 - movdqa SHUF_MASK(%rip), %xmm14
198 PSHUFB_XMM %xmm14, %xmm\index
200 - # prepare plaintext/ciphertext for GHASH computation
201 + # prepare plaintext/ciphertext for GHASH computation
204 GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
205 @@ -339,30 +327,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
206 * Precomputations for HashKey parallel with encryption of first 4 blocks.
207 * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
209 - paddd ONE(%rip), \XMM0 # INCR Y0
210 - movdqa \XMM0, \XMM1
211 - movdqa SHUF_MASK(%rip), %xmm14
212 + MOVADQ ONE(%rip), \TMP1
213 + paddd \TMP1, \XMM0 # INCR Y0
214 + MOVADQ \XMM0, \XMM1
215 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
217 - paddd ONE(%rip), \XMM0 # INCR Y0
218 - movdqa \XMM0, \XMM2
219 - movdqa SHUF_MASK(%rip), %xmm14
220 + paddd \TMP1, \XMM0 # INCR Y0
221 + MOVADQ \XMM0, \XMM2
222 PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
224 - paddd ONE(%rip), \XMM0 # INCR Y0
225 - movdqa \XMM0, \XMM3
226 - movdqa SHUF_MASK(%rip), %xmm14
227 + paddd \TMP1, \XMM0 # INCR Y0
228 + MOVADQ \XMM0, \XMM3
229 PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
231 - paddd ONE(%rip), \XMM0 # INCR Y0
232 - movdqa \XMM0, \XMM4
233 - movdqa SHUF_MASK(%rip), %xmm14
234 + paddd \TMP1, \XMM0 # INCR Y0
235 + MOVADQ \XMM0, \XMM4
236 PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
238 - pxor 16*0(%arg1), \XMM1
239 - pxor 16*0(%arg1), \XMM2
240 - pxor 16*0(%arg1), \XMM3
241 - pxor 16*0(%arg1), \XMM4
242 + MOVADQ 0(%arg1),\TMP1
248 pshufd $78, \TMP3, \TMP1
250 @@ -400,7 +386,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
251 pshufd $78, \TMP5, \TMP1
253 movdqa \TMP1, HashKey_4_k(%rsp)
254 - movaps 0xa0(%arg1), \TMP2
255 + lea 0xa0(%arg1),%r10
257 + shr $2,%eax # 128->4, 192->6, 256->8
258 + sub $4,%eax # 128->0, 192->2, 256->4
259 + jz aes_loop_pre_dec_done\num_initial_blocks
261 +aes_loop_pre_dec\num_initial_blocks:
262 + MOVADQ (%r10),\TMP2
264 + AESENC \TMP2, %xmm\index
268 + jnz aes_loop_pre_dec\num_initial_blocks
270 +aes_loop_pre_dec_done\num_initial_blocks:
271 + MOVADQ (%r10), \TMP2
272 AESENCLAST \TMP2, \XMM1
273 AESENCLAST \TMP2, \XMM2
274 AESENCLAST \TMP2, \XMM3
275 @@ -422,15 +424,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
276 movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
279 - movdqa SHUF_MASK(%rip), %xmm14
280 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
282 # combine GHASHed value with the corresponding ciphertext
283 - movdqa SHUF_MASK(%rip), %xmm14
284 PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
285 - movdqa SHUF_MASK(%rip), %xmm14
286 PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
287 - movdqa SHUF_MASK(%rip), %xmm14
288 PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
290 _initial_blocks_done\num_initial_blocks\operation:
291 @@ -452,6 +450,7 @@ _initial_blocks_done\num_initial_blocks\operation:
293 .macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
294 XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
295 + MOVADQ SHUF_MASK(%rip), %xmm14
296 mov arg7, %r10 # %r10 = AAD
297 mov arg8, %r15 # %r15 = aadLen
299 @@ -473,7 +472,6 @@ _get_AAD_loop2\num_initial_blocks\operation:
301 jne _get_AAD_loop2\num_initial_blocks\operation
302 _get_AAD_loop2_done\num_initial_blocks\operation:
303 - movdqa SHUF_MASK(%rip), %xmm14
304 PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
306 xor %r11, %r11 # initialise the data pointer offset as zero
307 @@ -482,59 +480,35 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
309 mov %arg5, %rax # %rax = *Y0
310 movdqu (%rax), \XMM0 # XMM0 = Y0
311 - movdqa SHUF_MASK(%rip), %xmm14
312 PSHUFB_XMM %xmm14, \XMM0
314 .if (\i == 5) || (\i == 6) || (\i == 7)
316 - paddd ONE(%rip), \XMM0 # INCR Y0
317 - movdqa \XMM0, %xmm\index
318 - movdqa SHUF_MASK(%rip), %xmm14
319 - PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
323 - pxor 16*0(%arg1), %xmm\index
326 - movaps 0x10(%rdi), \TMP1
327 - AESENC \TMP1, %xmm\index # Round 1
330 - movaps 0x20(%arg1), \TMP1
331 - AESENC \TMP1, %xmm\index # Round 2
333 + MOVADQ ONE(%RIP),\TMP1
334 + MOVADQ 0(%arg1),\TMP2
336 - movaps 0x30(%arg1), \TMP1
337 - AESENC \TMP1, %xmm\index # Round 2
338 + paddd \TMP1, \XMM0 # INCR Y0
339 + MOVADQ \XMM0, %xmm\index
340 + PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap
341 + pxor \TMP2, %xmm\index
344 - movaps 0x40(%arg1), \TMP1
345 - AESENC \TMP1, %xmm\index # Round 2
348 - movaps 0x50(%arg1), \TMP1
349 - AESENC \TMP1, %xmm\index # Round 2
352 - movaps 0x60(%arg1), \TMP1
353 - AESENC \TMP1, %xmm\index # Round 2
356 - movaps 0x70(%arg1), \TMP1
357 - AESENC \TMP1, %xmm\index # Round 2
360 - movaps 0x80(%arg1), \TMP1
361 - AESENC \TMP1, %xmm\index # Round 2
364 - movaps 0x90(%arg1), \TMP1
365 - AESENC \TMP1, %xmm\index # Round 2
366 + lea 0x10(%arg1),%r10
368 + shr $2,%eax # 128->4, 192->6, 256->8
369 + add $5,%eax # 128->9, 192->11, 256->13
371 +aes_loop_initial_enc\num_initial_blocks:
372 + MOVADQ (%r10),\TMP1
374 + AESENC \TMP1, %xmm\index
378 + jnz aes_loop_initial_enc\num_initial_blocks
380 + MOVADQ (%r10), \TMP1
382 - movaps 0xa0(%arg1), \TMP1
383 - AESENCLAST \TMP1, %xmm\index # Round 10
384 + AESENCLAST \TMP1, %xmm\index # Last Round
387 movdqu (%arg3 , %r11, 1), \TMP1
388 @@ -542,8 +516,6 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
389 movdqu %xmm\index, (%arg2 , %r11, 1)
390 # write back plaintext/ciphertext for num_initial_blocks
393 - movdqa SHUF_MASK(%rip), %xmm14
394 PSHUFB_XMM %xmm14, %xmm\index
396 # prepare plaintext/ciphertext for GHASH computation
397 @@ -576,30 +548,28 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
398 * Precomputations for HashKey parallel with encryption of first 4 blocks.
399 * Haskey_i_k holds XORed values of the low and high parts of the Haskey_i
401 - paddd ONE(%rip), \XMM0 # INCR Y0
402 - movdqa \XMM0, \XMM1
403 - movdqa SHUF_MASK(%rip), %xmm14
404 + MOVADQ ONE(%RIP),\TMP1
405 + paddd \TMP1, \XMM0 # INCR Y0
406 + MOVADQ \XMM0, \XMM1
407 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
409 - paddd ONE(%rip), \XMM0 # INCR Y0
410 - movdqa \XMM0, \XMM2
411 - movdqa SHUF_MASK(%rip), %xmm14
412 + paddd \TMP1, \XMM0 # INCR Y0
413 + MOVADQ \XMM0, \XMM2
414 PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
416 - paddd ONE(%rip), \XMM0 # INCR Y0
417 - movdqa \XMM0, \XMM3
418 - movdqa SHUF_MASK(%rip), %xmm14
419 + paddd \TMP1, \XMM0 # INCR Y0
420 + MOVADQ \XMM0, \XMM3
421 PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
423 - paddd ONE(%rip), \XMM0 # INCR Y0
424 - movdqa \XMM0, \XMM4
425 - movdqa SHUF_MASK(%rip), %xmm14
426 + paddd \TMP1, \XMM0 # INCR Y0
427 + MOVADQ \XMM0, \XMM4
428 PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
430 - pxor 16*0(%arg1), \XMM1
431 - pxor 16*0(%arg1), \XMM2
432 - pxor 16*0(%arg1), \XMM3
433 - pxor 16*0(%arg1), \XMM4
434 + MOVADQ 0(%arg1),\TMP1
440 pshufd $78, \TMP3, \TMP1
442 @@ -637,7 +607,23 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
443 pshufd $78, \TMP5, \TMP1
445 movdqa \TMP1, HashKey_4_k(%rsp)
446 - movaps 0xa0(%arg1), \TMP2
447 + lea 0xa0(%arg1),%r10
449 + shr $2,%eax # 128->4, 192->6, 256->8
450 + sub $4,%eax # 128->0, 192->2, 256->4
451 + jz aes_loop_pre_enc_done\num_initial_blocks
453 +aes_loop_pre_enc\num_initial_blocks:
454 + MOVADQ (%r10),\TMP2
456 + AESENC \TMP2, %xmm\index
460 + jnz aes_loop_pre_enc\num_initial_blocks
462 +aes_loop_pre_enc_done\num_initial_blocks:
463 + MOVADQ (%r10), \TMP2
464 AESENCLAST \TMP2, \XMM1
465 AESENCLAST \TMP2, \XMM2
466 AESENCLAST \TMP2, \XMM3
467 @@ -656,15 +642,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation:
468 movdqu \XMM4, 16*3(%arg2 , %r11 , 1)
471 - movdqa SHUF_MASK(%rip), %xmm14
472 PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap
474 # combine GHASHed value with the corresponding ciphertext
475 - movdqa SHUF_MASK(%rip), %xmm14
476 PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap
477 - movdqa SHUF_MASK(%rip), %xmm14
478 PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap
479 - movdqa SHUF_MASK(%rip), %xmm14
480 PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap
482 _initial_blocks_done\num_initial_blocks\operation:
483 @@ -795,7 +777,23 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
486 PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
487 - movaps 0xa0(%arg1), \TMP3
488 + lea 0xa0(%arg1),%r10
490 + shr $2,%eax # 128->4, 192->6, 256->8
491 + sub $4,%eax # 128->0, 192->2, 256->4
492 + jz aes_loop_par_enc_done
495 + MOVADQ (%r10),\TMP3
497 + AESENC \TMP3, %xmm\index
501 + jnz aes_loop_par_enc
503 +aes_loop_par_enc_done:
504 + MOVADQ (%r10), \TMP3
505 AESENCLAST \TMP3, \XMM1 # Round 10
506 AESENCLAST \TMP3, \XMM2
507 AESENCLAST \TMP3, \XMM3
508 @@ -987,8 +985,24 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
511 PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0
512 - movaps 0xa0(%arg1), \TMP3
513 - AESENCLAST \TMP3, \XMM1 # Round 10
514 + lea 0xa0(%arg1),%r10
516 + shr $2,%eax # 128->4, 192->6, 256->8
517 + sub $4,%eax # 128->0, 192->2, 256->4
518 + jz aes_loop_par_dec_done
521 + MOVADQ (%r10),\TMP3
523 + AESENC \TMP3, %xmm\index
527 + jnz aes_loop_par_dec
529 +aes_loop_par_dec_done:
530 + MOVADQ (%r10), \TMP3
531 + AESENCLAST \TMP3, \XMM1 # last round
532 AESENCLAST \TMP3, \XMM2
533 AESENCLAST \TMP3, \XMM3
534 AESENCLAST \TMP3, \XMM4
535 @@ -1156,33 +1170,29 @@ TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
536 pxor \TMP6, \XMMDst # reduced result is in XMMDst
539 -/* Encryption of a single block done*/
540 -.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
542 - pxor (%arg1), \XMM0
543 - movaps 16(%arg1), \TMP1
544 - AESENC \TMP1, \XMM0
545 - movaps 32(%arg1), \TMP1
546 - AESENC \TMP1, \XMM0
547 - movaps 48(%arg1), \TMP1
548 - AESENC \TMP1, \XMM0
549 - movaps 64(%arg1), \TMP1
550 - AESENC \TMP1, \XMM0
551 - movaps 80(%arg1), \TMP1
552 - AESENC \TMP1, \XMM0
553 - movaps 96(%arg1), \TMP1
554 - AESENC \TMP1, \XMM0
555 - movaps 112(%arg1), \TMP1
556 - AESENC \TMP1, \XMM0
557 - movaps 128(%arg1), \TMP1
558 - AESENC \TMP1, \XMM0
559 - movaps 144(%arg1), \TMP1
560 - AESENC \TMP1, \XMM0
561 - movaps 160(%arg1), \TMP1
562 - AESENCLAST \TMP1, \XMM0
564 +/* Encryption of a single block
568 +.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
570 + pxor (%arg1), \XMM0
572 + shr $2,%eax # 128->4, 192->6, 256->8
573 + add $5,%eax # 128->9, 192->11, 256->13
574 + lea 16(%arg1), %r10 # get first expanded key address
577 + MOVADQ (%r10),\TMP1
583 + MOVADQ (%r10),\TMP1
584 + AESENCLAST \TMP1,\XMM0
586 /*****************************************************************************
587 * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
588 * u8 *out, // Plaintext output. Encrypt in-place is allowed.
589 diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
590 index 6d4faba..bfaf817 100644
591 --- a/arch/x86/crypto/aesni-intel_glue.c
592 +++ b/arch/x86/crypto/aesni-intel_glue.c
593 @@ -177,7 +177,8 @@ static void aesni_gcm_enc_avx(void *ctx, u8 *out,
594 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
595 u8 *auth_tag, unsigned long auth_tag_len)
597 - if (plaintext_len < AVX_GEN2_OPTSIZE) {
598 + struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
599 + if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)){
600 aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
601 aad_len, auth_tag, auth_tag_len);
603 @@ -192,7 +193,8 @@ static void aesni_gcm_dec_avx(void *ctx, u8 *out,
604 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
605 u8 *auth_tag, unsigned long auth_tag_len)
607 - if (ciphertext_len < AVX_GEN2_OPTSIZE) {
608 + struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
609 + if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
610 aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad,
611 aad_len, auth_tag, auth_tag_len);
613 @@ -226,7 +228,8 @@ static void aesni_gcm_enc_avx2(void *ctx, u8 *out,
614 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
615 u8 *auth_tag, unsigned long auth_tag_len)
617 - if (plaintext_len < AVX_GEN2_OPTSIZE) {
618 + struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
619 + if ((plaintext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
620 aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad,
621 aad_len, auth_tag, auth_tag_len);
622 } else if (plaintext_len < AVX_GEN4_OPTSIZE) {
623 @@ -245,7 +248,8 @@ static void aesni_gcm_dec_avx2(void *ctx, u8 *out,
624 u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
625 u8 *auth_tag, unsigned long auth_tag_len)
627 - if (ciphertext_len < AVX_GEN2_OPTSIZE) {
628 + struct crypto_aes_ctx *aes_ctx = (struct crypto_aes_ctx*)ctx;
629 + if ((ciphertext_len < AVX_GEN2_OPTSIZE) || (aes_ctx-> key_length != AES_KEYSIZE_128)) {
630 aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey,
631 aad, aad_len, auth_tag, auth_tag_len);
632 } else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
633 @@ -878,7 +882,8 @@ static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key,
635 /*Account for 4 byte nonce at the end.*/
637 - if (key_len != AES_KEYSIZE_128) {
638 + if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
639 + key_len != AES_KEYSIZE_256) {
640 crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
643 @@ -989,6 +994,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
644 __be32 counter = cpu_to_be32(1);
645 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
646 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
647 + u32 key_len = ctx->aes_key_expanded.key_length;
648 void *aes_ctx = &(ctx->aes_key_expanded);
649 unsigned long auth_tag_len = crypto_aead_authsize(tfm);
650 u8 iv_tab[16+AESNI_ALIGN];
651 @@ -1003,6 +1009,13 @@ static int __driver_rfc4106_encrypt(struct aead_request *req)
652 /* to 8 or 12 bytes */
653 if (unlikely(req->assoclen != 8 && req->assoclen != 12))
655 + if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
657 + if (unlikely(key_len != AES_KEYSIZE_128 &&
658 + key_len != AES_KEYSIZE_192 &&
659 + key_len != AES_KEYSIZE_256))
663 for (i = 0; i < 4; i++)
664 *(iv+i) = ctx->nonce[i];
665 @@ -1067,6 +1080,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
667 struct crypto_aead *tfm = crypto_aead_reqtfm(req);
668 struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
669 + u32 key_len = ctx->aes_key_expanded.key_length;
670 void *aes_ctx = &(ctx->aes_key_expanded);
671 unsigned long auth_tag_len = crypto_aead_authsize(tfm);
672 u8 iv_and_authTag[32+AESNI_ALIGN];
673 @@ -1080,6 +1094,13 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
674 if (unlikely((req->cryptlen < auth_tag_len) ||
675 (req->assoclen != 8 && req->assoclen != 12)))
677 + if (unlikely(auth_tag_len != 8 && auth_tag_len != 12 && auth_tag_len != 16))
679 + if (unlikely(key_len != AES_KEYSIZE_128 &&
680 + key_len != AES_KEYSIZE_192 &&
681 + key_len != AES_KEYSIZE_256))
684 /* Assuming we are supporting rfc4106 64-bit extended */
685 /* sequence numbers We need to have the AAD length */
686 /* equal to 8 or 12 bytes */