/*-
 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
 * Written by Michal Ludvig <michal@logix.cz>
 *          http://www.logix.cz/michal
 *
 * Big thanks to Andy Polyakov for his help with optimization,
 * assembler fixes, the port to MS Windows and a lot of other
 * valuable work on this engine!
 */

/* ====================================================================
 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
 *
 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For written permission, please contact
 *    licensing@OpenSSL.org.
 *
 * 5. Products derived from this software may not be called "OpenSSL"
 *    nor may "OpenSSL" appear in their names without prior written
 *    permission of the OpenSSL Project.
 *
 * 6. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by the OpenSSL Project
 *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
 *
 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
 * This product includes cryptographic software written by Eric Young
 * (eay@cryptsoft.com). This product includes software written by Tim
 * Hudson (tjh@cryptsoft.com).
 *
 */

#include <stdio.h>
#include <string.h>

#include <openssl/opensslconf.h>
#include <openssl/crypto.h>
#include <openssl/dso.h>
#include <openssl/engine.h>
#include <openssl/evp.h>
#ifndef OPENSSL_NO_AES
# include <openssl/aes.h>
#endif
#include <openssl/rand.h>
#include <openssl/err.h>

#ifndef OPENSSL_NO_HW
# ifndef OPENSSL_NO_HW_PADLOCK

/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
#  if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
#   ifndef OPENSSL_NO_DYNAMIC_ENGINE
#    define DYNAMIC_ENGINE
#   endif
#  elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
#   ifdef ENGINE_DYNAMIC_SUPPORT
#    define DYNAMIC_ENGINE
#   endif
#  else
#   error "Only OpenSSL >= 0.9.7 is supported"
#  endif

/*
 * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only does it
 * not exist elsewhere, it cannot even be compiled on other platforms!
 *
 * In addition, because of the heavy use of inline assembler, compiler choice
 * is limited to GCC and Microsoft C.
 */
#  undef COMPILE_HW_PADLOCK
#  if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
#   if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
       (defined(_MSC_VER) && defined(_M_IX86))
#    define COMPILE_HW_PADLOCK
#   endif
#  endif

#  ifdef OPENSSL_NO_DYNAMIC_ENGINE
#   ifdef COMPILE_HW_PADLOCK
static ENGINE *ENGINE_padlock(void);
#   endif

void ENGINE_load_padlock(void)
{
    /* On non-x86 CPUs it just returns. */
#   ifdef COMPILE_HW_PADLOCK
    ENGINE *toadd = ENGINE_padlock();
    if (!toadd)
        return;
    ENGINE_add(toadd);
    ENGINE_free(toadd);
    ERR_clear_error();
#   endif
}

#  endif
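
/*
 * For reference, an application built against a static OpenSSL would
 * typically activate this engine roughly as follows (an illustrative
 * sketch only, not part of this file; error handling trimmed):
 *
 *     ENGINE_load_padlock();
 *     ENGINE *e = ENGINE_by_id("padlock");
 *     if (e != NULL && ENGINE_init(e))
 *         ENGINE_set_default(e, ENGINE_METHOD_ALL);
 *     ENGINE_free(e);
 */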

#  ifdef COMPILE_HW_PADLOCK
/*
 * We do these includes here to avoid header problems on platforms that do
 * not have the VIA padlock anyway...
 */
#   include <stdlib.h>
#   ifdef _WIN32
#    include <malloc.h>
#    ifndef alloca
#     define alloca _alloca
#    endif
#   elif defined(__GNUC__)
#    ifndef alloca
#     define alloca(s) __builtin_alloca(s)
#    endif
#   endif

/* Functions for ENGINE detection and control */
static int padlock_available(void);
static int padlock_init(ENGINE *e);

/* RNG Stuff */
static RAND_METHOD padlock_rand;

/* Cipher Stuff */
#   ifndef OPENSSL_NO_AES
static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
                           const int **nids, int nid);
#   endif

/* Engine names */
static const char *padlock_id = "padlock";
static char padlock_name[100];

/* Available features */
static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
static int padlock_use_rng = 0; /* Random Number Generator */
#   ifndef OPENSSL_NO_AES
static int padlock_aes_align_required = 1;
#   endif

/* ===== Engine "management" functions ===== */

/* Prepare the ENGINE structure for registration */
static int padlock_bind_helper(ENGINE *e)
{
    /* Check available features */
    padlock_available();

#   if 1                        /* disable RNG for now, see commentary in
                                 * the vicinity of the RNG code */
    padlock_use_rng = 0;
#   endif

    /* Generate a nice engine name with available features */
    BIO_snprintf(padlock_name, sizeof(padlock_name),
                 "VIA PadLock (%s, %s)",
                 padlock_use_rng ? "RNG" : "no-RNG",
                 padlock_use_ace ? "ACE" : "no-ACE");

    /* Register everything or return with an error */
    if (!ENGINE_set_id(e, padlock_id) ||
        !ENGINE_set_name(e, padlock_name) ||
        !ENGINE_set_init_function(e, padlock_init) ||
#   ifndef OPENSSL_NO_AES
        (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
#   endif
        (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
        return 0;
    }

    /* Everything looks good */
    return 1;
}

#   ifdef OPENSSL_NO_DYNAMIC_ENGINE

/* Constructor */
static ENGINE *ENGINE_padlock(void)
{
    ENGINE *eng = ENGINE_new();

    if (!eng) {
        return NULL;
    }

    if (!padlock_bind_helper(eng)) {
        ENGINE_free(eng);
        return NULL;
    }

    return eng;
}

#   endif

/* Check availability of the engine */
static int padlock_init(ENGINE *e)
{
    return (padlock_use_rng || padlock_use_ace);
}

/*
 * This stuff is needed if this ENGINE is being compiled into a
 * self-contained shared-library.
 */
#   ifdef DYNAMIC_ENGINE
static int padlock_bind_fn(ENGINE *e, const char *id)
{
    if (id && (strcmp(id, padlock_id) != 0)) {
        return 0;
    }

    if (!padlock_bind_helper(e)) {
        return 0;
    }

    return 1;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
#   endif                       /* DYNAMIC_ENGINE */

/* ===== Here comes the "real" engine ===== */

#   ifndef OPENSSL_NO_AES
/* Some AES-related constants */
#    define AES_BLOCK_SIZE          16
#    define AES_KEY_SIZE_128        16
#    define AES_KEY_SIZE_192        24
#    define AES_KEY_SIZE_256        32
/*
 * Here we store the status information relevant to the current context.
 */
/*
 * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
 * the order of items in this structure. Don't blindly modify, reorder,
 * etc!
 */
struct padlock_cipher_data {
    unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
    union {
        unsigned int pad[4];
        struct {
            int rounds:4;
            int dgst:1;         /* n/a in C3 */
            int align:1;        /* n/a in C3 */
            int ciphr:1;        /* n/a in C3 */
            unsigned int keygen:1;
            int interm:1;
            unsigned int encdec:1;
            int ksize:2;
        } b;
    } cword;                    /* Control word */
    AES_KEY ks;                 /* Encryption key */
};
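
/*
 * An illustration of how the fields are populated (derived from
 * padlock_aes_init_key() below): AES-128 encryption ends up with
 * rounds = 10, ksize = 0 and keygen = 0 (the hardware expands the key
 * itself), while AES-256 decryption uses rounds = 14, ksize = 2,
 * keygen = 1 (a pre-expanded key loaded from 'ks') and encdec = 1.
 */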

/*
 * Essentially this variable belongs in thread local storage.
 * Having this variable global on the other hand can only cause
 * a few bogus key reloads [if any at all on a single-CPU system],
 * so we accept the penalty...
 */
static volatile struct padlock_cipher_data *padlock_saved_context;
#   endif

/*-
 * =======================================================
 * Inline assembler section(s).
 * =======================================================
 * Order of arguments is chosen to facilitate Windows port
 * using __fastcall calling convention. If you wish to add
 * more routines, keep in mind that the first __fastcall
 * argument is passed in %ecx and the second in %edx.
 * =======================================================
 */
#   if defined(__GNUC__) && __GNUC__>=2
/*
 * As for the excessive "push %ebx"/"pop %ebx" pairs found all over:
 * when generating position-independent code GCC won't let us use "b"
 * in assembler templates, nor even respect "ebx" in the clobber list.
 * Hence the trouble...
 */

/*
 * Helper function - check if a CPUID instruction is available on this CPU
 */
static int padlock_insn_cpuid_available(void)
{
    int result = -1;

    /*
     * We're checking whether bit #21 of EFLAGS can be toggled. If yes,
     * CPUID is available.
     */
    asm volatile ("pushf\n"
                  "popl %%eax\n"
                  "xorl $0x200000, %%eax\n"
                  "movl %%eax, %%ecx\n"
                  "andl $0x200000, %%ecx\n"
                  "pushl %%eax\n"
                  "popf\n"
                  "pushf\n"
                  "popl %%eax\n"
                  "andl $0x200000, %%eax\n"
                  "xorl %%eax, %%ecx\n"
                  "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");

    return (result == 0);
}

/*
 * Load supported features of the CPU to see if the PadLock is available.
 */
static int padlock_available(void)
{
    char vendor_string[16];
    unsigned int eax, edx;

    /* First check if the CPUID instruction is available at all... */
    if (!padlock_insn_cpuid_available())
        return 0;

    /* Are we running on the Centaur (VIA) CPU? */
    eax = 0x00000000;
    vendor_string[12] = 0;
    asm volatile ("pushl %%ebx\n"
                  "cpuid\n"
                  "movl %%ebx,(%%edi)\n"
                  "movl %%edx,4(%%edi)\n"
                  "movl %%ecx,8(%%edi)\n"
                  "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
    if (strcmp(vendor_string, "CentaurHauls") != 0)
        return 0;

    /* Check for Centaur Extended Feature Flags presence */
    eax = 0xC0000000;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
    if (eax < 0xC0000001)
        return 0;

    /* Read the Centaur Extended Feature Flags */
    eax = 0xC0000001;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
                  "=d"(edx)::"ecx");

    /*
     * Fill up the flags: bits 6/7 of EDX signal "ACE present"/"ACE
     * enabled", bits 2/3 signal "RNG present"/"RNG enabled".
     */
    padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
    padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));

    return padlock_use_ace + padlock_use_rng;
}

#    ifndef OPENSSL_NO_AES
#     ifndef AES_ASM
/* Our own htonl()/ntohl() */
static inline void padlock_bswapl(AES_KEY *ks)
{
    size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
    unsigned int *key = ks->rd_key;

    while (i--) {
        asm volatile ("bswapl %0":"+r" (*key));
        key++;
    }
}
#     endif
#    endif

/*
 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
 * stack clears EFLAGS[30] which does the trick.
 */
static inline void padlock_reload_key(void)
{
    asm volatile ("pushfl; popfl");
}

#    ifndef OPENSSL_NO_AES
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. The point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *will* be reloaded upon the thread context switch
 * and we are therefore set in either case...
 */
static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
{
    asm volatile ("pushfl\n"
                  "       btl     $30,(%%esp)\n"
                  "       jnc     1f\n"
                  "       cmpl    %2,%1\n"
                  "       je      1f\n"
                  "       popfl\n"
                  "       subl    $4,%%esp\n"
                  "1:     addl    $4,%%esp\n"
                  "       movl    %2,%0":"+m" (padlock_saved_context)
                  :"r"(padlock_saved_context), "r"(cdata):"cc");
}

/* Template for padlock_xcrypt_* modes */
/*
 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
#     define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)        \
static inline void *name(size_t cnt,                    \
        struct padlock_cipher_data *cdata,              \
        void *out, const void *inp)                     \
{       void *iv;                                       \
        asm volatile ( "pushl   %%ebx\n"                \
                "       leal    16(%0),%%edx\n"         \
                "       leal    32(%0),%%ebx\n"         \
                rep_xcrypt "\n"                         \
                "       popl    %%ebx"                  \
            : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
            : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
            : "edx", "cc", "memory");                   \
        return iv;                                      \
}

/* Generate all functions with appropriate opcodes */
/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
/* rep xcryptofb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
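
/*
 * A sketch of the resulting contract, as relied upon later in this file:
 * each generated routine processes 'cnt' 16-byte blocks from 'inp' into
 * 'out' under the context 'cdata' and returns a pointer to the
 * hardware-updated IV, e.g.
 *
 *     iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out, inp);
 */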
#    endif
/* The RNG call itself */
static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
{
    unsigned int eax_out;

    asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
                  :"=a" (eax_out), "=m"(*(unsigned *)addr)
                  :"D"(addr), "d"(edx_in)
        );

    return eax_out;
}

/*
 * Why not inline 'rep movsd'? I failed to find information on what value
 * of the Direction Flag one can expect and consequently had to apply the
 * "better-safe-than-sorry" approach and assume it "undefined." I could
 * explicitly clear it and restore the original value upon return from
 * padlock_aes_cipher, but it's presumably too much trouble for too little
 * gain... In case you wonder, the 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward larger
 * addresses unconditionally.
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;

    n /= sizeof(*d);
    do {
        *d++ = *s++;
    } while (--n);

    return dst;
}

#   elif defined(_MSC_VER)
/*
 * Unlike with GCC these are real functions. To minimize the impact on
 * performance we adhere to the __fastcall calling convention so that
 * the first two arguments are passed in %ecx and %edx. Which kind of
 * suits very well, as the instructions in question use both %ecx and
 * %edx as input :-)
 */
#    define REP_XCRYPT(code)                \
        _asm _emit 0xf3                     \
        _asm _emit 0x0f _asm _emit 0xa7     \
        _asm _emit code

/*
 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
 * of the 'padlock_cipher_data' structure.
 */
#    define PADLOCK_XCRYPT_ASM(name,code)   \
static void * __fastcall                    \
        name (size_t cnt, void *cdata,      \
              void *outp, const void *inp)  \
{       _asm    mov     eax,edx             \
        _asm    lea     edx,[eax+16]        \
        _asm    lea     ebx,[eax+32]        \
        _asm    mov     edi,outp            \
        _asm    mov     esi,inp             \
        REP_XCRYPT(code)                    \
}

PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, 0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, 0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, 0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, 0xe8)

static int __fastcall padlock_xstore(void *outp, unsigned int code)
{
    _asm    mov     edi,ecx
    _asm    _emit   0x0f _asm _emit 0xa7 _asm _emit 0xc0
}

static void __fastcall padlock_reload_key(void)
{
    _asm pushfd
    _asm popfd
}

static void __fastcall padlock_verify_context(void *cdata)
{
    _asm {
        pushfd
        bt      DWORD PTR [esp],30
        jnc     skip
        cmp     ecx,padlock_saved_context
        je      skip
        popfd
        sub     esp,4
    skip:
        add     esp,4
        mov     padlock_saved_context,ecx
    }
}

static int padlock_available(void)
{
    _asm {
        pushfd
        pop     eax
        mov     ecx,eax
        xor     eax,1<<21
        push    eax
        popfd
        pushfd
        pop     eax
        xor     eax,ecx
        bt      eax,21
        jnc     noluck
        mov     eax,0
        cpuid
        xor     eax,eax
        cmp     ebx,'tneC'
        jne     noluck
        cmp     edx,'Hrua'
        jne     noluck
        cmp     ecx,'slua'
        jne     noluck
        mov     eax,0xC0000000
        cpuid
        mov     edx,eax
        xor     eax,eax
        cmp     edx,0xC0000001
        jb      noluck
        mov     eax,0xC0000001
        cpuid
        xor     eax,eax
        bt      edx,6
        jnc     skip_a
        bt      edx,7
        jnc     skip_a
        mov     padlock_use_ace,1
        inc     eax
    skip_a:
        bt      edx,2
        jnc     skip_r
        bt      edx,3
        jnc     skip_r
        mov     padlock_use_rng,1
        inc     eax
    skip_r:
    noluck:
    }
}

static void __fastcall padlock_bswapl(void *key)
{
    _asm {
        pushfd
        cld
        mov     esi,ecx
        mov     edi,ecx
        mov     ecx,60
    up:
        lodsd
        bswap   eax
        stosd
        loop    up
        popfd
    }
}

/*
 * MS actually specifies the status of the Direction Flag, and the compiler
 * even manages to compile the following as 'rep movsd' all by itself...
 */
#    define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
#   endif

/* ===== AES encryption/decryption ===== */
#   ifndef OPENSSL_NO_AES
#    if defined(NID_aes_128_cfb128) && !defined(NID_aes_128_cfb)
#     define NID_aes_128_cfb NID_aes_128_cfb128
#    endif
#    if defined(NID_aes_128_ofb128) && !defined(NID_aes_128_ofb)
#     define NID_aes_128_ofb NID_aes_128_ofb128
#    endif
#    if defined(NID_aes_192_cfb128) && !defined(NID_aes_192_cfb)
#     define NID_aes_192_cfb NID_aes_192_cfb128
#    endif
#    if defined(NID_aes_192_ofb128) && !defined(NID_aes_192_ofb)
#     define NID_aes_192_ofb NID_aes_192_ofb128
#    endif
#    if defined(NID_aes_256_cfb128) && !defined(NID_aes_256_cfb)
#     define NID_aes_256_cfb NID_aes_256_cfb128
#    endif
#    if defined(NID_aes_256_ofb128) && !defined(NID_aes_256_ofb)
#     define NID_aes_256_ofb NID_aes_256_ofb128
#    endif

/* List of supported ciphers. */
static int padlock_cipher_nids[] = {
    NID_aes_128_ecb,
    NID_aes_128_cbc,
    NID_aes_128_cfb,
    NID_aes_128_ofb,

    NID_aes_192_ecb,
    NID_aes_192_cbc,
    NID_aes_192_cfb,
    NID_aes_192_ofb,

    NID_aes_256_ecb,
    NID_aes_256_cbc,
    NID_aes_256_cfb,
    NID_aes_256_ofb,
};

static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
                                      sizeof(padlock_cipher_nids[0]));

/* Function prototypes ... */
static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                                const unsigned char *iv, int enc);
static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                              const unsigned char *in, size_t nbytes);

#    define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +         \
        ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
#    define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
        NEAREST_ALIGNED(ctx->cipher_data))
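
/*
 * A quick worked example of NEAREST_ALIGNED with a hypothetical address:
 * for ptr = 0x1009, (0x10 - (0x1009 & 0x0F)) & 0x0F = 7, so the macro
 * yields 0x1010, the next 16-byte boundary; an already aligned pointer
 * is returned unchanged. This is why cipher_data is over-allocated by
 * 16 bytes in DECLARE_AES_EVP below.
 */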

#    define EVP_CIPHER_block_size_ECB       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_CBC       AES_BLOCK_SIZE
#    define EVP_CIPHER_block_size_OFB       1
#    define EVP_CIPHER_block_size_CFB       1

/*
 * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
 * of preprocessor magic :-)
 */
#    define DECLARE_AES_EVP(ksize,lmode,umode)              \
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = {   \
        NID_aes_##ksize##_##lmode,                          \
        EVP_CIPHER_block_size_##umode,                      \
        AES_KEY_SIZE_##ksize,                               \
        AES_BLOCK_SIZE,                                     \
        0 | EVP_CIPH_##umode##_MODE,                        \
        padlock_aes_init_key,                               \
        padlock_aes_cipher,                                 \
        NULL,                                               \
        sizeof(struct padlock_cipher_data) + 16,            \
        EVP_CIPHER_set_asn1_iv,                             \
        EVP_CIPHER_get_asn1_iv,                             \
        NULL,                                               \
        NULL                                                \
}
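
/*
 * For example, DECLARE_AES_EVP(128, cbc, CBC) below expands to roughly:
 *
 *     static const EVP_CIPHER padlock_aes_128_cbc = {
 *         NID_aes_128_cbc, 16, 16, 16, EVP_CIPH_CBC_MODE,
 *         padlock_aes_init_key, padlock_aes_cipher, NULL,
 *         sizeof(struct padlock_cipher_data) + 16,
 *         EVP_CIPHER_set_asn1_iv, EVP_CIPHER_get_asn1_iv, NULL, NULL
 *     };
 */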

DECLARE_AES_EVP(128, ecb, ECB);
DECLARE_AES_EVP(128, cbc, CBC);
DECLARE_AES_EVP(128, cfb, CFB);
DECLARE_AES_EVP(128, ofb, OFB);

DECLARE_AES_EVP(192, ecb, ECB);
DECLARE_AES_EVP(192, cbc, CBC);
DECLARE_AES_EVP(192, cfb, CFB);
DECLARE_AES_EVP(192, ofb, OFB);

DECLARE_AES_EVP(256, ecb, ECB);
DECLARE_AES_EVP(256, cbc, CBC);
DECLARE_AES_EVP(256, cfb, CFB);
DECLARE_AES_EVP(256, ofb, OFB);

static int
padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
                int nid)
{
    /* No specific cipher => return a list of supported nids ... */
    if (!cipher) {
        *nids = padlock_cipher_nids;
        return padlock_cipher_nids_num;
    }

    /* ... or the requested "cipher" otherwise */
    switch (nid) {
    case NID_aes_128_ecb:
        *cipher = &padlock_aes_128_ecb;
        break;
    case NID_aes_128_cbc:
        *cipher = &padlock_aes_128_cbc;
        break;
    case NID_aes_128_cfb:
        *cipher = &padlock_aes_128_cfb;
        break;
    case NID_aes_128_ofb:
        *cipher = &padlock_aes_128_ofb;
        break;

    case NID_aes_192_ecb:
        *cipher = &padlock_aes_192_ecb;
        break;
    case NID_aes_192_cbc:
        *cipher = &padlock_aes_192_cbc;
        break;
    case NID_aes_192_cfb:
        *cipher = &padlock_aes_192_cfb;
        break;
    case NID_aes_192_ofb:
        *cipher = &padlock_aes_192_ofb;
        break;

    case NID_aes_256_ecb:
        *cipher = &padlock_aes_256_ecb;
        break;
    case NID_aes_256_cbc:
        *cipher = &padlock_aes_256_cbc;
        break;
    case NID_aes_256_cfb:
        *cipher = &padlock_aes_256_cfb;
        break;
    case NID_aes_256_ofb:
        *cipher = &padlock_aes_256_ofb;
        break;

    default:
        /* Sorry, we don't support this NID */
        *cipher = NULL;
        return 0;
    }

    return 1;
}
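
/*
 * Once the engine is registered, these ciphers are picked up through the
 * regular EVP interface, e.g. (an illustrative sketch only; error
 * handling trimmed, 'e' being the initialized padlock ENGINE):
 *
 *     EVP_CIPHER_CTX ctx;
 *     EVP_CIPHER_CTX_init(&ctx);
 *     EVP_EncryptInit_ex(&ctx, EVP_aes_128_cbc(), e, key, iv);
 *     EVP_EncryptUpdate(&ctx, out, &outl, in, inl);
 */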

/* Prepare the encryption key for PadLock usage */
static int
padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                     const unsigned char *iv, int enc)
{
    struct padlock_cipher_data *cdata;
    int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8;

    if (key == NULL)
        return 0;               /* ERROR */

    cdata = ALIGNED_CIPHER_DATA(ctx);
    memset(cdata, 0, sizeof(struct padlock_cipher_data));

    /* Prepare Control word. */
    if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
        cdata->cword.b.encdec = 0;
    else
        cdata->cword.b.encdec = (ctx->encrypt == 0);
    cdata->cword.b.rounds = 10 + (key_len - 128) / 32;
    cdata->cword.b.ksize = (key_len - 128) / 64;

    switch (key_len) {
    case 128:
        /*
         * PadLock can generate an extended key for AES128 in hardware
         */
        memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
        cdata->cword.b.keygen = 0;
        break;

    case 192:
    case 256:
        /*
         * Generate an extended AES key in software. Needed for AES192/AES256
         */
        /*
         * Well, the above applies to Stepping 8 CPUs and is listed as
         * hardware errata. They most likely will fix it at some point and
         * then a check for stepping would be due here.
         */
        if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
            EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
            AES_set_encrypt_key(key, key_len, &cdata->ks);
        else
            AES_set_decrypt_key(key, key_len, &cdata->ks);
#    ifndef AES_ASM
        /*
         * OpenSSL C functions use a byte-swapped extended key.
         */
        padlock_bswapl(&cdata->ks);
#    endif
        cdata->cword.b.keygen = 1;
        break;

    default:
        /* ERROR */
        return 0;
    }

    /*
     * This is done to cover cases when the user reuses the context
     * for a new key. The catch is that if we don't do this,
     * padlock_aes_cipher might proceed with the old key...
     */
    padlock_reload_key();

    return 1;
}

/*-
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                              const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    void *iv;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}

#    ifndef PADLOCK_CHUNK
#     define PADLOCK_CHUNK  512 /* Must be a power of 2 not smaller than 16 */
#    endif
#    if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
#     error "insane PADLOCK_CHUNK..."
#    endif

/*
 * Re-align the arguments to 16-byte boundaries and run the encryption
 * function itself. This function is not AES-specific.
 */
static int
padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                   const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    const void *inp;
    unsigned char *out;
    void *iv;
    int inp_misaligned, out_misaligned, realign_in_loop;
    size_t chunk, allocated = 0;

    /*
     * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
     */
    if ((chunk = ctx->num)) {   /* borrow chunk variable */
        unsigned char *ivp = ctx->iv;

        switch (EVP_CIPHER_CTX_mode(ctx)) {
        case EVP_CIPH_CFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            if (ctx->encrypt)
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                    chunk++, nbytes--;
                }
            else
                while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ ivp[chunk];
                    ivp[chunk++] = c, nbytes--;
                }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        case EVP_CIPH_OFB_MODE:
            if (chunk >= AES_BLOCK_SIZE)
                return 0;       /* bogus value */

            while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
                *(out_arg++) = *(in_arg++) ^ ivp[chunk];
                chunk++, nbytes--;
            }

            ctx->num = chunk % AES_BLOCK_SIZE;
            break;
        }
    }

    if (nbytes == 0)
        return 1;
#    if 0
    if (nbytes % AES_BLOCK_SIZE)
        return 0;               /* are we expected to do tail processing? */
#    else
    /*
     * nbytes is always a multiple of AES_BLOCK_SIZE in ECB and CBC modes
     * and an arbitrary value in byte-oriented modes, such as CFB and OFB...
     */
#    endif

    /*
     * VIA promises CPUs that won't require alignment in the future. For now
     * padlock_aes_align_required is initialized to 1 and the condition is
     * never met...
     */
    /*
     * The C7 core is capable of handling unaligned input in non-ECB[!]
     * modes, but the performance penalty appears to be approximately the
     * same as for the software alignment below, i.e. ~3x. They promise to
     * improve it in the future, but for now we can just as well pretend
     * that it can only handle aligned input...
     */
    if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    inp_misaligned = (((size_t)in_arg) & 0x0F);
    out_misaligned = (((size_t)out_arg) & 0x0F);

    /*
     * Note that even if output is aligned and input not, I still prefer to
     * loop instead of copying the whole input and then encrypting in one
     * stroke. This is done in order to improve L1 cache utilization...
     */
    realign_in_loop = out_misaligned | inp_misaligned;

    if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
        return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);

    /* this takes one "if" out of the loops */
    chunk = nbytes;
    chunk %= PADLOCK_CHUNK;
    if (chunk == 0)
        chunk = PADLOCK_CHUNK;

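    /*
     * An example of the resulting schedule (with the default PADLOCK_CHUNK
     * of 512): for nbytes = 1300 the loops below process 1300 % 512 = 276
     * bytes first and then two full 512-byte chunks, so only the first
     * iteration needs a non-standard size.
     */
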
    if (out_misaligned) {
        /* optimize for small input */
        allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
        out = alloca(0x10 + allocated);
        out = NEAREST_ALIGNED(out);
    } else
        out = out_arg;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    padlock_verify_context(cdata);

    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        do {
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
            chunk = PADLOCK_CHUNK;
        } while (nbytes);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        goto cbc_shortcut;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cbc_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

        } while (nbytes -= chunk);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            goto cfb_shortcut;
        else
            goto cfb_skiploop;
        do {
            if (iv != cdata->iv)
                memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
            chunk = PADLOCK_CHUNK;
 cfb_shortcut:                 /* optimize for small input */
            if (inp_misaligned)
                inp = padlock_memcpy(out, in_arg, chunk);
            else
                inp = in_arg;
            in_arg += chunk;

            iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

            if (out_misaligned)
                out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
            else
                out = out_arg += chunk;

            nbytes -= chunk;
        } while (nbytes >= AES_BLOCK_SIZE);

 cfb_skiploop:
        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            if (iv != ivp) {
                memcpy(ivp, iv, AES_BLOCK_SIZE);
                iv = ivp;
            }
            ctx->num = nbytes;
            if (cdata->cword.b.encdec) {
                cdata->cword.b.encdec = 0;
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                cdata->cword.b.encdec = 1;
                padlock_reload_key();
                while (nbytes) {
                    unsigned char c = *(in_arg++);
                    *(out_arg++) = c ^ *ivp;
                    *(ivp++) = c, nbytes--;
                }
            } else {
                padlock_reload_key();
                padlock_xcrypt_ecb(1, cdata, ivp, ivp);
                padlock_reload_key();
                while (nbytes) {
                    *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
                    ivp++, nbytes--;
                }
            }
        }

        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        chunk &= ~(AES_BLOCK_SIZE - 1);
        if (chunk)
            do {
                if (inp_misaligned)
                    inp = padlock_memcpy(out, in_arg, chunk);
                else
                    inp = in_arg;
                in_arg += chunk;

                padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);

                if (out_misaligned)
                    out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
                else
                    out = out_arg += chunk;

                nbytes -= chunk;
                chunk = PADLOCK_CHUNK;
            } while (nbytes >= AES_BLOCK_SIZE);

        if (nbytes) {
            unsigned char *ivp = cdata->iv;

            ctx->num = nbytes;
            padlock_reload_key(); /* empirically found */
            padlock_xcrypt_ecb(1, cdata, ivp, ivp);
            padlock_reload_key(); /* empirically found */
            while (nbytes) {
                *(out_arg++) = *(in_arg++) ^ *ivp;
                ivp++, nbytes--;
            }
        }

        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    /* Clean the realign buffer if it was used */
    if (out_misaligned) {
        volatile unsigned long *p = (void *)out;
        size_t n = allocated / sizeof(*p);
        while (n--)
            *p++ = 0;
    }

    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}

#   endif                      /* OPENSSL_NO_AES */

/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage in secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */
/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int eax, buf;

    while (count >= 8) {
        eax = padlock_xstore(output, 0);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 8)
            return 0;           /* fatal failure... */
        output += 8;
        count -= 8;
    }
    while (count > 0) {
        eax = padlock_xstore(&buf, 3);
        if (!(eax & (1 << 6)))
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if (eax & (0x1F << 10))
            return 0;
        if ((eax & 0x1F) == 0)
            continue;           /* no data, retry... */
        if ((eax & 0x1F) != 1)
            return 0;           /* fatal failure... */
        *output++ = (unsigned char)buf;
        count--;
    }
    *(volatile unsigned int *)&buf = 0;

    return 1;
}
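
/*
 * A note on the status word checked above, as interpreted by this code
 * (not an exhaustive description): the low 5 bits of EAX report how many
 * bytes were stored, bit 6 reports whether the RNG is enabled, and bits
 * 10-14 flag quality problems (DC bias, Raw Bits, String Filter) that
 * make the output unusable.
 */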

/* Dummy but necessary function */
static int padlock_rand_status(void)
{
    return 1;
}

/* Prepare structure for registration */
static RAND_METHOD padlock_rand = {
    NULL,                       /* seed */
    padlock_rand_bytes,         /* bytes */
    NULL,                       /* cleanup */
    NULL,                       /* add */
    padlock_rand_bytes,         /* pseudorand */
    padlock_rand_status,        /* rand status */
};

#  else                         /* !COMPILE_HW_PADLOCK */
#   ifndef OPENSSL_NO_DYNAMIC_ENGINE
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns);
OPENSSL_EXPORT
int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns)
{
    return 0;
}

IMPLEMENT_DYNAMIC_CHECK_FN()
#   endif
#  endif                        /* COMPILE_HW_PADLOCK */
# endif                         /* !OPENSSL_NO_HW_PADLOCK */
#endif                          /* !OPENSSL_NO_HW */