]> git.ipfire.org Git - thirdparty/openssl.git/blame - crypto/engine/eng_padlock.c
Run util/openssl-format-source -v -c .
[thirdparty/openssl.git] / crypto / engine / eng_padlock.c
CommitLineData
b558c8d5 1/*-
5b172463
AP
2 * Support for VIA PadLock Advanced Cryptography Engine (ACE)
3 * Written by Michal Ludvig <michal@logix.cz>
4 * http://www.logix.cz/michal
5 *
40720ce3
MC
6 * Big thanks to Andy Polyakov for a help with optimization,
7 * assembler fixes, port to MS Windows and a lot of other
5b172463
AP
8 * valuable work on this engine!
9 */
10
11/* ====================================================================
12 * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 *
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in
23 * the documentation and/or other materials provided with the
24 * distribution.
25 *
26 * 3. All advertising materials mentioning features or use of this
27 * software must display the following acknowledgment:
28 * "This product includes software developed by the OpenSSL Project
29 * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
30 *
31 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
32 * endorse or promote products derived from this software without
33 * prior written permission. For written permission, please contact
34 * licensing@OpenSSL.org.
35 *
36 * 5. Products derived from this software may not be called "OpenSSL"
37 * nor may "OpenSSL" appear in their names without prior written
38 * permission of the OpenSSL Project.
39 *
40 * 6. Redistributions of any form whatsoever must retain the following
41 * acknowledgment:
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
46 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
47 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
49 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
50 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
51 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
52 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
54 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
55 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
56 * OF THE POSSIBILITY OF SUCH DAMAGE.
57 * ====================================================================
58 *
59 * This product includes cryptographic software written by Eric Young
60 * (eay@cryptsoft.com). This product includes software written by Tim
61 * Hudson (tjh@cryptsoft.com).
62 *
63 */
64
5b172463
AP
65#include <stdio.h>
66#include <string.h>
67
e00b165e 68#include <openssl/opensslconf.h>
5b172463
AP
69#include <openssl/crypto.h>
70#include <openssl/dso.h>
71#include <openssl/engine.h>
72#include <openssl/evp.h>
e00b165e 73#ifndef OPENSSL_NO_AES
40720ce3 74# include <openssl/aes.h>
e00b165e 75#endif
c7439661 76#include <openssl/rand.h>
4913b88f 77#include <openssl/err.h>
5b172463
AP
78
79#ifndef OPENSSL_NO_HW
40720ce3 80# ifndef OPENSSL_NO_HW_PADLOCK
5b172463
AP
81
82/* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
40720ce3
MC
83# if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
84# ifndef OPENSSL_NO_DYNAMIC_ENGINE
5b172463 85# define DYNAMIC_ENGINE
40720ce3
MC
86# endif
87# elif (OPENSSL_VERSION_NUMBER >= 0x00907000L)
88# ifdef ENGINE_DYNAMIC_SUPPORT
5b172463 89# define DYNAMIC_ENGINE
40720ce3
MC
90# endif
91# else
92# error "Only OpenSSL >= 0.9.7 is supported"
5b172463 93# endif
5b172463 94
40720ce3
MC
95/*
96 * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it
97 * doesn't exist elsewhere, but it even can't be compiled on other platforms!
98 *
99 * In addition, because of the heavy use of inline assembler, compiler choice
100 * is limited to GCC and Microsoft C.
101 */
102# undef COMPILE_HW_PADLOCK
103# if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
104# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
f8fa22d8 105 (defined(_MSC_VER) && defined(_M_IX86))
40720ce3
MC
106# define COMPILE_HW_PADLOCK
107static ENGINE *ENGINE_padlock(void);
108# endif
109# endif
5b172463 110
/*
 * Register the PadLock engine with the global ENGINE list.
 * On non-x86 CPUs (or builds without PadLock support) it just returns.
 */
void ENGINE_load_padlock(void)
{
# ifdef COMPILE_HW_PADLOCK
    ENGINE *eng = ENGINE_padlock();

    if (eng == NULL)
        return;
    ENGINE_add(eng);
    /* Release the local reference now that the engine has been added. */
    ENGINE_free(eng);
    /* ENGINE_add() may queue a (harmless) error; discard it. */
    ERR_clear_error();
# endif
}
123
40720ce3
MC
124# ifdef COMPILE_HW_PADLOCK
125/*
126 * We do these includes here to avoid header problems on platforms that do
127 * not have the VIA padlock anyway...
128 */
129# ifdef _MSC_VER
130# include <malloc.h>
131# define alloca _alloca
132# elif defined(NETWARE_CLIB) && defined(__GNUC__)
133void *alloca(size_t);
134# define alloca(s) __builtin_alloca(s)
135# else
136# include <stdlib.h>
137# endif
c38ff58b 138
5b172463
AP
139/* Function for ENGINE detection and control */
140static int padlock_available(void);
141static int padlock_init(ENGINE *e);
142
143/* RNG Stuff */
144static RAND_METHOD padlock_rand;
145
146/* Cipher Stuff */
40720ce3
MC
147# ifndef OPENSSL_NO_AES
148static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher,
149 const int **nids, int nid);
150# endif
5b172463
AP
151
152/* Engine names */
153static const char *padlock_id = "padlock";
154static char padlock_name[100];
155
156/* Available features */
40720ce3
MC
157static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
158static int padlock_use_rng = 0; /* Random Number Generator */
159# ifndef OPENSSL_NO_AES
5b172463 160static int padlock_aes_align_required = 1;
40720ce3 161# endif
5b172463
AP
162
163/* ===== Engine "management" functions ===== */
164
165/* Prepare the ENGINE structure for registration */
40720ce3 166static int padlock_bind_helper(ENGINE *e)
5b172463 167{
40720ce3
MC
168 /* Check available features */
169 padlock_available();
170
171# if 1 /* disable RNG for now, see commentary in
172 * vicinity of RNG code */
173 padlock_use_rng = 0;
174# endif
175
176 /* Generate a nice engine name with available features */
177 BIO_snprintf(padlock_name, sizeof(padlock_name),
178 "VIA PadLock (%s, %s)",
179 padlock_use_rng ? "RNG" : "no-RNG",
180 padlock_use_ace ? "ACE" : "no-ACE");
181
182 /* Register everything or return with an error */
183 if (!ENGINE_set_id(e, padlock_id) ||
184 !ENGINE_set_name(e, padlock_name) ||
185 !ENGINE_set_init_function(e, padlock_init) ||
186# ifndef OPENSSL_NO_AES
187 (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) ||
188# endif
189 (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) {
190 return 0;
191 }
192
193 /* Everything looks good */
194 return 1;
5b172463
AP
195}
196
197/* Constructor */
40720ce3 198static ENGINE *ENGINE_padlock(void)
5b172463 199{
40720ce3 200 ENGINE *eng = ENGINE_new();
5b172463 201
40720ce3
MC
202 if (!eng) {
203 return NULL;
204 }
5b172463 205
40720ce3
MC
206 if (!padlock_bind_helper(eng)) {
207 ENGINE_free(eng);
208 return NULL;
209 }
5b172463 210
40720ce3 211 return eng;
5b172463
AP
212}
213
214/* Check availability of the engine */
40720ce3 215static int padlock_init(ENGINE *e)
5b172463 216{
40720ce3 217 return (padlock_use_rng || padlock_use_ace);
5b172463
AP
218}
219
40720ce3
MC
220/*
221 * This stuff is needed if this ENGINE is being compiled into a
222 * self-contained shared-library.
5b172463 223 */
40720ce3
MC
224# ifdef DYNAMIC_ENGINE
225static int padlock_bind_fn(ENGINE *e, const char *id)
5b172463 226{
40720ce3
MC
227 if (id && (strcmp(id, padlock_id) != 0)) {
228 return 0;
229 }
5b172463 230
40720ce3
MC
231 if (!padlock_bind_helper(e)) {
232 return 0;
233 }
5b172463 234
40720ce3 235 return 1;
5b172463
AP
236}
237
40720ce3
MC
238IMPLEMENT_DYNAMIC_CHECK_FN()
239 IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn)
240# endif /* DYNAMIC_ENGINE */
5b172463 241/* ===== Here comes the "real" engine ===== */
40720ce3 242# ifndef OPENSSL_NO_AES
5b172463 243/* Some AES-related constants */
40720ce3
MC
244# define AES_BLOCK_SIZE 16
245# define AES_KEY_SIZE_128 16
246# define AES_KEY_SIZE_192 24
247# define AES_KEY_SIZE_256 32
/*
 * Per-context state handed to the PadLock xcrypt instructions.
 */
/*
 * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on
 * the order of items in this structure -- it addresses the fields by
 * fixed byte offsets (iv at +0, cword at +16, ks at +32).  Don't blindly
 * modify, reorder, etc!
 */
struct padlock_cipher_data {
    unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */
    union {
        unsigned int pad[4];    /* pads the control word out to 16 bytes */
        struct {
            int rounds:4;       /* AES round count: 10/12/14 */
            int dgst:1;         /* n/a in C3 */
            int align:1;        /* n/a in C3 */
            int ciphr:1;        /* n/a in C3 */
            unsigned int keygen:1; /* 0 = hardware key schedule (AES-128),
                                    * 1 = software-expanded key */
            int interm:1;
            unsigned int encdec:1; /* 0 = encrypt, 1 = decrypt */
            int ksize:2;        /* key size code: (bits - 128) / 64 */
        } b;
    } cword;                    /* Control word */
    AES_KEY ks;                 /* Expanded encryption key */
};
273
b88606c2
AP
274/*
275 * Essentially this variable belongs in thread local storage.
276 * Having this variable global on the other hand can only cause
277 * few bogus key reloads [if any at all on single-CPU system],
 278 * so we accept the penalty...
279 */
280static volatile struct padlock_cipher_data *padlock_saved_context;
40720ce3 281# endif
b88606c2 282
3e8042c3 283/*-
5b172463
AP
284 * =======================================================
285 * Inline assembler section(s).
286 * =======================================================
287 * Order of arguments is chosen to facilitate Windows port
288 * using __fastcall calling convention. If you wish to add
b88606c2 289 * more routines, keep in mind that first __fastcall
5b172463
AP
290 * argument is passed in %ecx and second - in %edx.
291 * =======================================================
292 */
40720ce3 293# if defined(__GNUC__) && __GNUC__>=2
5b172463
AP
294/*
295 * As for excessive "push %ebx"/"pop %ebx" found all over.
296 * When generating position-independent code GCC won't let
297 * us use "b" in assembler templates nor even respect "ebx"
298 * in "clobber description." Therefore the trouble...
299 */
300
40720ce3
MC
/*
 * Helper function - check if a CPUID instruction is available on this CPU.
 * Returns 1 when CPUID can be executed, 0 otherwise.
 */
static int padlock_insn_cpuid_available(void)
{
    int result = -1;

    /*
     * We're checking if the bit #21 of EFLAGS can be toggled. If yes =
     * CPUID is available.  The sequence computes (requested bit value)
     * XOR (bit value actually read back) into %ecx, so 'result' is zero
     * exactly when the toggle took effect.
     */
    asm volatile ("pushf\n"
                  "popl %%eax\n"
                  "xorl $0x200000, %%eax\n"
                  "movl %%eax, %%ecx\n"
                  "andl $0x200000, %%ecx\n"
                  "pushl %%eax\n"
                  "popf\n"
                  "pushf\n"
                  "popl %%eax\n"
                  "andl $0x200000, %%eax\n"
                  "xorl %%eax, %%ecx\n"
                  "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx");

    return (result == 0);
}
327
40720ce3
MC
/*
 * Load supported features of the CPU to see if the PadLock is available.
 * Sets padlock_use_ace / padlock_use_rng as a side effect and returns
 * non-zero when at least one unit was detected.
 */
static int padlock_available(void)
{
    char vendor_string[16];
    unsigned int eax, edx;

    /* First check if the CPUID instruction is available at all... */
    if (!padlock_insn_cpuid_available())
        return 0;

    /*
     * Are we running on the Centaur (VIA) CPU?  CPUID leaf 0 returns the
     * vendor string in EBX:EDX:ECX; it is stored through %edi because
     * position-independent code must not clobber %ebx directly.
     */
    eax = 0x00000000;
    vendor_string[12] = 0;
    asm volatile ("pushl %%ebx\n"
                  "cpuid\n"
                  "movl %%ebx,(%%edi)\n"
                  "movl %%edx,4(%%edi)\n"
                  "movl %%ecx,8(%%edi)\n"
                  "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx");
    if (strcmp(vendor_string, "CentaurHauls") != 0)
        return 0;

    /* Check for Centaur Extended Feature Flags presence */
    eax = 0xC0000000;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx");
    if (eax < 0xC0000001)
        return 0;

    /* Read the Centaur Extended Feature Flags */
    eax = 0xC0000001;
    asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax),
                  "=d"(edx)::"ecx");

    /*
     * Fill up some flags.  Bits 6-7 of %edx cover ACE, bits 2-3 the RNG;
     * both bits of a pair must be set (per VIA's extended feature flags,
     * presumably "present" + "enabled" -- see the PadLock docs).
     */
    padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
    padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));

    return padlock_use_ace + padlock_use_rng;
}
369
40720ce3 370# ifndef OPENSSL_NO_AES
5b172463 371/* Our own htonl()/ntohl() */
40720ce3 372static inline void padlock_bswapl(AES_KEY *ks)
5b172463 373{
40720ce3
MC
374 size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]);
375 unsigned int *key = ks->rd_key;
5b172463 376
40720ce3
MC
377 while (i--) {
378 asm volatile ("bswapl %0":"+r" (*key));
379 key++;
380 }
5b172463 381}
40720ce3 382# endif
5b172463 383
40720ce3
MC
/*
 * Force key reload from memory to the CPU microcode. Loading EFLAGS from the
 * stack clears EFLAGS[30] which does the trick.  Called after the key
 * material in the context may have changed.
 */
static inline void padlock_reload_key(void)
{
    asm volatile ("pushfl; popfl");
}
392
40720ce3 393# ifndef OPENSSL_NO_AES
5b172463
AP
/*
 * This is heuristic key context tracing. At first one
 * believes that one should use atomic swap instructions,
 * but it's not actually necessary. Point is that if
 * padlock_saved_context was changed by another thread
 * after we've read it and before we compare it with cdata,
 * our key *shall* be reloaded upon thread context switch
 * and we are therefore set in either case...
 */
static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
{
    /*
     * If EFLAGS[30] is already clear, a key reload is pending anyway
     * (cf. padlock_reload_key); and if the last loaded context is this
     * very 'cdata', no reload is needed -- in both cases just discard
     * the pushed flags.  Otherwise pop them back into EFLAGS, clearing
     * bit 30 and forcing a key reload.  Either way, 'cdata' is recorded
     * as the currently loaded context.
     */
    asm volatile ("pushfl\n"
                  " btl $30,(%%esp)\n"
                  " jnc 1f\n"
                  " cmpl %2,%1\n"
                  " je 1f\n"
                  " popfl\n"
                  " subl $4,%%esp\n"
                  "1: addl $4,%%esp\n"
                  " movl %2,%0":"+m" (padlock_saved_context)
                  :"r"(padlock_saved_context), "r"(cdata):"cc");
}
416
/* Template for padlock_xcrypt_* modes */
/*
 * BIG FAT WARNING: The offsets used with 'leal' instructions describe items
 * of the 'padlock_cipher_data' structure: +16 is the control word 'cword',
 * +32 is the expanded key 'ks'; the IV sits at offset 0 and is addressed
 * through %eax itself.  The xcrypt instruction leaves the final IV pointer
 * in %eax, which each generated function returns.
 */
# define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)     \
static inline void *name(size_t cnt,             \
                         struct padlock_cipher_data *cdata, \
                         void *out, const void *inp) \
{   void *iv;                                    \
    asm volatile ( "pushl %%ebx\n"               \
                   " leal 16(%0),%%edx\n"        \
                   " leal 32(%0),%%ebx\n"        \
                   rep_xcrypt "\n"               \
                   " popl %%ebx"                 \
                   : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
                   : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
                   : "edx", "cc", "memory");     \
    return iv;                                   \
}

/* Generate all functions with appropriate opcodes */
/* rep xcryptecb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")
/* rep xcryptcbc */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")
/* rep xcryptcfb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")
/* rep xcryptofb */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")
# endif
/* The RNG call itself */
static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in)
{
    unsigned int eax_out;

    /*
     * 'xstore' writes random bytes from the on-chip RNG to *addr;
     * 'edx_in' is passed to the instruction in %edx (quality/control
     * selector per VIA's docs -- interpretation is left to the caller
     * in the RNG code) and the status word comes back in %eax.
     */
    asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */
                  :"=a" (eax_out), "=m"(*(unsigned *)addr)
                  :"D"(addr), "d"(edx_in)
                  );

    return eax_out;
}
460
40720ce3
MC
461/*
462 * Why not inline 'rep movsd'? I failed to find information on what value in
463 * Direction Flag one can expect and consequently have to apply
464 * "better-safe-than-sorry" approach and assume "undefined." I could
465 * explicitly clear it and restore the original value upon return from
466 * padlock_aes_cipher, but it's presumably too much trouble for too little
467 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
468 * affected by the Direction Flag and pointers advance toward larger
469 * addresses unconditionally.
470 */
471static inline unsigned char *padlock_memcpy(void *dst, const void *src,
472 size_t n)
52697590 473{
40720ce3
MC
474 long *d = dst;
475 const long *s = src;
52697590 476
40720ce3
MC
477 n /= sizeof(*d);
478 do {
479 *d++ = *s++;
480 } while (--n);
52697590 481
40720ce3 482 return dst;
52697590
AP
483}
484
40720ce3 485# elif defined(_MSC_VER)
5b172463
AP
/*
 * Unlike GCC these are real functions. In order to minimize impact
 * on performance we adhere to __fastcall calling convention in
 * order to get two first arguments passed through %ecx and %edx.
 * Which kind of suits very well, as instructions in question use
 * both %ecx and %edx as input:-)
 */
/* Emit the 'rep xcrypt*' opcode bytes; 'code' selects the mode. */
# define REP_XCRYPT(code)                \
    _asm _emit 0xf3                      \
    _asm _emit 0x0f _asm _emit 0xa7      \
    _asm _emit code

/*
 * BIG FAT WARNING: The offsets used with 'lea' instructions describe items
 * of the 'padlock_cipher_data' structure: +16 is the control word,
 * +32 the expanded key (the IV is at offset 0, addressed via %eax).
 */
# define PADLOCK_XCRYPT_ASM(name,code)   \
static void * __fastcall                 \
    name (size_t cnt, void *cdata,       \
          void *outp, const void *inp)   \
{   _asm    mov     eax,edx              \
    _asm    lea     edx,[eax+16]         \
    _asm    lea     ebx,[eax+32]         \
    _asm    mov     edi,outp             \
    _asm    mov     esi,inp              \
    REP_XCRYPT(code)                     \
}

/* Generate the four mode-specific xcrypt wrappers. */
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, 0xc8)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, 0xd0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, 0xe0)
PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, 0xe8)
5b172463 518
40720ce3
MC
/* MSC inline-assembler versions of the GCC primitives above. */

/* The RNG call itself; 'outp' arrives in %ecx (__fastcall). */
static int __fastcall padlock_xstore(void *outp, unsigned int code)
{
    _asm mov edi,ecx
    _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0 /* xstore */
}

/* Force key reload: loading EFLAGS from the stack clears EFLAGS[30]. */
static void __fastcall padlock_reload_key(void)
{
    _asm pushfd _asm popfd
}

/* Heuristic context tracing -- see the GCC version for the rationale. */
static void __fastcall padlock_verify_context(void *cdata)
{
    _asm {
        pushfd
        bt DWORD PTR[esp],30
        jnc skip
        cmp ecx,padlock_saved_context
        je skip
        popfd
        sub esp,4
    skip: add esp,4
        mov padlock_saved_context,ecx
    }
}

/* CPUID-based probe; sets padlock_use_ace / padlock_use_rng. */
static int padlock_available(void)
{
    _asm {
        pushfd
        pop eax
        mov ecx,eax
        xor eax,1<<21
        push eax
        popfd
        pushfd
        pop eax
        xor eax,ecx
        bt eax,21
        jnc noluck
        mov eax,0
        cpuid
        xor eax,eax
        cmp ebx,'tneC'          /* "CentaurHauls" vendor check */
        jne noluck
        cmp edx,'Hrua'
        jne noluck
        cmp ecx,'slua'
        jne noluck
        mov eax,0xC0000000
        cpuid
        mov edx,eax
        xor eax,eax
        cmp edx,0xC0000001
        jb noluck
        mov eax,0xC0000001
        cpuid
        xor eax,eax
        bt edx,6                /* ACE: both bits 6 and 7 required */
        jnc skip_a
        bt edx,7
        jnc skip_a
        mov padlock_use_ace,1
        inc eax
    skip_a: bt edx,2            /* RNG: both bits 2 and 3 required */
        jnc skip_r
        bt edx,3
        jnc skip_r
        mov padlock_use_rng,1
        inc eax
    skip_r:
    noluck:
    }
}

/* Byte-swap the 60-dword expanded AES key in place (cf. GCC version). */
static void __fastcall padlock_bswapl(void *key)
{
    _asm {
        pushfd
        cld
        mov esi,ecx
        mov edi,ecx
        mov ecx,60
    up: lodsd
        bswap eax
        stosd
        loop up
        popfd
    }
}
550/*
551 * MS actually specifies status of Direction Flag and compiler even manages
552 * to compile following as 'rep movsd' all by itself...
52697590 553 */
40720ce3
MC
554# define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U))
555# endif
5b172463 556/* ===== AES encryption/decryption ===== */
40720ce3
MC
557# ifndef OPENSSL_NO_AES
558# if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb)
559# define NID_aes_128_cfb NID_aes_128_cfb128
560# endif
561# if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb)
562# define NID_aes_128_ofb NID_aes_128_ofb128
563# endif
564# if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb)
565# define NID_aes_192_cfb NID_aes_192_cfb128
566# endif
567# if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb)
568# define NID_aes_192_ofb NID_aes_192_ofb128
569# endif
570# if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb)
571# define NID_aes_256_cfb NID_aes_256_cfb128
572# endif
573# if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb)
574# define NID_aes_256_ofb NID_aes_256_ofb128
575# endif
/*
 * List of supported ciphers.  Must stay in sync with the NIDs handled
 * by padlock_ciphers() below.
 */
static int padlock_cipher_nids[] = {
    NID_aes_128_ecb,
    NID_aes_128_cbc,
    NID_aes_128_cfb,
    NID_aes_128_ofb,

    NID_aes_192_ecb,
    NID_aes_192_cbc,
    NID_aes_192_cfb,
    NID_aes_192_ofb,

    NID_aes_256_ecb,
    NID_aes_256_cbc,
    NID_aes_256_cfb,
    NID_aes_256_ofb,
};

/* Number of entries in padlock_cipher_nids. */
static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) /
                                      sizeof(padlock_cipher_nids[0]));
5b172463
AP
595
596/* Function prototypes ... */
597static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
40720ce3 598 const unsigned char *iv, int enc);
5b172463 599static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
40720ce3
MC
600 const unsigned char *in, size_t nbytes);
601
602# define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \
603 ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
604# define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
605 NEAREST_ALIGNED(ctx->cipher_data))
606
607# define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE
608# define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE
609# define EVP_CIPHER_block_size_OFB 1
610# define EVP_CIPHER_block_size_CFB 1
611
/*
 * Declaring so many ciphers by hand would be a pain. Instead introduce a bit
 * of preprocessor magic :-)
 *
 * Each expansion defines a 'static const EVP_CIPHER' with initializers in
 * the 0.9.x EVP_CIPHER field order (nid, block size, key length, IV length,
 * flags, init, do_cipher, cleanup, ctx_size, ASN1 set/get, ctrl, app_data).
 * The ctx_size includes 16 spare bytes so ALIGNED_CIPHER_DATA() can round
 * the context up to a 16-byte boundary.
 */
# define DECLARE_AES_EVP(ksize,lmode,umode)      \
static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \
    NID_aes_##ksize##_##lmode,                   \
    EVP_CIPHER_block_size_##umode,               \
    AES_KEY_SIZE_##ksize,                        \
    AES_BLOCK_SIZE,                              \
    0 | EVP_CIPH_##umode##_MODE,                 \
    padlock_aes_init_key,                        \
    padlock_aes_cipher,                          \
    NULL,                       /* cleanup */    \
    sizeof(struct padlock_cipher_data) + 16,     \
    EVP_CIPHER_set_asn1_iv,                      \
    EVP_CIPHER_get_asn1_iv,                      \
    NULL,                       /* ctrl */       \
    NULL                        /* app_data */   \
}
632
40720ce3
MC
633DECLARE_AES_EVP(128, ecb, ECB);
634DECLARE_AES_EVP(128, cbc, CBC);
635DECLARE_AES_EVP(128, cfb, CFB);
636DECLARE_AES_EVP(128, ofb, OFB);
5b172463 637
40720ce3
MC
638DECLARE_AES_EVP(192, ecb, ECB);
639DECLARE_AES_EVP(192, cbc, CBC);
640DECLARE_AES_EVP(192, cfb, CFB);
641DECLARE_AES_EVP(192, ofb, OFB);
5b172463 642
40720ce3
MC
643DECLARE_AES_EVP(256, ecb, ECB);
644DECLARE_AES_EVP(256, cbc, CBC);
645DECLARE_AES_EVP(256, cfb, CFB);
646DECLARE_AES_EVP(256, ofb, OFB);
5b172463
AP
647
648static int
40720ce3
MC
649padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids,
650 int nid)
5b172463 651{
40720ce3
MC
652 /* No specific cipher => return a list of supported nids ... */
653 if (!cipher) {
654 *nids = padlock_cipher_nids;
655 return padlock_cipher_nids_num;
656 }
657
658 /* ... or the requested "cipher" otherwise */
659 switch (nid) {
660 case NID_aes_128_ecb:
661 *cipher = &padlock_aes_128_ecb;
662 break;
663 case NID_aes_128_cbc:
664 *cipher = &padlock_aes_128_cbc;
665 break;
666 case NID_aes_128_cfb:
667 *cipher = &padlock_aes_128_cfb;
668 break;
669 case NID_aes_128_ofb:
670 *cipher = &padlock_aes_128_ofb;
671 break;
672
673 case NID_aes_192_ecb:
674 *cipher = &padlock_aes_192_ecb;
675 break;
676 case NID_aes_192_cbc:
677 *cipher = &padlock_aes_192_cbc;
678 break;
679 case NID_aes_192_cfb:
680 *cipher = &padlock_aes_192_cfb;
681 break;
682 case NID_aes_192_ofb:
683 *cipher = &padlock_aes_192_ofb;
684 break;
685
686 case NID_aes_256_ecb:
687 *cipher = &padlock_aes_256_ecb;
688 break;
689 case NID_aes_256_cbc:
690 *cipher = &padlock_aes_256_cbc;
691 break;
692 case NID_aes_256_cfb:
693 *cipher = &padlock_aes_256_cfb;
694 break;
695 case NID_aes_256_ofb:
696 *cipher = &padlock_aes_256_ofb;
697 break;
698
699 default:
700 /* Sorry, we don't support this NID */
701 *cipher = NULL;
702 return 0;
703 }
704
705 return 1;
5b172463
AP
706}
707
/*
 * Prepare the encryption key for PadLock usage: fill in the control word
 * and either store the raw key (AES-128, hardware key schedule) or a
 * software-expanded key (AES-192/256).  Returns 1 on success, 0 on error.
 */
static int
padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
                     const unsigned char *iv, int enc)
{
    struct padlock_cipher_data *cdata;
    int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; /* length in bits */

    if (key == NULL)
        return 0;               /* ERROR */

    cdata = ALIGNED_CIPHER_DATA(ctx);
    memset(cdata, 0, sizeof(struct padlock_cipher_data));

    /* Prepare Control word. */
    if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE)
        /* OFB generates its keystream with the encrypt direction only */
        cdata->cword.b.encdec = 0;
    else
        cdata->cword.b.encdec = (ctx->encrypt == 0);
    cdata->cword.b.rounds = 10 + (key_len - 128) / 32; /* 10/12/14 */
    cdata->cword.b.ksize = (key_len - 128) / 64; /* 0/1/2 for 128/192/256 */

    switch (key_len) {
    case 128:
        /*
         * PadLock can generate an extended key for AES128 in hardware
         */
        memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128);
        cdata->cword.b.keygen = 0;
        break;

    case 192:
    case 256:
        /*
         * Generate an extended AES key in software. Needed for AES192/AES256
         */
        /*
         * Well, the above applies to Stepping 8 CPUs and is listed as
         * hardware errata. They most likely will fix it at some point and
         * then a check for stepping would be due here.
         */
        if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE ||
            EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc)
            AES_set_encrypt_key(key, key_len, &cdata->ks);
        else
            AES_set_decrypt_key(key, key_len, &cdata->ks);
# ifndef AES_ASM
        /*
         * OpenSSL C functions use byte-swapped extended key.
         */
        padlock_bswapl(&cdata->ks);
# endif
        cdata->cword.b.keygen = 1;
        break;

    default:
        /* ERROR: unsupported key length */
        return 0;
    }

    /*
     * This is done to cover for cases when user reuses the
     * context for new key. The catch is that if we don't do
     * this, padlock_aes_cipher might proceed with old key...
     */
    padlock_reload_key();

    return 1;
}
777
/*-
 * Simplified version of padlock_aes_cipher() used when
 * 1) both input and output buffers are at aligned addresses.
 * or when
 * 2) running on a newer CPU that doesn't require aligned buffers.
 */
static int
padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
                              const unsigned char *in_arg, size_t nbytes)
{
    struct padlock_cipher_data *cdata;
    void *iv;

    cdata = ALIGNED_CIPHER_DATA(ctx);
    /* Make sure the hardware has this context's key loaded. */
    padlock_verify_context(cdata);

    /*
     * Each IV-using mode copies ctx->iv into the aligned working copy,
     * runs the whole buffer through a single 'rep xcrypt', and writes
     * the resulting IV back into ctx->iv.
     */
    switch (EVP_CIPHER_CTX_mode(ctx)) {
    case EVP_CIPH_ECB_MODE:
        padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        break;

    case EVP_CIPH_CBC_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_CFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg,
                                in_arg);
        memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
        break;

    case EVP_CIPH_OFB_MODE:
        memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
        padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg);
        /* OFB leaves the updated IV in cdata->iv rather than returning it */
        memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
        break;

    default:
        return 0;
    }

    /* Scrub the working IV copy before returning. */
    memset(cdata->iv, 0, AES_BLOCK_SIZE);

    return 1;
}
827
40720ce3
MC
828# ifndef PADLOCK_CHUNK
829# define PADLOCK_CHUNK 512 /* Must be a power of 2 larger than 16 */
830# endif
831# if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1)
832# error "insane PADLOCK_CHUNK..."
833# endif
5b172463 834
40720ce3
MC
835/*
836 * Re-align the arguments to 16-Bytes boundaries and run the encryption
837 * function itself. This function is not AES-specific.
838 */
5b172463
AP
839static int
840padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg,
40720ce3 841 const unsigned char *in_arg, size_t nbytes)
5b172463 842{
40720ce3
MC
843 struct padlock_cipher_data *cdata;
844 const void *inp;
845 unsigned char *out;
846 void *iv;
847 int inp_misaligned, out_misaligned, realign_in_loop;
848 size_t chunk, allocated = 0;
849
850 /*
851 * ctx->num is maintained in byte-oriented modes, such as CFB and OFB...
852 */
853 if ((chunk = ctx->num)) { /* borrow chunk variable */
854 unsigned char *ivp = ctx->iv;
855
856 switch (EVP_CIPHER_CTX_mode(ctx)) {
857 case EVP_CIPH_CFB_MODE:
858 if (chunk >= AES_BLOCK_SIZE)
859 return 0; /* bogus value */
860
861 if (ctx->encrypt)
862 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
863 ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk];
864 chunk++, nbytes--;
865 } else
866 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
867 unsigned char c = *(in_arg++);
868 *(out_arg++) = c ^ ivp[chunk];
869 ivp[chunk++] = c, nbytes--;
870 }
871
872 ctx->num = chunk % AES_BLOCK_SIZE;
873 break;
874 case EVP_CIPH_OFB_MODE:
875 if (chunk >= AES_BLOCK_SIZE)
876 return 0; /* bogus value */
877
878 while (chunk < AES_BLOCK_SIZE && nbytes != 0) {
879 *(out_arg++) = *(in_arg++) ^ ivp[chunk];
880 chunk++, nbytes--;
881 }
882
883 ctx->num = chunk % AES_BLOCK_SIZE;
884 break;
885 }
886 }
887
888 if (nbytes == 0)
889 return 1;
890# if 0
891 if (nbytes % AES_BLOCK_SIZE)
892 return 0; /* are we expected to do tail processing? */
893# else
894 /*
895 * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and
896 * arbitrary value in byte-oriented modes, such as CFB and OFB...
897 */
898# endif
899
900 /*
901 * VIA promises CPUs that won't require alignment in the future. For now
902 * padlock_aes_align_required is initialized to 1 and the condition is
903 * never met...
904 */
905 /*
906 * C7 core is capable to manage unaligned input in non-ECB[!] mode, but
907 * performance penalties appear to be approximately same as for software
908 * alignment below or ~3x. They promise to improve it in the future, but
909 * for now we can just as well pretend that it can only handle aligned
910 * input...
911 */
912 if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0)
913 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
914
915 inp_misaligned = (((size_t)in_arg) & 0x0F);
916 out_misaligned = (((size_t)out_arg) & 0x0F);
917
918 /*
919 * Note that even if output is aligned and input not, I still prefer to
920 * loop instead of copy the whole input and then encrypt in one stroke.
921 * This is done in order to improve L1 cache utilization...
922 */
923 realign_in_loop = out_misaligned | inp_misaligned;
924
925 if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0)
926 return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes);
927
928 /* this takes one "if" out of the loops */
929 chunk = nbytes;
930 chunk %= PADLOCK_CHUNK;
931 if (chunk == 0)
932 chunk = PADLOCK_CHUNK;
933
934 if (out_misaligned) {
 935 /* optimize for small input */
936 allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes);
937 out = alloca(0x10 + allocated);
938 out = NEAREST_ALIGNED(out);
939 } else
940 out = out_arg;
941
942 cdata = ALIGNED_CIPHER_DATA(ctx);
943 padlock_verify_context(cdata);
944
945 switch (EVP_CIPHER_CTX_mode(ctx)) {
946 case EVP_CIPH_ECB_MODE:
947 do {
948 if (inp_misaligned)
949 inp = padlock_memcpy(out, in_arg, chunk);
950 else
951 inp = in_arg;
952 in_arg += chunk;
953
954 padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
955
956 if (out_misaligned)
957 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
958 else
959 out = out_arg += chunk;
960
961 nbytes -= chunk;
962 chunk = PADLOCK_CHUNK;
963 } while (nbytes);
964 break;
965
966 case EVP_CIPH_CBC_MODE:
967 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
968 goto cbc_shortcut;
969 do {
970 if (iv != cdata->iv)
971 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
972 chunk = PADLOCK_CHUNK;
973 cbc_shortcut: /* optimize for small input */
974 if (inp_misaligned)
975 inp = padlock_memcpy(out, in_arg, chunk);
976 else
977 inp = in_arg;
978 in_arg += chunk;
979
980 iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp);
981
982 if (out_misaligned)
983 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
984 else
985 out = out_arg += chunk;
986
987 } while (nbytes -= chunk);
988 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
989 break;
990
991 case EVP_CIPH_CFB_MODE:
992 memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE);
993 chunk &= ~(AES_BLOCK_SIZE - 1);
994 if (chunk)
995 goto cfb_shortcut;
996 else
997 goto cfb_skiploop;
998 do {
999 if (iv != cdata->iv)
1000 memcpy(cdata->iv, iv, AES_BLOCK_SIZE);
1001 chunk = PADLOCK_CHUNK;
1002 cfb_shortcut: /* optimize for small input */
1003 if (inp_misaligned)
1004 inp = padlock_memcpy(out, in_arg, chunk);
1005 else
1006 inp = in_arg;
1007 in_arg += chunk;
1008
1009 iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1010
1011 if (out_misaligned)
1012 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1013 else
1014 out = out_arg += chunk;
1015
1016 nbytes -= chunk;
1017 } while (nbytes >= AES_BLOCK_SIZE);
1018
1019 cfb_skiploop:
1020 if (nbytes) {
1021 unsigned char *ivp = cdata->iv;
1022
1023 if (iv != ivp) {
1024 memcpy(ivp, iv, AES_BLOCK_SIZE);
1025 iv = ivp;
1026 }
1027 ctx->num = nbytes;
1028 if (cdata->cword.b.encdec) {
1029 cdata->cword.b.encdec = 0;
1030 padlock_reload_key();
1031 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1032 cdata->cword.b.encdec = 1;
1033 padlock_reload_key();
1034 while (nbytes) {
1035 unsigned char c = *(in_arg++);
1036 *(out_arg++) = c ^ *ivp;
1037 *(ivp++) = c, nbytes--;
1038 }
1039 } else {
1040 padlock_reload_key();
1041 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1042 padlock_reload_key();
1043 while (nbytes) {
1044 *ivp = *(out_arg++) = *(in_arg++) ^ *ivp;
1045 ivp++, nbytes--;
1046 }
1047 }
1048 }
1049
1050 memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
1051 break;
1052
1053 case EVP_CIPH_OFB_MODE:
1054 memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE);
1055 chunk &= ~(AES_BLOCK_SIZE - 1);
1056 if (chunk)
1057 do {
1058 if (inp_misaligned)
1059 inp = padlock_memcpy(out, in_arg, chunk);
1060 else
1061 inp = in_arg;
1062 in_arg += chunk;
1063
1064 padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp);
1065
1066 if (out_misaligned)
1067 out_arg = padlock_memcpy(out_arg, out, chunk) + chunk;
1068 else
1069 out = out_arg += chunk;
1070
1071 nbytes -= chunk;
1072 chunk = PADLOCK_CHUNK;
1073 } while (nbytes >= AES_BLOCK_SIZE);
1074
1075 if (nbytes) {
1076 unsigned char *ivp = cdata->iv;
1077
1078 ctx->num = nbytes;
1079 padlock_reload_key(); /* empirically found */
1080 padlock_xcrypt_ecb(1, cdata, ivp, ivp);
1081 padlock_reload_key(); /* empirically found */
1082 while (nbytes) {
1083 *(out_arg++) = *(in_arg++) ^ *ivp;
1084 ivp++, nbytes--;
1085 }
1086 }
1087
1088 memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE);
1089 break;
1090
1091 default:
1092 return 0;
1093 }
1094
1095 /* Clean the realign buffer if it was used */
1096 if (out_misaligned) {
1097 volatile unsigned long *p = (void *)out;
1098 size_t n = allocated / sizeof(*p);
1099 while (n--)
1100 *p++ = 0;
1101 }
1102
1103 memset(cdata->iv, 0, AES_BLOCK_SIZE);
1104
1105 return 1;
5b172463
AP
1106}
1107
40720ce3 1108# endif /* OPENSSL_NO_AES */
e00b165e 1109
5b172463
AP
/* ===== Random Number Generator ===== */
/*
 * This code is not engaged. The reason is that it does not comply
 * with recommendations for VIA RNG usage for secure applications
 * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it
 * provide meaningful error control...
 */

/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG
 */
/*
 * Fetch random bytes from the PadLock RNG via the XSTORE instruction.
 * Returns 1 on success; 0 if the RNG is disabled, a hardware self-test
 * bit is raised, or an unexpected byte count is delivered.
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int status, scratch;

    /* Pull eight bytes per XSTORE while a full quadword is still wanted. */
    while (count >= 8) {
        status = padlock_xstore(output, 0);
        /*
         * Bit 6 clear => RNG disabled; bits 10..14 flag the DC bias,
         * Raw Bits and String Filter self-test failures.
         */
        if (!(status & (1 << 6)) || (status & (0x1F << 10)))
            return 0;
        switch (status & 0x1F) {
        case 0:
            continue;           /* nothing delivered, retry... */
        case 8:
            output += 8;
            count -= 8;
            break;
        default:
            return 0;           /* fatal failure... */
        }
    }

    /* Fetch any trailing bytes one at a time into a scratch word. */
    while (count > 0) {
        status = padlock_xstore(&scratch, 3);
        if (!(status & (1 << 6)) || (status & (0x1F << 10)))
            return 0;
        switch (status & 0x1F) {
        case 0:
            continue;           /* nothing delivered, retry... */
        case 1:
            *output++ = (unsigned char)scratch;
            count--;
            break;
        default:
            return 0;           /* fatal failure... */
        }
    }

    /* Scrub the last random word off the stack (volatile defeats DSE). */
    *(volatile unsigned int *)&scratch = 0;

    return 1;
}
1157
/*
 * RAND_METHOD "status" callback.  The PadLock RNG requires no seeding,
 * so readiness is reported unconditionally.  (Dummy but necessary.)
 */
static int padlock_rand_status(void)
{
    return 1;
}
1163
/*
 * Prepare structure for registration.  Fields are positional per the
 * RAND_METHOD layout: seed, bytes, cleanup, add, pseudorand, status.
 * NOTE(review): pseudorand reuses padlock_rand_bytes — presumably the
 * hardware RNG has no cheaper "pseudo" path; confirm against RAND_METHOD
 * contract.
 */
static RAND_METHOD padlock_rand = {
    NULL,                       /* seed */
    padlock_rand_bytes,         /* bytes */
    NULL,                       /* cleanup */
    NULL,                       /* add */
    padlock_rand_bytes,         /* pseudorand */
    padlock_rand_status,        /* rand status */
};
1173
40720ce3 1174# endif /* COMPILE_HW_PADLOCK */
5b172463 1175
40720ce3
MC
1176# endif /* !OPENSSL_NO_HW_PADLOCK */
1177#endif /* !OPENSSL_NO_HW */