]>
Commit | Line | Data |
---|---|---|
b558c8d5 | 1 | /*- |
5b172463 AP |
2 | * Support for VIA PadLock Advanced Cryptography Engine (ACE) |
3 | * Written by Michal Ludvig <michal@logix.cz> | |
4 | * http://www.logix.cz/michal | |
5 | * | |
40720ce3 MC |
6 | * Big thanks to Andy Polyakov for a help with optimization, |
7 | * assembler fixes, port to MS Windows and a lot of other | |
5b172463 AP |
8 | * valuable work on this engine! |
9 | */ | |
10 | ||
11 | /* ==================================================================== | |
12 | * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. | |
13 | * | |
14 | * Redistribution and use in source and binary forms, with or without | |
15 | * modification, are permitted provided that the following conditions | |
16 | * are met: | |
17 | * | |
18 | * 1. Redistributions of source code must retain the above copyright | |
19 | * notice, this list of conditions and the following disclaimer. | |
20 | * | |
21 | * 2. Redistributions in binary form must reproduce the above copyright | |
22 | * notice, this list of conditions and the following disclaimer in | |
23 | * the documentation and/or other materials provided with the | |
24 | * distribution. | |
25 | * | |
26 | * 3. All advertising materials mentioning features or use of this | |
27 | * software must display the following acknowledgment: | |
28 | * "This product includes software developed by the OpenSSL Project | |
29 | * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
30 | * | |
31 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
32 | * endorse or promote products derived from this software without | |
33 | * prior written permission. For written permission, please contact | |
34 | * licensing@OpenSSL.org. | |
35 | * | |
36 | * 5. Products derived from this software may not be called "OpenSSL" | |
37 | * nor may "OpenSSL" appear in their names without prior written | |
38 | * permission of the OpenSSL Project. | |
39 | * | |
40 | * 6. Redistributions of any form whatsoever must retain the following | |
41 | * acknowledgment: | |
42 | * "This product includes software developed by the OpenSSL Project | |
43 | * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
44 | * | |
45 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
46 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
47 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
48 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
49 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
50 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
51 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
52 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
53 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
54 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
55 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
56 | * OF THE POSSIBILITY OF SUCH DAMAGE. | |
57 | * ==================================================================== | |
58 | * | |
59 | * This product includes cryptographic software written by Eric Young | |
60 | * (eay@cryptsoft.com). This product includes software written by Tim | |
61 | * Hudson (tjh@cryptsoft.com). | |
62 | * | |
63 | */ | |
64 | ||
5b172463 AP |
65 | #include <stdio.h> |
66 | #include <string.h> | |
67 | ||
e00b165e | 68 | #include <openssl/opensslconf.h> |
5b172463 AP |
69 | #include <openssl/crypto.h> |
70 | #include <openssl/dso.h> | |
71 | #include <openssl/engine.h> | |
72 | #include <openssl/evp.h> | |
e00b165e | 73 | #ifndef OPENSSL_NO_AES |
40720ce3 | 74 | # include <openssl/aes.h> |
e00b165e | 75 | #endif |
c7439661 | 76 | #include <openssl/rand.h> |
4913b88f | 77 | #include <openssl/err.h> |
5b172463 AP |
78 | |
79 | #ifndef OPENSSL_NO_HW | |
40720ce3 | 80 | # ifndef OPENSSL_NO_HW_PADLOCK |
5b172463 AP |
81 | |
82 | /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ | |
40720ce3 MC |
83 | # if (OPENSSL_VERSION_NUMBER >= 0x00908000L) |
84 | # ifndef OPENSSL_NO_DYNAMIC_ENGINE | |
5b172463 | 85 | # define DYNAMIC_ENGINE |
40720ce3 MC |
86 | # endif |
87 | # elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) | |
88 | # ifdef ENGINE_DYNAMIC_SUPPORT | |
5b172463 | 89 | # define DYNAMIC_ENGINE |
40720ce3 MC |
90 | # endif |
91 | # else | |
92 | # error "Only OpenSSL >= 0.9.7 is supported" | |
5b172463 | 93 | # endif |
5b172463 | 94 | |
40720ce3 MC |
95 | /* |
96 | * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only that it | |
97 | * doesn't exist elsewhere, but it even can't be compiled on other platforms! | |
98 | * | |
99 | * In addition, because of the heavy use of inline assembler, compiler choice | |
100 | * is limited to GCC and Microsoft C. | |
101 | */ | |
102 | # undef COMPILE_HW_PADLOCK | |
103 | # if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) | |
104 | # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ | |
f8fa22d8 | 105 | (defined(_MSC_VER) && defined(_M_IX86)) |
40720ce3 MC |
106 | # define COMPILE_HW_PADLOCK |
107 | static ENGINE *ENGINE_padlock(void); | |
108 | # endif | |
109 | # endif | |
5b172463 | 110 | |
/*
 * Public entry point: construct the PadLock ENGINE and hand it to
 * ENGINE_add().  When COMPILE_HW_PADLOCK is not defined the body is empty
 * and the call simply returns.
 */
void ENGINE_load_padlock(void)
{
#  ifdef COMPILE_HW_PADLOCK
    ENGINE *engine = ENGINE_padlock();

    if (engine != NULL) {
        ENGINE_add(engine);
        /* Release the handle obtained from ENGINE_padlock() */
        ENGINE_free(engine);
        ERR_clear_error();
    }
#  endif
}
123 | ||
40720ce3 MC |
124 | # ifdef COMPILE_HW_PADLOCK |
125 | /* | |
126 | * We do these includes here to avoid header problems on platforms that do | |
127 | * not have the VIA padlock anyway... | |
128 | */ | |
129 | # ifdef _MSC_VER | |
130 | # include <malloc.h> | |
131 | # define alloca _alloca | |
132 | # elif defined(NETWARE_CLIB) && defined(__GNUC__) | |
133 | void *alloca(size_t); | |
134 | # define alloca(s) __builtin_alloca(s) | |
135 | # else | |
136 | # include <stdlib.h> | |
137 | # endif | |
c38ff58b | 138 | |
5b172463 AP |
139 | /* Function for ENGINE detection and control */ |
140 | static int padlock_available(void); | |
141 | static int padlock_init(ENGINE *e); | |
142 | ||
143 | /* RNG Stuff */ | |
144 | static RAND_METHOD padlock_rand; | |
145 | ||
146 | /* Cipher Stuff */ | |
40720ce3 MC |
147 | # ifndef OPENSSL_NO_AES |
148 | static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, | |
149 | const int **nids, int nid); | |
150 | # endif | |
5b172463 AP |
151 | |
152 | /* Engine names */ | |
153 | static const char *padlock_id = "padlock"; | |
154 | static char padlock_name[100]; | |
155 | ||
156 | /* Available features */ | |
40720ce3 MC |
157 | static int padlock_use_ace = 0; /* Advanced Cryptography Engine */ |
158 | static int padlock_use_rng = 0; /* Random Number Generator */ | |
159 | # ifndef OPENSSL_NO_AES | |
5b172463 | 160 | static int padlock_aes_align_required = 1; |
40720ce3 | 161 | # endif |
5b172463 AP |
162 | |
163 | /* ===== Engine "management" functions ===== */ | |
164 | ||
165 | /* Prepare the ENGINE structure for registration */ | |
40720ce3 | 166 | static int padlock_bind_helper(ENGINE *e) |
5b172463 | 167 | { |
40720ce3 MC |
168 | /* Check available features */ |
169 | padlock_available(); | |
170 | ||
171 | # if 1 /* disable RNG for now, see commentary in | |
172 | * vicinity of RNG code */ | |
173 | padlock_use_rng = 0; | |
174 | # endif | |
175 | ||
176 | /* Generate a nice engine name with available features */ | |
177 | BIO_snprintf(padlock_name, sizeof(padlock_name), | |
178 | "VIA PadLock (%s, %s)", | |
179 | padlock_use_rng ? "RNG" : "no-RNG", | |
180 | padlock_use_ace ? "ACE" : "no-ACE"); | |
181 | ||
182 | /* Register everything or return with an error */ | |
183 | if (!ENGINE_set_id(e, padlock_id) || | |
184 | !ENGINE_set_name(e, padlock_name) || | |
185 | !ENGINE_set_init_function(e, padlock_init) || | |
186 | # ifndef OPENSSL_NO_AES | |
187 | (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) || | |
188 | # endif | |
189 | (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) { | |
190 | return 0; | |
191 | } | |
192 | ||
193 | /* Everything looks good */ | |
194 | return 1; | |
5b172463 AP |
195 | } |
196 | ||
197 | /* Constructor */ | |
40720ce3 | 198 | static ENGINE *ENGINE_padlock(void) |
5b172463 | 199 | { |
40720ce3 | 200 | ENGINE *eng = ENGINE_new(); |
5b172463 | 201 | |
40720ce3 MC |
202 | if (!eng) { |
203 | return NULL; | |
204 | } | |
5b172463 | 205 | |
40720ce3 MC |
206 | if (!padlock_bind_helper(eng)) { |
207 | ENGINE_free(eng); | |
208 | return NULL; | |
209 | } | |
5b172463 | 210 | |
40720ce3 | 211 | return eng; |
5b172463 AP |
212 | } |
213 | ||
214 | /* Check availability of the engine */ | |
40720ce3 | 215 | static int padlock_init(ENGINE *e) |
5b172463 | 216 | { |
40720ce3 | 217 | return (padlock_use_rng || padlock_use_ace); |
5b172463 AP |
218 | } |
219 | ||
40720ce3 MC |
220 | /* |
221 | * This stuff is needed if this ENGINE is being compiled into a | |
222 | * self-contained shared-library. | |
5b172463 | 223 | */ |
40720ce3 MC |
224 | # ifdef DYNAMIC_ENGINE |
225 | static int padlock_bind_fn(ENGINE *e, const char *id) | |
5b172463 | 226 | { |
40720ce3 MC |
227 | if (id && (strcmp(id, padlock_id) != 0)) { |
228 | return 0; | |
229 | } | |
5b172463 | 230 | |
40720ce3 MC |
231 | if (!padlock_bind_helper(e)) { |
232 | return 0; | |
233 | } | |
5b172463 | 234 | |
40720ce3 | 235 | return 1; |
5b172463 AP |
236 | } |
237 | ||
40720ce3 MC |
238 | IMPLEMENT_DYNAMIC_CHECK_FN() |
239 | IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn) | |
240 | # endif /* DYNAMIC_ENGINE */ | |
5b172463 | 241 | /* ===== Here comes the "real" engine ===== */ |
40720ce3 | 242 | # ifndef OPENSSL_NO_AES |
5b172463 | 243 | /* Some AES-related constants */ |
40720ce3 MC |
244 | # define AES_BLOCK_SIZE 16 |
245 | # define AES_KEY_SIZE_128 16 | |
246 | # define AES_KEY_SIZE_192 24 | |
247 | # define AES_KEY_SIZE_256 32 | |
248 | /* | |
249 | * Here we store the status information relevant to the current context. | |
250 | */ | |
251 | /* | |
252 | * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on | |
253 | * the order of items in this structure. Don't blindly modify, reorder, | |
254 | * etc! | |
255 | */ | |
256 | struct padlock_cipher_data { | |
257 | unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */ | |
258 | union { | |
259 | unsigned int pad[4]; | |
260 | struct { | |
261 | int rounds:4; | |
262 | int dgst:1; /* n/a in C3 */ | |
263 | int align:1; /* n/a in C3 */ | |
264 | int ciphr:1; /* n/a in C3 */ | |
265 | unsigned int keygen:1; | |
266 | int interm:1; | |
267 | unsigned int encdec:1; | |
268 | int ksize:2; | |
269 | } b; | |
270 | } cword; /* Control word */ | |
271 | AES_KEY ks; /* Encryption key */ | |
5b172463 AP |
272 | }; |
273 | ||
b88606c2 AP |
274 | /* |
275 | * Essentially this variable belongs in thread local storage. | |
276 | * Having this variable global on the other hand can only cause | |
277 | * few bogus key reloads [if any at all on single-CPU system], | |
278 | * so we accept the penalty... |
279 | */ | |
280 | static volatile struct padlock_cipher_data *padlock_saved_context; | |
40720ce3 | 281 | # endif |
b88606c2 | 282 | |
3e8042c3 | 283 | /*- |
5b172463 AP |
284 | * ======================================================= |
285 | * Inline assembler section(s). | |
286 | * ======================================================= | |
287 | * Order of arguments is chosen to facilitate Windows port | |
288 | * using __fastcall calling convention. If you wish to add | |
b88606c2 | 289 | * more routines, keep in mind that first __fastcall |
5b172463 AP |
290 | * argument is passed in %ecx and second - in %edx. |
291 | * ======================================================= | |
292 | */ | |
40720ce3 | 293 | # if defined(__GNUC__) && __GNUC__>=2 |
5b172463 AP |
294 | /* |
295 | * As for excessive "push %ebx"/"pop %ebx" found all over. | |
296 | * When generating position-independent code GCC won't let | |
297 | * us use "b" in assembler templates nor even respect "ebx" | |
298 | * in "clobber description." Therefore the trouble... | |
299 | */ | |
300 | ||
40720ce3 MC |
301 | /* |
302 | * Helper function - check if a CPUID instruction is available on this CPU | |
303 | */ | |
304 | static int padlock_insn_cpuid_available(void) | |
5b172463 | 305 | { |
40720ce3 MC |
306 | int result = -1; |
307 | ||
308 | /* | |
309 | * We're checking if the bit #21 of EFLAGS can be toggled. If yes = | |
310 | * CPUID is available. | |
311 | */ | |
312 | asm volatile ("pushf\n" | |
313 | "popl %%eax\n" | |
314 | "xorl $0x200000, %%eax\n" | |
315 | "movl %%eax, %%ecx\n" | |
316 | "andl $0x200000, %%ecx\n" | |
317 | "pushl %%eax\n" | |
318 | "popf\n" | |
319 | "pushf\n" | |
320 | "popl %%eax\n" | |
321 | "andl $0x200000, %%eax\n" | |
322 | "xorl %%eax, %%ecx\n" | |
323 | "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx"); | |
324 | ||
325 | return (result == 0); | |
5b172463 AP |
326 | } |
327 | ||
40720ce3 MC |
328 | /* |
329 | * Load supported features of the CPU to see if the PadLock is available. | |
330 | */ | |
331 | static int padlock_available(void) | |
5b172463 | 332 | { |
40720ce3 MC |
333 | char vendor_string[16]; |
334 | unsigned int eax, edx; | |
335 | ||
336 | /* First check if the CPUID instruction is available at all... */ | |
337 | if (!padlock_insn_cpuid_available()) | |
338 | return 0; | |
339 | ||
340 | /* Are we running on the Centaur (VIA) CPU? */ | |
341 | eax = 0x00000000; | |
342 | vendor_string[12] = 0; | |
343 | asm volatile ("pushl %%ebx\n" | |
344 | "cpuid\n" | |
345 | "movl %%ebx,(%%edi)\n" | |
346 | "movl %%edx,4(%%edi)\n" | |
347 | "movl %%ecx,8(%%edi)\n" | |
348 | "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx"); | |
349 | if (strcmp(vendor_string, "CentaurHauls") != 0) | |
350 | return 0; | |
351 | ||
352 | /* Check for Centaur Extended Feature Flags presence */ | |
353 | eax = 0xC0000000; | |
354 | asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx"); | |
355 | if (eax < 0xC0000001) | |
356 | return 0; | |
357 | ||
358 | /* Read the Centaur Extended Feature Flags */ | |
359 | eax = 0xC0000001; | |
360 | asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax), | |
361 | "=d"(edx)::"ecx"); | |
362 | ||
363 | /* Fill up some flags */ | |
364 | padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6)); | |
365 | padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2)); | |
366 | ||
367 | return padlock_use_ace + padlock_use_rng; | |
5b172463 AP |
368 | } |
369 | ||
40720ce3 | 370 | # ifndef OPENSSL_NO_AES |
5b172463 | 371 | /* Our own htonl()/ntohl() */ |
40720ce3 | 372 | static inline void padlock_bswapl(AES_KEY *ks) |
5b172463 | 373 | { |
40720ce3 MC |
374 | size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]); |
375 | unsigned int *key = ks->rd_key; | |
5b172463 | 376 | |
40720ce3 MC |
377 | while (i--) { |
378 | asm volatile ("bswapl %0":"+r" (*key)); | |
379 | key++; | |
380 | } | |
5b172463 | 381 | } |
40720ce3 | 382 | # endif |
5b172463 | 383 | |
40720ce3 MC |
384 | /* |
385 | * Force key reload from memory to the CPU microcode. Loading EFLAGS from the | |
386 | * stack clears EFLAGS[30] which does the trick. | |
387 | */ | |
388 | static inline void padlock_reload_key(void) | |
5b172463 | 389 | { |
40720ce3 | 390 | asm volatile ("pushfl; popfl"); |
5b172463 AP |
391 | } |
392 | ||
40720ce3 | 393 | # ifndef OPENSSL_NO_AES |
5b172463 AP |
394 | /* |
395 | * This is heuristic key context tracing. At first one | |
396 | * believes that one should use atomic swap instructions, | |
397 | * but it's not actually necessary. Point is that if | |
b88606c2 AP |
398 | * padlock_saved_context was changed by another thread |
399 | * after we've read it and before we compare it with cdata, | |
400 | * our key *shall* be reloaded upon thread context switch | |
401 | * and we are therefore set in either case... | |
5b172463 | 402 | */ |
40720ce3 | 403 | static inline void padlock_verify_context(struct padlock_cipher_data *cdata) |
5b172463 | 404 | { |
40720ce3 MC |
405 | asm volatile ("pushfl\n" |
406 | " btl $30,(%%esp)\n" | |
407 | " jnc 1f\n" | |
408 | " cmpl %2,%1\n" | |
409 | " je 1f\n" | |
410 | " popfl\n" | |
411 | " subl $4,%%esp\n" | |
412 | "1: addl $4,%%esp\n" | |
413 | " movl %2,%0":"+m" (padlock_saved_context) | |
414 | :"r"(padlock_saved_context), "r"(cdata):"cc"); | |
5b172463 AP |
415 | } |
416 | ||
417 | /* Template for padlock_xcrypt_* modes */ | |
40720ce3 MC |
418 | /* |
419 | * BIG FAT WARNING: The offsets used with 'leal' instructions describe items | |
420 | * of the 'padlock_cipher_data' structure. | |
5b172463 | 421 | */ |
40720ce3 MC |
422 | # define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ |
423 | static inline void *name(size_t cnt, \ | |
424 | struct padlock_cipher_data *cdata, \ | |
425 | void *out, const void *inp) \ | |
426 | { void *iv; \ | |
427 | asm volatile ( "pushl %%ebx\n" \ | |
428 | " leal 16(%0),%%edx\n" \ | |
429 | " leal 32(%0),%%ebx\n" \ | |
430 | rep_xcrypt "\n" \ | |
431 | " popl %%ebx" \ | |
432 | : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ | |
433 | : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ | |
434 | : "edx", "cc", "memory"); \ | |
435 | return iv; \ | |
5b172463 AP |
436 | } |
437 | ||
438 | /* Generate all functions with appropriate opcodes */ | |
d26667b2 MC |
439 | /* rep xcryptecb */ |
440 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") | |
441 | /* rep xcryptcbc */ | |
40720ce3 | 442 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") |
d26667b2 | 443 | /* rep xcryptcfb */ |
40720ce3 | 444 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") |
d26667b2 | 445 | /* rep xcryptofb */ |
40720ce3 MC |
446 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") |
447 | # endif | |
5b172463 | 448 | /* The RNG call itself */ |
40720ce3 | 449 | static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in) |
5b172463 | 450 | { |
40720ce3 | 451 | unsigned int eax_out; |
5b172463 | 452 | |
40720ce3 MC |
453 | asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */ |
454 | :"=a" (eax_out), "=m"(*(unsigned *)addr) | |
455 | :"D"(addr), "d"(edx_in) | |
456 | ); | |
5b172463 | 457 | |
40720ce3 | 458 | return eax_out; |
5b172463 AP |
459 | } |
460 | ||
40720ce3 MC |
/*
 * Why not inline 'rep movsd'? I failed to find information on what value in
 * Direction Flag one can expect and consequently have to apply
 * "better-safe-than-sorry" approach and assume "undefined." I could
 * explicitly clear it and restore the original value upon return from
 * padlock_aes_cipher, but it's presumably too much trouble for too little
 * gain... In case you wonder 'rep xcrypt*' instructions above are *not*
 * affected by the Direction Flag and pointers advance toward larger
 * addresses unconditionally.
 *
 * Copies n bytes from src to dst one 'long' at a time and returns dst.
 * Only whole words are copied: n is rounded down to a multiple of
 * sizeof(long), and nothing is copied when n is smaller than one word.
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;

    /*
     * Test the count before each copy.  A do/while here would copy one
     * word and then wrap the counter when n rounds down to zero.
     */
    for (n /= sizeof(*d); n != 0; n--)
        *d++ = *s++;

    return dst;
}
484 | ||
40720ce3 | 485 | # elif defined(_MSC_VER) |
5b172463 AP |
486 | /* |
487 | * Unlike GCC these are real functions. In order to minimize impact | |
488 | * on performance we adhere to __fastcall calling convention in | |
489 | * order to get two first arguments passed through %ecx and %edx. | |
490 | * Which kind of suits very well, as instructions in question use | |
491 | * both %ecx and %edx as input:-) | |
492 | */ | |
40720ce3 MC |
493 | # define REP_XCRYPT(code) \ |
494 | _asm _emit 0xf3 \ | |
495 | _asm _emit 0x0f _asm _emit 0xa7 \ | |
496 | _asm _emit code | |
5b172463 | 497 | |
40720ce3 MC |
498 | /* |
499 | * BIG FAT WARNING: The offsets used with 'lea' instructions describe items | |
500 | * of the 'padlock_cipher_data' structure. | |
501 | */ | |
502 | # define PADLOCK_XCRYPT_ASM(name,code) \ | |
503 | static void * __fastcall \ | |
504 | name (size_t cnt, void *cdata, \ | |
505 | void *outp, const void *inp) \ | |
506 | { _asm mov eax,edx \ | |
507 | _asm lea edx,[eax+16] \ | |
508 | _asm lea ebx,[eax+32] \ | |
509 | _asm mov edi,outp \ | |
510 | _asm mov esi,inp \ | |
511 | REP_XCRYPT(code) \ | |
5b172463 AP |
512 | } |
513 | ||
40720ce3 MC |
514 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, 0xc8) |
515 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, 0xd0) | |
516 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, 0xe0) | |
517 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, 0xe8) | |
5b172463 | 518 | |
40720ce3 MC |
519 | static int __fastcall padlock_xstore(void *outp, unsigned int code) |
520 | { | |
521 | _asm mov edi, ecx | |
522 | _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0} | |
523 | static void __fastcall padlock_reload_key(void) | |
524 | { | |
525 | _asm pushfd _asm popfd} | |
526 | static void __fastcall padlock_verify_context(void *cdata) | |
527 | { | |
528 | _asm { | |
529 | pushfd bt DWORD PTR[esp], 30 jnc skip cmp ecx, | |
530 | padlock_saved_context je skip popfd sub esp, | |
531 | 4 skip:add esp, 4 mov padlock_saved_context, | |
532 | ecx}} static int padlock_available(void) | |
533 | { | |
534 | _asm { | |
535 | pushfd pop eax mov ecx, eax xor eax, | |
536 | 1 << 21 push eax popfd pushfd pop eax xor eax, ecx bt eax, | |
537 | 21 jnc noluck mov eax, 0 cpuid xor eax, eax cmp ebx, | |
538 | 'tneC' jne noluck cmp edx, 'Hrua' jne noluck cmp ecx, | |
539 | 'slua' jne noluck mov eax, 0xC0000000 cpuid mov edx, | |
540 | eax xor eax, eax cmp edx, 0xC0000001 jb noluck mov eax, | |
541 | 0xC0000001 cpuid xor eax, eax bt edx, 6 jnc skip_a bt edx, | |
542 | 7 jnc skip_a mov padlock_use_ace, 1 inc eax skip_a:bt edx, | |
543 | 2 jnc skip_r bt edx, 3 jnc skip_r mov padlock_use_rng, | |
544 | 1 inc eax skip_r:noluck:}} static void __fastcall | |
5b172463 | 545 | padlock_bswapl(void *key) |
40720ce3 MC |
546 | { |
547 | _asm { | |
548 | pushfd cld mov esi, ecx mov edi, ecx mov ecx, 60 up:lodsd | |
549 | bswap eax stosd loop up popfd}} | |
550 | /* | |
551 | * MS actually specifies status of Direction Flag and compiler even manages | |
552 | * to compile following as 'rep movsd' all by itself... | |
52697590 | 553 | */ |
40720ce3 MC |
554 | # define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U)) |
555 | # endif | |
5b172463 | 556 | /* ===== AES encryption/decryption ===== */ |
40720ce3 MC |
557 | # ifndef OPENSSL_NO_AES |
558 | # if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) | |
559 | # define NID_aes_128_cfb NID_aes_128_cfb128 | |
560 | # endif | |
561 | # if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) | |
562 | # define NID_aes_128_ofb NID_aes_128_ofb128 | |
563 | # endif | |
564 | # if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) | |
565 | # define NID_aes_192_cfb NID_aes_192_cfb128 | |
566 | # endif | |
567 | # if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) | |
568 | # define NID_aes_192_ofb NID_aes_192_ofb128 | |
569 | # endif | |
570 | # if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) | |
571 | # define NID_aes_256_cfb NID_aes_256_cfb128 | |
572 | # endif | |
573 | # if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) | |
574 | # define NID_aes_256_ofb NID_aes_256_ofb128 | |
575 | # endif | |
576 | /* List of supported ciphers. */ static int padlock_cipher_nids[] = { | |
577 | NID_aes_128_ecb, | |
578 | NID_aes_128_cbc, | |
579 | NID_aes_128_cfb, | |
580 | NID_aes_128_ofb, | |
581 | ||
582 | NID_aes_192_ecb, | |
583 | NID_aes_192_cbc, | |
584 | NID_aes_192_cfb, | |
585 | NID_aes_192_ofb, | |
586 | ||
587 | NID_aes_256_ecb, | |
588 | NID_aes_256_cbc, | |
589 | NID_aes_256_cfb, | |
590 | NID_aes_256_ofb, | |
5b172463 | 591 | }; |
40720ce3 MC |
592 | |
593 | static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) / | |
594 | sizeof(padlock_cipher_nids[0])); | |
5b172463 AP |
595 | |
596 | /* Function prototypes ... */ | |
597 | static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, | |
40720ce3 | 598 | const unsigned char *iv, int enc); |
5b172463 | 599 | static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
40720ce3 MC |
600 | const unsigned char *in, size_t nbytes); |
601 | ||
/*
 * NEAREST_ALIGNED(ptr): round ptr up to the next 16-byte boundary (ptr
 * itself when it is already aligned).  The result is an unsigned char *.
 *
 * ALIGNED_CIPHER_DATA(ctx): the 16-byte aligned padlock_cipher_data that
 * lives inside ctx->cipher_data.  The argument is parenthesised so that
 * any pointer expression (e.g. &array[0]) can be passed.
 */
#    define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) +         \
        ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) )
#    define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\
        NEAREST_ALIGNED((ctx)->cipher_data))
606 | ||
607 | # define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE | |
608 | # define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE | |
609 | # define EVP_CIPHER_block_size_OFB 1 | |
610 | # define EVP_CIPHER_block_size_CFB 1 | |
611 | ||
612 | /* | |
613 | * Declaring so many ciphers by hand would be a pain. Instead introduce a bit | |
614 | * of preprocessor magic :-) | |
615 | */ | |
616 | # define DECLARE_AES_EVP(ksize,lmode,umode) \ | |
617 | static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \ | |
618 | NID_aes_##ksize##_##lmode, \ | |
619 | EVP_CIPHER_block_size_##umode, \ | |
620 | AES_KEY_SIZE_##ksize, \ | |
621 | AES_BLOCK_SIZE, \ | |
622 | 0 | EVP_CIPH_##umode##_MODE, \ | |
623 | padlock_aes_init_key, \ | |
624 | padlock_aes_cipher, \ | |
625 | NULL, \ | |
626 | sizeof(struct padlock_cipher_data) + 16, \ | |
627 | EVP_CIPHER_set_asn1_iv, \ | |
628 | EVP_CIPHER_get_asn1_iv, \ | |
629 | NULL, \ | |
630 | NULL \ | |
5b172463 AP |
631 | } |
632 | ||
40720ce3 MC |
633 | DECLARE_AES_EVP(128, ecb, ECB); |
634 | DECLARE_AES_EVP(128, cbc, CBC); | |
635 | DECLARE_AES_EVP(128, cfb, CFB); | |
636 | DECLARE_AES_EVP(128, ofb, OFB); | |
5b172463 | 637 | |
40720ce3 MC |
638 | DECLARE_AES_EVP(192, ecb, ECB); |
639 | DECLARE_AES_EVP(192, cbc, CBC); | |
640 | DECLARE_AES_EVP(192, cfb, CFB); | |
641 | DECLARE_AES_EVP(192, ofb, OFB); | |
5b172463 | 642 | |
40720ce3 MC |
643 | DECLARE_AES_EVP(256, ecb, ECB); |
644 | DECLARE_AES_EVP(256, cbc, CBC); | |
645 | DECLARE_AES_EVP(256, cfb, CFB); | |
646 | DECLARE_AES_EVP(256, ofb, OFB); | |
5b172463 AP |
647 | |
648 | static int | |
40720ce3 MC |
649 | padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, |
650 | int nid) | |
5b172463 | 651 | { |
40720ce3 MC |
652 | /* No specific cipher => return a list of supported nids ... */ |
653 | if (!cipher) { | |
654 | *nids = padlock_cipher_nids; | |
655 | return padlock_cipher_nids_num; | |
656 | } | |
657 | ||
658 | /* ... or the requested "cipher" otherwise */ | |
659 | switch (nid) { | |
660 | case NID_aes_128_ecb: | |
661 | *cipher = &padlock_aes_128_ecb; | |
662 | break; | |
663 | case NID_aes_128_cbc: | |
664 | *cipher = &padlock_aes_128_cbc; | |
665 | break; | |
666 | case NID_aes_128_cfb: | |
667 | *cipher = &padlock_aes_128_cfb; | |
668 | break; | |
669 | case NID_aes_128_ofb: | |
670 | *cipher = &padlock_aes_128_ofb; | |
671 | break; | |
672 | ||
673 | case NID_aes_192_ecb: | |
674 | *cipher = &padlock_aes_192_ecb; | |
675 | break; | |
676 | case NID_aes_192_cbc: | |
677 | *cipher = &padlock_aes_192_cbc; | |
678 | break; | |
679 | case NID_aes_192_cfb: | |
680 | *cipher = &padlock_aes_192_cfb; | |
681 | break; | |
682 | case NID_aes_192_ofb: | |
683 | *cipher = &padlock_aes_192_ofb; | |
684 | break; | |
685 | ||
686 | case NID_aes_256_ecb: | |
687 | *cipher = &padlock_aes_256_ecb; | |
688 | break; | |
689 | case NID_aes_256_cbc: | |
690 | *cipher = &padlock_aes_256_cbc; | |
691 | break; | |
692 | case NID_aes_256_cfb: | |
693 | *cipher = &padlock_aes_256_cfb; | |
694 | break; | |
695 | case NID_aes_256_ofb: | |
696 | *cipher = &padlock_aes_256_ofb; | |
697 | break; | |
698 | ||
699 | default: | |
700 | /* Sorry, we don't support this NID */ | |
701 | *cipher = NULL; | |
702 | return 0; | |
703 | } | |
704 | ||
705 | return 1; | |
5b172463 AP |
706 | } |
707 | ||
708 | /* Prepare the encryption key for PadLock usage */ | |
709 | static int | |
40720ce3 MC |
710 | padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
711 | const unsigned char *iv, int enc) | |
5b172463 | 712 | { |
40720ce3 MC |
713 | struct padlock_cipher_data *cdata; |
714 | int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; | |
715 | ||
716 | if (key == NULL) | |
717 | return 0; /* ERROR */ | |
718 | ||
719 | cdata = ALIGNED_CIPHER_DATA(ctx); | |
720 | memset(cdata, 0, sizeof(struct padlock_cipher_data)); | |
721 | ||
722 | /* Prepare Control word. */ | |
723 | if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE) | |
724 | cdata->cword.b.encdec = 0; | |
725 | else | |
726 | cdata->cword.b.encdec = (ctx->encrypt == 0); | |
727 | cdata->cword.b.rounds = 10 + (key_len - 128) / 32; | |
728 | cdata->cword.b.ksize = (key_len - 128) / 64; | |
729 | ||
730 | switch (key_len) { | |
731 | case 128: | |
732 | /* | |
733 | * PadLock can generate an extended key for AES128 in hardware | |
734 | */ | |
735 | memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128); | |
736 | cdata->cword.b.keygen = 0; | |
737 | break; | |
738 | ||
739 | case 192: | |
740 | case 256: | |
741 | /* | |
742 | * Generate an extended AES key in software. Needed for AES192/AES256 | |
743 | */ | |
744 | /* | |
745 | * Well, the above applies to Stepping 8 CPUs and is listed as | |
746 | * hardware errata. They most likely will fix it at some point and | |
747 | * then a check for stepping would be due here. | |
748 | */ | |
749 | if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE || | |
750 | EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc) | |
751 | AES_set_encrypt_key(key, key_len, &cdata->ks); | |
752 | else | |
753 | AES_set_decrypt_key(key, key_len, &cdata->ks); | |
754 | # ifndef AES_ASM | |
755 | /* | |
756 | * OpenSSL C functions use byte-swapped extended key. | |
757 | */ | |
758 | padlock_bswapl(&cdata->ks); | |
759 | # endif | |
760 | cdata->cword.b.keygen = 1; | |
761 | break; | |
762 | ||
763 | default: | |
764 | /* ERROR */ | |
765 | return 0; | |
766 | } | |
767 | ||
768 | /* | |
769 | * This is done to cover for cases when user reuses the | |
770 | * context for new key. The catch is that if we don't do | |
771 | * this, padlock_eas_cipher might proceed with old key... | |
772 | */ | |
773 | padlock_reload_key(); | |
774 | ||
775 | return 1; | |
5b172463 AP |
776 | } |
777 | ||
40720ce3 | 778 | /*- |
5b172463 AP |
779 | * Simplified version of padlock_aes_cipher() used when |
780 | * 1) both input and output buffers are at aligned addresses. | |
781 | * or when | |
782 | * 2) running on a newer CPU that doesn't require aligned buffers. | |
783 | */ | |
784 | static int | |
785 | padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, | |
40720ce3 | 786 | const unsigned char *in_arg, size_t nbytes) |
5b172463 | 787 | { |
40720ce3 MC |
788 | struct padlock_cipher_data *cdata; |
789 | void *iv; | |
790 | ||
791 | cdata = ALIGNED_CIPHER_DATA(ctx); | |
792 | padlock_verify_context(cdata); | |
793 | ||
794 | switch (EVP_CIPHER_CTX_mode(ctx)) { | |
795 | case EVP_CIPH_ECB_MODE: | |
796 | padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); | |
797 | break; | |
798 | ||
799 | case EVP_CIPH_CBC_MODE: | |
800 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
801 | iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg, | |
802 | in_arg); | |
803 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | |
804 | break; | |
805 | ||
806 | case EVP_CIPH_CFB_MODE: | |
807 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
808 | iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, | |
809 | in_arg); | |
810 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | |
811 | break; | |
812 | ||
813 | case EVP_CIPH_OFB_MODE: | |
814 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
815 | padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); | |
816 | memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); | |
817 | break; | |
818 | ||
819 | default: | |
820 | return 0; | |
821 | } | |
822 | ||
823 | memset(cdata->iv, 0, AES_BLOCK_SIZE); | |
824 | ||
825 | return 1; | |
5b172463 AP |
826 | } |
827 | ||
40720ce3 MC |
# ifndef PADLOCK_CHUNK
/*
 * Size in bytes of the pieces in which padlock_aes_cipher() processes data
 * that has to be re-aligned.  Must be a power of 2 and at least 16; the
 * guard below rejects anything else at compile time.  May be overridden
 * from the compiler command line.
 */
#  define PADLOCK_CHUNK  512
# endif
/*
 * Every use of the macro is parenthesised so that an expression-valued
 * override is checked as a whole.
 */
# if (PADLOCK_CHUNK) < 16 || ((PADLOCK_CHUNK) & ((PADLOCK_CHUNK) - 1)) != 0
#  error "insane PADLOCK_CHUNK..."
# endif
5b172463 | 834 | |
40720ce3 MC |
835 | /* |
836 | * Re-align the arguments to 16-Bytes boundaries and run the encryption | |
837 | * function itself. This function is not AES-specific. | |
838 | */ | |
5b172463 AP |
839 | static int |
840 | padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, | |
40720ce3 | 841 | const unsigned char *in_arg, size_t nbytes) |
5b172463 | 842 | { |
40720ce3 MC |
843 | struct padlock_cipher_data *cdata; |
844 | const void *inp; | |
845 | unsigned char *out; | |
846 | void *iv; | |
847 | int inp_misaligned, out_misaligned, realign_in_loop; | |
848 | size_t chunk, allocated = 0; | |
849 | ||
850 | /* | |
851 | * ctx->num is maintained in byte-oriented modes, such as CFB and OFB... | |
852 | */ | |
853 | if ((chunk = ctx->num)) { /* borrow chunk variable */ | |
854 | unsigned char *ivp = ctx->iv; | |
855 | ||
856 | switch (EVP_CIPHER_CTX_mode(ctx)) { | |
857 | case EVP_CIPH_CFB_MODE: | |
858 | if (chunk >= AES_BLOCK_SIZE) | |
859 | return 0; /* bogus value */ | |
860 | ||
861 | if (ctx->encrypt) | |
862 | while (chunk < AES_BLOCK_SIZE && nbytes != 0) { | |
863 | ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk]; | |
864 | chunk++, nbytes--; | |
865 | } else | |
866 | while (chunk < AES_BLOCK_SIZE && nbytes != 0) { | |
867 | unsigned char c = *(in_arg++); | |
868 | *(out_arg++) = c ^ ivp[chunk]; | |
869 | ivp[chunk++] = c, nbytes--; | |
870 | } | |
871 | ||
872 | ctx->num = chunk % AES_BLOCK_SIZE; | |
873 | break; | |
874 | case EVP_CIPH_OFB_MODE: | |
875 | if (chunk >= AES_BLOCK_SIZE) | |
876 | return 0; /* bogus value */ | |
877 | ||
878 | while (chunk < AES_BLOCK_SIZE && nbytes != 0) { | |
879 | *(out_arg++) = *(in_arg++) ^ ivp[chunk]; | |
880 | chunk++, nbytes--; | |
881 | } | |
882 | ||
883 | ctx->num = chunk % AES_BLOCK_SIZE; | |
884 | break; | |
885 | } | |
886 | } | |
887 | ||
888 | if (nbytes == 0) | |
889 | return 1; | |
890 | # if 0 | |
891 | if (nbytes % AES_BLOCK_SIZE) | |
892 | return 0; /* are we expected to do tail processing? */ | |
893 | # else | |
894 | /* | |
895 | * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and | |
896 | * arbitrary value in byte-oriented modes, such as CFB and OFB... | |
897 | */ | |
898 | # endif | |
899 | ||
900 | /* | |
901 | * VIA promises CPUs that won't require alignment in the future. For now | |
902 | * padlock_aes_align_required is initialized to 1 and the condition is | |
903 | * never met... | |
904 | */ | |
905 | /* | |
906 | * C7 core is capable to manage unaligned input in non-ECB[!] mode, but | |
907 | * performance penalties appear to be approximately same as for software | |
908 | * alignment below or ~3x. They promise to improve it in the future, but | |
909 | * for now we can just as well pretend that it can only handle aligned | |
910 | * input... | |
911 | */ | |
912 | if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0) | |
913 | return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); | |
914 | ||
915 | inp_misaligned = (((size_t)in_arg) & 0x0F); | |
916 | out_misaligned = (((size_t)out_arg) & 0x0F); | |
917 | ||
918 | /* | |
919 | * Note that even if output is aligned and input not, I still prefer to | |
920 | * loop instead of copy the whole input and then encrypt in one stroke. | |
921 | * This is done in order to improve L1 cache utilization... | |
922 | */ | |
923 | realign_in_loop = out_misaligned | inp_misaligned; | |
924 | ||
925 | if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0) | |
926 | return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); | |
927 | ||
928 | /* this takes one "if" out of the loops */ | |
929 | chunk = nbytes; | |
930 | chunk %= PADLOCK_CHUNK; | |
931 | if (chunk == 0) | |
932 | chunk = PADLOCK_CHUNK; | |
933 | ||
934 | if (out_misaligned) { | |
935 | /* optmize for small input */ | |
936 | allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes); | |
937 | out = alloca(0x10 + allocated); | |
938 | out = NEAREST_ALIGNED(out); | |
939 | } else | |
940 | out = out_arg; | |
941 | ||
942 | cdata = ALIGNED_CIPHER_DATA(ctx); | |
943 | padlock_verify_context(cdata); | |
944 | ||
945 | switch (EVP_CIPHER_CTX_mode(ctx)) { | |
946 | case EVP_CIPH_ECB_MODE: | |
947 | do { | |
948 | if (inp_misaligned) | |
949 | inp = padlock_memcpy(out, in_arg, chunk); | |
950 | else | |
951 | inp = in_arg; | |
952 | in_arg += chunk; | |
953 | ||
954 | padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp); | |
955 | ||
956 | if (out_misaligned) | |
957 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | |
958 | else | |
959 | out = out_arg += chunk; | |
960 | ||
961 | nbytes -= chunk; | |
962 | chunk = PADLOCK_CHUNK; | |
963 | } while (nbytes); | |
964 | break; | |
965 | ||
966 | case EVP_CIPH_CBC_MODE: | |
967 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
968 | goto cbc_shortcut; | |
969 | do { | |
970 | if (iv != cdata->iv) | |
971 | memcpy(cdata->iv, iv, AES_BLOCK_SIZE); | |
972 | chunk = PADLOCK_CHUNK; | |
973 | cbc_shortcut: /* optimize for small input */ | |
974 | if (inp_misaligned) | |
975 | inp = padlock_memcpy(out, in_arg, chunk); | |
976 | else | |
977 | inp = in_arg; | |
978 | in_arg += chunk; | |
979 | ||
980 | iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp); | |
981 | ||
982 | if (out_misaligned) | |
983 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | |
984 | else | |
985 | out = out_arg += chunk; | |
986 | ||
987 | } while (nbytes -= chunk); | |
988 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | |
989 | break; | |
990 | ||
991 | case EVP_CIPH_CFB_MODE: | |
992 | memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
993 | chunk &= ~(AES_BLOCK_SIZE - 1); | |
994 | if (chunk) | |
995 | goto cfb_shortcut; | |
996 | else | |
997 | goto cfb_skiploop; | |
998 | do { | |
999 | if (iv != cdata->iv) | |
1000 | memcpy(cdata->iv, iv, AES_BLOCK_SIZE); | |
1001 | chunk = PADLOCK_CHUNK; | |
1002 | cfb_shortcut: /* optimize for small input */ | |
1003 | if (inp_misaligned) | |
1004 | inp = padlock_memcpy(out, in_arg, chunk); | |
1005 | else | |
1006 | inp = in_arg; | |
1007 | in_arg += chunk; | |
1008 | ||
1009 | iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp); | |
1010 | ||
1011 | if (out_misaligned) | |
1012 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | |
1013 | else | |
1014 | out = out_arg += chunk; | |
1015 | ||
1016 | nbytes -= chunk; | |
1017 | } while (nbytes >= AES_BLOCK_SIZE); | |
1018 | ||
1019 | cfb_skiploop: | |
1020 | if (nbytes) { | |
1021 | unsigned char *ivp = cdata->iv; | |
1022 | ||
1023 | if (iv != ivp) { | |
1024 | memcpy(ivp, iv, AES_BLOCK_SIZE); | |
1025 | iv = ivp; | |
1026 | } | |
1027 | ctx->num = nbytes; | |
1028 | if (cdata->cword.b.encdec) { | |
1029 | cdata->cword.b.encdec = 0; | |
1030 | padlock_reload_key(); | |
1031 | padlock_xcrypt_ecb(1, cdata, ivp, ivp); | |
1032 | cdata->cword.b.encdec = 1; | |
1033 | padlock_reload_key(); | |
1034 | while (nbytes) { | |
1035 | unsigned char c = *(in_arg++); | |
1036 | *(out_arg++) = c ^ *ivp; | |
1037 | *(ivp++) = c, nbytes--; | |
1038 | } | |
1039 | } else { | |
1040 | padlock_reload_key(); | |
1041 | padlock_xcrypt_ecb(1, cdata, ivp, ivp); | |
1042 | padlock_reload_key(); | |
1043 | while (nbytes) { | |
1044 | *ivp = *(out_arg++) = *(in_arg++) ^ *ivp; | |
1045 | ivp++, nbytes--; | |
1046 | } | |
1047 | } | |
1048 | } | |
1049 | ||
1050 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | |
1051 | break; | |
1052 | ||
1053 | case EVP_CIPH_OFB_MODE: | |
1054 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
1055 | chunk &= ~(AES_BLOCK_SIZE - 1); | |
1056 | if (chunk) | |
1057 | do { | |
1058 | if (inp_misaligned) | |
1059 | inp = padlock_memcpy(out, in_arg, chunk); | |
1060 | else | |
1061 | inp = in_arg; | |
1062 | in_arg += chunk; | |
1063 | ||
1064 | padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp); | |
1065 | ||
1066 | if (out_misaligned) | |
1067 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | |
1068 | else | |
1069 | out = out_arg += chunk; | |
1070 | ||
1071 | nbytes -= chunk; | |
1072 | chunk = PADLOCK_CHUNK; | |
1073 | } while (nbytes >= AES_BLOCK_SIZE); | |
1074 | ||
1075 | if (nbytes) { | |
1076 | unsigned char *ivp = cdata->iv; | |
1077 | ||
1078 | ctx->num = nbytes; | |
1079 | padlock_reload_key(); /* empirically found */ | |
1080 | padlock_xcrypt_ecb(1, cdata, ivp, ivp); | |
1081 | padlock_reload_key(); /* empirically found */ | |
1082 | while (nbytes) { | |
1083 | *(out_arg++) = *(in_arg++) ^ *ivp; | |
1084 | ivp++, nbytes--; | |
1085 | } | |
1086 | } | |
1087 | ||
1088 | memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); | |
1089 | break; | |
1090 | ||
1091 | default: | |
1092 | return 0; | |
1093 | } | |
1094 | ||
1095 | /* Clean the realign buffer if it was used */ | |
1096 | if (out_misaligned) { | |
1097 | volatile unsigned long *p = (void *)out; | |
1098 | size_t n = allocated / sizeof(*p); | |
1099 | while (n--) | |
1100 | *p++ = 0; | |
1101 | } | |
1102 | ||
1103 | memset(cdata->iv, 0, AES_BLOCK_SIZE); | |
1104 | ||
1105 | return 1; | |
5b172463 AP |
1106 | } |
1107 | ||
40720ce3 | 1108 | # endif /* OPENSSL_NO_AES */ |
e00b165e | 1109 | |
5b172463 AP |
1110 | /* ===== Random Number Generator ===== */ |
1111 | /* | |
1112 | * This code is not engaged. The reason is that it does not comply | |
1113 | * with recommendations for VIA RNG usage for secure applications | |
1114 | * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it | |
1115 | * provide meaningful error control... | |
1116 | */ | |
40720ce3 MC |
/*
 * Wrapper that provides an interface between the API and the raw PadLock
 * RNG
 *
 * Fills `output' with `count' bytes obtained through padlock_xstore().
 * Returns 1 once the request has been satisfied (a count of zero or less
 * is satisfied trivially), 0 as soon as the status word returned by
 * padlock_xstore() reports a problem.  A status reporting "no data" is
 * retried without limit.
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int status, stored, scratch;

    /* Bulk phase: 8 bytes per call, stored straight into the caller's buffer */
    while (count >= 8) {
        status = padlock_xstore(output, 0);

        if ((status & (1 << 6)) == 0)
            return 0;           /* RNG disabled */
        /* these five bits cover DC bias, Raw Bits and String Filter */
        if ((status & (0x1F << 10)) != 0)
            return 0;

        stored = status & 0x1F;
        if (stored == 0)
            continue;           /* no data, retry... */
        if (stored != 8)
            return 0;           /* fatal failure... */

        output += 8;
        count -= 8;
    }

    /* Tail phase: one byte per call, staged through a local word */
    while (count > 0) {
        status = padlock_xstore(&scratch, 3);

        if ((status & (1 << 6)) == 0)
            return 0;           /* RNG disabled */
        /* these five bits cover DC bias, Raw Bits and String Filter */
        if ((status & (0x1F << 10)) != 0)
            return 0;

        stored = status & 0x1F;
        if (stored == 0)
            continue;           /* no data, retry... */
        if (stored != 1)
            return 0;           /* fatal failure... */

        *output++ = (unsigned char)scratch;
        count--;
    }

    /* wipe the staging word; the volatile access keeps the store in place */
    *(volatile unsigned int *)&scratch = 0;

    return 1;
}
1157 | ||
/*
 * Dummy but necessary function: installed as the "rand status" entry of
 * padlock_rand.  Performs no check and unconditionally returns 1.
 */
static int padlock_rand_status(void)
{
    return 1;
}
1163 | ||
1164 | /* Prepare structure for registration */ | |
1165 | static RAND_METHOD padlock_rand = { | |
40720ce3 MC |
1166 | NULL, /* seed */ |
1167 | padlock_rand_bytes, /* bytes */ | |
1168 | NULL, /* cleanup */ | |
1169 | NULL, /* add */ | |
1170 | padlock_rand_bytes, /* pseudorand */ | |
1171 | padlock_rand_status, /* rand status */ | |
5b172463 AP |
1172 | }; |
1173 | ||
40720ce3 | 1174 | # endif /* COMPILE_HW_PADLOCK */ |
5b172463 | 1175 | |
40720ce3 MC |
1176 | # endif /* !OPENSSL_NO_HW_PADLOCK */ |
1177 | #endif /* !OPENSSL_NO_HW */ |