]>
Commit | Line | Data |
---|---|---|
3e9a08ec | 1 | /*- |
5b172463 AP |
2 | * Support for VIA PadLock Advanced Cryptography Engine (ACE) |
3 | * Written by Michal Ludvig <michal@logix.cz> | |
4 | * http://www.logix.cz/michal | |
5 | * | |
10621efd MC |
6 | * Big thanks to Andy Polyakov for help with optimization, |
7 | * assembler fixes, port to MS Windows and a lot of other | |
5b172463 AP |
8 | * valuable work on this engine! |
9 | */ | |
10 | ||
11 | /* ==================================================================== | |
12 | * Copyright (c) 1999-2001 The OpenSSL Project. All rights reserved. | |
13 | * | |
14 | * Redistribution and use in source and binary forms, with or without | |
15 | * modification, are permitted provided that the following conditions | |
16 | * are met: | |
17 | * | |
18 | * 1. Redistributions of source code must retain the above copyright | |
19 | * notice, this list of conditions and the following disclaimer. | |
20 | * | |
21 | * 2. Redistributions in binary form must reproduce the above copyright | |
22 | * notice, this list of conditions and the following disclaimer in | |
23 | * the documentation and/or other materials provided with the | |
24 | * distribution. | |
25 | * | |
26 | * 3. All advertising materials mentioning features or use of this | |
27 | * software must display the following acknowledgment: | |
28 | * "This product includes software developed by the OpenSSL Project | |
29 | * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)" | |
30 | * | |
31 | * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to | |
32 | * endorse or promote products derived from this software without | |
33 | * prior written permission. For written permission, please contact | |
34 | * licensing@OpenSSL.org. | |
35 | * | |
36 | * 5. Products derived from this software may not be called "OpenSSL" | |
37 | * nor may "OpenSSL" appear in their names without prior written | |
38 | * permission of the OpenSSL Project. | |
39 | * | |
40 | * 6. Redistributions of any form whatsoever must retain the following | |
41 | * acknowledgment: | |
42 | * "This product includes software developed by the OpenSSL Project | |
43 | * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)" | |
44 | * | |
45 | * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY | |
46 | * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
47 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
48 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR | |
49 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
50 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
51 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
52 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
53 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, | |
54 | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
55 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED | |
56 | * OF THE POSSIBILITY OF SUCH DAMAGE. | |
57 | * ==================================================================== | |
58 | * | |
59 | * This product includes cryptographic software written by Eric Young | |
60 | * (eay@cryptsoft.com). This product includes software written by Tim | |
61 | * Hudson (tjh@cryptsoft.com). | |
62 | * | |
63 | */ | |
64 | ||
5b172463 AP |
65 | #include <stdio.h> |
66 | #include <string.h> | |
67 | ||
63d74075 | 68 | #include <openssl/opensslconf.h> |
5b172463 AP |
69 | #include <openssl/crypto.h> |
70 | #include <openssl/dso.h> | |
71 | #include <openssl/engine.h> | |
72 | #include <openssl/evp.h> | |
63d74075 | 73 | #ifndef OPENSSL_NO_AES |
10621efd | 74 | # include <openssl/aes.h> |
63d74075 | 75 | #endif |
c7439661 | 76 | #include <openssl/rand.h> |
3eeaab4b | 77 | #include <openssl/err.h> |
5b172463 AP |
78 | |
79 | #ifndef OPENSSL_NO_HW | |
10621efd | 80 | # ifndef OPENSSL_NO_HW_PADLOCK |
5b172463 AP |
81 | |
82 | /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */ | |
10621efd MC |
83 | # if (OPENSSL_VERSION_NUMBER >= 0x00908000L) |
84 | # ifndef OPENSSL_NO_DYNAMIC_ENGINE | |
5b172463 | 85 | # define DYNAMIC_ENGINE |
10621efd MC |
86 | # endif |
87 | # elif (OPENSSL_VERSION_NUMBER >= 0x00907000L) | |
88 | # ifdef ENGINE_DYNAMIC_SUPPORT | |
5b172463 | 89 | # define DYNAMIC_ENGINE |
10621efd MC |
90 | # endif |
91 | # else | |
92 | # error "Only OpenSSL >= 0.9.7 is supported" | |
5b172463 | 93 | # endif |
5b172463 | 94 | |
10621efd MC |
95 | /* |
96 | * VIA PadLock AES is available *ONLY* on some x86 CPUs. Not only does it | |
97 | * not exist elsewhere, it cannot even be compiled on other platforms! | |
98 | * | |
99 | * In addition, because of the heavy use of inline assembler, compiler choice | |
100 | * is limited to GCC and Microsoft C. | |
101 | */ | |
102 | # undef COMPILE_HW_PADLOCK | |
103 | # if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) | |
104 | # if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ | |
f8fa22d8 | 105 | (defined(_MSC_VER) && defined(_M_IX86)) |
10621efd MC |
106 | # define COMPILE_HW_PADLOCK |
107 | # endif | |
108 | # endif | |
5b172463 | 109 | |
10621efd MC |
110 | # ifdef OPENSSL_NO_DYNAMIC_ENGINE |
111 | # ifdef COMPILE_HW_PADLOCK | |
112 | static ENGINE *ENGINE_padlock(void); | |
113 | # endif | |
b5b72434 | 114 | |
/*
 * Public entry point used when engines are linked statically: build the
 * PadLock ENGINE, hand it to ENGINE_add(), release the local reference and
 * clear the error queue. When COMPILE_HW_PADLOCK is not defined the function
 * body is empty and the call simply returns.
 */
void ENGINE_load_padlock(void)
{
# ifdef COMPILE_HW_PADLOCK
    ENGINE *padlock = ENGINE_padlock();

    if (padlock != NULL) {
        ENGINE_add(padlock);
        /* Drop the reference obtained from ENGINE_padlock() */
        ENGINE_free(padlock);
        ERR_clear_error();
    }
# endif
}
127 | ||
10621efd | 128 | # endif |
b5b72434 | 129 | |
10621efd MC |
130 | # ifdef COMPILE_HW_PADLOCK |
131 | /* | |
132 | * We do these includes here to avoid header problems on platforms that do | |
133 | * not have the VIA padlock anyway... | |
134 | */ | |
135 | # include <stdlib.h> | |
136 | # ifdef _WIN32 | |
137 | # include <malloc.h> | |
138 | # ifndef alloca | |
139 | # define alloca _alloca | |
140 | # endif | |
141 | # elif defined(__GNUC__) | |
142 | # ifndef alloca | |
143 | # define alloca(s) __builtin_alloca(s) | |
144 | # endif | |
145 | # endif | |
c38ff58b | 146 | |
5b172463 AP |
147 | /* Function for ENGINE detection and control */ |
148 | static int padlock_available(void); | |
149 | static int padlock_init(ENGINE *e); | |
150 | ||
151 | /* RNG Stuff */ | |
152 | static RAND_METHOD padlock_rand; | |
153 | ||
154 | /* Cipher Stuff */ | |
10621efd MC |
155 | # ifndef OPENSSL_NO_AES |
156 | static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, | |
157 | const int **nids, int nid); | |
158 | # endif | |
5b172463 AP |
159 | |
160 | /* Engine names */ | |
161 | static const char *padlock_id = "padlock"; | |
162 | static char padlock_name[100]; | |
163 | ||
164 | /* Available features */ | |
10621efd MC |
165 | static int padlock_use_ace = 0; /* Advanced Cryptography Engine */ |
166 | static int padlock_use_rng = 0; /* Random Number Generator */ | |
167 | # ifndef OPENSSL_NO_AES | |
5b172463 | 168 | static int padlock_aes_align_required = 1; |
10621efd | 169 | # endif |
5b172463 AP |
170 | |
171 | /* ===== Engine "management" functions ===== */ | |
172 | ||
173 | /* Prepare the ENGINE structure for registration */ | |
10621efd | 174 | static int padlock_bind_helper(ENGINE *e) |
5b172463 | 175 | { |
10621efd MC |
176 | /* Check available features */ |
177 | padlock_available(); | |
178 | ||
179 | # if 1 /* disable RNG for now, see commentary in | |
180 | * vicinity of RNG code */ | |
181 | padlock_use_rng = 0; | |
182 | # endif | |
183 | ||
184 | /* Generate a nice engine name with available features */ | |
185 | BIO_snprintf(padlock_name, sizeof(padlock_name), | |
186 | "VIA PadLock (%s, %s)", | |
187 | padlock_use_rng ? "RNG" : "no-RNG", | |
188 | padlock_use_ace ? "ACE" : "no-ACE"); | |
189 | ||
190 | /* Register everything or return with an error */ | |
191 | if (!ENGINE_set_id(e, padlock_id) || | |
192 | !ENGINE_set_name(e, padlock_name) || | |
193 | !ENGINE_set_init_function(e, padlock_init) || | |
194 | # ifndef OPENSSL_NO_AES | |
195 | (padlock_use_ace && !ENGINE_set_ciphers(e, padlock_ciphers)) || | |
196 | # endif | |
197 | (padlock_use_rng && !ENGINE_set_RAND(e, &padlock_rand))) { | |
198 | return 0; | |
199 | } | |
200 | ||
201 | /* Everything looks good */ | |
202 | return 1; | |
5b172463 AP |
203 | } |
204 | ||
10621efd | 205 | # ifdef OPENSSL_NO_DYNAMIC_ENGINE |
c65d409a | 206 | |
5b172463 | 207 | /* Constructor */ |
10621efd | 208 | static ENGINE *ENGINE_padlock(void) |
5b172463 | 209 | { |
10621efd | 210 | ENGINE *eng = ENGINE_new(); |
5b172463 | 211 | |
10621efd MC |
212 | if (!eng) { |
213 | return NULL; | |
214 | } | |
5b172463 | 215 | |
10621efd MC |
216 | if (!padlock_bind_helper(eng)) { |
217 | ENGINE_free(eng); | |
218 | return NULL; | |
219 | } | |
5b172463 | 220 | |
10621efd | 221 | return eng; |
5b172463 AP |
222 | } |
223 | ||
10621efd | 224 | # endif |
c65d409a | 225 | |
5b172463 | 226 | /* Check availability of the engine */ |
10621efd | 227 | static int padlock_init(ENGINE *e) |
5b172463 | 228 | { |
10621efd | 229 | return (padlock_use_rng || padlock_use_ace); |
5b172463 AP |
230 | } |
231 | ||
10621efd MC |
232 | /* |
233 | * This stuff is needed if this ENGINE is being compiled into a | |
234 | * self-contained shared-library. | |
5b172463 | 235 | */ |
10621efd MC |
236 | # ifdef DYNAMIC_ENGINE |
237 | static int padlock_bind_fn(ENGINE *e, const char *id) | |
5b172463 | 238 | { |
10621efd MC |
239 | if (id && (strcmp(id, padlock_id) != 0)) { |
240 | return 0; | |
241 | } | |
5b172463 | 242 | |
10621efd MC |
243 | if (!padlock_bind_helper(e)) { |
244 | return 0; | |
245 | } | |
5b172463 | 246 | |
10621efd | 247 | return 1; |
5b172463 AP |
248 | } |
249 | ||
097f9d8c | 250 | IMPLEMENT_DYNAMIC_CHECK_FN() |
10621efd MC |
251 | IMPLEMENT_DYNAMIC_BIND_FN(padlock_bind_fn) |
252 | # endif /* DYNAMIC_ENGINE */ | |
5b172463 | 253 | /* ===== Here comes the "real" engine ===== */ |
10621efd | 254 | # ifndef OPENSSL_NO_AES |
5b172463 | 255 | /* Some AES-related constants */ |
10621efd MC |
256 | # define AES_BLOCK_SIZE 16 |
257 | # define AES_KEY_SIZE_128 16 | |
258 | # define AES_KEY_SIZE_192 24 | |
259 | # define AES_KEY_SIZE_256 32 | |
260 | /* | |
261 | * Here we store the status information relevant to the current context. | |
262 | */ | |
263 | /* | |
264 | * BIG FAT WARNING: Inline assembler in PADLOCK_XCRYPT_ASM() depends on | |
265 | * the order of items in this structure. Don't blindly modify, reorder, | |
266 | * etc! | |
267 | */ | |
/*
 * Per-context PadLock state. The xcrypt wrappers address the members by
 * fixed byte offsets from the start of the structure: cdata+16 is loaded
 * into edx and cdata+32 into ebx. That is why iv (AES_BLOCK_SIZE = 16 bytes)
 * comes first, the control word union is sized by pad[4], and ks follows
 * (assumes a 4-byte unsigned int and no padding -- TODO confirm per ABI).
 *
 * Fields written by padlock_aes_init_key(): rounds, ksize, encdec (1 when
 * the context is not encrypting, always 0 for OFB) and keygen (1 when the
 * key schedule in ks was expanded in software, 0 for the raw 128-bit key).
 */
268 | struct padlock_cipher_data { | |
269 | unsigned char iv[AES_BLOCK_SIZE]; /* Initialization vector */ | |
270 | union { | |
271 | unsigned int pad[4]; | |
272 | struct { | |
273 | int rounds:4; | |
274 | int dgst:1; /* n/a in C3 */ | |
275 | int align:1; /* n/a in C3 */ | |
276 | int ciphr:1; /* n/a in C3 */ | |
277 | unsigned int keygen:1; | |
278 | int interm:1; | |
279 | unsigned int encdec:1; | |
280 | int ksize:2; | |
281 | } b; | |
282 | } cword; /* Control word */ | |
283 | AES_KEY ks; /* Encryption key */ | |
5b172463 AP |
284 | }; |
285 | ||
b88606c2 AP |
286 | /* |
287 | * Essentially this variable belongs in thread local storage. | |
288 | * Having this variable global on the other hand can only cause | |
289 | * a few bogus key reloads [if any at all on a single-CPU system], | |
290 | * so we accept the penalty... | |
291 | */ | |
292 | static volatile struct padlock_cipher_data *padlock_saved_context; | |
10621efd | 293 | # endif |
b88606c2 | 294 | |
ac84cb4c | 295 | /*- |
5b172463 AP |
296 | * ======================================================= |
297 | * Inline assembler section(s). | |
298 | * ======================================================= | |
299 | * Order of arguments is chosen to facilitate Windows port | |
300 | * using __fastcall calling convention. If you wish to add | |
b88606c2 | 301 | * more routines, keep in mind that first __fastcall |
5b172463 AP |
302 | * argument is passed in %ecx and second - in %edx. |
303 | * ======================================================= | |
304 | */ | |
10621efd | 305 | # if defined(__GNUC__) && __GNUC__>=2 |
5b172463 AP |
306 | /* |
307 | * As for excessive "push %ebx"/"pop %ebx" found all over. | |
308 | * When generating position-independent code GCC won't let | |
309 | * us use "b" in assembler templates nor even respect "ebx" | |
310 | * in "clobber description." Therefore the trouble... | |
311 | */ | |
312 | ||
10621efd MC |
313 | /* |
314 | * Helper function - check if a CPUID instruction is available on this CPU | |
315 | */ | |
/*
 * Returns 1 when the CPUID instruction is usable, 0 otherwise.
 *
 * The asm flips bit 21 (mask 0x200000) in a copy of EFLAGS, remembers the
 * flipped bit value in ecx, writes the copy back with popf, reads EFLAGS
 * again and XORs the re-read bit with the remembered one. A zero result
 * means the bit kept the value we wrote, i.e. it could be toggled.
 */
316 | static int padlock_insn_cpuid_available(void) | |
5b172463 | 317 | {
10621efd MC |
318 | int result = -1; |
319 | ||
320 | /* | |
321 | * We're checking if the bit #21 of EFLAGS can be toggled. If yes = | |
322 | * CPUID is available. | |
323 | */ | |
/* eax and ecx are scratch here and are listed as clobbered */
324 | asm volatile ("pushf\n" | |
325 | "popl %%eax\n" | |
326 | "xorl $0x200000, %%eax\n" | |
327 | "movl %%eax, %%ecx\n" | |
328 | "andl $0x200000, %%ecx\n" | |
329 | "pushl %%eax\n" | |
330 | "popf\n" | |
331 | "pushf\n" | |
332 | "popl %%eax\n" | |
333 | "andl $0x200000, %%eax\n" | |
334 | "xorl %%eax, %%ecx\n" | |
335 | "movl %%ecx, %0\n":"=r" (result)::"eax", "ecx"); | |
336 | ||
/* result == 0 <=> written and re-read bit agree <=> CPUID present */
337 | return (result == 0); | |
5b172463 AP |
338 | } |
339 | ||
10621efd MC |
340 | /* |
341 | * Load supported features of the CPU to see if the PadLock is available. | |
342 | */ | |
343 | static int padlock_available(void) | |
5b172463 | 344 | { |
10621efd MC |
345 | char vendor_string[16]; |
346 | unsigned int eax, edx; | |
347 | ||
348 | /* First check if the CPUID instruction is available at all... */ | |
349 | if (!padlock_insn_cpuid_available()) | |
350 | return 0; | |
351 | ||
352 | /* Are we running on the Centaur (VIA) CPU? */ | |
353 | eax = 0x00000000; | |
354 | vendor_string[12] = 0; | |
355 | asm volatile ("pushl %%ebx\n" | |
356 | "cpuid\n" | |
357 | "movl %%ebx,(%%edi)\n" | |
358 | "movl %%edx,4(%%edi)\n" | |
359 | "movl %%ecx,8(%%edi)\n" | |
360 | "popl %%ebx":"+a" (eax):"D"(vendor_string):"ecx", "edx"); | |
361 | if (strcmp(vendor_string, "CentaurHauls") != 0) | |
362 | return 0; | |
363 | ||
364 | /* Check for Centaur Extended Feature Flags presence */ | |
365 | eax = 0xC0000000; | |
366 | asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax)::"ecx", "edx"); | |
367 | if (eax < 0xC0000001) | |
368 | return 0; | |
369 | ||
370 | /* Read the Centaur Extended Feature Flags */ | |
371 | eax = 0xC0000001; | |
372 | asm volatile ("pushl %%ebx; cpuid; popl %%ebx":"+a" (eax), | |
373 | "=d"(edx)::"ecx"); | |
374 | ||
375 | /* Fill up some flags */ | |
376 | padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6)); | |
377 | padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2)); | |
378 | ||
379 | return padlock_use_ace + padlock_use_rng; | |
5b172463 AP |
380 | } |
381 | ||
10621efd MC |
382 | # ifndef OPENSSL_NO_AES |
383 | # ifndef AES_ASM | |
5b172463 | 384 | /* Our own htonl()/ntohl() */ |
10621efd | 385 | static inline void padlock_bswapl(AES_KEY *ks) |
5b172463 | 386 | { |
10621efd MC |
387 | size_t i = sizeof(ks->rd_key) / sizeof(ks->rd_key[0]); |
388 | unsigned int *key = ks->rd_key; | |
5b172463 | 389 | |
10621efd MC |
390 | while (i--) { |
391 | asm volatile ("bswapl %0":"+r" (*key)); | |
392 | key++; | |
393 | } | |
5b172463 | 394 | } |
10621efd MC |
395 | # endif |
396 | # endif | |
5b172463 | 397 | |
10621efd MC |
398 | /* |
399 | * Force key reload from memory to the CPU microcode. Loading EFLAGS from the | |
400 | * stack clears EFLAGS[30] which does the trick. | |
401 | */ | |
/*
 * Pushes EFLAGS and immediately pops it back. Per the comment preceding
 * this function, loading EFLAGS from the stack is what clears EFLAGS[30]
 * and thereby forces the key to be reloaded. No arguments, no return value.
 */
402 | static inline void padlock_reload_key(void) | |
5b172463 | 403 | {
10621efd | 404 | asm volatile ("pushfl; popfl");
5b172463 AP |
405 | } |
406 | ||
10621efd | 407 | # ifndef OPENSSL_NO_AES |
5b172463 AP |
408 | /* |
409 | * This is heuristic key context tracing. At first one | |
410 | * believes that one should use atomic swap instructions, | |
411 | * but it's not actually necessary. Point is that if | |
b88606c2 AP |
412 | * padlock_saved_context was changed by another thread |
413 | * after we've read it and before we compare it with cdata, | |
414 | * our key *shall* be reloaded upon thread context switch | |
415 | * and we are therefore set in either case... | |
5b172463 | 416 | */ |
/*
 * Compare cdata with padlock_saved_context and trigger a key reload when
 * they differ, then record cdata as the new saved context.
 *
 * Step by step:
 *   pushfl / btl $30      - test bit 30 of the saved EFLAGS image;
 *   jnc 1f                - bit clear: jump to the cleanup at label 1;
 *   cmpl / je 1f          - same context as last time: jump to label 1;
 *   popfl ; subl $4,%esp  - different context: reload EFLAGS from the stack
 *                           (the key-reload trick) and re-reserve the slot
 *                           so that the addl at label 1 balances the stack;
 *   1: addl $4,%esp       - drop the pushed EFLAGS image;
 *   movl %2,%0            - padlock_saved_context = cdata.
 */
10621efd | 417 | static inline void padlock_verify_context(struct padlock_cipher_data *cdata)
5b172463 | 418 | {
10621efd MC |
419 | asm volatile ("pushfl\n" |
420 | " btl $30,(%%esp)\n" | |
421 | " jnc 1f\n" | |
422 | " cmpl %2,%1\n" | |
423 | " je 1f\n" | |
424 | " popfl\n" | |
425 | " subl $4,%%esp\n" | |
426 | "1: addl $4,%%esp\n" | |
427 | " movl %2,%0":"+m" (padlock_saved_context) | |
428 | :"r"(padlock_saved_context), "r"(cdata):"cc"); | |
5b172463 AP |
429 | } |
430 | ||
431 | /* Template for padlock_xcrypt_* modes */ | |
10621efd MC |
432 | /* |
433 | * BIG FAT WARNING: The offsets used with 'leal' instructions describe items | |
434 | * of the 'padlock_cipher_data' structure. | |
5b172463 | 435 | */ |
10621efd MC |
/*
 * Expands to: static inline void *name(cnt, cdata, out, inp).
 *
 * Register set-up performed by the constraints and the two leal's:
 *   eax = cdata, ecx = cnt, edi = out, esi = inp,
 *   edx = cdata + 16, ebx = cdata + 32 (ebx saved/restored by hand).
 * rep_xcrypt is the string holding the opcode bytes of the instruction.
 * All four registers are declared as outputs, i.e. the instruction may
 * change them; whatever is left in eax is returned as "iv" (the CBC and
 * CFB callers copy AES_BLOCK_SIZE bytes from it back into ctx->iv).
 */
436 | # define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ |
437 | static inline void *name(size_t cnt, \ | |
438 | struct padlock_cipher_data *cdata, \ | |
439 | void *out, const void *inp) \ | |
440 | { void *iv; \ | |
441 | asm volatile ( "pushl %%ebx\n" \ | |
442 | " leal 16(%0),%%edx\n" \ | |
443 | " leal 32(%0),%%ebx\n" \ | |
444 | rep_xcrypt "\n" \ | |
445 | " popl %%ebx" \ | |
446 | : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ | |
447 | : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ | |
448 | : "edx", "cc", "memory"); \ | |
449 | return iv; \ | |
5b172463 AP |
450 | } |
451 | ||
452 | /* Generate all functions with appropriate opcodes */ | |
f7b36402 MC |
453 | /* rep xcryptecb */ |
454 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") | |
455 | /* rep xcryptcbc */ | |
10621efd | 456 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") |
f7b36402 | 457 | /* rep xcryptcfb */ |
10621efd | 458 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") |
f7b36402 | 459 | /* rep xcryptofb */ |
10621efd MC |
460 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") |
461 | # endif | |
5b172463 | 462 | /* The RNG call itself */ |
10621efd | 463 | static inline unsigned int padlock_xstore(void *addr, unsigned int edx_in) |
5b172463 | 464 | { |
10621efd | 465 | unsigned int eax_out; |
5b172463 | 466 | |
10621efd MC |
467 | asm volatile (".byte 0x0f,0xa7,0xc0" /* xstore */ |
468 | :"=a" (eax_out), "=m"(*(unsigned *)addr) | |
469 | :"D"(addr), "d"(edx_in) | |
470 | ); | |
5b172463 | 471 | |
10621efd | 472 | return eax_out; |
5b172463 AP |
473 | } |
474 | ||
10621efd MC |
475 | /* |
476 | * Why not inline 'rep movsd'? I failed to find information on what value in | |
477 | * Direction Flag one can expect and consequently have to apply | |
478 | * "better-safe-than-sorry" approach and assume "undefined." I could | |
479 | * explicitly clear it and restore the original value upon return from | |
480 | * padlock_aes_cipher, but it's presumably too much trouble for too little | |
481 | * gain... In case you wonder 'rep xcrypt*' instructions above are *not* | |
482 | * affected by the Direction Flag and pointers advance toward larger | |
483 | * addresses unconditionally. | |
484 | */ | |
/*
 * Word-wise copy of n bytes from src to dst, returning dst.
 *
 * Only whole `long` words are copied: n is rounded down to a multiple of
 * sizeof(long), and a length smaller than one word copies nothing. (The
 * previous do/while form copied one word unconditionally and then wrapped
 * the counter, overrunning both buffers when n < sizeof(long).)
 *
 * NOTE(review): both pointers are accessed as long, so callers are expected
 * to pass suitably aligned buffers.
 */
static inline unsigned char *padlock_memcpy(void *dst, const void *src,
                                            size_t n)
{
    long *d = dst;
    const long *s = src;
    size_t words = n / sizeof(*d);

    while (words-- > 0)
        *d++ = *s++;

    return dst;
}
498 | ||
10621efd | 499 | # elif defined(_MSC_VER) |
5b172463 AP |
500 | /* |
501 | * Unlike GCC these are real functions. In order to minimize impact | |
502 | * on performance we adhere to __fastcall calling convention in | |
503 | * order to get two first arguments passed through %ecx and %edx. | |
504 | * Which kind of suits very well, as instructions in question use | |
505 | * both %ecx and %edx as input:-) | |
506 | */ | |
10621efd MC |
/*
 * Emits the four raw opcode bytes 0xf3, 0x0f, 0xa7, <code> through _emit.
 * <code> is the last byte that selects the variant (0xc8, 0xd0, 0xe0 or
 * 0xe8 in the instantiations that follow).
 */
507 | # define REP_XCRYPT(code) \ |
508 | _asm _emit 0xf3 \ | |
509 | _asm _emit 0x0f _asm _emit 0xa7 \ | |
510 | _asm _emit code | |
5b172463 | 511 | |
10621efd MC |
512 | /* |
513 | * BIG FAT WARNING: The offsets used with 'lea' instructions describe items | |
514 | * of the 'padlock_cipher_data' structure. | |
515 | */ | |
/*
 * Expands to: static void * __fastcall name(cnt, cdata, outp, inp).
 *
 * With __fastcall, cnt arrives in ecx and cdata in edx (see the commentary
 * at the top of the assembler section). The body moves cdata to eax, points
 * edx at cdata+16 and ebx at cdata+32, loads edi/esi with outp/inp and
 * emits the instruction. There is no explicit return statement: the value
 * left in eax is what the caller receives (NOTE(review): relies on the
 * compiler's inline-asm return convention).
 */
516 | # define PADLOCK_XCRYPT_ASM(name,code) \ | |
517 | static void * __fastcall \ | |
518 | name (size_t cnt, void *cdata, \ | |
519 | void *outp, const void *inp) \ | |
520 | { _asm mov eax,edx \ | |
521 | _asm lea edx,[eax+16] \ | |
522 | _asm lea ebx,[eax+32] \ | |
523 | _asm mov edi,outp \ | |
524 | _asm mov esi,inp \ | |
525 | REP_XCRYPT(code) \ | |
5b172463 AP |
526 | } |
527 | ||
925bfca5 MC |
528 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb,0xc8) |
529 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc,0xd0) | |
530 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb,0xe0) | |
531 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb,0xe8) | |
5b172463 | 532 | |
10621efd MC |
/*
 * Microsoft C flavour of the RNG call. outp arrives in ecx and is copied to
 * edi; code arrives in edx and is left there. The three emitted bytes
 * (0x0f 0xa7 0xc0) are the same opcode the GCC variant labels "xstore".
 * No explicit return: the caller gets whatever the instruction leaves in
 * eax (NOTE(review): relies on the compiler's inline-asm return convention).
 */
533 | static int __fastcall padlock_xstore(void *outp, unsigned int code) |
534 | { | |
925bfca5 MC |
535 | _asm mov edi,ecx |
536 | _asm _emit 0x0f _asm _emit 0xa7 _asm _emit 0xc0 | |
537 | } | |
538 | ||
/*
 * Microsoft C flavour of padlock_reload_key(): pushes EFLAGS and pops it
 * straight back, the same push/pop pair the GCC variant uses to force a
 * key reload.
 */
539 | static void __fastcall padlock_reload_key(void) | |
10621efd | 540 | {
925bfca5 MC |
541 | _asm pushfd |
542 | _asm popfd | |
543 | } | |
544 | ||
/*
 * Microsoft C flavour of padlock_verify_context(); cdata arrives in ecx.
 *
 * Bit 30 of the pushed EFLAGS image is tested; if it is set and cdata
 * differs from padlock_saved_context, EFLAGS is popped back (forcing a key
 * reload) and the stack slot is re-reserved so that the "add esp,4" at
 * "skip" always balances the initial pushfd. In every case cdata is then
 * stored into padlock_saved_context.
 */
545 | static void __fastcall padlock_verify_context(void *cdata) | |
10621efd | 546 | {
925bfca5 MC |
547 | _asm { |
548 | pushfd | |
549 | bt DWORD PTR[esp],30 | |
550 | jnc skip | |
551 | cmp ecx,padlock_saved_context | |
552 | je skip | |
553 | popfd | |
554 | sub esp,4 | |
555 | skip: add esp,4 | |
556 | mov padlock_saved_context,ecx | |
557 | } | |
558 | } | |
559 | ||
/*
 * Microsoft C flavour of padlock_available(), written entirely in inline
 * assembler. The result is whatever is left in eax at "noluck" (there is no
 * explicit return statement).
 *
 * Sequence:
 *   1. Toggle bit 21 of EFLAGS and check that the change sticks; if not,
 *      jump to noluck.
 *   2. cpuid leaf 0: compare ebx/edx/ecx against 'tneC', 'Hrua', 'slua'.
 *   3. cpuid leaf 0xC0000000: require the returned eax >= 0xC0000001.
 *   4. cpuid leaf 0xC0000001: if edx bits 6 and 7 are both set, set
 *      padlock_use_ace and bump eax; if bits 2 and 3 are both set, set
 *      padlock_use_rng and bump eax.
 *
 * eax is zeroed after each cpuid, so every failure exit after step 1
 * yields 0.
 * NOTE(review): on the very first "jnc noluck" eax still holds the XOR of
 * the two EFLAGS reads rather than an explicitly cleared value.
 */
560 | static int | |
561 | padlock_available(void) | |
10621efd | 562 | {
925bfca5 MC |
563 | _asm { |
564 | pushfd | |
565 | pop eax | |
566 | mov ecx,eax | |
567 | xor eax,1<<21 | |
568 | push eax | |
569 | popfd | |
570 | pushfd | |
571 | pop eax | |
572 | xor eax,ecx | |
573 | bt eax,21 | |
574 | jnc noluck | |
575 | mov eax,0 | |
576 | cpuid | |
577 | xor eax,eax | |
578 | cmp ebx,'tneC' | |
579 | jne noluck | |
580 | cmp edx,'Hrua' | |
581 | jne noluck | |
582 | cmp ecx,'slua' | |
583 | jne noluck | |
584 | mov eax,0xC0000000 | |
585 | cpuid | |
586 | mov edx,eax | |
587 | xor eax,eax | |
588 | cmp edx,0xC0000001 | |
589 | jb noluck | |
590 | mov eax,0xC0000001 | |
591 | cpuid | |
592 | xor eax,eax | |
593 | bt edx,6 | |
594 | jnc skip_a | |
595 | bt edx,7 | |
596 | jnc skip_a | |
597 | mov padlock_use_ace,1 | |
598 | inc eax | |
599 | skip_a: bt edx,2 | |
600 | jnc skip_r | |
601 | bt edx,3 | |
602 | jnc skip_r | |
603 | mov padlock_use_rng,1 | |
604 | inc eax | |
605 | skip_r: | |
606 | noluck: | |
607 | } | |
608 | } | |
609 | ||
/*
 * Microsoft C flavour of padlock_bswapl(); key arrives in ecx.
 *
 * Byte-swaps 60 consecutive 32-bit words in place: esi and edi both start
 * at key, lodsd/bswap/stosd process one word per iteration and "loop"
 * counts ecx down from 60. The direction flag is cleared with cld for the
 * string instructions and the caller's EFLAGS is restored by the
 * pushfd/popfd pair.
 * NOTE(review): 60 is hard-coded; presumably it matches the number of
 * elements in AES_KEY.rd_key -- confirm against <openssl/aes.h>.
 */
610 | static void __fastcall padlock_bswapl(void *key) | |
10621efd | 611 | {
925bfca5 MC |
612 | _asm { |
613 | pushfd | |
614 | cld | |
615 | mov esi,ecx | |
616 | mov edi,ecx | |
617 | mov ecx,60 | |
618 | up: lodsd | |
619 | bswap eax | |
620 | stosd | |
621 | loop up | |
622 | popfd | |
623 | } | |
624 | } | |
625 | ||
10621efd MC |
626 | /* |
627 | * MS actually specifies status of Direction Flag and compiler even manages | |
628 | * to compile following as 'rep movsd' all by itself... | |
52697590 | 629 | */ |
10621efd MC |
630 | # define padlock_memcpy(o,i,n) ((unsigned char *)memcpy((o),(i),(n)&~3U)) |
631 | # endif | |
5b172463 | 632 | /* ===== AES encryption/decryption ===== */ |
10621efd MC |
633 | # ifndef OPENSSL_NO_AES |
634 | # if defined(NID_aes_128_cfb128) && ! defined (NID_aes_128_cfb) | |
635 | # define NID_aes_128_cfb NID_aes_128_cfb128 | |
636 | # endif | |
637 | # if defined(NID_aes_128_ofb128) && ! defined (NID_aes_128_ofb) | |
638 | # define NID_aes_128_ofb NID_aes_128_ofb128 | |
639 | # endif | |
640 | # if defined(NID_aes_192_cfb128) && ! defined (NID_aes_192_cfb) | |
641 | # define NID_aes_192_cfb NID_aes_192_cfb128 | |
642 | # endif | |
643 | # if defined(NID_aes_192_ofb128) && ! defined (NID_aes_192_ofb) | |
644 | # define NID_aes_192_ofb NID_aes_192_ofb128 | |
645 | # endif | |
646 | # if defined(NID_aes_256_cfb128) && ! defined (NID_aes_256_cfb) | |
647 | # define NID_aes_256_cfb NID_aes_256_cfb128 | |
648 | # endif | |
649 | # if defined(NID_aes_256_ofb128) && ! defined (NID_aes_256_ofb) | |
650 | # define NID_aes_256_ofb NID_aes_256_ofb128 | |
651 | # endif | |
47050853 MC |
652 | /* |
653 | * List of supported ciphers. | |
654 | */ static int padlock_cipher_nids[] = { | |
10621efd MC |
655 | NID_aes_128_ecb, |
656 | NID_aes_128_cbc, | |
657 | NID_aes_128_cfb, | |
658 | NID_aes_128_ofb, | |
659 | ||
660 | NID_aes_192_ecb, | |
661 | NID_aes_192_cbc, | |
662 | NID_aes_192_cfb, | |
663 | NID_aes_192_ofb, | |
664 | ||
665 | NID_aes_256_ecb, | |
666 | NID_aes_256_cbc, | |
667 | NID_aes_256_cfb, | |
668 | NID_aes_256_ofb, | |
5b172463 | 669 | };
10621efd MC |
670 | |
/*
 * Number of entries in padlock_cipher_nids; padlock_ciphers() returns this
 * value together with the array when it is asked for the list of NIDs.
 */
671 | static int padlock_cipher_nids_num = (sizeof(padlock_cipher_nids) / |
672 | sizeof(padlock_cipher_nids[0])); | |
5b172463 AP |
673 | |
674 | /* Function prototypes ... */ | |
675 | static int padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, | |
10621efd | 676 | const unsigned char *iv, int enc); |
5b172463 | 677 | static int padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, |
10621efd MC |
678 | const unsigned char *in, size_t nbytes); |
679 | ||
/*
 * NEAREST_ALIGNED(ptr): ptr rounded up to the next multiple of 16 (ptr
 * itself when it is already aligned); the result is an unsigned char *.
 * ALIGNED_CIPHER_DATA(ctx): the 16-byte aligned padlock_cipher_data that
 * lives inside ctx->cipher_data. The cipher descriptors reserve
 * sizeof(struct padlock_cipher_data) + 16 bytes so the rounded-up pointer
 * still fits.
 */
680 | # define NEAREST_ALIGNED(ptr) ( (unsigned char *)(ptr) + \ |
681 | ( (0x10 - ((size_t)(ptr) & 0x0F)) & 0x0F ) ) | |
682 | # define ALIGNED_CIPHER_DATA(ctx) ((struct padlock_cipher_data *)\ | |
683 | NEAREST_ALIGNED(ctx->cipher_data)) | |
684 | ||
685 | # define EVP_CIPHER_block_size_ECB AES_BLOCK_SIZE | |
686 | # define EVP_CIPHER_block_size_CBC AES_BLOCK_SIZE | |
687 | # define EVP_CIPHER_block_size_OFB 1 | |
688 | # define EVP_CIPHER_block_size_CFB 1 | |
689 | ||
690 | /* | |
691 | * Declaring so many ciphers by hand would be a pain. Instead introduce a bit | |
692 | * of preprocessor magic :-) | |
693 | */ | |
/*
 * DECLARE_AES_EVP(ksize, lmode, umode) defines the static EVP_CIPHER
 * descriptor padlock_aes_<ksize>_<lmode>:
 *   ksize - 128, 192 or 256; selects NID_aes_<ksize>_<lmode> and
 *           AES_KEY_SIZE_<ksize>
 *   lmode - lower-case mode name used in identifiers (ecb, cbc, cfb, ofb)
 *   umode - upper-case mode name; selects EVP_CIPHER_block_size_<umode>
 *           and the EVP_CIPH_<umode>_MODE flag
 * All descriptors share padlock_aes_init_key and padlock_aes_cipher, and
 * reserve sizeof(struct padlock_cipher_data) + 16 bytes of context data,
 * the extra 16 leaving room for ALIGNED_CIPHER_DATA to round the pointer up.
 */
694 | # define DECLARE_AES_EVP(ksize,lmode,umode) \ | |
695 | static const EVP_CIPHER padlock_aes_##ksize##_##lmode = { \ | |
696 | NID_aes_##ksize##_##lmode, \ | |
697 | EVP_CIPHER_block_size_##umode, \ | |
698 | AES_KEY_SIZE_##ksize, \ | |
699 | AES_BLOCK_SIZE, \ | |
700 | 0 | EVP_CIPH_##umode##_MODE, \ | |
701 | padlock_aes_init_key, \ | |
702 | padlock_aes_cipher, \ | |
703 | NULL, \ | |
704 | sizeof(struct padlock_cipher_data) + 16, \ | |
705 | EVP_CIPHER_set_asn1_iv, \ | |
706 | EVP_CIPHER_get_asn1_iv, \ | |
707 | NULL, \ | |
708 | NULL \ | |
5b172463 AP |
709 | } |
710 | ||
10621efd MC |
711 | DECLARE_AES_EVP(128, ecb, ECB); |
712 | DECLARE_AES_EVP(128, cbc, CBC); | |
713 | DECLARE_AES_EVP(128, cfb, CFB); | |
714 | DECLARE_AES_EVP(128, ofb, OFB); | |
5b172463 | 715 | |
10621efd MC |
716 | DECLARE_AES_EVP(192, ecb, ECB); |
717 | DECLARE_AES_EVP(192, cbc, CBC); | |
718 | DECLARE_AES_EVP(192, cfb, CFB); | |
719 | DECLARE_AES_EVP(192, ofb, OFB); | |
5b172463 | 720 | |
10621efd MC |
721 | DECLARE_AES_EVP(256, ecb, ECB); |
722 | DECLARE_AES_EVP(256, cbc, CBC); | |
723 | DECLARE_AES_EVP(256, cfb, CFB); | |
724 | DECLARE_AES_EVP(256, ofb, OFB); | |
5b172463 AP |
725 | |
726 | static int | |
10621efd MC |
727 | padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, |
728 | int nid) | |
5b172463 | 729 | { |
10621efd MC |
730 | /* No specific cipher => return a list of supported nids ... */ |
731 | if (!cipher) { | |
732 | *nids = padlock_cipher_nids; | |
733 | return padlock_cipher_nids_num; | |
734 | } | |
735 | ||
736 | /* ... or the requested "cipher" otherwise */ | |
737 | switch (nid) { | |
738 | case NID_aes_128_ecb: | |
739 | *cipher = &padlock_aes_128_ecb; | |
740 | break; | |
741 | case NID_aes_128_cbc: | |
742 | *cipher = &padlock_aes_128_cbc; | |
743 | break; | |
744 | case NID_aes_128_cfb: | |
745 | *cipher = &padlock_aes_128_cfb; | |
746 | break; | |
747 | case NID_aes_128_ofb: | |
748 | *cipher = &padlock_aes_128_ofb; | |
749 | break; | |
750 | ||
751 | case NID_aes_192_ecb: | |
752 | *cipher = &padlock_aes_192_ecb; | |
753 | break; | |
754 | case NID_aes_192_cbc: | |
755 | *cipher = &padlock_aes_192_cbc; | |
756 | break; | |
757 | case NID_aes_192_cfb: | |
758 | *cipher = &padlock_aes_192_cfb; | |
759 | break; | |
760 | case NID_aes_192_ofb: | |
761 | *cipher = &padlock_aes_192_ofb; | |
762 | break; | |
763 | ||
764 | case NID_aes_256_ecb: | |
765 | *cipher = &padlock_aes_256_ecb; | |
766 | break; | |
767 | case NID_aes_256_cbc: | |
768 | *cipher = &padlock_aes_256_cbc; | |
769 | break; | |
770 | case NID_aes_256_cfb: | |
771 | *cipher = &padlock_aes_256_cfb; | |
772 | break; | |
773 | case NID_aes_256_ofb: | |
774 | *cipher = &padlock_aes_256_ofb; | |
775 | break; | |
776 | ||
777 | default: | |
778 | /* Sorry, we don't support this NID */ | |
779 | *cipher = NULL; | |
780 | return 0; | |
781 | } | |
782 | ||
783 | return 1; | |
5b172463 AP |
784 | } |
785 | ||
786 | /* Prepare the encryption key for PadLock usage */ | |
787 | static int | |
10621efd MC |
788 | padlock_aes_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, |
789 | const unsigned char *iv, int enc) | |
5b172463 | 790 | { |
10621efd MC |
791 | struct padlock_cipher_data *cdata; |
792 | int key_len = EVP_CIPHER_CTX_key_length(ctx) * 8; | |
793 | ||
794 | if (key == NULL) | |
795 | return 0; /* ERROR */ | |
796 | ||
797 | cdata = ALIGNED_CIPHER_DATA(ctx); | |
798 | memset(cdata, 0, sizeof(struct padlock_cipher_data)); | |
799 | ||
800 | /* Prepare Control word. */ | |
801 | if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE) | |
802 | cdata->cword.b.encdec = 0; | |
803 | else | |
804 | cdata->cword.b.encdec = (ctx->encrypt == 0); | |
805 | cdata->cword.b.rounds = 10 + (key_len - 128) / 32; | |
806 | cdata->cword.b.ksize = (key_len - 128) / 64; | |
807 | ||
808 | switch (key_len) { | |
809 | case 128: | |
810 | /* | |
811 | * PadLock can generate an extended key for AES128 in hardware | |
812 | */ | |
813 | memcpy(cdata->ks.rd_key, key, AES_KEY_SIZE_128); | |
814 | cdata->cword.b.keygen = 0; | |
815 | break; | |
816 | ||
817 | case 192: | |
818 | case 256: | |
819 | /* | |
820 | * Generate an extended AES key in software. Needed for AES192/AES256 | |
821 | */ | |
822 | /* | |
823 | * Well, the above applies to Stepping 8 CPUs and is listed as | |
824 | * hardware errata. They most likely will fix it at some point and | |
825 | * then a check for stepping would be due here. | |
826 | */ | |
827 | if (EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_CFB_MODE || | |
828 | EVP_CIPHER_CTX_mode(ctx) == EVP_CIPH_OFB_MODE || enc) | |
829 | AES_set_encrypt_key(key, key_len, &cdata->ks); | |
830 | else | |
831 | AES_set_decrypt_key(key, key_len, &cdata->ks); | |
832 | # ifndef AES_ASM | |
833 | /* | |
834 | * OpenSSL C functions use byte-swapped extended key. | |
835 | */ | |
836 | padlock_bswapl(&cdata->ks); | |
837 | # endif | |
838 | cdata->cword.b.keygen = 1; | |
839 | break; | |
840 | ||
841 | default: | |
842 | /* ERROR */ | |
843 | return 0; | |
844 | } | |
845 | ||
846 | /* | |
847 | * This is done to cover for cases when user reuses the | |
848 | * context for new key. The catch is that if we don't do | |
849 | * this, padlock_eas_cipher might proceed with old key... | |
850 | */ | |
851 | padlock_reload_key(); | |
852 | ||
853 | return 1; | |
5b172463 AP |
854 | } |
855 | ||
10621efd | 856 | /*- |
5b172463 AP |
857 | * Simplified version of padlock_aes_cipher() used when |
858 | * 1) both input and output buffers are at aligned addresses. | |
859 | * or when | |
860 | * 2) running on a newer CPU that doesn't require aligned buffers. | |
861 | */ | |
862 | static int | |
863 | padlock_aes_cipher_omnivorous(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, | |
10621efd | 864 | const unsigned char *in_arg, size_t nbytes) |
5b172463 | 865 | { |
10621efd MC |
866 | struct padlock_cipher_data *cdata; |
867 | void *iv; | |
868 | ||
869 | cdata = ALIGNED_CIPHER_DATA(ctx); | |
870 | padlock_verify_context(cdata); | |
871 | ||
872 | switch (EVP_CIPHER_CTX_mode(ctx)) { | |
873 | case EVP_CIPH_ECB_MODE: | |
874 | padlock_xcrypt_ecb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); | |
875 | break; | |
876 | ||
877 | case EVP_CIPH_CBC_MODE: | |
878 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
879 | iv = padlock_xcrypt_cbc(nbytes / AES_BLOCK_SIZE, cdata, out_arg, | |
880 | in_arg); | |
881 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | |
882 | break; | |
883 | ||
884 | case EVP_CIPH_CFB_MODE: | |
885 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
886 | iv = padlock_xcrypt_cfb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, | |
887 | in_arg); | |
888 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | |
889 | break; | |
890 | ||
891 | case EVP_CIPH_OFB_MODE: | |
892 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
893 | padlock_xcrypt_ofb(nbytes / AES_BLOCK_SIZE, cdata, out_arg, in_arg); | |
894 | memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); | |
895 | break; | |
896 | ||
897 | default: | |
898 | return 0; | |
899 | } | |
900 | ||
901 | memset(cdata->iv, 0, AES_BLOCK_SIZE); | |
902 | ||
903 | return 1; | |
5b172463 AP |
904 | } |
905 | ||
10621efd MC |
906 | # ifndef PADLOCK_CHUNK |
907 | # define PADLOCK_CHUNK 512 /* Must be a power of 2 no smaller than 16 */ | |
908 | # endif | |
909 | # if PADLOCK_CHUNK<16 || PADLOCK_CHUNK&(PADLOCK_CHUNK-1) | |
910 | # error "insane PADLOCK_CHUNK..." | |
911 | # endif | |
5b172463 | 912 | |
10621efd MC |
913 | /* |
914 | * Re-align the arguments to 16-Bytes boundaries and run the encryption | |
915 | * function itself. This function is not AES-specific. | |
916 | */ | |
5b172463 AP |
917 | static int |
918 | padlock_aes_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out_arg, | |
10621efd | 919 | const unsigned char *in_arg, size_t nbytes) |
5b172463 | 920 | { |
10621efd MC |
921 | struct padlock_cipher_data *cdata; |
922 | const void *inp; | |
923 | unsigned char *out; | |
924 | void *iv; | |
925 | int inp_misaligned, out_misaligned, realign_in_loop; | |
926 | size_t chunk, allocated = 0; | |
927 | ||
928 | /* | |
929 | * ctx->num is maintained in byte-oriented modes, such as CFB and OFB... | |
930 | */ | |
931 | if ((chunk = ctx->num)) { /* borrow chunk variable */ | |
932 | unsigned char *ivp = ctx->iv; | |
933 | ||
934 | switch (EVP_CIPHER_CTX_mode(ctx)) { | |
935 | case EVP_CIPH_CFB_MODE: | |
936 | if (chunk >= AES_BLOCK_SIZE) | |
937 | return 0; /* bogus value */ | |
938 | ||
939 | if (ctx->encrypt) | |
940 | while (chunk < AES_BLOCK_SIZE && nbytes != 0) { | |
941 | ivp[chunk] = *(out_arg++) = *(in_arg++) ^ ivp[chunk]; | |
942 | chunk++, nbytes--; | |
943 | } else | |
944 | while (chunk < AES_BLOCK_SIZE && nbytes != 0) { | |
945 | unsigned char c = *(in_arg++); | |
946 | *(out_arg++) = c ^ ivp[chunk]; | |
947 | ivp[chunk++] = c, nbytes--; | |
948 | } | |
949 | ||
950 | ctx->num = chunk % AES_BLOCK_SIZE; | |
951 | break; | |
952 | case EVP_CIPH_OFB_MODE: | |
953 | if (chunk >= AES_BLOCK_SIZE) | |
954 | return 0; /* bogus value */ | |
955 | ||
956 | while (chunk < AES_BLOCK_SIZE && nbytes != 0) { | |
957 | *(out_arg++) = *(in_arg++) ^ ivp[chunk]; | |
958 | chunk++, nbytes--; | |
959 | } | |
960 | ||
961 | ctx->num = chunk % AES_BLOCK_SIZE; | |
962 | break; | |
963 | } | |
964 | } | |
965 | ||
966 | if (nbytes == 0) | |
967 | return 1; | |
968 | # if 0 | |
969 | if (nbytes % AES_BLOCK_SIZE) | |
970 | return 0; /* are we expected to do tail processing? */ | |
971 | # else | |
972 | /* | |
973 | * nbytes is always multiple of AES_BLOCK_SIZE in ECB and CBC modes and | |
974 | * arbitrary value in byte-oriented modes, such as CFB and OFB... | |
975 | */ | |
976 | # endif | |
977 | ||
978 | /* | |
979 | * VIA promises CPUs that won't require alignment in the future. For now | |
980 | * padlock_aes_align_required is initialized to 1 and the condition is | |
981 | * never met... | |
982 | */ | |
983 | /* | |
984 | * C7 core is capable to manage unaligned input in non-ECB[!] mode, but | |
985 | * performance penalties appear to be approximately same as for software | |
986 | * alignment below or ~3x. They promise to improve it in the future, but | |
987 | * for now we can just as well pretend that it can only handle aligned | |
988 | * input... | |
989 | */ | |
990 | if (!padlock_aes_align_required && (nbytes % AES_BLOCK_SIZE) == 0) | |
991 | return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); | |
992 | ||
993 | inp_misaligned = (((size_t)in_arg) & 0x0F); | |
994 | out_misaligned = (((size_t)out_arg) & 0x0F); | |
995 | ||
996 | /* | |
997 | * Note that even if output is aligned and input not, I still prefer to | |
998 | * loop instead of copy the whole input and then encrypt in one stroke. | |
999 | * This is done in order to improve L1 cache utilization... | |
1000 | */ | |
1001 | realign_in_loop = out_misaligned | inp_misaligned; | |
1002 | ||
1003 | if (!realign_in_loop && (nbytes % AES_BLOCK_SIZE) == 0) | |
1004 | return padlock_aes_cipher_omnivorous(ctx, out_arg, in_arg, nbytes); | |
1005 | ||
1006 | /* this takes one "if" out of the loops */ | |
1007 | chunk = nbytes; | |
1008 | chunk %= PADLOCK_CHUNK; | |
1009 | if (chunk == 0) | |
1010 | chunk = PADLOCK_CHUNK; | |
1011 | ||
1012 | if (out_misaligned) { | |
1013 | /* optmize for small input */ | |
1014 | allocated = (chunk < nbytes ? PADLOCK_CHUNK : nbytes); | |
1015 | out = alloca(0x10 + allocated); | |
1016 | out = NEAREST_ALIGNED(out); | |
1017 | } else | |
1018 | out = out_arg; | |
1019 | ||
1020 | cdata = ALIGNED_CIPHER_DATA(ctx); | |
1021 | padlock_verify_context(cdata); | |
1022 | ||
1023 | switch (EVP_CIPHER_CTX_mode(ctx)) { | |
1024 | case EVP_CIPH_ECB_MODE: | |
1025 | do { | |
1026 | if (inp_misaligned) | |
1027 | inp = padlock_memcpy(out, in_arg, chunk); | |
1028 | else | |
1029 | inp = in_arg; | |
1030 | in_arg += chunk; | |
1031 | ||
1032 | padlock_xcrypt_ecb(chunk / AES_BLOCK_SIZE, cdata, out, inp); | |
1033 | ||
1034 | if (out_misaligned) | |
1035 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | |
1036 | else | |
1037 | out = out_arg += chunk; | |
1038 | ||
1039 | nbytes -= chunk; | |
1040 | chunk = PADLOCK_CHUNK; | |
1041 | } while (nbytes); | |
1042 | break; | |
1043 | ||
1044 | case EVP_CIPH_CBC_MODE: | |
1045 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
1046 | goto cbc_shortcut; | |
1047 | do { | |
1048 | if (iv != cdata->iv) | |
1049 | memcpy(cdata->iv, iv, AES_BLOCK_SIZE); | |
1050 | chunk = PADLOCK_CHUNK; | |
1051 | cbc_shortcut: /* optimize for small input */ | |
1052 | if (inp_misaligned) | |
1053 | inp = padlock_memcpy(out, in_arg, chunk); | |
1054 | else | |
1055 | inp = in_arg; | |
1056 | in_arg += chunk; | |
1057 | ||
1058 | iv = padlock_xcrypt_cbc(chunk / AES_BLOCK_SIZE, cdata, out, inp); | |
1059 | ||
1060 | if (out_misaligned) | |
1061 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | |
1062 | else | |
1063 | out = out_arg += chunk; | |
1064 | ||
1065 | } while (nbytes -= chunk); | |
1066 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | |
1067 | break; | |
1068 | ||
1069 | case EVP_CIPH_CFB_MODE: | |
1070 | memcpy(iv = cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
1071 | chunk &= ~(AES_BLOCK_SIZE - 1); | |
1072 | if (chunk) | |
1073 | goto cfb_shortcut; | |
1074 | else | |
1075 | goto cfb_skiploop; | |
1076 | do { | |
1077 | if (iv != cdata->iv) | |
1078 | memcpy(cdata->iv, iv, AES_BLOCK_SIZE); | |
1079 | chunk = PADLOCK_CHUNK; | |
1080 | cfb_shortcut: /* optimize for small input */ | |
1081 | if (inp_misaligned) | |
1082 | inp = padlock_memcpy(out, in_arg, chunk); | |
1083 | else | |
1084 | inp = in_arg; | |
1085 | in_arg += chunk; | |
1086 | ||
1087 | iv = padlock_xcrypt_cfb(chunk / AES_BLOCK_SIZE, cdata, out, inp); | |
1088 | ||
1089 | if (out_misaligned) | |
1090 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | |
1091 | else | |
1092 | out = out_arg += chunk; | |
1093 | ||
1094 | nbytes -= chunk; | |
1095 | } while (nbytes >= AES_BLOCK_SIZE); | |
1096 | ||
1097 | cfb_skiploop: | |
1098 | if (nbytes) { | |
1099 | unsigned char *ivp = cdata->iv; | |
1100 | ||
1101 | if (iv != ivp) { | |
1102 | memcpy(ivp, iv, AES_BLOCK_SIZE); | |
1103 | iv = ivp; | |
1104 | } | |
1105 | ctx->num = nbytes; | |
1106 | if (cdata->cword.b.encdec) { | |
1107 | cdata->cword.b.encdec = 0; | |
1108 | padlock_reload_key(); | |
1109 | padlock_xcrypt_ecb(1, cdata, ivp, ivp); | |
1110 | cdata->cword.b.encdec = 1; | |
1111 | padlock_reload_key(); | |
1112 | while (nbytes) { | |
1113 | unsigned char c = *(in_arg++); | |
1114 | *(out_arg++) = c ^ *ivp; | |
1115 | *(ivp++) = c, nbytes--; | |
1116 | } | |
1117 | } else { | |
1118 | padlock_reload_key(); | |
1119 | padlock_xcrypt_ecb(1, cdata, ivp, ivp); | |
1120 | padlock_reload_key(); | |
1121 | while (nbytes) { | |
1122 | *ivp = *(out_arg++) = *(in_arg++) ^ *ivp; | |
1123 | ivp++, nbytes--; | |
1124 | } | |
1125 | } | |
1126 | } | |
1127 | ||
1128 | memcpy(ctx->iv, iv, AES_BLOCK_SIZE); | |
1129 | break; | |
1130 | ||
1131 | case EVP_CIPH_OFB_MODE: | |
1132 | memcpy(cdata->iv, ctx->iv, AES_BLOCK_SIZE); | |
1133 | chunk &= ~(AES_BLOCK_SIZE - 1); | |
1134 | if (chunk) | |
1135 | do { | |
1136 | if (inp_misaligned) | |
1137 | inp = padlock_memcpy(out, in_arg, chunk); | |
1138 | else | |
1139 | inp = in_arg; | |
1140 | in_arg += chunk; | |
1141 | ||
1142 | padlock_xcrypt_ofb(chunk / AES_BLOCK_SIZE, cdata, out, inp); | |
1143 | ||
1144 | if (out_misaligned) | |
1145 | out_arg = padlock_memcpy(out_arg, out, chunk) + chunk; | |
1146 | else | |
1147 | out = out_arg += chunk; | |
1148 | ||
1149 | nbytes -= chunk; | |
1150 | chunk = PADLOCK_CHUNK; | |
1151 | } while (nbytes >= AES_BLOCK_SIZE); | |
1152 | ||
1153 | if (nbytes) { | |
1154 | unsigned char *ivp = cdata->iv; | |
1155 | ||
1156 | ctx->num = nbytes; | |
1157 | padlock_reload_key(); /* empirically found */ | |
1158 | padlock_xcrypt_ecb(1, cdata, ivp, ivp); | |
1159 | padlock_reload_key(); /* empirically found */ | |
1160 | while (nbytes) { | |
1161 | *(out_arg++) = *(in_arg++) ^ *ivp; | |
1162 | ivp++, nbytes--; | |
1163 | } | |
1164 | } | |
1165 | ||
1166 | memcpy(ctx->iv, cdata->iv, AES_BLOCK_SIZE); | |
1167 | break; | |
1168 | ||
1169 | default: | |
1170 | return 0; | |
1171 | } | |
1172 | ||
1173 | /* Clean the realign buffer if it was used */ | |
1174 | if (out_misaligned) { | |
1175 | volatile unsigned long *p = (void *)out; | |
1176 | size_t n = allocated / sizeof(*p); | |
1177 | while (n--) | |
1178 | *p++ = 0; | |
1179 | } | |
1180 | ||
1181 | memset(cdata->iv, 0, AES_BLOCK_SIZE); | |
1182 | ||
1183 | return 1; | |
5b172463 AP |
1184 | } |
1185 | ||
10621efd | 1186 | # endif /* OPENSSL_NO_AES */ |
63d74075 | 1187 | |
5b172463 AP |
1188 | /* ===== Random Number Generator ===== */ |
1189 | /* | |
1190 | * This code is not engaged. The reason is that it does not comply | |
1191 | * with recommendations for VIA RNG usage for secure applications | |
1192 | * (posted at http://www.via.com.tw/en/viac3/c3.jsp) nor does it | |
1193 | * provide meaningful error control... | |
1194 | */ | |
10621efd MC |
/*
 * Wrapper that provides an interface between the RAND_METHOD API and the
 * raw PadLock RNG.
 *
 * Fills output[0..count-1] using padlock_xstore(). Full groups of 8 bytes
 * are stored directly into the caller's buffer; the remaining bytes are
 * fetched one at a time through a local scratch word.
 * (The second argument of padlock_xstore is 0 for the 8-byte stores and 3
 * for the 1-byte stores; presumably it selects how many bytes one store
 * delivers -- confirm against padlock_xstore.)
 *
 * The status word returned by padlock_xstore() is checked after each store:
 *   - bit 6 clear            -> RNG disabled, fail;
 *   - any of bits 10..14 set -> DC bias / Raw Bits / String Filter, fail;
 *   - low 5 bits == 0        -> no data yet, retry;
 *   - low 5 bits != expected byte count -> fail.
 *
 * Returns 1 when the whole buffer was filled, 0 on any failure.
 */
static int padlock_rand_bytes(unsigned char *output, int count)
{
    unsigned int status, scratch;

    /* Bulk phase: 8 bytes per store, straight into the output buffer. */
    while (count >= 8) {
        status = padlock_xstore(output, 0);
        if ((status & (1 << 6)) == 0)
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if ((status & (0x1F << 10)) != 0)
            return 0;
        switch (status & 0x1F) {
        case 0:
            continue;           /* no data, retry... */
        case 8:
            break;              /* got the expected 8 bytes */
        default:
            return 0;           /* fatal failure... */
        }
        output += 8;
        count -= 8;
    }

    /* Tail phase: one byte per store, via the scratch word. */
    while (count > 0) {
        status = padlock_xstore(&scratch, 3);
        if ((status & (1 << 6)) == 0)
            return 0;           /* RNG disabled */
        /* this ---vv--- covers DC bias, Raw Bits and String Filter */
        if ((status & (0x1F << 10)) != 0)
            return 0;
        switch (status & 0x1F) {
        case 0:
            continue;           /* no data, retry... */
        case 1:
            break;              /* got the expected single byte */
        default:
            return 0;           /* fatal failure... */
        }
        *output++ = (unsigned char)scratch;
        count--;
    }

    /* Wipe the scratch word; the volatile access keeps the store alive. */
    *(volatile unsigned int *)&scratch = 0;

    return 1;
}
1235 | ||
/*
 * Status callback required by the RAND_METHOD table. It is a dummy: no
 * hardware is queried and the result is always 1.
 */
static int padlock_rand_status(void)
{
    const int status = 1;

    return status;
}
1241 | ||
1242 | /* RAND_METHOD table for the PadLock RNG, prepared for registration */ | |
1243 | static RAND_METHOD padlock_rand = { | |
10621efd MC |
1244 | NULL, /* seed: not provided */ |
1245 | padlock_rand_bytes, /* bytes */ | |
1246 | NULL, /* cleanup: not provided */ | |
1247 | NULL, /* add: not provided */ | |
1248 | padlock_rand_bytes, /* pseudorand: same routine as bytes */ | |
1249 | padlock_rand_status, /* rand status: always returns 1 */ | |
5b172463 AP |
1250 | }; |
1251 | ||
10621efd MC |
1252 | # else /* !COMPILE_HW_PADLOCK */ |
1253 | # ifndef OPENSSL_NO_DYNAMIC_ENGINE | |
11f35a03 | 1254 | OPENSSL_EXPORT |
10621efd | 1255 | int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns); |
b5b72434 | 1256 | OPENSSL_EXPORT |
10621efd MC |
1257 | int bind_engine(ENGINE *e, const char *id, const dynamic_fns *fns) |
1258 | { | |
1259 | return 0; | |
1260 | } | |
5b172463 | 1261 | |
10621efd MC |
1262 | IMPLEMENT_DYNAMIC_CHECK_FN() |
1263 | # endif | |
1264 | # endif /* COMPILE_HW_PADLOCK */ | |
1265 | # endif /* !OPENSSL_NO_HW_PADLOCK */ | |
1266 | #endif /* !OPENSSL_NO_HW */ |