1 diff -urN openssl-0.9.8g.orig/crypto/engine/eng_padlock.c openssl-0.9.8g/crypto/engine/eng_padlock.c
2 --- openssl-0.9.8g.orig/crypto/engine/eng_padlock.c 2007-11-13 20:00:28.390611512 +0100
3 +++ openssl-0.9.8g/crypto/engine/eng_padlock.c 2007-11-13 20:02:52.398818072 +0100
6 #include <openssl/aes.h>
8 +#ifndef OPENSSL_NO_SHA
9 +#include <openssl/sha.h>
11 #include <openssl/rand.h>
12 #include <openssl/err.h>
15 #ifndef OPENSSL_NO_HW_PADLOCK
17 +/* PadLock RNG is disabled by default */
18 +#define PADLOCK_NO_RNG 1
20 +/* No ASM routines for SHA in MSC yet */
22 +#define OPENSSL_NO_SHA
25 /* Attempt to have a single source for both 0.9.7 and 0.9.8 :-) */
26 #if (OPENSSL_VERSION_NUMBER >= 0x00908000L)
27 # ifndef OPENSSL_NO_DYNAMIC_ENGINE
29 static int padlock_init(ENGINE *e);
32 +#ifndef PADLOCK_NO_RNG
33 static RAND_METHOD padlock_rand;
37 #ifndef OPENSSL_NO_AES
38 static int padlock_ciphers(ENGINE *e, const EVP_CIPHER **cipher, const int **nids, int nid);
42 +#ifndef OPENSSL_NO_SHA
43 +static int padlock_digests(ENGINE *e, const EVP_MD **digest, const int **nids, int nid);
47 static const char *padlock_id = "padlock";
48 static char padlock_name[100];
50 /* Available features */
51 -static int padlock_use_ace = 0; /* Advanced Cryptography Engine */
52 -static int padlock_use_rng = 0; /* Random Number Generator */
56 + PADLOCK_ACE2 = 0x04,
60 +enum padlock_flags padlock_flags;
62 +#define PADLOCK_HAVE_RNG (padlock_flags & PADLOCK_RNG)
63 +#define PADLOCK_HAVE_ACE (padlock_flags & (PADLOCK_ACE|PADLOCK_ACE2))
64 +#define PADLOCK_HAVE_ACE1 (padlock_flags & PADLOCK_ACE)
65 +#define PADLOCK_HAVE_ACE2 (padlock_flags & PADLOCK_ACE2)
66 +#define PADLOCK_HAVE_PHE (padlock_flags & PADLOCK_PHE)
67 +#define PADLOCK_HAVE_PMM (padlock_flags & PADLOCK_PMM)
69 #ifndef OPENSSL_NO_AES
70 static int padlock_aes_align_required = 1;
73 +/* Init / Max buffer sizes for SHA */
74 +#define PADLOCK_SHA_INIT_ORD 13 /* = 8192 */
75 +#define PADLOCK_SHA_MAX_ORD 13 /* = 8192 */
77 /* ===== Engine "management" functions ===== */
79 /* Prepare the ENGINE structure for registration */
81 padlock_bind_helper(ENGINE *e)
83 + char phe_string[20];
85 /* Check available features */
88 -#if 1 /* disable RNG for now, see commentary in vicinity of RNG code */
91 + /* Build PHE info with buffer size argument */
92 + if (PADLOCK_HAVE_PHE)
93 + BIO_snprintf(phe_string, sizeof(phe_string),
94 + "PHE(%lu) ", 1UL << PADLOCK_SHA_MAX_ORD);
96 /* Generate a nice engine name with available features */
97 BIO_snprintf(padlock_name, sizeof(padlock_name),
98 - "VIA PadLock (%s, %s)",
99 - padlock_use_rng ? "RNG" : "no-RNG",
100 - padlock_use_ace ? "ACE" : "no-ACE");
101 + "VIA PadLock: %s%s%s%s%s",
102 + padlock_flags ? "" : "not supported",
103 + PADLOCK_HAVE_RNG ? "RNG " : "",
104 + PADLOCK_HAVE_ACE ? (PADLOCK_HAVE_ACE2 ? "ACE2 " : "ACE ") : "",
105 + PADLOCK_HAVE_PHE ? phe_string : "",
106 + PADLOCK_HAVE_PMM ? "PMM " : "");
108 /* Register everything or return with an error */
109 if (!ENGINE_set_id(e, padlock_id) ||
110 !ENGINE_set_name(e, padlock_name) ||
112 - !ENGINE_set_init_function(e, padlock_init) ||
113 + !ENGINE_set_init_function(e, padlock_init)
114 #ifndef OPENSSL_NO_AES
115 - (padlock_use_ace && !ENGINE_set_ciphers (e, padlock_ciphers)) ||
116 + || (PADLOCK_HAVE_ACE && !ENGINE_set_ciphers (e, padlock_ciphers))
118 +#ifndef OPENSSL_NO_SHA
119 + || (PADLOCK_HAVE_PHE && !ENGINE_set_digests (e, padlock_digests))
121 +#ifndef PADLOCK_NO_RNG
122 + || (PADLOCK_HAVE_RNG && !ENGINE_set_RAND (e, &padlock_rand))
124 - (padlock_use_rng && !ENGINE_set_RAND (e, &padlock_rand))) {
131 padlock_init(ENGINE *e)
133 - return (padlock_use_rng || padlock_use_ace);
134 + return (padlock_flags);
137 /* This stuff is needed if this ENGINE is being compiled into a self-contained
140 /* ===== Here comes the "real" engine ===== */
143 +#define likely(x) __builtin_expect(!!(x), 1)
144 +#define unlikely(x) __builtin_expect(!!(x), 0)
146 +#define likely(x) (x)
147 +#define unlikely(x) (x)
150 +/* How to test if we need to typedef uint32_t ??? */
151 +typedef unsigned long uint32_t;
153 #ifndef OPENSSL_NO_AES
154 /* Some AES-related constants */
155 #define AES_BLOCK_SIZE 16
156 @@ -362,10 +421,22 @@
157 : "+a"(eax), "=d"(edx) : : "ecx");
159 /* Fill up some flags */
160 - padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
161 - padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
162 + padlock_flags |= ((edx & (0x3<<3)) ? PADLOCK_RNG : 0);
163 + padlock_flags |= ((edx & (0x3<<7)) ? PADLOCK_ACE : 0);
164 + padlock_flags |= ((edx & (0x3<<9)) ? PADLOCK_ACE2 : 0);
165 + padlock_flags |= ((edx & (0x3<<11)) ? PADLOCK_PHE : 0);
166 + padlock_flags |= ((edx & (0x3<<13)) ? PADLOCK_PMM : 0);
168 - return padlock_use_ace + padlock_use_rng;
169 + return padlock_flags;
173 +padlock_htonl_block(uint32_t *data, size_t count)
176 + asm volatile ("bswapl %0" : "+r"(*data));
181 #ifndef OPENSSL_NO_AES
183 padlock_bswapl(AES_KEY *ks)
185 size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
186 - unsigned int *key = ks->rd_key;
187 + uint32_t *key = (uint32_t*) ks->rd_key;
190 - asm volatile ("bswapl %0" : "+r"(*key));
193 + padlock_htonl_block(key, i);
197 @@ -1154,6 +1222,415 @@
199 #endif /* OPENSSL_NO_AES */
201 +#ifndef OPENSSL_NO_SHA
203 +// #define PADLOCK_SHA_STAT 1
207 + SHA256_CTX sha256_ctx; /* shared with SHA224 */
210 +typedef int (*f_sha_init)(void *c);
211 +typedef int (*f_sha_update)(void *c, const void *_data, size_t len);
212 +typedef int (*f_sha_final)(unsigned char *md, void *c);
213 +typedef void (*f_sha_padlock)(char *in, unsigned char *out, int count);
215 +struct sha_digest_functions {
217 + f_sha_update update;
219 + f_sha_padlock padlock;
222 +/* Don't forget to initialize all relevant
223 + * fields in padlock_sha_init() or face the
225 + * BTW We don't use bzero() on this structure
226 + * because zeroing fallback_ctx is
227 + * a waste of time. */
228 +struct padlock_digest_data {
229 + void *buf_start, *buf_alloc;
231 + unsigned long order:8, bypass:1;
232 + /* Fallback support */
233 + struct sha_digest_functions fallback_fcs;
234 + union sha_all_ctx fallback_ctx;
235 +#ifdef PADLOCK_SHA_STAT
236 + size_t stat_count, stat_total;
240 +#ifdef PADLOCK_SHA_STAT
241 +size_t all_count, all_total;
244 +#define DIGEST_DATA(ctx) ((struct padlock_digest_data *)((ctx)->md_data)) /* view ctx->md_data as the PadLock digest state */
245 +#define DDATA_FREE(ddata) ((size_t)(1L << (ddata)->order) - (ddata)->used) /* bytes still unused out of the (1 << order) byte buffer */
248 +padlock_sha_bypass(struct padlock_digest_data *ddata)
253 + ddata->fallback_fcs.init(&ddata->fallback_ctx);
254 + if (ddata->buf_start && ddata->used > 0) {
255 + ddata->fallback_fcs.update(&ddata->fallback_ctx, ddata->buf_start, ddata->used);
256 + if (ddata->buf_alloc) {
257 + memset(ddata->buf_start, 0, ddata->used);
258 + free(ddata->buf_alloc);
259 + ddata->buf_alloc = 0;
262 + ddata->buf_start = 0;
270 +padlock_do_sha1(char *in, char *out, int count)
272 + /* We can't store directly to *out as it
273 + * is not guaranteed to be aligned. But who cares,
274 + * it's only a few bytes... */
276 + unsigned char *output = NEAREST_ALIGNED(buf);
278 + ((uint32_t*)output)[0] = 0x67452301;
279 + ((uint32_t*)output)[1] = 0xEFCDAB89;
280 + ((uint32_t*)output)[2] = 0x98BADCFE;
281 + ((uint32_t*)output)[3] = 0x10325476;
282 + ((uint32_t*)output)[4] = 0xC3D2E1F0;
284 + asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
285 + : "+S"(in), "+D"(output)
286 + : "c"(count), "a"(0));
288 + memcpy(out, output, 5 * sizeof(uint32_t));
290 + padlock_htonl_block((uint32_t*)out, 5);
294 +padlock_do_sha224(char *in, char *out, int count)
296 + /* We can't store directly to *out as it
297 + * is not guaranteed to be aligned. But who cares,
298 + * it's only a few bytes... */
300 + unsigned char *output = NEAREST_ALIGNED(buf);
302 + ((uint32_t*)output)[0] = 0xC1059ED8UL;
303 + ((uint32_t*)output)[1] = 0x367CD507UL;
304 + ((uint32_t*)output)[2] = 0x3070DD17UL;
305 + ((uint32_t*)output)[3] = 0xF70E5939UL;
306 + ((uint32_t*)output)[4] = 0xFFC00B31UL;
307 + ((uint32_t*)output)[5] = 0x68581511UL;
308 + ((uint32_t*)output)[6] = 0x64F98FA7UL;
309 + ((uint32_t*)output)[7] = 0xBEFA4FA4UL;
311 + asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
312 + : "+S"(in), "+D"(output)
313 + : "c"(count), "a"(0));
315 + memcpy(out, output, 7 * sizeof(uint32_t));
317 + padlock_htonl_block((uint32_t*)out, 7);
321 +padlock_do_sha256(char *in, char *out, int count)
323 + /* We can't store directly to *out as it
324 + * is not guaranteed to be aligned. But who cares,
325 + * it's only a few bytes... */
327 + unsigned char *output = NEAREST_ALIGNED(buf);
329 + ((uint32_t*)output)[0] = 0x6A09E667;
330 + ((uint32_t*)output)[1] = 0xBB67AE85;
331 + ((uint32_t*)output)[2] = 0x3C6EF372;
332 + ((uint32_t*)output)[3] = 0xA54FF53A;
333 + ((uint32_t*)output)[4] = 0x510E527F;
334 + ((uint32_t*)output)[5] = 0x9B05688C;
335 + ((uint32_t*)output)[6] = 0x1F83D9AB;
336 + ((uint32_t*)output)[7] = 0x5BE0CD19;
338 + asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
339 + : "+S"(in), "+D"(output)
340 + : "c"(count), "a"(0));
342 + memcpy(out, output, 8 * sizeof(uint32_t));
344 + padlock_htonl_block((uint32_t*)out, 8);
348 +padlock_sha_init(EVP_MD_CTX *ctx)
350 + struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
355 + ddata->order = PADLOCK_SHA_INIT_ORD;
356 + ddata->buf_alloc = malloc((1L << ddata->order) + 16);
357 + ddata->buf_start = NEAREST_ALIGNED(ddata->buf_alloc);
363 +padlock_sha1_init(EVP_MD_CTX *ctx)
365 + struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
367 + ddata->fallback_fcs.init = (f_sha_init)SHA1_Init;
368 + ddata->fallback_fcs.update = (f_sha_update)SHA1_Update;
369 + ddata->fallback_fcs.final = (f_sha_final)SHA1_Final;
370 + ddata->fallback_fcs.padlock = (f_sha_padlock)padlock_do_sha1;
372 + return padlock_sha_init(ctx);
376 +padlock_sha224_init(EVP_MD_CTX *ctx)
378 + struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
380 + ddata->fallback_fcs.init = (f_sha_init)SHA224_Init;
381 + ddata->fallback_fcs.update = (f_sha_update)SHA224_Update;
382 + ddata->fallback_fcs.final = (f_sha_final)SHA224_Final;
383 + ddata->fallback_fcs.padlock = (f_sha_padlock)padlock_do_sha224;
385 + return padlock_sha_init(ctx);
389 +padlock_sha256_init(EVP_MD_CTX *ctx)
391 + struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
393 + ddata->fallback_fcs.init = (f_sha_init)SHA256_Init;
394 + ddata->fallback_fcs.update = (f_sha_update)SHA256_Update;
395 + ddata->fallback_fcs.final = (f_sha_final)SHA256_Final;
396 + ddata->fallback_fcs.padlock = (f_sha_padlock)padlock_do_sha256;
398 + return padlock_sha_init(ctx);
402 +padlock_sha_update(EVP_MD_CTX *ctx, const void *data, size_t length)
404 + struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
406 +#ifdef PADLOCK_SHA_STAT
407 + ddata->stat_count++;
408 + ddata->stat_total += length;
410 + all_total += length;
412 + if (unlikely(ddata->bypass)) {
413 + ddata->fallback_fcs.update(&ddata->fallback_ctx, data, length);
416 + if (unlikely(DDATA_FREE(ddata) < length)) {
417 + if (likely(ddata->used + length > (1 << PADLOCK_SHA_MAX_ORD))) {
418 + /* Too much data to be stored -> bypass to SW SHA */
419 + padlock_sha_bypass(ddata);
420 + ddata->fallback_fcs.update(&ddata->fallback_ctx, data, length);
423 + /* Resize the allocated buffer */
427 + while ((1<<++ddata->order) < (ddata->used + length));
428 + new_size = (1<<ddata->order);
429 + if(!(new_buf = realloc(ddata->buf_alloc, new_size + 16))) {
430 + /* fallback plan again */
431 + padlock_sha_bypass(ddata);
432 + ddata->fallback_fcs.update(&ddata->fallback_ctx, data, length);
435 + ddata->buf_alloc = new_buf;
436 + ddata->buf_start = NEAREST_ALIGNED(new_buf);
440 + memcpy(ddata->buf_start + ddata->used, data, length);
441 + ddata->used += length;
447 +padlock_sha_final(EVP_MD_CTX *ctx, unsigned char *md)
449 + struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
451 +#ifdef PADLOCK_SHA_STAT
452 + fprintf(stderr, "PadLock CTX: cnt=%zu, tot=%zu, avg=%zu\n",
453 + ddata->stat_count, ddata->stat_total,
454 + ddata->stat_count ? (ddata->stat_total/ddata->stat_count) : 0);
455 + fprintf(stderr, "PadLock ALL: cnt=%zu, tot=%zu, avg=%zu\n",
456 + all_count, all_total, all_count ? (all_total/all_count) : 0);
459 + if (ddata->bypass) {
460 + ddata->fallback_fcs.final(md, &ddata->fallback_ctx);
464 + /* Pass the input buffer to PadLock microcode... */
465 + ddata->fallback_fcs.padlock(ddata->buf_start, md, ddata->used);
466 + memset(ddata->buf_start, 0, ddata->used);
467 + free(ddata->buf_alloc);
468 + ddata->buf_start = 0;
469 + ddata->buf_alloc = 0;
476 +padlock_sha_copy(EVP_MD_CTX *to,const EVP_MD_CTX *from)
478 + struct padlock_digest_data *ddata_from = DIGEST_DATA(from);
479 + struct padlock_digest_data *ddata_to = DIGEST_DATA(to);
481 + memcpy(ddata_to, ddata_from, sizeof(struct padlock_digest_data));
482 + if (ddata_from->buf_alloc) {
483 + ddata_to->buf_alloc = malloc((1L << ddata_to->order) + 16); /* +16: slack for NEAREST_ALIGNED(), same as padlock_sha_init() */
484 + if (!ddata_to->buf_alloc) { /* check the new allocation; buf_start still holds the pointer copied from 'from' */
485 + fprintf(stderr, "%s(): malloc() failed\n", __func__);
488 + ddata_to->buf_start = NEAREST_ALIGNED(ddata_to->buf_alloc);
489 + memcpy(ddata_to->buf_start, ddata_from->buf_start, ddata_from->used);
495 +padlock_sha_cleanup(EVP_MD_CTX *ctx)
497 + struct padlock_digest_data *ddata = DIGEST_DATA(ctx);
499 + if (ddata->buf_alloc) {
500 + memset(ddata->buf_start, 0, ddata->used);
501 + free(ddata->buf_alloc);
504 + memset(ddata, 0, sizeof(struct padlock_digest_data));
509 +static const EVP_MD padlock_sha1_md = {
511 + NID_sha1WithRSAEncryption,
515 + padlock_sha_update,
518 + padlock_sha_cleanup,
519 + EVP_PKEY_RSA_method,
521 + sizeof(struct padlock_digest_data),
524 +static const EVP_MD padlock_sha224_md = {
526 + NID_sha224WithRSAEncryption,
527 + SHA224_DIGEST_LENGTH,
529 + padlock_sha224_init,
530 + padlock_sha_update,
533 + padlock_sha_cleanup,
534 + EVP_PKEY_RSA_method,
536 + sizeof(struct padlock_digest_data),
539 +static const EVP_MD padlock_sha256_md = {
541 + NID_sha256WithRSAEncryption,
542 + SHA256_DIGEST_LENGTH,
544 + padlock_sha256_init,
545 + padlock_sha_update,
548 + padlock_sha_cleanup,
549 + EVP_PKEY_RSA_method,
551 + sizeof(struct padlock_digest_data),
554 +static int padlock_digest_nids[] = {
555 +#if !defined(OPENSSL_NO_SHA)
558 +#if !defined(OPENSSL_NO_SHA256)
559 +#if !defined(OPENSSL_NO_SHA224)
566 +static int padlock_digest_nids_num = sizeof(padlock_digest_nids)/sizeof(padlock_digest_nids[0]);
569 +padlock_digests (ENGINE *e, const EVP_MD **digest, const int **nids, int nid)
571 + /* No specific digest => return a list of supported nids ... */
573 + *nids = padlock_digest_nids;
574 + return padlock_digest_nids_num;
577 + /* ... or the requested "digest" otherwise */
579 +#if !defined(OPENSSL_NO_SHA)
581 + *digest = &padlock_sha1_md;
586 +#if !defined(OPENSSL_NO_SHA256)
587 +#if !defined(OPENSSL_NO_SHA224)
589 + *digest = &padlock_sha224_md;
591 +#endif /* OPENSSL_NO_SHA224 */
594 + *digest = &padlock_sha256_md;
596 +#endif /* OPENSSL_NO_SHA256 */
599 + /* Sorry, we don't support this NID */
607 +#endif /* OPENSSL_NO_SHA */
609 +#ifndef PADLOCK_NO_RNG
610 /* ===== Random Number Generator ===== */
612 * This code is not engaged. The reason is that it does not comply
613 @@ -1209,6 +1686,7 @@
614 padlock_rand_bytes, /* pseudorand */
615 padlock_rand_status, /* rand status */
617 +#endif /* PADLOCK_NO_RNG */
619 #endif /* COMPILE_HW_PADLOCK */