1 diff -up openssl-1.0.1-beta2/engines/e_padlock.c.padlock64 openssl-1.0.1-beta2/engines/e_padlock.c
2 --- openssl-1.0.1-beta2/engines/e_padlock.c.padlock64 2011-06-21 18:42:15.000000000 +0200
3 +++ openssl-1.0.1-beta2/engines/e_padlock.c 2012-02-06 20:18:52.039537799 +0100
5 compiler choice is limited to GCC and Microsoft C. */
6 #undef COMPILE_HW_PADLOCK
7 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
8 -# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
9 +# if (defined(__GNUC__) && __GNUC__>=2 && \
10 + (defined(__i386__) || defined(__i386) || \
11 + defined(__x86_64__) || defined(__x86_64)) \
13 (defined(_MSC_VER) && defined(_M_IX86))
14 # define COMPILE_HW_PADLOCK
16 @@ -137,7 +140,7 @@ void ENGINE_load_padlock (void)
18 #elif defined(__GNUC__)
20 -# define alloca(s) __builtin_alloca(s)
21 +# define alloca(s) __builtin_alloca((s))
25 @@ -304,6 +307,7 @@ static volatile struct padlock_cipher_da
26 * =======================================================
28 #if defined(__GNUC__) && __GNUC__>=2
29 +#if defined(__i386__) || defined(__i386)
31 * As for excessive "push %ebx"/"pop %ebx" found all over.
32 * When generating position-independent code GCC won't let
33 @@ -383,21 +387,6 @@ padlock_available(void)
34 return padlock_use_ace + padlock_use_rng;
37 -#ifndef OPENSSL_NO_AES
38 -/* Our own htonl()/ntohl() */
40 -padlock_bswapl(AES_KEY *ks)
42 - size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
43 - unsigned int *key = ks->rd_key;
46 - asm volatile ("bswapl %0" : "+r"(*key));
52 /* Force key reload from memory to the CPU microcode.
53 Loading EFLAGS from the stack clears EFLAGS[30]
54 which does the trick. */
55 @@ -455,12 +444,127 @@ static inline void *name(size_t cnt, \
56 : "edx", "cc", "memory"); \
61 +#elif defined(__x86_64__) || defined(__x86_64)
63 +/* Load supported features of the CPU to see if
64 + the PadLock is available. */
66 +padlock_available(void)
68 + char vendor_string[16];
69 + unsigned int eax, edx;
71 + /* Are we running on the Centaur (VIA) CPU? */
73 + vendor_string[12] = 0;
77 + "movl %%edx,4(%1)\n"
78 + "movl %%ecx,8(%1)\n"
79 + : "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx");
80 + if (strcmp(vendor_string, "CentaurHauls") != 0)
83 + /* Check for Centaur Extended Feature Flags presence */
85 + asm volatile ("cpuid"
86 + : "+a"(eax) : : "rbx", "rcx", "rdx");
87 + if (eax < 0xC0000001)
90 + /* Read the Centaur Extended Feature Flags */
92 + asm volatile ("cpuid"
93 + : "+a"(eax), "=d"(edx) : : "rbx", "rcx");
95 + /* Fill up some flags */
96 + padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
97 + padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
99 + return padlock_use_ace + padlock_use_rng;
102 +/* Force key reload from memory to the CPU microcode.
103 + Loading EFLAGS from the stack clears EFLAGS[30]
104 + which does the trick. */
106 +padlock_reload_key(void)
108 + asm volatile ("pushfq; popfq");
111 +#ifndef OPENSSL_NO_AES
113 + * This is heuristic key context tracing. At first one
114 + * believes that one should use atomic swap instructions,
115 + * but it's not actually necessary. The point is that if
116 + * padlock_saved_context was changed by another thread
117 + * after we've read it and before we compare it with cdata,
118 + * our key *shall* be reloaded upon thread context switch
119 + * and we are therefore set in either case...
122 +padlock_verify_context(struct padlock_cipher_data *cdata)
126 +" btl $30,(%%rsp)\n"
132 +"1: addq $8,%%rsp\n"
134 + :"+m"(padlock_saved_context)
135 + : "r"(padlock_saved_context), "r"(cdata) : "cc");
138 +/* Template for padlock_xcrypt_* modes */
140 + * The offsets used with 'leaq' instructions
141 + * describe items of the 'padlock_cipher_data'
144 +#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
145 +static inline void *name(size_t cnt, \
146 + struct padlock_cipher_data *cdata, \
147 + void *out, const void *inp) \
149 + asm volatile ( "leaq 16(%0),%%rdx\n" \
150 + " leaq 32(%0),%%rbx\n" \
152 + : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
153 + : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
154 + : "rbx", "rdx", "cc", "memory"); \
161 +#ifndef OPENSSL_NO_AES
162 /* Generate all functions with appropriate opcodes */
163 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
164 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
165 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
166 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
168 +/* Our own htonl()/ntohl() */
170 +padlock_bswapl(AES_KEY *ks)
172 + size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
173 + unsigned int *key = ks->rd_key;
176 + asm volatile ("bswapl %0" : "+r"(*key));
182 /* The RNG call itself */
183 @@ -491,8 +595,8 @@ padlock_xstore(void *addr, unsigned int
184 static inline unsigned char *
185 padlock_memcpy(void *dst,const void *src,size_t n)
190 + const size_t *s=src;
193 do { *d++ = *s++; } while (--n);