]>
Commit | Line | Data |
---|---|---|
0595faf5 MT |
1 | diff -up openssl-1.0.1-beta2/engines/e_padlock.c.padlock64 openssl-1.0.1-beta2/engines/e_padlock.c |
2 | --- openssl-1.0.1-beta2/engines/e_padlock.c.padlock64 2011-06-21 18:42:15.000000000 +0200 | |
3 | +++ openssl-1.0.1-beta2/engines/e_padlock.c 2012-02-06 20:18:52.039537799 +0100 | |
4 | @@ -101,7 +101,10 @@ | |
5 | compiler choice is limited to GCC and Microsoft C. */ | |
6 | #undef COMPILE_HW_PADLOCK | |
7 | #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM) | |
8 | -# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \ | |
9 | +# if (defined(__GNUC__) && __GNUC__>=2 && \ | |
10 | + (defined(__i386__) || defined(__i386) || \ | |
11 | + defined(__x86_64__) || defined(__x86_64)) \ | |
12 | + ) || \ | |
13 | (defined(_MSC_VER) && defined(_M_IX86)) | |
14 | # define COMPILE_HW_PADLOCK | |
15 | # endif | |
16 | @@ -137,7 +140,7 @@ void ENGINE_load_padlock (void) | |
17 | # endif | |
18 | #elif defined(__GNUC__) | |
19 | # ifndef alloca | |
20 | -# define alloca(s) __builtin_alloca(s) | |
21 | +# define alloca(s) __builtin_alloca((s)) | |
22 | # endif | |
23 | #endif | |
24 | ||
25 | @@ -304,6 +307,7 @@ static volatile struct padlock_cipher_da | |
26 | * ======================================================= | |
27 | */ | |
28 | #if defined(__GNUC__) && __GNUC__>=2 | |
29 | +#if defined(__i386__) || defined(__i386) | |
30 | /* | |
31 | * As for excessive "push %ebx"/"pop %ebx" found all over. | |
32 | * When generating position-independent code GCC won't let | |
33 | @@ -383,21 +387,6 @@ padlock_available(void) | |
34 | return padlock_use_ace + padlock_use_rng; | |
35 | } | |
36 | ||
37 | -#ifndef OPENSSL_NO_AES | |
38 | -/* Our own htonl()/ntohl() */ | |
39 | -static inline void | |
40 | -padlock_bswapl(AES_KEY *ks) | |
41 | -{ | |
42 | - size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); | |
43 | - unsigned int *key = ks->rd_key; | |
44 | - | |
45 | - while (i--) { | |
46 | - asm volatile ("bswapl %0" : "+r"(*key)); | |
47 | - key++; | |
48 | - } | |
49 | -} | |
50 | -#endif | |
51 | - | |
52 | /* Force key reload from memory to the CPU microcode. | |
53 | Loading EFLAGS from the stack clears EFLAGS[30] | |
54 | which does the trick. */ | |
55 | @@ -455,12 +444,127 @@ static inline void *name(size_t cnt, \ | |
56 | : "edx", "cc", "memory"); \ | |
57 | return iv; \ | |
58 | } | |
59 | +#endif | |
60 | + | |
61 | +#elif defined(__x86_64__) || defined(__x86_64) | |
62 | + | |
63 | +/* Query CPUID to see if the PadLock is available: sets padlock_use_ace and | |
64 | + padlock_use_rng and returns their sum (0 if not "CentaurHauls" or no 0xC0000001 leaf). */ | |
65 | +static int | |
66 | +padlock_available(void) | |
67 | +{ | |
68 | + char vendor_string[16]; | |
69 | + unsigned int eax, edx; | |
70 | ||
71 | + /* Are we running on the Centaur (VIA) CPU? */ | |
72 | + eax = 0x00000000; | |
73 | + vendor_string[12] = 0; | |
74 | + asm volatile ( | |
75 | + "cpuid\n" | |
76 | + "movl %%ebx,(%1)\n" | |
77 | + "movl %%edx,4(%1)\n" | |
78 | + "movl %%ecx,8(%1)\n" | |
79 | + : "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx", "memory"); /* "memory": the asm stores into vendor_string */ | |
80 | + if (strcmp(vendor_string, "CentaurHauls") != 0) | |
81 | + return 0; | |
82 | + | |
83 | + /* Check for Centaur Extended Feature Flags presence */ | |
84 | + eax = 0xC0000000; | |
85 | + asm volatile ("cpuid" | |
86 | + : "+a"(eax) : : "rbx", "rcx", "rdx"); | |
87 | + if (eax < 0xC0000001) | |
88 | + return 0; | |
89 | + | |
90 | + /* Read the Centaur Extended Feature Flags */ | |
91 | + eax = 0xC0000001; | |
92 | + asm volatile ("cpuid" | |
93 | + : "+a"(eax), "=d"(edx) : : "rbx", "rcx"); | |
94 | + | |
95 | + /* Fill up the flags: ACE requires EDX bits 6 and 7 both set, RNG bits 2 and 3 */ | |
96 | + padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6)); | |
97 | + padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2)); | |
98 | + | |
99 | + return padlock_use_ace + padlock_use_rng; | |
100 | +} | |
101 | + | |
102 | +/* Force key reload from memory to the CPU microcode. | |
103 | + Loading EFLAGS from the stack clears EFLAGS[30] | |
104 | + which does the trick. NOTE(review): pushfq stores below %rsp; confirm no inlined caller keeps live data in the x86-64 red zone. */ | |
105 | +static inline void | |
106 | +padlock_reload_key(void) | |
107 | +{ | |
108 | + asm volatile ("pushfq; popfq"); | |
109 | +} | |
110 | + | |
111 | +#ifndef OPENSSL_NO_AES | |
112 | +/* | |
113 | + * This is heuristic key context tracing. At first one | |
114 | + * believes that one should use atomic swap instructions, | |
115 | + * but it's not actually necessary. Point is that if | |
116 | + * padlock_saved_context was changed by another thread | |
117 | + * after we've read it and before we compare it with cdata, | |
118 | + * our key *shall* be reloaded upon thread context switch | |
119 | + * and we are therefore set in either case... The asm: if EFLAGS[30] is set and padlock_saved_context != cdata, the pushed flags are reloaded with popfq; otherwise they are just dropped from the stack. cdata is then stored into padlock_saved_context in every case. | |
120 | + */ | |
121 | +static inline void | |
122 | +padlock_verify_context(struct padlock_cipher_data *cdata) | |
123 | +{ | |
124 | + asm volatile ( | |
125 | + "pushfq\n" | |
126 | +" btl $30,(%%rsp)\n" | |
127 | +" jnc 1f\n" | |
128 | +" cmpq %2,%1\n" | |
129 | +" je 1f\n" | |
130 | +" popfq\n" | |
131 | +" subq $8,%%rsp\n" | |
132 | +"1: addq $8,%%rsp\n" | |
133 | +" movq %2,%0" | |
134 | + :"+m"(padlock_saved_context) | |
135 | + : "r"(padlock_saved_context), "r"(cdata) : "cc"); | |
136 | +} | |
137 | + | |
138 | +/* Template for padlock_xcrypt_* modes: cdata enters in %rax, cnt in %rcx, out in %rdi, inp in %rsi; the value left in %rax is returned as iv. */ | |
139 | +/* BIG FAT WARNING: | |
140 | + * The offsets used with 'leaq' instructions (16 -> %rdx, 32 -> %rbx) | |
141 | + * describe items of the 'padlock_cipher_data' | |
142 | + * structure. | |
143 | + */ | |
144 | +#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \ | |
145 | +static inline void *name(size_t cnt, \ | |
146 | + struct padlock_cipher_data *cdata, \ | |
147 | + void *out, const void *inp) \ | |
148 | +{ void *iv; \ | |
149 | + asm volatile ( "leaq 16(%0),%%rdx\n" \ | |
150 | + " leaq 32(%0),%%rbx\n" \ | |
151 | + rep_xcrypt "\n" \ | |
152 | + : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \ | |
153 | + : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \ | |
154 | + : "rbx", "rdx", "cc", "memory"); \ | |
155 | + return iv; \ | |
156 | +} | |
157 | +#endif | |
158 | + | |
159 | +#endif /* cpu */ | |
160 | + | |
161 | +#ifndef OPENSSL_NO_AES | |
162 | /* Generate all functions with appropriate opcodes */ | |
163 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */ | |
164 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */ | |
165 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */ | |
166 | PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */ | |
167 | + | |
168 | +/* Our own htonl()/ntohl(): byte-swap every element of ks->rd_key in place with bswapl */ | |
169 | +static inline void | |
170 | +padlock_bswapl(AES_KEY *ks) | |
171 | +{ | |
172 | + size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]); | |
173 | + unsigned int *key = ks->rd_key; | |
174 | + | |
175 | + while (i--) { | |
176 | + asm volatile ("bswapl %0" : "+r"(*key)); | |
177 | + key++; | |
178 | + } | |
179 | +} | |
180 | #endif | |
181 | ||
182 | /* The RNG call itself */ | |
183 | @@ -491,8 +595,8 @@ padlock_xstore(void *addr, unsigned int | |
184 | static inline unsigned char * | |
185 | padlock_memcpy(void *dst,const void *src,size_t n) | |
186 | { | |
187 | - long *d=dst; | |
188 | - const long *s=src; | |
189 | + size_t *d=dst; | |
190 | + const size_t *s=src; | |
191 | ||
192 | n /= sizeof(*d); | |
193 | do { *d++ = *s++; } while (--n); |