diff -up openssl-1.0.1-beta2/engines/e_padlock.c.padlock64 openssl-1.0.1-beta2/engines/e_padlock.c
--- openssl-1.0.1-beta2/engines/e_padlock.c.padlock64	2011-06-21 18:42:15.000000000 +0200
+++ openssl-1.0.1-beta2/engines/e_padlock.c	2012-02-06 20:18:52.039537799 +0100
@@ -101,7 +101,10 @@
    compiler choice is limited to GCC and Microsoft C. */
 #undef COMPILE_HW_PADLOCK
 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
-# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
+# if (defined(__GNUC__) && __GNUC__>=2 && \
+	(defined(__i386__) || defined(__i386) || \
+	 defined(__x86_64__) || defined(__x86_64)) \
+     ) || \
      (defined(_MSC_VER) && defined(_M_IX86))
 #  define COMPILE_HW_PADLOCK
 # endif
@@ -137,7 +140,7 @@ void ENGINE_load_padlock (void)
 # endif
 #elif defined(__GNUC__)
 # ifndef alloca
-#  define alloca(s) __builtin_alloca(s)
+#  define alloca(s) __builtin_alloca((s))
 # endif
 #endif
 
@@ -304,6 +307,7 @@ static volatile struct padlock_cipher_da
  * =======================================================
  */
 #if defined(__GNUC__) && __GNUC__>=2
+#if defined(__i386__) || defined(__i386)
 /*
  * As for excessive "push %ebx"/"pop %ebx" found all over.
  * When generating position-independent code GCC won't let
@@ -383,21 +387,6 @@ padlock_available(void)
 	return padlock_use_ace + padlock_use_rng;
 }
 
-#ifndef OPENSSL_NO_AES
-/* Our own htonl()/ntohl() */
-static inline void
-padlock_bswapl(AES_KEY *ks)
-{
-	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
-	unsigned int *key = ks->rd_key;
-
-	while (i--) {
-		asm volatile ("bswapl %0" : "+r"(*key));
-		key++;
-	}
-}
-#endif
-
 /* Force key reload from memory to the CPU microcode.
    Loading EFLAGS from the stack clears EFLAGS[30]
    which does the trick. */
@@ -455,12 +444,127 @@ static inline void *name(size_t cnt, \
 		: "edx", "cc", "memory");	\
 	return iv;				\
 }
+#endif
+
+#elif defined(__x86_64__) || defined(__x86_64)
+
+/* Load supported features of the CPU to see if
+   the PadLock is available. */
+static int
+padlock_available(void)
+{
+	char vendor_string[16];
+	unsigned int eax, edx;
+
+	/* Are we running on the Centaur (VIA) CPU? */
+	eax = 0x00000000;
+	vendor_string[12] = 0;
+	asm volatile (
+		"cpuid\n"
+		"movl %%ebx,(%1)\n"
+		"movl %%edx,4(%1)\n"
+		"movl %%ecx,8(%1)\n"
+		: "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx");
+	if (strcmp(vendor_string, "CentaurHauls") != 0)
+		return 0;
+
+	/* Check for Centaur Extended Feature Flags presence */
+	eax = 0xC0000000;
+	asm volatile ("cpuid"
+		: "+a"(eax) : : "rbx", "rcx", "rdx");
+	if (eax < 0xC0000001)
+		return 0;
+
+	/* Read the Centaur Extended Feature Flags */
+	eax = 0xC0000001;
+	asm volatile ("cpuid"
+		: "+a"(eax), "=d"(edx) : : "rbx", "rcx");
+
+	/* Fill up some flags */
+	padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
+	padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
+
+	return padlock_use_ace + padlock_use_rng;
+}
+
+/* Force key reload from memory to the CPU microcode.
+   Loading EFLAGS from the stack clears EFLAGS[30]
+   which does the trick. */
+static inline void
+padlock_reload_key(void)
+{
+	asm volatile ("pushfq; popfq");
+}
+
+#ifndef OPENSSL_NO_AES
+/*
+ * This is heuristic key context tracing. At first one
+ * believes that one should use atomic swap instructions,
+ * but it's not actually necessary. Point is that if
+ * padlock_saved_context was changed by another thread
+ * after we've read it and before we compare it with cdata,
+ * our key *shall* be reloaded upon thread context switch
+ * and we are therefore set in either case...
+ */
+static inline void
+padlock_verify_context(struct padlock_cipher_data *cdata)
+{
+	asm volatile (
+	"pushfq\n"
+"	btl	$30,(%%rsp)\n"
+"	jnc	1f\n"
+"	cmpq	%2,%1\n"
+"	je	1f\n"
+"	popfq\n"
+"	subq	$8,%%rsp\n"
+"1:	addq	$8,%%rsp\n"
+"	movq	%2,%0"
+	:"+m"(padlock_saved_context)
+	: "r"(padlock_saved_context), "r"(cdata) : "cc");
+}
+
+/* Template for padlock_xcrypt_* modes */
+/* BIG FAT WARNING:
+ * The offsets used with 'leal' instructions
+ * describe items of the 'padlock_cipher_data'
+ * structure.
+ */
+#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt)	\
+static inline void *name(size_t cnt,		\
+	struct padlock_cipher_data *cdata,	\
+	void *out, const void *inp)		\
+{	void *iv;				\
+	asm volatile ( "leaq 16(%0),%%rdx\n"	\
+		" leaq 32(%0),%%rbx\n"		\
+		rep_xcrypt "\n"			\
+		: "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
+		: "0"(cdata), "1"(cnt), "2"(out), "3"(inp)  \
+		: "rbx", "rdx", "cc", "memory"); \
+	return iv;				\
+}
+#endif
+
+#endif	/* cpu */
+
+#ifndef OPENSSL_NO_AES
 /* Generate all functions with appropriate opcodes */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8")	/* rep xcryptecb */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0")	/* rep xcryptcbc */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0")	/* rep xcryptcfb */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8")	/* rep xcryptofb */
+
+/* Our own htonl()/ntohl() */
+static inline void
+padlock_bswapl(AES_KEY *ks)
+{
+	size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
+	unsigned int *key = ks->rd_key;
+
+	while (i--) {
+		asm volatile ("bswapl %0" : "+r"(*key));
+		key++;
+	}
+}
 #endif
 
 /* The RNG call itself */
@@ -491,8 +595,8 @@ padlock_xstore(void *addr, unsigned int
 static inline unsigned char *
 padlock_memcpy(void *dst,const void *src,size_t n)
 {
-	long *d=dst;
-	const long *s=src;
+	size_t *d=dst;
+	const size_t *s=src;
 
 	n /= sizeof(*d);
 	do { *d++ = *s++; } while (--n);
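
For reference, the standalone sketch below restates the x86-64 detection logic that the patch adds as padlock_available(). It is illustrative only and not part of the patch: it uses the __cpuid macro from GCC/Clang's x86-only <cpuid.h> header instead of the patch's hand-written inline assembly, and the names padlock_probe and the test harness in main are invented for this example.

/* Illustrative sketch: probe for VIA PadLock the same way the patched
 * padlock_available() does, via GCC/Clang's <cpuid.h> (x86/x86-64 only).
 * Build with: gcc -O2 padlock-probe.c -o padlock-probe */
#include <stdio.h>
#include <string.h>
#include <cpuid.h>

static int padlock_use_ace, padlock_use_rng;

static int
padlock_probe(void)
{
	unsigned int eax, ebx, ecx, edx;
	char vendor[13];

	/* Leaf 0: the vendor string comes back in EBX:EDX:ECX order. */
	__cpuid(0x00000000, eax, ebx, ecx, edx);
	memcpy(vendor + 0, &ebx, 4);
	memcpy(vendor + 4, &edx, 4);
	memcpy(vendor + 8, &ecx, 4);
	vendor[12] = '\0';
	if (strcmp(vendor, "CentaurHauls") != 0)
		return 0;

	/* Leaf 0xC0000000 reports the highest Centaur extended leaf. */
	__cpuid(0xC0000000, eax, ebx, ecx, edx);
	if (eax < 0xC0000001)
		return 0;

	/* Leaf 0xC0000001, EDX: bits 6-7 = ACE present and enabled,
	 * bits 2-3 = RNG present and enabled (same masks as the patch). */
	__cpuid(0xC0000001, eax, ebx, ecx, edx);
	padlock_use_ace = ((edx & (0x3 << 6)) == (0x3 << 6));
	padlock_use_rng = ((edx & (0x3 << 2)) == (0x3 << 2));

	return padlock_use_ace + padlock_use_rng;
}

int
main(void)
{
	if (padlock_probe())
		printf("PadLock: ACE=%d RNG=%d\n",
		       padlock_use_ace, padlock_use_rng);
	else
		printf("PadLock not available\n");
	return 0;
}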
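
Likewise, as a reading aid (again not part of the patch): expanding PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") by hand yields roughly the function below. The register comments reflect my reading of struct padlock_cipher_data in e_padlock.c (iv at offset 0, control word at offset 16, key schedule at offset 32) and should be checked against the actual structure.

/* Hand expansion of the x86-64 PADLOCK_XCRYPT_ASM template, ECB case.
 * Via the "0"(cdata)/"=a"(iv) constraint pair, rax enters pointing at
 * cdata (i.e. cdata->iv at offset 0) and exits holding the IV pointer. */
static inline void *
padlock_xcrypt_ecb(size_t cnt, struct padlock_cipher_data *cdata,
                   void *out, const void *inp)
{
	void *iv;
	asm volatile ("leaq 16(%0),%%rdx\n"		/* rdx = &cdata->cword */
		      "	leaq 32(%0),%%rbx\n"		/* rbx = &cdata->ks   */
		      ".byte 0xf3,0x0f,0xa7,0xc8\n"	/* rep xcryptecb      */
		      : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp)
		      : "0"(cdata), "1"(cnt), "2"(out), "3"(inp)
		      : "rbx", "rdx", "cc", "memory");
	return iv;
}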