openssl/patches/openssl-1.0.1-beta2-padlock64.patch
diff -up openssl-1.0.1-beta2/engines/e_padlock.c.padlock64 openssl-1.0.1-beta2/engines/e_padlock.c
--- openssl-1.0.1-beta2/engines/e_padlock.c.padlock64	2011-06-21 18:42:15.000000000 +0200
+++ openssl-1.0.1-beta2/engines/e_padlock.c	2012-02-06 20:18:52.039537799 +0100
@@ -101,7 +101,10 @@
 compiler choice is limited to GCC and Microsoft C. */
 #undef COMPILE_HW_PADLOCK
 #if !defined(I386_ONLY) && !defined(OPENSSL_NO_INLINE_ASM)
-# if (defined(__GNUC__) && (defined(__i386__) || defined(__i386))) || \
+# if (defined(__GNUC__) && __GNUC__>=2 && \
+ (defined(__i386__) || defined(__i386) || \
+ defined(__x86_64__) || defined(__x86_64)) \
+ ) || \
 (defined(_MSC_VER) && defined(_M_IX86))
 # define COMPILE_HW_PADLOCK
 # endif
@@ -137,7 +140,7 @@ void ENGINE_load_padlock (void)
 # endif
 #elif defined(__GNUC__)
 # ifndef alloca
-# define alloca(s) __builtin_alloca(s)
+# define alloca(s) __builtin_alloca((s))
 # endif
 #endif
 
@@ -304,6 +307,7 @@ static volatile struct padlock_cipher_da
 * =======================================================
 */
 #if defined(__GNUC__) && __GNUC__>=2
+#if defined(__i386__) || defined(__i386)
 /*
 * As for excessive "push %ebx"/"pop %ebx" found all over.
 * When generating position-independent code GCC won't let
@@ -383,21 +387,6 @@ padlock_available(void)
 return padlock_use_ace + padlock_use_rng;
 }
 
-#ifndef OPENSSL_NO_AES
-/* Our own htonl()/ntohl() */
-static inline void
-padlock_bswapl(AES_KEY *ks)
-{
- size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
- unsigned int *key = ks->rd_key;
-
- while (i--) {
- asm volatile ("bswapl %0" : "+r"(*key));
- key++;
- }
-}
-#endif
-
 /* Force key reload from memory to the CPU microcode.
 Loading EFLAGS from the stack clears EFLAGS[30]
 which does the trick. */
@@ -455,12 +444,127 @@ static inline void *name(size_t cnt, \
 : "edx", "cc", "memory"); \
 return iv; \
 }
+#endif
+
+#elif defined(__x86_64__) || defined(__x86_64)
+
+/* Load supported features of the CPU to see if
+ the PadLock is available. */
+static int
+padlock_available(void)
+{
+ char vendor_string[16];
+ unsigned int eax, edx;
 
+ /* Are we running on the Centaur (VIA) CPU? */
+ eax = 0x00000000;
+ vendor_string[12] = 0;
+ asm volatile (
+ "cpuid\n"
+ "movl %%ebx,(%1)\n"
+ "movl %%edx,4(%1)\n"
+ "movl %%ecx,8(%1)\n"
+ : "+a"(eax) : "r"(vendor_string) : "rbx", "rcx", "rdx");
+ if (strcmp(vendor_string, "CentaurHauls") != 0)
+ return 0;
+
+ /* Check for Centaur Extended Feature Flags presence */
+ eax = 0xC0000000;
+ asm volatile ("cpuid"
+ : "+a"(eax) : : "rbx", "rcx", "rdx");
+ if (eax < 0xC0000001)
+ return 0;
+
+ /* Read the Centaur Extended Feature Flags */
+ eax = 0xC0000001;
+ asm volatile ("cpuid"
+ : "+a"(eax), "=d"(edx) : : "rbx", "rcx");
+
+ /* Fill up some flags */
+ padlock_use_ace = ((edx & (0x3<<6)) == (0x3<<6));
+ padlock_use_rng = ((edx & (0x3<<2)) == (0x3<<2));
+
+ return padlock_use_ace + padlock_use_rng;
+}
+
+/* Force key reload from memory to the CPU microcode.
+ Loading EFLAGS from the stack clears EFLAGS[30]
+ which does the trick. */
+static inline void
+padlock_reload_key(void)
+{
+ asm volatile ("pushfq; popfq");
+}
+
+#ifndef OPENSSL_NO_AES
+/*
+ * This is heuristic key context tracing. At first one
+ * believes that one should use atomic swap instructions,
+ * but it's not actually necessary. Point is that if
+ * padlock_saved_context was changed by another thread
+ * after we've read it and before we compare it with cdata,
+ * our key *shall* be reloaded upon thread context switch
+ * and we are therefore set in either case...
+ */
+static inline void
+padlock_verify_context(struct padlock_cipher_data *cdata)
+{
+ asm volatile (
+ "pushfq\n"
+" btl $30,(%%rsp)\n"
+" jnc 1f\n"
+" cmpq %2,%1\n"
+" je 1f\n"
+" popfq\n"
+" subq $8,%%rsp\n"
+"1: addq $8,%%rsp\n"
+" movq %2,%0"
+ :"+m"(padlock_saved_context)
+ : "r"(padlock_saved_context), "r"(cdata) : "cc");
+}
+
+/* Template for padlock_xcrypt_* modes */
+/* BIG FAT WARNING:
+ * The offsets used with 'leal' instructions
+ * describe items of the 'padlock_cipher_data'
+ * structure.
+ */
+#define PADLOCK_XCRYPT_ASM(name,rep_xcrypt) \
+static inline void *name(size_t cnt, \
+ struct padlock_cipher_data *cdata, \
+ void *out, const void *inp) \
+{ void *iv; \
+ asm volatile ( "leaq 16(%0),%%rdx\n" \
+ " leaq 32(%0),%%rbx\n" \
+ rep_xcrypt "\n" \
+ : "=a"(iv), "=c"(cnt), "=D"(out), "=S"(inp) \
+ : "0"(cdata), "1"(cnt), "2"(out), "3"(inp) \
+ : "rbx", "rdx", "cc", "memory"); \
+ return iv; \
+}
+#endif
+
+#endif /* cpu */
+
+#ifndef OPENSSL_NO_AES
 /* Generate all functions with appropriate opcodes */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ecb, ".byte 0xf3,0x0f,0xa7,0xc8") /* rep xcryptecb */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cbc, ".byte 0xf3,0x0f,0xa7,0xd0") /* rep xcryptcbc */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_cfb, ".byte 0xf3,0x0f,0xa7,0xe0") /* rep xcryptcfb */
 PADLOCK_XCRYPT_ASM(padlock_xcrypt_ofb, ".byte 0xf3,0x0f,0xa7,0xe8") /* rep xcryptofb */
+
+/* Our own htonl()/ntohl() */
+static inline void
+padlock_bswapl(AES_KEY *ks)
+{
+ size_t i = sizeof(ks->rd_key)/sizeof(ks->rd_key[0]);
+ unsigned int *key = ks->rd_key;
+
+ while (i--) {
+ asm volatile ("bswapl %0" : "+r"(*key));
+ key++;
+ }
+}
 #endif
 
 /* The RNG call itself */
@@ -491,8 +595,8 @@ padlock_xstore(void *addr, unsigned int
 static inline unsigned char *
 padlock_memcpy(void *dst,const void *src,size_t n)
 {
- long *d=dst;
- const long *s=src;
+ size_t *d=dst;
+ const size_t *s=src;
 
 n /= sizeof(*d);
 do { *d++ = *s++; } while (--n);
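
Note (not part of the patch): the sketch below is a minimal standalone illustration of the same Centaur/VIA CPUID probe that the x86_64 padlock_available() added above performs, written with GCC's <cpuid.h> __cpuid() macro instead of hand-coded inline assembly. It checks the "CentaurHauls" vendor string, the highest Centaur extended leaf reported by leaf 0xC0000000, and the ACE/RNG "present and enabled" bit pairs in EDX of leaf 0xC0000001. The file name is hypothetical and the program is only a sketch for reading along with the patch.

/* cpuid_padlock_check.c - hypothetical standalone probe mirroring the
 * detection logic of the x86_64 padlock_available() added by this patch.
 * Assumes GCC (or clang) on x86/x86_64 and the <cpuid.h> __cpuid() macro. */
#include <cpuid.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned int eax, ebx, ecx, edx;
    char vendor[13];

    /* Leaf 0: the vendor string comes back in EBX, EDX, ECX order. */
    __cpuid(0, eax, ebx, ecx, edx);
    memcpy(vendor + 0, &ebx, 4);
    memcpy(vendor + 4, &edx, 4);
    memcpy(vendor + 8, &ecx, 4);
    vendor[12] = '\0';
    if (strcmp(vendor, "CentaurHauls") != 0) {
        puts("not a Centaur (VIA) CPU");
        return 1;
    }

    /* Leaf 0xC0000000: EAX reports the highest Centaur extended leaf. */
    __cpuid(0xC0000000, eax, ebx, ecx, edx);
    if (eax < 0xC0000001) {
        puts("no Centaur extended feature flags");
        return 1;
    }

    /* Leaf 0xC0000001: EDX bits 6-7 = ACE present and enabled,
     * bits 2-3 = RNG present and enabled (same masks as the patch). */
    __cpuid(0xC0000001, eax, ebx, ecx, edx);
    printf("PadLock ACE: %s, RNG: %s\n",
           (edx & (0x3 << 6)) == (0x3 << 6) ? "yes" : "no",
           (edx & (0x3 << 2)) == (0x3 << 2) ? "yes" : "no");
    return 0;
}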