2011-07-24  H.J. Lu  <hongjiu.lu@intel.com>

	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Simplify
	AVX check.

2011-08-20  Ulrich Drepper  <drepper@gmail.com>

	* sysdeps/x86_64/dl-trampoline.h: If MORE_CODE is defined, restore
	the CFI state in the end.
	* sysdeps/x86_64/dl-trampoline.S: Define MORE_CODE before first
	inclusion of dl-trampoline.h.
	Based on a patch by Jiri Olsa <jolsa@redhat.com>.

2011-07-23  Ulrich Drepper  <drepper@gmail.com>

	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix one more
	typo.
	(_dl_x86_64_save_sse): Likewise.

2011-07-22  Ulrich Drepper  <drepper@gmail.com>

	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix test for
	OSXSAVE.
	(_dl_x86_64_save_sse): Likewise.

2011-07-21  Andreas Schwab  <schwab@redhat.com>

	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix last
	change.
	(_dl_x86_64_save_sse): Use correct AVX check.

2011-07-20  Ulrich Drepper  <drepper@gmail.com>

	[BZ #13007]
	* sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): More complete
	check for AVX enablement so that we don't crash with old kernels and
	new hardware.
	* elf/tst-audit4.c: Add same checks here.
	* elf/tst-audit6.c: Likewise.

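The fix for BZ #13007 replaces a bare CPUID AVX-bit test with the full
enablement check: the CPU must advertise both AVX and OSXSAVE in
CPUID.1:ECX, and the OS must have enabled SSE (bit 1) and AVX (bit 2)
state saving in XCR0.  As a standalone C sketch (mirroring the
avx_enabled helper the patch adds to the tests below; the name
avx_usable is illustrative, not part of the patch):

#include <cpuid.h>

static int
avx_usable (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* CPU support: AVX instructions and the OSXSAVE/XGETBV facility.  */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
      || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
    return 0;

  /* OS support: xgetbv with %ecx == 0 reads XCR0, the
     XFEATURE_ENABLED_MASK; bits 1 and 2 are SSE and AVX state.  */
  asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
  return (eax & 6) == 6;
}
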
Index: glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
===================================================================
--- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit4.c
+++ glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
@@ -6,16 +6,30 @@
 #include <cpuid.h>
 #include <immintrin.h>
 
+
+static int
+avx_enabled (void)
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+      || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+    return 0;
+
+  /* Check the OS has AVX and SSE saving enabled.  */
+  asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+  return (eax & 6) == 6;
+}
+
+
 extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
			   __m256i, __m256i, __m256i, __m256i);
 int
 main (void)
 {
-  unsigned int eax, ebx, ecx, edx;
-
   /* Run AVX test only if AVX is supported.  */
-  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
-      && (ecx & bit_AVX))
+  if (avx_enabled ())
     {
       __m256i ymm = _mm256_setzero_si256 ();
       __m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm);
Index: glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
===================================================================
--- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit6.c
+++ glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
@@ -8,14 +8,28 @@
 extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i,
			   __m128i, __m128i, __m128i, __m128i);
 
-int
-main (void)
+
+static int
+avx_enabled (void)
 {
   unsigned int eax, ebx, ecx, edx;
 
+  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+      || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+    return 0;
+
+  /* Check the OS has AVX and SSE saving enabled.  */
+  asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+
+  return (eax & 6) == 6;
+}
+
+
+int
+main (void)
+{
   /* Run AVX test only if AVX is supported.  */
-  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
-      && (ecx & bit_AVX))
+  if (avx_enabled ())
     {
       __m128i xmm = _mm_setzero_si128 ();
       __m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm);
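Both tests accept AVX only when (eax & 6) == 6 after xgetbv.  The mask
spells out as follows (bit assignments per the Intel SDM; the macro
names are illustrative, not part of the patch):

/* XCR0 (XFEATURE_ENABLED_MASK) bits checked above.  */
#define XCR0_SSE (1 << 1)	/* OS saves/restores XMM state.  */
#define XCR0_AVX (1 << 2)	/* OS saves/restores YMM state.  */
/* (eax & 6) == 6 is (eax & (XCR0_SSE | XCR0_AVX)) == (XCR0_SSE | XCR0_AVX).  */
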
Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
===================================================================
--- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.S
+++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
@@ -139,24 +139,31 @@ L(have_avx):
 	movl	$1, %eax
 	cpuid
 	movq	%r11,%rbx		# Restore rbx
-	movl	$1, %eax
-	testl	$(1 << 28), %ecx
+	xorl	%eax, %eax
+	// AVX and XSAVE supported?
+	andl	$((1 << 28) | (1 << 27)), %ecx
+	cmpl	$((1 << 28) | (1 << 27)), %ecx
 	jne	2f
-	negl	%eax
-2:	movl	%eax, L(have_avx)(%rip)
+	xorl	%ecx, %ecx
+	// Get XFEATURE_ENABLED_MASK
+	xgetbv
+	andl	$0x6, %eax
+2:	subl	$0x5, %eax
+	movl	%eax, L(have_avx)(%rip)
 	cmpl	$0, %eax
 
 1:	js	L(no_avx)
 
 # define RESTORE_AVX
+# define MORE_CODE
 # include "dl-trampoline.h"
 
 	.align 16
 L(no_avx):
 # endif
 
-# undef RESTORE_AVX
-# include "dl-trampoline.h"
+# undef RESTORE_AVX
+# include "dl-trampoline.h"
 
 	cfi_endproc
 	.size _dl_runtime_profile, .-_dl_runtime_profile
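The subl $0x5 at label 2 folds both failure paths of the rewritten
check into a single sign test.  A worked table of the possible values:

/* %eax when label 2 is reached:
     0         - CPUID lacks AVX or OSXSAVE (jne path; %eax was zeroed)
     0, 2 or 4 - XCR0 enables at most one of SSE/AVX state saving
     6         - XCR0 enables both
   After subl $0x5 only the value 6 turns positive (6 - 5 = 1); every
   other case goes negative, so `js L(no_avx)' takes the non-AVX path
   and L(have_avx) caches the signed result for subsequent calls.  */
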
@@ -176,11 +183,20 @@ _dl_x86_64_save_sse:
 	movl	$1, %eax
 	cpuid
 	movq	%r11,%rbx		# Restore rbx
-	movl	$1, %eax
-	testl	$(1 << 28), %ecx
+	xorl	%eax, %eax
+	// AVX and XSAVE supported?
+	andl	$((1 << 28) | (1 << 27)), %ecx
+	cmpl	$((1 << 28) | (1 << 27)), %ecx
 	jne	2f
-	negl	%eax
-2:	movl	%eax, L(have_avx)(%rip)
+	xorl	%ecx, %ecx
+	// Get XFEATURE_ENABLED_MASK
+	xgetbv
+	andl	$0x6, %eax
+	cmpl	$0x6, %eax
+	// Nonzero if SSE and AVX state saving is enabled.
+	sete	%al
+2:	leal	-1(%eax,%eax), %eax
+	movl	%eax, L(have_avx)(%rip)
 	cmpl	$0, %eax
 
 1:	js	L(no_avx5)
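_dl_x86_64_save_sse normalizes the XGETBV result to a boolean with
sete before label 2, because here the jne path arrives with %eax
already zero; leal -1(%eax,%eax), %eax then computes 2*%eax - 1:

/* %eax == 1 (SSE and AVX state saving enabled): 2*1 - 1 =  1 -> use AVX
   %eax == 0 (CPU or OS support missing):        2*0 - 1 = -1 -> L(no_avx5) */
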
Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
===================================================================
--- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.h
+++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
@@ -195,14 +195,14 @@
 	   _dl_call_pltexit.  The La_x86_64_regs is being pointed by rsp now,
 	   so we just need to allocate the sizeof(La_x86_64_retval) space on
 	   the stack, since the alignment has already been taken care of. */
-# ifdef RESTORE_AVX
+#ifdef RESTORE_AVX
 	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
 	   registers to detect if xmm0/xmm1 registers are changed
 	   by audit module.  */
 	subq $(LRV_SIZE + XMM_SIZE*2), %rsp
-# else
+#else
 	subq $LRV_SIZE, %rsp		# sizeof(La_x86_64_retval)
-# endif
+#endif
 	movq %rsp, %rcx			# La_x86_64_retval argument to %rcx.
 
 	/* Fill in the La_x86_64_retval structure.  */
@@ -212,7 +212,7 @@
 	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
 	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
 
-# ifdef RESTORE_AVX
+#ifdef RESTORE_AVX
 	/* This is to support AVX audit modules.  */
 	vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
 	vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
@@ -221,14 +221,14 @@
 	   by audit module.  */
 	vmovdqa %xmm0, (LRV_SIZE)(%rcx)
 	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
-# endif
+#endif
 
 	fstpt LRV_ST0_OFFSET(%rcx)
 	fstpt LRV_ST1_OFFSET(%rcx)
 
 	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
 	movq 40(%rbx), %rsi	# Copy args pushed by PLT in register.
-	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
+	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
 	call _dl_call_pltexit
 
 	/* Restore return registers.  */
@@ -238,7 +238,7 @@
 	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
 	movaps LRV_XMM1_OFFSET(%rsp), %xmm1
 
-# ifdef RESTORE_AVX
+#ifdef RESTORE_AVX
 	/* Check if xmm0/xmm1 registers are changed by audit module.  */
 	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
 	vpmovmskb %xmm2, %esi
@@ -253,7 +253,7 @@
 	vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
 
 1:
-# endif
+#endif
 
 	fldt LRV_ST1_OFFSET(%rsp)
 	fldt LRV_ST0_OFFSET(%rsp)
@@ -267,3 +267,10 @@
 					# (eats the reloc index and link_map)
 	cfi_adjust_cfa_offset(-48)
 	retq
+
+#ifdef MORE_CODE
+	cfi_adjust_cfa_offset(48)
+	cfi_rel_offset(%rbx, 0)
+	cfi_def_cfa_register(%rbx)
+# undef MORE_CODE
+#endif
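
The MORE_CODE block exists because dl-trampoline.h is expanded twice
inside a single cfi_startproc region: the epilogue above winds the CFA
offset back down for the retq, so when more code follows the first
(RESTORE_AVX) expansion, the CFA and saved-%rbx state must be
re-established before the directives of the second copy apply.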