1 2011-07-24 H.J. Lu <hongjiu.lu@intel.com>
3 * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Simplify AVX check.
6 2011-08-20 Ulrich Drepper <drepper@gmail.com>
8 * sysdeps/x86_64/dl-trampoline.h: If MORE_CODE is defined, restore
9 the CFI state in the end.
10 * sysdeps/x86_64/dl-trampoline.S: Define MORE_CODE before first
11 inclusion of dl-trampoline.h.
12 Based on a patch by Jiri Olsa <jolsa@redhat.com>.
14 2011-07-23 Ulrich Drepper <drepper@gmail.com>
16 * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix one more typo.
18 (_dl_x86_64_save_sse): Likewise.
20 2011-07-22 Ulrich Drepper <drepper@gmail.com>
22 * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix test for OSXSAVE.
24 (_dl_x86_64_save_sse): Likewise.
26 2011-07-21 Andreas Schwab <schwab@redhat.com>
28 * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix last change.
30 (_dl_x86_64_save_sse): Use correct AVX check.
32 2011-07-20 Ulrich Drepper <drepper@gmail.com>
35 * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): More complete
36 check for AVX enablement so that we don't crash with old kernels and new hardware.
38 * elf/tst-audit4.c: Add same checks here.
39 * elf/tst-audit6.c: Likewise.
41 Index: glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
42 ===================================================================
43 --- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit4.c
44 +++ glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
47 #include <immintrin.h>
53 + unsigned int eax, ebx, ecx, edx;
55 + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
56 + || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
59 + /* Check the OS has AVX and SSE saving enabled. */
60 + asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
62 + return (eax & 6) == 6;
66 extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
67 __m256i, __m256i, __m256i, __m256i);
71 - unsigned int eax, ebx, ecx, edx;
73 /* Run AVX test only if AVX is supported. */
74 - if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
78 __m256i ymm = _mm256_setzero_si256 ();
79 __m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm);
80 Index: glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
81 ===================================================================
82 --- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit6.c
83 +++ glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
85 extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i,
86 __m128i, __m128i, __m128i, __m128i);
94 unsigned int eax, ebx, ecx, edx;
96 + if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
97 + || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
100 + /* Check the OS has AVX and SSE saving enabled. */
101 + asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
103 + return (eax & 6) == 6;
110 /* Run AVX test only if AVX is supported. */
111 - if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
112 - && (ecx & bit_AVX))
113 + if (avx_enabled ())
115 __m128i xmm = _mm_setzero_si128 ();
116 __m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm);
117 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
118 ===================================================================
119 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.S
120 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
121 @@ -139,24 +139,31 @@ L(have_avx):
124 movq %r11,%rbx # Restore rbx
126 - testl $(1 << 28), %ecx
128 + // AVX and XSAVE supported?
129 + andl $((1 << 28) | (1 << 27)), %ecx
130 + cmpl $((1 << 28) | (1 << 27)), %ecx
133 -2: movl %eax, L(have_avx)(%rip)
135 + // Get XFEATURE_ENABLED_MASK
139 + movl %eax, L(have_avx)(%rip)
146 # include "dl-trampoline.h"
153 -# include "dl-trampoline.h"
155 +# include "dl-trampoline.h"
158 .size _dl_runtime_profile, .-_dl_runtime_profile
159 @@ -176,11 +183,20 @@ _dl_x86_64_save_sse:
162 movq %r11,%rbx # Restore rbx
164 - testl $(1 << 28), %ecx
166 + // AVX and XSAVE supported?
167 + andl $((1 << 28) | (1 << 27)), %ecx
168 + cmpl $((1 << 28) | (1 << 27)), %ecx
171 -2: movl %eax, L(have_avx)(%rip)
173 + // Get XFEATURE_ENABLED_MASK
177 + // Nonzero if SSE and AVX state saving is enabled.
179 +2: leal -1(%eax,%eax), %eax
180 + movl %eax, L(have_avx)(%rip)
184 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
185 ===================================================================
186 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.h
187 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
188 @@ -195,14 +195,14 @@
189 _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
190 so we just need to allocate the sizeof(La_x86_64_retval) space on
191 the stack, since the alignment has already been taken care of. */
194 /* sizeof(La_x86_64_retval). Need extra space for 2 SSE
195 registers to detect if xmm0/xmm1 registers are changed
197 subq $(LRV_SIZE + XMM_SIZE*2), %rsp
200 subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval)
203 movq %rsp, %rcx # La_x86_64_retval argument to %rcx.
205 /* Fill in the La_x86_64_retval structure. */
207 movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
208 movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
212 /* This is to support AVX audit modules. */
213 vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
214 vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
215 @@ -221,14 +221,14 @@
217 vmovdqa %xmm0, (LRV_SIZE)(%rcx)
218 vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
222 fstpt LRV_ST0_OFFSET(%rcx)
223 fstpt LRV_ST1_OFFSET(%rcx)
225 movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx.
226 movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
227 - movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
228 + movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
229 call _dl_call_pltexit
231 /* Restore return registers. */
233 movaps LRV_XMM0_OFFSET(%rsp), %xmm0
234 movaps LRV_XMM1_OFFSET(%rsp), %xmm1
238 /* Check if xmm0/xmm1 registers are changed by audit module. */
239 vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
240 vpmovmskb %xmm2, %esi
242 vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
248 fldt LRV_ST1_OFFSET(%rsp)
249 fldt LRV_ST0_OFFSET(%rsp)
251 # (eats the reloc index and link_map)
252 cfi_adjust_cfa_offset(-48)
256 + cfi_adjust_cfa_offset(48)
257 + cfi_rel_offset(%rbx, 0)
258 + cfi_def_cfa_register(%rbx)