--- /dev/null
+2011-08-20 Ulrich Drepper <drepper@gmail.com>
+
+ * sysdeps/x86_64/dl-trampoline.h: If MORE_CODE is defined, restore
+ the CFI state at the end.
+ * sysdeps/x86_64/dl-trampoline.S: Define MORE_CODE before first
+ inclusion of dl-trampoline.h.
+ Based on a patch by Jiri Olsa <jolsa@redhat.com>.
+
+2011-07-24 H.J. Lu <hongjiu.lu@intel.com>
+
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Simplify
+ AVX check.
+
+2011-07-23 Ulrich Drepper <drepper@gmail.com>
+
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix one more
+ typo.
+ (_dl_x86_64_save_sse): Likewise.
+
+2011-07-22 Ulrich Drepper <drepper@gmail.com>
+
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix test for
+ OSXSAVE.
+ (_dl_x86_64_save_sse): Likewise.
+
+2011-07-21 Andreas Schwab <schwab@redhat.com>
+
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Fix last
+ change.
+ (_dl_x86_64_save_sse): Use correct AVX check.
+
+2011-07-20 Ulrich Drepper <drepper@gmail.com>
+
+ [BZ #13007]
+ * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): More complete
+ check for AVX enablement so that we don't crash with old kernels and
+ new hardware.
+ * elf/tst-audit4.c: Add same checks here.
+ * elf/tst-audit6.c: Likewise.
+
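+Background on the BZ #13007 fix, sketched in C (editorial commentary,
+not part of the patch; the helper name avx_usable is illustrative):
+CPUID.1:ECX bit 28 only says the CPU has AVX. Bit 27 (OSXSAVE) says
+the kernel has enabled XSAVE and XGETBV; only then may XGETBV be
+executed to read XCR0, where bit 1 covers SSE (XMM) state and bit 2
+AVX (YMM) state. On an old kernel with new hardware, OSXSAVE stays
+clear and any VEX-encoded instruction raises #UD, hence the crashes
+the entry above describes.
+
+	#include <cpuid.h>
+
+	/* Mirrors the check the patch adds to the tests.  Returns
+	   nonzero only when AVX may actually be used.  */
+	static int
+	avx_usable (void)
+	{
+	  unsigned int eax, ebx, ecx, edx;
+
+	  /* CPU supports AVX and the kernel has enabled XGETBV?  */
+	  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
+	      || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
+	    return 0;
+
+	  /* XCR0 bit 1 = XMM state, bit 2 = YMM state; both must be
+	     set or the kernel does not preserve AVX registers.  */
+	  asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
+	  return (eax & 6) == 6;
+	}
+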
+Index: glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
+===================================================================
+--- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit4.c
++++ glibc-2.12-2-gc4ccff1/elf/tst-audit4.c
+@@ -6,16 +6,30 @@
+ #include <cpuid.h>
+ #include <immintrin.h>
+
++
++static int
++avx_enabled (void)
++{
++ unsigned int eax, ebx, ecx, edx;
++
++ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
++ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
++ return 0;
++
++ /* Check that the OS has AVX and SSE state saving enabled. */
++ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
++
++ return (eax & 6) == 6;
++}
++
++
+ extern __m256i audit_test (__m256i, __m256i, __m256i, __m256i,
+ __m256i, __m256i, __m256i, __m256i);
+ int
+ main (void)
+ {
+- unsigned int eax, ebx, ecx, edx;
+-
+ /* Run AVX test only if AVX is supported. */
+- if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+- && (ecx & bit_AVX))
++ if (avx_enabled ())
+ {
+ __m256i ymm = _mm256_setzero_si256 ();
+ __m256i ret = audit_test (ymm, ymm, ymm, ymm, ymm, ymm, ymm, ymm);
+Index: glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
+===================================================================
+--- glibc-2.12-2-gc4ccff1.orig/elf/tst-audit6.c
++++ glibc-2.12-2-gc4ccff1/elf/tst-audit6.c
+@@ -8,14 +8,28 @@
+ extern __m128i audit_test (__m128i, __m128i, __m128i, __m128i,
+ __m128i, __m128i, __m128i, __m128i);
+
+-int
+-main (void)
++
++static int
++avx_enabled (void)
+ {
+ unsigned int eax, ebx, ecx, edx;
+
++ if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0
++ || (ecx & (bit_AVX | bit_OSXSAVE)) != (bit_AVX | bit_OSXSAVE))
++ return 0;
++
++ /* Check that the OS has AVX and SSE state saving enabled. */
++ asm ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
++
++ return (eax & 6) == 6;
++}
++
++
++int
++main (void)
++{
+ /* Run AVX test only if AVX is supported. */
+- if (__get_cpuid (1, &eax, &ebx, &ecx, &edx)
+- && (ecx & bit_AVX))
++ if (avx_enabled ())
+ {
+ __m128i xmm = _mm_setzero_si128 ();
+ __m128i ret = audit_test (xmm, xmm, xmm, xmm, xmm, xmm, xmm, xmm);
+Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
+===================================================================
+--- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.S
++++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S
+@@ -139,24 +139,31 @@ L(have_avx):
+ movl $1, %eax
+ cpuid
+ movq %r11,%rbx # Restore rbx
+- movl $1, %eax
+- testl $(1 << 28), %ecx
++ xorl %eax, %eax
++ // AVX and XSAVE supported?
++ andl $((1 << 28) | (1 << 27)), %ecx
++ cmpl $((1 << 28) | (1 << 27)), %ecx
+ jne 2f
+- negl %eax
+-2: movl %eax, L(have_avx)(%rip)
++ xorl %ecx, %ecx
++ // Get XFEATURE_ENABLED_MASK
++ xgetbv
++ andl $0x6, %eax
++2: subl $0x5, %eax
++ movl %eax, L(have_avx)(%rip)
+ cmpl $0, %eax
+
+ 1: js L(no_avx)
+
+ # define RESTORE_AVX
++# define MORE_CODE
+ # include "dl-trampoline.h"
+
+ .align 16
+ L(no_avx):
+ # endif
+
+-# undef RESTORE_AVX
+-# include "dl-trampoline.h"
++# undef RESTORE_AVX
++# include "dl-trampoline.h"
+
+ cfi_endproc
+ .size _dl_runtime_profile, .-_dl_runtime_profile
+@@ -176,11 +183,20 @@ _dl_x86_64_save_sse:
+ movl $1, %eax
+ cpuid
+ movq %r11,%rbx # Restore rbx
+- movl $1, %eax
+- testl $(1 << 28), %ecx
++ xorl %eax, %eax
++ // AVX and XSAVE supported?
++ andl $((1 << 28) | (1 << 27)), %ecx
++ cmpl $((1 << 28) | (1 << 27)), %ecx
+ jne 2f
+- negl %eax
+-2: movl %eax, L(have_avx)(%rip)
++ xorl %ecx, %ecx
++ // Get XFEATURE_ENABLED_MASK
++ xgetbv
++ andl $0x6, %eax
++ cmpl $0x6, %eax
++ // Nonzero if SSE and AVX state saving is enabled.
++ sete %al
++2: leal -1(%eax,%eax), %eax
++ movl %eax, L(have_avx)(%rip)
+ cmpl $0, %eax
+
+ 1: js L(no_avx5)
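+
+A note on the encoding used above (editorial commentary, not part of
+the patch): L(have_avx) caches the detection result as a tri-state.
+In _dl_runtime_profile, "subl $0x5, %eax" maps the XCR0 test value
+(eax & 6) == 6 to +1 and every failure value (0, 2, or 4, or the 0
+left from the CPUID bail-out) to a negative number; in
+_dl_x86_64_save_sse, "sete %al" followed by "leal -1(%eax,%eax), %eax"
+maps {0,1} to {-1,+1}. A rough C model, with illustrative names:
+
+	extern int avx_usable (void);	/* e.g. the check sketched earlier */
+
+	static int have_avx;	/* 0 = unknown, > 0 = usable, < 0 = not */
+
+	static int
+	check_avx (void)
+	{
+	  if (have_avx == 0)	/* first call: detect once, then cache */
+	    have_avx = avx_usable () ? 1 : -1;
+	  return have_avx > 0;
+	}
+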
+Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
+===================================================================
+--- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/dl-trampoline.h
++++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h
+@@ -195,14 +195,14 @@
+ _dl_call_pltexit. The La_x86_64_regs is being pointed by rsp now,
+ so we just need to allocate the sizeof(La_x86_64_retval) space on
+ the stack, since the alignment has already been taken care of. */
+-# ifdef RESTORE_AVX
++#ifdef RESTORE_AVX
+ /* sizeof(La_x86_64_retval). Need extra space for 2 SSE
+ registers to detect if xmm0/xmm1 registers are changed
+ by audit module. */
+ subq $(LRV_SIZE + XMM_SIZE*2), %rsp
+-# else
++#else
+ subq $LRV_SIZE, %rsp # sizeof(La_x86_64_retval)
+-# endif
++#endif
+ movq %rsp, %rcx # La_x86_64_retval argument to %rcx.
+
+ /* Fill in the La_x86_64_retval structure. */
+@@ -212,7 +212,7 @@
+ movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
+ movaps %xmm1, LRV_XMM1_OFFSET(%rcx)
+
+-# ifdef RESTORE_AVX
++#ifdef RESTORE_AVX
+ /* This is to support AVX audit modules. */
+ vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
+ vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
+@@ -221,14 +221,14 @@
+ by audit module. */
+ vmovdqa %xmm0, (LRV_SIZE)(%rcx)
+ vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
+-# endif
++#endif
+
+ fstpt LRV_ST0_OFFSET(%rcx)
+ fstpt LRV_ST1_OFFSET(%rcx)
+
+ movq 24(%rbx), %rdx # La_x86_64_regs argument to %rdx.
+ movq 40(%rbx), %rsi # Copy args pushed by PLT in register.
+- movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
++ movq 32(%rbx), %rdi # %rdi: link_map, %rsi: reloc_index
+ call _dl_call_pltexit
+
+ /* Restore return registers. */
+@@ -238,7 +238,7 @@
+ movaps LRV_XMM0_OFFSET(%rsp), %xmm0
+ movaps LRV_XMM1_OFFSET(%rsp), %xmm1
+
+-# ifdef RESTORE_AVX
++#ifdef RESTORE_AVX
+ /* Check if xmm0/xmm1 registers are changed by audit module. */
+ vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
+ vpmovmskb %xmm2, %esi
+@@ -253,7 +253,7 @@
+ vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
+
+ 1:
+-# endif
++#endif
+
+ fldt LRV_ST1_OFFSET(%rsp)
+ fldt LRV_ST0_OFFSET(%rsp)
+@@ -267,3 +267,10 @@
+ # (eats the reloc index and link_map)
+ cfi_adjust_cfa_offset(-48)
+ retq
++
++#ifdef MORE_CODE
++ cfi_adjust_cfa_offset(48)
++ cfi_rel_offset(%rbx, 0)
++ cfi_def_cfa_register(%rbx)
++# undef MORE_CODE
++#endif
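+
+Why the new MORE_CODE block (editorial commentary, not part of the
+patch): dl-trampoline.h is included twice inside _dl_runtime_profile,
+once with RESTORE_AVX defined and once without. The epilogue ends with
+cfi_adjust_cfa_offset(-48) and retq, so after the first inclusion the
+assembler's CFI tracking describes the post-return state while more
+code of the same function still follows; the added directives
+re-establish the CFA and the saved-%rbx rule for that code. The
+double-inclusion pattern, schematically (a sketch, not the literal
+file contents):
+
+	/* In the including file: */
+	#define RESTORE_AVX
+	#define MORE_CODE	/* body is not the end of the function */
+	#include "dl-trampoline.h"	/* restores CFI state, undefs MORE_CODE */
+
+	#undef RESTORE_AVX
+	#include "dl-trampoline.h"	/* final inclusion: no restore needed */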