2 # Based on AVX-512 support for glibc, but heavily modified for rhel-6.7.
3 # Without assembler support we drop all of the configure checks and simply
4 # output using .byte directives the minimal AVX512 instructions required
5 # by the loader. Likewise testing is also impossible, so instead we use
6 # the Intel emulator running in `-skx` (Skylake Xeon) emulation mode and
7 # verify that a pre-built set of tests passes.
9 # commit 6986b98a18490e76b16911d1c6b1ba013598d40d
10 # Author: Ulrich Drepper <drepper@gmail.com>
11 # Date: Wed Jul 20 14:20:00 2011 -0400
13 # Force La_x86_64_ymm to be 16-byte aligned
15 # commit aa4de9cea5c07d43caeaca9722c2d417e9a2919c
16 # Author: H.J. Lu <hjl.tools@gmail.com>
17 # Date: Fri Mar 14 08:51:25 2014 -0700
19 # Check AVX-512 assembler support first
21 # It checks AVX-512 assembler support first and sets libc_cv_cc_avx512 to
22 # $libc_cv_asm_avx512, instead of yes. GCC won't support AVX-512 if
23 # assembler doesn't support it.
25 # * sysdeps/x86_64/configure.ac: Check AVX-512 assembler support
26 # first. Disable AVX-512 GCC support if assembler doesn't support
28 # * sysdeps/x86_64/configure: Regenerated.
30 # commit 2d63a517e4084ec80403cd9f278690fa8b676cc4
31 # Author: Igor Zamyatin <igor.zamyatin@intel.com>
32 # Date: Thu Mar 13 11:10:22 2014 -0700
34 # Save and restore AVX-512 zmm registers to x86-64 ld.so
36 # AVX-512 ISA adds 512-bit zmm registers. This patch updates
37 # _dl_runtime_profile to pass zmm registers to run-time audit. It also
38 # changes _dl_x86_64_save_sse and _dl_x86_64_restore_sse to support zmm
39 # registers, which are called only when RTLD_PREPARE_FOREIGN_CALL
40 # is used. Its performance impact is minimal.
42 # * config.h.in (HAVE_AVX512_SUPPORT): New #undef.
43 # (HAVE_AVX512_ASM_SUPPORT): Likewise.
44 # * sysdeps/x86_64/bits/link.h (La_x86_64_zmm): New.
45 # (La_x86_64_vector): Add zmm.
46 # * sysdeps/x86_64/Makefile (tests): Add tst-audit10.
47 # (modules-names): Add tst-auditmod10a and tst-auditmod10b.
48 # ($(objpfx)tst-audit10): New target.
49 # ($(objpfx)tst-audit10.out): Likewise.
50 # (tst-audit10-ENV): New.
51 # (AVX512-CFLAGS): Likewise.
52 # (CFLAGS-tst-audit10.c): Likewise.
53 # (CFLAGS-tst-auditmod10a.c): Likewise.
54 # (CFLAGS-tst-auditmod10b.c): Likewise.
55 # * sysdeps/x86_64/configure.ac: Set config-cflags-avx512,
56 # HAVE_AVX512_SUPPORT and HAVE_AVX512_ASM_SUPPORT.
57 # * sysdeps/x86_64/configure: Regenerated.
58 # * sysdeps/x86_64/dl-trampoline.S (_dl_runtime_profile): Add
59 # AVX-512 zmm register support.
60 # (_dl_x86_64_save_sse): Likewise.
61 # (_dl_x86_64_restore_sse): Likewise.
62 # * sysdeps/x86_64/dl-trampoline.h: Updated to support different
63 # size vector registers.
64 # * sysdeps/x86_64/link-defines.sym (YMM_SIZE): New.
65 # (ZMM_SIZE): Likewise.
66 # * sysdeps/x86_64/tst-audit10.c: New file.
67 # * sysdeps/x86_64/tst-auditmod10a.c: Likewise.
68 # * sysdeps/x86_64/tst-auditmod10b.c: Likewise.
71 # https://sourceware.org/ml/libc-alpha/2014-09/msg00228.html
72 # To extend zmm register checking.
74 diff -urN glibc-2.12-2-gc4ccff1/sysdeps/x86_64/bits/link.h glibc-2.12-2-gc4ccff1.mod/sysdeps/x86_64/bits/link.h
75 --- glibc-2.12-2-gc4ccff1/sysdeps/x86_64/bits/link.h 2010-05-04 07:27:23.000000000 -0400
76 +++ glibc-2.12-2-gc4ccff1.mod/sysdeps/x86_64/bits/link.h 2015-03-03 23:03:25.041829238 -0500
78 /* Registers for entry into PLT on x86-64. */
79 # if __GNUC_PREREQ (4,0)
80 typedef float La_x86_64_xmm __attribute__ ((__vector_size__ (16)));
81 -typedef float La_x86_64_ymm __attribute__ ((__vector_size__ (32)));
82 +typedef float La_x86_64_ymm __attribute__ ((__vector_size__ (32),
84 +typedef double La_x86_64_zmm __attribute__ ((__vector_size__ (64),
87 typedef float La_x86_64_xmm __attribute__ ((__mode__ (__V4SF__)));
91 # if __GNUC_PREREQ (4,0)
93 + La_x86_64_zmm zmm[1];
96 -} La_x86_64_vector __attribute__ ((aligned(16)));
97 +} La_x86_64_vector __attribute__ ((__aligned__(16)));
99 typedef struct La_x86_64_regs
101 diff -urN glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h glibc-2.12-2-gc4ccff1.mod/sysdeps/x86_64/dl-trampoline.h
102 --- glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.h 2015-03-03 23:03:05.109457627 -0500
103 +++ glibc-2.12-2-gc4ccff1.mod/sysdeps/x86_64/dl-trampoline.h 2015-03-03 23:06:58.434101818 -0500
107 /* This is to support AVX audit modules. */
108 - vmovdqu %ymm0, (LR_VECTOR_OFFSET)(%rsp)
109 - vmovdqu %ymm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
110 - vmovdqu %ymm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
111 - vmovdqu %ymm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
112 - vmovdqu %ymm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
113 - vmovdqu %ymm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
114 - vmovdqu %ymm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
115 - vmovdqu %ymm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
116 +# if HAVE_NO_AVX512_ASM_SUPPORT
117 + /* Save AVX-512 registers. Use .byte because we lack assembler support. */
118 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x44,0x24,0x03 # vmovdqu64 %zmm0,0xc0(%rsp)
119 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x4c,0x24,0x04 # vmovdqu64 %zmm1,0x100(%rsp)
120 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x54,0x24,0x05 # vmovdqu64 %zmm2,0x140(%rsp)
121 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x5c,0x24,0x06 # vmovdqu64 %zmm3,0x180(%rsp)
122 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x64,0x24,0x07 # vmovdqu64 %zmm4,0x1c0(%rsp)
123 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x6c,0x24,0x08 # vmovdqu64 %zmm5,0x200(%rsp)
124 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x74,0x24,0x09 # vmovdqu64 %zmm6,0x240(%rsp)
125 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x7c,0x24,0x0a # vmovdqu64 %zmm7,0x280(%rsp)
127 + VMOV %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
128 + VMOV %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
129 + VMOV %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
130 + VMOV %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
131 + VMOV %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
132 + VMOV %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
133 + VMOV %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
134 + VMOV %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
137 /* Save xmm0-xmm7 registers to detect if any of them are
138 changed by audit module. */
141 vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
143 -2: vmovdqu (LR_VECTOR_OFFSET)(%rsp), %ymm0
144 +# if HAVE_NO_AVX512_ASM_SUPPORT
145 +2: .byte 0x62,0xf1,0xfe,0x48,0x6f,0x44,0x24,0x03 # vmovdqu64 0xc0(%rsp),%zmm0
147 +2: VMOV (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
149 vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)
151 1: vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
154 vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
156 -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %ymm1
157 +# if HAVE_NO_AVX512_ASM_SUPPORT
158 +2: .byte 0x62,0xf1,0xfe,0x48,0x6f,0x4c,0x24,0x04 # vmovdqu64 0x100(%rsp),%zmm1
160 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
162 vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
164 1: vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
167 vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
169 -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %ymm2
170 +# if HAVE_NO_AVX512_ASM_SUPPORT
171 +2: .byte 0x62,0xf1,0xfe,0x48,0x6f,0x54,0x24,0x05 # vmovdqu64 0x140(%rsp),%zmm2
173 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
175 vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
177 1: vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
180 vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
182 -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %ymm3
183 +# if HAVE_NO_AVX512_ASM_SUPPORT
184 +2: .byte 0x62,0xf1,0xfe,0x48,0x6f,0x5c,0x24,0x06 # vmovdqu64 0x180(%rsp),%zmm3
186 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
188 vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
190 1: vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
193 vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
195 -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %ymm4
196 +# if HAVE_NO_AVX512_ASM_SUPPORT
197 +2: .byte 0x62,0xf1,0xfe,0x48,0x6f,0x64,0x24,0x07 # vmovdqu64 0x1c0(%rsp),%zmm4
199 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
201 vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
203 1: vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
206 vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
208 -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %ymm5
209 +# if HAVE_NO_AVX512_ASM_SUPPORT
210 +2: .byte 0x62,0xf1,0xfe,0x48,0x6f,0x6c,0x24,0x08 # vmovdqu64 0x200(%rsp),%zmm5
212 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
214 vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
216 1: vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
219 vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
221 -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %ymm6
222 +# if HAVE_NO_AVX512_ASM_SUPPORT
223 +2: .byte 0x62,0xf1,0xfe,0x48,0x6f,0x74,0x24,0x09 # vmovdqu64 0x240(%rsp),%zmm6
225 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
227 vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
229 1: vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
232 vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
234 -2: vmovdqu (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %ymm7
235 +# if HAVE_NO_AVX512_ASM_SUPPORT
236 +2: .byte 0x62,0xf1,0xfe,0x48,0x6f,0x7c,0x24,0x0a # vmovdqu64 0x280(%rsp),%zmm7
238 +2: VMOV (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
240 vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)
246 /* This is to support AVX audit modules. */
247 - vmovdqu %ymm0, LRV_VECTOR0_OFFSET(%rcx)
248 - vmovdqu %ymm1, LRV_VECTOR1_OFFSET(%rcx)
249 +# if HAVE_NO_AVX512_ASM_SUPPORT
250 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x81,0x50,0x00,0x00,0x00 # vmovdqu64 %zmm0,0x50(%rcx)
251 + .byte 0x62,0xf1,0xfe,0x48,0x7f,0x89,0x90,0x00,0x00,0x00 # vmovdqu64 %zmm1,0x90(%rcx)
253 + VMOV %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
254 + VMOV %VEC(1), LRV_VECTOR1_OFFSET(%rcx)
257 /* Save xmm0/xmm1 registers to detect if they are changed
259 @@ -244,13 +293,21 @@
260 vpmovmskb %xmm2, %esi
263 - vmovdqu LRV_VECTOR0_OFFSET(%rsp), %ymm0
264 +# if HAVE_NO_AVX512_ASM_SUPPORT
265 + .byte 0x62,0xf1,0xfe,0x48,0x6f,0x84,0x24,0x50,0x00,0x00,0x00 # vmovdqu64 0x50(%rsp),%zmm0
267 + VMOV LRV_VECTOR0_OFFSET(%rsp), %VEC(0)
270 1: vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
271 vpmovmskb %xmm2, %esi
274 - vmovdqu LRV_VECTOR1_OFFSET(%rsp), %ymm1
275 +# if HAVE_NO_AVX512_ASM_SUPPORT
276 + .byte 0x62,0xf1,0xfe,0x48,0x6f,0x8c,0x24,0x90,0x00,0x00,0x00 # vmovdqu64 0x90(%rsp),%zmm1
278 + VMOV LRV_VECTOR1_OFFSET(%rsp), %VEC(1)
283 diff -urN glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S glibc-2.12-2-gc4ccff1.mod/sysdeps/x86_64/dl-trampoline.S
284 --- glibc-2.12-2-gc4ccff1/sysdeps/x86_64/dl-trampoline.S 2015-03-03 23:03:05.108457659 -0500
285 +++ glibc-2.12-2-gc4ccff1.mod/sysdeps/x86_64/dl-trampoline.S 2015-03-03 23:07:31.799049953 -0500
289 cmpl $0, L(have_avx)(%rip)
292 movq %rbx, %r11 # Save rbx
295 @@ -143,18 +143,51 @@
296 // AVX and XSAVE supported?
297 andl $((1 << 28) | (1 << 27)), %ecx
298 cmpl $((1 << 28) | (1 << 27)), %ecx
301 + // AVX512 supported in processor?
302 + movq %rbx, %r11 # Save rbx
306 + andl $(1 << 16), %ebx
308 // Get XFEATURE_ENABLED_MASK
313 + movq %r11, %rbx # Restore rbx
315 + // Verify that XCR0[7:5] = '111b' and
316 + // XCR0[2:1] = '11b' which means
317 + // that zmm state is enabled
321 + movl %eax, L(have_avx)(%rip)
323 +# define RESTORE_AVX
324 +# define HAVE_NO_AVX512_ASM_SUPPORT 1
325 +# define VMOV vmovdqu64
326 +# define VEC(i) zmm##i
328 +# include "dl-trampoline.h"
332 +# undef HAVE_NO_AVX512_ASM_SUPPORT
335 movl %eax, L(have_avx)(%rip)
341 + cmpl $0xe6, L(have_avx)(%rip)
346 +# define VMOV vmovdqu
347 +# define VEC(i) ymm##i
349 # include "dl-trampoline.h"
353 # ifdef HAVE_AVX_SUPPORT
354 cmpl $0, L(have_avx)(%rip)
357 movq %rbx, %r11 # Save rbx
360 @@ -187,21 +220,37 @@
361 // AVX and XSAVE supported?
362 andl $((1 << 28) | (1 << 27)), %ecx
363 cmpl $((1 << 28) | (1 << 27)), %ecx
366 + // AVX512 supported in a processor?
367 + movq %rbx, %r11 # Save rbx
371 + andl $(1 << 16), %ebx
373 // Get XFEATURE_ENABLED_MASK
377 - // Nonzero if SSE and AVX state saving is enabled.
379 -2: leal -1(%eax,%eax), %eax
381 + movq %r11, %rbx # Restore rbx
383 + // Verify that XCR0[7:5] = '111b' and
384 + // XCR0[2:1] = '11b' which means
385 + // that zmm state is enabled
387 movl %eax, L(have_avx)(%rip)
395 + movl %eax, L(have_avx)(%rip)
398 -# define YMM_SIZE 32
401 + cmpl $0xe6, L(have_avx)(%rip)
404 vmovdqa %ymm0, %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE
405 vmovdqa %ymm1, %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE
406 vmovdqa %ymm2, %fs:RTLD_SAVESPACE_SSE+2*YMM_SIZE
408 vmovdqa %ymm6, %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE
409 vmovdqa %ymm7, %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE
412 +# Original instructions:
413 +# vmovdqu64 %zmm0, %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE
414 +# vmovdqu64 %zmm1, %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE
415 +# vmovdqu64 %zmm2, %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE
416 +# vmovdqu64 %zmm3, %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE
417 +# vmovdqu64 %zmm4, %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE
418 +# vmovdqu64 %zmm5, %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE
419 +# vmovdqu64 %zmm6, %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE
420 +# vmovdqu64 %zmm7, %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE
421 +# Assembled instructions:
422 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x7f,0x04,0x25,0x80,0x00,0x00,0x00 # vmovdqu64 %zmm0,%fs:0x80
423 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x7f,0x0c,0x25,0xc0,0x00,0x00,0x00 # vmovdqu64 %zmm1,%fs:0xc0
424 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x7f,0x14,0x25,0x00,0x01,0x00,0x00 # vmovdqu64 %zmm2,%fs:0x100
425 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x7f,0x1c,0x25,0x40,0x01,0x00,0x00 # vmovdqu64 %zmm3,%fs:0x140
426 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x7f,0x24,0x25,0x80,0x01,0x00,0x00 # vmovdqu64 %zmm4,%fs:0x180
427 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x7f,0x2c,0x25,0xc0,0x01,0x00,0x00 # vmovdqu64 %zmm5,%fs:0x1c0
428 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x7f,0x34,0x25,0x00,0x02,0x00,0x00 # vmovdqu64 %zmm6,%fs:0x200
429 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x7f,0x3c,0x25,0x40,0x02,0x00,0x00 # vmovdqu64 %zmm7,%fs:0x240
433 movdqa %xmm0, %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE
435 # ifdef HAVE_AVX_SUPPORT
436 cmpl $0, L(have_avx)(%rip)
438 + cmpl $0xe6, L(have_avx)(%rip)
441 vmovdqa %fs:RTLD_SAVESPACE_SSE+0*YMM_SIZE, %ymm0
442 vmovdqa %fs:RTLD_SAVESPACE_SSE+1*YMM_SIZE, %ymm1
444 vmovdqa %fs:RTLD_SAVESPACE_SSE+6*YMM_SIZE, %ymm6
445 vmovdqa %fs:RTLD_SAVESPACE_SSE+7*YMM_SIZE, %ymm7
448 +# Original instructions:
449 +# vmovdqu64 %fs:RTLD_SAVESPACE_SSE+0*ZMM_SIZE, %zmm0
450 +# vmovdqu64 %fs:RTLD_SAVESPACE_SSE+1*ZMM_SIZE, %zmm1
451 +# vmovdqu64 %fs:RTLD_SAVESPACE_SSE+2*ZMM_SIZE, %zmm2
452 +# vmovdqu64 %fs:RTLD_SAVESPACE_SSE+3*ZMM_SIZE, %zmm3
453 +# vmovdqu64 %fs:RTLD_SAVESPACE_SSE+4*ZMM_SIZE, %zmm4
454 +# vmovdqu64 %fs:RTLD_SAVESPACE_SSE+5*ZMM_SIZE, %zmm5
455 +# vmovdqu64 %fs:RTLD_SAVESPACE_SSE+6*ZMM_SIZE, %zmm6
456 +# vmovdqu64 %fs:RTLD_SAVESPACE_SSE+7*ZMM_SIZE, %zmm7
457 +# Assembled instructions:
458 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x6f,0x04,0x25,0x80,0x00,0x00,0x00 # vmovdqu64 %fs:0x80,%zmm0
459 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x6f,0x0c,0x25,0xc0,0x00,0x00,0x00 # vmovdqu64 %fs:0xc0,%zmm1
460 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x6f,0x14,0x25,0x00,0x01,0x00,0x00 # vmovdqu64 %fs:0x100,%zmm2
461 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x6f,0x1c,0x25,0x40,0x01,0x00,0x00 # vmovdqu64 %fs:0x140,%zmm3
462 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x6f,0x24,0x25,0x80,0x01,0x00,0x00 # vmovdqu64 %fs:0x180,%zmm4
463 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x6f,0x2c,0x25,0xc0,0x01,0x00,0x00 # vmovdqu64 %fs:0x1c0,%zmm5
464 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x6f,0x34,0x25,0x00,0x02,0x00,0x00 # vmovdqu64 %fs:0x200,%zmm6
465 + .byte 0x64,0x62,0xf1,0xfe,0x48,0x6f,0x3c,0x25,0x40,0x02,0x00,0x00 # vmovdqu64 %fs:0x240,%zmm7
469 movdqa %fs:RTLD_SAVESPACE_SSE+0*XMM_SIZE, %xmm0
470 diff -urN glibc-2.12-2-gc4ccff1/sysdeps/x86_64/link-defines.sym glibc-2.12-2-gc4ccff1.mod/sysdeps/x86_64/link-defines.sym
471 --- glibc-2.12-2-gc4ccff1/sysdeps/x86_64/link-defines.sym 2010-05-04 07:27:23.000000000 -0400
472 +++ glibc-2.12-2-gc4ccff1.mod/sysdeps/x86_64/link-defines.sym 2015-03-03 23:03:25.042829206 -0500
475 VECTOR_SIZE sizeof (La_x86_64_vector)
476 XMM_SIZE sizeof (La_x86_64_xmm)
477 +YMM_SIZE sizeof (La_x86_64_ymm)
478 +ZMM_SIZE sizeof (La_x86_64_zmm)
480 LR_SIZE sizeof (struct La_x86_64_regs)
481 LR_RDX_OFFSET offsetof (struct La_x86_64_regs, lr_rdx)