]> git.ipfire.org Git - people/teissler/ipfire-2.x.git/blob - src/patches/glibc/glibc-rh676076.patch
Merge remote-tracking branch 'origin/next' into thirteen
[people/teissler/ipfire-2.x.git] / src / patches / glibc / glibc-rh676076.patch
1 2011-03-02 Harsha Jagasia <harsha.jagasia@amd.com>
2 Ulrich Drepper <drepper@gmail.com>
3
4 * sysdeps/x86_64/memset.S: After aligning destination, code
5 branches to different locations depending on the value of
6 misalignment, when multiarch is enabled. Fix this.
7
8 2011-03-02 Harsha Jagasia <harsha.jagasia@amd.com>
9
10 * sysdeps/x86_64/cacheinfo.c (init_cacheinfo):
11 Set __x86_64_preferred_memory_instruction for AMD processors.
12 * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
13 Set bit_Prefer_SSE_for_memop for AMD processors.
14
15 2010-11-07 H.J. Lu <hongjiu.lu@intel.com>
16
17 * sysdeps/x86_64/memset.S: Check USE_MULTIARCH and USE_SSE2 for
18 IFUNC support.
19 * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
20 memset-x86-64.
21 * sysdeps/x86_64/multiarch/bzero.S: New file.
22 * sysdeps/x86_64/multiarch/cacheinfo.c: New file.
23 * sysdeps/x86_64/multiarch/memset-x86-64.S: New file.
24 * sysdeps/x86_64/multiarch/memset.S: New file.
25 * sysdeps/x86_64/multiarch/memset_chk.S: New file.
26 * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
27 Set bit_Prefer_SSE_for_memop for Intel processors.
28 * sysdeps/x86_64/multiarch/init-arch.h (bit_Prefer_SSE_for_memop):
29 Define.
30 (index_Prefer_SSE_for_memop): Define.
31 (HAS_PREFER_SSE_FOR_MEMOP): Define.
32
33 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/cacheinfo.c
34 ===================================================================
35 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/cacheinfo.c
36 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/cacheinfo.c
37 @@ -613,6 +613,25 @@ init_cacheinfo (void)
38 long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
39 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
40
41 +#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
42 +# ifdef USE_MULTIARCH
43 + eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
44 + ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
45 + ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
46 + edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
47 +# else
48 + __cpuid (1, eax, ebx, ecx, edx);
49 +# endif
50 +
51 + /* AMD prefers SSSE3 instructions for memory/string routines
52 + if they are available, otherwise it prefers integer
53 + instructions. */
54 + if ((ecx & 0x200))
55 + __x86_64_preferred_memory_instruction = 3;
56 + else
57 + __x86_64_preferred_memory_instruction = 0;
58 +#endif
59 +
60 /* Get maximum extended function. */
61 __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
62
63 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/memset.S
64 ===================================================================
65 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/memset.S
66 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/memset.S
67 @@ -24,7 +24,7 @@
68 #define __STOS_UPPER_BOUNDARY $65536
69
70 .text
71 -#ifndef NOT_IN_libc
72 +#if !defined NOT_IN_libc && !defined USE_MULTIARCH
73 ENTRY(__bzero)
74 mov %rsi,%rdx /* Adjust parameter. */
75 xorl %esi,%esi /* Fill with 0s. */
76 @@ -34,10 +34,10 @@ weak_alias (__bzero, bzero)
77 #endif
78
79 #if defined PIC && !defined NOT_IN_libc
80 -ENTRY (__memset_chk)
81 +ENTRY_CHK (__memset_chk)
82 cmpq %rdx, %rcx
83 jb HIDDEN_JUMPTARGET (__chk_fail)
84 -END (__memset_chk)
85 +END_CHK (__memset_chk)
86 #endif
87 ENTRY (memset)
88 L(memset_entry):
89 @@ -591,157 +591,15 @@ L(A6Q1): mov %dx,-0xe(%rdi)
90 L(A7Q0): mov %dl,-0x7(%rdi)
91 L(A6Q0): mov %dx,-0x6(%rdi)
92 mov %edx,-0x4(%rdi)
93 - jmp L(aligned_now)
94 -
95 - .balign 16
96 -L(aligned_now):
97 -
98 - cmpl $0x1,__x86_64_preferred_memory_instruction(%rip)
99 - jg L(SSE_pre)
100 -
101 -L(8byte_move_try):
102 - cmpq __STOS_LOWER_BOUNDARY,%r8
103 - jae L(8byte_stos_try)
104 -
105 - .balign 16
106 -L(8byte_move):
107 - movq %r8,%rcx
108 - shrq $7,%rcx
109 - jz L(8byte_move_skip)
110 -
111 - .p2align 4
112 -
113 -L(8byte_move_loop):
114 - decq %rcx
115 -
116 - movq %rdx, (%rdi)
117 - movq %rdx, 8 (%rdi)
118 - movq %rdx, 16 (%rdi)
119 - movq %rdx, 24 (%rdi)
120 - movq %rdx, 32 (%rdi)
121 - movq %rdx, 40 (%rdi)
122 - movq %rdx, 48 (%rdi)
123 - movq %rdx, 56 (%rdi)
124 - movq %rdx, 64 (%rdi)
125 - movq %rdx, 72 (%rdi)
126 - movq %rdx, 80 (%rdi)
127 - movq %rdx, 88 (%rdi)
128 - movq %rdx, 96 (%rdi)
129 - movq %rdx, 104 (%rdi)
130 - movq %rdx, 112 (%rdi)
131 - movq %rdx, 120 (%rdi)
132 -
133 - leaq 128 (%rdi),%rdi
134 -
135 - jnz L(8byte_move_loop)
136 -
137 -L(8byte_move_skip):
138 - andl $127,%r8d
139 - lea (%rdi,%r8,1),%rdi
140 -
141 -#ifndef PIC
142 - lea L(setPxQx)(%rip),%r11
143 - jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
144 -#else
145 - lea L(Got0)(%rip),%r11
146 - lea L(setPxQx)(%rip),%rcx
147 - movswq (%rcx,%r8,2),%rcx
148 - lea (%rcx,%r11,1),%r11
149 - jmpq *%r11
150 -#endif
151 -
152 - .balign 16
153 -L(8byte_stos_try):
154 - mov __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size
155 - cmpq %r8,%r9 // calculate the lesser of remaining
156 - cmovaq %r8,%r9 // bytes and largest cache size
157 - jbe L(8byte_stos)
158 -
159 -L(8byte_move_reuse_try):
160 - cmp __STOS_UPPER_BOUNDARY,%r8
161 - jae L(8byte_move)
162 -
163 - .balign 16
164 -L(8byte_stos):
165 - movq %r9,%rcx
166 - andq $-8,%r9
167 -
168 - shrq $3,%rcx
169 - jz L(8byte_stos_skip)
170 -
171 - xchgq %rax,%rdx
172 -
173 - rep
174 - stosq
175 -
176 - xchgq %rax,%rdx
177 -
178 -L(8byte_stos_skip):
179 - subq %r9,%r8
180 - ja L(8byte_nt_move)
181 -
182 - andl $7,%r8d
183 - lea (%rdi,%r8,1),%rdi
184 -#ifndef PIC
185 - lea L(setPxQx)(%rip),%r11
186 - jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
187 -#else
188 - lea L(Got0)(%rip),%r11
189 - lea L(setPxQx)(%rip),%rcx
190 - movswq (%rcx,%r8,2),%rcx
191 - lea (%rcx,%r11,1),%r11
192 - jmpq *%r11
193 -#endif
194
195 - .balign 16
196 -L(8byte_nt_move):
197 - movq %r8,%rcx
198 - shrq $7,%rcx
199 - jz L(8byte_nt_move_skip)
200 -
201 - .balign 16
202 -L(8byte_nt_move_loop):
203 - decq %rcx
204 -
205 - movntiq %rdx, (%rdi)
206 - movntiq %rdx, 8 (%rdi)
207 - movntiq %rdx, 16 (%rdi)
208 - movntiq %rdx, 24 (%rdi)
209 - movntiq %rdx, 32 (%rdi)
210 - movntiq %rdx, 40 (%rdi)
211 - movntiq %rdx, 48 (%rdi)
212 - movntiq %rdx, 56 (%rdi)
213 - movntiq %rdx, 64 (%rdi)
214 - movntiq %rdx, 72 (%rdi)
215 - movntiq %rdx, 80 (%rdi)
216 - movntiq %rdx, 88 (%rdi)
217 - movntiq %rdx, 96 (%rdi)
218 - movntiq %rdx, 104 (%rdi)
219 - movntiq %rdx, 112 (%rdi)
220 - movntiq %rdx, 120 (%rdi)
221 -
222 - leaq 128 (%rdi),%rdi
223 -
224 - jnz L(8byte_nt_move_loop)
225 -
226 - sfence
227 -
228 -L(8byte_nt_move_skip):
229 - andl $127,%r8d
230 +#ifndef USE_MULTIARCH
231 + jmp L(aligned_now)
232
233 - lea (%rdi,%r8,1),%rdi
234 -#ifndef PIC
235 - lea L(setPxQx)(%rip),%r11
236 - jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
237 +L(SSE_pre):
238 #else
239 - lea L(Got0)(%rip),%r11
240 - lea L(setPxQx)(%rip),%rcx
241 - movswq (%rcx,%r8,2),%rcx
242 - lea (%rcx,%r11,1),%r11
243 - jmpq *%r11
244 +L(aligned_now):
245 #endif
246 -
247 -L(SSE_pre):
248 +#if !defined USE_MULTIARCH || defined USE_SSE2
249 # fill RegXMM0 with the pattern
250 movd %rdx,%xmm0
251 punpcklqdq %xmm0,%xmm0
252 @@ -1342,11 +1200,162 @@ L(SSExDx):
253 .short L(SSE15QB)-L(SSE0Q0)
254 #endif
255 .popsection
256 +#endif /* !defined USE_MULTIARCH || defined USE_SSE2 */
257 +
258 + .balign 16
259 +#ifndef USE_MULTIARCH
260 +L(aligned_now):
261 +
262 + cmpl $0x1,__x86_64_preferred_memory_instruction(%rip)
263 + jg L(SSE_pre)
264 +#endif /* USE_MULTIARCH */
265 +
266 +L(8byte_move_try):
267 + cmpq __STOS_LOWER_BOUNDARY,%r8
268 + jae L(8byte_stos_try)
269 +
270 + .balign 16
271 +L(8byte_move):
272 + movq %r8,%rcx
273 + shrq $7,%rcx
274 + jz L(8byte_move_skip)
275 +
276 + .p2align 4
277 +
278 +L(8byte_move_loop):
279 + decq %rcx
280 +
281 + movq %rdx, (%rdi)
282 + movq %rdx, 8 (%rdi)
283 + movq %rdx, 16 (%rdi)
284 + movq %rdx, 24 (%rdi)
285 + movq %rdx, 32 (%rdi)
286 + movq %rdx, 40 (%rdi)
287 + movq %rdx, 48 (%rdi)
288 + movq %rdx, 56 (%rdi)
289 + movq %rdx, 64 (%rdi)
290 + movq %rdx, 72 (%rdi)
291 + movq %rdx, 80 (%rdi)
292 + movq %rdx, 88 (%rdi)
293 + movq %rdx, 96 (%rdi)
294 + movq %rdx, 104 (%rdi)
295 + movq %rdx, 112 (%rdi)
296 + movq %rdx, 120 (%rdi)
297 +
298 + leaq 128 (%rdi),%rdi
299 +
300 + jnz L(8byte_move_loop)
301 +
302 +L(8byte_move_skip):
303 + andl $127,%r8d
304 + lea (%rdi,%r8,1),%rdi
305 +
306 +#ifndef PIC
307 + lea L(setPxQx)(%rip),%r11
308 + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
309 +#else
310 + lea L(Got0)(%rip),%r11
311 + lea L(setPxQx)(%rip),%rcx
312 + movswq (%rcx,%r8,2),%rcx
313 + lea (%rcx,%r11,1),%r11
314 + jmpq *%r11
315 +#endif
316 +
317 + .balign 16
318 +L(8byte_stos_try):
319 + mov __x86_64_shared_cache_size(%rip),%r9d // ck largest cache size
320 + cmpq %r8,%r9 // calculate the lesser of remaining
321 + cmovaq %r8,%r9 // bytes and largest cache size
322 + jbe L(8byte_stos)
323 +
324 +L(8byte_move_reuse_try):
325 + cmp __STOS_UPPER_BOUNDARY,%r8
326 + jae L(8byte_move)
327 +
328 + .balign 16
329 +L(8byte_stos):
330 + movq %r9,%rcx
331 + andq $-8,%r9
332 +
333 + shrq $3,%rcx
334 + jz L(8byte_stos_skip)
335 +
336 + xchgq %rax,%rdx
337 +
338 + rep
339 + stosq
340 +
341 + xchgq %rax,%rdx
342 +
343 +L(8byte_stos_skip):
344 + subq %r9,%r8
345 + ja L(8byte_nt_move)
346 +
347 + andl $7,%r8d
348 + lea (%rdi,%r8,1),%rdi
349 +#ifndef PIC
350 + lea L(setPxQx)(%rip),%r11
351 + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
352 +#else
353 + lea L(Got0)(%rip),%r11
354 + lea L(setPxQx)(%rip),%rcx
355 + movswq (%rcx,%r8,2),%rcx
356 + lea (%rcx,%r11,1),%r11
357 + jmpq *%r11
358 +#endif
359 +
360 + .balign 16
361 +L(8byte_nt_move):
362 + movq %r8,%rcx
363 + shrq $7,%rcx
364 + jz L(8byte_nt_move_skip)
365 +
366 + .balign 16
367 +L(8byte_nt_move_loop):
368 + decq %rcx
369 +
370 + movntiq %rdx, (%rdi)
371 + movntiq %rdx, 8 (%rdi)
372 + movntiq %rdx, 16 (%rdi)
373 + movntiq %rdx, 24 (%rdi)
374 + movntiq %rdx, 32 (%rdi)
375 + movntiq %rdx, 40 (%rdi)
376 + movntiq %rdx, 48 (%rdi)
377 + movntiq %rdx, 56 (%rdi)
378 + movntiq %rdx, 64 (%rdi)
379 + movntiq %rdx, 72 (%rdi)
380 + movntiq %rdx, 80 (%rdi)
381 + movntiq %rdx, 88 (%rdi)
382 + movntiq %rdx, 96 (%rdi)
383 + movntiq %rdx, 104 (%rdi)
384 + movntiq %rdx, 112 (%rdi)
385 + movntiq %rdx, 120 (%rdi)
386 +
387 + leaq 128 (%rdi),%rdi
388 +
389 + jnz L(8byte_nt_move_loop)
390 +
391 + sfence
392 +
393 +L(8byte_nt_move_skip):
394 + andl $127,%r8d
395 +
396 + lea (%rdi,%r8,1),%rdi
397 +#ifndef PIC
398 + lea L(setPxQx)(%rip),%r11
399 + jmpq *(%r11,%r8,8) # old scheme remained for nonPIC
400 +#else
401 + lea L(Got0)(%rip),%r11
402 + lea L(setPxQx)(%rip),%rcx
403 + movswq (%rcx,%r8,2),%rcx
404 + lea (%rcx,%r11,1),%r11
405 + jmpq *%r11
406 +#endif
407
408 END (memset)
409 libc_hidden_builtin_def (memset)
410
411 -#if defined PIC && !defined NOT_IN_libc
412 +#if defined PIC && !defined NOT_IN_libc && !defined USE_MULTIARCH
413 strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
414 .section .gnu.warning.__memset_zero_constant_len_parameter
415 .string "memset used with constant zero length parameter; this could be due to transposed parameters"
416 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/Makefile
417 ===================================================================
418 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/Makefile
419 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/Makefile
420 @@ -7,7 +7,8 @@ ifeq ($(subdir),string)
421 sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
422 strend-sse4 memcmp-sse4 \
423 strcasestr-nonascii strcasecmp_l-ssse3 \
424 - strncase_l-ssse3
425 + strncase_l-ssse3 \
426 + memset-x86-64
427 ifeq (yes,$(config-cflags-sse4))
428 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
429 CFLAGS-strcspn-c.c += -msse4
430 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/bzero.S
431 ===================================================================
432 --- /dev/null
433 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/bzero.S
434 @@ -0,0 +1,56 @@
435 +/* Multiple versions of bzero
436 + Copyright (C) 2010 Free Software Foundation, Inc.
437 + This file is part of the GNU C Library.
438 +
439 + The GNU C Library is free software; you can redistribute it and/or
440 + modify it under the terms of the GNU Lesser General Public
441 + License as published by the Free Software Foundation; either
442 + version 2.1 of the License, or (at your option) any later version.
443 +
444 + The GNU C Library is distributed in the hope that it will be useful,
445 + but WITHOUT ANY WARRANTY; without even the implied warranty of
446 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
447 + Lesser General Public License for more details.
448 +
449 + You should have received a copy of the GNU Lesser General Public
450 + License along with the GNU C Library; if not, write to the Free
451 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
452 + 02111-1307 USA. */
453 +
454 +#include <sysdep.h>
455 +#include <init-arch.h>
456 +
457 + .text
458 +ENTRY(__bzero)
459 + .type __bzero, @gnu_indirect_function /* IFUNC symbol: the code below returns the chosen implementation's address in %rax. */
460 + cmpl $0, __cpu_features+KIND_OFFSET(%rip) /* Is the field at KIND_OFFSET of __cpu_features still zero? */
461 + jne 1f /* Non-zero: skip the init call. */
462 + call __init_cpu_features /* Zero (presumably not yet initialized - confirm in init-arch.c): fill in __cpu_features. */
463 +1: leaq __bzero_x86_64(%rip), %rax /* Default choice: __bzero_x86_64. */
464 + testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
465 + jz 2f /* Prefer_SSE_for_memop bit clear: keep the default. */
466 + leaq __bzero_sse2(%rip), %rax /* Bit set: choose __bzero_sse2 instead. */
467 +2: ret
468 +END(__bzero)
469 +
470 + .type __bzero_sse2, @function
471 +__bzero_sse2:
472 + cfi_startproc
473 + CALL_MCOUNT
474 + mov %rsi,%rdx /* Length: bzero's 2nd argument (%rsi) becomes memset's 3rd (%rdx). */
475 + xorl %esi,%esi /* Fill value: memset's 2nd argument is 0. */
476 + jmp __memset_sse2 /* Tail-jump into the SSE2 memset; %rdi (destination) is passed through unchanged. */
477 + cfi_endproc
478 + .size __bzero_sse2, .-__bzero_sse2
479 +
480 + .type __bzero_x86_64, @function
481 +__bzero_x86_64:
482 + cfi_startproc
483 + CALL_MCOUNT
484 + mov %rsi,%rdx /* Length: bzero's 2nd argument (%rsi) becomes memset's 3rd (%rdx). */
485 + xorl %esi,%esi /* Fill value: memset's 2nd argument is 0. */
486 + jmp __memset_x86_64 /* Tail-jump into __memset_x86_64; %rdi (destination) is passed through unchanged. */
487 + cfi_endproc
488 + .size __bzero_x86_64, .-__bzero_x86_64
489 +
490 +weak_alias (__bzero, bzero)
491 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/cacheinfo.c
492 ===================================================================
493 --- /dev/null
494 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/cacheinfo.c
495 @@ -0,0 +1,2 @@
496 +#define DISABLE_PREFERRED_MEMORY_INSTRUCTION
497 +#include "../cacheinfo.c"
498 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/init-arch.c
499 ===================================================================
500 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/init-arch.c
501 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/init-arch.c
502 @@ -59,6 +59,11 @@ __init_cpu_features (void)
503
504 get_common_indeces (&family, &model);
505
506 + /* Intel processors prefer SSE instructions for memory/string
507 + routines if they are available. */
508 + __cpu_features.feature[index_Prefer_SSE_for_memop]
509 + |= bit_Prefer_SSE_for_memop;
510 +
511 unsigned int eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
512 unsigned int extended_family = (eax >> 20) & 0xff;
513 unsigned int extended_model = (eax >> 12) & 0xf0;
514 @@ -92,6 +97,14 @@ __init_cpu_features (void)
515 kind = arch_kind_amd;
516
517 get_common_indeces (&family, &model);
518 +
519 + unsigned int ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
520 +
521 + /* AMD processors prefer SSE instructions for memory/string routines
522 + if SSSE3 is available (ecx bit 9), otherwise they prefer integer instructions. */
523 + if ((ecx & 0x200))
524 + __cpu_features.feature[index_Prefer_SSE_for_memop]
525 + |= bit_Prefer_SSE_for_memop;
526 }
527 else
528 kind = arch_kind_other;
529 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/init-arch.h
530 ===================================================================
531 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/init-arch.h
532 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/init-arch.h
533 @@ -16,7 +16,8 @@
534 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
535 02111-1307 USA. */
536
537 -#define bit_Fast_Rep_String (1 << 0)
538 +#define bit_Fast_Rep_String (1 << 0)
539 +#define bit_Prefer_SSE_for_memop (1 << 3)
540
541 #ifdef __ASSEMBLER__
542
543 @@ -33,6 +34,7 @@
544 # define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
545
546 #define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
547 +# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
548
549 #else /* __ASSEMBLER__ */
550
551 @@ -103,5 +105,12 @@ extern const struct cpu_features *__get_
552 # define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
553
554 # define index_Fast_Rep_String FEATURE_INDEX_1
555 +# define index_Prefer_SSE_for_memop FEATURE_INDEX_1
556 +
557 +#define HAS_ARCH_FEATURE(idx, bit) \
558 + ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
559 +
560 +#define HAS_PREFER_SSE_FOR_MEMOP \
561 + HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
562
563 #endif /* __ASSEMBLER__ */
564 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/memset-x86-64.S
565 ===================================================================
566 --- /dev/null
567 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/memset-x86-64.S
568 @@ -0,0 +1,18 @@
569 +#include <sysdep.h>
570 +
571 +#ifndef NOT_IN_libc
572 +# undef ENTRY_CHK
573 +# define ENTRY_CHK(name) \
574 + .type __memset_chk_x86_64, @function; \
575 + .globl __memset_chk_x86_64; \
576 + .p2align 4; \
577 + __memset_chk_x86_64: cfi_startproc; \
578 + CALL_MCOUNT
579 +# undef END_CHK
580 +# define END_CHK(name) \
581 + cfi_endproc; .size __memset_chk_x86_64, .-__memset_chk_x86_64
582 +
583 +# define libc_hidden_builtin_def(name)
584 +# define memset __memset_x86_64
585 +# include "../memset.S"
586 +#endif
587 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/memset.S
588 ===================================================================
589 --- /dev/null
590 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/memset.S
591 @@ -0,0 +1,74 @@
592 +/* Multiple versions of memset
593 + Copyright (C) 2010 Free Software Foundation, Inc.
594 + This file is part of the GNU C Library.
595 +
596 + The GNU C Library is free software; you can redistribute it and/or
597 + modify it under the terms of the GNU Lesser General Public
598 + License as published by the Free Software Foundation; either
599 + version 2.1 of the License, or (at your option) any later version.
600 +
601 + The GNU C Library is distributed in the hope that it will be useful,
602 + but WITHOUT ANY WARRANTY; without even the implied warranty of
603 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
604 + Lesser General Public License for more details.
605 +
606 + You should have received a copy of the GNU Lesser General Public
607 + License along with the GNU C Library; if not, write to the Free
608 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
609 + 02111-1307 USA. */
610 +
611 +#include <sysdep.h>
612 +#include <init-arch.h>
613 +
614 +/* Define multiple versions only for the definition in lib. */
615 +#ifndef NOT_IN_libc
616 +ENTRY(memset)
617 + .type memset, @gnu_indirect_function /* IFUNC symbol: the code below returns the chosen implementation's address in %rax. */
618 + cmpl $0, __cpu_features+KIND_OFFSET(%rip) /* Is the field at KIND_OFFSET of __cpu_features still zero? */
619 + jne 1f /* Non-zero: skip the init call. */
620 + call __init_cpu_features /* Zero (presumably not yet initialized - confirm in init-arch.c): fill in __cpu_features. */
621 +1: leaq __memset_x86_64(%rip), %rax /* Default choice: __memset_x86_64. */
622 + testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
623 + jz 2f /* Prefer_SSE_for_memop bit clear: keep the default. */
624 + leaq __memset_sse2(%rip), %rax /* Bit set: choose __memset_sse2 instead. */
625 +2: ret
626 +END(memset)
627 +
628 +# define USE_SSE2 1
629 +
630 +# undef ENTRY
631 +# define ENTRY(name) \
632 + .type __memset_sse2, @function; \
633 + .globl __memset_sse2; \
634 + .p2align 4; \
635 + __memset_sse2: cfi_startproc; \
636 + CALL_MCOUNT
637 +# undef END
638 +# define END(name) \
639 + cfi_endproc; .size __memset_sse2, .-__memset_sse2
640 +
641 +# undef ENTRY_CHK
642 +# define ENTRY_CHK(name) \
643 + .type __memset_chk_sse2, @function; \
644 + .globl __memset_chk_sse2; \
645 + .p2align 4; \
646 + __memset_chk_sse2: cfi_startproc; \
647 + CALL_MCOUNT
648 +# undef END_CHK
649 +# define END_CHK(name) \
650 + cfi_endproc; .size __memset_chk_sse2, .-__memset_chk_sse2
651 +
652 +# ifdef SHARED
653 +# undef libc_hidden_builtin_def
654 +/* It doesn't make sense to send libc-internal memset calls through a PLT.
655 + The speedup we get from using GPR instruction is likely eaten away
656 + by the indirect call in the PLT. */
657 +# define libc_hidden_builtin_def(name) \
658 + .globl __GI_memset; __GI_memset = __memset_sse2
659 +# endif
660 +
661 +# undef strong_alias
662 +# define strong_alias(original, alias)
663 +#endif
664 +
665 +#include "../memset.S"
666 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/memset_chk.S
667 ===================================================================
668 --- /dev/null
669 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/memset_chk.S
670 @@ -0,0 +1,44 @@
671 +/* Multiple versions of __memset_chk
672 + Copyright (C) 2010 Free Software Foundation, Inc.
673 + This file is part of the GNU C Library.
674 +
675 + The GNU C Library is free software; you can redistribute it and/or
676 + modify it under the terms of the GNU Lesser General Public
677 + License as published by the Free Software Foundation; either
678 + version 2.1 of the License, or (at your option) any later version.
679 +
680 + The GNU C Library is distributed in the hope that it will be useful,
681 + but WITHOUT ANY WARRANTY; without even the implied warranty of
682 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
683 + Lesser General Public License for more details.
684 +
685 + You should have received a copy of the GNU Lesser General Public
686 + License along with the GNU C Library; if not, write to the Free
687 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
688 + 02111-1307 USA. */
689 +
690 +#include <sysdep.h>
691 +#include <init-arch.h>
692 +
693 +/* Define multiple versions only for the definition in lib. */
694 +#ifndef NOT_IN_libc
695 +# ifdef SHARED
696 +ENTRY(__memset_chk)
697 + .type __memset_chk, @gnu_indirect_function /* IFUNC symbol: the code below returns the chosen implementation's address in %rax. */
698 + cmpl $0, __cpu_features+KIND_OFFSET(%rip) /* Is the field at KIND_OFFSET of __cpu_features still zero? */
699 + jne 1f /* Non-zero: skip the init call. */
700 + call __init_cpu_features /* Zero (presumably not yet initialized - confirm in init-arch.c): fill in __cpu_features. */
701 +1: leaq __memset_chk_x86_64(%rip), %rax /* Default choice: __memset_chk_x86_64. */
702 + testl $bit_Prefer_SSE_for_memop, __cpu_features+FEATURE_OFFSET+index_Prefer_SSE_for_memop(%rip)
703 + jz 2f /* Prefer_SSE_for_memop bit clear: keep the default. */
704 + leaq __memset_chk_sse2(%rip), %rax /* Bit set: choose __memset_chk_sse2 instead. */
705 +2: ret
706 +END(__memset_chk)
707 +
708 +strong_alias (__memset_chk, __memset_zero_constant_len_parameter)
709 + .section .gnu.warning.__memset_zero_constant_len_parameter
710 + .string "memset used with constant zero length parameter; this could be due to transposed parameters"
711 +# else
712 +# include "../memset_chk.S"
713 +# endif
714 +#endif