1 2010-08-27 Ulrich Drepper <drepper@redhat.com>
3 * sysdeps/x86_64/multiarch/strlen-no-bsf.S: Move to .text.slow section.
5 * sysdeps/x86_64/strlen.S: Minimal code improvement.
7 2010-08-26 H.J. Lu <hongjiu.lu@intel.com>
9 * sysdeps/x86_64/strlen.S: Unroll the loop.
10 * sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
11 strlen-sse4 strlen-no-bsf.
12 * sysdeps/x86_64/multiarch/strlen.S (strlen): Return
13 __strlen_no_bsf if bit_Slow_BSF is set.
14 (__strlen_sse42): Removed.
15 * sysdeps/x86_64/multiarch/strlen-no-bsf.S: New file.
16 * sysdeps/x86_64/multiarch/strlen-sse4.S: New file.
18 2010-08-25 H.J. Lu <hongjiu.lu@intel.com>
20 * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
21 strlen-sse2 strlen-sse2-bsf.
22 * sysdeps/i386/i686/multiarch/strlen.S (strlen): Return
23 __strlen_sse2_bsf if bit_Slow_BSF is unset.
24 (__strlen_sse2): Removed.
25 * sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S: New file.
26 * sysdeps/i386/i686/multiarch/strlen-sse2.S: New file.
27 * sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features): Set
28 bit_Slow_BSF for Atom.
29 * sysdeps/x86_64/multiarch/init-arch.h (bit_Slow_BSF): Define.
30 (index_Slow_BSF): Define.
31 (HAS_SLOW_BSF): Define.
33 Index: glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/Makefile
34 ===================================================================
35 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/i386/i686/multiarch/Makefile
36 +++ glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/Makefile
37 @@ -9,7 +9,8 @@ sysdep_routines += bzero-sse2 memset-sse
38 memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
39 memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
40 strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
41 - memcmp-ssse3 memcmp-sse4 strcasestr-nonascii
42 + memcmp-ssse3 memcmp-sse4 strcasestr-nonascii \
43 + strlen-sse2 strlen-sse2-bsf
44 ifeq (yes,$(config-cflags-sse4))
45 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
46 CFLAGS-strcspn-c.c += -msse4
47 Index: glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
48 ===================================================================
50 +++ glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strlen-sse2-bsf.S
52 +/* strlen with SSE2 and BSF
53 + Copyright (C) 2010 Free Software Foundation, Inc.
54 + Contributed by Intel Corporation.
55 + This file is part of the GNU C Library.
57 + The GNU C Library is free software; you can redistribute it and/or
58 + modify it under the terms of the GNU Lesser General Public
59 + License as published by the Free Software Foundation; either
60 + version 2.1 of the License, or (at your option) any later version.
62 + The GNU C Library is distributed in the hope that it will be useful,
63 + but WITHOUT ANY WARRANTY; without even the implied warranty of
64 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
65 + Lesser General Public License for more details.
67 + You should have received a copy of the GNU Lesser General Public
68 + License along with the GNU C Library; if not, write to the Free
69 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
72 +#if defined SHARED && !defined NOT_IN_libc
75 +#include "asm-syntax.h"
77 +#define CFI_PUSH(REG) \
78 + cfi_adjust_cfa_offset (4); \
79 + cfi_rel_offset (REG, 0)
81 +#define CFI_POP(REG) \
82 + cfi_adjust_cfa_offset (-4); \
85 +#define PUSH(REG) pushl REG; CFI_PUSH (REG)
86 +#define POP(REG) popl REG; CFI_POP (REG)
87 +#define PARMS 4 + 8 /* Preserve ESI and EDI. */
89 +#define ENTRANCE PUSH (%esi); PUSH (%edi); cfi_remember_state
90 +#define RETURN POP (%edi); POP (%esi); ret; \
91 + cfi_restore_state; cfi_remember_state
94 +ENTRY ( __strlen_sse2_bsf)
103 + movdqu (%edi), %xmm1
104 + pcmpeqb %xmm1, %xmm0
105 + pmovmskb %xmm0, %edx
110 + jmp L(align16_start)
115 + pcmpeqb (%eax), %xmm0
119 + pmovmskb %xmm0, %edx
129 + pcmpeqb 16(%eax), %xmm0
130 + pmovmskb %xmm0, %edx
134 + pcmpeqb 32(%eax), %xmm1
135 + pmovmskb %xmm1, %edx
139 + pcmpeqb 48(%eax), %xmm2
140 + pmovmskb %xmm2, %edx
144 + pcmpeqb 64(%eax), %xmm3
145 + pmovmskb %xmm3, %edx
176 +END ( __strlen_sse2_bsf)
179 Index: glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strlen-sse2.S
180 ===================================================================
182 +++ glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strlen-sse2.S
185 + Copyright (C) 2010 Free Software Foundation, Inc.
186 + Contributed by Intel Corporation.
187 + This file is part of the GNU C Library.
189 + The GNU C Library is free software; you can redistribute it and/or
190 + modify it under the terms of the GNU Lesser General Public
191 + License as published by the Free Software Foundation; either
192 + version 2.1 of the License, or (at your option) any later version.
194 + The GNU C Library is distributed in the hope that it will be useful,
195 + but WITHOUT ANY WARRANTY; without even the implied warranty of
196 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
197 + Lesser General Public License for more details.
199 + You should have received a copy of the GNU Lesser General Public
200 + License along with the GNU C Library; if not, write to the Free
201 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
204 +#if defined SHARED && !defined NOT_IN_libc
207 +#include "asm-syntax.h"
209 +#define CFI_PUSH(REG) \
210 + cfi_adjust_cfa_offset (4); \
211 + cfi_rel_offset (REG, 0)
213 +#define CFI_POP(REG) \
214 + cfi_adjust_cfa_offset (-4); \
217 +#define PUSH(REG) pushl REG; CFI_PUSH (REG)
218 +#define POP(REG) popl REG; CFI_POP (REG)
225 +ENTRY (__strlen_sse2)
227 + mov STR(%esp), %edx
268 + pcmpeqb (%eax), %xmm0
269 + pmovmskb %xmm0, %edx
275 + pcmpeqb (%eax), %xmm1
276 + pmovmskb %xmm1, %edx
283 + pcmpeqb (%eax), %xmm2
284 + pmovmskb %xmm2, %edx
290 + pcmpeqb (%eax), %xmm3
291 + pmovmskb %xmm3, %edx
296 + pcmpeqb (%eax), %xmm0
297 + pmovmskb %xmm0, %edx
302 + pcmpeqb (%eax), %xmm1
303 + pmovmskb %xmm1, %edx
308 + pcmpeqb (%eax), %xmm2
309 + pmovmskb %xmm2, %edx
314 + pcmpeqb (%eax), %xmm3
315 + pmovmskb %xmm3, %edx
320 + pcmpeqb (%eax), %xmm0
321 + pmovmskb %xmm0, %edx
326 + pcmpeqb (%eax), %xmm1
327 + pmovmskb %xmm1, %edx
332 + pcmpeqb (%eax), %xmm2
333 + pmovmskb %xmm2, %edx
338 + pcmpeqb (%eax), %xmm3
339 + pmovmskb %xmm3, %edx
344 + pcmpeqb (%eax), %xmm0
345 + pmovmskb %xmm0, %edx
350 + pcmpeqb (%eax), %xmm1
351 + pmovmskb %xmm1, %edx
356 + pcmpeqb (%eax), %xmm2
357 + pmovmskb %xmm2, %edx
362 + pcmpeqb (%eax), %xmm3
363 + pmovmskb %xmm3, %edx
375 + pcmpeqb (%eax), %xmm0
376 + pcmpeqb 16(%eax), %xmm1
377 + pcmpeqb 32(%eax), %xmm2
378 + pcmpeqb 48(%eax), %xmm3
379 + pmovmskb %xmm0, %edx
380 + pmovmskb %xmm1, %esi
381 + pmovmskb %xmm2, %edi
382 + pmovmskb %xmm3, %ebx
391 + jnz L(aligned_64_exit_16)
393 + jnz L(aligned_64_exit_32)
395 + jnz L(aligned_64_exit_48)
398 + jmp L(aligned_64_exit)
399 +L(aligned_64_exit_48):
400 + lea -16(%eax), %eax
402 + jmp L(aligned_64_exit)
403 +L(aligned_64_exit_32):
404 + lea -32(%eax), %eax
406 + jmp L(aligned_64_exit)
407 +L(aligned_64_exit_16):
408 + lea -48(%eax), %eax
531 Index: glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strlen.S
532 ===================================================================
533 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/i386/i686/multiarch/strlen.S
534 +++ glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strlen.S
535 @@ -48,6 +48,9 @@ ENTRY(strlen)
536 1: leal __strlen_ia32@GOTOFF(%ebx), %eax
537 testl $bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
539 + leal __strlen_sse2_bsf@GOTOFF(%ebx), %eax
540 + testl $bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
542 leal __strlen_sse2@GOTOFF(%ebx), %eax
544 cfi_adjust_cfa_offset (-4);
545 @@ -55,84 +58,6 @@ ENTRY(strlen)
549 -#define CFI_POP(REG) \
550 - cfi_adjust_cfa_offset (-4); \
553 -#define RETURN popl %esi; CFI_POP (esi); ret
556 -ENTRY (__strlen_sse2)
558 - * This implementation uses SSE instructions to compare up to 16 bytes
559 - * at a time looking for the end of string (null char).
562 - cfi_adjust_cfa_offset (4)
563 - cfi_rel_offset (%esi, 0)
566 - pxor %xmm0, %xmm0 /* 16 null chars */
569 - jz 1f /* string is 16 byte aligned */
572 - * Unaligned case. Round down to 16-byte boundary before comparing
573 - * 16 bytes for a null char. The code then compensates for any extra chars
574 - * preceding the start of the string.
578 - pcmpeqb (%esi), %xmm0
580 - pmovmskb %xmm0, %edx
582 - shr %cl, %edx /* Compensate for bytes preceding the string */
585 - sub %ecx, %esi /* no null, adjust to next 16-byte boundary */
586 - pxor %xmm0, %xmm0 /* clear xmm0, may have been changed... */
589 -1: /* 16 byte aligned */
590 - pcmpeqb (%esi), %xmm0 /* look for null bytes */
591 - pmovmskb %xmm0, %edx /* move each byte mask of %xmm0 to edx */
593 - add $16, %esi /* prepare to search next 16 bytes */
594 - test %edx, %edx /* if no null byte, %edx must be 0 */
595 - jnz 2f /* found a null */
597 - pcmpeqb (%esi), %xmm0
598 - pmovmskb %xmm0, %edx
603 - pcmpeqb (%esi), %xmm0
604 - pmovmskb %xmm0, %edx
609 - pcmpeqb (%esi), %xmm0
610 - pmovmskb %xmm0, %edx
617 - lea -16(%eax, %esi), %eax /* calculate exact offset */
618 - bsf %edx, %ecx /* Least significant 1 bit is index of null */
621 - cfi_adjust_cfa_offset (-4)
628 # define ENTRY(name) \
629 .type __strlen_ia32, @function; \
630 --- a/sysdeps/x86_64/multiarch/Makefile 2012-03-01 10:43:30.060487726 -0700
631 +++ b/sysdeps/x86_64/multiarch/Makefile 2012-03-01 10:45:57.894692115 -0700
632 @@ -7,7 +7,7 @@ ifeq ($(subdir),string)
633 sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
634 strend-sse4 memcmp-sse4 \
635 strcasestr-nonascii strcasecmp_l-ssse3 \
637 + strncase_l-ssse3 strlen-sse4 strlen-no-bsf \
639 ifeq (yes,$(config-cflags-sse4))
640 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
641 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/init-arch.c
642 ===================================================================
643 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/init-arch.c
644 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/init-arch.c
645 @@ -77,6 +77,12 @@ __init_cpu_features (void)
646 model += extended_model;
651 + /* BSF is slow on Atom. */
652 + __cpu_features.feature[index_Slow_BSF] |= bit_Slow_BSF;
658 --- a/sysdeps/x86_64/multiarch/init-arch.h 2012-03-01 10:43:30.061487720 -0700
659 +++ b/sysdeps/x86_64/multiarch/init-arch.h 2012-03-01 10:48:13.371963005 -0700
663 #define bit_Fast_Rep_String (1 << 0)
664 +#define bit_Slow_BSF (1 << 2)
665 #define bit_Prefer_SSE_for_memop (1 << 3)
669 # define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
671 #define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
672 +# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE
673 # define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
675 #else /* __ASSEMBLER__ */
676 @@ -105,11 +107,15 @@ extern const struct cpu_features *__get_
677 # define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
679 # define index_Fast_Rep_String FEATURE_INDEX_1
680 +# define index_Slow_BSF FEATURE_INDEX_1
681 # define index_Prefer_SSE_for_memop FEATURE_INDEX_1
683 #define HAS_ARCH_FEATURE(idx, bit) \
684 ((__get_cpu_features ()->feature[idx] & (bit)) != 0)
686 +#define HAS_SLOW_BSF \
687 + HAS_ARCH_FEATURE (index_Slow_BSF, bit_Slow_BSF)
689 #define HAS_PREFER_SSE_FOR_MEMOP \
690 HAS_ARCH_FEATURE (index_Prefer_SSE_for_memop, bit_Prefer_SSE_for_memop)
692 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strlen-no-bsf.S
693 ===================================================================
695 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strlen-no-bsf.S
697 +/* strlen without BSF
698 + Copyright (C) 2010 Free Software Foundation, Inc.
699 + Contributed by Intel Corporation.
700 + This file is part of the GNU C Library.
702 + The GNU C Library is free software; you can redistribute it and/or
703 + modify it under the terms of the GNU Lesser General Public
704 + License as published by the Free Software Foundation; either
705 + version 2.1 of the License, or (at your option) any later version.
707 + The GNU C Library is distributed in the hope that it will be useful,
708 + but WITHOUT ANY WARRANTY; without even the implied warranty of
709 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
710 + Lesser General Public License for more details.
712 + You should have received a copy of the GNU Lesser General Public
713 + License along with the GNU C Library; if not, write to the Free
714 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
717 +#if defined SHARED && !defined NOT_IN_libc
721 + .section .text.slow,"ax",@progbits
722 +ENTRY (__strlen_no_bsf)
763 + pcmpeqb (%rax), %xmm0
764 + pmovmskb %xmm0, %edx
770 + pcmpeqb (%rax), %xmm1
771 + pmovmskb %xmm1, %edx
778 + pcmpeqb (%rax), %xmm2
779 + pmovmskb %xmm2, %edx
785 + pcmpeqb (%rax), %xmm3
786 + pmovmskb %xmm3, %edx
791 + pcmpeqb (%rax), %xmm0
792 + pmovmskb %xmm0, %edx
797 + pcmpeqb (%rax), %xmm1
798 + pmovmskb %xmm1, %edx
803 + pcmpeqb (%rax), %xmm2
804 + pmovmskb %xmm2, %edx
809 + pcmpeqb (%rax), %xmm3
810 + pmovmskb %xmm3, %edx
815 + pcmpeqb (%rax), %xmm0
816 + pmovmskb %xmm0, %edx
821 + pcmpeqb (%rax), %xmm1
822 + pmovmskb %xmm1, %edx
827 + pcmpeqb (%rax), %xmm2
828 + pmovmskb %xmm2, %edx
833 + pcmpeqb (%rax), %xmm3
834 + pmovmskb %xmm3, %edx
839 + pcmpeqb (%rax), %xmm0
840 + pmovmskb %xmm0, %edx
845 + pcmpeqb (%rax), %xmm1
846 + pmovmskb %xmm1, %edx
851 + pcmpeqb (%rax), %xmm2
852 + pmovmskb %xmm2, %edx
857 + pcmpeqb (%rax), %xmm3
858 + pmovmskb %xmm3, %edx
866 + pcmpeqb (%rax), %xmm0
867 + pcmpeqb 16(%rax), %xmm1
868 + pcmpeqb 32(%rax), %xmm2
869 + pcmpeqb 48(%rax), %xmm3
870 + pmovmskb %xmm0, %edx
871 + pmovmskb %xmm1, %esi
872 + pmovmskb %xmm2, %edi
873 + pmovmskb %xmm3, %r9d
882 + jnz L(aligned_64_exit_16)
884 + jnz L(aligned_64_exit_32)
886 + jnz L(aligned_64_exit_48)
887 +L(aligned_64_exit_64):
889 + jmp L(aligned_64_exit)
890 +L(aligned_64_exit_48):
891 + lea -16(%rax), %rax
893 + jmp L(aligned_64_exit)
894 +L(aligned_64_exit_32):
895 + lea -32(%rax), %rax
897 + jmp L(aligned_64_exit)
898 +L(aligned_64_exit_16):
899 + lea -48(%rax), %rax
1003 +END (__strlen_no_bsf)
1006 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strlen-sse4.S
1007 ===================================================================
1009 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strlen-sse4.S
1011 +/* strlen with SSE4
1012 + Copyright (C) 2009, 2010 Free Software Foundation, Inc.
1013 + Contributed by Ulrich Drepper <drepper@redhat.com>.
1014 + This file is part of the GNU C Library.
1016 + The GNU C Library is free software; you can redistribute it and/or
1017 + modify it under the terms of the GNU Lesser General Public
1018 + License as published by the Free Software Foundation; either
1019 + version 2.1 of the License, or (at your option) any later version.
1021 + The GNU C Library is distributed in the hope that it will be useful,
1022 + but WITHOUT ANY WARRANTY; without even the implied warranty of
1023 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1024 + Lesser General Public License for more details.
1026 + You should have received a copy of the GNU Lesser General Public
1027 + License along with the GNU C Library; if not, write to the Free
1028 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1029 + 02111-1307 USA. */
1031 +#if defined SHARED && !defined NOT_IN_libc
1033 +#include <sysdep.h>
1035 + .section .text.sse4.2,"ax",@progbits
1036 +ENTRY (__strlen_sse42)
1042 + pcmpeqb (%rdi), %xmm1
1043 + pmovmskb %xmm1, %edx
1047 + jnz L(less16bytes)
1051 +L(more64bytes_loop):
1052 + pcmpistri $0x08, 16(%rdi), %xmm1
1055 + pcmpistri $0x08, 32(%rdi), %xmm1
1058 + pcmpistri $0x08, 48(%rdi), %xmm1
1062 + pcmpistri $0x08, (%rdi), %xmm1
1063 + jnz L(more64bytes_loop)
1064 + leaq (%rdi,%rcx), %rax
1070 + leaq 16(%rdi,%rcx, 1), %rax
1076 + leaq 32(%rdi,%rcx, 1), %rax
1082 + leaq 48(%rdi,%rcx, 1), %rax
1093 +END (__strlen_sse42)
1096 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strlen.S
1097 ===================================================================
1098 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/strlen.S
1099 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strlen.S
1100 @@ -36,74 +36,12 @@ ENTRY(strlen)
1101 testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
1103 leaq __strlen_sse42(%rip), %rax
1108 - .section .text.sse4.2,"ax",@progbits
1110 - .type __strlen_sse42, @function
1119 - pcmpeqb (%rdi), %xmm1
1120 - pmovmskb %xmm1, %edx
1124 - jnz L(less16bytes)
1128 -L(more64bytes_loop):
1129 - pcmpistri $0x08, 16(%rdi), %xmm1
1132 - pcmpistri $0x08, 32(%rdi), %xmm1
1135 - pcmpistri $0x08, 48(%rdi), %xmm1
1139 - pcmpistri $0x08, (%rdi), %xmm1
1140 - jnz L(more64bytes_loop)
1141 - leaq (%rdi,%rcx), %rax
1147 - leaq 16(%rdi,%rcx, 1), %rax
1153 - leaq 32(%rdi,%rcx, 1), %rax
1159 - leaq 48(%rdi,%rcx, 1), %rax
1170 - .size __strlen_sse42, .-__strlen_sse42
1172 +2: testl $bit_Slow_BSF, __cpu_features+FEATURE_OFFSET+index_Slow_BSF(%rip)
1174 + leaq __strlen_no_bsf(%rip), %rax
1179 # define ENTRY(name) \
1180 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strlen.S
1181 ===================================================================
1182 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/strlen.S
1183 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strlen.S
1192 - movdqa %xmm2, %xmm1
1193 - pcmpeqb (%rdi), %xmm2
1194 - orl $0xffffffff, %esi
1197 - pmovmskb %xmm2, %edx
1201 -2: movdqa 16(%rdi), %xmm0
1202 - leaq 16(%rdi), %rdi
1209 + movdqu (%rdi), %xmm1
1210 pcmpeqb %xmm1, %xmm0
1211 pmovmskb %xmm0, %edx
1215 + jnz L(exit_less16)
1218 + jmp L(align16_start)
1222 + pcmpeqb (%rax), %xmm0
1226 + pmovmskb %xmm0, %edx
1236 + pcmpeqb 16(%rax), %xmm0
1237 + pmovmskb %xmm0, %edx
1244 + pcmpeqb 32(%rax), %xmm1
1245 + pmovmskb %xmm1, %edx
1249 + pcmpeqb 48(%rax), %xmm2
1250 + pmovmskb %xmm2, %edx
1254 + pcmpeqb 64(%rax), %xmm3
1255 + pmovmskb %xmm3, %edx
1256 + lea 64(%rax), %rax
1258 + jz L(align16_loop)
1269 + lea 16(%rdx,%rax), %rax
1275 + lea 32(%rdx,%rax), %rax
1281 + lea 48(%rdx,%rax), %rax
1284 libc_hidden_builtin_def (strlen)