git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / i386 / i686 / multiarch / strchr-sse2-bsf.S
1 /* strchr with SSE2 with bsf
2 Copyright (C) 2011-2015 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #if IS_IN (libc)
21
22 # include <sysdep.h>
23 /* CFI_PUSH (REG): unwind annotation to pair with a 4-byte push of REG.  It grows the
   CFA offset by 4 and records REG as saved at offset 0.  The cfi_* helpers are not defined
   in this file; presumably they come from <sysdep.h> -- confirm there.  */
24 # define CFI_PUSH(REG) \
25 cfi_adjust_cfa_offset (4); \
26 cfi_rel_offset (REG, 0)
27 /* CFI_POP (REG): inverse annotation for a 4-byte pop; shrinks the CFA offset by 4 and marks REG restored.  */
28 # define CFI_POP(REG) \
29 cfi_adjust_cfa_offset (-4); \
30 cfi_restore (REG)
31 /* PUSH/POP: the pushl/popl instruction followed by its matching annotation.  */
32 # define PUSH(REG) pushl REG; CFI_PUSH (REG)
33 # define POP(REG) popl REG; CFI_POP (REG)
34 /* PARMS is the distance from ESP to the first stack argument once ENTRANCE has run:
   4 bytes of return address plus the 4 bytes pushed for EDI.
   ENTRANCE saves EDI.  RETURN restores EDI and returns; the CFI_PUSH after the ret re-applies
   the "EDI is pushed" annotation for the code that textually follows, since RETURN is
   expanded in the middle of the function body.  */
35 # define PARMS 8
36 # define ENTRANCE PUSH(%edi)
37 # define RETURN POP(%edi); ret; CFI_PUSH(%edi);
38 /* Offsets from ESP (after ENTRANCE) of the two arguments: STR1 is loaded as the string
   pointer, STR2 as the character to look for.  */
39 # define STR1 PARMS
40 # define STR2 STR1+4
41
42 .text
43 ENTRY (__strchr_sse2_bsf)
44 /* strchr (S, C) using SSE2 compares and bsf.  S is read from STR1(%esp), C from STR2(%esp).
   Result in EAX: address of the first byte of S equal to the low byte of C, or 0 when the
   terminating NUL comes strictly before any such byte (so C == 0 yields the terminator's address).
   Register roles: EDI = pointer to the current 16-byte-aligned block; ECX = S & 15, later scratch;
   XMM1 = low byte of C replicated into all 16 lanes; XMM2 = zero before each compare, NUL-compare
   result after it; XMM0 = loaded block, then match-compare result; EAX = match mask, then byte
   index / result; EDX = NUL mask.  */
45 ENTRANCE
46 mov STR1(%esp), %ecx /* ECX = S. */
47 movd STR2(%esp), %xmm1 /* Low dword of XMM1 = C. */
48
49 pxor %xmm2, %xmm2 /* XMM2 = 0, the pattern used to detect NUL bytes. */
50 mov %ecx, %edi /* EDI = S. */
51 punpcklbw %xmm1, %xmm1 /* Two byte-interleaves with itself leave the low dword holding 4 copies of C's low byte... */
52 punpcklbw %xmm1, %xmm1
53 /* ECX = offset of S inside its 16-byte block (S & 15).  The AND also sets ZF for the je below. */
54 and $15, %ecx
55 pshufd $0, %xmm1, %xmm1 /* ...and this replicates that dword, so all 16 lanes of XMM1 hold C.  Does not touch EFLAGS. */
56 je L(loop) /* ZF from the AND: S is already 16-byte aligned, go straight to the aligned loop. */
57
58 /* Handle unaligned string: examine the aligned block that contains S, ignoring the bytes in front of S. */
59 and $-16, %edi /* Round EDI down to the start of that block. */
60 movdqa (%edi), %xmm0 /* Aligned 16-byte load; the first ECX bytes lie before S. */
61 pcmpeqb %xmm0, %xmm2 /* XMM2 (was zero) = 0xff in every lane that holds NUL. */
62 pcmpeqb %xmm1, %xmm0 /* XMM0 = 0xff in every lane that equals C. */
63 /* Find where NULL is: EDX bit i is set when byte i of the block is NUL. */
64 pmovmskb %xmm2, %edx
65 /* Check if there is a match: EAX bit i is set when byte i of the block equals C. */
66 pmovmskb %xmm0, %eax
67 /* Remove the leading bytes: shift out the ECX mask bits that belong to bytes before S, so bit i now describes S[i].  Both masks are non-negative 16-bit values, so the arithmetic shift only brings in zeros. */
68 sarl %cl, %edx
69 sarl %cl, %eax
70 test %eax, %eax
71 je L(unaligned_no_match) /* No byte equal to C in the rest of this block. */
72 /* Check which byte is a match: EAX = index of the first match, relative to S. */
73 bsf %eax, %eax
74 /* Is there a NULL? */
75 test %edx, %edx
76 je L(unaligned_match) /* No NUL in the rest of the block: the match is valid. */
77 bsf %edx, %edx /* EDX = index of the first NUL, relative to S. */
78 cmpl %edx, %eax
79 /* Return NULL if NULL comes first (unsigned: match index above NUL index). */
80 ja L(return_null)
81 L(unaligned_match):
82 add %edi, %eax /* EAX = block base + index... */
83 add %ecx, %eax /* ...+ offset of S in the block = address of the matching byte. */
84 RETURN
85
86 .p2align 4
87 L(unaligned_no_match):
88 test %edx, %edx
89 jne L(return_null) /* The string ends inside this block and no match was found. */
90 pxor %xmm2, %xmm2 /* Re-zero XMM2: it can still flag NUL bytes located before S, whose mask bits were shifted out of EDX. */
91
92 add $16, %edi /* Advance to the next aligned block. */
93
94 .p2align 4
95 /* Loop start on aligned string.  The body is the same step repeated four times.  Each step relies on XMM2 being zero on entry; that holds because a step that finds any NUL leaves the loop. */
96 L(loop):
97 movdqa (%edi), %xmm0 /* Load the current block. */
98 pcmpeqb %xmm0, %xmm2 /* XMM2 = NUL lanes. */
99 add $16, %edi /* EDI now points just past the block being examined. */
100 pcmpeqb %xmm1, %xmm0 /* XMM0 = lanes equal to C. */
101 pmovmskb %xmm2, %edx /* EDX = NUL mask. */
102 pmovmskb %xmm0, %eax /* EAX = match mask. */
103 or %eax, %edx /* Nonzero when the block holds a match or a NUL; overwrites the NUL mask in EDX. */
104 jnz L(matches)
105 /* Second copy of the step, for the next block. */
106 movdqa (%edi), %xmm0
107 pcmpeqb %xmm0, %xmm2
108 add $16, %edi
109 pcmpeqb %xmm1, %xmm0
110 pmovmskb %xmm2, %edx
111 pmovmskb %xmm0, %eax
112 or %eax, %edx
113 jnz L(matches)
114 /* Third copy of the step. */
115 movdqa (%edi), %xmm0
116 pcmpeqb %xmm0, %xmm2
117 add $16, %edi
118 pcmpeqb %xmm1, %xmm0
119 pmovmskb %xmm2, %edx
120 pmovmskb %xmm0, %eax
121 or %eax, %edx
122 jnz L(matches)
123 /* Fourth copy of the step. */
124 movdqa (%edi), %xmm0
125 pcmpeqb %xmm0, %xmm2
126 add $16, %edi
127 pcmpeqb %xmm1, %xmm0
128 pmovmskb %xmm2, %edx
129 pmovmskb %xmm0, %eax
130 or %eax, %edx
131 jnz L(matches)
132 jmp L(loop)
133 /* Reached from the loop with EAX = match mask, XMM2 = NUL-compare result, EDI = address just past the block examined. */
134 L(matches):
135 pmovmskb %xmm2, %edx /* Rebuild the NUL mask; the OR in the loop merged EAX into EDX. */
136 test %eax, %eax
137 jz L(return_null) /* No match bit, so the block was flagged because of a NUL: C is not in the string. */
138 bsf %eax, %eax /* EAX = index of the first match within the block. */
139 /* There is a match. First find where NULL is. */
140 test %edx, %edx
141 je L(match) /* No NUL in the block: the match is valid. */
142 bsf %edx, %ecx /* ECX = index of the first NUL within the block. */
143 /* Check if NULL comes first (unsigned: match index above NUL index). */
144 cmpl %ecx, %eax
145 ja L(return_null)
146 L(match):
147 sub $16, %edi /* Undo the loop's post-increment: EDI = start of the examined block. */
148 add %edi, %eax /* EAX = address of the matching byte. */
149 RETURN
150
151 /* Return NULL: EAX = 0. */
152 .p2align 4
153 L(return_null):
154 xor %eax, %eax
155 RETURN
156
157 END (__strchr_sse2_bsf)
158 #endif