]>
Commit | Line | Data |
---|---|---|
693fb948 | 1 | /* strchr with SSE2 with bsf |
b168057a | 2 | Copyright (C) 2011-2015 Free Software Foundation, Inc. |
693fb948 LD |
3 | Contributed by Intel Corporation. |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
693fb948 | 19 | |
4f41c682 | 20 | #if IS_IN (libc) |
693fb948 LD |
21 | |
22 | # include <sysdep.h> | |
23 | ||
24 | # define CFI_PUSH(REG) /* Unwind annotations matching a 4-byte push of REG. */ \ | |
25 | cfi_adjust_cfa_offset (4); /* The CFA offset grows by the 4 bytes pushed. */ \ | |
26 | cfi_rel_offset (REG, 0) /* REG is recorded as saved at offset 0. */ | |
27 | ||
28 | # define CFI_POP(REG) /* Unwind annotations matching a 4-byte pop of REG. */ \ | |
29 | cfi_adjust_cfa_offset (-4); /* The CFA offset shrinks by the 4 bytes popped. */ \ | |
30 | cfi_restore (REG) /* REG is no longer described as saved. */ | |
31 | ||
32 | # define PUSH(REG) pushl REG; CFI_PUSH (REG) /* pushl together with its CFI annotations. */ | |
33 | # define POP(REG) popl REG; CFI_POP (REG) /* popl together with its CFI annotations. */ | |
34 | ||
35 | # define PARMS 8 /* Offset from %esp of the first argument once ENTRANCE has run: the 4-byte saved %edi plus, presumably, the 4-byte return address. */ | |
36 | # define ENTRANCE PUSH(%edi) /* Prologue: %edi is the only register saved. */ | |
37 | # define RETURN POP(%edi); ret; CFI_PUSH(%edi); /* Epilogue. The CFI_PUSH after ret re-declares %edi as pushed for the code that follows each use of RETURN inside the function. */ | |
38 | ||
39 | # define STR1 PARMS /* Stack offset of the first argument; the function loads it as the string address. */ | |
40 | # define STR2 STR1+4 /* Stack offset of the second argument; the function loads it as the character to find. */ | |
41 | ||
48882a1a | 42 | .text |
693fb948 LD |
43 | ENTRY (__strchr_sse2_bsf) /* strchr: returns in %eax the address of the first byte of the STR1 string equal to the low byte of STR2, or 0 if the NUL terminator comes first; a match on the terminator itself is returned. Uses %ecx, %edx, %xmm0-%xmm2; %edi is saved and restored. */ |
44 | ||
45 | ENTRANCE | |
46 | mov STR1(%esp), %ecx /* ECX = string address. */ | |
47 | movd STR2(%esp), %xmm1 /* Low dword of XMM1 = character argument. */ | |
48 | ||
49 | pxor %xmm2, %xmm2 /* XMM2 = 0, the reference value for the NUL compare. */ | |
50 | mov %ecx, %edi /* EDI = string address; it becomes the block pointer. */ | |
51 | punpcklbw %xmm1, %xmm1 /* Duplicate the character byte into a word... */ | |
52 | punpcklbw %xmm1, %xmm1 /* ...and then into the low dword. */ | |
53 | /* ECX has OFFSET. */ | |
54 | and $15, %ecx /* ECX = address mod 16; the ZF set here is consumed by the je below. */ | |
55 | pshufd $0, %xmm1, %xmm1 /* Broadcast the low dword: XMM1 = 16 copies of the character. */ | |
56 | je L(loop) /* Offset 0: the string is 16-byte aligned, go straight to the main loop. */ | |
57 | ||
58 | /* Handle unaligned string. */ | |
59 | and $-16, %edi /* Round EDI down to the 16-byte block containing the start. */ | |
60 | movdqa (%edi), %xmm0 /* Aligned load of that whole block. */ | |
61 | pcmpeqb %xmm0, %xmm2 /* XMM2 = 0xff in every byte lane where the block holds 0. */ | |
62 | pcmpeqb %xmm1, %xmm0 /* XMM0 = 0xff in every byte lane equal to the character. */ | |
63 | /* Find where NULL is. */ | |
64 | pmovmskb %xmm2, %edx | |
65 | /* Check if there is a match. */ | |
66 | pmovmskb %xmm0, %eax | |
67 | /* Remove the leading bytes. */ | |
68 | sarl %cl, %edx /* Shift out the mask bits of bytes before the string start. */ | |
69 | sarl %cl, %eax /* After both shifts, bit 0 corresponds to the first string byte. */ | |
70 | test %eax, %eax | |
71 | je L(unaligned_no_match) | |
72 | /* Check which byte is a match. */ | |
73 | bsf %eax, %eax /* EAX = index of the first match, counted from the string start. */ | |
74 | /* Is there a NULL? */ | |
75 | test %edx, %edx | |
76 | je L(unaligned_match) | |
77 | bsf %edx, %edx /* EDX = index of the first NUL, counted from the string start. */ | |
78 | cmpl %edx, %eax | |
79 | /* Return NULL if NULL comes first. */ | |
80 | ja L(return_null) /* Strictly above only: equal indexes (searching for NUL) still return the match. */ | |
81 | L(unaligned_match): | |
82 | add %edi, %eax /* Result = aligned block address... */ | |
83 | add %ecx, %eax /* ...+ start offset + match index. */ | |
84 | RETURN | |
85 | ||
86 | .p2align 4 | |
87 | L(unaligned_no_match): | |
88 | test %edx, %edx /* No match in the first block: was there a NUL in it? */ | |
89 | jne L(return_null) | |
90 | pxor %xmm2, %xmm2 /* Re-zero XMM2: it holds the compare result, which can be nonzero for bytes before the string start. */ | |
91 | ||
92 | add $16, %edi /* Advance to the next aligned block. */ | |
93 | ||
94 | .p2align 4 | |
95 | /* Loop start on aligned string. */ | |
96 | L(loop): /* Body is unrolled four times. XMM2 is zero on entry to each copy: a copy only falls through when its NUL compare produced all zeros. */ | |
97 | movdqa (%edi), %xmm0 /* Load the next aligned 16-byte block. */ | |
98 | pcmpeqb %xmm0, %xmm2 /* XMM2 = NUL lanes of this block. */ | |
99 | add $16, %edi /* EDI now points past the block just loaded; L(match) subtracts the 16 again. */ | |
100 | pcmpeqb %xmm1, %xmm0 /* XMM0 = lanes equal to the character. */ | |
101 | pmovmskb %xmm2, %edx /* EDX = NUL bitmask. */ | |
102 | pmovmskb %xmm0, %eax /* EAX = match bitmask. */ | |
103 | or %eax, %edx /* EDX is overwritten with the OR of both masks; nonzero means match or NUL found. */ | |
104 | jnz L(matches) | |
105 | ||
106 | movdqa (%edi), %xmm0 /* Second copy of the loop body. */ | |
107 | pcmpeqb %xmm0, %xmm2 | |
108 | add $16, %edi | |
109 | pcmpeqb %xmm1, %xmm0 | |
110 | pmovmskb %xmm2, %edx | |
111 | pmovmskb %xmm0, %eax | |
112 | or %eax, %edx | |
113 | jnz L(matches) | |
114 | ||
115 | movdqa (%edi), %xmm0 /* Third copy of the loop body. */ | |
116 | pcmpeqb %xmm0, %xmm2 | |
117 | add $16, %edi | |
118 | pcmpeqb %xmm1, %xmm0 | |
119 | pmovmskb %xmm2, %edx | |
120 | pmovmskb %xmm0, %eax | |
121 | or %eax, %edx | |
122 | jnz L(matches) | |
123 | ||
124 | movdqa (%edi), %xmm0 /* Fourth copy of the loop body. */ | |
125 | pcmpeqb %xmm0, %xmm2 | |
126 | add $16, %edi | |
127 | pcmpeqb %xmm1, %xmm0 | |
128 | pmovmskb %xmm2, %edx | |
129 | pmovmskb %xmm0, %eax | |
130 | or %eax, %edx | |
131 | jnz L(matches) | |
132 | jmp L(loop) | |
133 | ||
134 | L(matches): /* Reached with EAX = match bitmask and XMM2 = NUL lanes of the block that ended the loop. */ | |
135 | pmovmskb %xmm2, %edx /* Recompute the NUL bitmask; EDX was overwritten by the or in the loop. */ | |
136 | test %eax, %eax | |
137 | jz L(return_null) /* No match bits, so the loop stopped on a NUL only. */ | |
138 | bsf %eax, %eax /* EAX = index of the first match within the block. */ | |
139 | /* There is a match. First find where NULL is. */ | |
140 | test %edx, %edx | |
141 | je L(match) | |
142 | bsf %edx, %ecx /* ECX = index of the first NUL within the block. */ | |
143 | /* Check if NULL comes first. */ | |
144 | cmpl %ecx, %eax | |
145 | ja L(return_null) /* Strictly above only: equal indexes (searching for NUL) still return the match. */ | |
146 | L(match): | |
147 | sub $16, %edi /* Undo the post-load increment: EDI = address of the matching block. */ | |
148 | add %edi, %eax /* Result = block address + match index. */ | |
149 | RETURN | |
150 | ||
151 | /* Return NULL. */ | |
152 | .p2align 4 | |
153 | L(return_null): | |
154 | xor %eax, %eax /* EAX = 0: the character does not occur before the terminator. */ | |
155 | RETURN | |
156 | ||
157 | END (__strchr_sse2_bsf) | |
158 | #endif |