]>
Commit | Line | Data |
---|---|---|
693fb948 LD |
1 | /* strchr with SSE2 with bsf |
2 | Copyright (C) 2011 Free Software Foundation, Inc. | |
3 | Contributed by Intel Corporation. | |
4 | This file is part of the GNU C Library. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
17 | License along with the GNU C Library; if not, write to the Free | |
18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA. */ | |
20 | ||
21 | #ifndef NOT_IN_libc | |
22 | ||
23 | # include <sysdep.h> | |
24 | ||
/* Stack offsets of the two arguments, valid once ENTRANCE has run:
   PARMS skips the 4-byte slot pushed for %edi plus the 4-byte return
   address.  STR1 is the first argument, STR2 the one right after it.  */
# define PARMS	8
# define STR1	PARMS
# define STR2	STR1+4

/* Unwind annotations that accompany a 4-byte push or pop of REG.
   The cfi_* helpers are not defined in this file (presumably they
   come from <sysdep.h>).  */
# define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

# define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push or pop a 32-bit register and keep the unwind information in
   step with the stack pointer.  */
# define PUSH(REG)	\
	pushl	REG;	\
	CFI_PUSH (REG)

# define POP(REG)	\
	popl	REG;	\
	CFI_POP (REG)

/* Function entry and exit.  %edi is the only register saved.  RETURN
   ends with CFI_PUSH because the instructions assembled after the
   `ret' are reached with %edi still on the stack, so the unwind
   description has to be switched back to that state.  */
# define ENTRANCE	PUSH (%edi)
# define RETURN		POP (%edi); ret; CFI_PUSH (%edi);
42 | ||
/* __strchr_sse2_bsf (s, c) -- strchr built on 16-byte SSE2 compares
   and bsf.

   Both arguments are read from the stack (STR1 and STR2 off %esp).
   The result is returned in %eax: the address of the first byte of S
   equal to the low byte of C, or 0 when the terminating NUL byte is
   reached first.  Searching for a zero byte yields the address of
   the terminator itself.

   Register roles:
     %edi   cursor into S, 16-byte aligned whenever it is dereferenced
	    (saved by ENTRANCE, restored by RETURN)
     %ecx   S & 15 while the head block is handled; scratch afterwards
     %eax   bit mask of bytes equal to C, then index of the first one
     %edx   bit mask of NUL bytes, then index of the first one
     %xmm0  the 16 bytes under examination
     %xmm1  low byte of C replicated into all 16 lanes
     %xmm2  all zeroes before each NUL compare; receives the NUL lanes

   Every memory read of the string is an aligned 16-byte load.  */

/* Examine the aligned 16-byte chunk at (%edi) and advance %edi past
   it.  Leaves the "equal to C" mask in %eax and the merged
   "equal to C or NUL" mask in %edx, and branches to L(hit) when the
   latter is non-zero.  On fall-through the chunk held no NUL byte, so
   %xmm2 is all zeroes again and ready for the next chunk.  */
# define SCAN_CHUNK			\
	movdqa	(%edi), %xmm0;		\
	pcmpeqb	%xmm0, %xmm2;		\
	addl	$16, %edi;		\
	pcmpeqb	%xmm1, %xmm0;		\
	pmovmskb %xmm2, %edx;		\
	pmovmskb %xmm0, %eax;		\
	orl	%eax, %edx;		\
	jnz	L(hit)

	.text
ENTRY (__strchr_sse2_bsf)

	ENTRANCE
	movl	STR1(%esp), %edi
	movd	STR2(%esp), %xmm1
	movl	%edi, %ecx

	/* Replicate the low byte of C: byte -> word -> dword, then
	   dword 0 into all four dwords of %xmm1.  */
	punpcklbw %xmm1, %xmm1
	punpcklbw %xmm1, %xmm1
	pshufd	$0, %xmm1, %xmm1

	pxor	%xmm2, %xmm2
	/* %ecx = misalignment of S; zero means S is already aligned.  */
	andl	$15, %ecx
	jz	L(aligned_loop)

	/* Head block: S is not 16-byte aligned.  Load the aligned block
	   that contains S; its first %ecx bytes lie in front of the
	   string.  */
	andl	$-16, %edi
	movdqa	(%edi), %xmm0
	pcmpeqb	%xmm0, %xmm2		/* %xmm2 = NUL lanes.  */
	pcmpeqb	%xmm1, %xmm0		/* %xmm0 = lanes equal to C.  */
	pmovmskb %xmm0, %eax
	pmovmskb %xmm2, %edx
	/* Shift out the bits of the bytes in front of S, so that bit 0
	   of either mask now describes S[0].  */
	sarl	%cl, %eax
	sarl	%cl, %edx
	testl	%eax, %eax
	jz	L(head_no_hit)
	/* %eax = offset from S of the first byte equal to C.  */
	bsfl	%eax, %eax
	testl	%edx, %edx
	jz	L(head_found)		/* No NUL in this block.  */
	bsfl	%edx, %edx
	cmpl	%edx, %eax
	/* The string ends in front of the candidate byte.  */
	ja	L(not_found)
L(head_found):
	/* Result = aligned base + misalignment + offset.  */
	addl	%ecx, %eax
	addl	%edi, %eax
	RETURN

	.p2align 4
L(head_no_hit):
	/* C is not in the head block; a NUL there ends the search.  */
	testl	%edx, %edx
	jnz	L(not_found)
	/* NUL bytes in front of S may have left lanes set in %xmm2;
	   clear it before it is used as the zero operand again.  */
	pxor	%xmm2, %xmm2

	addl	$16, %edi

	.p2align 4
	/* Main loop over aligned chunks, unrolled four times.  */
L(aligned_loop):
	SCAN_CHUNK
	SCAN_CHUNK
	SCAN_CHUNK
	SCAN_CHUNK
	jmp	L(aligned_loop)

L(hit):
	/* %edx was merged with the match mask inside SCAN_CHUNK; fetch
	   the pure NUL mask again.  */
	pmovmskb %xmm2, %edx
	testl	%eax, %eax
	jz	L(not_found)		/* Only a NUL was seen.  */
	bsfl	%eax, %eax
	/* C occurs in this chunk.  See whether a NUL precedes it.  */
	testl	%edx, %edx
	jz	L(found)
	bsfl	%edx, %ecx
	cmpl	%ecx, %eax
	ja	L(not_found)
L(found):
	/* %edi already points past the chunk, hence the -16.  */
	leal	-16(%edi,%eax), %eax
	RETURN

	/* C does not occur in the string: return 0.  */
	.p2align 4
L(not_found):
	xorl	%eax, %eax
	RETURN

END (__strchr_sse2_bsf)
# undef SCAN_CHUNK
159 | #endif |