]>
Commit | Line | Data |
---|---|---|
f7a9f785 | 1 | /* Copyright (C) 2013-2016 Free Software Foundation, Inc. |
c86f7b80 CM |
2 | This file is part of the GNU C Library. |
3 | ||
4 | The GNU C Library is free software; you can redistribute it and/or | |
5 | modify it under the terms of the GNU Lesser General Public | |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
8 | ||
9 | The GNU C Library is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
12 | Lesser General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU Lesser General Public | |
15 | License along with the GNU C Library; if not, write to the Free | |
16 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
17 | 02111-1307 USA. */ | |
18 | ||
19 | /* Specification of strstr. */ | |
20 | #include <string.h> | |
21 | ||
22 | #include <stdbool.h> | |
23 | #include "string-endian.h" | |
24 | ||
25 | #define RETURN_TYPE char * | |
26 | #define AVAILABLE(h, h_l, j, n_l) \ | |
27 | (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l)) \ | |
28 | && ((h_l) = (j) + (n_l))) | |
29 | #include "str-two-way.h" | |
95dee05f | 30 | typeof(two_way_short_needle) two_way_short_needle __attribute__((unused)); |
c86f7b80 CM |
31 | |
32 | #undef strstr | |
33 | ||
34 | #ifndef STRSTR | |
35 | #define STRSTR strstr | |
36 | #endif | |
37 | ||
38 | #ifndef STRSTR2 | |
39 | #define STRSTR2 strstr2 | |
40 | #endif | |
41 | ||
42 | #ifndef STRCHR | |
43 | #define STRCHR strchr | |
44 | #endif | |
45 | ||
46 | #ifndef STRSTR_SCAN | |
47 | #define STRSTR_SCAN strstr_scan | |
48 | #endif | |
49 | ||
50 | #ifndef TOLOWER | |
51 | # define TOLOWER(Ch) (Ch) | |
52 | #endif | |
53 | ||
54 | #ifdef USE_AS_STRCASESTR | |
55 | ||
56 | static uint64_t | |
57 | vec_tolower (uint64_t cc) | |
58 | { | |
59 | /* For Uppercases letters, add 32 to convert to lower case. */ | |
60 | uint64_t less_than_eq_Z = __insn_v1cmpltui (cc, 'Z' + 1); | |
61 | uint64_t less_than_A = __insn_v1cmpltui (cc, 'A'); | |
62 | uint64_t is_upper = __insn_v1cmpne (less_than_eq_Z, less_than_A); | |
63 | return __insn_v1add (cc,__insn_v1shli (is_upper, 5)); | |
64 | } | |
65 | ||
66 | /* There is no strcasechr() defined, but needed for 1 byte case | |
67 | of strcasestr(), so create it here. */ | |
68 | ||
69 | static char * | |
70 | strcasechr (const char *s, int c) | |
71 | { | |
72 | int z, g; | |
73 | ||
74 | c = tolower (c); | |
75 | ||
76 | /* Get an aligned pointer. */ | |
77 | const uintptr_t s_int = (uintptr_t) s; | |
78 | const uint64_t *p = (const uint64_t *) (s_int & -8); | |
79 | ||
80 | /* Create eight copies of the byte for which we are looking. */ | |
81 | const uint64_t goal = copy_byte(c); | |
82 | ||
83 | /* Read the first aligned word, but force bytes before the string to | |
84 | match neither zero nor goal (we make sure the high bit of each byte | |
85 | is 1, and the low 7 bits are all the opposite of the goal byte). */ | |
86 | const uint64_t before_mask = MASK (s_int); | |
87 | uint64_t v = | |
88 | (vec_tolower (*p) | before_mask) ^ (goal & __insn_v1shrui (before_mask, 1)); | |
89 | ||
90 | uint64_t zero_matches, goal_matches; | |
91 | while (1) | |
92 | { | |
93 | /* Look for a terminating '\0'. */ | |
94 | zero_matches = __insn_v1cmpeqi (v, 0); | |
95 | ||
96 | /* Look for the goal byte. */ | |
97 | goal_matches = __insn_v1cmpeq (v, goal); | |
98 | ||
99 | if (__builtin_expect ((zero_matches | goal_matches) != 0, 0)) | |
100 | break; | |
101 | ||
102 | v = vec_tolower (*++p); | |
103 | } | |
104 | ||
105 | z = CFZ (zero_matches); | |
106 | g = CFZ (goal_matches); | |
107 | ||
108 | /* If we found c before '\0' we got a match. Note that if c == '\0' | |
109 | then g == z, and we correctly return the address of the '\0' | |
110 | rather than NULL. */ | |
111 | return (g <= z) ? ((char *) p) + (g >> 3) : NULL; | |
112 | } | |
113 | ||
114 | # define vec_load(p) vec_tolower (*(p)) | |
115 | # define STRCHR strcasechr | |
116 | # define CMP_FUNC __strncasecmp | |
117 | ||
118 | #else | |
119 | ||
120 | # define vec_load(p) (*(p)) | |
121 | # define STRCHR strchr | |
122 | # define CMP_FUNC memcmp | |
123 | ||
124 | #endif | |
125 | ||
126 | ||
127 | /* Compare 2-character needle using SIMD. */ | |
128 | static char * | |
129 | STRSTR2 (const char *haystack_start, const char *needle) | |
130 | { | |
131 | int z, g; | |
132 | ||
133 | __insn_prefetch (haystack_start + 64); | |
134 | ||
135 | /* Get an aligned pointer. */ | |
136 | const uintptr_t s_int = (uintptr_t) haystack_start; | |
137 | const uint64_t *p = (const uint64_t *) (s_int & -8); | |
138 | ||
139 | /* Create eight copies of the first byte for which we are looking. */ | |
140 | const uint64_t byte1 = copy_byte (TOLOWER (*needle)); | |
141 | /* Create eight copies of the second byte for which we are looking. */ | |
142 | const uint64_t byte2 = copy_byte (TOLOWER (*(needle + 1))); | |
143 | ||
144 | /* Read the first aligned word, but force bytes before the string to | |
145 | match neither zero nor goal (we make sure the high bit of each byte | |
146 | is 1, and the low 7 bits are all the opposite of the goal byte). */ | |
147 | const uint64_t before_mask = MASK (s_int); | |
148 | uint64_t v = | |
149 | (vec_load (p) | before_mask) ^ (byte1 & __insn_v1shrui (before_mask, 1)); | |
150 | ||
151 | uint64_t zero_matches, goal_matches; | |
152 | while (1) | |
153 | { | |
154 | /* Look for a terminating '\0'. */ | |
155 | zero_matches = __insn_v1cmpeqi (v, 0); | |
156 | uint64_t byte1_matches = __insn_v1cmpeq (v, byte1); | |
0dacd7a3 | 157 | if (__builtin_expect (zero_matches != 0, 0)) |
c86f7b80 CM |
158 | { |
159 | /* This is the last vector. Don't worry about matches | |
160 | crossing into the next vector. Shift the second byte | |
161 | back 1 byte to align it with the first byte, then and to | |
162 | check for both matching. Each vector has a 1 in the LSB | |
163 | of the byte if there was match. */ | |
164 | uint64_t byte2_matches = __insn_v1cmpeq (v, byte2); | |
165 | goal_matches = byte1_matches & STRSHIFT (byte2_matches, 8); | |
166 | break; | |
167 | } | |
168 | else | |
169 | { | |
170 | /* This is not the last vector, so load the next vector now. | |
171 | And compare byte2 to the 8-bytes starting 1 byte shifted from v, | |
172 | which goes 1-byte into the next vector. */ | |
173 | uint64_t v2 = vec_load (p + 1); | |
174 | if (byte1_matches) | |
175 | { | |
176 | /* 8-bytes starting 1 byte into v. */ | |
177 | v = __insn_dblalign (v, v2, (void*)1); | |
178 | uint64_t byte2_matches_shifted = __insn_v1cmpeq (v, byte2); | |
179 | goal_matches = byte1_matches & byte2_matches_shifted; | |
180 | if (__builtin_expect (goal_matches != 0, 0)) | |
181 | break; | |
182 | } | |
183 | __insn_prefetch (p + 4); | |
184 | /* Move to next vector. */ | |
185 | v = v2; | |
186 | p++; | |
187 | } | |
188 | } | |
189 | ||
190 | z = CFZ (zero_matches); | |
191 | g = CFZ (goal_matches); | |
192 | ||
193 | /* If we found the match before '\0' we got a true match. Note that | |
194 | if c == '\0' then g == z, and we correctly return the address of | |
195 | the '\0' rather than NULL. */ | |
196 | return (g <= z) ? ((char *) p) + (g >> 3) : NULL; | |
197 | } | |
198 | ||
199 | /* Scan for NEEDLE, using the first two characters as a filter. */ | |
200 | static char * | |
201 | STRSTR_SCAN (const char *haystack, const char *needle, | |
202 | unsigned int needle_len) | |
203 | { | |
204 | char *match; | |
205 | while (1) | |
206 | { | |
207 | match = STRSTR2 (haystack, needle); | |
208 | if (match == NULL) | |
209 | return NULL; | |
210 | /* Found first two characters of needle, check for remainder. */ | |
211 | if (CMP_FUNC (match + 2, needle + 2, needle_len - 2) == 0) | |
212 | return match; | |
213 | /* Move past the previous match. Could be +2 instead of +1 if | |
214 | first two characters are different, but that tested slower. */ | |
215 | haystack = match + 1; | |
216 | } | |
217 | } | |
218 | ||
219 | /* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK | |
220 | if NEEDLE is empty, otherwise NULL if NEEDLE is not found in | |
221 | HAYSTACK. */ | |
222 | char * | |
223 | STRSTR (const char *haystack_start, const char *needle_start) | |
224 | { | |
225 | const char *haystack = haystack_start; | |
226 | const char *needle = needle_start; | |
227 | __insn_prefetch (haystack); | |
228 | size_t needle_len = strlen (needle_start); /* Length of NEEDLE. */ | |
229 | size_t haystack_len; /* Known minimum length of HAYSTACK. */ | |
230 | ||
231 | if (needle_len <= 2) | |
232 | { | |
233 | if (needle_len == 1) | |
234 | return STRCHR (haystack_start, *needle_start); | |
235 | if (needle_len == 0) | |
236 | return (char *) haystack_start; | |
237 | else | |
238 | return STRSTR2 (haystack_start, needle_start); | |
239 | } | |
240 | ||
241 | /* Fail if NEEDLE is longer than HAYSTACK. */ | |
95dee05f | 242 | if (__strnlen (haystack, needle_len) < needle_len) |
c86f7b80 CM |
243 | return NULL; |
244 | ||
245 | /* Perform the search. Abstract memory is considered to be an array | |
246 | of 'unsigned char' values, not an array of 'char' values. See | |
247 | ISO C 99 section 6.2.6.1. */ | |
248 | if (needle_len < 40) | |
249 | return STRSTR_SCAN (haystack_start, needle_start, needle_len); | |
250 | else | |
251 | { | |
252 | /* Reduce the size of haystack using STRSTR2, since it has a smaller | |
253 | linear coefficient than the Two-Way algorithm. */ | |
254 | haystack = STRSTR2 (haystack_start, needle_start); | |
255 | if (haystack == NULL) | |
256 | return NULL; | |
257 | needle = needle_start; | |
258 | haystack_len = (haystack > haystack_start + needle_len ? 1 | |
259 | : needle_len + haystack_start - haystack); | |
260 | ||
261 | return two_way_long_needle ((const unsigned char *) haystack, | |
262 | haystack_len, | |
263 | (const unsigned char *) needle, needle_len); | |
264 | } | |
265 | } | |
266 | #ifndef USE_AS_STRCASESTR | |
267 | libc_hidden_builtin_def (STRSTR) | |
268 | #endif | |
269 | ||
270 | #undef LONG_NEEDLE_THRESHOLD |