]>
Commit | Line | Data |
---|---|---|
12788f63 MT |
1 | 2010-11-09 H.J. Lu <hongjiu.lu@intel.com> |
2 | ||
3 | [BZ #12205] | |
4 | * string/test-strncasecmp.c (check_result): New function. | |
5 | (do_one_test): Use it. | |
6 | (check1): New function. | |
7 | (test_main): Use it. | |
8 | * sysdeps/i386/i686/multiarch/strcmp.S (nibble_ashr_use_sse4_2_exit): | |
9 | Support strcasecmp and strncasecmp. | |
10 | ||
11 | 2010-10-03 Ulrich Drepper <drepper@gmail.com> | |
12 | ||
13 | [BZ #12077] | |
14 | * sysdeps/x86_64/strcmp.S: Fix handling of remaining bytes in buffer | |
15 | for strncmp and strncasecmp. | |
16 | * string/stratcliff.c: Add tests for strcmp and strncmp. | |
17 | * wcsmbs/wcsatcliff.c: Adjust for stratcliff change. | |
18 | ||
19 | 2010-09-20 Ulrich Drepper <drepper@redhat.com> | |
20 | ||
21 | * sysdeps/x86_64/strcmp.S: Fix another typo in strncasecmp limit | |
22 | detection. | |
23 | ||
24 | 2010-08-19 Ulrich Drepper <drepper@redhat.com> | |
25 | ||
26 | * sysdeps/x86_64/multiarch/strcmp.S: Fix two typos in strncasecmp | |
27 | handling. | |
28 | ||
29 | 2010-08-15 Ulrich Drepper <drepper@redhat.com> | |
30 | ||
31 | * sysdeps/x86_64/strcmp.S: Use correct register for fourth parameter | |
32 | of strncasecmp_l. | |
33 | * sysdeps/x86_64/multiarch/strcmp.S: Likewise. | |
34 | ||
35 | 2010-08-14 Ulrich Drepper <drepper@redhat.com> | |
36 | ||
37 | * sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add | |
38 | strncase_l-nonascii. | |
39 | * sysdeps/x86_64/multiarch/Makefile [subdir=string] (sysdep_routines): | |
40 | Add strncase_l-ssse3. | |
41 | * sysdeps/x86_64/multiarch/strcmp.S: Prepare for use as strncasecmp. | |
42 | * sysdeps/x86_64/strcmp.S: Likewise. | |
43 | * sysdeps/x86_64/multiarch/strncase_l-ssse3.S: New file. | |
44 | * sysdeps/x86_64/multiarch/strncase_l.S: New file. | |
45 | * sysdeps/x86_64/strncase.S: New file. | |
46 | * sysdeps/x86_64/strncase_l-nonascii.c: New file. | |
47 | * sysdeps/x86_64/strncase_l.S: New file. | |
48 | * string/Makefile (strop-tests): Add strncasecmp. | |
49 | * string/test-strncasecmp.c: New file. | |
50 | ||
51 | * sysdeps/x86_64/strcasecmp_l-nonascii.c: Add prototype to avoid | |
52 | warning. | |
53 | ||
54 | * sysdeps/x86_64/strcmp.S: Move definition of NO_NOLOCALE_ALIAS to... | |
55 | * sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S: ... here. | |
56 | ||
57 | 2010-07-31 Ulrich Drepper <drepper@redhat.com> | |
58 | ||
59 | * sysdeps/x86_64/multiarch/Makefile [subdir=string] (sysdep_routines): | |
60 | Add strcasecmp_l-ssse3. | |
61 | * sysdeps/x86_64/multiarch/strcmp.S: Add support to compile for | |
62 | strcasecmp. | |
63 | * sysdeps/x86_64/strcmp.S: Allow more flexible compiling of strcasecmp. | |
64 | * sysdeps/x86_64/multiarch/strcasecmp_l.S: New file. | |
65 | * sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S: New file. | |
66 | ||
67 | 2010-07-30 Ulrich Drepper <drepper@redhat.com> | |
68 | ||
69 | * sysdeps/x86_64/multiarch/strcmp.S: Pretty printing. | |
70 | ||
71 | * string/Makefile (strop-tests): Add strcasecmp. | |
72 | * sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add | |
73 | strcasecmp_l-nonascii. | |
74 | (gen-as-const-headers): Add locale-defines.sym. | |
75 | * sysdeps/x86_64/strcmp.S: Add support for strcasecmp implementation. | |
76 | * sysdeps/x86_64/strcasecmp.S: New file. | |
77 | * sysdeps/x86_64/strcasecmp_l.S: New file. | |
78 | * sysdeps/x86_64/strcasecmp_l-nonascii.c: New file. | |
79 | * sysdeps/x86_64/locale-defines.sym: New file. | |
80 | * string/test-strcasecmp.c: New file. | |
81 | ||
82 | * string/test-strcasestr.c: Test both ends of the range of characters. | |
83 | * sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition. | |
84 | ||
85 | 2010-07-26 Ulrich Drepper <drepper@redhat.com> | |
86 | ||
87 | * string/test-strnlen.c: New file. | |
88 | * string/Makefile (strop-tests): Add strnlen. | |
89 | * string/tester.c (test_strnlen): Add a few more test cases. | |
90 | * string/tst-strlen.c: Better error reporting. | |
91 | ||
92 | * sysdeps/x86_64/strnlen.S: New file. | |
93 | ||
94 | 2010-07-24 Ulrich Drepper <drepper@redhat.com> | |
95 | ||
96 | * sysdeps/x86_64/multiarch/strstr.c (__m128i_strloadu_tolower): Use | |
97 | lower-latency instructions. | |
98 | ||
99 | 2010-07-23 Ulrich Drepper <drepper@redhat.com> | |
100 | ||
101 | * string/test-strcasestr.c: New file. | |
102 | * string/test-strstr.c: New file. | |
103 | * string/Makefile (strop-tests): Add strstr and strcasestr. | |
104 | * string/str-two-way.h: Don't undefine MAX. | |
105 | * string/strcasestr.c: Don't define alias if NO_ALIAS is defined. | |
106 | ||
107 | 2010-07-21 Andreas Schwab <schwab@redhat.com> | |
108 | ||
109 | * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add | |
110 | strcasestr-nonascii. | |
111 | (CFLAGS-strcasestr-nonascii.c): Define. | |
112 | * sysdeps/i386/i686/multiarch/strcasestr-nonascii.c: New file. | |
113 | * sysdeps/x86_64/multiarch/strcasestr-nonascii.c (STRSTR_SSE42): | |
114 | Remove unused attribute. | |
115 | ||
116 | 2010-07-16 Ulrich Drepper <drepper@redhat.com> | |
117 | ||
118 | * sysdeps/x86_64/multiarch/strstr.c: Rewrite to avoid indirect function | |
119 | call in strcasestr. | |
120 | * sysdeps/x86_64/multiarch/strcasestr.c: Declare | |
121 | __strcasestr_sse42_nonascii. | |
122 | * sysdeps/x86_64/multiarch/Makefile: Add rules to build | |
123 | strcasestr-nonascii.c. | |
124 | * sysdeps/x86_64/multiarch/strcasestr-nonascii.c: New file. | |
125 | ||
126 | Index: glibc-2.12-2-gc4ccff1/string/Makefile | |
127 | =================================================================== | |
128 | --- glibc-2.12-2-gc4ccff1.orig/string/Makefile | |
129 | +++ glibc-2.12-2-gc4ccff1/string/Makefile | |
130 | @@ -48,7 +48,8 @@ o-objects.ob := memcpy.o memset.o memchr | |
131 | ||
132 | strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \ | |
133 | stpcpy stpncpy strcat strchr strcmp strcpy strcspn \ | |
134 | - strlen strncmp strncpy strpbrk strrchr strspn memmem | |
135 | + strlen strncmp strncpy strpbrk strrchr strspn memmem \ | |
136 | + strstr strcasestr strnlen strcasecmp strncasecmp | |
137 | tests := tester inl-tester noinl-tester testcopy test-ffs \ | |
138 | tst-strlen stratcliff tst-svc tst-inlcall \ | |
139 | bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \ | |
140 | Index: glibc-2.12-2-gc4ccff1/string/str-two-way.h | |
141 | =================================================================== | |
142 | --- glibc-2.12-2-gc4ccff1.orig/string/str-two-way.h | |
143 | +++ glibc-2.12-2-gc4ccff1/string/str-two-way.h | |
144 | @@ -426,5 +426,4 @@ two_way_long_needle (const unsigned char | |
145 | #undef AVAILABLE | |
146 | #undef CANON_ELEMENT | |
147 | #undef CMP_FUNC | |
148 | -#undef MAX | |
149 | #undef RETURN_TYPE | |
150 | Index: glibc-2.12-2-gc4ccff1/string/stratcliff.c | |
151 | =================================================================== | |
152 | --- glibc-2.12-2-gc4ccff1.orig/string/stratcliff.c | |
153 | +++ glibc-2.12-2-gc4ccff1/string/stratcliff.c | |
154 | @@ -47,6 +47,8 @@ | |
155 | # define MEMCPY memcpy | |
156 | # define MEMPCPY mempcpy | |
157 | # define MEMCHR memchr | |
158 | +# define STRCMP strcmp | |
159 | +# define STRNCMP strncmp | |
160 | #endif | |
161 | ||
162 | ||
163 | @@ -277,7 +279,74 @@ do_test (void) | |
164 | ||
165 | adr[inner] = L('T'); | |
166 | } | |
167 | - } | |
168 | + } | |
169 | + | |
170 | + /* strcmp/wcscmp tests */ | |
171 | + for (outer = 1; outer < 32; ++outer) | |
172 | + for (middle = 0; middle < 16; ++middle) | |
173 | + { | |
174 | + MEMSET (adr + middle, L('T'), 256); | |
175 | + adr[256] = L('\0'); | |
176 | + MEMSET (dest + nchars - outer, L('T'), outer - 1); | |
177 | + dest[nchars - 1] = L('\0'); | |
178 | + | |
179 | + if (STRCMP (adr + middle, dest + nchars - outer) <= 0) | |
180 | + { | |
181 | + printf ("%s 1 flunked for outer = %d, middle = %d\n", | |
182 | + STRINGIFY (STRCMP), outer, middle); | |
183 | + result = 1; | |
184 | + } | |
185 | + | |
186 | + if (STRCMP (dest + nchars - outer, adr + middle) >= 0) | |
187 | + { | |
188 | + printf ("%s 2 flunked for outer = %d, middle = %d\n", | |
189 | + STRINGIFY (STRCMP), outer, middle); | |
190 | + result = 1; | |
191 | + } | |
192 | + } | |
193 | + | |
194 | + /* strncmp/wcsncmp tests */ | |
195 | + for (outer = 1; outer < 32; ++outer) | |
196 | + for (middle = 0; middle < 16; ++middle) | |
197 | + { | |
198 | + MEMSET (adr + middle, L('T'), 256); | |
199 | + adr[256] = L('\0'); | |
200 | + MEMSET (dest + nchars - outer, L('T'), outer - 1); | |
201 | + dest[nchars - 1] = L('U'); | |
202 | + | |
203 | + for (inner = 0; inner < outer; ++inner) | |
204 | + { | |
205 | + if (STRNCMP (adr + middle, dest + nchars - outer, inner) != 0) | |
206 | + { | |
207 | + printf ("%s 1 flunked for outer = %d, middle = %d, " | |
208 | + "inner = %d\n", | |
209 | + STRINGIFY (STRNCMP), outer, middle, inner); | |
210 | + result = 1; | |
211 | + } | |
212 | + | |
213 | + if (STRNCMP (dest + nchars - outer, adr + middle, inner) != 0) | |
214 | + { | |
215 | + printf ("%s 2 flunked for outer = %d, middle = %d, " | |
216 | + "inner = %d\n", | |
217 | + STRINGIFY (STRNCMP), outer, middle, inner); | |
218 | + result = 1; | |
219 | + } | |
220 | + } | |
221 | + | |
222 | + if (STRNCMP (adr + middle, dest + nchars - outer, outer) >= 0) | |
223 | + { | |
224 | + printf ("%s 1 flunked for outer = %d, middle = %d, full\n", | |
225 | + STRINGIFY (STRNCMP), outer, middle); | |
226 | + result = 1; | |
227 | + } | |
228 | + | |
229 | + if (STRNCMP (dest + nchars - outer, adr + middle, outer) <= 0) | |
230 | + { | |
231 | + printf ("%s 2 flunked for outer = %d, middle = %d, full\n", | |
232 | + STRINGIFY (STRNCMP), outer, middle); | |
233 | + result = 1; | |
234 | + } | |
235 | + } | |
236 | ||
237 | /* strncpy/wcsncpy tests */ | |
238 | adr[nchars - 1] = L('T'); | |
239 | Index: glibc-2.12-2-gc4ccff1/string/strcasestr.c | |
240 | =================================================================== | |
241 | --- glibc-2.12-2-gc4ccff1.orig/string/strcasestr.c | |
242 | +++ glibc-2.12-2-gc4ccff1/string/strcasestr.c | |
243 | @@ -103,4 +103,6 @@ STRCASESTR (const char *haystack_start, | |
244 | ||
245 | #undef LONG_NEEDLE_THRESHOLD | |
246 | ||
247 | +#ifndef NO_ALIAS | |
248 | weak_alias (__strcasestr, strcasestr) | |
249 | +#endif | |
250 | Index: glibc-2.12-2-gc4ccff1/string/test-strcasecmp.c | |
251 | =================================================================== | |
252 | --- /dev/null | |
253 | +++ glibc-2.12-2-gc4ccff1/string/test-strcasecmp.c | |
254 | @@ -0,0 +1,276 @@ | |
255 | +/* Test and measure strcasecmp functions. | |
256 | + Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc. | |
257 | + This file is part of the GNU C Library. | |
258 | + Written by Jakub Jelinek <jakub@redhat.com>, 1999. | |
259 | + | |
260 | + The GNU C Library is free software; you can redistribute it and/or | |
261 | + modify it under the terms of the GNU Lesser General Public | |
262 | + License as published by the Free Software Foundation; either | |
263 | + version 2.1 of the License, or (at your option) any later version. | |
264 | + | |
265 | + The GNU C Library is distributed in the hope that it will be useful, | |
266 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
267 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
268 | + Lesser General Public License for more details. | |
269 | + | |
270 | + You should have received a copy of the GNU Lesser General Public | |
271 | + License along with the GNU C Library; if not, write to the Free | |
272 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
273 | + 02111-1307 USA. */ | |
274 | + | |
275 | +#include <ctype.h> | |
276 | +#define TEST_MAIN | |
277 | +#include "test-string.h" | |
278 | + | |
279 | +typedef int (*proto_t) (const char *, const char *); | |
280 | +static int simple_strcasecmp (const char *, const char *); | |
281 | +static int stupid_strcasecmp (const char *, const char *); | |
282 | + | |
283 | +IMPL (stupid_strcasecmp, 0) | |
284 | +IMPL (simple_strcasecmp, 0) | |
285 | +IMPL (strcasecmp, 1) | |
286 | + | |
287 | +static int | |
288 | +simple_strcasecmp (const char *s1, const char *s2) | |
289 | +{ | |
290 | + int ret; | |
291 | + | |
292 | + while ((ret = ((unsigned char) tolower (*s1) | |
293 | + - (unsigned char) tolower (*s2))) == 0 | |
294 | + && *s1++) | |
295 | + ++s2; | |
296 | + return ret; | |
297 | +} | |
298 | + | |
299 | +static int | |
300 | +stupid_strcasecmp (const char *s1, const char *s2) | |
301 | +{ | |
302 | + size_t ns1 = strlen (s1) + 1, ns2 = strlen (s2) + 1; | |
303 | + size_t n = ns1 < ns2 ? ns1 : ns2; | |
304 | + int ret = 0; | |
305 | + | |
306 | + while (n--) | |
307 | + { | |
308 | + if ((ret = ((unsigned char) tolower (*s1) | |
309 | + - (unsigned char) tolower (*s2))) != 0) | |
310 | + break; | |
311 | + ++s1; | |
312 | + ++s2; | |
313 | + } | |
314 | + return ret; | |
315 | +} | |
316 | + | |
317 | +static void | |
318 | +do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result) | |
319 | +{ | |
320 | + int result = CALL (impl, s1, s2); | |
321 | + if ((exp_result == 0 && result != 0) | |
322 | + || (exp_result < 0 && result >= 0) | |
323 | + || (exp_result > 0 && result <= 0)) | |
324 | + { | |
325 | + error (0, 0, "Wrong result in function %s %d %d", impl->name, | |
326 | + result, exp_result); | |
327 | + ret = 1; | |
328 | + return; | |
329 | + } | |
330 | + | |
331 | + if (HP_TIMING_AVAIL) | |
332 | + { | |
333 | + hp_timing_t start __attribute ((unused)); | |
334 | + hp_timing_t stop __attribute ((unused)); | |
335 | + hp_timing_t best_time = ~ (hp_timing_t) 0; | |
336 | + size_t i; | |
337 | + | |
338 | + for (i = 0; i < 32; ++i) | |
339 | + { | |
340 | + HP_TIMING_NOW (start); | |
341 | + CALL (impl, s1, s2); | |
342 | + HP_TIMING_NOW (stop); | |
343 | + HP_TIMING_BEST (best_time, start, stop); | |
344 | + } | |
345 | + | |
346 | + printf ("\t%zd", (size_t) best_time); | |
347 | + } | |
348 | +} | |
349 | + | |
350 | +static void | |
351 | +do_test (size_t align1, size_t align2, size_t len, int max_char, | |
352 | + int exp_result) | |
353 | +{ | |
354 | + size_t i; | |
355 | + char *s1, *s2; | |
356 | + | |
357 | + if (len == 0) | |
358 | + return; | |
359 | + | |
360 | + align1 &= 7; | |
361 | + if (align1 + len + 1 >= page_size) | |
362 | + return; | |
363 | + | |
364 | + align2 &= 7; | |
365 | + if (align2 + len + 1 >= page_size) | |
366 | + return; | |
367 | + | |
368 | + s1 = (char *) (buf1 + align1); | |
369 | + s2 = (char *) (buf2 + align2); | |
370 | + | |
371 | + for (i = 0; i < len; i++) | |
372 | + { | |
373 | + s1[i] = toupper (1 + 23 * i % max_char); | |
374 | + s2[i] = tolower (s1[i]); | |
375 | + } | |
376 | + | |
377 | + s1[len] = s2[len] = 0; | |
378 | + s1[len + 1] = 23; | |
379 | + s2[len + 1] = 24 + exp_result; | |
380 | + if ((s2[len - 1] == 'z' && exp_result == -1) | |
381 | + || (s2[len - 1] == 'a' && exp_result == 1)) | |
382 | + s1[len - 1] += exp_result; | |
383 | + else | |
384 | + s2[len - 1] -= exp_result; | |
385 | + | |
386 | + if (HP_TIMING_AVAIL) | |
387 | + printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2); | |
388 | + | |
389 | + FOR_EACH_IMPL (impl, 0) | |
390 | + do_one_test (impl, s1, s2, exp_result); | |
391 | + | |
392 | + if (HP_TIMING_AVAIL) | |
393 | + putchar ('\n'); | |
394 | +} | |
395 | + | |
396 | +static void | |
397 | +do_random_tests (void) | |
398 | +{ | |
399 | + size_t i, j, n, align1, align2, pos, len1, len2; | |
400 | + int result; | |
401 | + long r; | |
402 | + unsigned char *p1 = buf1 + page_size - 512; | |
403 | + unsigned char *p2 = buf2 + page_size - 512; | |
404 | + | |
405 | + for (n = 0; n < ITERATIONS; n++) | |
406 | + { | |
407 | + align1 = random () & 31; | |
408 | + if (random () & 1) | |
409 | + align2 = random () & 31; | |
410 | + else | |
411 | + align2 = align1 + (random () & 24); | |
412 | + pos = random () & 511; | |
413 | + j = align1 > align2 ? align1 : align2; | |
414 | + if (pos + j >= 511) | |
415 | + pos = 510 - j - (random () & 7); | |
416 | + len1 = random () & 511; | |
417 | + if (pos >= len1 && (random () & 1)) | |
418 | + len1 = pos + (random () & 7); | |
419 | + if (len1 + j >= 512) | |
420 | + len1 = 511 - j - (random () & 7); | |
421 | + if (pos >= len1) | |
422 | + len2 = len1; | |
423 | + else | |
424 | + len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0); | |
425 | + j = (pos > len2 ? pos : len2) + align1 + 64; | |
426 | + if (j > 512) | |
427 | + j = 512; | |
428 | + for (i = 0; i < j; ++i) | |
429 | + { | |
430 | + p1[i] = tolower (random () & 255); | |
431 | + if (i < len1 + align1 && !p1[i]) | |
432 | + { | |
433 | + p1[i] = tolower (random () & 255); | |
434 | + if (!p1[i]) | |
435 | + p1[i] = tolower (1 + (random () & 127)); | |
436 | + } | |
437 | + } | |
438 | + for (i = 0; i < j; ++i) | |
439 | + { | |
440 | + p2[i] = toupper (random () & 255); | |
441 | + if (i < len2 + align2 && !p2[i]) | |
442 | + { | |
443 | + p2[i] = toupper (random () & 255); | |
444 | + if (!p2[i]) | |
445 | + toupper (p2[i] = 1 + (random () & 127)); | |
446 | + } | |
447 | + } | |
448 | + | |
449 | + result = 0; | |
450 | + memcpy (p2 + align2, p1 + align1, pos); | |
451 | + if (pos < len1) | |
452 | + { | |
453 | + if (tolower (p2[align2 + pos]) == p1[align1 + pos]) | |
454 | + { | |
455 | + p2[align2 + pos] = toupper (random () & 255); | |
456 | + if (tolower (p2[align2 + pos]) == p1[align1 + pos]) | |
457 | + p2[align2 + pos] = toupper (p1[align1 + pos] | |
458 | + + 3 + (random () & 127)); | |
459 | + } | |
460 | + | |
461 | + if (p1[align1 + pos] < tolower (p2[align2 + pos])) | |
462 | + result = -1; | |
463 | + else | |
464 | + result = 1; | |
465 | + } | |
466 | + p1[len1 + align1] = 0; | |
467 | + p2[len2 + align2] = 0; | |
468 | + | |
469 | + FOR_EACH_IMPL (impl, 1) | |
470 | + { | |
471 | + r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2)); | |
472 | + /* Test whether the promotion has been done on 64-bit | |
473 | + architectures where the ABI requires the callee to promote. */ | |
474 | + asm ("" : "=g" (r) : "0" (r)); | |
475 | + if ((r == 0 && result) | |
476 | + || (r < 0 && result >= 0) | |
477 | + || (r > 0 && result <= 0)) | |
478 | + { | |
479 | + error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p", | |
480 | + n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2); | |
481 | + ret = 1; | |
482 | + } | |
483 | + } | |
484 | + } | |
485 | +} | |
486 | + | |
487 | +int | |
488 | +test_main (void) | |
489 | +{ | |
490 | + size_t i; | |
491 | + | |
492 | + test_init (); | |
493 | + | |
494 | + printf ("%23s", ""); | |
495 | + FOR_EACH_IMPL (impl, 0) | |
496 | + printf ("\t%s", impl->name); | |
497 | + putchar ('\n'); | |
498 | + | |
499 | + for (i = 1; i < 16; ++i) | |
500 | + { | |
501 | + do_test (i, i, i, 127, 0); | |
502 | + do_test (i, i, i, 127, 1); | |
503 | + do_test (i, i, i, 127, -1); | |
504 | + } | |
505 | + | |
506 | + for (i = 1; i < 10; ++i) | |
507 | + { | |
508 | + do_test (0, 0, 2 << i, 127, 0); | |
509 | + do_test (0, 0, 2 << i, 254, 0); | |
510 | + do_test (0, 0, 2 << i, 127, 1); | |
511 | + do_test (0, 0, 2 << i, 254, 1); | |
512 | + do_test (0, 0, 2 << i, 127, -1); | |
513 | + do_test (0, 0, 2 << i, 254, -1); | |
514 | + } | |
515 | + | |
516 | + for (i = 1; i < 8; ++i) | |
517 | + { | |
518 | + do_test (i, 2 * i, 8 << i, 127, 0); | |
519 | + do_test (2 * i, i, 8 << i, 254, 0); | |
520 | + do_test (i, 2 * i, 8 << i, 127, 1); | |
521 | + do_test (2 * i, i, 8 << i, 254, 1); | |
522 | + do_test (i, 2 * i, 8 << i, 127, -1); | |
523 | + do_test (2 * i, i, 8 << i, 254, -1); | |
524 | + } | |
525 | + | |
526 | + do_random_tests (); | |
527 | + return ret; | |
528 | +} | |
529 | + | |
530 | +#include "../test-skeleton.c" | |
531 | Index: glibc-2.12-2-gc4ccff1/string/test-strcasestr.c | |
532 | =================================================================== | |
533 | --- /dev/null | |
534 | +++ glibc-2.12-2-gc4ccff1/string/test-strcasestr.c | |
535 | @@ -0,0 +1,197 @@ | |
536 | +/* Test and measure strcasestr functions. | |
537 | + Copyright (C) 2010 Free Software Foundation, Inc. | |
538 | + This file is part of the GNU C Library. | |
539 | + Written by Ulrich Drepper <drepper@redhat.com>, 2010. | |
540 | + | |
541 | + The GNU C Library is free software; you can redistribute it and/or | |
542 | + modify it under the terms of the GNU Lesser General Public | |
543 | + License as published by the Free Software Foundation; either | |
544 | + version 2.1 of the License, or (at your option) any later version. | |
545 | + | |
546 | + The GNU C Library is distributed in the hope that it will be useful, | |
547 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
548 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
549 | + Lesser General Public License for more details. | |
550 | + | |
551 | + You should have received a copy of the GNU Lesser General Public | |
552 | + License along with the GNU C Library; if not, write to the Free | |
553 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
554 | + 02111-1307 USA. */ | |
555 | + | |
556 | +#define TEST_MAIN | |
557 | +#include "test-string.h" | |
558 | + | |
559 | + | |
560 | +#define STRCASESTR simple_strcasestr | |
561 | +#define NO_ALIAS | |
562 | +#define __strncasecmp strncasecmp | |
563 | +#include "strcasestr.c" | |
564 | + | |
565 | + | |
566 | +static char * | |
567 | +stupid_strcasestr (const char *s1, const char *s2) | |
568 | +{ | |
569 | + ssize_t s1len = strlen (s1); | |
570 | + ssize_t s2len = strlen (s2); | |
571 | + | |
572 | + if (s2len > s1len) | |
573 | + return NULL; | |
574 | + | |
575 | + for (ssize_t i = 0; i <= s1len - s2len; ++i) | |
576 | + { | |
577 | + size_t j; | |
578 | + for (j = 0; j < s2len; ++j) | |
579 | + if (tolower (s1[i + j]) != tolower (s2[j])) | |
580 | + break; | |
581 | + if (j == s2len) | |
582 | + return (char *) s1 + i; | |
583 | + } | |
584 | + | |
585 | + return NULL; | |
586 | +} | |
587 | + | |
588 | + | |
589 | +typedef char *(*proto_t) (const char *, const char *); | |
590 | + | |
591 | +IMPL (stupid_strcasestr, 0) | |
592 | +IMPL (simple_strcasestr, 0) | |
593 | +IMPL (strcasestr, 1) | |
594 | + | |
595 | + | |
596 | +static void | |
597 | +do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result) | |
598 | +{ | |
599 | + char *result = CALL (impl, s1, s2); | |
600 | + if (result != exp_result) | |
601 | + { | |
602 | + error (0, 0, "Wrong result in function %s %s %s", impl->name, | |
603 | + result, exp_result); | |
604 | + ret = 1; | |
605 | + return; | |
606 | + } | |
607 | + | |
608 | + if (HP_TIMING_AVAIL) | |
609 | + { | |
610 | + hp_timing_t start __attribute ((unused)); | |
611 | + hp_timing_t stop __attribute ((unused)); | |
612 | + hp_timing_t best_time = ~(hp_timing_t) 0; | |
613 | + size_t i; | |
614 | + | |
615 | + for (i = 0; i < 32; ++i) | |
616 | + { | |
617 | + HP_TIMING_NOW (start); | |
618 | + CALL (impl, s1, s2); | |
619 | + HP_TIMING_NOW (stop); | |
620 | + HP_TIMING_BEST (best_time, start, stop); | |
621 | + } | |
622 | + | |
623 | + printf ("\t%zd", (size_t) best_time); | |
624 | + } | |
625 | +} | |
626 | + | |
627 | + | |
628 | +static void | |
629 | +do_test (size_t align1, size_t align2, size_t len1, size_t len2, | |
630 | + int fail) | |
631 | +{ | |
632 | + char *s1 = (char *) (buf1 + align1); | |
633 | + char *s2 = (char *) (buf2 + align2); | |
634 | + | |
635 | + static const char d[] = "1234567890abcxyz"; | |
636 | +#define dl (sizeof (d) - 1) | |
637 | + char *ss2 = s2; | |
638 | + for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0) | |
639 | + { | |
640 | + size_t t = l > dl ? dl : l; | |
641 | + ss2 = mempcpy (ss2, d, t); | |
642 | + } | |
643 | + s2[len2] = '\0'; | |
644 | + | |
645 | + if (fail) | |
646 | + { | |
647 | + char *ss1 = s1; | |
648 | + for (size_t l = len1; l > 0; l = l > dl ? l - dl : 0) | |
649 | + { | |
650 | + size_t t = l > dl ? dl : l; | |
651 | + memcpy (ss1, d, t); | |
652 | + ++ss1[len2 > 7 ? 7 : len2 - 1]; | |
653 | + ss1 += t; | |
654 | + } | |
655 | + } | |
656 | + else | |
657 | + { | |
658 | + memset (s1, '0', len1); | |
659 | + for (size_t i = 0; i < len2; ++i) | |
660 | + s1[len1 - len2 + i] = toupper (s2[i]); | |
661 | + } | |
662 | + s1[len1] = '\0'; | |
663 | + | |
664 | + if (HP_TIMING_AVAIL) | |
665 | + printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:", | |
666 | + len1, len2, align1, align2, fail ? "fail" : "found"); | |
667 | + | |
668 | + FOR_EACH_IMPL (impl, 0) | |
669 | + do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2); | |
670 | + | |
671 | + if (HP_TIMING_AVAIL) | |
672 | + putchar ('\n'); | |
673 | +} | |
674 | + | |
675 | + | |
676 | +static int | |
677 | +test_main (void) | |
678 | +{ | |
679 | + test_init (); | |
680 | + | |
681 | + printf ("%23s", ""); | |
682 | + FOR_EACH_IMPL (impl, 0) | |
683 | + printf ("\t%s", impl->name); | |
684 | + putchar ('\n'); | |
685 | + | |
686 | + for (size_t klen = 2; klen < 32; ++klen) | |
687 | + for (size_t hlen = 2 * klen; hlen < 16 * klen; hlen += klen) | |
688 | + { | |
689 | + do_test (0, 0, hlen, klen, 0); | |
690 | + do_test (0, 0, hlen, klen, 1); | |
691 | + do_test (0, 3, hlen, klen, 0); | |
692 | + do_test (0, 3, hlen, klen, 1); | |
693 | + do_test (0, 9, hlen, klen, 0); | |
694 | + do_test (0, 9, hlen, klen, 1); | |
695 | + do_test (0, 15, hlen, klen, 0); | |
696 | + do_test (0, 15, hlen, klen, 1); | |
697 | + | |
698 | + do_test (3, 0, hlen, klen, 0); | |
699 | + do_test (3, 0, hlen, klen, 1); | |
700 | + do_test (3, 3, hlen, klen, 0); | |
701 | + do_test (3, 3, hlen, klen, 1); | |
702 | + do_test (3, 9, hlen, klen, 0); | |
703 | + do_test (3, 9, hlen, klen, 1); | |
704 | + do_test (3, 15, hlen, klen, 0); | |
705 | + do_test (3, 15, hlen, klen, 1); | |
706 | + | |
707 | + do_test (9, 0, hlen, klen, 0); | |
708 | + do_test (9, 0, hlen, klen, 1); | |
709 | + do_test (9, 3, hlen, klen, 0); | |
710 | + do_test (9, 3, hlen, klen, 1); | |
711 | + do_test (9, 9, hlen, klen, 0); | |
712 | + do_test (9, 9, hlen, klen, 1); | |
713 | + do_test (9, 15, hlen, klen, 0); | |
714 | + do_test (9, 15, hlen, klen, 1); | |
715 | + | |
716 | + do_test (15, 0, hlen, klen, 0); | |
717 | + do_test (15, 0, hlen, klen, 1); | |
718 | + do_test (15, 3, hlen, klen, 0); | |
719 | + do_test (15, 3, hlen, klen, 1); | |
720 | + do_test (15, 9, hlen, klen, 0); | |
721 | + do_test (15, 9, hlen, klen, 1); | |
722 | + do_test (15, 15, hlen, klen, 0); | |
723 | + do_test (15, 15, hlen, klen, 1); | |
724 | + } | |
725 | + | |
726 | + do_test (0, 0, page_size - 1, 16, 0); | |
727 | + do_test (0, 0, page_size - 1, 16, 1); | |
728 | + | |
729 | + return ret; | |
730 | +} | |
731 | + | |
732 | +#include "../test-skeleton.c" | |
733 | Index: glibc-2.12-2-gc4ccff1/string/test-strncasecmp.c | |
734 | =================================================================== | |
735 | --- /dev/null | |
736 | +++ glibc-2.12-2-gc4ccff1/string/test-strncasecmp.c | |
737 | @@ -0,0 +1,349 @@ | |
738 | +/* Test and measure strncasecmp functions. | |
739 | + Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc. | |
740 | + This file is part of the GNU C Library. | |
741 | + Written by Jakub Jelinek <jakub@redhat.com>, 1999. | |
742 | + | |
743 | + The GNU C Library is free software; you can redistribute it and/or | |
744 | + modify it under the terms of the GNU Lesser General Public | |
745 | + License as published by the Free Software Foundation; either | |
746 | + version 2.1 of the License, or (at your option) any later version. | |
747 | + | |
748 | + The GNU C Library is distributed in the hope that it will be useful, | |
749 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
750 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
751 | + Lesser General Public License for more details. | |
752 | + | |
753 | + You should have received a copy of the GNU Lesser General Public | |
754 | + License along with the GNU C Library; if not, write to the Free | |
755 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
756 | + 02111-1307 USA. */ | |
757 | + | |
758 | +#include <ctype.h> | |
759 | +#define TEST_MAIN | |
760 | +#include "test-string.h" | |
761 | + | |
762 | +typedef int (*proto_t) (const char *, const char *, size_t); | |
763 | +static int simple_strncasecmp (const char *, const char *, size_t); | |
764 | +static int stupid_strncasecmp (const char *, const char *, size_t); | |
765 | + | |
766 | +IMPL (stupid_strncasecmp, 0) | |
767 | +IMPL (simple_strncasecmp, 0) | |
768 | +IMPL (strncasecmp, 1) | |
769 | + | |
770 | +static int | |
771 | +simple_strncasecmp (const char *s1, const char *s2, size_t n) | |
772 | +{ | |
773 | + int ret; | |
774 | + | |
775 | + if (n == 0) | |
776 | + return 0; | |
777 | + | |
778 | + while ((ret = ((unsigned char) tolower (*s1) | |
779 | + - (unsigned char) tolower (*s2))) == 0 | |
780 | + && *s1++) | |
781 | + { | |
782 | + if (--n == 0) | |
783 | + return 0; | |
784 | + ++s2; | |
785 | + } | |
786 | + return ret; | |
787 | +} | |
788 | + | |
789 | +static int | |
790 | +stupid_strncasecmp (const char *s1, const char *s2, size_t max) | |
791 | +{ | |
792 | + size_t ns1 = strlen (s1) + 1; | |
793 | + size_t ns2 = strlen (s2) + 1; | |
794 | + size_t n = ns1 < ns2 ? ns1 : ns2; | |
795 | + if (n > max) | |
796 | + n = max; | |
797 | + int ret = 0; | |
798 | + | |
799 | + while (n--) | |
800 | + { | |
801 | + if ((ret = ((unsigned char) tolower (*s1) | |
802 | + - (unsigned char) tolower (*s2))) != 0) | |
803 | + break; | |
804 | + ++s1; | |
805 | + ++s2; | |
806 | + } | |
807 | + return ret; | |
808 | +} | |
809 | + | |
810 | +static int | |
811 | +check_result (impl_t *impl, const char *s1, const char *s2, size_t n, | |
812 | + int exp_result) | |
813 | +{ | |
814 | + int result = CALL (impl, s1, s2, n); | |
815 | + if ((exp_result == 0 && result != 0) | |
816 | + || (exp_result < 0 && result >= 0) | |
817 | + || (exp_result > 0 && result <= 0)) | |
818 | + { | |
819 | + error (0, 0, "Wrong result in function %s %d %d", impl->name, | |
820 | + result, exp_result); | |
821 | + ret = 1; | |
822 | + return -1; | |
823 | + } | |
824 | + | |
825 | + return 0; | |
826 | +} | |
827 | + | |
828 | +static void | |
829 | +do_one_test (impl_t *impl, const char *s1, const char *s2, size_t n, | |
830 | + int exp_result) | |
831 | +{ | |
832 | + if (check_result (impl, s1, s2, n, exp_result) < 0) | |
833 | + return; | |
834 | + | |
835 | + if (HP_TIMING_AVAIL) | |
836 | + { | |
837 | + hp_timing_t start __attribute ((unused)); | |
838 | + hp_timing_t stop __attribute ((unused)); | |
839 | + hp_timing_t best_time = ~ (hp_timing_t) 0; | |
840 | + size_t i; | |
841 | + | |
842 | + for (i = 0; i < 32; ++i) | |
843 | + { | |
844 | + HP_TIMING_NOW (start); | |
845 | + CALL (impl, s1, s2, n); | |
846 | + HP_TIMING_NOW (stop); | |
847 | + HP_TIMING_BEST (best_time, start, stop); | |
848 | + } | |
849 | + | |
850 | + printf ("\t%zd", (size_t) best_time); | |
851 | + } | |
852 | +} | |
853 | + | |
854 | +static void | |
855 | +do_test (size_t align1, size_t align2, size_t n, size_t len, int max_char, | |
856 | + int exp_result) | |
857 | +{ | |
858 | + size_t i; | |
859 | + char *s1, *s2; | |
860 | + | |
861 | + if (len == 0) | |
862 | + return; | |
863 | + | |
864 | + align1 &= 7; | |
865 | + if (align1 + len + 1 >= page_size) | |
866 | + return; | |
867 | + | |
868 | + align2 &= 7; | |
869 | + if (align2 + len + 1 >= page_size) | |
870 | + return; | |
871 | + | |
872 | + s1 = (char *) (buf1 + align1); | |
873 | + s2 = (char *) (buf2 + align2); | |
874 | + | |
875 | + for (i = 0; i < len; i++) | |
876 | + { | |
877 | + s1[i] = toupper (1 + 23 * i % max_char); | |
878 | + s2[i] = tolower (s1[i]); | |
879 | + } | |
880 | + | |
881 | + s1[len] = s2[len] = 0; | |
882 | + s1[len + 1] = 23; | |
883 | + s2[len + 1] = 24 + exp_result; | |
884 | + if ((s2[len - 1] == 'z' && exp_result == -1) | |
885 | + || (s2[len - 1] == 'a' && exp_result == 1)) | |
886 | + s1[len - 1] += exp_result; | |
887 | + else | |
888 | + s2[len - 1] -= exp_result; | |
889 | + | |
890 | + if (HP_TIMING_AVAIL) | |
891 | + printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2); | |
892 | + | |
893 | + FOR_EACH_IMPL (impl, 0) | |
894 | + do_one_test (impl, s1, s2, n, exp_result); | |
895 | + | |
896 | + if (HP_TIMING_AVAIL) | |
897 | + putchar ('\n'); | |
898 | +} | |
899 | + | |
900 | +static void | |
901 | +do_random_tests (void) | |
902 | +{ | |
903 | + size_t i, j, n, align1, align2, pos, len1, len2; | |
904 | + int result; | |
905 | + long r; | |
906 | + unsigned char *p1 = buf1 + page_size - 512; | |
907 | + unsigned char *p2 = buf2 + page_size - 512; | |
908 | + | |
909 | + for (n = 0; n < ITERATIONS; n++) | |
910 | + { | |
911 | + align1 = random () & 31; | |
912 | + if (random () & 1) | |
913 | + align2 = random () & 31; | |
914 | + else | |
915 | + align2 = align1 + (random () & 24); | |
916 | + pos = random () & 511; | |
917 | + j = align1 > align2 ? align1 : align2; | |
918 | + if (pos + j >= 511) | |
919 | + pos = 510 - j - (random () & 7); | |
920 | + len1 = random () & 511; | |
921 | + if (pos >= len1 && (random () & 1)) | |
922 | + len1 = pos + (random () & 7); | |
923 | + if (len1 + j >= 512) | |
924 | + len1 = 511 - j - (random () & 7); | |
925 | + if (pos >= len1) | |
926 | + len2 = len1; | |
927 | + else | |
928 | + len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0); | |
929 | + j = (pos > len2 ? pos : len2) + align1 + 64; | |
930 | + if (j > 512) | |
931 | + j = 512; | |
932 | + for (i = 0; i < j; ++i) | |
933 | + { | |
934 | + p1[i] = tolower (random () & 255); | |
935 | + if (i < len1 + align1 && !p1[i]) | |
936 | + { | |
937 | + p1[i] = tolower (random () & 255); | |
938 | + if (!p1[i]) | |
939 | + p1[i] = tolower (1 + (random () & 127)); | |
940 | + } | |
941 | + } | |
942 | + for (i = 0; i < j; ++i) | |
943 | + { | |
944 | + p2[i] = toupper (random () & 255); | |
945 | + if (i < len2 + align2 && !p2[i]) | |
946 | + { | |
947 | + p2[i] = toupper (random () & 255); | |
948 | + if (!p2[i]) | |
949 | + toupper (p2[i] = 1 + (random () & 127)); | |
950 | + } | |
951 | + } | |
952 | + | |
953 | + result = 0; | |
954 | + memcpy (p2 + align2, p1 + align1, pos); | |
955 | + if (pos < len1) | |
956 | + { | |
957 | + if (tolower (p2[align2 + pos]) == p1[align1 + pos]) | |
958 | + { | |
959 | + p2[align2 + pos] = toupper (random () & 255); | |
960 | + if (tolower (p2[align2 + pos]) == p1[align1 + pos]) | |
961 | + p2[align2 + pos] = toupper (p1[align1 + pos] | |
962 | + + 3 + (random () & 127)); | |
963 | + } | |
964 | + | |
965 | + if (p1[align1 + pos] < tolower (p2[align2 + pos])) | |
966 | + result = -1; | |
967 | + else | |
968 | + result = 1; | |
969 | + } | |
970 | + p1[len1 + align1] = 0; | |
971 | + p2[len2 + align2] = 0; | |
972 | + | |
973 | + FOR_EACH_IMPL (impl, 1) | |
974 | + { | |
975 | + r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2), | |
976 | + pos + 1 + (random () & 255)); | |
977 | + /* On 64-bit architectures where the ABI requires the callee to | |
978 | + promote the return value, test whether the promotion was done. */ | |
979 | + asm ("" : "=g" (r) : "0" (r)); | |
980 | + if ((r == 0 && result) | |
981 | + || (r < 0 && result >= 0) | |
982 | + || (r > 0 && result <= 0)) | |
983 | + { | |
984 | + error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p", | |
985 | + n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2); | |
986 | + ret = 1; | |
987 | + } | |
988 | + } | |
989 | + } | |
990 | +} | |
991 | + | |
992 | + | |
993 | +static void | |
994 | +check1 (void) | |
995 | +{ | |
996 | + static char cp [4096+16] __attribute__ ((aligned(4096))); | |
997 | + static char gotrel[4096] __attribute__ ((aligned(4096))); | |
998 | + char *s1 = cp + 0xffa; | |
999 | + char *s2 = gotrel + 0xcbe; | |
1000 | + int exp_result; | |
1001 | + size_t n = 6; | |
1002 | + | |
1003 | + strcpy (s1, "gottpoff"); | |
1004 | + strcpy (s2, "GOTPLT"); | |
1005 | + | |
1006 | + exp_result = simple_strncasecmp (s1, s2, n); | |
1007 | + FOR_EACH_IMPL (impl, 0) | |
1008 | + check_result (impl, s1, s2, n, exp_result); | |
1009 | +} | |
1010 | + | |
1011 | +int | |
1012 | +test_main (void) | |
1013 | +{ | |
1014 | + size_t i; | |
1015 | + | |
1016 | + test_init (); | |
1017 | + | |
1018 | + check1 (); | |
1019 | + | |
1020 | + printf ("%23s", ""); | |
1021 | + FOR_EACH_IMPL (impl, 0) | |
1022 | + printf ("\t%s", impl->name); | |
1023 | + putchar ('\n'); | |
1024 | + | |
1025 | + for (i = 1; i < 16; ++i) | |
1026 | + { | |
1027 | + do_test (i, i, i - 1, i, 127, 0); | |
1028 | + | |
1029 | + do_test (i, i, i, i, 127, 0); | |
1030 | + do_test (i, i, i, i, 127, 1); | |
1031 | + do_test (i, i, i, i, 127, -1); | |
1032 | + | |
1033 | + do_test (i, i, i + 1, i, 127, 0); | |
1034 | + do_test (i, i, i + 1, i, 127, 1); | |
1035 | + do_test (i, i, i + 1, i, 127, -1); | |
1036 | + } | |
1037 | + | |
1038 | + for (i = 1; i < 10; ++i) | |
1039 | + { | |
1040 | + do_test (0, 0, (2 << i) - 1, 2 << i, 127, 0); | |
1041 | + do_test (0, 0, 2 << i, 2 << i, 254, 0); | |
1042 | + do_test (0, 0, (2 << i) + 1, 2 << i, 127, 0); | |
1043 | + | |
1044 | + do_test (0, 0, (2 << i) + 1, 2 << i, 254, 0); | |
1045 | + | |
1046 | + do_test (0, 0, 2 << i, 2 << i, 127, 1); | |
1047 | + do_test (0, 0, (2 << i) + 10, 2 << i, 127, 1); | |
1048 | + | |
1049 | + do_test (0, 0, 2 << i, 2 << i, 254, 1); | |
1050 | + do_test (0, 0, (2 << i) + 10, 2 << i, 254, 1); | |
1051 | + | |
1052 | + do_test (0, 0, 2 << i, 2 << i, 127, -1); | |
1053 | + do_test (0, 0, (2 << i) + 10, 2 << i, 127, -1); | |
1054 | + | |
1055 | + do_test (0, 0, 2 << i, 2 << i, 254, -1); | |
1056 | + do_test (0, 0, (2 << i) + 10, 2 << i, 254, -1); | |
1057 | + } | |
1058 | + | |
1059 | + for (i = 1; i < 8; ++i) | |
1060 | + { | |
1061 | + do_test (i, 2 * i, (8 << i) - 1, 8 << i, 127, 0); | |
1062 | + do_test (i, 2 * i, 8 << i, 8 << i, 127, 0); | |
1063 | + do_test (i, 2 * i, (8 << i) + 100, 8 << i, 127, 0); | |
1064 | + | |
1065 | + do_test (2 * i, i, (8 << i) - 1, 8 << i, 254, 0); | |
1066 | + do_test (2 * i, i, 8 << i, 8 << i, 254, 0); | |
1067 | + do_test (2 * i, i, (8 << i) + 100, 8 << i, 254, 0); | |
1068 | + | |
1069 | + do_test (i, 2 * i, 8 << i, 8 << i, 127, 1); | |
1070 | + do_test (i, 2 * i, (8 << i) + 100, 8 << i, 127, 1); | |
1071 | + | |
1072 | + do_test (2 * i, i, 8 << i, 8 << i, 254, 1); | |
1073 | + do_test (2 * i, i, (8 << i) + 100, 8 << i, 254, 1); | |
1074 | + | |
1075 | + do_test (i, 2 * i, 8 << i, 8 << i, 127, -1); | |
1076 | + do_test (i, 2 * i, (8 << i) + 100, 8 << i, 127, -1); | |
1077 | + | |
1078 | + do_test (2 * i, i, 8 << i, 8 << i, 254, -1); | |
1079 | + do_test (2 * i, i, (8 << i) + 100, 8 << i, 254, -1); | |
1080 | + } | |
1081 | + | |
1082 | + do_random_tests (); | |
1083 | + return ret; | |
1084 | +} | |
1085 | + | |
1086 | +#include "../test-skeleton.c" | |
1087 | Index: glibc-2.12-2-gc4ccff1/string/test-strnlen.c | |
1088 | =================================================================== | |
1089 | --- /dev/null | |
1090 | +++ glibc-2.12-2-gc4ccff1/string/test-strnlen.c | |
1091 | @@ -0,0 +1,197 @@ | |
1092 | +/* Test and measure strnlen functions. | |
1093 | + Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc. | |
1094 | + This file is part of the GNU C Library. | |
1095 | + Written by Jakub Jelinek <jakub@redhat.com>, 1999. | |
1096 | + | |
1097 | + The GNU C Library is free software; you can redistribute it and/or | |
1098 | + modify it under the terms of the GNU Lesser General Public | |
1099 | + License as published by the Free Software Foundation; either | |
1100 | + version 2.1 of the License, or (at your option) any later version. | |
1101 | + | |
1102 | + The GNU C Library is distributed in the hope that it will be useful, | |
1103 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
1104 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1105 | + Lesser General Public License for more details. | |
1106 | + | |
1107 | + You should have received a copy of the GNU Lesser General Public | |
1108 | + License along with the GNU C Library; if not, write to the Free | |
1109 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
1110 | + 02111-1307 USA. */ | |
1111 | + | |
1112 | +#define TEST_MAIN | |
1113 | +#include "test-string.h" | |
1114 | + | |
1115 | +typedef size_t (*proto_t) (const char *, size_t); | |
1116 | +size_t simple_strnlen (const char *, size_t); | |
1117 | + | |
1118 | +IMPL (simple_strnlen, 0) | |
1119 | +IMPL (strnlen, 1) | |
1120 | + | |
1121 | +size_t | |
1122 | +simple_strnlen (const char *s, size_t maxlen) | |
1123 | +{ | |
1124 | + size_t i; | |
1125 | + | |
1126 | + for (i = 0; i < maxlen && s[i]; ++i); | |
1127 | + return i; | |
1128 | +} | |
1129 | + | |
1130 | +static void | |
1131 | +do_one_test (impl_t *impl, const char *s, size_t maxlen, size_t exp_len) | |
1132 | +{ | |
1133 | + size_t len = CALL (impl, s, maxlen); | |
1134 | + if (len != exp_len) | |
1135 | + { | |
1136 | + error (0, 0, "Wrong result in function %s %zd %zd", impl->name, | |
1137 | + len, exp_len); | |
1138 | + ret = 1; | |
1139 | + return; | |
1140 | + } | |
1141 | + | |
1142 | + if (HP_TIMING_AVAIL) | |
1143 | + { | |
1144 | + hp_timing_t start __attribute ((unused)); | |
1145 | + hp_timing_t stop __attribute ((unused)); | |
1146 | + hp_timing_t best_time = ~ (hp_timing_t) 0; | |
1147 | + size_t i; | |
1148 | + | |
1149 | + for (i = 0; i < 32; ++i) | |
1150 | + { | |
1151 | + HP_TIMING_NOW (start); | |
1152 | + CALL (impl, s, maxlen); | |
1153 | + HP_TIMING_NOW (stop); | |
1154 | + HP_TIMING_BEST (best_time, start, stop); | |
1155 | + } | |
1156 | + | |
1157 | + printf ("\t%zd", (size_t) best_time); | |
1158 | + } | |
1159 | +} | |
1160 | + | |
1161 | +static void | |
1162 | +do_test (size_t align, size_t len, size_t maxlen, int max_char) | |
1163 | +{ | |
1164 | + size_t i; | |
1165 | + | |
1166 | + align &= 7; | |
1167 | + if (align + len >= page_size) | |
1168 | + return; | |
1169 | + | |
1170 | + for (i = 0; i < len; ++i) | |
1171 | + buf1[align + i] = 1 + 7 * i % max_char; | |
1172 | + buf1[align + len] = 0; | |
1173 | + | |
1174 | + if (HP_TIMING_AVAIL) | |
1175 | + printf ("Length %4zd, alignment %2zd:", len, align); | |
1176 | + | |
1177 | + FOR_EACH_IMPL (impl, 0) | |
1178 | + do_one_test (impl, (char *) (buf1 + align), maxlen, MIN (len, maxlen)); | |
1179 | + | |
1180 | + if (HP_TIMING_AVAIL) | |
1181 | + putchar ('\n'); | |
1182 | +} | |
1183 | + | |
1184 | +static void | |
1185 | +do_random_tests (void) | |
1186 | +{ | |
1187 | + size_t i, j, n, align, len; | |
1188 | + unsigned char *p = buf1 + page_size - 512; | |
1189 | + | |
1190 | + for (n = 0; n < ITERATIONS; n++) | |
1191 | + { | |
1192 | + align = random () & 15; | |
1193 | + len = random () & 511; | |
1194 | + if (len + align > 510) | |
1195 | + len = 511 - align - (random () & 7); | |
1196 | + j = len + align + 64; | |
1197 | + if (j > 512) | |
1198 | + j = 512; | |
1199 | + | |
1200 | + for (i = 0; i < j; i++) | |
1201 | + { | |
1202 | + if (i == len + align) | |
1203 | + p[i] = 0; | |
1204 | + else | |
1205 | + { | |
1206 | + p[i] = random () & 255; | |
1207 | + if (i >= align && i < len + align && !p[i]) | |
1208 | + p[i] = (random () & 127) + 1; | |
1209 | + } | |
1210 | + } | |
1211 | + | |
1212 | + FOR_EACH_IMPL (impl, 1) | |
1213 | + { | |
1214 | + if (len > 0 | |
1215 | + && CALL (impl, (char *) (p + align), len - 1) != len - 1) | |
1216 | + { | |
1217 | + error (0, 0, "Iteration %zd (limited) - wrong result in function %s (%zd) %zd != %zd, p %p", | |
1218 | + n, impl->name, align, | |
1219 | + CALL (impl, (char *) (p + align), len - 1), len - 1, p); | |
1220 | + ret = 1; | |
1221 | + } | |
1222 | + if (CALL (impl, (char *) (p + align), len) != len) | |
1223 | + { | |
1224 | + error (0, 0, "Iteration %zd (exact) - wrong result in function %s (%zd) %zd != %zd, p %p", | |
1225 | + n, impl->name, align, | |
1226 | + CALL (impl, (char *) (p + align), len), len, p); | |
1227 | + ret = 1; | |
1228 | + } | |
1229 | + if (CALL (impl, (char *) (p + align), len + 1) != len) | |
1230 | + { | |
1231 | + error (0, 0, "Iteration %zd (long) - wrong result in function %s (%zd) %zd != %zd, p %p", | |
1232 | + n, impl->name, align, | |
1233 | + CALL (impl, (char *) (p + align), len + 1), len, p); | |
1234 | + ret = 1; | |
1235 | + } | |
1236 | + } | |
1237 | + } | |
1238 | +} | |
1239 | + | |
1240 | +int | |
1241 | +test_main (void) | |
1242 | +{ | |
1243 | + size_t i; | |
1244 | + | |
1245 | + test_init (); | |
1246 | + | |
1247 | + printf ("%20s", ""); | |
1248 | + FOR_EACH_IMPL (impl, 0) | |
1249 | + printf ("\t%s", impl->name); | |
1250 | + putchar ('\n'); | |
1251 | + | |
1252 | + for (i = 1; i < 8; ++i) | |
1253 | + { | |
1254 | + do_test (0, i, i - 1, 127); | |
1255 | + do_test (0, i, i, 127); | |
1256 | + do_test (0, i, i + 1, 127); | |
1257 | + } | |
1258 | + | |
1259 | + for (i = 1; i < 8; ++i) | |
1260 | + { | |
1261 | + do_test (i, i, i - 1, 127); | |
1262 | + do_test (i, i, i, 127); | |
1263 | + do_test (i, i, i + 1, 127); | |
1264 | + } | |
1265 | + | |
1266 | + for (i = 2; i <= 10; ++i) | |
1267 | + { | |
1268 | + do_test (0, 1 << i, 5000, 127); | |
1269 | + do_test (1, 1 << i, 5000, 127); | |
1270 | + } | |
1271 | + | |
1272 | + for (i = 1; i < 8; ++i) | |
1273 | + do_test (0, i, 5000, 255); | |
1274 | + | |
1275 | + for (i = 1; i < 8; ++i) | |
1276 | + do_test (i, i, 5000, 255); | |
1277 | + | |
1278 | + for (i = 2; i <= 10; ++i) | |
1279 | + { | |
1280 | + do_test (0, 1 << i, 5000, 255); | |
1281 | + do_test (1, 1 << i, 5000, 255); | |
1282 | + } | |
1283 | + | |
1284 | + do_random_tests (); | |
1285 | + return ret; | |
1286 | +} | |
1287 | + | |
1288 | +#include "../test-skeleton.c" | |
1289 | Index: glibc-2.12-2-gc4ccff1/string/test-strstr.c | |
1290 | =================================================================== | |
1291 | --- /dev/null | |
1292 | +++ glibc-2.12-2-gc4ccff1/string/test-strstr.c | |
1293 | @@ -0,0 +1,194 @@ | |
1294 | +/* Test and measure strstr functions. | |
1295 | + Copyright (C) 2010 Free Software Foundation, Inc. | |
1296 | + This file is part of the GNU C Library. | |
1297 | + Written by Ulrich Drepper <drepper@redhat.com>, 2010. | |
1298 | + | |
1299 | + The GNU C Library is free software; you can redistribute it and/or | |
1300 | + modify it under the terms of the GNU Lesser General Public | |
1301 | + License as published by the Free Software Foundation; either | |
1302 | + version 2.1 of the License, or (at your option) any later version. | |
1303 | + | |
1304 | + The GNU C Library is distributed in the hope that it will be useful, | |
1305 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
1306 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1307 | + Lesser General Public License for more details. | |
1308 | + | |
1309 | + You should have received a copy of the GNU Lesser General Public | |
1310 | + License along with the GNU C Library; if not, write to the Free | |
1311 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
1312 | + 02111-1307 USA. */ | |
1313 | + | |
1314 | +#define TEST_MAIN | |
1315 | +#include "test-string.h" | |
1316 | + | |
1317 | + | |
1318 | +#define STRSTR simple_strstr | |
1319 | +#include "strstr.c" | |
1320 | + | |
1321 | + | |
1322 | +static char * | |
1323 | +stupid_strstr (const char *s1, const char *s2) | |
1324 | +{ | |
1325 | + ssize_t s1len = strlen (s1); | |
1326 | + ssize_t s2len = strlen (s2); | |
1327 | + | |
1328 | + if (s2len > s1len) | |
1329 | + return NULL; | |
1330 | + | |
1331 | + for (ssize_t i = 0; i <= s1len - s2len; ++i) | |
1332 | + { | |
1333 | + size_t j; | |
1334 | + for (j = 0; j < s2len; ++j) | |
1335 | + if (s1[i + j] != s2[j]) | |
1336 | + break; | |
1337 | + if (j == s2len) | |
1338 | + return (char *) s1 + i; | |
1339 | + } | |
1340 | + | |
1341 | + return NULL; | |
1342 | +} | |
1343 | + | |
1344 | + | |
1345 | +typedef char *(*proto_t) (const char *, const char *); | |
1346 | + | |
1347 | +IMPL (stupid_strstr, 0) | |
1348 | +IMPL (simple_strstr, 0) | |
1349 | +IMPL (strstr, 1) | |
1350 | + | |
1351 | + | |
1352 | +static void | |
1353 | +do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result) | |
1354 | +{ | |
1355 | + char *result = CALL (impl, s1, s2); | |
1356 | + if (result != exp_result) | |
1357 | + { | |
1358 | + error (0, 0, "Wrong result in function %s %s %s", impl->name, | |
1359 | + result, exp_result); | |
1360 | + ret = 1; | |
1361 | + return; | |
1362 | + } | |
1363 | + | |
1364 | + if (HP_TIMING_AVAIL) | |
1365 | + { | |
1366 | + hp_timing_t start __attribute ((unused)); | |
1367 | + hp_timing_t stop __attribute ((unused)); | |
1368 | + hp_timing_t best_time = ~(hp_timing_t) 0; | |
1369 | + size_t i; | |
1370 | + | |
1371 | + for (i = 0; i < 32; ++i) | |
1372 | + { | |
1373 | + HP_TIMING_NOW (start); | |
1374 | + CALL (impl, s1, s2); | |
1375 | + HP_TIMING_NOW (stop); | |
1376 | + HP_TIMING_BEST (best_time, start, stop); | |
1377 | + } | |
1378 | + | |
1379 | + printf ("\t%zd", (size_t) best_time); | |
1380 | + } | |
1381 | +} | |
1382 | + | |
1383 | + | |
1384 | +static void | |
1385 | +do_test (size_t align1, size_t align2, size_t len1, size_t len2, | |
1386 | + int fail) | |
1387 | +{ | |
1388 | + char *s1 = (char *) (buf1 + align1); | |
1389 | + char *s2 = (char *) (buf2 + align2); | |
1390 | + | |
1391 | + static const char d[] = "1234567890abcdef"; | |
1392 | +#define dl (sizeof (d) - 1) | |
1393 | + char *ss2 = s2; | |
1394 | + for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0) | |
1395 | + { | |
1396 | + size_t t = l > dl ? dl : l; | |
1397 | + ss2 = mempcpy (ss2, d, t); | |
1398 | + } | |
1399 | + s2[len2] = '\0'; | |
1400 | + | |
1401 | + if (fail) | |
1402 | + { | |
1403 | + char *ss1 = s1; | |
1404 | + for (size_t l = len1; l > 0; l = l > dl ? l - dl : 0) | |
1405 | + { | |
1406 | + size_t t = l > dl ? dl : l; | |
1407 | + memcpy (ss1, d, t); | |
1408 | + ++ss1[len2 > 7 ? 7 : len2 - 1]; | |
1409 | + ss1 += t; | |
1410 | + } | |
1411 | + } | |
1412 | + else | |
1413 | + { | |
1414 | + memset (s1, '0', len1); | |
1415 | + memcpy (s1 + len1 - len2, s2, len2); | |
1416 | + } | |
1417 | + s1[len1] = '\0'; | |
1418 | + | |
1419 | + if (HP_TIMING_AVAIL) | |
1420 | + printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:", | |
1421 | + len1, len2, align1, align2, fail ? "fail" : "found"); | |
1422 | + | |
1423 | + FOR_EACH_IMPL (impl, 0) | |
1424 | + do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2); | |
1425 | + | |
1426 | + if (HP_TIMING_AVAIL) | |
1427 | + putchar ('\n'); | |
1428 | +} | |
1429 | + | |
1430 | + | |
1431 | +static int | |
1432 | +test_main (void) | |
1433 | +{ | |
1434 | + test_init (); | |
1435 | + | |
1436 | + printf ("%23s", ""); | |
1437 | + FOR_EACH_IMPL (impl, 0) | |
1438 | + printf ("\t%s", impl->name); | |
1439 | + putchar ('\n'); | |
1440 | + | |
1441 | + for (size_t klen = 2; klen < 32; ++klen) | |
1442 | + for (size_t hlen = 2 * klen; hlen < 16 * klen; hlen += klen) | |
1443 | + { | |
1444 | + do_test (0, 0, hlen, klen, 0); | |
1445 | + do_test (0, 0, hlen, klen, 1); | |
1446 | + do_test (0, 3, hlen, klen, 0); | |
1447 | + do_test (0, 3, hlen, klen, 1); | |
1448 | + do_test (0, 9, hlen, klen, 0); | |
1449 | + do_test (0, 9, hlen, klen, 1); | |
1450 | + do_test (0, 15, hlen, klen, 0); | |
1451 | + do_test (0, 15, hlen, klen, 1); | |
1452 | + | |
1453 | + do_test (3, 0, hlen, klen, 0); | |
1454 | + do_test (3, 0, hlen, klen, 1); | |
1455 | + do_test (3, 3, hlen, klen, 0); | |
1456 | + do_test (3, 3, hlen, klen, 1); | |
1457 | + do_test (3, 9, hlen, klen, 0); | |
1458 | + do_test (3, 9, hlen, klen, 1); | |
1459 | + do_test (3, 15, hlen, klen, 0); | |
1460 | + do_test (3, 15, hlen, klen, 1); | |
1461 | + | |
1462 | + do_test (9, 0, hlen, klen, 0); | |
1463 | + do_test (9, 0, hlen, klen, 1); | |
1464 | + do_test (9, 3, hlen, klen, 0); | |
1465 | + do_test (9, 3, hlen, klen, 1); | |
1466 | + do_test (9, 9, hlen, klen, 0); | |
1467 | + do_test (9, 9, hlen, klen, 1); | |
1468 | + do_test (9, 15, hlen, klen, 0); | |
1469 | + do_test (9, 15, hlen, klen, 1); | |
1470 | + | |
1471 | + do_test (15, 0, hlen, klen, 0); | |
1472 | + do_test (15, 0, hlen, klen, 1); | |
1473 | + do_test (15, 3, hlen, klen, 0); | |
1474 | + do_test (15, 3, hlen, klen, 1); | |
1475 | + do_test (15, 9, hlen, klen, 0); | |
1476 | + do_test (15, 9, hlen, klen, 1); | |
1477 | + do_test (15, 15, hlen, klen, 0); | |
1478 | + do_test (15, 15, hlen, klen, 1); | |
1479 | + } | |
1480 | + | |
1481 | + do_test (0, 0, page_size - 1, 16, 0); | |
1482 | + do_test (0, 0, page_size - 1, 16, 1); | |
1483 | + | |
1484 | + return ret; | |
1485 | +} | |
1486 | + | |
1487 | +#include "../test-skeleton.c" | |
1488 | Index: glibc-2.12-2-gc4ccff1/string/tester.c | |
1489 | =================================================================== | |
1490 | --- glibc-2.12-2-gc4ccff1.orig/string/tester.c | |
1491 | +++ glibc-2.12-2-gc4ccff1/string/tester.c | |
1492 | @@ -441,20 +441,21 @@ test_strnlen (void) | |
1493 | check (strnlen ("", 10) == 0, 1); /* Empty. */ | |
1494 | check (strnlen ("a", 10) == 1, 2); /* Single char. */ | |
1495 | check (strnlen ("abcd", 10) == 4, 3); /* Multiple chars. */ | |
1496 | - check (strnlen ("foo", (size_t)-1) == 3, 4); /* limits of n. */ | |
1497 | + check (strnlen ("foo", (size_t) -1) == 3, 4); /* limits of n. */ | |
1498 | + check (strnlen ("abcd", 0) == 0, 5); /* Restricted. */ | |
1499 | + check (strnlen ("abcd", 1) == 1, 6); /* Restricted. */ | |
1500 | + check (strnlen ("abcd", 2) == 2, 7); /* Restricted. */ | |
1501 | + check (strnlen ("abcd", 3) == 3, 8); /* Restricted. */ | |
1502 | + check (strnlen ("abcd", 4) == 4, 9); /* Restricted. */ | |
1503 | ||
1504 | - { | |
1505 | - char buf[4096]; | |
1506 | - int i; | |
1507 | - char *p; | |
1508 | - for (i=0; i < 0x100; i++) | |
1509 | - { | |
1510 | - p = (char *) ((unsigned long int)(buf + 0xff) & ~0xff) + i; | |
1511 | - strcpy (p, "OK"); | |
1512 | - strcpy (p+3, "BAD/WRONG"); | |
1513 | - check (strnlen (p, 100) == 2, 5+i); | |
1514 | - } | |
1515 | - } | |
1516 | + char buf[4096]; | |
1517 | + for (int i = 0; i < 0x100; ++i) | |
1518 | + { | |
1519 | + char *p = (char *) ((unsigned long int)(buf + 0xff) & ~0xff) + i; | |
1520 | + strcpy (p, "OK"); | |
1521 | + strcpy (p + 3, "BAD/WRONG"); | |
1522 | + check (strnlen (p, 100) == 2, 10 + i); | |
1523 | + } | |
1524 | } | |
1525 | ||
1526 | static void | |
1527 | Index: glibc-2.12-2-gc4ccff1/string/tst-strlen.c | |
1528 | =================================================================== | |
1529 | --- glibc-2.12-2-gc4ccff1.orig/string/tst-strlen.c | |
1530 | +++ glibc-2.12-2-gc4ccff1/string/tst-strlen.c | |
1531 | @@ -31,11 +31,21 @@ main(int argc, char *argv[]) | |
1532 | buf[words * 4 + 3] = (last & 8) != 0 ? 'e' : '\0'; | |
1533 | buf[words * 4 + 4] = '\0'; | |
1534 | ||
1535 | - if (strlen (buf) != words * 4 + lens[last] | |
1536 | - || strnlen (buf, -1) != words * 4 + lens[last]) | |
1537 | + if (strlen (buf) != words * 4 + lens[last]) | |
1538 | { | |
1539 | - printf ("failed for base=%Zu, words=%Zu, and last=%Zu\n", | |
1540 | - base, words, last); | |
1541 | + printf ("\ | |
1542 | +strlen failed for base=%Zu, words=%Zu, and last=%Zu (is %zd, expected %zd)\n", | |
1543 | + base, words, last, | |
1544 | + strlen (buf), words * 4 + lens[last]); | |
1545 | + return 1; | |
1546 | + } | |
1547 | + | |
1548 | + if (strnlen (buf, -1) != words * 4 + lens[last]) | |
1549 | + { | |
1550 | + printf ("\ | |
1551 | +strnlen failed for base=%Zu, words=%Zu, and last=%Zu (is %zd, expected %zd)\n", | |
1552 | + base, words, last, | |
1553 | + strnlen (buf, -1), words * 4 + lens[last]); | |
1554 | return 1; | |
1555 | } | |
1556 | } | |
1557 | Index: glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/Makefile | |
1558 | =================================================================== | |
1559 | --- glibc-2.12-2-gc4ccff1.orig/sysdeps/i386/i686/multiarch/Makefile | |
1560 | +++ glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/Makefile | |
1561 | @@ -9,7 +9,7 @@ sysdep_routines += bzero-sse2 memset-sse | |
1562 | memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \ | |
1563 | memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \ | |
1564 | strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \ | |
1565 | - memcmp-ssse3 memcmp-sse4 | |
1566 | + memcmp-ssse3 memcmp-sse4 strcasestr-nonascii | |
1567 | ifeq (yes,$(config-cflags-sse4)) | |
1568 | sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c | |
1569 | CFLAGS-strcspn-c.c += -msse4 | |
1570 | @@ -17,6 +17,7 @@ CFLAGS-strpbrk-c.c += -msse4 | |
1571 | CFLAGS-strspn-c.c += -msse4 | |
1572 | CFLAGS-strstr.c += -msse4 | |
1573 | CFLAGS-strcasestr.c += -msse4 | |
1574 | +CFLAGS-strcasestr-nonascii.c += -msse4 | |
1575 | endif | |
1576 | endif | |
1577 | ||
1578 | Index: glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c | |
1579 | =================================================================== | |
1580 | --- /dev/null | |
1581 | +++ glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c | |
1582 | @@ -0,0 +1,2 @@ | |
1583 | +#include <nmmintrin.h> | |
1584 | +#include <sysdeps/x86_64/multiarch/strcasestr-nonascii.c> | |
1585 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/Makefile | |
1586 | =================================================================== | |
1587 | --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/Makefile | |
1588 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/Makefile | |
1589 | @@ -12,7 +12,8 @@ sysdep_routines += _mcount | |
1590 | endif | |
1591 | ||
1592 | ifeq ($(subdir),string) | |
1593 | -sysdep_routines += cacheinfo | |
1594 | +sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii | |
1595 | +gen-as-const-headers += locale-defines.sym | |
1596 | endif | |
1597 | ||
1598 | ifeq ($(subdir),elf) | |
1599 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/locale-defines.sym | |
1600 | =================================================================== | |
1601 | --- /dev/null | |
1602 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/locale-defines.sym | |
1603 | @@ -0,0 +1,11 @@ | |
1604 | +#include <locale/localeinfo.h> | |
1605 | +#include <langinfo.h> | |
1606 | +#include <stddef.h> | |
1607 | + | |
1608 | +-- | |
1609 | + | |
1610 | +LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales) | |
1611 | +LC_CTYPE | |
1612 | +_NL_CTYPE_NONASCII_CASE | |
1613 | +LOCALE_DATA_VALUES offsetof (struct __locale_data, values) | |
1614 | +SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0]) | |
1615 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/Makefile | |
1616 | =================================================================== | |
1617 | --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/Makefile | |
1618 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/Makefile | |
1619 | @@ -5,7 +5,9 @@ endif | |
1620 | ||
1621 | ifeq ($(subdir),string) | |
1622 | sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \ | |
1623 | - strend-sse4 memcmp-sse4 | |
1624 | + strend-sse4 memcmp-sse4 \ | |
1625 | + strcasestr-nonascii strcasecmp_l-ssse3 \ | |
1626 | + strncase_l-ssse3 | |
1627 | ifeq (yes,$(config-cflags-sse4)) | |
1628 | sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c | |
1629 | CFLAGS-strcspn-c.c += -msse4 | |
1630 | @@ -13,5 +15,6 @@ CFLAGS-strpbrk-c.c += -msse4 | |
1631 | CFLAGS-strspn-c.c += -msse4 | |
1632 | CFLAGS-strstr.c += -msse4 | |
1633 | CFLAGS-strcasestr.c += -msse4 | |
1634 | +CFLAGS-strcasestr-nonascii.c += -msse4 | |
1635 | endif | |
1636 | endif | |
1637 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S | |
1638 | =================================================================== | |
1639 | --- /dev/null | |
1640 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S | |
1641 | @@ -0,0 +1,6 @@ | |
1642 | +#define USE_SSSE3 1 | |
1643 | +#define USE_AS_STRCASECMP_L | |
1644 | +#define NO_NOLOCALE_ALIAS | |
1645 | +#define STRCMP __strcasecmp_l_ssse3 | |
1646 | +#define __strcasecmp __strcasecmp_ssse3 | |
1647 | +#include "../strcmp.S" | |
1648 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasecmp_l.S | |
1649 | =================================================================== | |
1650 | --- /dev/null | |
1651 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasecmp_l.S | |
1652 | @@ -0,0 +1,6 @@ | |
1653 | +#define STRCMP __strcasecmp_l | |
1654 | +#define USE_AS_STRCASECMP_L | |
1655 | +#include "strcmp.S" | |
1656 | + | |
1657 | +weak_alias (__strcasecmp_l, strcasecmp_l) | |
1658 | +libc_hidden_def (strcasecmp_l) | |
1659 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasestr-nonascii.c | |
1660 | =================================================================== | |
1661 | --- /dev/null | |
1662 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasestr-nonascii.c | |
1663 | @@ -0,0 +1,50 @@ | |
1664 | +/* strcasestr with SSE4.2 intrinsics (non-ASCII variant) | |
1665 | + Copyright (C) 2010 Free Software Foundation, Inc. | |
1666 | + This file is part of the GNU C Library. | |
1667 | + | |
1668 | + The GNU C Library is free software; you can redistribute it and/or | |
1669 | + modify it under the terms of the GNU Lesser General Public | |
1670 | + License as published by the Free Software Foundation; either | |
1671 | + version 2.1 of the License, or (at your option) any later version. | |
1672 | + | |
1673 | + The GNU C Library is distributed in the hope that it will be useful, | |
1674 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
1675 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
1676 | + Lesser General Public License for more details. | |
1677 | + | |
1678 | + You should have received a copy of the GNU Lesser General Public | |
1679 | + License along with the GNU C Library; if not, write to the Free | |
1680 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
1681 | + 02111-1307 USA. */ | |
1682 | + | |
1683 | +# include <ctype.h> | |
1684 | + | |
1685 | + | |
1686 | +/* Similar to __m128i_strloadu. Convert to lower case for non-POSIX/C | |
1687 | + locale. */ | |
1688 | +static inline __m128i | |
1689 | +__m128i_strloadu_tolower (const unsigned char *p) | |
1690 | +{ | |
1691 | + union | |
1692 | + { | |
1693 | + char b[16]; | |
1694 | + __m128i x; | |
1695 | + } u; | |
1696 | + | |
1697 | + for (int i = 0; i < 16; ++i) | |
1698 | + if (p[i] == 0) | |
1699 | + { | |
1700 | + u.b[i] = 0; | |
1701 | + break; | |
1702 | + } | |
1703 | + else | |
1704 | + u.b[i] = tolower (p[i]); | |
1705 | + | |
1706 | + return u.x; | |
1707 | +} | |
1708 | + | |
1709 | + | |
1710 | +#define STRCASESTR_NONASCII | |
1711 | +#define USE_AS_STRCASESTR | |
1712 | +#define STRSTR_SSE42 __strcasestr_sse42_nonascii | |
1713 | +#include "strstr.c" | |
1714 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasestr.c | |
1715 | =================================================================== | |
1716 | --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/strcasestr.c | |
1717 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasestr.c | |
1718 | @@ -1,3 +1,7 @@ | |
1719 | +extern char *__strcasestr_sse42_nonascii (const unsigned char *s1, | |
1720 | + const unsigned char *s2) | |
1721 | + attribute_hidden; | |
1722 | + | |
1723 | #define USE_AS_STRCASESTR | |
1724 | #define STRSTR_SSE42 __strcasestr_sse42 | |
1725 | #include "strstr.c" | |
1726 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcmp.S | |
1727 | =================================================================== | |
1728 | --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/strcmp.S | |
1729 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcmp.S | |
1730 | @@ -24,7 +24,7 @@ | |
1731 | #ifdef USE_AS_STRNCMP | |
1732 | /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz | |
1733 | if the new counter > the old one or is 0. */ | |
1734 | -#define UPDATE_STRNCMP_COUNTER \ | |
1735 | +# define UPDATE_STRNCMP_COUNTER \ | |
1736 | /* calculate left number to compare */ \ | |
1737 | lea -16(%rcx, %r11), %r9; \ | |
1738 | cmp %r9, %r11; \ | |
1739 | @@ -33,23 +33,50 @@ | |
1740 | je LABEL(strcmp_exitz_sse4_2); \ | |
1741 | mov %r9, %r11 | |
1742 | ||
1743 | -#define STRCMP_SSE42 __strncmp_sse42 | |
1744 | -#define STRCMP_SSSE3 __strncmp_ssse3 | |
1745 | -#define STRCMP_SSE2 __strncmp_sse2 | |
1746 | -#define __GI_STRCMP __GI_strncmp | |
1747 | +# define STRCMP_SSE42 __strncmp_sse42 | |
1748 | +# define STRCMP_SSSE3 __strncmp_ssse3 | |
1749 | +# define STRCMP_SSE2 __strncmp_sse2 | |
1750 | +# define __GI_STRCMP __GI_strncmp | |
1751 | +#elif defined USE_AS_STRCASECMP_L | |
1752 | +# include "locale-defines.h" | |
1753 | + | |
1754 | +# define UPDATE_STRNCMP_COUNTER | |
1755 | + | |
1756 | +# define STRCMP_SSE42 __strcasecmp_l_sse42 | |
1757 | +# define STRCMP_SSSE3 __strcasecmp_l_ssse3 | |
1758 | +# define STRCMP_SSE2 __strcasecmp_l_sse2 | |
1759 | +# define __GI_STRCMP __GI___strcasecmp_l | |
1760 | +#elif defined USE_AS_STRNCASECMP_L | |
1761 | +# include "locale-defines.h" | |
1762 | + | |
1763 | +/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz | |
1764 | + if the new counter > the old one or is 0. */ | |
1765 | +# define UPDATE_STRNCMP_COUNTER \ | |
1766 | + /* calculate left number to compare */ \ | |
1767 | + lea -16(%rcx, %r11), %r9; \ | |
1768 | + cmp %r9, %r11; \ | |
1769 | + jb LABEL(strcmp_exitz_sse4_2); \ | |
1770 | + test %r9, %r9; \ | |
1771 | + je LABEL(strcmp_exitz_sse4_2); \ | |
1772 | + mov %r9, %r11 | |
1773 | + | |
1774 | +# define STRCMP_SSE42 __strncasecmp_l_sse42 | |
1775 | +# define STRCMP_SSSE3 __strncasecmp_l_ssse3 | |
1776 | +# define STRCMP_SSE2 __strncasecmp_l_sse2 | |
1777 | +# define __GI_STRCMP __GI___strncasecmp_l | |
1778 | #else | |
1779 | -#define UPDATE_STRNCMP_COUNTER | |
1780 | -#ifndef STRCMP | |
1781 | -#define STRCMP strcmp | |
1782 | -#define STRCMP_SSE42 __strcmp_sse42 | |
1783 | -#define STRCMP_SSSE3 __strcmp_ssse3 | |
1784 | -#define STRCMP_SSE2 __strcmp_sse2 | |
1785 | -#define __GI_STRCMP __GI_strcmp | |
1786 | -#endif | |
1787 | +# define UPDATE_STRNCMP_COUNTER | |
1788 | +# ifndef STRCMP | |
1789 | +# define STRCMP strcmp | |
1790 | +# define STRCMP_SSE42 __strcmp_sse42 | |
1791 | +# define STRCMP_SSSE3 __strcmp_ssse3 | |
1792 | +# define STRCMP_SSE2 __strcmp_sse2 | |
1793 | +# define __GI_STRCMP __GI_strcmp | |
1794 | +# endif | |
1795 | #endif | |
1796 | ||
1797 | #ifndef LABEL | |
1798 | -#define LABEL(l) L(l) | |
1799 | +# define LABEL(l) L(l) | |
1800 | #endif | |
1801 | ||
1802 | /* Define multiple versions only for the definition in libc. Don't | |
1803 | @@ -73,6 +100,43 @@ ENTRY(STRCMP) | |
1804 | 2: ret | |
1805 | END(STRCMP) | |
1806 | ||
1807 | +# ifdef USE_AS_STRCASECMP_L | |
1808 | +ENTRY(__strcasecmp) | |
1809 | + .type __strcasecmp, @gnu_indirect_function /* Resolver: hands back the chosen implementation's address in %rax. */ | |
1810 | + cmpl $0, __cpu_features+KIND_OFFSET(%rip) | |
1811 | + jne 1f | |
1812 | + call __init_cpu_features /* Only reached while the kind field is still 0. */ | |
1813 | +1: | |
1814 | + leaq __strcasecmp_sse42(%rip), %rax /* Kept if the SSE4.2 bit is set. */ | |
1815 | + testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) | |
1816 | + jnz 2f | |
1817 | + leaq __strcasecmp_ssse3(%rip), %rax /* Otherwise kept if the SSSE3 bit is set. */ | |
1818 | + testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) | |
1819 | + jnz 2f | |
1820 | + leaq __strcasecmp_sse2(%rip), %rax /* Fallback when neither bit is set. */ | |
1821 | +2: ret | |
1822 | +END(__strcasecmp) | |
1823 | +weak_alias (__strcasecmp, strcasecmp) | |
1824 | +# endif | |
1825 | +# ifdef USE_AS_STRNCASECMP_L | |
1826 | +ENTRY(__strncasecmp) | |
1827 | + .type __strncasecmp, @gnu_indirect_function /* Resolver: hands back the chosen implementation's address in %rax. */ | |
1828 | + cmpl $0, __cpu_features+KIND_OFFSET(%rip) | |
1829 | + jne 1f | |
1830 | + call __init_cpu_features /* Only reached while the kind field is still 0. */ | |
1831 | +1: | |
1832 | + leaq __strncasecmp_sse42(%rip), %rax /* Kept if the SSE4.2 bit is set. */ | |
1833 | + testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) | |
1834 | + jnz 2f | |
1835 | + leaq __strncasecmp_ssse3(%rip), %rax /* Otherwise kept if the SSSE3 bit is set. */ | |
1836 | + testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip) | |
1837 | + jnz 2f | |
1838 | + leaq __strncasecmp_sse2(%rip), %rax /* Fallback when neither bit is set. */ | |
1839 | +2: ret | |
1840 | +END(__strncasecmp) | |
1841 | +weak_alias (__strncasecmp, strncasecmp) | |
1842 | +# endif | |
1843 | + | |
1844 | /* We use 0x1a: | |
1845 | _SIDD_SBYTE_OPS | |
1846 | | _SIDD_CMP_EQUAL_EACH | |
1847 | @@ -101,8 +165,31 @@ END(STRCMP) | |
1848 | ||
1849 | /* Put all SSE 4.2 functions together. */ | |
1850 | .section .text.sse4.2,"ax",@progbits | |
1851 | - .align 16 | |
1852 | + .align 16 | |
1853 | .type STRCMP_SSE42, @function | |
1854 | +# ifdef USE_AS_STRCASECMP_L | |
1855 | +ENTRY (__strcasecmp_sse42) | |
1856 | + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax /* TLS offset of __libc_tsd_LOCALE. */ | |
1857 | + movq %fs:(%rax),%rdx /* Load its value into %rdx; no ret follows, execution continues past END. */ | |
1858 | + | |
1859 | + // XXX 5 byte should be before the function | |
1860 | + /* 5-byte NOP. */ | |
1861 | + .byte 0x0f,0x1f,0x44,0x00,0x00 | |
1862 | +END (__strcasecmp_sse42) | |
1863 | + /* FALLTHROUGH to strcasecmp_l. */ | |
1864 | +# endif | |
1865 | +# ifdef USE_AS_STRNCASECMP_L | |
1866 | +ENTRY (__strncasecmp_sse42) | |
1867 | + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax /* TLS offset of __libc_tsd_LOCALE. */ | |
1868 | + movq %fs:(%rax),%rcx /* Load its value into %rcx; no ret follows, execution continues past END. */ | |
1869 | + | |
1870 | + // XXX 5 byte should be before the function | |
1871 | + /* 5-byte NOP. */ | |
1872 | + .byte 0x0f,0x1f,0x44,0x00,0x00 | |
1873 | +END (__strncasecmp_sse42) | |
1874 | + /* FALLTHROUGH to strncasecmp_l. */ | |
1875 | +# endif | |
1876 | + | |
1877 | STRCMP_SSE42: | |
1878 | cfi_startproc | |
1879 | CALL_MCOUNT | |
1880 | @@ -110,24 +197,87 @@ STRCMP_SSE42: | |
1881 | /* | |
1882 | * This implementation uses SSE to compare up to 16 bytes at a time. | |
1883 | */ | |
1884 | -#ifdef USE_AS_STRNCMP | |
1885 | +# ifdef USE_AS_STRCASECMP_L | |
1886 | + /* We have to fall back on the C implementation for locales with encodings | |
1887 | + not matching ASCII for single bytes. The mask must be nonzero: a zero mask always sets ZF, so the jne would never be taken. */ | |
1888 | +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 | |
1889 | + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax | |
1890 | +# else | |
1891 | + movq (%rdx), %rax | |
1892 | +# endif | |
1893 | + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) | |
1894 | + jne __strcasecmp_l_nonascii | |
1895 | +# endif | |
1896 | +# ifdef USE_AS_STRNCASECMP_L | |
1897 | + /* We have to fall back on the C implementation for locales with encodings | |
1898 | + not matching ASCII for single bytes. The mask must be nonzero: a zero mask always sets ZF, so the jne would never be taken. */ | |
1899 | +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 | |
1900 | + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rcx), %rax | |
1901 | +# else | |
1902 | + movq (%rcx), %rax | |
1903 | +# endif | |
1904 | + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) | |
1905 | + jne __strncasecmp_l_nonascii | |
1906 | +# endif | |
1907 | + | |
1908 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
1909 | test %rdx, %rdx | |
1910 | je LABEL(strcmp_exitz_sse4_2) | |
1911 | cmp $1, %rdx | |
1912 | je LABEL(Byte0_sse4_2) | |
1913 | mov %rdx, %r11 | |
1914 | -#endif | |
1915 | +# endif | |
1916 | mov %esi, %ecx | |
1917 | mov %edi, %eax | |
1918 | /* Use 64bit AND here to avoid long NOP padding. */ | |
1919 | and $0x3f, %rcx /* rsi alignment in cache line */ | |
1920 | and $0x3f, %rax /* rdi alignment in cache line */ | |
1921 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
1922 | + .section .rodata.cst16,"aM",@progbits,16 | |
1923 | + .align 16 | |
1924 | +.Lbelowupper_sse4: /* 16 bytes of 0x40 ('@', the ASCII code just below 'A'). */ | |
1925 | + .quad 0x4040404040404040 | |
1926 | + .quad 0x4040404040404040 | |
1927 | +.Ltopupper_sse4: /* 16 bytes of 0x5b ('[', the ASCII code just above 'Z'). */ | |
1928 | + .quad 0x5b5b5b5b5b5b5b5b | |
1929 | + .quad 0x5b5b5b5b5b5b5b5b | |
1930 | +.Ltouppermask_sse4: /* 16 bytes of 0x20; loaded below as LCQWORD_reg. */ | |
1931 | + .quad 0x2020202020202020 | |
1932 | + .quad 0x2020202020202020 | |
1933 | + .previous | |
1934 | + movdqa .Lbelowupper_sse4(%rip), %xmm4 /* Load the three constants into %xmm4-%xmm6. */ | |
1935 | +# define UCLOW_reg %xmm4 | |
1936 | + movdqa .Ltopupper_sse4(%rip), %xmm5 | |
1937 | +# define UCHIGH_reg %xmm5 | |
1938 | + movdqa .Ltouppermask_sse4(%rip), %xmm6 | |
1939 | +# define LCQWORD_reg %xmm6 | |
1940 | +# endif | |
1941 | cmp $0x30, %ecx | |
1942 | ja LABEL(crosscache_sse4_2)/* rsi: 16-byte load will cross cache line */ | |
1943 | cmp $0x30, %eax | |
1944 | ja LABEL(crosscache_sse4_2)/* rdi: 16-byte load will cross cache line */ | |
1945 | movdqu (%rdi), %xmm1 | |
1946 | movdqu (%rsi), %xmm2 | |
1947 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
1948 | +# define TOLOWER(reg1, reg2) /* OR LCQWORD_reg into the bytes of reg1 and reg2 that compare (signed) above UCLOW_reg and below UCHIGH_reg; clobbers %xmm7-%xmm10. */ \ | |
1949 | + movdqa reg1, %xmm7; \ | |
1950 | + movdqa UCHIGH_reg, %xmm8; \ | |
1951 | + movdqa reg2, %xmm9; \ | |
1952 | + movdqa UCHIGH_reg, %xmm10; \ | |
1953 | + pcmpgtb UCLOW_reg, %xmm7; /* reg1 byte > UCLOW byte */ \ | |
1954 | + pcmpgtb reg1, %xmm8; /* UCHIGH byte > reg1 byte */ \ | |
1955 | + pcmpgtb UCLOW_reg, %xmm9; /* same two tests for reg2 */ \ | |
1956 | + pcmpgtb reg2, %xmm10; \ | |
1957 | + pand %xmm8, %xmm7; /* all-ones where both reg1 tests hold */ \ | |
1958 | + pand %xmm10, %xmm9; /* all-ones where both reg2 tests hold */ \ | |
1959 | + pand LCQWORD_reg, %xmm7; /* keep only the LCQWORD bits there */ \ | |
1960 | + pand LCQWORD_reg, %xmm9; \ | |
1961 | + por %xmm7, reg1; /* set those bits in the selected bytes */ \ | |
1962 | + por %xmm9, reg2 | |
1963 | + TOLOWER (%xmm1, %xmm2) | |
1964 | +# else | |
1965 | +# define TOLOWER(reg1, reg2) | |
1966 | +# endif | |
1967 | pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ | |
1968 | pcmpeqb %xmm1, %xmm0 /* Any null chars? */ | |
1969 | pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ | |
1970 | @@ -135,10 +285,10 @@ STRCMP_SSE42: | |
1971 | pmovmskb %xmm1, %edx | |
1972 | sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ | |
1973 | jnz LABEL(less16bytes_sse4_2)/* If not, find different value or null char */ | |
1974 | -#ifdef USE_AS_STRNCMP | |
1975 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
1976 | sub $16, %r11 | |
1977 | jbe LABEL(strcmp_exitz_sse4_2)/* finish comparision */ | |
1978 | -#endif | |
1979 | +# endif | |
1980 | add $16, %rsi /* prepare to search next 16 bytes */ | |
1981 | add $16, %rdi /* prepare to search next 16 bytes */ | |
1982 | ||
1983 | @@ -180,7 +330,13 @@ LABEL(ashr_0_sse4_2): | |
1984 | movdqa (%rsi), %xmm1 | |
1985 | pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ | |
1986 | pcmpeqb %xmm1, %xmm0 /* Any null chars? */ | |
1987 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
1988 | pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ | |
1989 | +# else | |
1990 | + movdqa (%rdi), %xmm2 | |
1991 | + TOLOWER (%xmm1, %xmm2) | |
1992 | + pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */ | |
1993 | +# endif | |
1994 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
1995 | pmovmskb %xmm1, %r9d | |
1996 | shr %cl, %edx /* adjust 0xffff for offset */ | |
1997 | @@ -204,44 +360,60 @@ LABEL(ashr_0_sse4_2): | |
1998 | .p2align 4 | |
1999 | LABEL(ashr_0_use_sse4_2): | |
2000 | movdqa (%rdi,%rdx), %xmm0 | |
2001 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2002 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2003 | +# else | |
2004 | + movdqa (%rsi,%rdx), %xmm1 | |
2005 | + TOLOWER (%xmm0, %xmm1) | |
2006 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2007 | +# endif | |
2008 | lea 16(%rdx), %rdx | |
2009 | jbe LABEL(ashr_0_use_sse4_2_exit) | |
2010 | -#ifdef USE_AS_STRNCMP | |
2011 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2012 | sub $16, %r11 | |
2013 | jbe LABEL(strcmp_exitz_sse4_2) | |
2014 | -#endif | |
2015 | +# endif | |
2016 | ||
2017 | movdqa (%rdi,%rdx), %xmm0 | |
2018 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2019 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2020 | +# else | |
2021 | + movdqa (%rsi,%rdx), %xmm1 | |
2022 | + TOLOWER (%xmm0, %xmm1) | |
2023 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2024 | +# endif | |
2025 | lea 16(%rdx), %rdx | |
2026 | jbe LABEL(ashr_0_use_sse4_2_exit) | |
2027 | -#ifdef USE_AS_STRNCMP | |
2028 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2029 | sub $16, %r11 | |
2030 | jbe LABEL(strcmp_exitz_sse4_2) | |
2031 | -#endif | |
2032 | +# endif | |
2033 | jmp LABEL(ashr_0_use_sse4_2) | |
2034 | ||
2035 | ||
2036 | .p2align 4 | |
2037 | LABEL(ashr_0_use_sse4_2_exit): | |
2038 | jnc LABEL(strcmp_exitz_sse4_2) | |
2039 | -#ifdef USE_AS_STRNCMP | |
2040 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2041 | sub %rcx, %r11 | |
2042 | jbe LABEL(strcmp_exitz_sse4_2) | |
2043 | -#endif | |
2044 | +# endif | |
2045 | lea -16(%rdx, %rcx), %rcx | |
2046 | movzbl (%rdi, %rcx), %eax | |
2047 | movzbl (%rsi, %rcx), %edx | |
2048 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
2049 | + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx | |
2050 | + movl (%rcx,%rax,4), %eax | |
2051 | + movl (%rcx,%rdx,4), %edx | |
2052 | +# endif | |
2053 | sub %edx, %eax | |
2054 | ret | |
2055 | ||
2056 | ||
2057 | ||
2058 | - | |
2059 | /* | |
2060 | * The following cases will be handled by ashr_1 | |
2061 | - * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2062 | + * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2063 | * n(15) n -15 0(15 +(n-15) - n) ashr_1 | |
2064 | */ | |
2065 | .p2align 4 | |
2066 | @@ -251,6 +423,7 @@ LABEL(ashr_1_sse4_2): | |
2067 | movdqa (%rsi), %xmm1 | |
2068 | pcmpeqb %xmm1, %xmm0 /* Any null chars? */ | |
2069 | pslldq $15, %xmm2 /* shift first string to align with second */ | |
2070 | + TOLOWER (%xmm1, %xmm2) | |
2071 | pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */ | |
2072 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
2073 | pmovmskb %xmm2, %r9d | |
2074 | @@ -281,12 +454,18 @@ LABEL(loop_ashr_1_use_sse4_2): | |
2075 | ||
2076 | movdqa (%rdi, %rdx), %xmm0 | |
2077 | palignr $1, -16(%rdi, %rdx), %xmm0 | |
2078 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2079 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2080 | +# else | |
2081 | + movdqa (%rsi,%rdx), %xmm1 | |
2082 | + TOLOWER (%xmm0, %xmm1) | |
2083 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2084 | +# endif | |
2085 | jbe LABEL(use_sse4_2_exit) | |
2086 | -#ifdef USE_AS_STRNCMP | |
2087 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2088 | sub $16, %r11 | |
2089 | jbe LABEL(strcmp_exitz_sse4_2) | |
2090 | -#endif | |
2091 | +# endif | |
2092 | ||
2093 | add $16, %rdx | |
2094 | add $16, %r10 | |
2095 | @@ -294,12 +473,18 @@ LABEL(loop_ashr_1_use_sse4_2): | |
2096 | ||
2097 | movdqa (%rdi, %rdx), %xmm0 | |
2098 | palignr $1, -16(%rdi, %rdx), %xmm0 | |
2099 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2100 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2101 | +# else | |
2102 | + movdqa (%rsi,%rdx), %xmm1 | |
2103 | + TOLOWER (%xmm0, %xmm1) | |
2104 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2105 | +# endif | |
2106 | jbe LABEL(use_sse4_2_exit) | |
2107 | -#ifdef USE_AS_STRNCMP | |
2108 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2109 | sub $16, %r11 | |
2110 | jbe LABEL(strcmp_exitz_sse4_2) | |
2111 | -#endif | |
2112 | +# endif | |
2113 | add $16, %rdx | |
2114 | jmp LABEL(loop_ashr_1_use_sse4_2) | |
2115 | ||
2116 | @@ -309,10 +494,10 @@ LABEL(nibble_ashr_1_use_sse4_2): | |
2117 | movdqa -16(%rdi, %rdx), %xmm0 | |
2118 | psrldq $1, %xmm0 | |
2119 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2120 | -#ifdef USE_AS_STRNCMP | |
2121 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2122 | cmp %r11, %rcx | |
2123 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2124 | -#endif | |
2125 | +# endif | |
2126 | cmp $14, %ecx | |
2127 | ja LABEL(loop_ashr_1_use_sse4_2) | |
2128 | ||
2129 | @@ -320,7 +505,7 @@ LABEL(nibble_ashr_1_use_sse4_2): | |
2130 | ||
2131 | /* | |
2132 | * The following cases will be handled by ashr_2 | |
2133 | - * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2134 | + * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2135 | * n(14~15) n -14 1(15 +(n-14) - n) ashr_2 | |
2136 | */ | |
2137 | .p2align 4 | |
2138 | @@ -330,6 +515,7 @@ LABEL(ashr_2_sse4_2): | |
2139 | movdqa (%rsi), %xmm1 | |
2140 | pcmpeqb %xmm1, %xmm0 | |
2141 | pslldq $14, %xmm2 | |
2142 | + TOLOWER (%xmm1, %xmm2) | |
2143 | pcmpeqb %xmm1, %xmm2 | |
2144 | psubb %xmm0, %xmm2 | |
2145 | pmovmskb %xmm2, %r9d | |
2146 | @@ -360,12 +546,18 @@ LABEL(loop_ashr_2_use_sse4_2): | |
2147 | ||
2148 | movdqa (%rdi, %rdx), %xmm0 | |
2149 | palignr $2, -16(%rdi, %rdx), %xmm0 | |
2150 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2151 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2152 | +# else | |
2153 | + movdqa (%rsi,%rdx), %xmm1 | |
2154 | + TOLOWER (%xmm0, %xmm1) | |
2155 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2156 | +# endif | |
2157 | jbe LABEL(use_sse4_2_exit) | |
2158 | -#ifdef USE_AS_STRNCMP | |
2159 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2160 | sub $16, %r11 | |
2161 | jbe LABEL(strcmp_exitz_sse4_2) | |
2162 | -#endif | |
2163 | +# endif | |
2164 | ||
2165 | add $16, %rdx | |
2166 | add $16, %r10 | |
2167 | @@ -373,12 +565,18 @@ LABEL(loop_ashr_2_use_sse4_2): | |
2168 | ||
2169 | movdqa (%rdi, %rdx), %xmm0 | |
2170 | palignr $2, -16(%rdi, %rdx), %xmm0 | |
2171 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2172 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2173 | +# else | |
2174 | + movdqa (%rsi,%rdx), %xmm1 | |
2175 | + TOLOWER (%xmm0, %xmm1) | |
2176 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2177 | +# endif | |
2178 | jbe LABEL(use_sse4_2_exit) | |
2179 | -#ifdef USE_AS_STRNCMP | |
2180 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2181 | sub $16, %r11 | |
2182 | jbe LABEL(strcmp_exitz_sse4_2) | |
2183 | -#endif | |
2184 | +# endif | |
2185 | add $16, %rdx | |
2186 | jmp LABEL(loop_ashr_2_use_sse4_2) | |
2187 | ||
2188 | @@ -388,10 +586,10 @@ LABEL(nibble_ashr_2_use_sse4_2): | |
2189 | movdqa -16(%rdi, %rdx), %xmm0 | |
2190 | psrldq $2, %xmm0 | |
2191 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2192 | -#ifdef USE_AS_STRNCMP | |
2193 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2194 | cmp %r11, %rcx | |
2195 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2196 | -#endif | |
2197 | +# endif | |
2198 | cmp $13, %ecx | |
2199 | ja LABEL(loop_ashr_2_use_sse4_2) | |
2200 | ||
2201 | @@ -409,6 +607,7 @@ LABEL(ashr_3_sse4_2): | |
2202 | movdqa (%rsi), %xmm1 | |
2203 | pcmpeqb %xmm1, %xmm0 | |
2204 | pslldq $13, %xmm2 | |
2205 | + TOLOWER (%xmm1, %xmm2) | |
2206 | pcmpeqb %xmm1, %xmm2 | |
2207 | psubb %xmm0, %xmm2 | |
2208 | pmovmskb %xmm2, %r9d | |
2209 | @@ -439,12 +638,18 @@ LABEL(loop_ashr_3_use_sse4_2): | |
2210 | ||
2211 | movdqa (%rdi, %rdx), %xmm0 | |
2212 | palignr $3, -16(%rdi, %rdx), %xmm0 | |
2213 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2214 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2215 | +# else | |
2216 | + movdqa (%rsi,%rdx), %xmm1 | |
2217 | + TOLOWER (%xmm0, %xmm1) | |
2218 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2219 | +# endif | |
2220 | jbe LABEL(use_sse4_2_exit) | |
2221 | -#ifdef USE_AS_STRNCMP | |
2222 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2223 | sub $16, %r11 | |
2224 | jbe LABEL(strcmp_exitz_sse4_2) | |
2225 | -#endif | |
2226 | +# endif | |
2227 | ||
2228 | add $16, %rdx | |
2229 | add $16, %r10 | |
2230 | @@ -452,12 +657,18 @@ LABEL(loop_ashr_3_use_sse4_2): | |
2231 | ||
2232 | movdqa (%rdi, %rdx), %xmm0 | |
2233 | palignr $3, -16(%rdi, %rdx), %xmm0 | |
2234 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2235 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2236 | +# else | |
2237 | + movdqa (%rsi,%rdx), %xmm1 | |
2238 | + TOLOWER (%xmm0, %xmm1) | |
2239 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2240 | +# endif | |
2241 | jbe LABEL(use_sse4_2_exit) | |
2242 | -#ifdef USE_AS_STRNCMP | |
2243 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2244 | sub $16, %r11 | |
2245 | jbe LABEL(strcmp_exitz_sse4_2) | |
2246 | -#endif | |
2247 | +# endif | |
2248 | add $16, %rdx | |
2249 | jmp LABEL(loop_ashr_3_use_sse4_2) | |
2250 | ||
2251 | @@ -467,10 +678,10 @@ LABEL(nibble_ashr_3_use_sse4_2): | |
2252 | movdqa -16(%rdi, %rdx), %xmm0 | |
2253 | psrldq $3, %xmm0 | |
2254 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2255 | -#ifdef USE_AS_STRNCMP | |
2256 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2257 | cmp %r11, %rcx | |
2258 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2259 | -#endif | |
2260 | +# endif | |
2261 | cmp $12, %ecx | |
2262 | ja LABEL(loop_ashr_3_use_sse4_2) | |
2263 | ||
2264 | @@ -488,6 +699,7 @@ LABEL(ashr_4_sse4_2): | |
2265 | movdqa (%rsi), %xmm1 | |
2266 | pcmpeqb %xmm1, %xmm0 | |
2267 | pslldq $12, %xmm2 | |
2268 | + TOLOWER (%xmm1, %xmm2) | |
2269 | pcmpeqb %xmm1, %xmm2 | |
2270 | psubb %xmm0, %xmm2 | |
2271 | pmovmskb %xmm2, %r9d | |
2272 | @@ -519,12 +731,18 @@ LABEL(loop_ashr_4_use_sse4_2): | |
2273 | ||
2274 | movdqa (%rdi, %rdx), %xmm0 | |
2275 | palignr $4, -16(%rdi, %rdx), %xmm0 | |
2276 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2277 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2278 | +# else | |
2279 | + movdqa (%rsi,%rdx), %xmm1 | |
2280 | + TOLOWER (%xmm0, %xmm1) | |
2281 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2282 | +# endif | |
2283 | jbe LABEL(use_sse4_2_exit) | |
2284 | -#ifdef USE_AS_STRNCMP | |
2285 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2286 | sub $16, %r11 | |
2287 | jbe LABEL(strcmp_exitz_sse4_2) | |
2288 | -#endif | |
2289 | +# endif | |
2290 | ||
2291 | add $16, %rdx | |
2292 | add $16, %r10 | |
2293 | @@ -532,12 +750,18 @@ LABEL(loop_ashr_4_use_sse4_2): | |
2294 | ||
2295 | movdqa (%rdi, %rdx), %xmm0 | |
2296 | palignr $4, -16(%rdi, %rdx), %xmm0 | |
2297 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2298 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2299 | +# else | |
2300 | + movdqa (%rsi,%rdx), %xmm1 | |
2301 | + TOLOWER (%xmm0, %xmm1) | |
2302 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2303 | +# endif | |
2304 | jbe LABEL(use_sse4_2_exit) | |
2305 | -#ifdef USE_AS_STRNCMP | |
2306 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2307 | sub $16, %r11 | |
2308 | jbe LABEL(strcmp_exitz_sse4_2) | |
2309 | -#endif | |
2310 | +# endif | |
2311 | add $16, %rdx | |
2312 | jmp LABEL(loop_ashr_4_use_sse4_2) | |
2313 | ||
2314 | @@ -547,10 +771,10 @@ LABEL(nibble_ashr_4_use_sse4_2): | |
2315 | movdqa -16(%rdi, %rdx), %xmm0 | |
2316 | psrldq $4, %xmm0 | |
2317 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2318 | -#ifdef USE_AS_STRNCMP | |
2319 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2320 | cmp %r11, %rcx | |
2321 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2322 | -#endif | |
2323 | +# endif | |
2324 | cmp $11, %ecx | |
2325 | ja LABEL(loop_ashr_4_use_sse4_2) | |
2326 | ||
2327 | @@ -559,7 +783,7 @@ LABEL(nibble_ashr_4_use_sse4_2): | |
2328 | /* | |
2329 | * The following cases will be handled by ashr_5 | |
2330 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2331 | - * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5 | |
2332 | + * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5 | |
2333 | */ | |
2334 | .p2align 4 | |
2335 | LABEL(ashr_5_sse4_2): | |
2336 | @@ -568,6 +792,7 @@ LABEL(ashr_5_sse4_2): | |
2337 | movdqa (%rsi), %xmm1 | |
2338 | pcmpeqb %xmm1, %xmm0 | |
2339 | pslldq $11, %xmm2 | |
2340 | + TOLOWER (%xmm1, %xmm2) | |
2341 | pcmpeqb %xmm1, %xmm2 | |
2342 | psubb %xmm0, %xmm2 | |
2343 | pmovmskb %xmm2, %r9d | |
2344 | @@ -599,12 +824,18 @@ LABEL(loop_ashr_5_use_sse4_2): | |
2345 | ||
2346 | movdqa (%rdi, %rdx), %xmm0 | |
2347 | palignr $5, -16(%rdi, %rdx), %xmm0 | |
2348 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2349 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2350 | +# else | |
2351 | + movdqa (%rsi,%rdx), %xmm1 | |
2352 | + TOLOWER (%xmm0, %xmm1) | |
2353 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2354 | +# endif | |
2355 | jbe LABEL(use_sse4_2_exit) | |
2356 | -#ifdef USE_AS_STRNCMP | |
2357 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2358 | sub $16, %r11 | |
2359 | jbe LABEL(strcmp_exitz_sse4_2) | |
2360 | -#endif | |
2361 | +# endif | |
2362 | ||
2363 | add $16, %rdx | |
2364 | add $16, %r10 | |
2365 | @@ -613,12 +844,18 @@ LABEL(loop_ashr_5_use_sse4_2): | |
2366 | movdqa (%rdi, %rdx), %xmm0 | |
2367 | ||
2368 | palignr $5, -16(%rdi, %rdx), %xmm0 | |
2369 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2370 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2371 | +# else | |
2372 | + movdqa (%rsi,%rdx), %xmm1 | |
2373 | + TOLOWER (%xmm0, %xmm1) | |
2374 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2375 | +# endif | |
2376 | jbe LABEL(use_sse4_2_exit) | |
2377 | -#ifdef USE_AS_STRNCMP | |
2378 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2379 | sub $16, %r11 | |
2380 | jbe LABEL(strcmp_exitz_sse4_2) | |
2381 | -#endif | |
2382 | +# endif | |
2383 | add $16, %rdx | |
2384 | jmp LABEL(loop_ashr_5_use_sse4_2) | |
2385 | ||
2386 | @@ -628,10 +865,10 @@ LABEL(nibble_ashr_5_use_sse4_2): | |
2387 | movdqa -16(%rdi, %rdx), %xmm0 | |
2388 | psrldq $5, %xmm0 | |
2389 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2390 | -#ifdef USE_AS_STRNCMP | |
2391 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2392 | cmp %r11, %rcx | |
2393 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2394 | -#endif | |
2395 | +# endif | |
2396 | cmp $10, %ecx | |
2397 | ja LABEL(loop_ashr_5_use_sse4_2) | |
2398 | ||
2399 | @@ -640,7 +877,7 @@ LABEL(nibble_ashr_5_use_sse4_2): | |
2400 | /* | |
2401 | * The following cases will be handled by ashr_6 | |
2402 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2403 | - * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6 | |
2404 | + * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6 | |
2405 | */ | |
2406 | .p2align 4 | |
2407 | LABEL(ashr_6_sse4_2): | |
2408 | @@ -649,6 +886,7 @@ LABEL(ashr_6_sse4_2): | |
2409 | movdqa (%rsi), %xmm1 | |
2410 | pcmpeqb %xmm1, %xmm0 | |
2411 | pslldq $10, %xmm2 | |
2412 | + TOLOWER (%xmm1, %xmm2) | |
2413 | pcmpeqb %xmm1, %xmm2 | |
2414 | psubb %xmm0, %xmm2 | |
2415 | pmovmskb %xmm2, %r9d | |
2416 | @@ -680,12 +918,18 @@ LABEL(loop_ashr_6_use_sse4_2): | |
2417 | ||
2418 | movdqa (%rdi, %rdx), %xmm0 | |
2419 | palignr $6, -16(%rdi, %rdx), %xmm0 | |
2420 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2421 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2422 | + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2423 | +# else | |
2424 | + movdqa (%rsi,%rdx), %xmm1 | |
2425 | + TOLOWER (%xmm0, %xmm1) | |
2426 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2427 | +# endif | |
2428 | jbe LABEL(use_sse4_2_exit) | |
2429 | -#ifdef USE_AS_STRNCMP | |
2430 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2431 | sub $16, %r11 | |
2432 | jbe LABEL(strcmp_exitz_sse4_2) | |
2433 | -#endif | |
2434 | +# endif | |
2435 | ||
2436 | add $16, %rdx | |
2437 | add $16, %r10 | |
2438 | @@ -693,12 +937,18 @@ LABEL(loop_ashr_6_use_sse4_2): | |
2439 | ||
2440 | movdqa (%rdi, %rdx), %xmm0 | |
2441 | palignr $6, -16(%rdi, %rdx), %xmm0 | |
2442 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2443 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2444 | + pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2445 | +# else | |
2446 | + movdqa (%rsi,%rdx), %xmm1 | |
2447 | + TOLOWER (%xmm0, %xmm1) | |
2448 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2449 | +# endif | |
2450 | jbe LABEL(use_sse4_2_exit) | |
2451 | -#ifdef USE_AS_STRNCMP | |
2452 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2453 | sub $16, %r11 | |
2454 | jbe LABEL(strcmp_exitz_sse4_2) | |
2455 | -#endif | |
2456 | +# endif | |
2457 | add $16, %rdx | |
2458 | jmp LABEL(loop_ashr_6_use_sse4_2) | |
2459 | ||
2460 | @@ -708,10 +958,10 @@ LABEL(nibble_ashr_6_use_sse4_2): | |
2461 | movdqa -16(%rdi, %rdx), %xmm0 | |
2462 | psrldq $6, %xmm0 | |
2463 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2464 | -#ifdef USE_AS_STRNCMP | |
2465 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2466 | cmp %r11, %rcx | |
2467 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2468 | -#endif | |
2469 | +# endif | |
2470 | cmp $9, %ecx | |
2471 | ja LABEL(loop_ashr_6_use_sse4_2) | |
2472 | ||
2473 | @@ -720,7 +970,7 @@ LABEL(nibble_ashr_6_use_sse4_2): | |
2474 | /* | |
2475 | * The following cases will be handled by ashr_7 | |
2476 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2477 | - * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7 | |
2478 | + * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7 | |
2479 | */ | |
2480 | .p2align 4 | |
2481 | LABEL(ashr_7_sse4_2): | |
2482 | @@ -729,6 +979,7 @@ LABEL(ashr_7_sse4_2): | |
2483 | movdqa (%rsi), %xmm1 | |
2484 | pcmpeqb %xmm1, %xmm0 | |
2485 | pslldq $9, %xmm2 | |
2486 | + TOLOWER (%xmm1, %xmm2) | |
2487 | pcmpeqb %xmm1, %xmm2 | |
2488 | psubb %xmm0, %xmm2 | |
2489 | pmovmskb %xmm2, %r9d | |
2490 | @@ -760,12 +1011,18 @@ LABEL(loop_ashr_7_use_sse4_2): | |
2491 | ||
2492 | movdqa (%rdi, %rdx), %xmm0 | |
2493 | palignr $7, -16(%rdi, %rdx), %xmm0 | |
2494 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2495 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2496 | +# else | |
2497 | + movdqa (%rsi,%rdx), %xmm1 | |
2498 | + TOLOWER (%xmm0, %xmm1) | |
2499 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2500 | +# endif | |
2501 | jbe LABEL(use_sse4_2_exit) | |
2502 | -#ifdef USE_AS_STRNCMP | |
2503 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2504 | sub $16, %r11 | |
2505 | jbe LABEL(strcmp_exitz_sse4_2) | |
2506 | -#endif | |
2507 | +# endif | |
2508 | ||
2509 | add $16, %rdx | |
2510 | add $16, %r10 | |
2511 | @@ -773,12 +1030,18 @@ LABEL(loop_ashr_7_use_sse4_2): | |
2512 | ||
2513 | movdqa (%rdi, %rdx), %xmm0 | |
2514 | palignr $7, -16(%rdi, %rdx), %xmm0 | |
2515 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2516 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2517 | +# else | |
2518 | + movdqa (%rsi,%rdx), %xmm1 | |
2519 | + TOLOWER (%xmm0, %xmm1) | |
2520 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2521 | +# endif | |
2522 | jbe LABEL(use_sse4_2_exit) | |
2523 | -#ifdef USE_AS_STRNCMP | |
2524 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2525 | sub $16, %r11 | |
2526 | jbe LABEL(strcmp_exitz_sse4_2) | |
2527 | -#endif | |
2528 | +# endif | |
2529 | add $16, %rdx | |
2530 | jmp LABEL(loop_ashr_7_use_sse4_2) | |
2531 | ||
2532 | @@ -788,10 +1051,10 @@ LABEL(nibble_ashr_7_use_sse4_2): | |
2533 | movdqa -16(%rdi, %rdx), %xmm0 | |
2534 | psrldq $7, %xmm0 | |
2535 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2536 | -#ifdef USE_AS_STRNCMP | |
2537 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2538 | cmp %r11, %rcx | |
2539 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2540 | -#endif | |
2541 | +# endif | |
2542 | cmp $8, %ecx | |
2543 | ja LABEL(loop_ashr_7_use_sse4_2) | |
2544 | ||
2545 | @@ -800,7 +1063,7 @@ LABEL(nibble_ashr_7_use_sse4_2): | |
2546 | /* | |
2547 | * The following cases will be handled by ashr_8 | |
2548 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2549 | - * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8 | |
2550 | + * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8 | |
2551 | */ | |
2552 | .p2align 4 | |
2553 | LABEL(ashr_8_sse4_2): | |
2554 | @@ -809,6 +1072,7 @@ LABEL(ashr_8_sse4_2): | |
2555 | movdqa (%rsi), %xmm1 | |
2556 | pcmpeqb %xmm1, %xmm0 | |
2557 | pslldq $8, %xmm2 | |
2558 | + TOLOWER (%xmm1, %xmm2) | |
2559 | pcmpeqb %xmm1, %xmm2 | |
2560 | psubb %xmm0, %xmm2 | |
2561 | pmovmskb %xmm2, %r9d | |
2562 | @@ -840,12 +1104,18 @@ LABEL(loop_ashr_8_use_sse4_2): | |
2563 | ||
2564 | movdqa (%rdi, %rdx), %xmm0 | |
2565 | palignr $8, -16(%rdi, %rdx), %xmm0 | |
2566 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2567 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2568 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2569 | +# else | |
2570 | + movdqa (%rsi,%rdx), %xmm1 | |
2571 | + TOLOWER (%xmm0, %xmm1) | |
2572 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2573 | +# endif | |
2574 | jbe LABEL(use_sse4_2_exit) | |
2575 | -#ifdef USE_AS_STRNCMP | |
2576 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2577 | sub $16, %r11 | |
2578 | jbe LABEL(strcmp_exitz_sse4_2) | |
2579 | -#endif | |
2580 | +# endif | |
2581 | ||
2582 | add $16, %rdx | |
2583 | add $16, %r10 | |
2584 | @@ -853,12 +1123,18 @@ LABEL(loop_ashr_8_use_sse4_2): | |
2585 | ||
2586 | movdqa (%rdi, %rdx), %xmm0 | |
2587 | palignr $8, -16(%rdi, %rdx), %xmm0 | |
2588 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2589 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2590 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2591 | +# else | |
2592 | + movdqa (%rsi,%rdx), %xmm1 | |
2593 | + TOLOWER (%xmm0, %xmm1) | |
2594 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2595 | +# endif | |
2596 | jbe LABEL(use_sse4_2_exit) | |
2597 | -#ifdef USE_AS_STRNCMP | |
2598 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2599 | sub $16, %r11 | |
2600 | jbe LABEL(strcmp_exitz_sse4_2) | |
2601 | -#endif | |
2602 | +# endif | |
2603 | add $16, %rdx | |
2604 | jmp LABEL(loop_ashr_8_use_sse4_2) | |
2605 | ||
2606 | @@ -868,10 +1144,10 @@ LABEL(nibble_ashr_8_use_sse4_2): | |
2607 | movdqa -16(%rdi, %rdx), %xmm0 | |
2608 | psrldq $8, %xmm0 | |
2609 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2610 | -#ifdef USE_AS_STRNCMP | |
2611 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2612 | cmp %r11, %rcx | |
2613 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2614 | -#endif | |
2615 | +# endif | |
2616 | cmp $7, %ecx | |
2617 | ja LABEL(loop_ashr_8_use_sse4_2) | |
2618 | ||
2619 | @@ -880,7 +1156,7 @@ LABEL(nibble_ashr_8_use_sse4_2): | |
2620 | /* | |
2621 | * The following cases will be handled by ashr_9 | |
2622 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2623 | - * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9 | |
2624 | + * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9 | |
2625 | */ | |
2626 | .p2align 4 | |
2627 | LABEL(ashr_9_sse4_2): | |
2628 | @@ -889,6 +1165,7 @@ LABEL(ashr_9_sse4_2): | |
2629 | movdqa (%rsi), %xmm1 | |
2630 | pcmpeqb %xmm1, %xmm0 | |
2631 | pslldq $7, %xmm2 | |
2632 | + TOLOWER (%xmm1, %xmm2) | |
2633 | pcmpeqb %xmm1, %xmm2 | |
2634 | psubb %xmm0, %xmm2 | |
2635 | pmovmskb %xmm2, %r9d | |
2636 | @@ -921,12 +1198,18 @@ LABEL(loop_ashr_9_use_sse4_2): | |
2637 | movdqa (%rdi, %rdx), %xmm0 | |
2638 | ||
2639 | palignr $9, -16(%rdi, %rdx), %xmm0 | |
2640 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2641 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2642 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2643 | +# else | |
2644 | + movdqa (%rsi,%rdx), %xmm1 | |
2645 | + TOLOWER (%xmm0, %xmm1) | |
2646 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2647 | +# endif | |
2648 | jbe LABEL(use_sse4_2_exit) | |
2649 | -#ifdef USE_AS_STRNCMP | |
2650 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2651 | sub $16, %r11 | |
2652 | jbe LABEL(strcmp_exitz_sse4_2) | |
2653 | -#endif | |
2654 | +# endif | |
2655 | ||
2656 | add $16, %rdx | |
2657 | add $16, %r10 | |
2658 | @@ -934,12 +1217,18 @@ LABEL(loop_ashr_9_use_sse4_2): | |
2659 | ||
2660 | movdqa (%rdi, %rdx), %xmm0 | |
2661 | palignr $9, -16(%rdi, %rdx), %xmm0 | |
2662 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2663 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2664 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2665 | +# else | |
2666 | + movdqa (%rsi,%rdx), %xmm1 | |
2667 | + TOLOWER (%xmm0, %xmm1) | |
2668 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2669 | +# endif | |
2670 | jbe LABEL(use_sse4_2_exit) | |
2671 | -#ifdef USE_AS_STRNCMP | |
2672 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2673 | sub $16, %r11 | |
2674 | jbe LABEL(strcmp_exitz_sse4_2) | |
2675 | -#endif | |
2676 | +# endif | |
2677 | add $16, %rdx | |
2678 | jmp LABEL(loop_ashr_9_use_sse4_2) | |
2679 | ||
2680 | @@ -949,10 +1238,10 @@ LABEL(nibble_ashr_9_use_sse4_2): | |
2681 | movdqa -16(%rdi, %rdx), %xmm0 | |
2682 | psrldq $9, %xmm0 | |
2683 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2684 | -#ifdef USE_AS_STRNCMP | |
2685 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2686 | cmp %r11, %rcx | |
2687 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2688 | -#endif | |
2689 | +# endif | |
2690 | cmp $6, %ecx | |
2691 | ja LABEL(loop_ashr_9_use_sse4_2) | |
2692 | ||
2693 | @@ -961,7 +1250,7 @@ LABEL(nibble_ashr_9_use_sse4_2): | |
2694 | /* | |
2695 | * The following cases will be handled by ashr_10 | |
2696 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2697 | - * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10 | |
2698 | + * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10 | |
2699 | */ | |
2700 | .p2align 4 | |
2701 | LABEL(ashr_10_sse4_2): | |
2702 | @@ -970,6 +1259,7 @@ LABEL(ashr_10_sse4_2): | |
2703 | movdqa (%rsi), %xmm1 | |
2704 | pcmpeqb %xmm1, %xmm0 | |
2705 | pslldq $6, %xmm2 | |
2706 | + TOLOWER (%xmm1, %xmm2) | |
2707 | pcmpeqb %xmm1, %xmm2 | |
2708 | psubb %xmm0, %xmm2 | |
2709 | pmovmskb %xmm2, %r9d | |
2710 | @@ -1001,12 +1291,18 @@ LABEL(loop_ashr_10_use_sse4_2): | |
2711 | ||
2712 | movdqa (%rdi, %rdx), %xmm0 | |
2713 | palignr $10, -16(%rdi, %rdx), %xmm0 | |
2714 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2715 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2716 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2717 | +# else | |
2718 | + movdqa (%rsi,%rdx), %xmm1 | |
2719 | + TOLOWER (%xmm0, %xmm1) | |
2720 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2721 | +# endif | |
2722 | jbe LABEL(use_sse4_2_exit) | |
2723 | -#ifdef USE_AS_STRNCMP | |
2724 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2725 | sub $16, %r11 | |
2726 | jbe LABEL(strcmp_exitz_sse4_2) | |
2727 | -#endif | |
2728 | +# endif | |
2729 | ||
2730 | add $16, %rdx | |
2731 | add $16, %r10 | |
2732 | @@ -1014,12 +1310,18 @@ LABEL(loop_ashr_10_use_sse4_2): | |
2733 | ||
2734 | movdqa (%rdi, %rdx), %xmm0 | |
2735 | palignr $10, -16(%rdi, %rdx), %xmm0 | |
2736 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2737 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2738 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2739 | +# else | |
2740 | + movdqa (%rsi,%rdx), %xmm1 | |
2741 | + TOLOWER (%xmm0, %xmm1) | |
2742 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2743 | +# endif | |
2744 | jbe LABEL(use_sse4_2_exit) | |
2745 | -#ifdef USE_AS_STRNCMP | |
2746 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2747 | sub $16, %r11 | |
2748 | jbe LABEL(strcmp_exitz_sse4_2) | |
2749 | -#endif | |
2750 | +# endif | |
2751 | add $16, %rdx | |
2752 | jmp LABEL(loop_ashr_10_use_sse4_2) | |
2753 | ||
2754 | @@ -1029,10 +1331,10 @@ LABEL(nibble_ashr_10_use_sse4_2): | |
2755 | movdqa -16(%rdi, %rdx), %xmm0 | |
2756 | psrldq $10, %xmm0 | |
2757 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2758 | -#ifdef USE_AS_STRNCMP | |
2759 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2760 | cmp %r11, %rcx | |
2761 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2762 | -#endif | |
2763 | +# endif | |
2764 | cmp $5, %ecx | |
2765 | ja LABEL(loop_ashr_10_use_sse4_2) | |
2766 | ||
2767 | @@ -1041,7 +1343,7 @@ LABEL(nibble_ashr_10_use_sse4_2): | |
2768 | /* | |
2769 | * The following cases will be handled by ashr_11 | |
2770 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2771 | - * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11 | |
2772 | + * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11 | |
2773 | */ | |
2774 | .p2align 4 | |
2775 | LABEL(ashr_11_sse4_2): | |
2776 | @@ -1050,6 +1352,7 @@ LABEL(ashr_11_sse4_2): | |
2777 | movdqa (%rsi), %xmm1 | |
2778 | pcmpeqb %xmm1, %xmm0 | |
2779 | pslldq $5, %xmm2 | |
2780 | + TOLOWER (%xmm1, %xmm2) | |
2781 | pcmpeqb %xmm1, %xmm2 | |
2782 | psubb %xmm0, %xmm2 | |
2783 | pmovmskb %xmm2, %r9d | |
2784 | @@ -1081,12 +1384,18 @@ LABEL(loop_ashr_11_use_sse4_2): | |
2785 | ||
2786 | movdqa (%rdi, %rdx), %xmm0 | |
2787 | palignr $11, -16(%rdi, %rdx), %xmm0 | |
2788 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2789 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2790 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2791 | +# else | |
2792 | + movdqa (%rsi,%rdx), %xmm1 | |
2793 | + TOLOWER (%xmm0, %xmm1) | |
2794 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2795 | +# endif | |
2796 | jbe LABEL(use_sse4_2_exit) | |
2797 | -#ifdef USE_AS_STRNCMP | |
2798 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2799 | sub $16, %r11 | |
2800 | jbe LABEL(strcmp_exitz_sse4_2) | |
2801 | -#endif | |
2802 | +# endif | |
2803 | ||
2804 | add $16, %rdx | |
2805 | add $16, %r10 | |
2806 | @@ -1094,12 +1403,18 @@ LABEL(loop_ashr_11_use_sse4_2): | |
2807 | ||
2808 | movdqa (%rdi, %rdx), %xmm0 | |
2809 | palignr $11, -16(%rdi, %rdx), %xmm0 | |
2810 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2811 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2812 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2813 | +# else | |
2814 | + movdqa (%rsi,%rdx), %xmm1 | |
2815 | + TOLOWER (%xmm0, %xmm1) | |
2816 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2817 | +# endif | |
2818 | jbe LABEL(use_sse4_2_exit) | |
2819 | -#ifdef USE_AS_STRNCMP | |
2820 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2821 | sub $16, %r11 | |
2822 | jbe LABEL(strcmp_exitz_sse4_2) | |
2823 | -#endif | |
2824 | +# endif | |
2825 | add $16, %rdx | |
2826 | jmp LABEL(loop_ashr_11_use_sse4_2) | |
2827 | ||
2828 | @@ -1109,10 +1424,10 @@ LABEL(nibble_ashr_11_use_sse4_2): | |
2829 | movdqa -16(%rdi, %rdx), %xmm0 | |
2830 | psrldq $11, %xmm0 | |
2831 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2832 | -#ifdef USE_AS_STRNCMP | |
2833 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2834 | cmp %r11, %rcx | |
2835 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2836 | -#endif | |
2837 | +# endif | |
2838 | cmp $4, %ecx | |
2839 | ja LABEL(loop_ashr_11_use_sse4_2) | |
2840 | ||
2841 | @@ -1121,7 +1436,7 @@ LABEL(nibble_ashr_11_use_sse4_2): | |
2842 | /* | |
2843 | * The following cases will be handled by ashr_12 | |
2844 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2845 | - * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12 | |
2846 | + * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12 | |
2847 | */ | |
2848 | .p2align 4 | |
2849 | LABEL(ashr_12_sse4_2): | |
2850 | @@ -1130,6 +1445,7 @@ LABEL(ashr_12_sse4_2): | |
2851 | movdqa (%rsi), %xmm1 | |
2852 | pcmpeqb %xmm1, %xmm0 | |
2853 | pslldq $4, %xmm2 | |
2854 | + TOLOWER (%xmm1, %xmm2) | |
2855 | pcmpeqb %xmm1, %xmm2 | |
2856 | psubb %xmm0, %xmm2 | |
2857 | pmovmskb %xmm2, %r9d | |
2858 | @@ -1161,12 +1477,18 @@ LABEL(loop_ashr_12_use_sse4_2): | |
2859 | ||
2860 | movdqa (%rdi, %rdx), %xmm0 | |
2861 | palignr $12, -16(%rdi, %rdx), %xmm0 | |
2862 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2863 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2864 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2865 | +# else | |
2866 | + movdqa (%rsi,%rdx), %xmm1 | |
2867 | + TOLOWER (%xmm0, %xmm1) | |
2868 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2869 | +# endif | |
2870 | jbe LABEL(use_sse4_2_exit) | |
2871 | -#ifdef USE_AS_STRNCMP | |
2872 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2873 | sub $16, %r11 | |
2874 | jbe LABEL(strcmp_exitz_sse4_2) | |
2875 | -#endif | |
2876 | +# endif | |
2877 | ||
2878 | add $16, %rdx | |
2879 | add $16, %r10 | |
2880 | @@ -1174,12 +1496,18 @@ LABEL(loop_ashr_12_use_sse4_2): | |
2881 | ||
2882 | movdqa (%rdi, %rdx), %xmm0 | |
2883 | palignr $12, -16(%rdi, %rdx), %xmm0 | |
2884 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2885 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2886 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2887 | +# else | |
2888 | + movdqa (%rsi,%rdx), %xmm1 | |
2889 | + TOLOWER (%xmm0, %xmm1) | |
2890 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2891 | +# endif | |
2892 | jbe LABEL(use_sse4_2_exit) | |
2893 | -#ifdef USE_AS_STRNCMP | |
2894 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2895 | sub $16, %r11 | |
2896 | jbe LABEL(strcmp_exitz_sse4_2) | |
2897 | -#endif | |
2898 | +# endif | |
2899 | add $16, %rdx | |
2900 | jmp LABEL(loop_ashr_12_use_sse4_2) | |
2901 | ||
2902 | @@ -1189,10 +1517,10 @@ LABEL(nibble_ashr_12_use_sse4_2): | |
2903 | movdqa -16(%rdi, %rdx), %xmm0 | |
2904 | psrldq $12, %xmm0 | |
2905 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2906 | -#ifdef USE_AS_STRNCMP | |
2907 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2908 | cmp %r11, %rcx | |
2909 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2910 | -#endif | |
2911 | +# endif | |
2912 | cmp $3, %ecx | |
2913 | ja LABEL(loop_ashr_12_use_sse4_2) | |
2914 | ||
2915 | @@ -1201,7 +1529,7 @@ LABEL(nibble_ashr_12_use_sse4_2): | |
2916 | /* | |
2917 | * The following cases will be handled by ashr_13 | |
2918 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2919 | - * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13 | |
2920 | + * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13 | |
2921 | */ | |
2922 | .p2align 4 | |
2923 | LABEL(ashr_13_sse4_2): | |
2924 | @@ -1210,6 +1538,7 @@ LABEL(ashr_13_sse4_2): | |
2925 | movdqa (%rsi), %xmm1 | |
2926 | pcmpeqb %xmm1, %xmm0 | |
2927 | pslldq $3, %xmm2 | |
2928 | + TOLOWER (%xmm1, %xmm2) | |
2929 | pcmpeqb %xmm1, %xmm2 | |
2930 | psubb %xmm0, %xmm2 | |
2931 | pmovmskb %xmm2, %r9d | |
2932 | @@ -1242,12 +1571,18 @@ LABEL(loop_ashr_13_use_sse4_2): | |
2933 | ||
2934 | movdqa (%rdi, %rdx), %xmm0 | |
2935 | palignr $13, -16(%rdi, %rdx), %xmm0 | |
2936 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2937 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2938 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2939 | +# else | |
2940 | + movdqa (%rsi,%rdx), %xmm1 | |
2941 | + TOLOWER (%xmm0, %xmm1) | |
2942 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2943 | +# endif | |
2944 | jbe LABEL(use_sse4_2_exit) | |
2945 | -#ifdef USE_AS_STRNCMP | |
2946 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2947 | sub $16, %r11 | |
2948 | jbe LABEL(strcmp_exitz_sse4_2) | |
2949 | -#endif | |
2950 | +# endif | |
2951 | ||
2952 | add $16, %rdx | |
2953 | add $16, %r10 | |
2954 | @@ -1255,12 +1590,18 @@ LABEL(loop_ashr_13_use_sse4_2): | |
2955 | ||
2956 | movdqa (%rdi, %rdx), %xmm0 | |
2957 | palignr $13, -16(%rdi, %rdx), %xmm0 | |
2958 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
2959 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
2960 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
2961 | +# else | |
2962 | + movdqa (%rsi,%rdx), %xmm1 | |
2963 | + TOLOWER (%xmm0, %xmm1) | |
2964 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
2965 | +# endif | |
2966 | jbe LABEL(use_sse4_2_exit) | |
2967 | -#ifdef USE_AS_STRNCMP | |
2968 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2969 | sub $16, %r11 | |
2970 | jbe LABEL(strcmp_exitz_sse4_2) | |
2971 | -#endif | |
2972 | +# endif | |
2973 | add $16, %rdx | |
2974 | jmp LABEL(loop_ashr_13_use_sse4_2) | |
2975 | ||
2976 | @@ -1270,10 +1611,10 @@ LABEL(nibble_ashr_13_use_sse4_2): | |
2977 | movdqa -16(%rdi, %rdx), %xmm0 | |
2978 | psrldq $13, %xmm0 | |
2979 | pcmpistri $0x3a,%xmm0, %xmm0 | |
2980 | -#ifdef USE_AS_STRNCMP | |
2981 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
2982 | cmp %r11, %rcx | |
2983 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
2984 | -#endif | |
2985 | +# endif | |
2986 | cmp $2, %ecx | |
2987 | ja LABEL(loop_ashr_13_use_sse4_2) | |
2988 | ||
2989 | @@ -1282,7 +1623,7 @@ LABEL(nibble_ashr_13_use_sse4_2): | |
2990 | /* | |
2991 | * The following cases will be handled by ashr_14 | |
2992 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
2993 | - * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14 | |
2994 | + * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14 | |
2995 | */ | |
2996 | .p2align 4 | |
2997 | LABEL(ashr_14_sse4_2): | |
2998 | @@ -1291,6 +1632,7 @@ LABEL(ashr_14_sse4_2): | |
2999 | movdqa (%rsi), %xmm1 | |
3000 | pcmpeqb %xmm1, %xmm0 | |
3001 | pslldq $2, %xmm2 | |
3002 | + TOLOWER (%xmm1, %xmm2) | |
3003 | pcmpeqb %xmm1, %xmm2 | |
3004 | psubb %xmm0, %xmm2 | |
3005 | pmovmskb %xmm2, %r9d | |
3006 | @@ -1323,12 +1665,18 @@ LABEL(loop_ashr_14_use_sse4_2): | |
3007 | ||
3008 | movdqa (%rdi, %rdx), %xmm0 | |
3009 | palignr $14, -16(%rdi, %rdx), %xmm0 | |
3010 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
3011 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
3012 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
3013 | +# else | |
3014 | + movdqa (%rsi,%rdx), %xmm1 | |
3015 | + TOLOWER (%xmm0, %xmm1) | |
3016 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
3017 | +# endif | |
3018 | jbe LABEL(use_sse4_2_exit) | |
3019 | -#ifdef USE_AS_STRNCMP | |
3020 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3021 | sub $16, %r11 | |
3022 | jbe LABEL(strcmp_exitz_sse4_2) | |
3023 | -#endif | |
3024 | +# endif | |
3025 | ||
3026 | add $16, %rdx | |
3027 | add $16, %r10 | |
3028 | @@ -1336,12 +1684,18 @@ LABEL(loop_ashr_14_use_sse4_2): | |
3029 | ||
3030 | movdqa (%rdi, %rdx), %xmm0 | |
3031 | palignr $14, -16(%rdi, %rdx), %xmm0 | |
3032 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
3033 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
3034 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
3035 | +# else | |
3036 | + movdqa (%rsi,%rdx), %xmm1 | |
3037 | + TOLOWER (%xmm0, %xmm1) | |
3038 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
3039 | +# endif | |
3040 | jbe LABEL(use_sse4_2_exit) | |
3041 | -#ifdef USE_AS_STRNCMP | |
3042 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3043 | sub $16, %r11 | |
3044 | jbe LABEL(strcmp_exitz_sse4_2) | |
3045 | -#endif | |
3046 | +# endif | |
3047 | add $16, %rdx | |
3048 | jmp LABEL(loop_ashr_14_use_sse4_2) | |
3049 | ||
3050 | @@ -1351,10 +1705,10 @@ LABEL(nibble_ashr_14_use_sse4_2): | |
3051 | movdqa -16(%rdi, %rdx), %xmm0 | |
3052 | psrldq $14, %xmm0 | |
3053 | pcmpistri $0x3a,%xmm0, %xmm0 | |
3054 | -#ifdef USE_AS_STRNCMP | |
3055 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3056 | cmp %r11, %rcx | |
3057 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
3058 | -#endif | |
3059 | +# endif | |
3060 | cmp $1, %ecx | |
3061 | ja LABEL(loop_ashr_14_use_sse4_2) | |
3062 | ||
3063 | @@ -1363,7 +1717,7 @@ LABEL(nibble_ashr_14_use_sse4_2): | |
3064 | /* | |
3065 | * The following cases will be handled by ashr_15 | |
3066 | * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
3067 | - * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15 | |
3068 | + * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15 | |
3069 | */ | |
3070 | .p2align 4 | |
3071 | LABEL(ashr_15_sse4_2): | |
3072 | @@ -1372,6 +1726,7 @@ LABEL(ashr_15_sse4_2): | |
3073 | movdqa (%rsi), %xmm1 | |
3074 | pcmpeqb %xmm1, %xmm0 | |
3075 | pslldq $1, %xmm2 | |
3076 | + TOLOWER (%xmm1, %xmm2) | |
3077 | pcmpeqb %xmm1, %xmm2 | |
3078 | psubb %xmm0, %xmm2 | |
3079 | pmovmskb %xmm2, %r9d | |
3080 | @@ -1406,12 +1761,18 @@ LABEL(loop_ashr_15_use_sse4_2): | |
3081 | ||
3082 | movdqa (%rdi, %rdx), %xmm0 | |
3083 | palignr $15, -16(%rdi, %rdx), %xmm0 | |
3084 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
3085 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
3086 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
3087 | +# else | |
3088 | + movdqa (%rsi,%rdx), %xmm1 | |
3089 | + TOLOWER (%xmm0, %xmm1) | |
3090 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
3091 | +# endif | |
3092 | jbe LABEL(use_sse4_2_exit) | |
3093 | -#ifdef USE_AS_STRNCMP | |
3094 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3095 | sub $16, %r11 | |
3096 | jbe LABEL(strcmp_exitz_sse4_2) | |
3097 | -#endif | |
3098 | +# endif | |
3099 | ||
3100 | add $16, %rdx | |
3101 | add $16, %r10 | |
3102 | @@ -1419,12 +1780,18 @@ LABEL(loop_ashr_15_use_sse4_2): | |
3103 | ||
3104 | movdqa (%rdi, %rdx), %xmm0 | |
3105 | palignr $15, -16(%rdi, %rdx), %xmm0 | |
3106 | - pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
3107 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
3108 | + pcmpistri $0x1a, (%rsi,%rdx), %xmm0 | |
3109 | +# else | |
3110 | + movdqa (%rsi,%rdx), %xmm1 | |
3111 | + TOLOWER (%xmm0, %xmm1) | |
3112 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
3113 | +# endif | |
3114 | jbe LABEL(use_sse4_2_exit) | |
3115 | -#ifdef USE_AS_STRNCMP | |
3116 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3117 | sub $16, %r11 | |
3118 | jbe LABEL(strcmp_exitz_sse4_2) | |
3119 | -#endif | |
3120 | +# endif | |
3121 | add $16, %rdx | |
3122 | jmp LABEL(loop_ashr_15_use_sse4_2) | |
3123 | ||
3124 | @@ -1434,22 +1801,28 @@ LABEL(nibble_ashr_15_use_sse4_2): | |
3125 | movdqa -16(%rdi, %rdx), %xmm0 | |
3126 | psrldq $15, %xmm0 | |
3127 | pcmpistri $0x3a,%xmm0, %xmm0 | |
3128 | -#ifdef USE_AS_STRNCMP | |
3129 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3130 | cmp %r11, %rcx | |
3131 | jae LABEL(nibble_ashr_use_sse4_2_exit) | |
3132 | -#endif | |
3133 | +# endif | |
3134 | cmp $0, %ecx | |
3135 | ja LABEL(loop_ashr_15_use_sse4_2) | |
3136 | ||
3137 | LABEL(nibble_ashr_use_sse4_2_exit): | |
3138 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
3139 | pcmpistri $0x1a,(%rsi,%rdx), %xmm0 | |
3140 | +# else | |
3141 | + movdqa (%rsi,%rdx), %xmm1 | |
3142 | + TOLOWER (%xmm0, %xmm1) | |
3143 | + pcmpistri $0x1a, %xmm1, %xmm0 | |
3144 | +# endif | |
3145 | .p2align 4 | |
3146 | LABEL(use_sse4_2_exit): | |
3147 | jnc LABEL(strcmp_exitz_sse4_2) | |
3148 | -#ifdef USE_AS_STRNCMP | |
3149 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3150 | sub %rcx, %r11 | |
3151 | jbe LABEL(strcmp_exitz_sse4_2) | |
3152 | -#endif | |
3153 | +# endif | |
3154 | add %rcx, %rdx | |
3155 | lea -16(%rdi, %r9), %rdi | |
3156 | movzbl (%rdi, %rdx), %eax | |
3157 | @@ -1458,6 +1831,12 @@ LABEL(use_sse4_2_exit): | |
3158 | jz LABEL(use_sse4_2_ret_sse4_2) | |
3159 | xchg %eax, %edx | |
3160 | LABEL(use_sse4_2_ret_sse4_2): | |
3161 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
3162 | + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx | |
3163 | + movl (%rcx,%rdx,4), %edx | |
3164 | + movl (%rcx,%rax,4), %eax | |
3165 | +# endif | |
3166 | + | |
3167 | sub %edx, %eax | |
3168 | ret | |
3169 | ||
3170 | @@ -1473,13 +1852,19 @@ LABEL(ret_sse4_2): | |
3171 | LABEL(less16bytes_sse4_2): | |
3172 | bsf %rdx, %rdx /* find and store bit index in %rdx */ | |
3173 | ||
3174 | -#ifdef USE_AS_STRNCMP | |
3175 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3176 | sub %rdx, %r11 | |
3177 | jbe LABEL(strcmp_exitz_sse4_2) | |
3178 | -#endif | |
3179 | +# endif | |
3180 | movzbl (%rsi, %rdx), %ecx | |
3181 | movzbl (%rdi, %rdx), %eax | |
3182 | ||
3183 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
3184 | + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx | |
3185 | + movl (%rdx,%rcx,4), %ecx | |
3186 | + movl (%rdx,%rax,4), %eax | |
3187 | +# endif | |
3188 | + | |
3189 | sub %ecx, %eax | |
3190 | ret | |
3191 | ||
3192 | @@ -1488,15 +1873,27 @@ LABEL(strcmp_exitz_sse4_2): | |
3193 | ret | |
3194 | ||
3195 | .p2align 4 | |
3196 | + // XXX Same as code above | |
3197 | LABEL(Byte0_sse4_2): | |
3198 | movzx (%rsi), %ecx | |
3199 | movzx (%rdi), %eax | |
3200 | ||
3201 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
3202 | + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx | |
3203 | + movl (%rdx,%rcx,4), %ecx | |
3204 | + movl (%rdx,%rax,4), %eax | |
3205 | +# endif | |
3206 | + | |
3207 | sub %ecx, %eax | |
3208 | ret | |
3209 | cfi_endproc | |
3210 | .size STRCMP_SSE42, .-STRCMP_SSE42 | |
3211 | ||
3212 | +# undef UCLOW_reg | |
3213 | +# undef UCHIGH_reg | |
3214 | +# undef LCQWORD_reg | |
3215 | +# undef TOLOWER | |
3216 | + | |
3217 | /* Put all SSE 4.2 functions together. */ | |
3218 | .section .rodata.sse4.2,"a",@progbits | |
3219 | .p2align 3 | |
3220 | @@ -1528,6 +1925,27 @@ LABEL(unaligned_table_sse4_2): | |
3221 | # undef END | |
3222 | # define END(name) \ | |
3223 | cfi_endproc; .size STRCMP_SSE2, .-STRCMP_SSE2 | |
3224 | + | |
3225 | +# ifdef USE_AS_STRCASECMP_L | |
3226 | +# define ENTRY2(name) \ | |
3227 | + .type __strcasecmp_sse2, @function; \ | |
3228 | + .align 16; \ | |
3229 | + __strcasecmp_sse2: cfi_startproc; \ | |
3230 | + CALL_MCOUNT | |
3231 | +# define END2(name) \ | |
3232 | + cfi_endproc; .size __strcasecmp_sse2, .-__strcasecmp_sse2 | |
3233 | +# endif | |
3234 | + | |
3235 | +# ifdef USE_AS_STRNCASECMP_L | |
3236 | +# define ENTRY2(name) \ | |
3237 | + .type __strncasecmp_sse2, @function; \ | |
3238 | + .align 16; \ | |
3239 | + __strncasecmp_sse2: cfi_startproc; \ | |
3240 | + CALL_MCOUNT | |
3241 | +# define END2(name) \ | |
3242 | + cfi_endproc; .size __strncasecmp_sse2, .-__strncasecmp_sse2 | |
3243 | +# endif | |
3244 | + | |
3245 | # undef libc_hidden_builtin_def | |
3246 | /* It doesn't make sense to send libc-internal strcmp calls through a PLT. | |
3247 | The speedup we get from using SSE4.2 instruction is likely eaten away | |
3248 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strncase_l-ssse3.S | |
3249 | =================================================================== | |
3250 | --- /dev/null | |
3251 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strncase_l-ssse3.S | |
3252 | @@ -0,0 +1,6 @@ | |
3253 | +#define USE_SSSE3 1 | |
3254 | +#define USE_AS_STRNCASECMP_L | |
3255 | +#define NO_NOLOCALE_ALIAS | |
3256 | +#define STRCMP __strncasecmp_l_ssse3 | |
3257 | +#define __strncasecmp __strncasecmp_ssse3 | |
3258 | +#include "../strcmp.S" | |
3259 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strncase_l.S | |
3260 | =================================================================== | |
3261 | --- /dev/null | |
3262 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strncase_l.S | |
3263 | @@ -0,0 +1,6 @@ | |
3264 | +#define STRCMP __strncasecmp_l | |
3265 | +#define USE_AS_STRNCASECMP_L | |
3266 | +#include "strcmp.S" | |
3267 | + | |
3268 | +weak_alias (__strncasecmp_l, strncasecmp_l) | |
3269 | +libc_hidden_def (strncasecmp_l) | |
3270 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strstr.c | |
3271 | =================================================================== | |
3272 | --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/strstr.c | |
3273 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strstr.c | |
3274 | @@ -67,10 +67,10 @@ | |
3275 | ||
3276 | case ECX CFlag ZFlag SFlag | |
3277 | 3 X 1 0 0/1 | |
3278 | - 4a 0 1 0 0 | |
3279 | - 4b 0 1 0 1 | |
3280 | - 4c 0 < X 1 0 0/1 | |
3281 | - 5 16 0 1 0 | |
3282 | + 4a 0 1 0 0 | |
3283 | + 4b 0 1 0 1 | |
3284 | + 4c 0 < X 1 0 0/1 | |
3285 | + 5 16 0 1 0 | |
3286 | ||
3287 | 3. An initial ordered-comparison fragment match, we fix up to do | |
3288 | subsequent string comparison | |
3289 | @@ -147,8 +147,7 @@ __m128i_shift_right (__m128i value, int | |
3290 | If EOS occurs within less than 16B before 4KB boundary, we don't | |
3291 | cross to next page. */ | |
3292 | ||
3293 | -static __m128i | |
3294 | -__attribute__ ((section (".text.sse4.2"))) | |
3295 | +static inline __m128i | |
3296 | __m128i_strloadu (const unsigned char * p) | |
3297 | { | |
3298 | int offset = ((size_t) p & (16 - 1)); | |
3299 | @@ -164,59 +163,36 @@ __m128i_strloadu (const unsigned char * | |
3300 | return _mm_loadu_si128 ((__m128i *) p); | |
3301 | } | |
3302 | ||
3303 | -#ifdef USE_AS_STRCASESTR | |
3304 | +#if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII | |
3305 | ||
3306 | /* Similar to __m128i_strloadu. Convert to lower case for POSIX/C | |
3307 | locale. */ | |
3308 | - | |
3309 | -static __m128i | |
3310 | -__attribute__ ((section (".text.sse4.2"))) | |
3311 | -__m128i_strloadu_tolower_posix (const unsigned char * p) | |
3312 | +static inline __m128i | |
3313 | +__m128i_strloadu_tolower (const unsigned char *p, __m128i rangeuc, | |
3314 | + __m128i u2ldelta) | |
3315 | { | |
3316 | __m128i frag = __m128i_strloadu (p); | |
3317 | ||
3318 | - /* Convert frag to lower case for POSIX/C locale. */ | |
3319 | - __m128i rangeuc = _mm_set_epi64x (0x0, 0x5a41); | |
3320 | - __m128i u2ldelta = _mm_set1_epi64x (0xe0e0e0e0e0e0e0e0); | |
3321 | - __m128i mask1 = _mm_cmpistrm (rangeuc, frag, 0x44); | |
3322 | - __m128i mask2 = _mm_blendv_epi8 (u2ldelta, frag, mask1); | |
3323 | - mask2 = _mm_sub_epi8 (mask2, u2ldelta); | |
3324 | - return _mm_blendv_epi8 (frag, mask2, mask1); | |
3325 | +#define UCLOW 0x4040404040404040ULL | |
3326 | +#define UCHIGH 0x5b5b5b5b5b5b5b5bULL | |
3327 | +#define LCQWORD 0x2020202020202020ULL | |
3328 | + /* Compare if 'Z' + 1 > bytes. Inverted way to get a mask for byte <= 'Z'. */ | |
3329 | + __m128i r2 = _mm_cmpgt_epi8 (_mm_set1_epi64x (UCHIGH), frag); | |
3330 | + /* Compare if bytes are > 'A' - 1. */ | |
3331 | + __m128i r1 = _mm_cmpgt_epi8 (frag, _mm_set1_epi64x (UCLOW)); | |
3332 | + /* Mask byte == ff if byte(r2) <= 'Z' and byte(r1) > 'A' - 1. */ | |
3333 | + __m128i mask = _mm_and_si128 (r2, r1); | |
3334 | + /* Set the lowercase bit (0x20) in the bytes where mask == ff. */ | |
3335 | + return _mm_or_si128 (frag, _mm_and_si128 (mask, _mm_set1_epi64x (LCQWORD))); | |
3336 | } | |
3337 | ||
3338 | -/* Similar to __m128i_strloadu. Convert to lower case for none-POSIX/C | |
3339 | - locale. */ | |
3340 | - | |
3341 | -static __m128i | |
3342 | -__attribute__ ((section (".text.sse4.2"))) | |
3343 | -__m128i_strloadu_tolower (const unsigned char * p) | |
3344 | -{ | |
3345 | - union | |
3346 | - { | |
3347 | - char b[16]; | |
3348 | - __m128i x; | |
3349 | - } u; | |
3350 | - | |
3351 | - for (int i = 0; i < 16; i++) | |
3352 | - if (p[i] == 0) | |
3353 | - { | |
3354 | - u.b[i] = 0; | |
3355 | - break; | |
3356 | - } | |
3357 | - else | |
3358 | - u.b[i] = tolower (p[i]); | |
3359 | - | |
3360 | - return u.x; | |
3361 | -} | |
3362 | #endif | |
3363 | ||
3364 | /* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP | |
3365 | algorithm) overlap for a fully populated 16B vector. | |
3366 | Input parameter: 1st 16Byte loaded from the reference string of a | |
3367 | strstr function. | |
3368 | - We don't use KMP algorithm if reference string is less than 16B. | |
3369 | - */ | |
3370 | - | |
3371 | + We don't use KMP algorithm if reference string is less than 16B. */ | |
3372 | static int | |
3373 | __inline__ __attribute__ ((__always_inline__,)) | |
3374 | KMP16Bovrlap (__m128i s2) | |
3375 | @@ -236,7 +212,7 @@ KMP16Bovrlap (__m128i s2) | |
3376 | return 1; | |
3377 | else if (!k1) | |
3378 | { | |
3379 | - /* There are al least two ditinct char in s2. If byte 0 and 1 are | |
3380 | + /* There are at least two distinct chars in s2. If byte 0 and 1 are | |
3381 | idential and the distinct value lies farther down, we can deduce | |
3382 | the next byte offset to restart full compare is least no earlier | |
3383 | than byte 3. */ | |
3384 | @@ -256,23 +232,30 @@ STRSTR_SSE42 (const unsigned char *s1, c | |
3385 | #define p1 s1 | |
3386 | const unsigned char *p2 = s2; | |
3387 | ||
3388 | - if (p2[0] == '\0') | |
3389 | +#ifndef STRCASESTR_NONASCII | |
3390 | + if (__builtin_expect (p2[0] == '\0', 0)) | |
3391 | return (char *) p1; | |
3392 | ||
3393 | - if (p1[0] == '\0') | |
3394 | + if (__builtin_expect (p1[0] == '\0', 0)) | |
3395 | return NULL; | |
3396 | ||
3397 | /* Check if p1 length is 1 byte long. */ | |
3398 | - if (p1[1] == '\0') | |
3399 | + if (__builtin_expect (p1[1] == '\0', 0)) | |
3400 | return p2[1] == '\0' && CMPBYTE (p1[0], p2[0]) ? (char *) p1 : NULL; | |
3401 | +#endif | |
3402 | ||
3403 | #ifdef USE_AS_STRCASESTR | |
3404 | - __m128i (*strloadu) (const unsigned char *); | |
3405 | - | |
3406 | - if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE) == 0) | |
3407 | - strloadu = __m128i_strloadu_tolower_posix; | |
3408 | - else | |
3409 | - strloadu = __m128i_strloadu_tolower; | |
3410 | +# ifndef STRCASESTR_NONASCII | |
3411 | + if (__builtin_expect (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE) | |
3412 | + != 0, 0)) | |
3413 | + return __strcasestr_sse42_nonascii (s1, s2); | |
3414 | + | |
3415 | + const __m128i rangeuc = _mm_set_epi64x (0x0, 0x5a41); | |
3416 | + const __m128i u2ldelta = _mm_set1_epi64x (0xe0e0e0e0e0e0e0e0); | |
3417 | +# define strloadu(p) __m128i_strloadu_tolower (p, rangeuc, u2ldelta) | |
3418 | +# else | |
3419 | +# define strloadu __m128i_strloadu_tolower | |
3420 | +# endif | |
3421 | #else | |
3422 | # define strloadu __m128i_strloadu | |
3423 | #endif | |
3424 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp.S | |
3425 | =================================================================== | |
3426 | --- /dev/null | |
3427 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp.S | |
3428 | @@ -0,0 +1 @@ | |
3429 | +/* In strcasecmp_l.S. */ | |
3430 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp_l-nonascii.c | |
3431 | =================================================================== | |
3432 | --- /dev/null | |
3433 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp_l-nonascii.c | |
3434 | @@ -0,0 +1,8 @@ | |
3435 | +#include <string.h> | |
3436 | + | |
3437 | +extern int __strcasecmp_l_nonascii (__const char *__s1, __const char *__s2, | |
3438 | + __locale_t __loc); | |
3439 | + | |
3440 | +#define __strcasecmp_l __strcasecmp_l_nonascii | |
3441 | +#define USE_IN_EXTENDED_LOCALE_MODEL 1 | |
3442 | +#include <string/strcasecmp.c> | |
3443 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp_l.S | |
3444 | =================================================================== | |
3445 | --- /dev/null | |
3446 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp_l.S | |
3447 | @@ -0,0 +1,6 @@ | |
3448 | +#define STRCMP __strcasecmp_l | |
3449 | +#define USE_AS_STRCASECMP_L | |
3450 | +#include "strcmp.S" | |
3451 | + | |
3452 | +weak_alias (__strcasecmp_l, strcasecmp_l) | |
3453 | +libc_hidden_def (strcasecmp_l) | |
3454 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcmp.S | |
3455 | =================================================================== | |
3456 | --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/strcmp.S | |
3457 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcmp.S | |
3458 | @@ -51,6 +51,31 @@ | |
3459 | je LABEL(strcmp_exitz); \ | |
3460 | mov %r9, %r11 | |
3461 | ||
3462 | +#elif defined USE_AS_STRCASECMP_L | |
3463 | +# include "locale-defines.h" | |
3464 | + | |
3465 | +/* No support for strcasecmp outside libc so far since it is not needed. */ | |
3466 | +# ifdef NOT_IN_libc | |
3467 | +# error "strcasecmp_l not implemented so far" | |
3468 | +# endif | |
3469 | + | |
3470 | +# define UPDATE_STRNCMP_COUNTER | |
3471 | +#elif defined USE_AS_STRNCASECMP_L | |
3472 | +# include "locale-defines.h" | |
3473 | + | |
3474 | +/* No support for strncasecmp outside libc so far since it is not needed. */ | |
3475 | +# ifdef NOT_IN_libc | |
3476 | +# error "strncasecmp_l not implemented so far" | |
3477 | +# endif | |
3478 | + | |
3479 | +# define UPDATE_STRNCMP_COUNTER \ | |
3480 | + /* calculate left number to compare */ \ | |
3481 | + lea -16(%rcx, %r11), %r9; \ | |
3482 | + cmp %r9, %r11; \ | |
3483 | + jb LABEL(strcmp_exitz); \ | |
3484 | + test %r9, %r9; \ | |
3485 | + je LABEL(strcmp_exitz); \ | |
3486 | + mov %r9, %r11 | |
3487 | #else | |
3488 | # define UPDATE_STRNCMP_COUNTER | |
3489 | # ifndef STRCMP | |
3490 | @@ -64,6 +89,46 @@ | |
3491 | .section .text.ssse3,"ax",@progbits | |
3492 | #endif | |
3493 | ||
3494 | +#ifdef USE_AS_STRCASECMP_L | |
3495 | +# ifndef ENTRY2 | |
3496 | +# define ENTRY2(name) ENTRY (name) | |
3497 | +# define END2(name) END (name) | |
3498 | +# endif | |
3499 | + | |
3500 | +ENTRY2 (__strcasecmp) | |
3501 | + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax | |
3502 | + movq %fs:(%rax),%rdx | |
3503 | + | |
3504 | + // XXX 5 byte should be before the function | |
3505 | + /* 5-byte NOP. */ | |
3506 | + .byte 0x0f,0x1f,0x44,0x00,0x00 | |
3507 | +END2 (__strcasecmp) | |
3508 | +# ifndef NO_NOLOCALE_ALIAS | |
3509 | +weak_alias (__strcasecmp, strcasecmp) | |
3510 | +libc_hidden_def (__strcasecmp) | |
3511 | +# endif | |
3512 | + /* FALLTHROUGH to strcasecmp_l. */ | |
3513 | +#elif defined USE_AS_STRNCASECMP_L | |
3514 | +# ifndef ENTRY2 | |
3515 | +# define ENTRY2(name) ENTRY (name) | |
3516 | +# define END2(name) END (name) | |
3517 | +# endif | |
3518 | + | |
3519 | +ENTRY2 (__strncasecmp) | |
3520 | + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax | |
3521 | + movq %fs:(%rax),%rcx | |
3522 | + | |
3523 | + // XXX 5 byte should be before the function | |
3524 | + /* 5-byte NOP. */ | |
3525 | + .byte 0x0f,0x1f,0x44,0x00,0x00 | |
3526 | +END2 (__strncasecmp) | |
3527 | +# ifndef NO_NOLOCALE_ALIAS | |
3528 | +weak_alias (__strncasecmp, strncasecmp) | |
3529 | +libc_hidden_def (__strncasecmp) | |
3530 | +# endif | |
3531 | + /* FALLTHROUGH to strncasecmp_l. */ | |
3532 | +#endif | |
3533 | + | |
3534 | ENTRY (BP_SYM (STRCMP)) | |
3535 | #ifdef NOT_IN_libc | |
3536 | /* Simple version since we can't use SSE registers in ld.so. */ | |
3537 | @@ -84,10 +149,32 @@ L(neq): movl $1, %eax | |
3538 | ret | |
3539 | END (BP_SYM (STRCMP)) | |
3540 | #else /* NOT_IN_libc */ | |
3541 | +# ifdef USE_AS_STRCASECMP_L | |
3542 | + /* We have to fall back on the C implementation for locales | |
3543 | + with encodings not matching ASCII for single bytes. */ | |
3544 | +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 | |
3545 | + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax | |
3546 | +# else | |
3547 | + movq (%rdx), %rax | |
3548 | +# endif | |
3549 | + cmpl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) | |
3550 | + jne __strcasecmp_l_nonascii | |
3551 | +# elif defined USE_AS_STRNCASECMP_L | |
3552 | + /* We have to fall back on the C implementation for locales | |
3553 | + with encodings not matching ASCII for single bytes. */ | |
3554 | +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0 | |
3555 | + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rcx), %rax | |
3556 | +# else | |
3557 | + movq (%rcx), %rax | |
3558 | +# endif | |
3559 | + cmpl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax) | |
3560 | + jne __strncasecmp_l_nonascii | |
3561 | +# endif | |
3562 | + | |
3563 | /* | |
3564 | * This implementation uses SSE to compare up to 16 bytes at a time. | |
3565 | */ | |
3566 | -# ifdef USE_AS_STRNCMP | |
3567 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3568 | test %rdx, %rdx | |
3569 | je LABEL(strcmp_exitz) | |
3570 | cmp $1, %rdx | |
3571 | @@ -99,6 +186,26 @@ END (BP_SYM (STRCMP)) | |
3572 | /* Use 64bit AND here to avoid long NOP padding. */ | |
3573 | and $0x3f, %rcx /* rsi alignment in cache line */ | |
3574 | and $0x3f, %rax /* rdi alignment in cache line */ | |
3575 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
3576 | + .section .rodata.cst16,"aM",@progbits,16 | |
3577 | + .align 16 | |
3578 | +.Lbelowupper: | |
3579 | + .quad 0x4040404040404040 | |
3580 | + .quad 0x4040404040404040 | |
3581 | +.Ltopupper: | |
3582 | + .quad 0x5b5b5b5b5b5b5b5b | |
3583 | + .quad 0x5b5b5b5b5b5b5b5b | |
3584 | +.Ltouppermask: | |
3585 | + .quad 0x2020202020202020 | |
3586 | + .quad 0x2020202020202020 | |
3587 | + .previous | |
3588 | + movdqa .Lbelowupper(%rip), %xmm5 | |
3589 | +# define UCLOW_reg %xmm5 | |
3590 | + movdqa .Ltopupper(%rip), %xmm6 | |
3591 | +# define UCHIGH_reg %xmm6 | |
3592 | + movdqa .Ltouppermask(%rip), %xmm7 | |
3593 | +# define LCQWORD_reg %xmm7 | |
3594 | +# endif | |
3595 | cmp $0x30, %ecx | |
3596 | ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */ | |
3597 | cmp $0x30, %eax | |
3598 | @@ -107,6 +214,26 @@ END (BP_SYM (STRCMP)) | |
3599 | movlpd (%rsi), %xmm2 | |
3600 | movhpd 8(%rdi), %xmm1 | |
3601 | movhpd 8(%rsi), %xmm2 | |
3602 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
3603 | +# define TOLOWER(reg1, reg2) \ | |
3604 | + movdqa reg1, %xmm8; \ | |
3605 | + movdqa UCHIGH_reg, %xmm9; \ | |
3606 | + movdqa reg2, %xmm10; \ | |
3607 | + movdqa UCHIGH_reg, %xmm11; \ | |
3608 | + pcmpgtb UCLOW_reg, %xmm8; \ | |
3609 | + pcmpgtb reg1, %xmm9; \ | |
3610 | + pcmpgtb UCLOW_reg, %xmm10; \ | |
3611 | + pcmpgtb reg2, %xmm11; \ | |
3612 | + pand %xmm9, %xmm8; \ | |
3613 | + pand %xmm11, %xmm10; \ | |
3614 | + pand LCQWORD_reg, %xmm8; \ | |
3615 | + pand LCQWORD_reg, %xmm10; \ | |
3616 | + por %xmm8, reg1; \ | |
3617 | + por %xmm10, reg2 | |
3618 | + TOLOWER (%xmm1, %xmm2) | |
3619 | +# else | |
3620 | +# define TOLOWER(reg1, reg2) | |
3621 | +# endif | |
3622 | pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */ | |
3623 | pcmpeqb %xmm1, %xmm0 /* Any null chars? */ | |
3624 | pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */ | |
3625 | @@ -114,7 +241,7 @@ END (BP_SYM (STRCMP)) | |
3626 | pmovmskb %xmm1, %edx | |
3627 | sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */ | |
3628 | jnz LABEL(less16bytes) /* If not, find different value or null char */ | |
3629 | -# ifdef USE_AS_STRNCMP | |
3630 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3631 | sub $16, %r11 | |
3632 | jbe LABEL(strcmp_exitz) /* finish comparision */ | |
3633 | # endif | |
3634 | @@ -159,7 +286,13 @@ LABEL(ashr_0): | |
3635 | movdqa (%rsi), %xmm1 | |
3636 | pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */ | |
3637 | pcmpeqb %xmm1, %xmm0 /* Any null chars? */ | |
3638 | +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L | |
3639 | pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */ | |
3640 | +# else | |
3641 | + movdqa (%rdi), %xmm2 | |
3642 | + TOLOWER (%xmm1, %xmm2) | |
3643 | + pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */ | |
3644 | +# endif | |
3645 | psubb %xmm0, %xmm1 /* packed sub of comparison results*/ | |
3646 | pmovmskb %xmm1, %r9d | |
3647 | shr %cl, %edx /* adjust 0xffff for offset */ | |
3648 | @@ -183,6 +316,7 @@ LABEL(ashr_0): | |
3649 | LABEL(loop_ashr_0): | |
3650 | movdqa (%rsi, %rcx), %xmm1 | |
3651 | movdqa (%rdi, %rcx), %xmm2 | |
3652 | + TOLOWER (%xmm1, %xmm2) | |
3653 | ||
3654 | pcmpeqb %xmm1, %xmm0 | |
3655 | pcmpeqb %xmm2, %xmm1 | |
3656 | @@ -191,13 +325,14 @@ LABEL(loop_ashr_0): | |
3657 | sub $0xffff, %edx | |
3658 | jnz LABEL(exit) /* mismatch or null char seen */ | |
3659 | ||
3660 | -# ifdef USE_AS_STRNCMP | |
3661 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3662 | sub $16, %r11 | |
3663 | jbe LABEL(strcmp_exitz) | |
3664 | # endif | |
3665 | add $16, %rcx | |
3666 | movdqa (%rsi, %rcx), %xmm1 | |
3667 | movdqa (%rdi, %rcx), %xmm2 | |
3668 | + TOLOWER (%xmm1, %xmm2) | |
3669 | ||
3670 | pcmpeqb %xmm1, %xmm0 | |
3671 | pcmpeqb %xmm2, %xmm1 | |
3672 | @@ -205,7 +340,7 @@ LABEL(loop_ashr_0): | |
3673 | pmovmskb %xmm1, %edx | |
3674 | sub $0xffff, %edx | |
3675 | jnz LABEL(exit) | |
3676 | -# ifdef USE_AS_STRNCMP | |
3677 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3678 | sub $16, %r11 | |
3679 | jbe LABEL(strcmp_exitz) | |
3680 | # endif | |
3681 | @@ -214,7 +349,7 @@ LABEL(loop_ashr_0): | |
3682 | ||
3683 | /* | |
3684 | * The following cases will be handled by ashr_1 | |
3685 | - * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
3686 | + * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case | |
3687 | * n(15) n -15 0(15 +(n-15) - n) ashr_1 | |
3688 | */ | |
3689 | .p2align 4 | |
3690 | @@ -224,6 +359,7 @@ LABEL(ashr_1): | |
3691 | movdqa (%rsi), %xmm1 | |
3692 | pcmpeqb %xmm1, %xmm0 /* Any null chars? */ | |
3693 | pslldq $15, %xmm2 /* shift first string to align with second */ | |
3694 | + TOLOWER (%xmm1, %xmm2) | |
3695 | pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */ | |
3696 | psubb %xmm0, %xmm2 /* packed sub of comparison results*/ | |
3697 | pmovmskb %xmm2, %r9d | |
3698 | @@ -263,6 +399,7 @@ LABEL(gobble_ashr_1): | |
3699 | # else | |
3700 | palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3701 | # endif | |
3702 | + TOLOWER (%xmm1, %xmm2) | |
3703 | ||
3704 | pcmpeqb %xmm1, %xmm0 | |
3705 | pcmpeqb %xmm2, %xmm1 | |
3706 | @@ -271,7 +408,7 @@ LABEL(gobble_ashr_1): | |
3707 | sub $0xffff, %edx | |
3708 | jnz LABEL(exit) | |
3709 | ||
3710 | -# ifdef USE_AS_STRNCMP | |
3711 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3712 | sub $16, %r11 | |
3713 | jbe LABEL(strcmp_exitz) | |
3714 | # endif | |
3715 | @@ -292,6 +429,7 @@ LABEL(gobble_ashr_1): | |
3716 | # else | |
3717 | palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3718 | # endif | |
3719 | + TOLOWER (%xmm1, %xmm2) | |
3720 | ||
3721 | pcmpeqb %xmm1, %xmm0 | |
3722 | pcmpeqb %xmm2, %xmm1 | |
3723 | @@ -300,7 +438,7 @@ LABEL(gobble_ashr_1): | |
3724 | sub $0xffff, %edx | |
3725 | jnz LABEL(exit) | |
3726 | ||
3727 | -# ifdef USE_AS_STRNCMP | |
3728 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3729 | sub $16, %r11 | |
3730 | jbe LABEL(strcmp_exitz) | |
3731 | # endif | |
3732 | @@ -319,8 +457,8 @@ LABEL(nibble_ashr_1): | |
3733 | test $0xfffe, %edx | |
3734 | jnz LABEL(ashr_1_exittail) /* find null char*/ | |
3735 | ||
3736 | -# ifdef USE_AS_STRNCMP | |
3737 | - cmp $14, %r11 | |
3738 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3739 | + cmp $15, %r11 | |
3740 | jbe LABEL(ashr_1_exittail) | |
3741 | # endif | |
3742 | ||
3743 | @@ -351,6 +489,7 @@ LABEL(ashr_2): | |
3744 | movdqa (%rsi), %xmm1 | |
3745 | pcmpeqb %xmm1, %xmm0 | |
3746 | pslldq $14, %xmm2 | |
3747 | + TOLOWER (%xmm1, %xmm2) | |
3748 | pcmpeqb %xmm1, %xmm2 | |
3749 | psubb %xmm0, %xmm2 | |
3750 | pmovmskb %xmm2, %r9d | |
3751 | @@ -390,6 +529,7 @@ LABEL(gobble_ashr_2): | |
3752 | # else | |
3753 | palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3754 | # endif | |
3755 | + TOLOWER (%xmm1, %xmm2) | |
3756 | ||
3757 | pcmpeqb %xmm1, %xmm0 | |
3758 | pcmpeqb %xmm2, %xmm1 | |
3759 | @@ -398,7 +538,7 @@ LABEL(gobble_ashr_2): | |
3760 | sub $0xffff, %edx | |
3761 | jnz LABEL(exit) | |
3762 | ||
3763 | -# ifdef USE_AS_STRNCMP | |
3764 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3765 | sub $16, %r11 | |
3766 | jbe LABEL(strcmp_exitz) | |
3767 | # endif | |
3768 | @@ -420,6 +560,7 @@ LABEL(gobble_ashr_2): | |
3769 | # else | |
3770 | palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3771 | # endif | |
3772 | + TOLOWER (%xmm1, %xmm2) | |
3773 | ||
3774 | pcmpeqb %xmm1, %xmm0 | |
3775 | pcmpeqb %xmm2, %xmm1 | |
3776 | @@ -428,7 +569,7 @@ LABEL(gobble_ashr_2): | |
3777 | sub $0xffff, %edx | |
3778 | jnz LABEL(exit) | |
3779 | ||
3780 | -# ifdef USE_AS_STRNCMP | |
3781 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3782 | sub $16, %r11 | |
3783 | jbe LABEL(strcmp_exitz) | |
3784 | # endif | |
3785 | @@ -444,8 +585,8 @@ LABEL(nibble_ashr_2): | |
3786 | test $0xfffc, %edx | |
3787 | jnz LABEL(ashr_2_exittail) | |
3788 | ||
3789 | -# ifdef USE_AS_STRNCMP | |
3790 | - cmp $13, %r11 | |
3791 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3792 | + cmp $14, %r11 | |
3793 | jbe LABEL(ashr_2_exittail) | |
3794 | # endif | |
3795 | ||
3796 | @@ -472,6 +613,7 @@ LABEL(ashr_3): | |
3797 | movdqa (%rsi), %xmm1 | |
3798 | pcmpeqb %xmm1, %xmm0 | |
3799 | pslldq $13, %xmm2 | |
3800 | + TOLOWER (%xmm1, %xmm2) | |
3801 | pcmpeqb %xmm1, %xmm2 | |
3802 | psubb %xmm0, %xmm2 | |
3803 | pmovmskb %xmm2, %r9d | |
3804 | @@ -512,6 +654,7 @@ LABEL(gobble_ashr_3): | |
3805 | # else | |
3806 | palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3807 | # endif | |
3808 | + TOLOWER (%xmm1, %xmm2) | |
3809 | ||
3810 | pcmpeqb %xmm1, %xmm0 | |
3811 | pcmpeqb %xmm2, %xmm1 | |
3812 | @@ -520,7 +663,7 @@ LABEL(gobble_ashr_3): | |
3813 | sub $0xffff, %edx | |
3814 | jnz LABEL(exit) | |
3815 | ||
3816 | -# ifdef USE_AS_STRNCMP | |
3817 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3818 | sub $16, %r11 | |
3819 | jbe LABEL(strcmp_exitz) | |
3820 | # endif | |
3821 | @@ -542,6 +685,7 @@ LABEL(gobble_ashr_3): | |
3822 | # else | |
3823 | palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3824 | # endif | |
3825 | + TOLOWER (%xmm1, %xmm2) | |
3826 | ||
3827 | pcmpeqb %xmm1, %xmm0 | |
3828 | pcmpeqb %xmm2, %xmm1 | |
3829 | @@ -550,7 +694,7 @@ LABEL(gobble_ashr_3): | |
3830 | sub $0xffff, %edx | |
3831 | jnz LABEL(exit) | |
3832 | ||
3833 | -# ifdef USE_AS_STRNCMP | |
3834 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3835 | sub $16, %r11 | |
3836 | jbe LABEL(strcmp_exitz) | |
3837 | # endif | |
3838 | @@ -566,8 +710,8 @@ LABEL(nibble_ashr_3): | |
3839 | test $0xfff8, %edx | |
3840 | jnz LABEL(ashr_3_exittail) | |
3841 | ||
3842 | -# ifdef USE_AS_STRNCMP | |
3843 | - cmp $12, %r11 | |
3844 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3845 | + cmp $13, %r11 | |
3846 | jbe LABEL(ashr_3_exittail) | |
3847 | # endif | |
3848 | ||
3849 | @@ -594,6 +738,7 @@ LABEL(ashr_4): | |
3850 | movdqa (%rsi), %xmm1 | |
3851 | pcmpeqb %xmm1, %xmm0 | |
3852 | pslldq $12, %xmm2 | |
3853 | + TOLOWER (%xmm1, %xmm2) | |
3854 | pcmpeqb %xmm1, %xmm2 | |
3855 | psubb %xmm0, %xmm2 | |
3856 | pmovmskb %xmm2, %r9d | |
3857 | @@ -634,6 +779,7 @@ LABEL(gobble_ashr_4): | |
3858 | # else | |
3859 | palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3860 | # endif | |
3861 | + TOLOWER (%xmm1, %xmm2) | |
3862 | ||
3863 | pcmpeqb %xmm1, %xmm0 | |
3864 | pcmpeqb %xmm2, %xmm1 | |
3865 | @@ -642,7 +788,7 @@ LABEL(gobble_ashr_4): | |
3866 | sub $0xffff, %edx | |
3867 | jnz LABEL(exit) | |
3868 | ||
3869 | -# ifdef USE_AS_STRNCMP | |
3870 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3871 | sub $16, %r11 | |
3872 | jbe LABEL(strcmp_exitz) | |
3873 | # endif | |
3874 | @@ -664,6 +810,7 @@ LABEL(gobble_ashr_4): | |
3875 | # else | |
3876 | palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3877 | # endif | |
3878 | + TOLOWER (%xmm1, %xmm2) | |
3879 | ||
3880 | pcmpeqb %xmm1, %xmm0 | |
3881 | pcmpeqb %xmm2, %xmm1 | |
3882 | @@ -672,7 +819,7 @@ LABEL(gobble_ashr_4): | |
3883 | sub $0xffff, %edx | |
3884 | jnz LABEL(exit) | |
3885 | ||
3886 | -# ifdef USE_AS_STRNCMP | |
3887 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3888 | sub $16, %r11 | |
3889 | jbe LABEL(strcmp_exitz) | |
3890 | # endif | |
3891 | @@ -688,8 +835,8 @@ LABEL(nibble_ashr_4): | |
3892 | test $0xfff0, %edx | |
3893 | jnz LABEL(ashr_4_exittail) | |
3894 | ||
3895 | -# ifdef USE_AS_STRNCMP | |
3896 | - cmp $11, %r11 | |
3897 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3898 | + cmp $12, %r11 | |
3899 | jbe LABEL(ashr_4_exittail) | |
3900 | # endif | |
3901 | ||
3902 | @@ -716,6 +863,7 @@ LABEL(ashr_5): | |
3903 | movdqa (%rsi), %xmm1 | |
3904 | pcmpeqb %xmm1, %xmm0 | |
3905 | pslldq $11, %xmm2 | |
3906 | + TOLOWER (%xmm1, %xmm2) | |
3907 | pcmpeqb %xmm1, %xmm2 | |
3908 | psubb %xmm0, %xmm2 | |
3909 | pmovmskb %xmm2, %r9d | |
3910 | @@ -756,6 +904,7 @@ LABEL(gobble_ashr_5): | |
3911 | # else | |
3912 | palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3913 | # endif | |
3914 | + TOLOWER (%xmm1, %xmm2) | |
3915 | ||
3916 | pcmpeqb %xmm1, %xmm0 | |
3917 | pcmpeqb %xmm2, %xmm1 | |
3918 | @@ -764,7 +913,7 @@ LABEL(gobble_ashr_5): | |
3919 | sub $0xffff, %edx | |
3920 | jnz LABEL(exit) | |
3921 | ||
3922 | -# ifdef USE_AS_STRNCMP | |
3923 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3924 | sub $16, %r11 | |
3925 | jbe LABEL(strcmp_exitz) | |
3926 | # endif | |
3927 | @@ -786,6 +935,7 @@ LABEL(gobble_ashr_5): | |
3928 | # else | |
3929 | palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3930 | # endif | |
3931 | + TOLOWER (%xmm1, %xmm2) | |
3932 | ||
3933 | pcmpeqb %xmm1, %xmm0 | |
3934 | pcmpeqb %xmm2, %xmm1 | |
3935 | @@ -794,7 +944,7 @@ LABEL(gobble_ashr_5): | |
3936 | sub $0xffff, %edx | |
3937 | jnz LABEL(exit) | |
3938 | ||
3939 | -# ifdef USE_AS_STRNCMP | |
3940 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3941 | sub $16, %r11 | |
3942 | jbe LABEL(strcmp_exitz) | |
3943 | # endif | |
3944 | @@ -810,8 +960,8 @@ LABEL(nibble_ashr_5): | |
3945 | test $0xffe0, %edx | |
3946 | jnz LABEL(ashr_5_exittail) | |
3947 | ||
3948 | -# ifdef USE_AS_STRNCMP | |
3949 | - cmp $10, %r11 | |
3950 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3951 | + cmp $11, %r11 | |
3952 | jbe LABEL(ashr_5_exittail) | |
3953 | # endif | |
3954 | ||
3955 | @@ -838,6 +988,7 @@ LABEL(ashr_6): | |
3956 | movdqa (%rsi), %xmm1 | |
3957 | pcmpeqb %xmm1, %xmm0 | |
3958 | pslldq $10, %xmm2 | |
3959 | + TOLOWER (%xmm1, %xmm2) | |
3960 | pcmpeqb %xmm1, %xmm2 | |
3961 | psubb %xmm0, %xmm2 | |
3962 | pmovmskb %xmm2, %r9d | |
3963 | @@ -878,6 +1029,7 @@ LABEL(gobble_ashr_6): | |
3964 | # else | |
3965 | palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3966 | # endif | |
3967 | + TOLOWER (%xmm1, %xmm2) | |
3968 | ||
3969 | pcmpeqb %xmm1, %xmm0 | |
3970 | pcmpeqb %xmm2, %xmm1 | |
3971 | @@ -886,7 +1038,7 @@ LABEL(gobble_ashr_6): | |
3972 | sub $0xffff, %edx | |
3973 | jnz LABEL(exit) | |
3974 | ||
3975 | -# ifdef USE_AS_STRNCMP | |
3976 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3977 | sub $16, %r11 | |
3978 | jbe LABEL(strcmp_exitz) | |
3979 | # endif | |
3980 | @@ -908,6 +1060,7 @@ LABEL(gobble_ashr_6): | |
3981 | # else | |
3982 | palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */ | |
3983 | # endif | |
3984 | + TOLOWER (%xmm1, %xmm2) | |
3985 | ||
3986 | pcmpeqb %xmm1, %xmm0 | |
3987 | pcmpeqb %xmm2, %xmm1 | |
3988 | @@ -916,7 +1069,7 @@ LABEL(gobble_ashr_6): | |
3989 | sub $0xffff, %edx | |
3990 | jnz LABEL(exit) | |
3991 | ||
3992 | -# ifdef USE_AS_STRNCMP | |
3993 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
3994 | sub $16, %r11 | |
3995 | jbe LABEL(strcmp_exitz) | |
3996 | # endif | |
3997 | @@ -932,8 +1085,8 @@ LABEL(nibble_ashr_6): | |
3998 | test $0xffc0, %edx | |
3999 | jnz LABEL(ashr_6_exittail) | |
4000 | ||
4001 | -# ifdef USE_AS_STRNCMP | |
4002 | - cmp $9, %r11 | |
4003 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4004 | + cmp $10, %r11 | |
4005 | jbe LABEL(ashr_6_exittail) | |
4006 | # endif | |
4007 | ||
4008 | @@ -960,6 +1113,7 @@ LABEL(ashr_7): | |
4009 | movdqa (%rsi), %xmm1 | |
4010 | pcmpeqb %xmm1, %xmm0 | |
4011 | pslldq $9, %xmm2 | |
4012 | + TOLOWER (%xmm1, %xmm2) | |
4013 | pcmpeqb %xmm1, %xmm2 | |
4014 | psubb %xmm0, %xmm2 | |
4015 | pmovmskb %xmm2, %r9d | |
4016 | @@ -1000,6 +1154,7 @@ LABEL(gobble_ashr_7): | |
4017 | # else | |
4018 | palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4019 | # endif | |
4020 | + TOLOWER (%xmm1, %xmm2) | |
4021 | ||
4022 | pcmpeqb %xmm1, %xmm0 | |
4023 | pcmpeqb %xmm2, %xmm1 | |
4024 | @@ -1008,7 +1163,7 @@ LABEL(gobble_ashr_7): | |
4025 | sub $0xffff, %edx | |
4026 | jnz LABEL(exit) | |
4027 | ||
4028 | -# ifdef USE_AS_STRNCMP | |
4029 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4030 | sub $16, %r11 | |
4031 | jbe LABEL(strcmp_exitz) | |
4032 | # endif | |
4033 | @@ -1030,6 +1185,7 @@ LABEL(gobble_ashr_7): | |
4034 | # else | |
4035 | palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4036 | # endif | |
4037 | + TOLOWER (%xmm1, %xmm2) | |
4038 | ||
4039 | pcmpeqb %xmm1, %xmm0 | |
4040 | pcmpeqb %xmm2, %xmm1 | |
4041 | @@ -1038,7 +1194,7 @@ LABEL(gobble_ashr_7): | |
4042 | sub $0xffff, %edx | |
4043 | jnz LABEL(exit) | |
4044 | ||
4045 | -# ifdef USE_AS_STRNCMP | |
4046 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4047 | sub $16, %r11 | |
4048 | jbe LABEL(strcmp_exitz) | |
4049 | # endif | |
4050 | @@ -1054,8 +1210,8 @@ LABEL(nibble_ashr_7): | |
4051 | test $0xff80, %edx | |
4052 | jnz LABEL(ashr_7_exittail) | |
4053 | ||
4054 | -# ifdef USE_AS_STRNCMP | |
4055 | - cmp $8, %r11 | |
4056 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4057 | + cmp $9, %r11 | |
4058 | jbe LABEL(ashr_7_exittail) | |
4059 | # endif | |
4060 | ||
4061 | @@ -1082,6 +1238,7 @@ LABEL(ashr_8): | |
4062 | movdqa (%rsi), %xmm1 | |
4063 | pcmpeqb %xmm1, %xmm0 | |
4064 | pslldq $8, %xmm2 | |
4065 | + TOLOWER (%xmm1, %xmm2) | |
4066 | pcmpeqb %xmm1, %xmm2 | |
4067 | psubb %xmm0, %xmm2 | |
4068 | pmovmskb %xmm2, %r9d | |
4069 | @@ -1122,6 +1279,7 @@ LABEL(gobble_ashr_8): | |
4070 | # else | |
4071 | palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4072 | # endif | |
4073 | + TOLOWER (%xmm1, %xmm2) | |
4074 | ||
4075 | pcmpeqb %xmm1, %xmm0 | |
4076 | pcmpeqb %xmm2, %xmm1 | |
4077 | @@ -1130,7 +1288,7 @@ LABEL(gobble_ashr_8): | |
4078 | sub $0xffff, %edx | |
4079 | jnz LABEL(exit) | |
4080 | ||
4081 | -# ifdef USE_AS_STRNCMP | |
4082 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4083 | sub $16, %r11 | |
4084 | jbe LABEL(strcmp_exitz) | |
4085 | # endif | |
4086 | @@ -1152,6 +1310,7 @@ LABEL(gobble_ashr_8): | |
4087 | # else | |
4088 | palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4089 | # endif | |
4090 | + TOLOWER (%xmm1, %xmm2) | |
4091 | ||
4092 | pcmpeqb %xmm1, %xmm0 | |
4093 | pcmpeqb %xmm2, %xmm1 | |
4094 | @@ -1160,7 +1319,7 @@ LABEL(gobble_ashr_8): | |
4095 | sub $0xffff, %edx | |
4096 | jnz LABEL(exit) | |
4097 | ||
4098 | -# ifdef USE_AS_STRNCMP | |
4099 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4100 | sub $16, %r11 | |
4101 | jbe LABEL(strcmp_exitz) | |
4102 | # endif | |
4103 | @@ -1176,8 +1335,8 @@ LABEL(nibble_ashr_8): | |
4104 | test $0xff00, %edx | |
4105 | jnz LABEL(ashr_8_exittail) | |
4106 | ||
4107 | -# ifdef USE_AS_STRNCMP | |
4108 | - cmp $7, %r11 | |
4109 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4110 | + cmp $8, %r11 | |
4111 | jbe LABEL(ashr_8_exittail) | |
4112 | # endif | |
4113 | ||
4114 | @@ -1204,6 +1363,7 @@ LABEL(ashr_9): | |
4115 | movdqa (%rsi), %xmm1 | |
4116 | pcmpeqb %xmm1, %xmm0 | |
4117 | pslldq $7, %xmm2 | |
4118 | + TOLOWER (%xmm1, %xmm2) | |
4119 | pcmpeqb %xmm1, %xmm2 | |
4120 | psubb %xmm0, %xmm2 | |
4121 | pmovmskb %xmm2, %r9d | |
4122 | @@ -1244,6 +1404,7 @@ LABEL(gobble_ashr_9): | |
4123 | # else | |
4124 | palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4125 | # endif | |
4126 | + TOLOWER (%xmm1, %xmm2) | |
4127 | ||
4128 | pcmpeqb %xmm1, %xmm0 | |
4129 | pcmpeqb %xmm2, %xmm1 | |
4130 | @@ -1252,7 +1413,7 @@ LABEL(gobble_ashr_9): | |
4131 | sub $0xffff, %edx | |
4132 | jnz LABEL(exit) | |
4133 | ||
4134 | -# ifdef USE_AS_STRNCMP | |
4135 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4136 | sub $16, %r11 | |
4137 | jbe LABEL(strcmp_exitz) | |
4138 | # endif | |
4139 | @@ -1274,6 +1435,7 @@ LABEL(gobble_ashr_9): | |
4140 | # else | |
4141 | palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4142 | # endif | |
4143 | + TOLOWER (%xmm1, %xmm2) | |
4144 | ||
4145 | pcmpeqb %xmm1, %xmm0 | |
4146 | pcmpeqb %xmm2, %xmm1 | |
4147 | @@ -1282,7 +1444,7 @@ LABEL(gobble_ashr_9): | |
4148 | sub $0xffff, %edx | |
4149 | jnz LABEL(exit) | |
4150 | ||
4151 | -# ifdef USE_AS_STRNCMP | |
4152 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4153 | sub $16, %r11 | |
4154 | jbe LABEL(strcmp_exitz) | |
4155 | # endif | |
4156 | @@ -1298,8 +1460,8 @@ LABEL(nibble_ashr_9): | |
4157 | test $0xfe00, %edx | |
4158 | jnz LABEL(ashr_9_exittail) | |
4159 | ||
4160 | -# ifdef USE_AS_STRNCMP | |
4161 | - cmp $6, %r11 | |
4162 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4163 | + cmp $7, %r11 | |
4164 | jbe LABEL(ashr_9_exittail) | |
4165 | # endif | |
4166 | ||
4167 | @@ -1326,6 +1488,7 @@ LABEL(ashr_10): | |
4168 | movdqa (%rsi), %xmm1 | |
4169 | pcmpeqb %xmm1, %xmm0 | |
4170 | pslldq $6, %xmm2 | |
4171 | + TOLOWER (%xmm1, %xmm2) | |
4172 | pcmpeqb %xmm1, %xmm2 | |
4173 | psubb %xmm0, %xmm2 | |
4174 | pmovmskb %xmm2, %r9d | |
4175 | @@ -1366,6 +1529,7 @@ LABEL(gobble_ashr_10): | |
4176 | # else | |
4177 | palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4178 | # endif | |
4179 | + TOLOWER (%xmm1, %xmm2) | |
4180 | ||
4181 | pcmpeqb %xmm1, %xmm0 | |
4182 | pcmpeqb %xmm2, %xmm1 | |
4183 | @@ -1374,7 +1538,7 @@ LABEL(gobble_ashr_10): | |
4184 | sub $0xffff, %edx | |
4185 | jnz LABEL(exit) | |
4186 | ||
4187 | -# ifdef USE_AS_STRNCMP | |
4188 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4189 | sub $16, %r11 | |
4190 | jbe LABEL(strcmp_exitz) | |
4191 | # endif | |
4192 | @@ -1396,6 +1560,7 @@ LABEL(gobble_ashr_10): | |
4193 | # else | |
4194 | palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4195 | # endif | |
4196 | + TOLOWER (%xmm1, %xmm2) | |
4197 | ||
4198 | pcmpeqb %xmm1, %xmm0 | |
4199 | pcmpeqb %xmm2, %xmm1 | |
4200 | @@ -1404,7 +1569,7 @@ LABEL(gobble_ashr_10): | |
4201 | sub $0xffff, %edx | |
4202 | jnz LABEL(exit) | |
4203 | ||
4204 | -# ifdef USE_AS_STRNCMP | |
4205 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4206 | sub $16, %r11 | |
4207 | jbe LABEL(strcmp_exitz) | |
4208 | # endif | |
4209 | @@ -1420,8 +1585,8 @@ LABEL(nibble_ashr_10): | |
4210 | test $0xfc00, %edx | |
4211 | jnz LABEL(ashr_10_exittail) | |
4212 | ||
4213 | -# ifdef USE_AS_STRNCMP | |
4214 | - cmp $5, %r11 | |
4215 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4216 | + cmp $6, %r11 | |
4217 | jbe LABEL(ashr_10_exittail) | |
4218 | # endif | |
4219 | ||
4220 | @@ -1448,6 +1613,7 @@ LABEL(ashr_11): | |
4221 | movdqa (%rsi), %xmm1 | |
4222 | pcmpeqb %xmm1, %xmm0 | |
4223 | pslldq $5, %xmm2 | |
4224 | + TOLOWER (%xmm1, %xmm2) | |
4225 | pcmpeqb %xmm1, %xmm2 | |
4226 | psubb %xmm0, %xmm2 | |
4227 | pmovmskb %xmm2, %r9d | |
4228 | @@ -1488,6 +1654,7 @@ LABEL(gobble_ashr_11): | |
4229 | # else | |
4230 | palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4231 | # endif | |
4232 | + TOLOWER (%xmm1, %xmm2) | |
4233 | ||
4234 | pcmpeqb %xmm1, %xmm0 | |
4235 | pcmpeqb %xmm2, %xmm1 | |
4236 | @@ -1496,7 +1663,7 @@ LABEL(gobble_ashr_11): | |
4237 | sub $0xffff, %edx | |
4238 | jnz LABEL(exit) | |
4239 | ||
4240 | -# ifdef USE_AS_STRNCMP | |
4241 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4242 | sub $16, %r11 | |
4243 | jbe LABEL(strcmp_exitz) | |
4244 | # endif | |
4245 | @@ -1518,6 +1685,7 @@ LABEL(gobble_ashr_11): | |
4246 | # else | |
4247 | palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4248 | # endif | |
4249 | + TOLOWER (%xmm1, %xmm2) | |
4250 | ||
4251 | pcmpeqb %xmm1, %xmm0 | |
4252 | pcmpeqb %xmm2, %xmm1 | |
4253 | @@ -1526,7 +1694,7 @@ LABEL(gobble_ashr_11): | |
4254 | sub $0xffff, %edx | |
4255 | jnz LABEL(exit) | |
4256 | ||
4257 | -# ifdef USE_AS_STRNCMP | |
4258 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4259 | sub $16, %r11 | |
4260 | jbe LABEL(strcmp_exitz) | |
4261 | # endif | |
4262 | @@ -1542,8 +1710,8 @@ LABEL(nibble_ashr_11): | |
4263 | test $0xf800, %edx | |
4264 | jnz LABEL(ashr_11_exittail) | |
4265 | ||
4266 | -# ifdef USE_AS_STRNCMP | |
4267 | - cmp $4, %r11 | |
4268 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4269 | + cmp $5, %r11 | |
4270 | jbe LABEL(ashr_11_exittail) | |
4271 | # endif | |
4272 | ||
4273 | @@ -1570,6 +1738,7 @@ LABEL(ashr_12): | |
4274 | movdqa (%rsi), %xmm1 | |
4275 | pcmpeqb %xmm1, %xmm0 | |
4276 | pslldq $4, %xmm2 | |
4277 | + TOLOWER (%xmm1, %xmm2) | |
4278 | pcmpeqb %xmm1, %xmm2 | |
4279 | psubb %xmm0, %xmm2 | |
4280 | pmovmskb %xmm2, %r9d | |
4281 | @@ -1610,6 +1779,7 @@ LABEL(gobble_ashr_12): | |
4282 | # else | |
4283 | palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4284 | # endif | |
4285 | + TOLOWER (%xmm1, %xmm2) | |
4286 | ||
4287 | pcmpeqb %xmm1, %xmm0 | |
4288 | pcmpeqb %xmm2, %xmm1 | |
4289 | @@ -1618,7 +1788,7 @@ LABEL(gobble_ashr_12): | |
4290 | sub $0xffff, %edx | |
4291 | jnz LABEL(exit) | |
4292 | ||
4293 | -# ifdef USE_AS_STRNCMP | |
4294 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4295 | sub $16, %r11 | |
4296 | jbe LABEL(strcmp_exitz) | |
4297 | # endif | |
4298 | @@ -1640,6 +1810,7 @@ LABEL(gobble_ashr_12): | |
4299 | # else | |
4300 | palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4301 | # endif | |
4302 | + TOLOWER (%xmm1, %xmm2) | |
4303 | ||
4304 | pcmpeqb %xmm1, %xmm0 | |
4305 | pcmpeqb %xmm2, %xmm1 | |
4306 | @@ -1648,7 +1819,7 @@ LABEL(gobble_ashr_12): | |
4307 | sub $0xffff, %edx | |
4308 | jnz LABEL(exit) | |
4309 | ||
4310 | -# ifdef USE_AS_STRNCMP | |
4311 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4312 | sub $16, %r11 | |
4313 | jbe LABEL(strcmp_exitz) | |
4314 | # endif | |
4315 | @@ -1664,8 +1835,8 @@ LABEL(nibble_ashr_12): | |
4316 | test $0xf000, %edx | |
4317 | jnz LABEL(ashr_12_exittail) | |
4318 | ||
4319 | -# ifdef USE_AS_STRNCMP | |
4320 | - cmp $3, %r11 | |
4321 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4322 | + cmp $4, %r11 | |
4323 | jbe LABEL(ashr_12_exittail) | |
4324 | # endif | |
4325 | ||
4326 | @@ -1692,6 +1863,7 @@ LABEL(ashr_13): | |
4327 | movdqa (%rsi), %xmm1 | |
4328 | pcmpeqb %xmm1, %xmm0 | |
4329 | pslldq $3, %xmm2 | |
4330 | + TOLOWER (%xmm1, %xmm2) | |
4331 | pcmpeqb %xmm1, %xmm2 | |
4332 | psubb %xmm0, %xmm2 | |
4333 | pmovmskb %xmm2, %r9d | |
4334 | @@ -1732,6 +1904,7 @@ LABEL(gobble_ashr_13): | |
4335 | # else | |
4336 | palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4337 | # endif | |
4338 | + TOLOWER (%xmm1, %xmm2) | |
4339 | ||
4340 | pcmpeqb %xmm1, %xmm0 | |
4341 | pcmpeqb %xmm2, %xmm1 | |
4342 | @@ -1740,7 +1913,7 @@ LABEL(gobble_ashr_13): | |
4343 | sub $0xffff, %edx | |
4344 | jnz LABEL(exit) | |
4345 | ||
4346 | -# ifdef USE_AS_STRNCMP | |
4347 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4348 | sub $16, %r11 | |
4349 | jbe LABEL(strcmp_exitz) | |
4350 | # endif | |
4351 | @@ -1762,6 +1935,7 @@ LABEL(gobble_ashr_13): | |
4352 | # else | |
4353 | palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4354 | # endif | |
4355 | + TOLOWER (%xmm1, %xmm2) | |
4356 | ||
4357 | pcmpeqb %xmm1, %xmm0 | |
4358 | pcmpeqb %xmm2, %xmm1 | |
4359 | @@ -1770,7 +1944,7 @@ LABEL(gobble_ashr_13): | |
4360 | sub $0xffff, %edx | |
4361 | jnz LABEL(exit) | |
4362 | ||
4363 | -# ifdef USE_AS_STRNCMP | |
4364 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4365 | sub $16, %r11 | |
4366 | jbe LABEL(strcmp_exitz) | |
4367 | # endif | |
4368 | @@ -1786,8 +1960,8 @@ LABEL(nibble_ashr_13): | |
4369 | test $0xe000, %edx | |
4370 | jnz LABEL(ashr_13_exittail) | |
4371 | ||
4372 | -# ifdef USE_AS_STRNCMP | |
4373 | - cmp $2, %r11 | |
4374 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4375 | + cmp $3, %r11 | |
4376 | jbe LABEL(ashr_13_exittail) | |
4377 | # endif | |
4378 | ||
4379 | @@ -1814,6 +1988,7 @@ LABEL(ashr_14): | |
4380 | movdqa (%rsi), %xmm1 | |
4381 | pcmpeqb %xmm1, %xmm0 | |
4382 | pslldq $2, %xmm2 | |
4383 | + TOLOWER (%xmm1, %xmm2) | |
4384 | pcmpeqb %xmm1, %xmm2 | |
4385 | psubb %xmm0, %xmm2 | |
4386 | pmovmskb %xmm2, %r9d | |
4387 | @@ -1854,6 +2029,7 @@ LABEL(gobble_ashr_14): | |
4388 | # else | |
4389 | palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4390 | # endif | |
4391 | + TOLOWER (%xmm1, %xmm2) | |
4392 | ||
4393 | pcmpeqb %xmm1, %xmm0 | |
4394 | pcmpeqb %xmm2, %xmm1 | |
4395 | @@ -1862,7 +2038,7 @@ LABEL(gobble_ashr_14): | |
4396 | sub $0xffff, %edx | |
4397 | jnz LABEL(exit) | |
4398 | ||
4399 | -# ifdef USE_AS_STRNCMP | |
4400 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4401 | sub $16, %r11 | |
4402 | jbe LABEL(strcmp_exitz) | |
4403 | # endif | |
4404 | @@ -1884,6 +2060,7 @@ LABEL(gobble_ashr_14): | |
4405 | # else | |
4406 | palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4407 | # endif | |
4408 | + TOLOWER (%xmm1, %xmm2) | |
4409 | ||
4410 | pcmpeqb %xmm1, %xmm0 | |
4411 | pcmpeqb %xmm2, %xmm1 | |
4412 | @@ -1892,7 +2069,7 @@ LABEL(gobble_ashr_14): | |
4413 | sub $0xffff, %edx | |
4414 | jnz LABEL(exit) | |
4415 | ||
4416 | -# ifdef USE_AS_STRNCMP | |
4417 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4418 | sub $16, %r11 | |
4419 | jbe LABEL(strcmp_exitz) | |
4420 | # endif | |
4421 | @@ -1908,8 +2085,8 @@ LABEL(nibble_ashr_14): | |
4422 | test $0xc000, %edx | |
4423 | jnz LABEL(ashr_14_exittail) | |
4424 | ||
4425 | -# ifdef USE_AS_STRNCMP | |
4426 | - cmp $1, %r11 | |
4427 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4428 | + cmp $2, %r11 | |
4429 | jbe LABEL(ashr_14_exittail) | |
4430 | # endif | |
4431 | ||
4432 | @@ -1936,6 +2113,7 @@ LABEL(ashr_15): | |
4433 | movdqa (%rsi), %xmm1 | |
4434 | pcmpeqb %xmm1, %xmm0 | |
4435 | pslldq $1, %xmm2 | |
4436 | + TOLOWER (%xmm1, %xmm2) | |
4437 | pcmpeqb %xmm1, %xmm2 | |
4438 | psubb %xmm0, %xmm2 | |
4439 | pmovmskb %xmm2, %r9d | |
4440 | @@ -1978,6 +2156,7 @@ LABEL(gobble_ashr_15): | |
4441 | # else | |
4442 | palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4443 | # endif | |
4444 | + TOLOWER (%xmm1, %xmm2) | |
4445 | ||
4446 | pcmpeqb %xmm1, %xmm0 | |
4447 | pcmpeqb %xmm2, %xmm1 | |
4448 | @@ -1986,7 +2165,7 @@ LABEL(gobble_ashr_15): | |
4449 | sub $0xffff, %edx | |
4450 | jnz LABEL(exit) | |
4451 | ||
4452 | -# ifdef USE_AS_STRNCMP | |
4453 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4454 | sub $16, %r11 | |
4455 | jbe LABEL(strcmp_exitz) | |
4456 | # endif | |
4457 | @@ -2008,6 +2187,7 @@ LABEL(gobble_ashr_15): | |
4458 | # else | |
4459 | palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */ | |
4460 | # endif | |
4461 | + TOLOWER (%xmm1, %xmm2) | |
4462 | ||
4463 | pcmpeqb %xmm1, %xmm0 | |
4464 | pcmpeqb %xmm2, %xmm1 | |
4465 | @@ -2016,7 +2196,7 @@ LABEL(gobble_ashr_15): | |
4466 | sub $0xffff, %edx | |
4467 | jnz LABEL(exit) | |
4468 | ||
4469 | -# ifdef USE_AS_STRNCMP | |
4470 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4471 | sub $16, %r11 | |
4472 | jbe LABEL(strcmp_exitz) | |
4473 | # endif | |
4474 | @@ -2032,9 +2212,9 @@ LABEL(nibble_ashr_15): | |
4475 | test $0x8000, %edx | |
4476 | jnz LABEL(ashr_15_exittail) | |
4477 | ||
4478 | -# ifdef USE_AS_STRNCMP | |
4479 | - test %r11, %r11 | |
4480 | - je LABEL(ashr_15_exittail) | |
4481 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4482 | + cmpq $1, %r11 | |
4483 | + jbe LABEL(ashr_15_exittail) | |
4484 | # endif | |
4485 | ||
4486 | pxor %xmm0, %xmm0 | |
4487 | @@ -2049,6 +2229,7 @@ LABEL(ashr_15_exittail): | |
4488 | ||
4489 | .p2align 4 | |
4490 | LABEL(aftertail): | |
4491 | + TOLOWER (%xmm1, %xmm3) | |
4492 | pcmpeqb %xmm3, %xmm1 | |
4493 | psubb %xmm0, %xmm1 | |
4494 | pmovmskb %xmm1, %edx | |
4495 | @@ -2069,13 +2250,19 @@ LABEL(ret): | |
4496 | LABEL(less16bytes): | |
4497 | bsf %rdx, %rdx /* find and store bit index in %rdx */ | |
4498 | ||
4499 | -# ifdef USE_AS_STRNCMP | |
4500 | +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L | |
4501 | sub %rdx, %r11 | |
4502 | jbe LABEL(strcmp_exitz) | |
4503 | # endif | |
4504 | movzbl (%rsi, %rdx), %ecx | |
4505 | movzbl (%rdi, %rdx), %eax | |
4506 | ||
4507 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
4508 | + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx | |
4509 | + movl (%rdx,%rcx,4), %ecx | |
4510 | + movl (%rdx,%rax,4), %eax | |
4511 | +# endif | |
4512 | + | |
4513 | sub %ecx, %eax | |
4514 | ret | |
4515 | ||
4516 | @@ -2088,6 +2275,12 @@ LABEL(Byte0): | |
4517 | movzx (%rsi), %ecx | |
4518 | movzx (%rdi), %eax | |
4519 | ||
4520 | +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L | |
4521 | + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx | |
4522 | + movl (%rdx,%rcx,4), %ecx | |
4523 | + movl (%rdx,%rax,4), %eax | |
4524 | +# endif | |
4525 | + | |
4526 | sub %ecx, %eax | |
4527 | ret | |
4528 | END (BP_SYM (STRCMP)) | |
4529 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase.S | |
4530 | =================================================================== | |
4531 | --- /dev/null | |
4532 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase.S | |
4533 | @@ -0,0 +1 @@ | |
4534 | +/* In strncase_l.S. */ | |
4535 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase_l-nonascii.c | |
4536 | =================================================================== | |
4537 | --- /dev/null | |
4538 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase_l-nonascii.c | |
4539 | @@ -0,0 +1,8 @@ | |
4540 | +#include <string.h> | |
4541 | + | |
4542 | +extern int __strncasecmp_l_nonascii (__const char *__s1, __const char *__s2, | |
4543 | + size_t __n, __locale_t __loc); /* prototype for the renamed function; declared here before the rename macro takes effect */ | |
4544 | + | |
4545 | +#define __strncasecmp_l __strncasecmp_l_nonascii /* every use of the name __strncasecmp_l in the file included below becomes __strncasecmp_l_nonascii */ | |
4546 | +#define USE_IN_EXTENDED_LOCALE_MODEL 1 /* NOTE(review): presumably selects the locale-taking (_l) variant inside strncase.c; confirm against that file */ | |
4547 | +#include <string/strncase.c> /* pull in the C implementation with the two macros above in effect */ | |
4548 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase_l.S | |
4549 | =================================================================== | |
4550 | --- /dev/null | |
4551 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase_l.S | |
4552 | @@ -0,0 +1,6 @@ | |
4553 | +#define STRCMP __strncasecmp_l /* NOTE(review): strcmp.S appears to use STRCMP as its entry symbol (it ends with END (BP_SYM (STRCMP))); confirm */ | |
4554 | +#define USE_AS_STRNCASECMP_L /* enables the "defined USE_AS_STRNCASECMP_L" conditionals in strcmp.S */ | |
4555 | +#include "strcmp.S" /* assemble the shared implementation with the two macros above in effect */ | |
4556 | + | |
4557 | +weak_alias (__strncasecmp_l, strncasecmp_l) /* also provide the name strncasecmp_l as a weak alias */ | |
4558 | +libc_hidden_def (strncasecmp_l) | |
4559 | Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strnlen.S | |
4560 | =================================================================== | |
4561 | --- /dev/null | |
4562 | +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strnlen.S | |
4563 | @@ -0,0 +1,64 @@ | |
4564 | +/* strnlen(str,maxlen) -- determine the length of the string STR up to MAXLEN. | |
4565 | + Copyright (C) 2010 Free Software Foundation, Inc. | |
4566 | + Contributed by Ulrich Drepper <drepper@redhat.com>. | |
4567 | + This file is part of the GNU C Library. | |
4568 | + | |
4569 | + The GNU C Library is free software; you can redistribute it and/or | |
4570 | + modify it under the terms of the GNU Lesser General Public | |
4571 | + License as published by the Free Software Foundation; either | |
4572 | + version 2.1 of the License, or (at your option) any later version. | |
4573 | + | |
4574 | + The GNU C Library is distributed in the hope that it will be useful, | |
4575 | + but WITHOUT ANY WARRANTY; without even the implied warranty of | |
4576 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
4577 | + Lesser General Public License for more details. | |
4578 | + | |
4579 | + You should have received a copy of the GNU Lesser General Public | |
4580 | + License along with the GNU C Library; if not, write to the Free | |
4581 | + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
4582 | + 02111-1307 USA. */ | |
4583 | + | |
4584 | +#include <sysdep.h> | |
4585 | + | |
4586 | + | |
4587 | + .text | |
4588 | +ENTRY(__strnlen) /* Returns the smaller of maxlen and the offset of the first NUL byte in str. NOTE(review): assumes str in %rdi and maxlen in %rsi per the x86-64 SysV calling convention; confirm */ | |
4589 | + movq %rsi, %rax /* rax = maxlen; returned unchanged at label 3 when no NUL is found in range */ | |
4590 | + testq %rsi, %rsi | |
4591 | + jz 3f /* maxlen == 0: return 0 without touching memory */ | |
4592 | + pxor %xmm2, %xmm2 /* xmm2 = all-zero bytes */ | |
4593 | + movq %rdi, %rcx /* rcx = str, turned into the misalignment offset below */ | |
4594 | + movq %rdi, %r8 /* r8 = original str, used at label 1 to compute the length */ | |
4595 | + movq $16, %r9 | |
4596 | + andq $~15, %rdi /* round rdi down to a 16-byte boundary so movdqa/pcmpeqb loads are aligned */ | |
4597 | + movdqa %xmm2, %xmm1 /* keep a zero vector in xmm1 for the loop; xmm2 is overwritten next */ | |
4598 | + pcmpeqb (%rdi), %xmm2 /* each byte of xmm2 becomes 0xff where the first aligned chunk holds a zero byte */ | |
4599 | + orl $0xffffffff, %r10d /* r10d = all ones regardless of its previous value */ | |
4600 | + subq %rdi, %rcx /* rcx = str - aligned base, i.e. 0..15 bytes that precede the string in this chunk */ | |
4601 | + shll %cl, %r10d /* clear one low mask bit for each byte that lies before str */ | |
4602 | + subq %rcx, %r9 /* r9 = 16 - offset = number of string bytes covered by the first chunk */ | |
4603 | + pmovmskb %xmm2, %edx /* edx = one bit per byte, set where a zero byte was found */ | |
4604 | + andl %r10d, %edx /* ignore zero bytes located before the start of the string */ | |
4605 | + jnz 1f /* NUL found inside the first chunk */ | |
4606 | + subq %r9, %rsi /* remaining = maxlen - bytes already examined */ | |
4607 | + jbe 3f /* maxlen did not extend past the first chunk: return maxlen */ | |
4608 | + | |
4609 | +2: movdqa 16(%rdi), %xmm0 /* main loop: load the next aligned 16-byte chunk */ | |
4610 | + leaq 16(%rdi), %rdi /* advance the chunk pointer; leaq leaves the flags untouched */ | |
4611 | + pcmpeqb %xmm1, %xmm0 /* compare against the zero vector */ | |
4612 | + pmovmskb %xmm0, %edx | |
4613 | + testl %edx, %edx | |
4614 | + jnz 1f /* NUL found in this chunk */ | |
4615 | + subq $16, %rsi /* another 16 bytes consumed from the limit */ | |
4616 | + jnbe 2b /* continue only while more than 16 bytes remained before the subtraction */ | |
4617 | +3: ret /* limit reached first: rax still holds maxlen (or 0 from the early exit) */ | |
4618 | + | |
4619 | +1: subq %r8, %rdi /* rdi = chunk base - original str (can be negative for the first chunk) */ | |
4620 | + bsfl %edx, %edx /* edx = index of the lowest set bit = offset of the first NUL within the chunk */ | |
4621 | + addq %rdi, %rdx /* rdx = offset of the first NUL from str, i.e. the string length */ | |
4622 | + cmpq %rdx, %rax | |
4623 | + cmovnbq %rdx, %rax /* if maxlen >= length return length, otherwise keep maxlen */ | |
4624 | + ret | |
4625 | +END(__strnlen) | |
4626 | +weak_alias (__strnlen, strnlen) | |
4627 | +libc_hidden_def (strnlen) | |
4628 | Index: glibc-2.12-2-gc4ccff1/wcsmbs/wcsatcliff.c | |
4629 | =================================================================== | |
4630 | --- glibc-2.12-2-gc4ccff1.orig/wcsmbs/wcsatcliff.c | |
4631 | +++ glibc-2.12-2-gc4ccff1/wcsmbs/wcsatcliff.c | |
4632 | @@ -16,6 +16,8 @@ | |
4633 | #define MEMCPY wmemcpy | |
4634 | #define MEMPCPY wmempcpy | |
4635 | #define MEMCHR wmemchr | |
4636 | +#define STRCMP wcscmp /* wide-character function substituted for STRCMP in the shared test source included below */ | |
4637 | +#define STRNCMP wcsncmp /* wide-character function substituted for STRNCMP in the shared test source included below */ | |
4638 | ||
4639 | ||
4640 | #include "../string/stratcliff.c" |