]> git.ipfire.org Git - ipfire-2.x.git/blob - src/patches/glibc/glibc-rh601686.patch
Merge branch 'iptables-upnpfw' into core67-merge
[ipfire-2.x.git] / src / patches / glibc / glibc-rh601686.patch
1 2010-11-09 H.J. Lu <hongjiu.lu@intel.com>
2
3 [BZ #12205]
4 * string/test-strncasecmp.c (check_result): New function.
5 (do_one_test): Use it.
6 (check1): New function.
7 (test_main): Use it.
8 * sysdeps/i386/i686/multiarch/strcmp.S (nibble_ashr_use_sse4_2_exit):
9 Support strcasecmp and strncasecmp.
10
11 2010-10-03 Ulrich Drepper <drepper@gmail.com>
12
13 [BZ #12077]
14 * sysdeps/x86_64/strcmp.S: Fix handling of remaining bytes in buffer
15 for strncmp and strncasecmp.
16 * string/stratcliff.c: Add tests for strcmp and strncmp.
17 * wcsmbs/wcsatcliff.c: Adjust for stratcliff change.
18
19 2010-09-20 Ulrich Drepper <drepper@redhat.com>
20
21 * sysdeps/x86_64/strcmp.S: Fix another typo in strncasecmp limit
22 detection.
23
24 2010-08-19 Ulrich Drepper <drepper@redhat.com>
25
26 * sysdeps/x86_64/multiarch/strcmp.S: Fix two typos in strncasecmp
27 handling.
28
29 2010-08-15 Ulrich Drepper <drepper@redhat.com>
30
31 * sysdeps/x86_64/strcmp.S: Use correct register for fourth parameter
32 of strncasecmp_l.
33 * sysdeps/x86_64/multiarch/strcmp.S: Likewise.
34
35 2010-08-14 Ulrich Drepper <drepper@redhat.com>
36
37 * sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add
38 strncase_l-nonascii.
39 * sysdeps/x86_64/multiarch/Makefile [subdir=string] (sysdep_routines):
40 Add strncase_l-ssse3.
41 * sysdeps/x86_64/multiarch/strcmp.S: Prepare for use as strncasecmp.
42 * sysdeps/x86_64/strcmp.S: Likewise.
43 * sysdeps/x86_64/multiarch/strncase_l-ssse3.S: New file.
44 * sysdeps/x86_64/multiarch/strncase_l.S: New file.
45 * sysdeps/x86_64/strncase.S: New file.
46 * sysdeps/x86_64/strncase_l-nonascii.c: New file.
47 * sysdeps/x86_64/strncase_l.S: New file.
48 * string/Makefile (strop-tests): Add strncasecmp.
49 * string/test-strncasecmp.c: New file.
50
51 * sysdeps/x86_64/strcasecmp_l-nonascii.c: Add prototype to avoid
52 warning.
53
54 * sysdeps/x86_64/strcmp.S: Move definition of NO_NOLOCALE_ALIAS to...
55 * sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S: ... here.
56
57 2010-07-31 Ulrich Drepper <drepper@redhat.com>
58
59 * sysdeps/x86_64/multiarch/Makefile [subdir=string] (sysdep_routines):
60 Add strcasecmp_l-ssse3.
61 * sysdeps/x86_64/multiarch/strcmp.S: Add support to compile for
62 strcasecmp.
63 * sysdeps/x86_64/strcmp.S: Allow more flexible compiling of strcasecmp.
64 * sysdeps/x86_64/multiarch/strcasecmp_l.S: New file.
65 * sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S: New file.
66
67 2010-07-30 Ulrich Drepper <drepper@redhat.com>
68
69 * sysdeps/x86_64/multiarch/strcmp.S: Pretty printing.
70
71 * string/Makefile (strop-tests): Add strcasecmp.
72 * sysdeps/x86_64/Makefile [subdir=string] (sysdep_routines): Add
73 strcasecmp_l-nonascii.
74 (gen-as-const-headers): Add locale-defines.sym.
75 * sysdeps/x86_64/strcmp.S: Add support for strcasecmp implementation.
76 * sysdeps/x86_64/strcasecmp.S: New file.
77 * sysdeps/x86_64/strcasecmp_l.S: New file.
78 * sysdeps/x86_64/strcasecmp_l-nonascii.c: New file.
79 * sysdeps/x86_64/locale-defines.sym: New file.
80 * string/test-strcasecmp.c: New file.
81
82 * string/test-strcasestr.c: Test both ends of the range of characters.
83 * sysdeps/x86_64/multiarch/strstr.c: Fix UCHIGH definition.
84
85 2010-07-26 Ulrich Drepper <drepper@redhat.com>
86
87 * string/test-strnlen.c: New file.
88 * string/Makefile (strop-tests): Add strnlen.
89 * string/tester.c (test_strnlen): Add a few more test cases.
90 * string/tst-strlen.c: Better error reporting.
91
92 * sysdeps/x86_64/strnlen.S: New file.
93
94 2010-07-24 Ulrich Drepper <drepper@redhat.com>
95
96 * sysdeps/x86_64/multiarch/strstr.c (__m128i_strloadu_tolower): Use
97 lower-latency instructions.
98
99 2010-07-23 Ulrich Drepper <drepper@redhat.com>
100
101 * string/test-strcasestr.c: New file.
102 * string/test-strstr.c: New file.
103 * string/Makefile (strop-tests): Add strstr and strcasestr.
104 * string/str-two-way.h: Don't undefine MAX.
105 * string/strcasestr.c: Don't define alias if NO_ALIAS is defined.
106
107 2010-07-21 Andreas Schwab <schwab@redhat.com>
108
109 * sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
110 strcasestr-nonascii.
111 (CFLAGS-strcasestr-nonascii.c): Define.
112 * sysdeps/i386/i686/multiarch/strcasestr-nonascii.c: New file.
113 * sysdeps/x86_64/multiarch/strcasestr-nonascii.c (STRSTR_SSE42):
114 Remove unused attribute.
115
116 2010-07-16 Ulrich Drepper <drepper@redhat.com>
117
118 * sysdeps/x86_64/multiarch/strstr.c: Rewrite to avoid indirect function
119 call in strcasestr.
120 * sysdeps/x86_64/multiarch/strcasestr.c: Declare
121 __strcasestr_sse42_nonascii.
122 * sysdeps/x86_64/multiarch/Makefile: Add rules to build
123 strcasestr-nonascii.c.
124 * sysdeps/x86_64/multiarch/strcasestr-nonascii.c: New file.
125
126 Index: glibc-2.12-2-gc4ccff1/string/Makefile
127 ===================================================================
128 --- glibc-2.12-2-gc4ccff1.orig/string/Makefile
129 +++ glibc-2.12-2-gc4ccff1/string/Makefile
130 @@ -48,7 +48,8 @@ o-objects.ob := memcpy.o memset.o memchr
131
132 strop-tests := memchr memcmp memcpy memmove mempcpy memset memccpy \
133 stpcpy stpncpy strcat strchr strcmp strcpy strcspn \
134 - strlen strncmp strncpy strpbrk strrchr strspn memmem
135 + strlen strncmp strncpy strpbrk strrchr strspn memmem \
136 + strstr strcasestr strnlen strcasecmp strncasecmp
137 tests := tester inl-tester noinl-tester testcopy test-ffs \
138 tst-strlen stratcliff tst-svc tst-inlcall \
139 bug-strncat1 bug-strspn1 bug-strpbrk1 tst-bswap \
140 Index: glibc-2.12-2-gc4ccff1/string/str-two-way.h
141 ===================================================================
142 --- glibc-2.12-2-gc4ccff1.orig/string/str-two-way.h
143 +++ glibc-2.12-2-gc4ccff1/string/str-two-way.h
144 @@ -426,5 +426,4 @@ two_way_long_needle (const unsigned char
145 #undef AVAILABLE
146 #undef CANON_ELEMENT
147 #undef CMP_FUNC
148 -#undef MAX
149 #undef RETURN_TYPE
150 Index: glibc-2.12-2-gc4ccff1/string/stratcliff.c
151 ===================================================================
152 --- glibc-2.12-2-gc4ccff1.orig/string/stratcliff.c
153 +++ glibc-2.12-2-gc4ccff1/string/stratcliff.c
154 @@ -47,6 +47,8 @@
155 # define MEMCPY memcpy
156 # define MEMPCPY mempcpy
157 # define MEMCHR memchr
158 +# define STRCMP strcmp
159 +# define STRNCMP strncmp
160 #endif
161
162
163 @@ -277,7 +279,74 @@ do_test (void)
164
165 adr[inner] = L('T');
166 }
167 - }
168 + }
169 +
170 + /* strcmp/wcscmp tests */
171 + for (outer = 1; outer < 32; ++outer)
172 + for (middle = 0; middle < 16; ++middle)
173 + {
174 + MEMSET (adr + middle, L('T'), 256);
175 + adr[256] = L('\0');
176 + MEMSET (dest + nchars - outer, L('T'), outer - 1);
177 + dest[nchars - 1] = L('\0');
178 +
179 + if (STRCMP (adr + middle, dest + nchars - outer) <= 0)
180 + {
181 + printf ("%s 1 flunked for outer = %d, middle = %d\n",
182 + STRINGIFY (STRCMP), outer, middle);
183 + result = 1;
184 + }
185 +
186 + if (STRCMP (dest + nchars - outer, adr + middle) >= 0)
187 + {
188 + printf ("%s 2 flunked for outer = %d, middle = %d\n",
189 + STRINGIFY (STRCMP), outer, middle);
190 + result = 1;
191 + }
192 + }
193 +
194 + /* strncmp/wcsncmp tests */
195 + for (outer = 1; outer < 32; ++outer)
196 + for (middle = 0; middle < 16; ++middle)
197 + {
198 + MEMSET (adr + middle, L('T'), 256);
199 + adr[256] = L('\0');
200 + MEMSET (dest + nchars - outer, L('T'), outer - 1);
201 + dest[nchars - 1] = L('U');
202 +
203 + for (inner = 0; inner < outer; ++inner)
204 + {
205 + if (STRNCMP (adr + middle, dest + nchars - outer, inner) != 0)
206 + {
207 + printf ("%s 1 flunked for outer = %d, middle = %d, "
208 + "inner = %d\n",
209 + STRINGIFY (STRNCMP), outer, middle, inner);
210 + result = 1;
211 + }
212 +
213 + if (STRNCMP (dest + nchars - outer, adr + middle, inner) != 0)
214 + {
215 + printf ("%s 2 flunked for outer = %d, middle = %d, "
216 + "inner = %d\n",
217 + STRINGIFY (STRNCMP), outer, middle, inner);
218 + result = 1;
219 + }
220 + }
221 +
222 + if (STRNCMP (adr + middle, dest + nchars - outer, outer) >= 0)
223 + {
224 + printf ("%s 1 flunked for outer = %d, middle = %d, full\n",
225 + STRINGIFY (STRNCMP), outer, middle);
226 + result = 1;
227 + }
228 +
229 + if (STRNCMP (dest + nchars - outer, adr + middle, outer) <= 0)
230 + {
231 + printf ("%s 2 flunked for outer = %d, middle = %d, full\n",
232 + STRINGIFY (STRNCMP), outer, middle);
233 + result = 1;
234 + }
235 + }
236
237 /* strncpy/wcsncpy tests */
238 adr[nchars - 1] = L('T');
239 Index: glibc-2.12-2-gc4ccff1/string/strcasestr.c
240 ===================================================================
241 --- glibc-2.12-2-gc4ccff1.orig/string/strcasestr.c
242 +++ glibc-2.12-2-gc4ccff1/string/strcasestr.c
243 @@ -103,4 +103,6 @@ STRCASESTR (const char *haystack_start,
244
245 #undef LONG_NEEDLE_THRESHOLD
246
247 +#ifndef NO_ALIAS
248 weak_alias (__strcasestr, strcasestr)
249 +#endif
250 Index: glibc-2.12-2-gc4ccff1/string/test-strcasecmp.c
251 ===================================================================
252 --- /dev/null
253 +++ glibc-2.12-2-gc4ccff1/string/test-strcasecmp.c
254 @@ -0,0 +1,276 @@
255 +/* Test and measure strcasecmp functions.
256 + Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc.
257 + This file is part of the GNU C Library.
258 + Written by Jakub Jelinek <jakub@redhat.com>, 1999.
259 +
260 + The GNU C Library is free software; you can redistribute it and/or
261 + modify it under the terms of the GNU Lesser General Public
262 + License as published by the Free Software Foundation; either
263 + version 2.1 of the License, or (at your option) any later version.
264 +
265 + The GNU C Library is distributed in the hope that it will be useful,
266 + but WITHOUT ANY WARRANTY; without even the implied warranty of
267 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
268 + Lesser General Public License for more details.
269 +
270 + You should have received a copy of the GNU Lesser General Public
271 + License along with the GNU C Library; if not, write to the Free
272 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
273 + 02111-1307 USA. */
274 +
275 +#include <ctype.h>
276 +#define TEST_MAIN
277 +#include "test-string.h"
278 +
279 +typedef int (*proto_t) (const char *, const char *);
280 +static int simple_strcasecmp (const char *, const char *);
281 +static int stupid_strcasecmp (const char *, const char *);
282 +
283 +IMPL (stupid_strcasecmp, 0)
284 +IMPL (simple_strcasecmp, 0)
285 +IMPL (strcasecmp, 1)
286 +
287 +static int
288 +simple_strcasecmp (const char *s1, const char *s2)
289 +{
290 + int ret;
291 +
292 + while ((ret = ((unsigned char) tolower (*s1)
293 + - (unsigned char) tolower (*s2))) == 0
294 + && *s1++)
295 + ++s2;
296 + return ret;
297 +}
298 +
299 +static int
300 +stupid_strcasecmp (const char *s1, const char *s2)
301 +{
302 + size_t ns1 = strlen (s1) + 1, ns2 = strlen (s2) + 1;
303 + size_t n = ns1 < ns2 ? ns1 : ns2;
304 + int ret = 0;
305 +
306 + while (n--)
307 + {
308 + if ((ret = ((unsigned char) tolower (*s1)
309 + - (unsigned char) tolower (*s2))) != 0)
310 + break;
311 + ++s1;
312 + ++s2;
313 + }
314 + return ret;
315 +}
316 +
317 +static void
318 +do_one_test (impl_t *impl, const char *s1, const char *s2, int exp_result)
319 +{
320 + int result = CALL (impl, s1, s2);
321 + if ((exp_result == 0 && result != 0)
322 + || (exp_result < 0 && result >= 0)
323 + || (exp_result > 0 && result <= 0))
324 + {
325 + error (0, 0, "Wrong result in function %s %d %d", impl->name,
326 + result, exp_result);
327 + ret = 1;
328 + return;
329 + }
330 +
331 + if (HP_TIMING_AVAIL)
332 + {
333 + hp_timing_t start __attribute ((unused));
334 + hp_timing_t stop __attribute ((unused));
335 + hp_timing_t best_time = ~ (hp_timing_t) 0;
336 + size_t i;
337 +
338 + for (i = 0; i < 32; ++i)
339 + {
340 + HP_TIMING_NOW (start);
341 + CALL (impl, s1, s2);
342 + HP_TIMING_NOW (stop);
343 + HP_TIMING_BEST (best_time, start, stop);
344 + }
345 +
346 + printf ("\t%zd", (size_t) best_time);
347 + }
348 +}
349 +
350 +static void
351 +do_test (size_t align1, size_t align2, size_t len, int max_char,
352 + int exp_result)
353 +{
354 + size_t i;
355 + char *s1, *s2;
356 +
357 + if (len == 0)
358 + return;
359 +
360 + align1 &= 7;
361 + if (align1 + len + 1 >= page_size)
362 + return;
363 +
364 + align2 &= 7;
365 + if (align2 + len + 1 >= page_size)
366 + return;
367 +
368 + s1 = (char *) (buf1 + align1);
369 + s2 = (char *) (buf2 + align2);
370 +
371 + for (i = 0; i < len; i++)
372 + {
373 + s1[i] = toupper (1 + 23 * i % max_char);
374 + s2[i] = tolower (s1[i]);
375 + }
376 +
377 + s1[len] = s2[len] = 0;
378 + s1[len + 1] = 23;
379 + s2[len + 1] = 24 + exp_result;
380 + if ((s2[len - 1] == 'z' && exp_result == -1)
381 + || (s2[len - 1] == 'a' && exp_result == 1))
382 + s1[len - 1] += exp_result;
383 + else
384 + s2[len - 1] -= exp_result;
385 +
386 + if (HP_TIMING_AVAIL)
387 + printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
388 +
389 + FOR_EACH_IMPL (impl, 0)
390 + do_one_test (impl, s1, s2, exp_result);
391 +
392 + if (HP_TIMING_AVAIL)
393 + putchar ('\n');
394 +}
395 +
396 +static void
397 +do_random_tests (void)
398 +{
399 + size_t i, j, n, align1, align2, pos, len1, len2;
400 + int result;
401 + long r;
402 + unsigned char *p1 = buf1 + page_size - 512;
403 + unsigned char *p2 = buf2 + page_size - 512;
404 +
405 + for (n = 0; n < ITERATIONS; n++)
406 + {
407 + align1 = random () & 31;
408 + if (random () & 1)
409 + align2 = random () & 31;
410 + else
411 + align2 = align1 + (random () & 24);
412 + pos = random () & 511;
413 + j = align1 > align2 ? align1 : align2;
414 + if (pos + j >= 511)
415 + pos = 510 - j - (random () & 7);
416 + len1 = random () & 511;
417 + if (pos >= len1 && (random () & 1))
418 + len1 = pos + (random () & 7);
419 + if (len1 + j >= 512)
420 + len1 = 511 - j - (random () & 7);
421 + if (pos >= len1)
422 + len2 = len1;
423 + else
424 + len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
425 + j = (pos > len2 ? pos : len2) + align1 + 64;
426 + if (j > 512)
427 + j = 512;
428 + for (i = 0; i < j; ++i)
429 + {
430 + p1[i] = tolower (random () & 255);
431 + if (i < len1 + align1 && !p1[i])
432 + {
433 + p1[i] = tolower (random () & 255);
434 + if (!p1[i])
435 + p1[i] = tolower (1 + (random () & 127));
436 + }
437 + }
438 + for (i = 0; i < j; ++i)
439 + {
440 + p2[i] = toupper (random () & 255);
441 + if (i < len2 + align2 && !p2[i])
442 + {
443 + p2[i] = toupper (random () & 255);
444 + if (!p2[i])
445 + toupper (p2[i] = 1 + (random () & 127));
446 + }
447 + }
448 +
449 + result = 0;
450 + memcpy (p2 + align2, p1 + align1, pos);
451 + if (pos < len1)
452 + {
453 + if (tolower (p2[align2 + pos]) == p1[align1 + pos])
454 + {
455 + p2[align2 + pos] = toupper (random () & 255);
456 + if (tolower (p2[align2 + pos]) == p1[align1 + pos])
457 + p2[align2 + pos] = toupper (p1[align1 + pos]
458 + + 3 + (random () & 127));
459 + }
460 +
461 + if (p1[align1 + pos] < tolower (p2[align2 + pos]))
462 + result = -1;
463 + else
464 + result = 1;
465 + }
466 + p1[len1 + align1] = 0;
467 + p2[len2 + align2] = 0;
468 +
469 + FOR_EACH_IMPL (impl, 1)
470 + {
471 + r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2));
472 + /* Test whether, on 64-bit architectures where the ABI requires the
473 + callee to promote, the promotion has been done. */
474 + asm ("" : "=g" (r) : "0" (r));
475 + if ((r == 0 && result)
476 + || (r < 0 && result >= 0)
477 + || (r > 0 && result <= 0))
478 + {
479 + error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
480 + n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
481 + ret = 1;
482 + }
483 + }
484 + }
485 +}
486 +
487 +int
488 +test_main (void)
489 +{
490 + size_t i;
491 +
492 + test_init ();
493 +
494 + printf ("%23s", "");
495 + FOR_EACH_IMPL (impl, 0)
496 + printf ("\t%s", impl->name);
497 + putchar ('\n');
498 +
499 + for (i = 1; i < 16; ++i)
500 + {
501 + do_test (i, i, i, 127, 0);
502 + do_test (i, i, i, 127, 1);
503 + do_test (i, i, i, 127, -1);
504 + }
505 +
506 + for (i = 1; i < 10; ++i)
507 + {
508 + do_test (0, 0, 2 << i, 127, 0);
509 + do_test (0, 0, 2 << i, 254, 0);
510 + do_test (0, 0, 2 << i, 127, 1);
511 + do_test (0, 0, 2 << i, 254, 1);
512 + do_test (0, 0, 2 << i, 127, -1);
513 + do_test (0, 0, 2 << i, 254, -1);
514 + }
515 +
516 + for (i = 1; i < 8; ++i)
517 + {
518 + do_test (i, 2 * i, 8 << i, 127, 0);
519 + do_test (2 * i, i, 8 << i, 254, 0);
520 + do_test (i, 2 * i, 8 << i, 127, 1);
521 + do_test (2 * i, i, 8 << i, 254, 1);
522 + do_test (i, 2 * i, 8 << i, 127, -1);
523 + do_test (2 * i, i, 8 << i, 254, -1);
524 + }
525 +
526 + do_random_tests ();
527 + return ret;
528 +}
529 +
530 +#include "../test-skeleton.c"
531 Index: glibc-2.12-2-gc4ccff1/string/test-strcasestr.c
532 ===================================================================
533 --- /dev/null
534 +++ glibc-2.12-2-gc4ccff1/string/test-strcasestr.c
535 @@ -0,0 +1,197 @@
536 +/* Test and measure strcasestr functions.
537 + Copyright (C) 2010 Free Software Foundation, Inc.
538 + This file is part of the GNU C Library.
539 + Written by Ulrich Drepper <drepper@redhat.com>, 2010.
540 +
541 + The GNU C Library is free software; you can redistribute it and/or
542 + modify it under the terms of the GNU Lesser General Public
543 + License as published by the Free Software Foundation; either
544 + version 2.1 of the License, or (at your option) any later version.
545 +
546 + The GNU C Library is distributed in the hope that it will be useful,
547 + but WITHOUT ANY WARRANTY; without even the implied warranty of
548 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
549 + Lesser General Public License for more details.
550 +
551 + You should have received a copy of the GNU Lesser General Public
552 + License along with the GNU C Library; if not, write to the Free
553 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
554 + 02111-1307 USA. */
555 +
556 +#define TEST_MAIN
557 +#include "test-string.h"
558 +
559 +
560 +#define STRCASESTR simple_strcasestr
561 +#define NO_ALIAS
562 +#define __strncasecmp strncasecmp
563 +#include "strcasestr.c"
564 +
565 +
566 +static char *
567 +stupid_strcasestr (const char *s1, const char *s2)
568 +{
569 + ssize_t s1len = strlen (s1);
570 + ssize_t s2len = strlen (s2);
571 +
572 + if (s2len > s1len)
573 + return NULL;
574 +
575 + for (ssize_t i = 0; i <= s1len - s2len; ++i)
576 + {
577 + size_t j;
578 + for (j = 0; j < s2len; ++j)
579 + if (tolower (s1[i + j]) != tolower (s2[j]))
580 + break;
581 + if (j == s2len)
582 + return (char *) s1 + i;
583 + }
584 +
585 + return NULL;
586 +}
587 +
588 +
589 +typedef char *(*proto_t) (const char *, const char *);
590 +
591 +IMPL (stupid_strcasestr, 0)
592 +IMPL (simple_strcasestr, 0)
593 +IMPL (strcasestr, 1)
594 +
595 +
596 +static void
597 +do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result)
598 +{
599 + char *result = CALL (impl, s1, s2);
600 + if (result != exp_result)
601 + {
602 + error (0, 0, "Wrong result in function %s %s %s", impl->name,
603 + result, exp_result);
604 + ret = 1;
605 + return;
606 + }
607 +
608 + if (HP_TIMING_AVAIL)
609 + {
610 + hp_timing_t start __attribute ((unused));
611 + hp_timing_t stop __attribute ((unused));
612 + hp_timing_t best_time = ~(hp_timing_t) 0;
613 + size_t i;
614 +
615 + for (i = 0; i < 32; ++i)
616 + {
617 + HP_TIMING_NOW (start);
618 + CALL (impl, s1, s2);
619 + HP_TIMING_NOW (stop);
620 + HP_TIMING_BEST (best_time, start, stop);
621 + }
622 +
623 + printf ("\t%zd", (size_t) best_time);
624 + }
625 +}
626 +
627 +
628 +static void
629 +do_test (size_t align1, size_t align2, size_t len1, size_t len2,
630 + int fail)
631 +{
632 + char *s1 = (char *) (buf1 + align1);
633 + char *s2 = (char *) (buf2 + align2);
634 +
635 + static const char d[] = "1234567890abcxyz";
636 +#define dl (sizeof (d) - 1)
637 + char *ss2 = s2;
638 + for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)
639 + {
640 + size_t t = l > dl ? dl : l;
641 + ss2 = mempcpy (ss2, d, t);
642 + }
643 + s2[len2] = '\0';
644 +
645 + if (fail)
646 + {
647 + char *ss1 = s1;
648 + for (size_t l = len1; l > 0; l = l > dl ? l - dl : 0)
649 + {
650 + size_t t = l > dl ? dl : l;
651 + memcpy (ss1, d, t);
652 + ++ss1[len2 > 7 ? 7 : len2 - 1];
653 + ss1 += t;
654 + }
655 + }
656 + else
657 + {
658 + memset (s1, '0', len1);
659 + for (size_t i = 0; i < len2; ++i)
660 + s1[len1 - len2 + i] = toupper (s2[i]);
661 + }
662 + s1[len1] = '\0';
663 +
664 + if (HP_TIMING_AVAIL)
665 + printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
666 + len1, len2, align1, align2, fail ? "fail" : "found");
667 +
668 + FOR_EACH_IMPL (impl, 0)
669 + do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2);
670 +
671 + if (HP_TIMING_AVAIL)
672 + putchar ('\n');
673 +}
674 +
675 +
676 +static int
677 +test_main (void)
678 +{
679 + test_init ();
680 +
681 + printf ("%23s", "");
682 + FOR_EACH_IMPL (impl, 0)
683 + printf ("\t%s", impl->name);
684 + putchar ('\n');
685 +
686 + for (size_t klen = 2; klen < 32; ++klen)
687 + for (size_t hlen = 2 * klen; hlen < 16 * klen; hlen += klen)
688 + {
689 + do_test (0, 0, hlen, klen, 0);
690 + do_test (0, 0, hlen, klen, 1);
691 + do_test (0, 3, hlen, klen, 0);
692 + do_test (0, 3, hlen, klen, 1);
693 + do_test (0, 9, hlen, klen, 0);
694 + do_test (0, 9, hlen, klen, 1);
695 + do_test (0, 15, hlen, klen, 0);
696 + do_test (0, 15, hlen, klen, 1);
697 +
698 + do_test (3, 0, hlen, klen, 0);
699 + do_test (3, 0, hlen, klen, 1);
700 + do_test (3, 3, hlen, klen, 0);
701 + do_test (3, 3, hlen, klen, 1);
702 + do_test (3, 9, hlen, klen, 0);
703 + do_test (3, 9, hlen, klen, 1);
704 + do_test (3, 15, hlen, klen, 0);
705 + do_test (3, 15, hlen, klen, 1);
706 +
707 + do_test (9, 0, hlen, klen, 0);
708 + do_test (9, 0, hlen, klen, 1);
709 + do_test (9, 3, hlen, klen, 0);
710 + do_test (9, 3, hlen, klen, 1);
711 + do_test (9, 9, hlen, klen, 0);
712 + do_test (9, 9, hlen, klen, 1);
713 + do_test (9, 15, hlen, klen, 0);
714 + do_test (9, 15, hlen, klen, 1);
715 +
716 + do_test (15, 0, hlen, klen, 0);
717 + do_test (15, 0, hlen, klen, 1);
718 + do_test (15, 3, hlen, klen, 0);
719 + do_test (15, 3, hlen, klen, 1);
720 + do_test (15, 9, hlen, klen, 0);
721 + do_test (15, 9, hlen, klen, 1);
722 + do_test (15, 15, hlen, klen, 0);
723 + do_test (15, 15, hlen, klen, 1);
724 + }
725 +
726 + do_test (0, 0, page_size - 1, 16, 0);
727 + do_test (0, 0, page_size - 1, 16, 1);
728 +
729 + return ret;
730 +}
731 +
732 +#include "../test-skeleton.c"
733 Index: glibc-2.12-2-gc4ccff1/string/test-strncasecmp.c
734 ===================================================================
735 --- /dev/null
736 +++ glibc-2.12-2-gc4ccff1/string/test-strncasecmp.c
737 @@ -0,0 +1,349 @@
738 +/* Test and measure strncasecmp functions.
739 + Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc.
740 + This file is part of the GNU C Library.
741 + Written by Jakub Jelinek <jakub@redhat.com>, 1999.
742 +
743 + The GNU C Library is free software; you can redistribute it and/or
744 + modify it under the terms of the GNU Lesser General Public
745 + License as published by the Free Software Foundation; either
746 + version 2.1 of the License, or (at your option) any later version.
747 +
748 + The GNU C Library is distributed in the hope that it will be useful,
749 + but WITHOUT ANY WARRANTY; without even the implied warranty of
750 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
751 + Lesser General Public License for more details.
752 +
753 + You should have received a copy of the GNU Lesser General Public
754 + License along with the GNU C Library; if not, write to the Free
755 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
756 + 02111-1307 USA. */
757 +
758 +#include <ctype.h>
759 +#define TEST_MAIN
760 +#include "test-string.h"
761 +
762 +typedef int (*proto_t) (const char *, const char *, size_t);
763 +static int simple_strncasecmp (const char *, const char *, size_t);
764 +static int stupid_strncasecmp (const char *, const char *, size_t);
765 +
766 +IMPL (stupid_strncasecmp, 0)
767 +IMPL (simple_strncasecmp, 0)
768 +IMPL (strncasecmp, 1)
769 +
770 +static int
771 +simple_strncasecmp (const char *s1, const char *s2, size_t n)
772 +{
773 + int ret;
774 +
775 + if (n == 0)
776 + return 0;
777 +
778 + while ((ret = ((unsigned char) tolower (*s1)
779 + - (unsigned char) tolower (*s2))) == 0
780 + && *s1++)
781 + {
782 + if (--n == 0)
783 + return 0;
784 + ++s2;
785 + }
786 + return ret;
787 +}
788 +
789 +static int
790 +stupid_strncasecmp (const char *s1, const char *s2, size_t max)
791 +{
792 + size_t ns1 = strlen (s1) + 1;
793 + size_t ns2 = strlen (s2) + 1;
794 + size_t n = ns1 < ns2 ? ns1 : ns2;
795 + if (n > max)
796 + n = max;
797 + int ret = 0;
798 +
799 + while (n--)
800 + {
801 + if ((ret = ((unsigned char) tolower (*s1)
802 + - (unsigned char) tolower (*s2))) != 0)
803 + break;
804 + ++s1;
805 + ++s2;
806 + }
807 + return ret;
808 +}
809 +
810 +static int
811 +check_result (impl_t *impl, const char *s1, const char *s2, size_t n,
812 + int exp_result)
813 +{
814 + int result = CALL (impl, s1, s2, n);
815 + if ((exp_result == 0 && result != 0)
816 + || (exp_result < 0 && result >= 0)
817 + || (exp_result > 0 && result <= 0))
818 + {
819 + error (0, 0, "Wrong result in function %s %d %d", impl->name,
820 + result, exp_result);
821 + ret = 1;
822 + return -1;
823 + }
824 +
825 + return 0;
826 +}
827 +
828 +static void
829 +do_one_test (impl_t *impl, const char *s1, const char *s2, size_t n,
830 + int exp_result)
831 +{
832 + if (check_result (impl, s1, s2, n, exp_result) < 0)
833 + return;
834 +
835 + if (HP_TIMING_AVAIL)
836 + {
837 + hp_timing_t start __attribute ((unused));
838 + hp_timing_t stop __attribute ((unused));
839 + hp_timing_t best_time = ~ (hp_timing_t) 0;
840 + size_t i;
841 +
842 + for (i = 0; i < 32; ++i)
843 + {
844 + HP_TIMING_NOW (start);
845 + CALL (impl, s1, s2, n);
846 + HP_TIMING_NOW (stop);
847 + HP_TIMING_BEST (best_time, start, stop);
848 + }
849 +
850 + printf ("\t%zd", (size_t) best_time);
851 + }
852 +}
853 +
854 +static void
855 +do_test (size_t align1, size_t align2, size_t n, size_t len, int max_char,
856 + int exp_result)
857 +{
858 + size_t i;
859 + char *s1, *s2;
860 +
861 + if (len == 0)
862 + return;
863 +
864 + align1 &= 7;
865 + if (align1 + len + 1 >= page_size)
866 + return;
867 +
868 + align2 &= 7;
869 + if (align2 + len + 1 >= page_size)
870 + return;
871 +
872 + s1 = (char *) (buf1 + align1);
873 + s2 = (char *) (buf2 + align2);
874 +
875 + for (i = 0; i < len; i++)
876 + {
877 + s1[i] = toupper (1 + 23 * i % max_char);
878 + s2[i] = tolower (s1[i]);
879 + }
880 +
881 + s1[len] = s2[len] = 0;
882 + s1[len + 1] = 23;
883 + s2[len + 1] = 24 + exp_result;
884 + if ((s2[len - 1] == 'z' && exp_result == -1)
885 + || (s2[len - 1] == 'a' && exp_result == 1))
886 + s1[len - 1] += exp_result;
887 + else
888 + s2[len - 1] -= exp_result;
889 +
890 + if (HP_TIMING_AVAIL)
891 + printf ("Length %4zd, alignment %2zd/%2zd:", len, align1, align2);
892 +
893 + FOR_EACH_IMPL (impl, 0)
894 + do_one_test (impl, s1, s2, n, exp_result);
895 +
896 + if (HP_TIMING_AVAIL)
897 + putchar ('\n');
898 +}
899 +
900 +static void
901 +do_random_tests (void)
902 +{
903 + size_t i, j, n, align1, align2, pos, len1, len2;
904 + int result;
905 + long r;
906 + unsigned char *p1 = buf1 + page_size - 512;
907 + unsigned char *p2 = buf2 + page_size - 512;
908 +
909 + for (n = 0; n < ITERATIONS; n++)
910 + {
911 + align1 = random () & 31;
912 + if (random () & 1)
913 + align2 = random () & 31;
914 + else
915 + align2 = align1 + (random () & 24);
916 + pos = random () & 511;
917 + j = align1 > align2 ? align1 : align2;
918 + if (pos + j >= 511)
919 + pos = 510 - j - (random () & 7);
920 + len1 = random () & 511;
921 + if (pos >= len1 && (random () & 1))
922 + len1 = pos + (random () & 7);
923 + if (len1 + j >= 512)
924 + len1 = 511 - j - (random () & 7);
925 + if (pos >= len1)
926 + len2 = len1;
927 + else
928 + len2 = len1 + (len1 != 511 - j ? random () % (511 - j - len1) : 0);
929 + j = (pos > len2 ? pos : len2) + align1 + 64;
930 + if (j > 512)
931 + j = 512;
932 + for (i = 0; i < j; ++i)
933 + {
934 + p1[i] = tolower (random () & 255);
935 + if (i < len1 + align1 && !p1[i])
936 + {
937 + p1[i] = tolower (random () & 255);
938 + if (!p1[i])
939 + p1[i] = tolower (1 + (random () & 127));
940 + }
941 + }
942 + for (i = 0; i < j; ++i)
943 + {
944 + p2[i] = toupper (random () & 255);
945 + if (i < len2 + align2 && !p2[i])
946 + {
947 + p2[i] = toupper (random () & 255);
948 + if (!p2[i])
949 + toupper (p2[i] = 1 + (random () & 127));
950 + }
951 + }
952 +
953 + result = 0;
954 + memcpy (p2 + align2, p1 + align1, pos);
955 + if (pos < len1)
956 + {
957 + if (tolower (p2[align2 + pos]) == p1[align1 + pos])
958 + {
959 + p2[align2 + pos] = toupper (random () & 255);
960 + if (tolower (p2[align2 + pos]) == p1[align1 + pos])
961 + p2[align2 + pos] = toupper (p1[align1 + pos]
962 + + 3 + (random () & 127));
963 + }
964 +
965 + if (p1[align1 + pos] < tolower (p2[align2 + pos]))
966 + result = -1;
967 + else
968 + result = 1;
969 + }
970 + p1[len1 + align1] = 0;
971 + p2[len2 + align2] = 0;
972 +
973 + FOR_EACH_IMPL (impl, 1)
974 + {
975 + r = CALL (impl, (char *) (p1 + align1), (char *) (p2 + align2),
976 + pos + 1 + (random () & 255));
977 + /* Test whether, on 64-bit architectures where the ABI requires the
978 + callee to promote, the promotion has been done. */
979 + asm ("" : "=g" (r) : "0" (r));
980 + if ((r == 0 && result)
981 + || (r < 0 && result >= 0)
982 + || (r > 0 && result <= 0))
983 + {
984 + error (0, 0, "Iteration %zd - wrong result in function %s (%zd, %zd, %zd, %zd, %zd) %ld != %d, p1 %p p2 %p",
985 + n, impl->name, align1, align2, len1, len2, pos, r, result, p1, p2);
986 + ret = 1;
987 + }
988 + }
989 + }
990 +}
991 +
992 +
993 +static void
994 +check1 (void)
995 +{
996 + static char cp [4096+16] __attribute__ ((aligned(4096)));
997 + static char gotrel[4096] __attribute__ ((aligned(4096)));
998 + char *s1 = cp + 0xffa;
999 + char *s2 = gotrel + 0xcbe;
1000 + int exp_result;
1001 + size_t n = 6;
1002 +
1003 + strcpy (s1, "gottpoff");
1004 + strcpy (s2, "GOTPLT");
1005 +
1006 + exp_result = simple_strncasecmp (s1, s2, n);
1007 + FOR_EACH_IMPL (impl, 0)
1008 + check_result (impl, s1, s2, n, exp_result);
1009 +}
1010 +
1011 +int
1012 +test_main (void)
1013 +{
1014 + size_t i;
1015 +
1016 + test_init ();
1017 +
1018 + check1 ();
1019 +
1020 + printf ("%23s", "");
1021 + FOR_EACH_IMPL (impl, 0)
1022 + printf ("\t%s", impl->name);
1023 + putchar ('\n');
1024 +
1025 + for (i = 1; i < 16; ++i)
1026 + {
1027 + do_test (i, i, i - 1, i, 127, 0);
1028 +
1029 + do_test (i, i, i, i, 127, 0);
1030 + do_test (i, i, i, i, 127, 1);
1031 + do_test (i, i, i, i, 127, -1);
1032 +
1033 + do_test (i, i, i + 1, i, 127, 0);
1034 + do_test (i, i, i + 1, i, 127, 1);
1035 + do_test (i, i, i + 1, i, 127, -1);
1036 + }
1037 +
1038 + for (i = 1; i < 10; ++i)
1039 + {
1040 + do_test (0, 0, (2 << i) - 1, 2 << i, 127, 0);
1041 + do_test (0, 0, 2 << i, 2 << i, 254, 0);
1042 + do_test (0, 0, (2 << i) + 1, 2 << i, 127, 0);
1043 +
1044 + do_test (0, 0, (2 << i) + 1, 2 << i, 254, 0);
1045 +
1046 + do_test (0, 0, 2 << i, 2 << i, 127, 1);
1047 + do_test (0, 0, (2 << i) + 10, 2 << i, 127, 1);
1048 +
1049 + do_test (0, 0, 2 << i, 2 << i, 254, 1);
1050 + do_test (0, 0, (2 << i) + 10, 2 << i, 254, 1);
1051 +
1052 + do_test (0, 0, 2 << i, 2 << i, 127, -1);
1053 + do_test (0, 0, (2 << i) + 10, 2 << i, 127, -1);
1054 +
1055 + do_test (0, 0, 2 << i, 2 << i, 254, -1);
1056 + do_test (0, 0, (2 << i) + 10, 2 << i, 254, -1);
1057 + }
1058 +
1059 + for (i = 1; i < 8; ++i)
1060 + {
1061 + do_test (i, 2 * i, (8 << i) - 1, 8 << i, 127, 0);
1062 + do_test (i, 2 * i, 8 << i, 8 << i, 127, 0);
1063 + do_test (i, 2 * i, (8 << i) + 100, 8 << i, 127, 0);
1064 +
1065 + do_test (2 * i, i, (8 << i) - 1, 8 << i, 254, 0);
1066 + do_test (2 * i, i, 8 << i, 8 << i, 254, 0);
1067 + do_test (2 * i, i, (8 << i) + 100, 8 << i, 254, 0);
1068 +
1069 + do_test (i, 2 * i, 8 << i, 8 << i, 127, 1);
1070 + do_test (i, 2 * i, (8 << i) + 100, 8 << i, 127, 1);
1071 +
1072 + do_test (2 * i, i, 8 << i, 8 << i, 254, 1);
1073 + do_test (2 * i, i, (8 << i) + 100, 8 << i, 254, 1);
1074 +
1075 + do_test (i, 2 * i, 8 << i, 8 << i, 127, -1);
1076 + do_test (i, 2 * i, (8 << i) + 100, 8 << i, 127, -1);
1077 +
1078 + do_test (2 * i, i, 8 << i, 8 << i, 254, -1);
1079 + do_test (2 * i, i, (8 << i) + 100, 8 << i, 254, -1);
1080 + }
1081 +
1082 + do_random_tests ();
1083 + return ret;
1084 +}
1085 +
1086 +#include "../test-skeleton.c"
1087 Index: glibc-2.12-2-gc4ccff1/string/test-strnlen.c
1088 ===================================================================
1089 --- /dev/null
1090 +++ glibc-2.12-2-gc4ccff1/string/test-strnlen.c
1091 @@ -0,0 +1,197 @@
1092 +/* Test and measure strnlen functions.
1093 + Copyright (C) 1999, 2002, 2003, 2005, 2010 Free Software Foundation, Inc.
1094 + This file is part of the GNU C Library.
1095 + Written by Jakub Jelinek <jakub@redhat.com>, 1999.
1096 +
1097 + The GNU C Library is free software; you can redistribute it and/or
1098 + modify it under the terms of the GNU Lesser General Public
1099 + License as published by the Free Software Foundation; either
1100 + version 2.1 of the License, or (at your option) any later version.
1101 +
1102 + The GNU C Library is distributed in the hope that it will be useful,
1103 + but WITHOUT ANY WARRANTY; without even the implied warranty of
1104 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1105 + Lesser General Public License for more details.
1106 +
1107 + You should have received a copy of the GNU Lesser General Public
1108 + License along with the GNU C Library; if not, write to the Free
1109 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1110 + 02111-1307 USA. */
1111 +
1112 +#define TEST_MAIN
1113 +#include "test-string.h"
1114 +
1115 +typedef size_t (*proto_t) (const char *, size_t);
1116 +size_t simple_strnlen (const char *, size_t);
1117 +
1118 +IMPL (simple_strnlen, 0)
1119 +IMPL (strnlen, 1)
1120 +
1121 +size_t
1122 +simple_strnlen (const char *s, size_t maxlen)
1123 +{
1124 + size_t i;
1125 +
1126 + for (i = 0; i < maxlen && s[i]; ++i);
1127 + return i;
1128 +}
1129 +
1130 +static void
1131 +do_one_test (impl_t *impl, const char *s, size_t maxlen, size_t exp_len)
1132 +{
1133 + size_t len = CALL (impl, s, maxlen);
1134 + if (len != exp_len)
1135 + {
1136 + error (0, 0, "Wrong result in function %s %zd %zd", impl->name,
1137 + len, exp_len);
1138 + ret = 1;
1139 + return;
1140 + }
1141 +
1142 + if (HP_TIMING_AVAIL)
1143 + {
1144 + hp_timing_t start __attribute ((unused));
1145 + hp_timing_t stop __attribute ((unused));
1146 + hp_timing_t best_time = ~ (hp_timing_t) 0;
1147 + size_t i;
1148 +
1149 + for (i = 0; i < 32; ++i)
1150 + {
1151 + HP_TIMING_NOW (start);
1152 + CALL (impl, s, maxlen);
1153 + HP_TIMING_NOW (stop);
1154 + HP_TIMING_BEST (best_time, start, stop);
1155 + }
1156 +
1157 + printf ("\t%zd", (size_t) best_time);
1158 + }
1159 +}
1160 +
1161 +static void
1162 +do_test (size_t align, size_t len, size_t maxlen, int max_char)
1163 +{
1164 + size_t i;
1165 +
1166 + align &= 7;
1167 + if (align + len >= page_size)
1168 + return;
1169 +
1170 + for (i = 0; i < len; ++i)
1171 + buf1[align + i] = 1 + 7 * i % max_char;
1172 + buf1[align + len] = 0;
1173 +
1174 + if (HP_TIMING_AVAIL)
1175 + printf ("Length %4zd, alignment %2zd:", len, align);
1176 +
1177 + FOR_EACH_IMPL (impl, 0)
1178 + do_one_test (impl, (char *) (buf1 + align), maxlen, MIN (len, maxlen));
1179 +
1180 + if (HP_TIMING_AVAIL)
1181 + putchar ('\n');
1182 +}
1183 +
1184 +static void
1185 +do_random_tests (void)
1186 +{
1187 + size_t i, j, n, align, len;
1188 + unsigned char *p = buf1 + page_size - 512;
1189 +
1190 + for (n = 0; n < ITERATIONS; n++)
1191 + {
1192 + align = random () & 15;
1193 + len = random () & 511;
1194 + if (len + align > 510)
1195 + len = 511 - align - (random () & 7);
1196 + j = len + align + 64;
1197 + if (j > 512)
1198 + j = 512;
1199 +
1200 + for (i = 0; i < j; i++)
1201 + {
1202 + if (i == len + align)
1203 + p[i] = 0;
1204 + else
1205 + {
1206 + p[i] = random () & 255;
1207 + if (i >= align && i < len + align && !p[i])
1208 + p[i] = (random () & 127) + 1;
1209 + }
1210 + }
1211 +
1212 + FOR_EACH_IMPL (impl, 1)
1213 + {
1214 + if (len > 0
1215 + && CALL (impl, (char *) (p + align), len - 1) != len - 1)
1216 + {
1217 + error (0, 0, "Iteration %zd (limited) - wrong result in function %s (%zd) %zd != %zd, p %p",
1218 + n, impl->name, align,
1219 + CALL (impl, (char *) (p + align), len - 1), len - 1, p);
1220 + ret = 1;
1221 + }
1222 + if (CALL (impl, (char *) (p + align), len) != len)
1223 + {
1224 + error (0, 0, "Iteration %zd (exact) - wrong result in function %s (%zd) %zd != %zd, p %p",
1225 + n, impl->name, align,
1226 + CALL (impl, (char *) (p + align), len), len, p);
1227 + ret = 1;
1228 + }
1229 + if (CALL (impl, (char *) (p + align), len + 1) != len)
1230 + {
1231 + error (0, 0, "Iteration %zd (long) - wrong result in function %s (%zd) %zd != %zd, p %p",
1232 + n, impl->name, align,
1233 + CALL (impl, (char *) (p + align), len + 1), len, p);
1234 + ret = 1;
1235 + }
1236 + }
1237 + }
1238 +}
1239 +
1240 +int
1241 +test_main (void)
1242 +{
1243 + size_t i;
1244 +
1245 + test_init ();
1246 +
1247 + printf ("%20s", "");
1248 + FOR_EACH_IMPL (impl, 0)
1249 + printf ("\t%s", impl->name);
1250 + putchar ('\n');
1251 +
1252 + for (i = 1; i < 8; ++i)
1253 + {
1254 + do_test (0, i, i - 1, 127);
1255 + do_test (0, i, i, 127);
1256 + do_test (0, i, i + 1, 127);
1257 + }
1258 +
1259 + for (i = 1; i < 8; ++i)
1260 + {
1261 + do_test (i, i, i - 1, 127);
1262 + do_test (i, i, i, 127);
1263 + do_test (i, i, i + 1, 127);
1264 + }
1265 +
1266 + for (i = 2; i <= 10; ++i)
1267 + {
1268 + do_test (0, 1 << i, 5000, 127);
1269 + do_test (1, 1 << i, 5000, 127);
1270 + }
1271 +
1272 + for (i = 1; i < 8; ++i)
1273 + do_test (0, i, 5000, 255);
1274 +
1275 + for (i = 1; i < 8; ++i)
1276 + do_test (i, i, 5000, 255);
1277 +
1278 + for (i = 2; i <= 10; ++i)
1279 + {
1280 + do_test (0, 1 << i, 5000, 255);
1281 + do_test (1, 1 << i, 5000, 255);
1282 + }
1283 +
1284 + do_random_tests ();
1285 + return ret;
1286 +}
1287 +
1288 +#include "../test-skeleton.c"
1289 Index: glibc-2.12-2-gc4ccff1/string/test-strstr.c
1290 ===================================================================
1291 --- /dev/null
1292 +++ glibc-2.12-2-gc4ccff1/string/test-strstr.c
1293 @@ -0,0 +1,194 @@
1294 +/* Test and measure strstr functions.
1295 + Copyright (C) 2010 Free Software Foundation, Inc.
1296 + This file is part of the GNU C Library.
1297 + Written by Ulrich Drepper <drepper@redhat.com>, 2010.
1298 +
1299 + The GNU C Library is free software; you can redistribute it and/or
1300 + modify it under the terms of the GNU Lesser General Public
1301 + License as published by the Free Software Foundation; either
1302 + version 2.1 of the License, or (at your option) any later version.
1303 +
1304 + The GNU C Library is distributed in the hope that it will be useful,
1305 + but WITHOUT ANY WARRANTY; without even the implied warranty of
1306 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1307 + Lesser General Public License for more details.
1308 +
1309 + You should have received a copy of the GNU Lesser General Public
1310 + License along with the GNU C Library; if not, write to the Free
1311 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1312 + 02111-1307 USA. */
1313 +
1314 +#define TEST_MAIN
1315 +#include "test-string.h"
1316 +
1317 +
1318 +#define STRSTR simple_strstr
1319 +#include "strstr.c"
1320 +
1321 +
1322 +static char *
1323 +stupid_strstr (const char *s1, const char *s2)
1324 +{
1325 + ssize_t s1len = strlen (s1);
1326 + ssize_t s2len = strlen (s2);
1327 +
1328 + if (s2len > s1len)
1329 + return NULL;
1330 +
1331 + for (ssize_t i = 0; i <= s1len - s2len; ++i)
1332 + {
1333 + size_t j;
1334 + for (j = 0; j < s2len; ++j)
1335 + if (s1[i + j] != s2[j])
1336 + break;
1337 + if (j == s2len)
1338 + return (char *) s1 + i;
1339 + }
1340 +
1341 + return NULL;
1342 +}
1343 +
1344 +
1345 +typedef char *(*proto_t) (const char *, const char *);
1346 +
1347 +IMPL (stupid_strstr, 0)
1348 +IMPL (simple_strstr, 0)
1349 +IMPL (strstr, 1)
1350 +
1351 +
1352 +static void
1353 +do_one_test (impl_t *impl, const char *s1, const char *s2, char *exp_result)
1354 +{
1355 + char *result = CALL (impl, s1, s2);
1356 + if (result != exp_result)
1357 + {
1358 + error (0, 0, "Wrong result in function %s %s %s", impl->name,
1359 + result, exp_result);
1360 + ret = 1;
1361 + return;
1362 + }
1363 +
1364 + if (HP_TIMING_AVAIL)
1365 + {
1366 + hp_timing_t start __attribute ((unused));
1367 + hp_timing_t stop __attribute ((unused));
1368 + hp_timing_t best_time = ~(hp_timing_t) 0;
1369 + size_t i;
1370 +
1371 + for (i = 0; i < 32; ++i)
1372 + {
1373 + HP_TIMING_NOW (start);
1374 + CALL (impl, s1, s2);
1375 + HP_TIMING_NOW (stop);
1376 + HP_TIMING_BEST (best_time, start, stop);
1377 + }
1378 +
1379 + printf ("\t%zd", (size_t) best_time);
1380 + }
1381 +}
1382 +
1383 +
1384 +static void
1385 +do_test (size_t align1, size_t align2, size_t len1, size_t len2,
1386 + int fail)
1387 +{
1388 + char *s1 = (char *) (buf1 + align1);
1389 + char *s2 = (char *) (buf2 + align2);
1390 +
1391 + static const char d[] = "1234567890abcdef";
1392 +#define dl (sizeof (d) - 1)
1393 + char *ss2 = s2;
1394 + for (size_t l = len2; l > 0; l = l > dl ? l - dl : 0)
1395 + {
1396 + size_t t = l > dl ? dl : l;
1397 + ss2 = mempcpy (ss2, d, t);
1398 + }
1399 + s2[len2] = '\0';
1400 +
1401 + if (fail)
1402 + {
1403 + char *ss1 = s1;
1404 + for (size_t l = len1; l > 0; l = l > dl ? l - dl : 0)
1405 + {
1406 + size_t t = l > dl ? dl : l;
1407 + memcpy (ss1, d, t);
1408 + ++ss1[len2 > 7 ? 7 : len2 - 1];
1409 + ss1 += t;
1410 + }
1411 + }
1412 + else
1413 + {
1414 + memset (s1, '0', len1);
1415 + memcpy (s1 + len1 - len2, s2, len2);
1416 + }
1417 + s1[len1] = '\0';
1418 +
1419 + if (HP_TIMING_AVAIL)
1420 + printf ("Length %4zd/%zd, alignment %2zd/%2zd, %s:",
1421 + len1, len2, align1, align2, fail ? "fail" : "found");
1422 +
1423 + FOR_EACH_IMPL (impl, 0)
1424 + do_one_test (impl, s1, s2, fail ? NULL : s1 + len1 - len2);
1425 +
1426 + if (HP_TIMING_AVAIL)
1427 + putchar ('\n');
1428 +}
1429 +
1430 +
1431 +static int
1432 +test_main (void)
1433 +{
1434 + test_init ();
1435 +
1436 + printf ("%23s", "");
1437 + FOR_EACH_IMPL (impl, 0)
1438 + printf ("\t%s", impl->name);
1439 + putchar ('\n');
1440 +
1441 + for (size_t klen = 2; klen < 32; ++klen)
1442 + for (size_t hlen = 2 * klen; hlen < 16 * klen; hlen += klen)
1443 + {
1444 + do_test (0, 0, hlen, klen, 0);
1445 + do_test (0, 0, hlen, klen, 1);
1446 + do_test (0, 3, hlen, klen, 0);
1447 + do_test (0, 3, hlen, klen, 1);
1448 + do_test (0, 9, hlen, klen, 0);
1449 + do_test (0, 9, hlen, klen, 1);
1450 + do_test (0, 15, hlen, klen, 0);
1451 + do_test (0, 15, hlen, klen, 1);
1452 +
1453 + do_test (3, 0, hlen, klen, 0);
1454 + do_test (3, 0, hlen, klen, 1);
1455 + do_test (3, 3, hlen, klen, 0);
1456 + do_test (3, 3, hlen, klen, 1);
1457 + do_test (3, 9, hlen, klen, 0);
1458 + do_test (3, 9, hlen, klen, 1);
1459 + do_test (3, 15, hlen, klen, 0);
1460 + do_test (3, 15, hlen, klen, 1);
1461 +
1462 + do_test (9, 0, hlen, klen, 0);
1463 + do_test (9, 0, hlen, klen, 1);
1464 + do_test (9, 3, hlen, klen, 0);
1465 + do_test (9, 3, hlen, klen, 1);
1466 + do_test (9, 9, hlen, klen, 0);
1467 + do_test (9, 9, hlen, klen, 1);
1468 + do_test (9, 15, hlen, klen, 0);
1469 + do_test (9, 15, hlen, klen, 1);
1470 +
1471 + do_test (15, 0, hlen, klen, 0);
1472 + do_test (15, 0, hlen, klen, 1);
1473 + do_test (15, 3, hlen, klen, 0);
1474 + do_test (15, 3, hlen, klen, 1);
1475 + do_test (15, 9, hlen, klen, 0);
1476 + do_test (15, 9, hlen, klen, 1);
1477 + do_test (15, 15, hlen, klen, 0);
1478 + do_test (15, 15, hlen, klen, 1);
1479 + }
1480 +
1481 + do_test (0, 0, page_size - 1, 16, 0);
1482 + do_test (0, 0, page_size - 1, 16, 1);
1483 +
1484 + return ret;
1485 +}
1486 +
1487 +#include "../test-skeleton.c"
1488 Index: glibc-2.12-2-gc4ccff1/string/tester.c
1489 ===================================================================
1490 --- glibc-2.12-2-gc4ccff1.orig/string/tester.c
1491 +++ glibc-2.12-2-gc4ccff1/string/tester.c
1492 @@ -441,20 +441,21 @@ test_strnlen (void)
1493 check (strnlen ("", 10) == 0, 1); /* Empty. */
1494 check (strnlen ("a", 10) == 1, 2); /* Single char. */
1495 check (strnlen ("abcd", 10) == 4, 3); /* Multiple chars. */
1496 - check (strnlen ("foo", (size_t)-1) == 3, 4); /* limits of n. */
1497 + check (strnlen ("foo", (size_t) -1) == 3, 4); /* limits of n. */
1498 + check (strnlen ("abcd", 0) == 0, 5); /* Restricted. */
1499 + check (strnlen ("abcd", 1) == 1, 6); /* Restricted. */
1500 + check (strnlen ("abcd", 2) == 2, 7); /* Restricted. */
1501 + check (strnlen ("abcd", 3) == 3, 8); /* Restricted. */
1502 + check (strnlen ("abcd", 4) == 4, 9); /* Restricted. */
1503
1504 - {
1505 - char buf[4096];
1506 - int i;
1507 - char *p;
1508 - for (i=0; i < 0x100; i++)
1509 - {
1510 - p = (char *) ((unsigned long int)(buf + 0xff) & ~0xff) + i;
1511 - strcpy (p, "OK");
1512 - strcpy (p+3, "BAD/WRONG");
1513 - check (strnlen (p, 100) == 2, 5+i);
1514 - }
1515 - }
1516 + char buf[4096];
1517 + for (int i = 0; i < 0x100; ++i)
1518 + {
1519 + char *p = (char *) ((unsigned long int)(buf + 0xff) & ~0xff) + i;
1520 + strcpy (p, "OK");
1521 + strcpy (p + 3, "BAD/WRONG");
1522 + check (strnlen (p, 100) == 2, 10 + i);
1523 + }
1524 }
1525
1526 static void
1527 Index: glibc-2.12-2-gc4ccff1/string/tst-strlen.c
1528 ===================================================================
1529 --- glibc-2.12-2-gc4ccff1.orig/string/tst-strlen.c
1530 +++ glibc-2.12-2-gc4ccff1/string/tst-strlen.c
1531 @@ -31,11 +31,21 @@ main(int argc, char *argv[])
1532 buf[words * 4 + 3] = (last & 8) != 0 ? 'e' : '\0';
1533 buf[words * 4 + 4] = '\0';
1534
1535 - if (strlen (buf) != words * 4 + lens[last]
1536 - || strnlen (buf, -1) != words * 4 + lens[last])
1537 + if (strlen (buf) != words * 4 + lens[last])
1538 {
1539 - printf ("failed for base=%Zu, words=%Zu, and last=%Zu\n",
1540 - base, words, last);
1541 + printf ("\
1542 +strlen failed for base=%Zu, words=%Zu, and last=%Zu (is %zd, expected %zd)\n",
1543 + base, words, last,
1544 + strlen (buf), words * 4 + lens[last]);
1545 + return 1;
1546 + }
1547 +
1548 + if (strnlen (buf, -1) != words * 4 + lens[last])
1549 + {
1550 + printf ("\
1551 +strnlen failed for base=%Zu, words=%Zu, and last=%Zu (is %zd, expected %zd)\n",
1552 + base, words, last,
1553 + strnlen (buf, -1), words * 4 + lens[last]);
1554 return 1;
1555 }
1556 }
1557 Index: glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/Makefile
1558 ===================================================================
1559 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/i386/i686/multiarch/Makefile
1560 +++ glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/Makefile
1561 @@ -9,7 +9,7 @@ sysdep_routines += bzero-sse2 memset-sse
1562 memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
1563 memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
1564 strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
1565 - memcmp-ssse3 memcmp-sse4
1566 + memcmp-ssse3 memcmp-sse4 strcasestr-nonascii
1567 ifeq (yes,$(config-cflags-sse4))
1568 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
1569 CFLAGS-strcspn-c.c += -msse4
1570 @@ -17,6 +17,7 @@ CFLAGS-strpbrk-c.c += -msse4
1571 CFLAGS-strspn-c.c += -msse4
1572 CFLAGS-strstr.c += -msse4
1573 CFLAGS-strcasestr.c += -msse4
1574 +CFLAGS-strcasestr-nonascii.c += -msse4
1575 endif
1576 endif
1577
1578 Index: glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c
1579 ===================================================================
1580 --- /dev/null
1581 +++ glibc-2.12-2-gc4ccff1/sysdeps/i386/i686/multiarch/strcasestr-nonascii.c
1582 @@ -0,0 +1,2 @@
1583 +#include <nmmintrin.h>
1584 +#include <sysdeps/x86_64/multiarch/strcasestr-nonascii.c>
1585 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/Makefile
1586 ===================================================================
1587 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/Makefile
1588 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/Makefile
1589 @@ -12,7 +12,8 @@ sysdep_routines += _mcount
1590 endif
1591
1592 ifeq ($(subdir),string)
1593 -sysdep_routines += cacheinfo
1594 +sysdep_routines += cacheinfo strcasecmp_l-nonascii strncase_l-nonascii
1595 +gen-as-const-headers += locale-defines.sym
1596 endif
1597
1598 ifeq ($(subdir),elf)
1599 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/locale-defines.sym
1600 ===================================================================
1601 --- /dev/null
1602 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/locale-defines.sym
1603 @@ -0,0 +1,11 @@
1604 +#include <locale/localeinfo.h>
1605 +#include <langinfo.h>
1606 +#include <stddef.h>
1607 +
1608 +--
1609 +
1610 +LOCALE_T___LOCALES offsetof (struct __locale_struct, __locales)
1611 +LC_CTYPE
1612 +_NL_CTYPE_NONASCII_CASE
1613 +LOCALE_DATA_VALUES offsetof (struct __locale_data, values)
1614 +SIZEOF_VALUES sizeof (((struct __locale_data *) 0)->values[0])
1615 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/Makefile
1616 ===================================================================
1617 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/Makefile
1618 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/Makefile
1619 @@ -5,7 +5,9 @@ endif
1620
1621 ifeq ($(subdir),string)
1622 sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
1623 - strend-sse4 memcmp-sse4
1624 + strend-sse4 memcmp-sse4 \
1625 + strcasestr-nonascii strcasecmp_l-ssse3 \
1626 + strncase_l-ssse3
1627 ifeq (yes,$(config-cflags-sse4))
1628 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
1629 CFLAGS-strcspn-c.c += -msse4
1630 @@ -13,5 +15,6 @@ CFLAGS-strpbrk-c.c += -msse4
1631 CFLAGS-strspn-c.c += -msse4
1632 CFLAGS-strstr.c += -msse4
1633 CFLAGS-strcasestr.c += -msse4
1634 +CFLAGS-strcasestr-nonascii.c += -msse4
1635 endif
1636 endif
1637 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S
1638 ===================================================================
1639 --- /dev/null
1640 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasecmp_l-ssse3.S
1641 @@ -0,0 +1,6 @@
1642 +#define USE_SSSE3 1
1643 +#define USE_AS_STRCASECMP_L
1644 +#define NO_NOLOCALE_ALIAS
1645 +#define STRCMP __strcasecmp_l_ssse3
1646 +#define __strcasecmp __strcasecmp_ssse3
1647 +#include "../strcmp.S"
1648 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasecmp_l.S
1649 ===================================================================
1650 --- /dev/null
1651 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasecmp_l.S
1652 @@ -0,0 +1,6 @@
1653 +#define STRCMP __strcasecmp_l
1654 +#define USE_AS_STRCASECMP_L
1655 +#include "strcmp.S"
1656 +
1657 +weak_alias (__strcasecmp_l, strcasecmp_l)
1658 +libc_hidden_def (strcasecmp_l)
1659 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasestr-nonascii.c
1660 ===================================================================
1661 --- /dev/null
1662 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasestr-nonascii.c
1663 @@ -0,0 +1,50 @@
1664 +/* strcasestr for non-ASCII locales with SSE4.2 intrinsics
1665 + Copyright (C) 2010 Free Software Foundation, Inc.
1666 + This file is part of the GNU C Library.
1667 +
1668 + The GNU C Library is free software; you can redistribute it and/or
1669 + modify it under the terms of the GNU Lesser General Public
1670 + License as published by the Free Software Foundation; either
1671 + version 2.1 of the License, or (at your option) any later version.
1672 +
1673 + The GNU C Library is distributed in the hope that it will be useful,
1674 + but WITHOUT ANY WARRANTY; without even the implied warranty of
1675 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1676 + Lesser General Public License for more details.
1677 +
1678 + You should have received a copy of the GNU Lesser General Public
1679 + License along with the GNU C Library; if not, write to the Free
1680 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
1681 + 02111-1307 USA. */
1682 +
1683 +# include <ctype.h>
1684 +
1685 +
1686 +/* Similar to __m128i_strloadu. Convert to lower case for non-POSIX/C
1687 + locale. */
1688 +static inline __m128i
1689 +__m128i_strloadu_tolower (const unsigned char *p)
1690 +{
1691 + union
1692 + {
1693 + char b[16];
1694 + __m128i x;
1695 + } u;
1696 +
1697 + for (int i = 0; i < 16; ++i)
1698 + if (p[i] == 0)
1699 + {
1700 + u.b[i] = 0;
1701 + break;
1702 + }
1703 + else
1704 + u.b[i] = tolower (p[i]);
1705 +
1706 + return u.x;
1707 +}
1708 +
1709 +
1710 +#define STRCASESTR_NONASCII
1711 +#define USE_AS_STRCASESTR
1712 +#define STRSTR_SSE42 __strcasestr_sse42_nonascii
1713 +#include "strstr.c"
1714 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasestr.c
1715 ===================================================================
1716 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/strcasestr.c
1717 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcasestr.c
1718 @@ -1,3 +1,7 @@
1719 +extern char *__strcasestr_sse42_nonascii (const unsigned char *s1,
1720 + const unsigned char *s2)
1721 + attribute_hidden;
1722 +
1723 #define USE_AS_STRCASESTR
1724 #define STRSTR_SSE42 __strcasestr_sse42
1725 #include "strstr.c"
1726 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcmp.S
1727 ===================================================================
1728 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/strcmp.S
1729 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strcmp.S
1730 @@ -24,7 +24,7 @@
1731 #ifdef USE_AS_STRNCMP
1732 /* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
1733 if the new counter > the old one or is 0. */
1734 -#define UPDATE_STRNCMP_COUNTER \
1735 +# define UPDATE_STRNCMP_COUNTER \
1736 /* calculate left number to compare */ \
1737 lea -16(%rcx, %r11), %r9; \
1738 cmp %r9, %r11; \
1739 @@ -33,23 +33,50 @@
1740 je LABEL(strcmp_exitz_sse4_2); \
1741 mov %r9, %r11
1742
1743 -#define STRCMP_SSE42 __strncmp_sse42
1744 -#define STRCMP_SSSE3 __strncmp_ssse3
1745 -#define STRCMP_SSE2 __strncmp_sse2
1746 -#define __GI_STRCMP __GI_strncmp
1747 +# define STRCMP_SSE42 __strncmp_sse42
1748 +# define STRCMP_SSSE3 __strncmp_ssse3
1749 +# define STRCMP_SSE2 __strncmp_sse2
1750 +# define __GI_STRCMP __GI_strncmp
1751 +#elif defined USE_AS_STRCASECMP_L
1752 +# include "locale-defines.h"
1753 +
1754 +# define UPDATE_STRNCMP_COUNTER
1755 +
1756 +# define STRCMP_SSE42 __strcasecmp_l_sse42
1757 +# define STRCMP_SSSE3 __strcasecmp_l_ssse3
1758 +# define STRCMP_SSE2 __strcasecmp_l_sse2
1759 +# define __GI_STRCMP __GI___strcasecmp_l
1760 +#elif defined USE_AS_STRNCASECMP_L
1761 +# include "locale-defines.h"
1762 +
1763 +/* Since the counter, %r11, is unsigned, we branch to strcmp_exitz
1764 + if the new counter > the old one or is 0. */
1765 +# define UPDATE_STRNCMP_COUNTER \
1766 + /* calculate left number to compare */ \
1767 + lea -16(%rcx, %r11), %r9; \
1768 + cmp %r9, %r11; \
1769 + jb LABEL(strcmp_exitz_sse4_2); \
1770 + test %r9, %r9; \
1771 + je LABEL(strcmp_exitz_sse4_2); \
1772 + mov %r9, %r11
1773 +
1774 +# define STRCMP_SSE42 __strncasecmp_l_sse42
1775 +# define STRCMP_SSSE3 __strncasecmp_l_ssse3
1776 +# define STRCMP_SSE2 __strncasecmp_l_sse2
1777 +# define __GI_STRCMP __GI___strncasecmp_l
1778 #else
1779 -#define UPDATE_STRNCMP_COUNTER
1780 -#ifndef STRCMP
1781 -#define STRCMP strcmp
1782 -#define STRCMP_SSE42 __strcmp_sse42
1783 -#define STRCMP_SSSE3 __strcmp_ssse3
1784 -#define STRCMP_SSE2 __strcmp_sse2
1785 -#define __GI_STRCMP __GI_strcmp
1786 -#endif
1787 +# define UPDATE_STRNCMP_COUNTER
1788 +# ifndef STRCMP
1789 +# define STRCMP strcmp
1790 +# define STRCMP_SSE42 __strcmp_sse42
1791 +# define STRCMP_SSSE3 __strcmp_ssse3
1792 +# define STRCMP_SSE2 __strcmp_sse2
1793 +# define __GI_STRCMP __GI_strcmp
1794 +# endif
1795 #endif
1796
1797 #ifndef LABEL
1798 -#define LABEL(l) L(l)
1799 +# define LABEL(l) L(l)
1800 #endif
1801
1802 /* Define multiple versions only for the definition in libc. Don't
1803 @@ -73,6 +100,43 @@ ENTRY(STRCMP)
1804 2: ret
1805 END(STRCMP)
1806
1807 +# ifdef USE_AS_STRCASECMP_L
1808 +ENTRY(__strcasecmp)
1809 + .type __strcasecmp, @gnu_indirect_function
1810 + cmpl $0, __cpu_features+KIND_OFFSET(%rip)
1811 + jne 1f
1812 + call __init_cpu_features
1813 +1:
1814 + leaq __strcasecmp_sse42(%rip), %rax
1815 + testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
1816 + jnz 2f
1817 + leaq __strcasecmp_ssse3(%rip), %rax
1818 + testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
1819 + jnz 2f
1820 + leaq __strcasecmp_sse2(%rip), %rax
1821 +2: ret
1822 +END(__strcasecmp)
1823 +weak_alias (__strcasecmp, strcasecmp)
1824 +# endif
1825 +# ifdef USE_AS_STRNCASECMP_L
1826 +ENTRY(__strncasecmp)
1827 + .type __strncasecmp, @gnu_indirect_function
1828 + cmpl $0, __cpu_features+KIND_OFFSET(%rip)
1829 + jne 1f
1830 + call __init_cpu_features
1831 +1:
1832 + leaq __strncasecmp_sse42(%rip), %rax
1833 + testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
1834 + jnz 2f
1835 + leaq __strncasecmp_ssse3(%rip), %rax
1836 + testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
1837 + jnz 2f
1838 + leaq __strncasecmp_sse2(%rip), %rax
1839 +2: ret
1840 +END(__strncasecmp)
1841 +weak_alias (__strncasecmp, strncasecmp)
1842 +# endif
1843 +
1844 /* We use 0x1a:
1845 _SIDD_SBYTE_OPS
1846 | _SIDD_CMP_EQUAL_EACH
1847 @@ -101,8 +165,31 @@ END(STRCMP)
1848
1849 /* Put all SSE 4.2 functions together. */
1850 .section .text.sse4.2,"ax",@progbits
1851 - .align 16
1852 + .align 16
1853 .type STRCMP_SSE42, @function
1854 +# ifdef USE_AS_STRCASECMP_L
1855 +ENTRY (__strcasecmp_sse42)
1856 + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
1857 + movq %fs:(%rax),%rdx
1858 +
1859 + // XXX 5 byte should be before the function
1860 + /* 5-byte NOP. */
1861 + .byte 0x0f,0x1f,0x44,0x00,0x00
1862 +END (__strcasecmp_sse42)
1863 + /* FALLTHROUGH to strcasecmp_l. */
1864 +# endif
1865 +# ifdef USE_AS_STRNCASECMP_L
1866 +ENTRY (__strncasecmp_sse42)
1867 + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax
1868 + movq %fs:(%rax),%rcx
1869 +
1870 + // XXX 5 byte should be before the function
1871 + /* 5-byte NOP. */
1872 + .byte 0x0f,0x1f,0x44,0x00,0x00
1873 +END (__strncasecmp_sse42)
1874 + /* FALLTHROUGH to strncasecmp_l. */
1875 +# endif
1876 +
1877 STRCMP_SSE42:
1878 cfi_startproc
1879 CALL_MCOUNT
1880 @@ -110,24 +197,87 @@ STRCMP_SSE42:
1881 /*
1882 * This implementation uses SSE to compare up to 16 bytes at a time.
1883 */
1884 -#ifdef USE_AS_STRNCMP
1885 +# ifdef USE_AS_STRCASECMP_L
1886 + /* We have to fall back on the C implementation for locales with
1887 + encodings not matching ASCII for single bytes (flag bit 0 set). */
1888 +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
1889 + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax
1890 +# else
1891 + movq (%rdx), %rax
1892 +# endif
1893 + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
1894 + jne __strcasecmp_l_nonascii
1895 +# endif
1896 +# ifdef USE_AS_STRNCASECMP_L
1897 + /* We have to fall back on the C implementation for locales with
1898 + encodings not matching ASCII for single bytes (flag bit 0 set). */
1899 +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
1900 + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rcx), %rax
1901 +# else
1902 + movq (%rcx), %rax
1903 +# endif
1904 + testl $1, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
1905 + jne __strncasecmp_l_nonascii
1906 +# endif
1907 +
1908 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1909 test %rdx, %rdx
1910 je LABEL(strcmp_exitz_sse4_2)
1911 cmp $1, %rdx
1912 je LABEL(Byte0_sse4_2)
1913 mov %rdx, %r11
1914 -#endif
1915 +# endif
1916 mov %esi, %ecx
1917 mov %edi, %eax
1918 /* Use 64bit AND here to avoid long NOP padding. */
1919 and $0x3f, %rcx /* rsi alignment in cache line */
1920 and $0x3f, %rax /* rdi alignment in cache line */
1921 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
1922 + .section .rodata.cst16,"aM",@progbits,16
1923 + .align 16
1924 +.Lbelowupper_sse4:
1925 + .quad 0x4040404040404040
1926 + .quad 0x4040404040404040
1927 +.Ltopupper_sse4:
1928 + .quad 0x5b5b5b5b5b5b5b5b
1929 + .quad 0x5b5b5b5b5b5b5b5b
1930 +.Ltouppermask_sse4:
1931 + .quad 0x2020202020202020
1932 + .quad 0x2020202020202020
1933 + .previous
1934 + movdqa .Lbelowupper_sse4(%rip), %xmm4
1935 +# define UCLOW_reg %xmm4
1936 + movdqa .Ltopupper_sse4(%rip), %xmm5
1937 +# define UCHIGH_reg %xmm5
1938 + movdqa .Ltouppermask_sse4(%rip), %xmm6
1939 +# define LCQWORD_reg %xmm6
1940 +# endif
1941 cmp $0x30, %ecx
1942 ja LABEL(crosscache_sse4_2)/* rsi: 16-byte load will cross cache line */
1943 cmp $0x30, %eax
1944 ja LABEL(crosscache_sse4_2)/* rdi: 16-byte load will cross cache line */
1945 movdqu (%rdi), %xmm1
1946 movdqu (%rsi), %xmm2
1947 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
1948 +# define TOLOWER(reg1, reg2) \
1949 + movdqa reg1, %xmm7; \
1950 + movdqa UCHIGH_reg, %xmm8; \
1951 + movdqa reg2, %xmm9; \
1952 + movdqa UCHIGH_reg, %xmm10; \
1953 + pcmpgtb UCLOW_reg, %xmm7; \
1954 + pcmpgtb reg1, %xmm8; \
1955 + pcmpgtb UCLOW_reg, %xmm9; \
1956 + pcmpgtb reg2, %xmm10; \
1957 + pand %xmm8, %xmm7; \
1958 + pand %xmm10, %xmm9; \
1959 + pand LCQWORD_reg, %xmm7; \
1960 + pand LCQWORD_reg, %xmm9; \
1961 + por %xmm7, reg1; \
1962 + por %xmm9, reg2
1963 + TOLOWER (%xmm1, %xmm2)
1964 +# else
1965 +# define TOLOWER(reg1, reg2)
1966 +# endif
1967 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
1968 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
1969 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
1970 @@ -135,10 +285,10 @@ STRCMP_SSE42:
1971 pmovmskb %xmm1, %edx
1972 sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
1973 jnz LABEL(less16bytes_sse4_2)/* If not, find different value or null char */
1974 -#ifdef USE_AS_STRNCMP
1975 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
1976 sub $16, %r11
1977 jbe LABEL(strcmp_exitz_sse4_2)/* finish comparision */
1978 -#endif
1979 +# endif
1980 add $16, %rsi /* prepare to search next 16 bytes */
1981 add $16, %rdi /* prepare to search next 16 bytes */
1982
1983 @@ -180,7 +330,13 @@ LABEL(ashr_0_sse4_2):
1984 movdqa (%rsi), %xmm1
1985 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
1986 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
1987 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
1988 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
1989 +# else
1990 + movdqa (%rdi), %xmm2
1991 + TOLOWER (%xmm1, %xmm2)
1992 + pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
1993 +# endif
1994 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
1995 pmovmskb %xmm1, %r9d
1996 shr %cl, %edx /* adjust 0xffff for offset */
1997 @@ -204,44 +360,60 @@ LABEL(ashr_0_sse4_2):
1998 .p2align 4
1999 LABEL(ashr_0_use_sse4_2):
2000 movdqa (%rdi,%rdx), %xmm0
2001 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2002 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2003 +# else
2004 + movdqa (%rsi,%rdx), %xmm1 /* case-insensitive builds: load the 16 bytes at (%rsi,%rdx) into a register so TOLOWER can operate on them */
2005 + TOLOWER (%xmm0, %xmm1) /* macro is defined outside this hunk; presumably lowercases both 16-byte chunks in place -- confirm */
2006 + pcmpistri $0x1a, %xmm1, %xmm0 /* same $0x1a compare as the other branch, on the register copy instead of memory; the jbe after the lea consumes these flags */
2007 +# endif
2008 lea 16(%rdx), %rdx
2009 jbe LABEL(ashr_0_use_sse4_2_exit)
2010 -#ifdef USE_AS_STRNCMP
2011 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2012 sub $16, %r11
2013 jbe LABEL(strcmp_exitz_sse4_2)
2014 -#endif
2015 +# endif
2016
2017 movdqa (%rdi,%rdx), %xmm0
2018 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2019 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2020 +# else
2021 + movdqa (%rsi,%rdx), %xmm1 /* second copy of the loop body: same register load as above */
2022 + TOLOWER (%xmm0, %xmm1) /* presumably lowercases both chunks, as above -- confirm against the macro definition */
2023 + pcmpistri $0x1a, %xmm1, %xmm0 /* register-operand form of the compare, as above */
2024 +# endif
2025 lea 16(%rdx), %rdx
2026 jbe LABEL(ashr_0_use_sse4_2_exit)
2027 -#ifdef USE_AS_STRNCMP
2028 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2029 sub $16, %r11
2030 jbe LABEL(strcmp_exitz_sse4_2)
2031 -#endif
2032 +# endif
2033 jmp LABEL(ashr_0_use_sse4_2)
2034
2035
2036 .p2align 4
2037 LABEL(ashr_0_use_sse4_2_exit):
2038 jnc LABEL(strcmp_exitz_sse4_2)
2039 -#ifdef USE_AS_STRNCMP
2040 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2041 sub %rcx, %r11
2042 jbe LABEL(strcmp_exitz_sse4_2)
2043 -#endif
2044 +# endif
2045 lea -16(%rdx, %rcx), %rcx
2046 movzbl (%rdi, %rcx), %eax
2047 movzbl (%rsi, %rcx), %edx
2048 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
2049 + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx /* %rcx is reused as the table base (symbol + 128 four-byte entries); the byte offset it held is no longer needed, both bytes are already loaded */
2050 + movl (%rcx,%rax,4), %eax /* replace the byte read via %rdi with its 4-byte table entry */
2051 + movl (%rcx,%rdx,4), %edx /* likewise for the byte read via %rsi, so the sub below compares table-mapped values */
2052 +# endif
2053 sub %edx, %eax
2054 ret
2055
2056
2057
2058 -
2059 /*
2060 * The following cases will be handled by ashr_1
2061 - * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2062 + * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2063 * n(15) n -15 0(15 +(n-15) - n) ashr_1
2064 */
2065 .p2align 4
2066 @@ -251,6 +423,7 @@ LABEL(ashr_1_sse4_2):
2067 movdqa (%rsi), %xmm1
2068 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
2069 pslldq $15, %xmm2 /* shift first string to align with second */
2070 + TOLOWER (%xmm1, %xmm2)
2071 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
2072 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
2073 pmovmskb %xmm2, %r9d
2074 @@ -281,12 +454,18 @@ LABEL(loop_ashr_1_use_sse4_2):
2075
2076 movdqa (%rdi, %rdx), %xmm0
2077 palignr $1, -16(%rdi, %rdx), %xmm0
2078 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2079 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2080 +# else
2081 + movdqa (%rsi,%rdx), %xmm1
2082 + TOLOWER (%xmm0, %xmm1)
2083 + pcmpistri $0x1a, %xmm1, %xmm0
2084 +# endif
2085 jbe LABEL(use_sse4_2_exit)
2086 -#ifdef USE_AS_STRNCMP
2087 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2088 sub $16, %r11
2089 jbe LABEL(strcmp_exitz_sse4_2)
2090 -#endif
2091 +# endif
2092
2093 add $16, %rdx
2094 add $16, %r10
2095 @@ -294,12 +473,18 @@ LABEL(loop_ashr_1_use_sse4_2):
2096
2097 movdqa (%rdi, %rdx), %xmm0
2098 palignr $1, -16(%rdi, %rdx), %xmm0
2099 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2100 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2101 +# else
2102 + movdqa (%rsi,%rdx), %xmm1
2103 + TOLOWER (%xmm0, %xmm1)
2104 + pcmpistri $0x1a, %xmm1, %xmm0
2105 +# endif
2106 jbe LABEL(use_sse4_2_exit)
2107 -#ifdef USE_AS_STRNCMP
2108 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2109 sub $16, %r11
2110 jbe LABEL(strcmp_exitz_sse4_2)
2111 -#endif
2112 +# endif
2113 add $16, %rdx
2114 jmp LABEL(loop_ashr_1_use_sse4_2)
2115
2116 @@ -309,10 +494,10 @@ LABEL(nibble_ashr_1_use_sse4_2):
2117 movdqa -16(%rdi, %rdx), %xmm0
2118 psrldq $1, %xmm0
2119 pcmpistri $0x3a,%xmm0, %xmm0
2120 -#ifdef USE_AS_STRNCMP
2121 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2122 cmp %r11, %rcx
2123 jae LABEL(nibble_ashr_use_sse4_2_exit)
2124 -#endif
2125 +# endif
2126 cmp $14, %ecx
2127 ja LABEL(loop_ashr_1_use_sse4_2)
2128
2129 @@ -320,7 +505,7 @@ LABEL(nibble_ashr_1_use_sse4_2):
2130
2131 /*
2132 * The following cases will be handled by ashr_2
2133 - * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2134 + * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2135 * n(14~15) n -14 1(15 +(n-14) - n) ashr_2
2136 */
2137 .p2align 4
2138 @@ -330,6 +515,7 @@ LABEL(ashr_2_sse4_2):
2139 movdqa (%rsi), %xmm1
2140 pcmpeqb %xmm1, %xmm0
2141 pslldq $14, %xmm2
2142 + TOLOWER (%xmm1, %xmm2)
2143 pcmpeqb %xmm1, %xmm2
2144 psubb %xmm0, %xmm2
2145 pmovmskb %xmm2, %r9d
2146 @@ -360,12 +546,18 @@ LABEL(loop_ashr_2_use_sse4_2):
2147
2148 movdqa (%rdi, %rdx), %xmm0
2149 palignr $2, -16(%rdi, %rdx), %xmm0
2150 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2151 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2152 +# else
2153 + movdqa (%rsi,%rdx), %xmm1
2154 + TOLOWER (%xmm0, %xmm1)
2155 + pcmpistri $0x1a, %xmm1, %xmm0
2156 +# endif
2157 jbe LABEL(use_sse4_2_exit)
2158 -#ifdef USE_AS_STRNCMP
2159 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2160 sub $16, %r11
2161 jbe LABEL(strcmp_exitz_sse4_2)
2162 -#endif
2163 +# endif
2164
2165 add $16, %rdx
2166 add $16, %r10
2167 @@ -373,12 +565,18 @@ LABEL(loop_ashr_2_use_sse4_2):
2168
2169 movdqa (%rdi, %rdx), %xmm0
2170 palignr $2, -16(%rdi, %rdx), %xmm0
2171 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2172 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2173 +# else
2174 + movdqa (%rsi,%rdx), %xmm1
2175 + TOLOWER (%xmm0, %xmm1)
2176 + pcmpistri $0x1a, %xmm1, %xmm0
2177 +# endif
2178 jbe LABEL(use_sse4_2_exit)
2179 -#ifdef USE_AS_STRNCMP
2180 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2181 sub $16, %r11
2182 jbe LABEL(strcmp_exitz_sse4_2)
2183 -#endif
2184 +# endif
2185 add $16, %rdx
2186 jmp LABEL(loop_ashr_2_use_sse4_2)
2187
2188 @@ -388,10 +586,10 @@ LABEL(nibble_ashr_2_use_sse4_2):
2189 movdqa -16(%rdi, %rdx), %xmm0
2190 psrldq $2, %xmm0
2191 pcmpistri $0x3a,%xmm0, %xmm0
2192 -#ifdef USE_AS_STRNCMP
2193 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2194 cmp %r11, %rcx
2195 jae LABEL(nibble_ashr_use_sse4_2_exit)
2196 -#endif
2197 +# endif
2198 cmp $13, %ecx
2199 ja LABEL(loop_ashr_2_use_sse4_2)
2200
2201 @@ -409,6 +607,7 @@ LABEL(ashr_3_sse4_2):
2202 movdqa (%rsi), %xmm1
2203 pcmpeqb %xmm1, %xmm0
2204 pslldq $13, %xmm2
2205 + TOLOWER (%xmm1, %xmm2)
2206 pcmpeqb %xmm1, %xmm2
2207 psubb %xmm0, %xmm2
2208 pmovmskb %xmm2, %r9d
2209 @@ -439,12 +638,18 @@ LABEL(loop_ashr_3_use_sse4_2):
2210
2211 movdqa (%rdi, %rdx), %xmm0
2212 palignr $3, -16(%rdi, %rdx), %xmm0
2213 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2214 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2215 +# else
2216 + movdqa (%rsi,%rdx), %xmm1
2217 + TOLOWER (%xmm0, %xmm1)
2218 + pcmpistri $0x1a, %xmm1, %xmm0
2219 +# endif
2220 jbe LABEL(use_sse4_2_exit)
2221 -#ifdef USE_AS_STRNCMP
2222 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2223 sub $16, %r11
2224 jbe LABEL(strcmp_exitz_sse4_2)
2225 -#endif
2226 +# endif
2227
2228 add $16, %rdx
2229 add $16, %r10
2230 @@ -452,12 +657,18 @@ LABEL(loop_ashr_3_use_sse4_2):
2231
2232 movdqa (%rdi, %rdx), %xmm0
2233 palignr $3, -16(%rdi, %rdx), %xmm0
2234 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2235 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2236 +# else
2237 + movdqa (%rsi,%rdx), %xmm1
2238 + TOLOWER (%xmm0, %xmm1)
2239 + pcmpistri $0x1a, %xmm1, %xmm0
2240 +# endif
2241 jbe LABEL(use_sse4_2_exit)
2242 -#ifdef USE_AS_STRNCMP
2243 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2244 sub $16, %r11
2245 jbe LABEL(strcmp_exitz_sse4_2)
2246 -#endif
2247 +# endif
2248 add $16, %rdx
2249 jmp LABEL(loop_ashr_3_use_sse4_2)
2250
2251 @@ -467,10 +678,10 @@ LABEL(nibble_ashr_3_use_sse4_2):
2252 movdqa -16(%rdi, %rdx), %xmm0
2253 psrldq $3, %xmm0
2254 pcmpistri $0x3a,%xmm0, %xmm0
2255 -#ifdef USE_AS_STRNCMP
2256 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2257 cmp %r11, %rcx
2258 jae LABEL(nibble_ashr_use_sse4_2_exit)
2259 -#endif
2260 +# endif
2261 cmp $12, %ecx
2262 ja LABEL(loop_ashr_3_use_sse4_2)
2263
2264 @@ -488,6 +699,7 @@ LABEL(ashr_4_sse4_2):
2265 movdqa (%rsi), %xmm1
2266 pcmpeqb %xmm1, %xmm0
2267 pslldq $12, %xmm2
2268 + TOLOWER (%xmm1, %xmm2)
2269 pcmpeqb %xmm1, %xmm2
2270 psubb %xmm0, %xmm2
2271 pmovmskb %xmm2, %r9d
2272 @@ -519,12 +731,18 @@ LABEL(loop_ashr_4_use_sse4_2):
2273
2274 movdqa (%rdi, %rdx), %xmm0
2275 palignr $4, -16(%rdi, %rdx), %xmm0
2276 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2277 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2278 +# else
2279 + movdqa (%rsi,%rdx), %xmm1
2280 + TOLOWER (%xmm0, %xmm1)
2281 + pcmpistri $0x1a, %xmm1, %xmm0
2282 +# endif
2283 jbe LABEL(use_sse4_2_exit)
2284 -#ifdef USE_AS_STRNCMP
2285 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2286 sub $16, %r11
2287 jbe LABEL(strcmp_exitz_sse4_2)
2288 -#endif
2289 +# endif
2290
2291 add $16, %rdx
2292 add $16, %r10
2293 @@ -532,12 +750,18 @@ LABEL(loop_ashr_4_use_sse4_2):
2294
2295 movdqa (%rdi, %rdx), %xmm0
2296 palignr $4, -16(%rdi, %rdx), %xmm0
2297 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2298 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2299 +# else
2300 + movdqa (%rsi,%rdx), %xmm1
2301 + TOLOWER (%xmm0, %xmm1)
2302 + pcmpistri $0x1a, %xmm1, %xmm0
2303 +# endif
2304 jbe LABEL(use_sse4_2_exit)
2305 -#ifdef USE_AS_STRNCMP
2306 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2307 sub $16, %r11
2308 jbe LABEL(strcmp_exitz_sse4_2)
2309 -#endif
2310 +# endif
2311 add $16, %rdx
2312 jmp LABEL(loop_ashr_4_use_sse4_2)
2313
2314 @@ -547,10 +771,10 @@ LABEL(nibble_ashr_4_use_sse4_2):
2315 movdqa -16(%rdi, %rdx), %xmm0
2316 psrldq $4, %xmm0
2317 pcmpistri $0x3a,%xmm0, %xmm0
2318 -#ifdef USE_AS_STRNCMP
2319 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2320 cmp %r11, %rcx
2321 jae LABEL(nibble_ashr_use_sse4_2_exit)
2322 -#endif
2323 +# endif
2324 cmp $11, %ecx
2325 ja LABEL(loop_ashr_4_use_sse4_2)
2326
2327 @@ -559,7 +783,7 @@ LABEL(nibble_ashr_4_use_sse4_2):
2328 /*
2329 * The following cases will be handled by ashr_5
2330 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2331 - * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
2332 + * n(11~15) n - 11 4(15 +(n-11) - n) ashr_5
2333 */
2334 .p2align 4
2335 LABEL(ashr_5_sse4_2):
2336 @@ -568,6 +792,7 @@ LABEL(ashr_5_sse4_2):
2337 movdqa (%rsi), %xmm1
2338 pcmpeqb %xmm1, %xmm0
2339 pslldq $11, %xmm2
2340 + TOLOWER (%xmm1, %xmm2)
2341 pcmpeqb %xmm1, %xmm2
2342 psubb %xmm0, %xmm2
2343 pmovmskb %xmm2, %r9d
2344 @@ -599,12 +824,18 @@ LABEL(loop_ashr_5_use_sse4_2):
2345
2346 movdqa (%rdi, %rdx), %xmm0
2347 palignr $5, -16(%rdi, %rdx), %xmm0
2348 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2349 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2350 +# else
2351 + movdqa (%rsi,%rdx), %xmm1
2352 + TOLOWER (%xmm0, %xmm1)
2353 + pcmpistri $0x1a, %xmm1, %xmm0
2354 +# endif
2355 jbe LABEL(use_sse4_2_exit)
2356 -#ifdef USE_AS_STRNCMP
2357 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2358 sub $16, %r11
2359 jbe LABEL(strcmp_exitz_sse4_2)
2360 -#endif
2361 +# endif
2362
2363 add $16, %rdx
2364 add $16, %r10
2365 @@ -613,12 +844,18 @@ LABEL(loop_ashr_5_use_sse4_2):
2366 movdqa (%rdi, %rdx), %xmm0
2367
2368 palignr $5, -16(%rdi, %rdx), %xmm0
2369 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2370 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2371 +# else
2372 + movdqa (%rsi,%rdx), %xmm1
2373 + TOLOWER (%xmm0, %xmm1)
2374 + pcmpistri $0x1a, %xmm1, %xmm0
2375 +# endif
2376 jbe LABEL(use_sse4_2_exit)
2377 -#ifdef USE_AS_STRNCMP
2378 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2379 sub $16, %r11
2380 jbe LABEL(strcmp_exitz_sse4_2)
2381 -#endif
2382 +# endif
2383 add $16, %rdx
2384 jmp LABEL(loop_ashr_5_use_sse4_2)
2385
2386 @@ -628,10 +865,10 @@ LABEL(nibble_ashr_5_use_sse4_2):
2387 movdqa -16(%rdi, %rdx), %xmm0
2388 psrldq $5, %xmm0
2389 pcmpistri $0x3a,%xmm0, %xmm0
2390 -#ifdef USE_AS_STRNCMP
2391 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2392 cmp %r11, %rcx
2393 jae LABEL(nibble_ashr_use_sse4_2_exit)
2394 -#endif
2395 +# endif
2396 cmp $10, %ecx
2397 ja LABEL(loop_ashr_5_use_sse4_2)
2398
2399 @@ -640,7 +877,7 @@ LABEL(nibble_ashr_5_use_sse4_2):
2400 /*
2401 * The following cases will be handled by ashr_6
2402 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2403 - * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
2404 + * n(10~15) n - 10 5(15 +(n-10) - n) ashr_6
2405 */
2406 .p2align 4
2407 LABEL(ashr_6_sse4_2):
2408 @@ -649,6 +886,7 @@ LABEL(ashr_6_sse4_2):
2409 movdqa (%rsi), %xmm1
2410 pcmpeqb %xmm1, %xmm0
2411 pslldq $10, %xmm2
2412 + TOLOWER (%xmm1, %xmm2)
2413 pcmpeqb %xmm1, %xmm2
2414 psubb %xmm0, %xmm2
2415 pmovmskb %xmm2, %r9d
2416 @@ -680,12 +918,18 @@ LABEL(loop_ashr_6_use_sse4_2):
2417
2418 movdqa (%rdi, %rdx), %xmm0
2419 palignr $6, -16(%rdi, %rdx), %xmm0
2420 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2421 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2422 + pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2423 +# else
2424 + movdqa (%rsi,%rdx), %xmm1
2425 + TOLOWER (%xmm0, %xmm1)
2426 + pcmpistri $0x1a, %xmm1, %xmm0
2427 +# endif
2428 jbe LABEL(use_sse4_2_exit)
2429 -#ifdef USE_AS_STRNCMP
2430 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2431 sub $16, %r11
2432 jbe LABEL(strcmp_exitz_sse4_2)
2433 -#endif
2434 +# endif
2435
2436 add $16, %rdx
2437 add $16, %r10
2438 @@ -693,12 +937,18 @@ LABEL(loop_ashr_6_use_sse4_2):
2439
2440 movdqa (%rdi, %rdx), %xmm0
2441 palignr $6, -16(%rdi, %rdx), %xmm0
2442 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2443 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2444 + pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2445 +# else
2446 + movdqa (%rsi,%rdx), %xmm1
2447 + TOLOWER (%xmm0, %xmm1)
2448 + pcmpistri $0x1a, %xmm1, %xmm0
2449 +# endif
2450 jbe LABEL(use_sse4_2_exit)
2451 -#ifdef USE_AS_STRNCMP
2452 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2453 sub $16, %r11
2454 jbe LABEL(strcmp_exitz_sse4_2)
2455 -#endif
2456 +# endif
2457 add $16, %rdx
2458 jmp LABEL(loop_ashr_6_use_sse4_2)
2459
2460 @@ -708,10 +958,10 @@ LABEL(nibble_ashr_6_use_sse4_2):
2461 movdqa -16(%rdi, %rdx), %xmm0
2462 psrldq $6, %xmm0
2463 pcmpistri $0x3a,%xmm0, %xmm0
2464 -#ifdef USE_AS_STRNCMP
2465 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2466 cmp %r11, %rcx
2467 jae LABEL(nibble_ashr_use_sse4_2_exit)
2468 -#endif
2469 +# endif
2470 cmp $9, %ecx
2471 ja LABEL(loop_ashr_6_use_sse4_2)
2472
2473 @@ -720,7 +970,7 @@ LABEL(nibble_ashr_6_use_sse4_2):
2474 /*
2475 * The following cases will be handled by ashr_7
2476 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2477 - * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
2478 + * n(9~15) n - 9 6(15 +(n - 9) - n) ashr_7
2479 */
2480 .p2align 4
2481 LABEL(ashr_7_sse4_2):
2482 @@ -729,6 +979,7 @@ LABEL(ashr_7_sse4_2):
2483 movdqa (%rsi), %xmm1
2484 pcmpeqb %xmm1, %xmm0
2485 pslldq $9, %xmm2
2486 + TOLOWER (%xmm1, %xmm2)
2487 pcmpeqb %xmm1, %xmm2
2488 psubb %xmm0, %xmm2
2489 pmovmskb %xmm2, %r9d
2490 @@ -760,12 +1011,18 @@ LABEL(loop_ashr_7_use_sse4_2):
2491
2492 movdqa (%rdi, %rdx), %xmm0
2493 palignr $7, -16(%rdi, %rdx), %xmm0
2494 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2495 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2496 +# else
2497 + movdqa (%rsi,%rdx), %xmm1
2498 + TOLOWER (%xmm0, %xmm1)
2499 + pcmpistri $0x1a, %xmm1, %xmm0
2500 +# endif
2501 jbe LABEL(use_sse4_2_exit)
2502 -#ifdef USE_AS_STRNCMP
2503 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2504 sub $16, %r11
2505 jbe LABEL(strcmp_exitz_sse4_2)
2506 -#endif
2507 +# endif
2508
2509 add $16, %rdx
2510 add $16, %r10
2511 @@ -773,12 +1030,18 @@ LABEL(loop_ashr_7_use_sse4_2):
2512
2513 movdqa (%rdi, %rdx), %xmm0
2514 palignr $7, -16(%rdi, %rdx), %xmm0
2515 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2516 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2517 +# else
2518 + movdqa (%rsi,%rdx), %xmm1
2519 + TOLOWER (%xmm0, %xmm1)
2520 + pcmpistri $0x1a, %xmm1, %xmm0
2521 +# endif
2522 jbe LABEL(use_sse4_2_exit)
2523 -#ifdef USE_AS_STRNCMP
2524 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2525 sub $16, %r11
2526 jbe LABEL(strcmp_exitz_sse4_2)
2527 -#endif
2528 +# endif
2529 add $16, %rdx
2530 jmp LABEL(loop_ashr_7_use_sse4_2)
2531
2532 @@ -788,10 +1051,10 @@ LABEL(nibble_ashr_7_use_sse4_2):
2533 movdqa -16(%rdi, %rdx), %xmm0
2534 psrldq $7, %xmm0
2535 pcmpistri $0x3a,%xmm0, %xmm0
2536 -#ifdef USE_AS_STRNCMP
2537 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2538 cmp %r11, %rcx
2539 jae LABEL(nibble_ashr_use_sse4_2_exit)
2540 -#endif
2541 +# endif
2542 cmp $8, %ecx
2543 ja LABEL(loop_ashr_7_use_sse4_2)
2544
2545 @@ -800,7 +1063,7 @@ LABEL(nibble_ashr_7_use_sse4_2):
2546 /*
2547 * The following cases will be handled by ashr_8
2548 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2549 - * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
2550 + * n(8~15) n - 8 7(15 +(n - 8) - n) ashr_8
2551 */
2552 .p2align 4
2553 LABEL(ashr_8_sse4_2):
2554 @@ -809,6 +1072,7 @@ LABEL(ashr_8_sse4_2):
2555 movdqa (%rsi), %xmm1
2556 pcmpeqb %xmm1, %xmm0
2557 pslldq $8, %xmm2
2558 + TOLOWER (%xmm1, %xmm2)
2559 pcmpeqb %xmm1, %xmm2
2560 psubb %xmm0, %xmm2
2561 pmovmskb %xmm2, %r9d
2562 @@ -840,12 +1104,18 @@ LABEL(loop_ashr_8_use_sse4_2):
2563
2564 movdqa (%rdi, %rdx), %xmm0
2565 palignr $8, -16(%rdi, %rdx), %xmm0
2566 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2567 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2568 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2569 +# else
2570 + movdqa (%rsi,%rdx), %xmm1
2571 + TOLOWER (%xmm0, %xmm1)
2572 + pcmpistri $0x1a, %xmm1, %xmm0
2573 +# endif
2574 jbe LABEL(use_sse4_2_exit)
2575 -#ifdef USE_AS_STRNCMP
2576 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2577 sub $16, %r11
2578 jbe LABEL(strcmp_exitz_sse4_2)
2579 -#endif
2580 +# endif
2581
2582 add $16, %rdx
2583 add $16, %r10
2584 @@ -853,12 +1123,18 @@ LABEL(loop_ashr_8_use_sse4_2):
2585
2586 movdqa (%rdi, %rdx), %xmm0
2587 palignr $8, -16(%rdi, %rdx), %xmm0
2588 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2589 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2590 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2591 +# else
2592 + movdqa (%rsi,%rdx), %xmm1
2593 + TOLOWER (%xmm0, %xmm1)
2594 + pcmpistri $0x1a, %xmm1, %xmm0
2595 +# endif
2596 jbe LABEL(use_sse4_2_exit)
2597 -#ifdef USE_AS_STRNCMP
2598 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2599 sub $16, %r11
2600 jbe LABEL(strcmp_exitz_sse4_2)
2601 -#endif
2602 +# endif
2603 add $16, %rdx
2604 jmp LABEL(loop_ashr_8_use_sse4_2)
2605
2606 @@ -868,10 +1144,10 @@ LABEL(nibble_ashr_8_use_sse4_2):
2607 movdqa -16(%rdi, %rdx), %xmm0
2608 psrldq $8, %xmm0
2609 pcmpistri $0x3a,%xmm0, %xmm0
2610 -#ifdef USE_AS_STRNCMP
2611 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2612 cmp %r11, %rcx
2613 jae LABEL(nibble_ashr_use_sse4_2_exit)
2614 -#endif
2615 +# endif
2616 cmp $7, %ecx
2617 ja LABEL(loop_ashr_8_use_sse4_2)
2618
2619 @@ -880,7 +1156,7 @@ LABEL(nibble_ashr_8_use_sse4_2):
2620 /*
2621 * The following cases will be handled by ashr_9
2622 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2623 - * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
2624 + * n(7~15) n - 7 8(15 +(n - 7) - n) ashr_9
2625 */
2626 .p2align 4
2627 LABEL(ashr_9_sse4_2):
2628 @@ -889,6 +1165,7 @@ LABEL(ashr_9_sse4_2):
2629 movdqa (%rsi), %xmm1
2630 pcmpeqb %xmm1, %xmm0
2631 pslldq $7, %xmm2
2632 + TOLOWER (%xmm1, %xmm2)
2633 pcmpeqb %xmm1, %xmm2
2634 psubb %xmm0, %xmm2
2635 pmovmskb %xmm2, %r9d
2636 @@ -921,12 +1198,18 @@ LABEL(loop_ashr_9_use_sse4_2):
2637 movdqa (%rdi, %rdx), %xmm0
2638
2639 palignr $9, -16(%rdi, %rdx), %xmm0
2640 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2641 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2642 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2643 +# else
2644 + movdqa (%rsi,%rdx), %xmm1
2645 + TOLOWER (%xmm0, %xmm1)
2646 + pcmpistri $0x1a, %xmm1, %xmm0
2647 +# endif
2648 jbe LABEL(use_sse4_2_exit)
2649 -#ifdef USE_AS_STRNCMP
2650 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2651 sub $16, %r11
2652 jbe LABEL(strcmp_exitz_sse4_2)
2653 -#endif
2654 +# endif
2655
2656 add $16, %rdx
2657 add $16, %r10
2658 @@ -934,12 +1217,18 @@ LABEL(loop_ashr_9_use_sse4_2):
2659
2660 movdqa (%rdi, %rdx), %xmm0
2661 palignr $9, -16(%rdi, %rdx), %xmm0
2662 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2663 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2664 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2665 +# else
2666 + movdqa (%rsi,%rdx), %xmm1
2667 + TOLOWER (%xmm0, %xmm1)
2668 + pcmpistri $0x1a, %xmm1, %xmm0
2669 +# endif
2670 jbe LABEL(use_sse4_2_exit)
2671 -#ifdef USE_AS_STRNCMP
2672 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2673 sub $16, %r11
2674 jbe LABEL(strcmp_exitz_sse4_2)
2675 -#endif
2676 +# endif
2677 add $16, %rdx
2678 jmp LABEL(loop_ashr_9_use_sse4_2)
2679
2680 @@ -949,10 +1238,10 @@ LABEL(nibble_ashr_9_use_sse4_2):
2681 movdqa -16(%rdi, %rdx), %xmm0
2682 psrldq $9, %xmm0
2683 pcmpistri $0x3a,%xmm0, %xmm0
2684 -#ifdef USE_AS_STRNCMP
2685 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2686 cmp %r11, %rcx
2687 jae LABEL(nibble_ashr_use_sse4_2_exit)
2688 -#endif
2689 +# endif
2690 cmp $6, %ecx
2691 ja LABEL(loop_ashr_9_use_sse4_2)
2692
2693 @@ -961,7 +1250,7 @@ LABEL(nibble_ashr_9_use_sse4_2):
2694 /*
2695 * The following cases will be handled by ashr_10
2696 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2697 - * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
2698 + * n(6~15) n - 6 9(15 +(n - 6) - n) ashr_10
2699 */
2700 .p2align 4
2701 LABEL(ashr_10_sse4_2):
2702 @@ -970,6 +1259,7 @@ LABEL(ashr_10_sse4_2):
2703 movdqa (%rsi), %xmm1
2704 pcmpeqb %xmm1, %xmm0
2705 pslldq $6, %xmm2
2706 + TOLOWER (%xmm1, %xmm2)
2707 pcmpeqb %xmm1, %xmm2
2708 psubb %xmm0, %xmm2
2709 pmovmskb %xmm2, %r9d
2710 @@ -1001,12 +1291,18 @@ LABEL(loop_ashr_10_use_sse4_2):
2711
2712 movdqa (%rdi, %rdx), %xmm0
2713 palignr $10, -16(%rdi, %rdx), %xmm0
2714 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2715 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2716 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2717 +# else
2718 + movdqa (%rsi,%rdx), %xmm1
2719 + TOLOWER (%xmm0, %xmm1)
2720 + pcmpistri $0x1a, %xmm1, %xmm0
2721 +# endif
2722 jbe LABEL(use_sse4_2_exit)
2723 -#ifdef USE_AS_STRNCMP
2724 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2725 sub $16, %r11
2726 jbe LABEL(strcmp_exitz_sse4_2)
2727 -#endif
2728 +# endif
2729
2730 add $16, %rdx
2731 add $16, %r10
2732 @@ -1014,12 +1310,18 @@ LABEL(loop_ashr_10_use_sse4_2):
2733
2734 movdqa (%rdi, %rdx), %xmm0
2735 palignr $10, -16(%rdi, %rdx), %xmm0
2736 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2737 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2738 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2739 +# else
2740 + movdqa (%rsi,%rdx), %xmm1
2741 + TOLOWER (%xmm0, %xmm1)
2742 + pcmpistri $0x1a, %xmm1, %xmm0
2743 +# endif
2744 jbe LABEL(use_sse4_2_exit)
2745 -#ifdef USE_AS_STRNCMP
2746 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2747 sub $16, %r11
2748 jbe LABEL(strcmp_exitz_sse4_2)
2749 -#endif
2750 +# endif
2751 add $16, %rdx
2752 jmp LABEL(loop_ashr_10_use_sse4_2)
2753
2754 @@ -1029,10 +1331,10 @@ LABEL(nibble_ashr_10_use_sse4_2):
2755 movdqa -16(%rdi, %rdx), %xmm0
2756 psrldq $10, %xmm0
2757 pcmpistri $0x3a,%xmm0, %xmm0
2758 -#ifdef USE_AS_STRNCMP
2759 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2760 cmp %r11, %rcx
2761 jae LABEL(nibble_ashr_use_sse4_2_exit)
2762 -#endif
2763 +# endif
2764 cmp $5, %ecx
2765 ja LABEL(loop_ashr_10_use_sse4_2)
2766
2767 @@ -1041,7 +1343,7 @@ LABEL(nibble_ashr_10_use_sse4_2):
2768 /*
2769 * The following cases will be handled by ashr_11
2770 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2771 - * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
2772 + * n(5~15) n - 5 10(15 +(n - 5) - n) ashr_11
2773 */
2774 .p2align 4
2775 LABEL(ashr_11_sse4_2):
2776 @@ -1050,6 +1352,7 @@ LABEL(ashr_11_sse4_2):
2777 movdqa (%rsi), %xmm1
2778 pcmpeqb %xmm1, %xmm0
2779 pslldq $5, %xmm2
2780 + TOLOWER (%xmm1, %xmm2)
2781 pcmpeqb %xmm1, %xmm2
2782 psubb %xmm0, %xmm2
2783 pmovmskb %xmm2, %r9d
2784 @@ -1081,12 +1384,18 @@ LABEL(loop_ashr_11_use_sse4_2):
2785
2786 movdqa (%rdi, %rdx), %xmm0
2787 palignr $11, -16(%rdi, %rdx), %xmm0
2788 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2789 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2790 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2791 +# else
2792 + movdqa (%rsi,%rdx), %xmm1
2793 + TOLOWER (%xmm0, %xmm1)
2794 + pcmpistri $0x1a, %xmm1, %xmm0
2795 +# endif
2796 jbe LABEL(use_sse4_2_exit)
2797 -#ifdef USE_AS_STRNCMP
2798 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2799 sub $16, %r11
2800 jbe LABEL(strcmp_exitz_sse4_2)
2801 -#endif
2802 +# endif
2803
2804 add $16, %rdx
2805 add $16, %r10
2806 @@ -1094,12 +1403,18 @@ LABEL(loop_ashr_11_use_sse4_2):
2807
2808 movdqa (%rdi, %rdx), %xmm0
2809 palignr $11, -16(%rdi, %rdx), %xmm0
2810 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2811 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2812 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2813 +# else
2814 + movdqa (%rsi,%rdx), %xmm1
2815 + TOLOWER (%xmm0, %xmm1)
2816 + pcmpistri $0x1a, %xmm1, %xmm0
2817 +# endif
2818 jbe LABEL(use_sse4_2_exit)
2819 -#ifdef USE_AS_STRNCMP
2820 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2821 sub $16, %r11
2822 jbe LABEL(strcmp_exitz_sse4_2)
2823 -#endif
2824 +# endif
2825 add $16, %rdx
2826 jmp LABEL(loop_ashr_11_use_sse4_2)
2827
2828 @@ -1109,10 +1424,10 @@ LABEL(nibble_ashr_11_use_sse4_2):
2829 movdqa -16(%rdi, %rdx), %xmm0
2830 psrldq $11, %xmm0
2831 pcmpistri $0x3a,%xmm0, %xmm0
2832 -#ifdef USE_AS_STRNCMP
2833 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2834 cmp %r11, %rcx
2835 jae LABEL(nibble_ashr_use_sse4_2_exit)
2836 -#endif
2837 +# endif
2838 cmp $4, %ecx
2839 ja LABEL(loop_ashr_11_use_sse4_2)
2840
2841 @@ -1121,7 +1436,7 @@ LABEL(nibble_ashr_11_use_sse4_2):
2842 /*
2843 * The following cases will be handled by ashr_12
2844 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2845 - * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
2846 + * n(4~15) n - 4 11(15 +(n - 4) - n) ashr_12
2847 */
2848 .p2align 4
2849 LABEL(ashr_12_sse4_2):
2850 @@ -1130,6 +1445,7 @@ LABEL(ashr_12_sse4_2):
2851 movdqa (%rsi), %xmm1
2852 pcmpeqb %xmm1, %xmm0
2853 pslldq $4, %xmm2
2854 + TOLOWER (%xmm1, %xmm2)
2855 pcmpeqb %xmm1, %xmm2
2856 psubb %xmm0, %xmm2
2857 pmovmskb %xmm2, %r9d
2858 @@ -1161,12 +1477,18 @@ LABEL(loop_ashr_12_use_sse4_2):
2859
2860 movdqa (%rdi, %rdx), %xmm0
2861 palignr $12, -16(%rdi, %rdx), %xmm0
2862 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2863 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2864 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2865 +# else
2866 + movdqa (%rsi,%rdx), %xmm1
2867 + TOLOWER (%xmm0, %xmm1)
2868 + pcmpistri $0x1a, %xmm1, %xmm0
2869 +# endif
2870 jbe LABEL(use_sse4_2_exit)
2871 -#ifdef USE_AS_STRNCMP
2872 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2873 sub $16, %r11
2874 jbe LABEL(strcmp_exitz_sse4_2)
2875 -#endif
2876 +# endif
2877
2878 add $16, %rdx
2879 add $16, %r10
2880 @@ -1174,12 +1496,18 @@ LABEL(loop_ashr_12_use_sse4_2):
2881
2882 movdqa (%rdi, %rdx), %xmm0
2883 palignr $12, -16(%rdi, %rdx), %xmm0
2884 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2885 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2886 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2887 +# else
2888 + movdqa (%rsi,%rdx), %xmm1
2889 + TOLOWER (%xmm0, %xmm1)
2890 + pcmpistri $0x1a, %xmm1, %xmm0
2891 +# endif
2892 jbe LABEL(use_sse4_2_exit)
2893 -#ifdef USE_AS_STRNCMP
2894 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2895 sub $16, %r11
2896 jbe LABEL(strcmp_exitz_sse4_2)
2897 -#endif
2898 +# endif
2899 add $16, %rdx
2900 jmp LABEL(loop_ashr_12_use_sse4_2)
2901
2902 @@ -1189,10 +1517,10 @@ LABEL(nibble_ashr_12_use_sse4_2):
2903 movdqa -16(%rdi, %rdx), %xmm0
2904 psrldq $12, %xmm0
2905 pcmpistri $0x3a,%xmm0, %xmm0
2906 -#ifdef USE_AS_STRNCMP
2907 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2908 cmp %r11, %rcx
2909 jae LABEL(nibble_ashr_use_sse4_2_exit)
2910 -#endif
2911 +# endif
2912 cmp $3, %ecx
2913 ja LABEL(loop_ashr_12_use_sse4_2)
2914
2915 @@ -1201,7 +1529,7 @@ LABEL(nibble_ashr_12_use_sse4_2):
2916 /*
2917 * The following cases will be handled by ashr_13
2918 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2919 - * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
2920 + * n(3~15) n - 3 12(15 +(n - 3) - n) ashr_13
2921 */
2922 .p2align 4
2923 LABEL(ashr_13_sse4_2):
2924 @@ -1210,6 +1538,7 @@ LABEL(ashr_13_sse4_2):
2925 movdqa (%rsi), %xmm1
2926 pcmpeqb %xmm1, %xmm0
2927 pslldq $3, %xmm2
2928 + TOLOWER (%xmm1, %xmm2)
2929 pcmpeqb %xmm1, %xmm2
2930 psubb %xmm0, %xmm2
2931 pmovmskb %xmm2, %r9d
2932 @@ -1242,12 +1571,18 @@ LABEL(loop_ashr_13_use_sse4_2):
2933
2934 movdqa (%rdi, %rdx), %xmm0
2935 palignr $13, -16(%rdi, %rdx), %xmm0
2936 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2937 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2938 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2939 +# else
2940 + movdqa (%rsi,%rdx), %xmm1
2941 + TOLOWER (%xmm0, %xmm1)
2942 + pcmpistri $0x1a, %xmm1, %xmm0
2943 +# endif
2944 jbe LABEL(use_sse4_2_exit)
2945 -#ifdef USE_AS_STRNCMP
2946 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2947 sub $16, %r11
2948 jbe LABEL(strcmp_exitz_sse4_2)
2949 -#endif
2950 +# endif
2951
2952 add $16, %rdx
2953 add $16, %r10
2954 @@ -1255,12 +1590,18 @@ LABEL(loop_ashr_13_use_sse4_2):
2955
2956 movdqa (%rdi, %rdx), %xmm0
2957 palignr $13, -16(%rdi, %rdx), %xmm0
2958 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
2959 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
2960 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
2961 +# else
2962 + movdqa (%rsi,%rdx), %xmm1
2963 + TOLOWER (%xmm0, %xmm1)
2964 + pcmpistri $0x1a, %xmm1, %xmm0
2965 +# endif
2966 jbe LABEL(use_sse4_2_exit)
2967 -#ifdef USE_AS_STRNCMP
2968 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2969 sub $16, %r11
2970 jbe LABEL(strcmp_exitz_sse4_2)
2971 -#endif
2972 +# endif
2973 add $16, %rdx
2974 jmp LABEL(loop_ashr_13_use_sse4_2)
2975
2976 @@ -1270,10 +1611,10 @@ LABEL(nibble_ashr_13_use_sse4_2):
2977 movdqa -16(%rdi, %rdx), %xmm0
2978 psrldq $13, %xmm0
2979 pcmpistri $0x3a,%xmm0, %xmm0
2980 -#ifdef USE_AS_STRNCMP
2981 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
2982 cmp %r11, %rcx
2983 jae LABEL(nibble_ashr_use_sse4_2_exit)
2984 -#endif
2985 +# endif
2986 cmp $2, %ecx
2987 ja LABEL(loop_ashr_13_use_sse4_2)
2988
2989 @@ -1282,7 +1623,7 @@ LABEL(nibble_ashr_13_use_sse4_2):
2990 /*
2991 * The following cases will be handled by ashr_14
2992 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
2993 - * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
2994 + * n(2~15) n - 2 13(15 +(n - 2) - n) ashr_14
2995 */
2996 .p2align 4
2997 LABEL(ashr_14_sse4_2):
2998 @@ -1291,6 +1632,7 @@ LABEL(ashr_14_sse4_2):
2999 movdqa (%rsi), %xmm1
3000 pcmpeqb %xmm1, %xmm0
3001 pslldq $2, %xmm2
3002 + TOLOWER (%xmm1, %xmm2)
3003 pcmpeqb %xmm1, %xmm2
3004 psubb %xmm0, %xmm2
3005 pmovmskb %xmm2, %r9d
3006 @@ -1323,12 +1665,18 @@ LABEL(loop_ashr_14_use_sse4_2):
3007
3008 movdqa (%rdi, %rdx), %xmm0
3009 palignr $14, -16(%rdi, %rdx), %xmm0
3010 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
3011 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
3012 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
3013 +# else
3014 + movdqa (%rsi,%rdx), %xmm1
3015 + TOLOWER (%xmm0, %xmm1)
3016 + pcmpistri $0x1a, %xmm1, %xmm0
3017 +# endif
3018 jbe LABEL(use_sse4_2_exit)
3019 -#ifdef USE_AS_STRNCMP
3020 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3021 sub $16, %r11
3022 jbe LABEL(strcmp_exitz_sse4_2)
3023 -#endif
3024 +# endif
3025
3026 add $16, %rdx
3027 add $16, %r10
3028 @@ -1336,12 +1684,18 @@ LABEL(loop_ashr_14_use_sse4_2):
3029
3030 movdqa (%rdi, %rdx), %xmm0
3031 palignr $14, -16(%rdi, %rdx), %xmm0
3032 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
3033 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
3034 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
3035 +# else
3036 + movdqa (%rsi,%rdx), %xmm1
3037 + TOLOWER (%xmm0, %xmm1)
3038 + pcmpistri $0x1a, %xmm1, %xmm0
3039 +# endif
3040 jbe LABEL(use_sse4_2_exit)
3041 -#ifdef USE_AS_STRNCMP
3042 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3043 sub $16, %r11
3044 jbe LABEL(strcmp_exitz_sse4_2)
3045 -#endif
3046 +# endif
3047 add $16, %rdx
3048 jmp LABEL(loop_ashr_14_use_sse4_2)
3049
3050 @@ -1351,10 +1705,10 @@ LABEL(nibble_ashr_14_use_sse4_2):
3051 movdqa -16(%rdi, %rdx), %xmm0
3052 psrldq $14, %xmm0
3053 pcmpistri $0x3a,%xmm0, %xmm0
3054 -#ifdef USE_AS_STRNCMP
3055 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3056 cmp %r11, %rcx
3057 jae LABEL(nibble_ashr_use_sse4_2_exit)
3058 -#endif
3059 +# endif
3060 cmp $1, %ecx
3061 ja LABEL(loop_ashr_14_use_sse4_2)
3062
3063 @@ -1363,7 +1717,7 @@ LABEL(nibble_ashr_14_use_sse4_2):
3064 /*
3065 * The following cases will be handled by ashr_15
3066 * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
3067 - * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
3068 + * n(1~15) n - 1 14(15 +(n - 1) - n) ashr_15
3069 */
3070 .p2align 4
3071 LABEL(ashr_15_sse4_2):
3072 @@ -1372,6 +1726,7 @@ LABEL(ashr_15_sse4_2):
3073 movdqa (%rsi), %xmm1
3074 pcmpeqb %xmm1, %xmm0
3075 pslldq $1, %xmm2
3076 + TOLOWER (%xmm1, %xmm2)
3077 pcmpeqb %xmm1, %xmm2
3078 psubb %xmm0, %xmm2
3079 pmovmskb %xmm2, %r9d
3080 @@ -1406,12 +1761,18 @@ LABEL(loop_ashr_15_use_sse4_2):
3081
3082 movdqa (%rdi, %rdx), %xmm0
3083 palignr $15, -16(%rdi, %rdx), %xmm0
3084 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
3085 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
3086 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
3087 +# else
3088 + movdqa (%rsi,%rdx), %xmm1
3089 + TOLOWER (%xmm0, %xmm1)
3090 + pcmpistri $0x1a, %xmm1, %xmm0
3091 +# endif
3092 jbe LABEL(use_sse4_2_exit)
3093 -#ifdef USE_AS_STRNCMP
3094 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3095 sub $16, %r11
3096 jbe LABEL(strcmp_exitz_sse4_2)
3097 -#endif
3098 +# endif
3099
3100 add $16, %rdx
3101 add $16, %r10
3102 @@ -1419,12 +1780,18 @@ LABEL(loop_ashr_15_use_sse4_2):
3103
3104 movdqa (%rdi, %rdx), %xmm0
3105 palignr $15, -16(%rdi, %rdx), %xmm0
3106 - pcmpistri $0x1a,(%rsi,%rdx), %xmm0
3107 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
3108 + pcmpistri $0x1a, (%rsi,%rdx), %xmm0
3109 +# else
3110 + movdqa (%rsi,%rdx), %xmm1
3111 + TOLOWER (%xmm0, %xmm1)
3112 + pcmpistri $0x1a, %xmm1, %xmm0
3113 +# endif
3114 jbe LABEL(use_sse4_2_exit)
3115 -#ifdef USE_AS_STRNCMP
3116 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3117 sub $16, %r11
3118 jbe LABEL(strcmp_exitz_sse4_2)
3119 -#endif
3120 +# endif
3121 add $16, %rdx
3122 jmp LABEL(loop_ashr_15_use_sse4_2)
3123
3124 @@ -1434,22 +1801,28 @@ LABEL(nibble_ashr_15_use_sse4_2):
3125 movdqa -16(%rdi, %rdx), %xmm0
3126 psrldq $15, %xmm0
3127 pcmpistri $0x3a,%xmm0, %xmm0
3128 -#ifdef USE_AS_STRNCMP
3129 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3130 cmp %r11, %rcx
3131 jae LABEL(nibble_ashr_use_sse4_2_exit)
3132 -#endif
3133 +# endif
3134 cmp $0, %ecx
3135 ja LABEL(loop_ashr_15_use_sse4_2)
3136
3137 LABEL(nibble_ashr_use_sse4_2_exit):
3138 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
3139 pcmpistri $0x1a,(%rsi,%rdx), %xmm0
3140 +# else
3141 + movdqa (%rsi,%rdx), %xmm1
3142 + TOLOWER (%xmm0, %xmm1)
3143 + pcmpistri $0x1a, %xmm1, %xmm0
3144 +# endif
3145 .p2align 4
3146 LABEL(use_sse4_2_exit):
3147 jnc LABEL(strcmp_exitz_sse4_2)
3148 -#ifdef USE_AS_STRNCMP
3149 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3150 sub %rcx, %r11
3151 jbe LABEL(strcmp_exitz_sse4_2)
3152 -#endif
3153 +# endif
3154 add %rcx, %rdx
3155 lea -16(%rdi, %r9), %rdi
3156 movzbl (%rdi, %rdx), %eax
3157 @@ -1458,6 +1831,12 @@ LABEL(use_sse4_2_exit):
3158 jz LABEL(use_sse4_2_ret_sse4_2)
3159 xchg %eax, %edx
3160 LABEL(use_sse4_2_ret_sse4_2):
3161 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
3162 + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rcx
3163 + movl (%rcx,%rdx,4), %edx
3164 + movl (%rcx,%rax,4), %eax
3165 +# endif
3166 +
3167 sub %edx, %eax
3168 ret
3169
3170 @@ -1473,13 +1852,19 @@ LABEL(ret_sse4_2):
3171 LABEL(less16bytes_sse4_2):
3172 bsf %rdx, %rdx /* find and store bit index in %rdx */
3173
3174 -#ifdef USE_AS_STRNCMP
3175 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3176 sub %rdx, %r11
3177 jbe LABEL(strcmp_exitz_sse4_2)
3178 -#endif
3179 +# endif
3180 movzbl (%rsi, %rdx), %ecx
3181 movzbl (%rdi, %rdx), %eax
3182
3183 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
3184 + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
3185 + movl (%rdx,%rcx,4), %ecx
3186 + movl (%rdx,%rax,4), %eax
3187 +# endif
3188 +
3189 sub %ecx, %eax
3190 ret
3191
3192 @@ -1488,15 +1873,27 @@ LABEL(strcmp_exitz_sse4_2):
3193 ret
3194
3195 .p2align 4
3196 + // XXX Same as code above
3197 LABEL(Byte0_sse4_2):
3198 movzx (%rsi), %ecx
3199 movzx (%rdi), %eax
3200
3201 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
3202 + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
3203 + movl (%rdx,%rcx,4), %ecx
3204 + movl (%rdx,%rax,4), %eax
3205 +# endif
3206 +
3207 sub %ecx, %eax
3208 ret
3209 cfi_endproc
3210 .size STRCMP_SSE42, .-STRCMP_SSE42
3211
3212 +# undef UCLOW_reg
3213 +# undef UCHIGH_reg
3214 +# undef LCQWORD_reg
3215 +# undef TOLOWER
3216 +
3217 /* Put all SSE 4.2 functions together. */
3218 .section .rodata.sse4.2,"a",@progbits
3219 .p2align 3
3220 @@ -1528,6 +1925,27 @@ LABEL(unaligned_table_sse4_2):
3221 # undef END
3222 # define END(name) \
3223 cfi_endproc; .size STRCMP_SSE2, .-STRCMP_SSE2
3224 +
3225 +# ifdef USE_AS_STRCASECMP_L
3226 +# define ENTRY2(name) \
3227 + .type __strcasecmp_sse2, @function; \
3228 + .align 16; \
3229 + __strcasecmp_sse2: cfi_startproc; \
3230 + CALL_MCOUNT
3231 +# define END2(name) \
3232 + cfi_endproc; .size __strcasecmp_sse2, .-__strcasecmp_sse2
3233 +# endif
3234 +
3235 +# ifdef USE_AS_STRNCASECMP_L
3236 +# define ENTRY2(name) \
3237 + .type __strncasecmp_sse2, @function; \
3238 + .align 16; \
3239 + __strncasecmp_sse2: cfi_startproc; \
3240 + CALL_MCOUNT
3241 +# define END2(name) \
3242 + cfi_endproc; .size __strncasecmp_sse2, .-__strncasecmp_sse2
3243 +# endif
3244 +
3245 # undef libc_hidden_builtin_def
3246 /* It doesn't make sense to send libc-internal strcmp calls through a PLT.
3247 The speedup we get from using SSE4.2 instruction is likely eaten away
3248 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strncase_l-ssse3.S
3249 ===================================================================
3250 --- /dev/null
3251 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strncase_l-ssse3.S
3252 @@ -0,0 +1,6 @@
3253 +#define USE_SSSE3 1
3254 +#define USE_AS_STRNCASECMP_L
3255 +#define NO_NOLOCALE_ALIAS
3256 +#define STRCMP __strncasecmp_l_ssse3
3257 +#define __strncasecmp __strncasecmp_ssse3
3258 +#include "../strcmp.S"
3259 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strncase_l.S
3260 ===================================================================
3261 --- /dev/null
3262 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strncase_l.S
3263 @@ -0,0 +1,6 @@
3264 +#define STRCMP __strncasecmp_l
3265 +#define USE_AS_STRNCASECMP_L
3266 +#include "strcmp.S"
3267 +
3268 +weak_alias (__strncasecmp_l, strncasecmp_l)
3269 +libc_hidden_def (strncasecmp_l)
3270 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strstr.c
3271 ===================================================================
3272 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/multiarch/strstr.c
3273 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/multiarch/strstr.c
3274 @@ -67,10 +67,10 @@
3275
3276 case ECX CFlag ZFlag SFlag
3277 3 X 1 0 0/1
3278 - 4a 0 1 0 0
3279 - 4b 0 1 0 1
3280 - 4c 0 < X 1 0 0/1
3281 - 5 16 0 1 0
3282 + 4a 0 1 0 0
3283 + 4b 0 1 0 1
3284 + 4c 0 < X 1 0 0/1
3285 + 5 16 0 1 0
3286
3287 3. An initial ordered-comparison fragment match, we fix up to do
3288 subsequent string comparison
3289 @@ -147,8 +147,7 @@ __m128i_shift_right (__m128i value, int
3290 If EOS occurs within less than 16B before 4KB boundary, we don't
3291 cross to next page. */
3292
3293 -static __m128i
3294 -__attribute__ ((section (".text.sse4.2")))
3295 +static inline __m128i
3296 __m128i_strloadu (const unsigned char * p)
3297 {
3298 int offset = ((size_t) p & (16 - 1));
3299 @@ -164,59 +163,36 @@ __m128i_strloadu (const unsigned char *
3300 return _mm_loadu_si128 ((__m128i *) p);
3301 }
3302
3303 -#ifdef USE_AS_STRCASESTR
3304 +#if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII
3305
3306 /* Similar to __m128i_strloadu. Convert to lower case for POSIX/C
3307 locale. */
3308 -
3309 -static __m128i
3310 -__attribute__ ((section (".text.sse4.2")))
3311 -__m128i_strloadu_tolower_posix (const unsigned char * p)
3312 +static inline __m128i
3313 +__m128i_strloadu_tolower (const unsigned char *p, __m128i rangeuc,
3314 + __m128i u2ldelta)
3315 {
3316 __m128i frag = __m128i_strloadu (p);
3317
3318 - /* Convert frag to lower case for POSIX/C locale. */
3319 - __m128i rangeuc = _mm_set_epi64x (0x0, 0x5a41);
3320 - __m128i u2ldelta = _mm_set1_epi64x (0xe0e0e0e0e0e0e0e0);
3321 - __m128i mask1 = _mm_cmpistrm (rangeuc, frag, 0x44);
3322 - __m128i mask2 = _mm_blendv_epi8 (u2ldelta, frag, mask1);
3323 - mask2 = _mm_sub_epi8 (mask2, u2ldelta);
3324 - return _mm_blendv_epi8 (frag, mask2, mask1);
3325 +#define UCLOW 0x4040404040404040ULL
3326 +#define UCHIGH 0x5b5b5b5b5b5b5b5bULL
3327 +#define LCQWORD 0x2020202020202020ULL
3328 + /* Compare if 'Z' + 1 > bytes. Inverted way to get a mask for byte <= 'Z'. */
3329 + __m128i r2 = _mm_cmpgt_epi8 (_mm_set1_epi64x (UCHIGH), frag);
3330 + /* Compare if bytes are > 'A' - 1. */
3331 + __m128i r1 = _mm_cmpgt_epi8 (frag, _mm_set1_epi64x (UCLOW));
3332 + /* Mask byte == ff if byte(r2) <= 'Z' and byte(r1) > 'A' - 1. */
3333 + __m128i mask = _mm_and_si128 (r2, r1);
3334 + /* Set the lowercase bit (0x20) in the bytes whose mask byte == ff. */
3335 + return _mm_or_si128 (frag, _mm_and_si128 (mask, _mm_set1_epi64x (LCQWORD)));
3336 }
3337
3338 -/* Similar to __m128i_strloadu. Convert to lower case for none-POSIX/C
3339 - locale. */
3340 -
3341 -static __m128i
3342 -__attribute__ ((section (".text.sse4.2")))
3343 -__m128i_strloadu_tolower (const unsigned char * p)
3344 -{
3345 - union
3346 - {
3347 - char b[16];
3348 - __m128i x;
3349 - } u;
3350 -
3351 - for (int i = 0; i < 16; i++)
3352 - if (p[i] == 0)
3353 - {
3354 - u.b[i] = 0;
3355 - break;
3356 - }
3357 - else
3358 - u.b[i] = tolower (p[i]);
3359 -
3360 - return u.x;
3361 -}
3362 #endif
3363
3364 /* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP
3365 algorithm) overlap for a fully populated 16B vector.
3366 Input parameter: 1st 16Byte loaded from the reference string of a
3367 strstr function.
3368 - We don't use KMP algorithm if reference string is less than 16B.
3369 - */
3370 -
3371 + We don't use KMP algorithm if reference string is less than 16B. */
3372 static int
3373 __inline__ __attribute__ ((__always_inline__,))
3374 KMP16Bovrlap (__m128i s2)
3375 @@ -236,7 +212,7 @@ KMP16Bovrlap (__m128i s2)
3376 return 1;
3377 else if (!k1)
3378 {
3379 - /* There are al least two ditinct char in s2. If byte 0 and 1 are
3380 + /* There are at least two distinct chars in s2. If byte 0 and 1 are
3381 idential and the distinct value lies farther down, we can deduce
3382 the next byte offset to restart full compare is least no earlier
3383 than byte 3. */
3384 @@ -256,23 +232,30 @@ STRSTR_SSE42 (const unsigned char *s1, c
3385 #define p1 s1
3386 const unsigned char *p2 = s2;
3387
3388 - if (p2[0] == '\0')
3389 +#ifndef STRCASESTR_NONASCII
3390 + if (__builtin_expect (p2[0] == '\0', 0))
3391 return (char *) p1;
3392
3393 - if (p1[0] == '\0')
3394 + if (__builtin_expect (p1[0] == '\0', 0))
3395 return NULL;
3396
3397 /* Check if p1 length is 1 byte long. */
3398 - if (p1[1] == '\0')
3399 + if (__builtin_expect (p1[1] == '\0', 0))
3400 return p2[1] == '\0' && CMPBYTE (p1[0], p2[0]) ? (char *) p1 : NULL;
3401 +#endif
3402
3403 #ifdef USE_AS_STRCASESTR
3404 - __m128i (*strloadu) (const unsigned char *);
3405 -
3406 - if (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE) == 0)
3407 - strloadu = __m128i_strloadu_tolower_posix;
3408 - else
3409 - strloadu = __m128i_strloadu_tolower;
3410 +# ifndef STRCASESTR_NONASCII
3411 + if (__builtin_expect (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE)
3412 + != 0, 0))
3413 + return __strcasestr_sse42_nonascii (s1, s2);
3414 +
3415 + const __m128i rangeuc = _mm_set_epi64x (0x0, 0x5a41);
3416 + const __m128i u2ldelta = _mm_set1_epi64x (0xe0e0e0e0e0e0e0e0);
3417 +# define strloadu(p) __m128i_strloadu_tolower (p, rangeuc, u2ldelta)
3418 +# else
3419 +# define strloadu __m128i_strloadu_tolower
3420 +# endif
3421 #else
3422 # define strloadu __m128i_strloadu
3423 #endif
3424 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp.S
3425 ===================================================================
3426 --- /dev/null
3427 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp.S
3428 @@ -0,0 +1 @@
3429 +/* In strcasecmp_l.S. */
3430 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp_l-nonascii.c
3431 ===================================================================
3432 --- /dev/null
3433 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp_l-nonascii.c
3434 @@ -0,0 +1,8 @@
3435 +#include <string.h>
3436 +
3437 +extern int __strcasecmp_l_nonascii (__const char *__s1, __const char *__s2,
3438 + __locale_t __loc);
3439 +
3440 +#define __strcasecmp_l __strcasecmp_l_nonascii
3441 +#define USE_IN_EXTENDED_LOCALE_MODEL 1
3442 +#include <string/strcasecmp.c>
3443 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp_l.S
3444 ===================================================================
3445 --- /dev/null
3446 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcasecmp_l.S
3447 @@ -0,0 +1,6 @@
3448 +#define STRCMP __strcasecmp_l
3449 +#define USE_AS_STRCASECMP_L
3450 +#include "strcmp.S"
3451 +
3452 +weak_alias (__strcasecmp_l, strcasecmp_l)
3453 +libc_hidden_def (strcasecmp_l)
3454 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcmp.S
3455 ===================================================================
3456 --- glibc-2.12-2-gc4ccff1.orig/sysdeps/x86_64/strcmp.S
3457 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strcmp.S
3458 @@ -51,6 +51,31 @@
3459 je LABEL(strcmp_exitz); \
3460 mov %r9, %r11
3461
3462 +#elif defined USE_AS_STRCASECMP_L
3463 +# include "locale-defines.h"
3464 +
3465 +/* No support for strcasecmp outside libc so far since it is not needed. */
3466 +# ifdef NOT_IN_libc
3467 +# error "strcasecmp_l not implemented so far"
3468 +# endif
3469 +
3470 +# define UPDATE_STRNCMP_COUNTER
3471 +#elif defined USE_AS_STRNCASECMP_L
3472 +# include "locale-defines.h"
3473 +
3474 +/* No support for strncasecmp outside libc so far since it is not needed. */
3475 +# ifdef NOT_IN_libc
3476 +# error "strncasecmp_l not implemented so far"
3477 +# endif
3478 +
3479 +# define UPDATE_STRNCMP_COUNTER \
3480 + /* compute the number of bytes left to compare */ \
3481 + lea -16(%rcx, %r11), %r9; \
3482 + cmp %r9, %r11; \
3483 + jb LABEL(strcmp_exitz); \
3484 + test %r9, %r9; \
3485 + je LABEL(strcmp_exitz); \
3486 + mov %r9, %r11
3487 #else
3488 # define UPDATE_STRNCMP_COUNTER
3489 # ifndef STRCMP
3490 @@ -64,6 +89,46 @@
3491 .section .text.ssse3,"ax",@progbits
3492 #endif
3493
3494 +#ifdef USE_AS_STRCASECMP_L
3495 +# ifndef ENTRY2
3496 +# define ENTRY2(name) ENTRY (name)
3497 +# define END2(name) END (name)
3498 +# endif
3499 +/* __strcasecmp: pass the TLS value __libc_tsd_LOCALE as the locale argument and fall into STRCMP. */
3500 +ENTRY2 (__strcasecmp)
3501 + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax /* TLS offset of __libc_tsd_LOCALE */
3502 + movq %fs:(%rax),%rdx /* its value goes in %rdx, the third argument (__loc) */
3503 +
3504 + // XXX The 5 padding bytes should be placed before the function
3505 + /* 5-byte NOP (nopl 0(%rax,%rax,1)). */
3506 + .byte 0x0f,0x1f,0x44,0x00,0x00
3507 +END2 (__strcasecmp)
3508 +# ifndef NO_NOLOCALE_ALIAS
3509 +weak_alias (__strcasecmp, strcasecmp)
3510 +libc_hidden_def (__strcasecmp)
3511 +# endif
3512 + /* FALLTHROUGH to strcasecmp_l. */
3513 +#elif defined USE_AS_STRNCASECMP_L
3514 +# ifndef ENTRY2
3515 +# define ENTRY2(name) ENTRY (name)
3516 +# define END2(name) END (name)
3517 +# endif
3518 +/* __strncasecmp: pass the TLS value __libc_tsd_LOCALE as the locale argument and fall into STRCMP. */
3519 +ENTRY2 (__strncasecmp)
3520 + movq __libc_tsd_LOCALE@gottpoff(%rip),%rax /* TLS offset of __libc_tsd_LOCALE */
3521 + movq %fs:(%rax),%rcx /* its value goes in %rcx, the fourth argument of strncasecmp_l */
3522 +
3523 + // XXX The 5 padding bytes should be placed before the function
3524 + /* 5-byte NOP (nopl 0(%rax,%rax,1)). */
3525 + .byte 0x0f,0x1f,0x44,0x00,0x00
3526 +END2 (__strncasecmp)
3527 +# ifndef NO_NOLOCALE_ALIAS
3528 +weak_alias (__strncasecmp, strncasecmp)
3529 +libc_hidden_def (__strncasecmp)
3530 +# endif
3531 + /* FALLTHROUGH to strncasecmp_l. */
3532 +#endif
3533 +
3534 ENTRY (BP_SYM (STRCMP))
3535 #ifdef NOT_IN_libc
3536 /* Simple version since we can't use SSE registers in ld.so. */
3537 @@ -84,10 +149,32 @@ L(neq): movl $1, %eax
3538 ret
3539 END (BP_SYM (STRCMP))
3540 #else /* NOT_IN_libc */
3541 +# ifdef USE_AS_STRCASECMP_L
3542 + /* Fall back on the C implementation when the locale's
3543 + _NL_CTYPE_NONASCII_CASE word is nonzero (case mapping not plain ASCII). */
3544 +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
3545 + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rdx), %rax
3546 +# else
3547 + movq (%rdx), %rax
3548 +# endif
3549 + cmpl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
3550 + jne __strcasecmp_l_nonascii
3551 +# elif defined USE_AS_STRNCASECMP_L
3552 + /* Fall back on the C implementation when the locale's
3553 + _NL_CTYPE_NONASCII_CASE word is nonzero (case mapping not plain ASCII). */
3554 +# if LOCALE_T___LOCALES != 0 || LC_CTYPE != 0
3555 + movq LOCALE_T___LOCALES+LC_CTYPE*8(%rcx), %rax
3556 +# else
3557 + movq (%rcx), %rax
3558 +# endif
3559 + cmpl $0, LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES(%rax)
3560 + jne __strncasecmp_l_nonascii
3561 +# endif
3562 +
3563 /*
3564 * This implementation uses SSE to compare up to 16 bytes at a time.
3565 */
3566 -# ifdef USE_AS_STRNCMP
3567 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3568 test %rdx, %rdx
3569 je LABEL(strcmp_exitz)
3570 cmp $1, %rdx
3571 @@ -99,6 +186,26 @@ END (BP_SYM (STRCMP))
3572 /* Use 64bit AND here to avoid long NOP padding. */
3573 and $0x3f, %rcx /* rsi alignment in cache line */
3574 and $0x3f, %rax /* rdi alignment in cache line */
3575 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
3576 + .section .rodata.cst16,"aM",@progbits,16
3577 + .align 16
3578 +.Lbelowupper:
3579 + .quad 0x4040404040404040
3580 + .quad 0x4040404040404040
3581 +.Ltopupper:
3582 + .quad 0x5b5b5b5b5b5b5b5b
3583 + .quad 0x5b5b5b5b5b5b5b5b
3584 +.Ltouppermask:
3585 + .quad 0x2020202020202020
3586 + .quad 0x2020202020202020
3587 + .previous
3588 + movdqa .Lbelowupper(%rip), %xmm5
3589 +# define UCLOW_reg %xmm5
3590 + movdqa .Ltopupper(%rip), %xmm6
3591 +# define UCHIGH_reg %xmm6
3592 + movdqa .Ltouppermask(%rip), %xmm7
3593 +# define LCQWORD_reg %xmm7
3594 +# endif
3595 cmp $0x30, %ecx
3596 ja LABEL(crosscache) /* rsi: 16-byte load will cross cache line */
3597 cmp $0x30, %eax
3598 @@ -107,6 +214,26 @@ END (BP_SYM (STRCMP))
3599 movlpd (%rsi), %xmm2
3600 movhpd 8(%rdi), %xmm1
3601 movhpd 8(%rsi), %xmm2
3602 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
3603 +# define TOLOWER(reg1, reg2) \
3604 + movdqa reg1, %xmm8; /* scratch copy of reg1 */ \
3605 + movdqa UCHIGH_reg, %xmm9; /* 0x5b (Z + 1) in every byte */ \
3606 + movdqa reg2, %xmm10; /* scratch copy of reg2 */ \
3607 + movdqa UCHIGH_reg, %xmm11; /* 0x5b (Z + 1) in every byte */ \
3608 + pcmpgtb UCLOW_reg, %xmm8; /* xmm8 = reg1 bytes > 0x40 (A - 1) */ \
3609 + pcmpgtb reg1, %xmm9; /* xmm9 = 0x5b > reg1 bytes */ \
3610 + pcmpgtb UCLOW_reg, %xmm10; /* xmm10 = reg2 bytes > 0x40 (A - 1) */ \
3611 + pcmpgtb reg2, %xmm11; /* xmm11 = 0x5b > reg2 bytes */ \
3612 + pand %xmm9, %xmm8; /* xmm8 = ff where reg1 byte is A-Z */ \
3613 + pand %xmm11, %xmm10; /* xmm10 = ff where reg2 byte is A-Z */ \
3614 + pand LCQWORD_reg, %xmm8; /* keep only 0x20 in those bytes */ \
3615 + pand LCQWORD_reg, %xmm10; /* keep only 0x20 in those bytes */ \
3616 + por %xmm8, reg1; /* reg1: A-Z bytes become a-z */ \
3617 + por %xmm10, reg2 /* reg2: A-Z bytes become a-z; xmm8-xmm11 are clobbered */
3618 + TOLOWER (%xmm1, %xmm2)
3619 +# else
3620 +# define TOLOWER(reg1, reg2)
3621 +# endif
3622 pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
3623 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
3624 pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
3625 @@ -114,7 +241,7 @@ END (BP_SYM (STRCMP))
3626 pmovmskb %xmm1, %edx
3627 sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
3628 jnz LABEL(less16bytes) /* If not, find different value or null char */
3629 -# ifdef USE_AS_STRNCMP
3630 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3631 sub $16, %r11
3632 jbe LABEL(strcmp_exitz) /* finish comparision */
3633 # endif
3634 @@ -159,7 +286,13 @@ LABEL(ashr_0):
3635 movdqa (%rsi), %xmm1
3636 pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
3637 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
3638 +# if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
3639 pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
3640 +# else
3641 + movdqa (%rdi), %xmm2
3642 + TOLOWER (%xmm1, %xmm2)
3643 + pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
3644 +# endif
3645 psubb %xmm0, %xmm1 /* packed sub of comparison results*/
3646 pmovmskb %xmm1, %r9d
3647 shr %cl, %edx /* adjust 0xffff for offset */
3648 @@ -183,6 +316,7 @@ LABEL(ashr_0):
3649 LABEL(loop_ashr_0):
3650 movdqa (%rsi, %rcx), %xmm1
3651 movdqa (%rdi, %rcx), %xmm2
3652 + TOLOWER (%xmm1, %xmm2)
3653
3654 pcmpeqb %xmm1, %xmm0
3655 pcmpeqb %xmm2, %xmm1
3656 @@ -191,13 +325,14 @@ LABEL(loop_ashr_0):
3657 sub $0xffff, %edx
3658 jnz LABEL(exit) /* mismatch or null char seen */
3659
3660 -# ifdef USE_AS_STRNCMP
3661 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3662 sub $16, %r11
3663 jbe LABEL(strcmp_exitz)
3664 # endif
3665 add $16, %rcx
3666 movdqa (%rsi, %rcx), %xmm1
3667 movdqa (%rdi, %rcx), %xmm2
3668 + TOLOWER (%xmm1, %xmm2)
3669
3670 pcmpeqb %xmm1, %xmm0
3671 pcmpeqb %xmm2, %xmm1
3672 @@ -205,7 +340,7 @@ LABEL(loop_ashr_0):
3673 pmovmskb %xmm1, %edx
3674 sub $0xffff, %edx
3675 jnz LABEL(exit)
3676 -# ifdef USE_AS_STRNCMP
3677 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3678 sub $16, %r11
3679 jbe LABEL(strcmp_exitz)
3680 # endif
3681 @@ -214,7 +349,7 @@ LABEL(loop_ashr_0):
3682
3683 /*
3684 * The following cases will be handled by ashr_1
3685 - * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
3686 + * rcx(offset of rsi) rax(offset of rdi) relative offset corresponding case
3687 * n(15) n -15 0(15 +(n-15) - n) ashr_1
3688 */
3689 .p2align 4
3690 @@ -224,6 +359,7 @@ LABEL(ashr_1):
3691 movdqa (%rsi), %xmm1
3692 pcmpeqb %xmm1, %xmm0 /* Any null chars? */
3693 pslldq $15, %xmm2 /* shift first string to align with second */
3694 + TOLOWER (%xmm1, %xmm2)
3695 pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
3696 psubb %xmm0, %xmm2 /* packed sub of comparison results*/
3697 pmovmskb %xmm2, %r9d
3698 @@ -263,6 +399,7 @@ LABEL(gobble_ashr_1):
3699 # else
3700 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
3701 # endif
3702 + TOLOWER (%xmm1, %xmm2)
3703
3704 pcmpeqb %xmm1, %xmm0
3705 pcmpeqb %xmm2, %xmm1
3706 @@ -271,7 +408,7 @@ LABEL(gobble_ashr_1):
3707 sub $0xffff, %edx
3708 jnz LABEL(exit)
3709
3710 -# ifdef USE_AS_STRNCMP
3711 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3712 sub $16, %r11
3713 jbe LABEL(strcmp_exitz)
3714 # endif
3715 @@ -292,6 +429,7 @@ LABEL(gobble_ashr_1):
3716 # else
3717 palignr $1, %xmm3, %xmm2 /* merge into one 16byte value */
3718 # endif
3719 + TOLOWER (%xmm1, %xmm2)
3720
3721 pcmpeqb %xmm1, %xmm0
3722 pcmpeqb %xmm2, %xmm1
3723 @@ -300,7 +438,7 @@ LABEL(gobble_ashr_1):
3724 sub $0xffff, %edx
3725 jnz LABEL(exit)
3726
3727 -# ifdef USE_AS_STRNCMP
3728 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3729 sub $16, %r11
3730 jbe LABEL(strcmp_exitz)
3731 # endif
3732 @@ -319,8 +457,8 @@ LABEL(nibble_ashr_1):
3733 test $0xfffe, %edx
3734 jnz LABEL(ashr_1_exittail) /* find null char*/
3735
3736 -# ifdef USE_AS_STRNCMP
3737 - cmp $14, %r11
3738 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3739 + cmp $15, %r11
3740 jbe LABEL(ashr_1_exittail)
3741 # endif
3742
3743 @@ -351,6 +489,7 @@ LABEL(ashr_2):
3744 movdqa (%rsi), %xmm1
3745 pcmpeqb %xmm1, %xmm0
3746 pslldq $14, %xmm2
3747 + TOLOWER (%xmm1, %xmm2)
3748 pcmpeqb %xmm1, %xmm2
3749 psubb %xmm0, %xmm2
3750 pmovmskb %xmm2, %r9d
3751 @@ -390,6 +529,7 @@ LABEL(gobble_ashr_2):
3752 # else
3753 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
3754 # endif
3755 + TOLOWER (%xmm1, %xmm2)
3756
3757 pcmpeqb %xmm1, %xmm0
3758 pcmpeqb %xmm2, %xmm1
3759 @@ -398,7 +538,7 @@ LABEL(gobble_ashr_2):
3760 sub $0xffff, %edx
3761 jnz LABEL(exit)
3762
3763 -# ifdef USE_AS_STRNCMP
3764 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3765 sub $16, %r11
3766 jbe LABEL(strcmp_exitz)
3767 # endif
3768 @@ -420,6 +560,7 @@ LABEL(gobble_ashr_2):
3769 # else
3770 palignr $2, %xmm3, %xmm2 /* merge into one 16byte value */
3771 # endif
3772 + TOLOWER (%xmm1, %xmm2)
3773
3774 pcmpeqb %xmm1, %xmm0
3775 pcmpeqb %xmm2, %xmm1
3776 @@ -428,7 +569,7 @@ LABEL(gobble_ashr_2):
3777 sub $0xffff, %edx
3778 jnz LABEL(exit)
3779
3780 -# ifdef USE_AS_STRNCMP
3781 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3782 sub $16, %r11
3783 jbe LABEL(strcmp_exitz)
3784 # endif
3785 @@ -444,8 +585,8 @@ LABEL(nibble_ashr_2):
3786 test $0xfffc, %edx
3787 jnz LABEL(ashr_2_exittail)
3788
3789 -# ifdef USE_AS_STRNCMP
3790 - cmp $13, %r11
3791 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3792 + cmp $14, %r11
3793 jbe LABEL(ashr_2_exittail)
3794 # endif
3795
3796 @@ -472,6 +613,7 @@ LABEL(ashr_3):
3797 movdqa (%rsi), %xmm1
3798 pcmpeqb %xmm1, %xmm0
3799 pslldq $13, %xmm2
3800 + TOLOWER (%xmm1, %xmm2)
3801 pcmpeqb %xmm1, %xmm2
3802 psubb %xmm0, %xmm2
3803 pmovmskb %xmm2, %r9d
3804 @@ -512,6 +654,7 @@ LABEL(gobble_ashr_3):
3805 # else
3806 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
3807 # endif
3808 + TOLOWER (%xmm1, %xmm2)
3809
3810 pcmpeqb %xmm1, %xmm0
3811 pcmpeqb %xmm2, %xmm1
3812 @@ -520,7 +663,7 @@ LABEL(gobble_ashr_3):
3813 sub $0xffff, %edx
3814 jnz LABEL(exit)
3815
3816 -# ifdef USE_AS_STRNCMP
3817 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3818 sub $16, %r11
3819 jbe LABEL(strcmp_exitz)
3820 # endif
3821 @@ -542,6 +685,7 @@ LABEL(gobble_ashr_3):
3822 # else
3823 palignr $3, %xmm3, %xmm2 /* merge into one 16byte value */
3824 # endif
3825 + TOLOWER (%xmm1, %xmm2)
3826
3827 pcmpeqb %xmm1, %xmm0
3828 pcmpeqb %xmm2, %xmm1
3829 @@ -550,7 +694,7 @@ LABEL(gobble_ashr_3):
3830 sub $0xffff, %edx
3831 jnz LABEL(exit)
3832
3833 -# ifdef USE_AS_STRNCMP
3834 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3835 sub $16, %r11
3836 jbe LABEL(strcmp_exitz)
3837 # endif
3838 @@ -566,8 +710,8 @@ LABEL(nibble_ashr_3):
3839 test $0xfff8, %edx
3840 jnz LABEL(ashr_3_exittail)
3841
3842 -# ifdef USE_AS_STRNCMP
3843 - cmp $12, %r11
3844 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3845 + cmp $13, %r11
3846 jbe LABEL(ashr_3_exittail)
3847 # endif
3848
3849 @@ -594,6 +738,7 @@ LABEL(ashr_4):
3850 movdqa (%rsi), %xmm1
3851 pcmpeqb %xmm1, %xmm0
3852 pslldq $12, %xmm2
3853 + TOLOWER (%xmm1, %xmm2)
3854 pcmpeqb %xmm1, %xmm2
3855 psubb %xmm0, %xmm2
3856 pmovmskb %xmm2, %r9d
3857 @@ -634,6 +779,7 @@ LABEL(gobble_ashr_4):
3858 # else
3859 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
3860 # endif
3861 + TOLOWER (%xmm1, %xmm2)
3862
3863 pcmpeqb %xmm1, %xmm0
3864 pcmpeqb %xmm2, %xmm1
3865 @@ -642,7 +788,7 @@ LABEL(gobble_ashr_4):
3866 sub $0xffff, %edx
3867 jnz LABEL(exit)
3868
3869 -# ifdef USE_AS_STRNCMP
3870 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3871 sub $16, %r11
3872 jbe LABEL(strcmp_exitz)
3873 # endif
3874 @@ -664,6 +810,7 @@ LABEL(gobble_ashr_4):
3875 # else
3876 palignr $4, %xmm3, %xmm2 /* merge into one 16byte value */
3877 # endif
3878 + TOLOWER (%xmm1, %xmm2)
3879
3880 pcmpeqb %xmm1, %xmm0
3881 pcmpeqb %xmm2, %xmm1
3882 @@ -672,7 +819,7 @@ LABEL(gobble_ashr_4):
3883 sub $0xffff, %edx
3884 jnz LABEL(exit)
3885
3886 -# ifdef USE_AS_STRNCMP
3887 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3888 sub $16, %r11
3889 jbe LABEL(strcmp_exitz)
3890 # endif
3891 @@ -688,8 +835,8 @@ LABEL(nibble_ashr_4):
3892 test $0xfff0, %edx
3893 jnz LABEL(ashr_4_exittail)
3894
3895 -# ifdef USE_AS_STRNCMP
3896 - cmp $11, %r11
3897 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3898 + cmp $12, %r11
3899 jbe LABEL(ashr_4_exittail)
3900 # endif
3901
3902 @@ -716,6 +863,7 @@ LABEL(ashr_5):
3903 movdqa (%rsi), %xmm1
3904 pcmpeqb %xmm1, %xmm0
3905 pslldq $11, %xmm2
3906 + TOLOWER (%xmm1, %xmm2)
3907 pcmpeqb %xmm1, %xmm2
3908 psubb %xmm0, %xmm2
3909 pmovmskb %xmm2, %r9d
3910 @@ -756,6 +904,7 @@ LABEL(gobble_ashr_5):
3911 # else
3912 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
3913 # endif
3914 + TOLOWER (%xmm1, %xmm2)
3915
3916 pcmpeqb %xmm1, %xmm0
3917 pcmpeqb %xmm2, %xmm1
3918 @@ -764,7 +913,7 @@ LABEL(gobble_ashr_5):
3919 sub $0xffff, %edx
3920 jnz LABEL(exit)
3921
3922 -# ifdef USE_AS_STRNCMP
3923 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3924 sub $16, %r11
3925 jbe LABEL(strcmp_exitz)
3926 # endif
3927 @@ -786,6 +935,7 @@ LABEL(gobble_ashr_5):
3928 # else
3929 palignr $5, %xmm3, %xmm2 /* merge into one 16byte value */
3930 # endif
3931 + TOLOWER (%xmm1, %xmm2)
3932
3933 pcmpeqb %xmm1, %xmm0
3934 pcmpeqb %xmm2, %xmm1
3935 @@ -794,7 +944,7 @@ LABEL(gobble_ashr_5):
3936 sub $0xffff, %edx
3937 jnz LABEL(exit)
3938
3939 -# ifdef USE_AS_STRNCMP
3940 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3941 sub $16, %r11
3942 jbe LABEL(strcmp_exitz)
3943 # endif
3944 @@ -810,8 +960,8 @@ LABEL(nibble_ashr_5):
3945 test $0xffe0, %edx
3946 jnz LABEL(ashr_5_exittail)
3947
3948 -# ifdef USE_AS_STRNCMP
3949 - cmp $10, %r11
3950 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3951 + cmp $11, %r11
3952 jbe LABEL(ashr_5_exittail)
3953 # endif
3954
3955 @@ -838,6 +988,7 @@ LABEL(ashr_6):
3956 movdqa (%rsi), %xmm1
3957 pcmpeqb %xmm1, %xmm0
3958 pslldq $10, %xmm2
3959 + TOLOWER (%xmm1, %xmm2)
3960 pcmpeqb %xmm1, %xmm2
3961 psubb %xmm0, %xmm2
3962 pmovmskb %xmm2, %r9d
3963 @@ -878,6 +1029,7 @@ LABEL(gobble_ashr_6):
3964 # else
3965 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
3966 # endif
3967 + TOLOWER (%xmm1, %xmm2)
3968
3969 pcmpeqb %xmm1, %xmm0
3970 pcmpeqb %xmm2, %xmm1
3971 @@ -886,7 +1038,7 @@ LABEL(gobble_ashr_6):
3972 sub $0xffff, %edx
3973 jnz LABEL(exit)
3974
3975 -# ifdef USE_AS_STRNCMP
3976 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3977 sub $16, %r11
3978 jbe LABEL(strcmp_exitz)
3979 # endif
3980 @@ -908,6 +1060,7 @@ LABEL(gobble_ashr_6):
3981 # else
3982 palignr $6, %xmm3, %xmm2 /* merge into one 16byte value */
3983 # endif
3984 + TOLOWER (%xmm1, %xmm2)
3985
3986 pcmpeqb %xmm1, %xmm0
3987 pcmpeqb %xmm2, %xmm1
3988 @@ -916,7 +1069,7 @@ LABEL(gobble_ashr_6):
3989 sub $0xffff, %edx
3990 jnz LABEL(exit)
3991
3992 -# ifdef USE_AS_STRNCMP
3993 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
3994 sub $16, %r11
3995 jbe LABEL(strcmp_exitz)
3996 # endif
3997 @@ -932,8 +1085,8 @@ LABEL(nibble_ashr_6):
3998 test $0xffc0, %edx
3999 jnz LABEL(ashr_6_exittail)
4000
4001 -# ifdef USE_AS_STRNCMP
4002 - cmp $9, %r11
4003 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4004 + cmp $10, %r11
4005 jbe LABEL(ashr_6_exittail)
4006 # endif
4007
4008 @@ -960,6 +1113,7 @@ LABEL(ashr_7):
4009 movdqa (%rsi), %xmm1
4010 pcmpeqb %xmm1, %xmm0
4011 pslldq $9, %xmm2
4012 + TOLOWER (%xmm1, %xmm2)
4013 pcmpeqb %xmm1, %xmm2
4014 psubb %xmm0, %xmm2
4015 pmovmskb %xmm2, %r9d
4016 @@ -1000,6 +1154,7 @@ LABEL(gobble_ashr_7):
4017 # else
4018 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
4019 # endif
4020 + TOLOWER (%xmm1, %xmm2)
4021
4022 pcmpeqb %xmm1, %xmm0
4023 pcmpeqb %xmm2, %xmm1
4024 @@ -1008,7 +1163,7 @@ LABEL(gobble_ashr_7):
4025 sub $0xffff, %edx
4026 jnz LABEL(exit)
4027
4028 -# ifdef USE_AS_STRNCMP
4029 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4030 sub $16, %r11
4031 jbe LABEL(strcmp_exitz)
4032 # endif
4033 @@ -1030,6 +1185,7 @@ LABEL(gobble_ashr_7):
4034 # else
4035 palignr $7, %xmm3, %xmm2 /* merge into one 16byte value */
4036 # endif
4037 + TOLOWER (%xmm1, %xmm2)
4038
4039 pcmpeqb %xmm1, %xmm0
4040 pcmpeqb %xmm2, %xmm1
4041 @@ -1038,7 +1194,7 @@ LABEL(gobble_ashr_7):
4042 sub $0xffff, %edx
4043 jnz LABEL(exit)
4044
4045 -# ifdef USE_AS_STRNCMP
4046 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4047 sub $16, %r11
4048 jbe LABEL(strcmp_exitz)
4049 # endif
4050 @@ -1054,8 +1210,8 @@ LABEL(nibble_ashr_7):
4051 test $0xff80, %edx
4052 jnz LABEL(ashr_7_exittail)
4053
4054 -# ifdef USE_AS_STRNCMP
4055 - cmp $8, %r11
4056 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4057 + cmp $9, %r11
4058 jbe LABEL(ashr_7_exittail)
4059 # endif
4060
4061 @@ -1082,6 +1238,7 @@ LABEL(ashr_8):
4062 movdqa (%rsi), %xmm1
4063 pcmpeqb %xmm1, %xmm0
4064 pslldq $8, %xmm2
4065 + TOLOWER (%xmm1, %xmm2)
4066 pcmpeqb %xmm1, %xmm2
4067 psubb %xmm0, %xmm2
4068 pmovmskb %xmm2, %r9d
4069 @@ -1122,6 +1279,7 @@ LABEL(gobble_ashr_8):
4070 # else
4071 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
4072 # endif
4073 + TOLOWER (%xmm1, %xmm2)
4074
4075 pcmpeqb %xmm1, %xmm0
4076 pcmpeqb %xmm2, %xmm1
4077 @@ -1130,7 +1288,7 @@ LABEL(gobble_ashr_8):
4078 sub $0xffff, %edx
4079 jnz LABEL(exit)
4080
4081 -# ifdef USE_AS_STRNCMP
4082 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4083 sub $16, %r11
4084 jbe LABEL(strcmp_exitz)
4085 # endif
4086 @@ -1152,6 +1310,7 @@ LABEL(gobble_ashr_8):
4087 # else
4088 palignr $8, %xmm3, %xmm2 /* merge into one 16byte value */
4089 # endif
4090 + TOLOWER (%xmm1, %xmm2)
4091
4092 pcmpeqb %xmm1, %xmm0
4093 pcmpeqb %xmm2, %xmm1
4094 @@ -1160,7 +1319,7 @@ LABEL(gobble_ashr_8):
4095 sub $0xffff, %edx
4096 jnz LABEL(exit)
4097
4098 -# ifdef USE_AS_STRNCMP
4099 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4100 sub $16, %r11
4101 jbe LABEL(strcmp_exitz)
4102 # endif
4103 @@ -1176,8 +1335,8 @@ LABEL(nibble_ashr_8):
4104 test $0xff00, %edx
4105 jnz LABEL(ashr_8_exittail)
4106
4107 -# ifdef USE_AS_STRNCMP
4108 - cmp $7, %r11
4109 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4110 + cmp $8, %r11
4111 jbe LABEL(ashr_8_exittail)
4112 # endif
4113
4114 @@ -1204,6 +1363,7 @@ LABEL(ashr_9):
4115 movdqa (%rsi), %xmm1
4116 pcmpeqb %xmm1, %xmm0
4117 pslldq $7, %xmm2
4118 + TOLOWER (%xmm1, %xmm2)
4119 pcmpeqb %xmm1, %xmm2
4120 psubb %xmm0, %xmm2
4121 pmovmskb %xmm2, %r9d
4122 @@ -1244,6 +1404,7 @@ LABEL(gobble_ashr_9):
4123 # else
4124 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
4125 # endif
4126 + TOLOWER (%xmm1, %xmm2)
4127
4128 pcmpeqb %xmm1, %xmm0
4129 pcmpeqb %xmm2, %xmm1
4130 @@ -1252,7 +1413,7 @@ LABEL(gobble_ashr_9):
4131 sub $0xffff, %edx
4132 jnz LABEL(exit)
4133
4134 -# ifdef USE_AS_STRNCMP
4135 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4136 sub $16, %r11
4137 jbe LABEL(strcmp_exitz)
4138 # endif
4139 @@ -1274,6 +1435,7 @@ LABEL(gobble_ashr_9):
4140 # else
4141 palignr $9, %xmm3, %xmm2 /* merge into one 16byte value */
4142 # endif
4143 + TOLOWER (%xmm1, %xmm2)
4144
4145 pcmpeqb %xmm1, %xmm0
4146 pcmpeqb %xmm2, %xmm1
4147 @@ -1282,7 +1444,7 @@ LABEL(gobble_ashr_9):
4148 sub $0xffff, %edx
4149 jnz LABEL(exit)
4150
4151 -# ifdef USE_AS_STRNCMP
4152 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4153 sub $16, %r11
4154 jbe LABEL(strcmp_exitz)
4155 # endif
4156 @@ -1298,8 +1460,8 @@ LABEL(nibble_ashr_9):
4157 test $0xfe00, %edx
4158 jnz LABEL(ashr_9_exittail)
4159
4160 -# ifdef USE_AS_STRNCMP
4161 - cmp $6, %r11
4162 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4163 + cmp $7, %r11
4164 jbe LABEL(ashr_9_exittail)
4165 # endif
4166
4167 @@ -1326,6 +1488,7 @@ LABEL(ashr_10):
4168 movdqa (%rsi), %xmm1
4169 pcmpeqb %xmm1, %xmm0
4170 pslldq $6, %xmm2
4171 + TOLOWER (%xmm1, %xmm2)
4172 pcmpeqb %xmm1, %xmm2
4173 psubb %xmm0, %xmm2
4174 pmovmskb %xmm2, %r9d
4175 @@ -1366,6 +1529,7 @@ LABEL(gobble_ashr_10):
4176 # else
4177 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
4178 # endif
4179 + TOLOWER (%xmm1, %xmm2)
4180
4181 pcmpeqb %xmm1, %xmm0
4182 pcmpeqb %xmm2, %xmm1
4183 @@ -1374,7 +1538,7 @@ LABEL(gobble_ashr_10):
4184 sub $0xffff, %edx
4185 jnz LABEL(exit)
4186
4187 -# ifdef USE_AS_STRNCMP
4188 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4189 sub $16, %r11
4190 jbe LABEL(strcmp_exitz)
4191 # endif
4192 @@ -1396,6 +1560,7 @@ LABEL(gobble_ashr_10):
4193 # else
4194 palignr $10, %xmm3, %xmm2 /* merge into one 16byte value */
4195 # endif
4196 + TOLOWER (%xmm1, %xmm2)
4197
4198 pcmpeqb %xmm1, %xmm0
4199 pcmpeqb %xmm2, %xmm1
4200 @@ -1404,7 +1569,7 @@ LABEL(gobble_ashr_10):
4201 sub $0xffff, %edx
4202 jnz LABEL(exit)
4203
4204 -# ifdef USE_AS_STRNCMP
4205 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4206 sub $16, %r11
4207 jbe LABEL(strcmp_exitz)
4208 # endif
4209 @@ -1420,8 +1585,8 @@ LABEL(nibble_ashr_10):
4210 test $0xfc00, %edx
4211 jnz LABEL(ashr_10_exittail)
4212
4213 -# ifdef USE_AS_STRNCMP
4214 - cmp $5, %r11
4215 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4216 + cmp $6, %r11
4217 jbe LABEL(ashr_10_exittail)
4218 # endif
4219
4220 @@ -1448,6 +1613,7 @@ LABEL(ashr_11):
4221 movdqa (%rsi), %xmm1
4222 pcmpeqb %xmm1, %xmm0
4223 pslldq $5, %xmm2
4224 + TOLOWER (%xmm1, %xmm2)
4225 pcmpeqb %xmm1, %xmm2
4226 psubb %xmm0, %xmm2
4227 pmovmskb %xmm2, %r9d
4228 @@ -1488,6 +1654,7 @@ LABEL(gobble_ashr_11):
4229 # else
4230 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
4231 # endif
4232 + TOLOWER (%xmm1, %xmm2)
4233
4234 pcmpeqb %xmm1, %xmm0
4235 pcmpeqb %xmm2, %xmm1
4236 @@ -1496,7 +1663,7 @@ LABEL(gobble_ashr_11):
4237 sub $0xffff, %edx
4238 jnz LABEL(exit)
4239
4240 -# ifdef USE_AS_STRNCMP
4241 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4242 sub $16, %r11
4243 jbe LABEL(strcmp_exitz)
4244 # endif
4245 @@ -1518,6 +1685,7 @@ LABEL(gobble_ashr_11):
4246 # else
4247 palignr $11, %xmm3, %xmm2 /* merge into one 16byte value */
4248 # endif
4249 + TOLOWER (%xmm1, %xmm2)
4250
4251 pcmpeqb %xmm1, %xmm0
4252 pcmpeqb %xmm2, %xmm1
4253 @@ -1526,7 +1694,7 @@ LABEL(gobble_ashr_11):
4254 sub $0xffff, %edx
4255 jnz LABEL(exit)
4256
4257 -# ifdef USE_AS_STRNCMP
4258 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4259 sub $16, %r11
4260 jbe LABEL(strcmp_exitz)
4261 # endif
4262 @@ -1542,8 +1710,8 @@ LABEL(nibble_ashr_11):
4263 test $0xf800, %edx
4264 jnz LABEL(ashr_11_exittail)
4265
4266 -# ifdef USE_AS_STRNCMP
4267 - cmp $4, %r11
4268 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4269 + cmp $5, %r11
4270 jbe LABEL(ashr_11_exittail)
4271 # endif
4272
4273 @@ -1570,6 +1738,7 @@ LABEL(ashr_12):
4274 movdqa (%rsi), %xmm1
4275 pcmpeqb %xmm1, %xmm0
4276 pslldq $4, %xmm2
4277 + TOLOWER (%xmm1, %xmm2)
4278 pcmpeqb %xmm1, %xmm2
4279 psubb %xmm0, %xmm2
4280 pmovmskb %xmm2, %r9d
4281 @@ -1610,6 +1779,7 @@ LABEL(gobble_ashr_12):
4282 # else
4283 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
4284 # endif
4285 + TOLOWER (%xmm1, %xmm2)
4286
4287 pcmpeqb %xmm1, %xmm0
4288 pcmpeqb %xmm2, %xmm1
4289 @@ -1618,7 +1788,7 @@ LABEL(gobble_ashr_12):
4290 sub $0xffff, %edx
4291 jnz LABEL(exit)
4292
4293 -# ifdef USE_AS_STRNCMP
4294 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4295 sub $16, %r11
4296 jbe LABEL(strcmp_exitz)
4297 # endif
4298 @@ -1640,6 +1810,7 @@ LABEL(gobble_ashr_12):
4299 # else
4300 palignr $12, %xmm3, %xmm2 /* merge into one 16byte value */
4301 # endif
4302 + TOLOWER (%xmm1, %xmm2)
4303
4304 pcmpeqb %xmm1, %xmm0
4305 pcmpeqb %xmm2, %xmm1
4306 @@ -1648,7 +1819,7 @@ LABEL(gobble_ashr_12):
4307 sub $0xffff, %edx
4308 jnz LABEL(exit)
4309
4310 -# ifdef USE_AS_STRNCMP
4311 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4312 sub $16, %r11
4313 jbe LABEL(strcmp_exitz)
4314 # endif
4315 @@ -1664,8 +1835,8 @@ LABEL(nibble_ashr_12):
4316 test $0xf000, %edx
4317 jnz LABEL(ashr_12_exittail)
4318
4319 -# ifdef USE_AS_STRNCMP
4320 - cmp $3, %r11
4321 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4322 + cmp $4, %r11
4323 jbe LABEL(ashr_12_exittail)
4324 # endif
4325
4326 @@ -1692,6 +1863,7 @@ LABEL(ashr_13):
4327 movdqa (%rsi), %xmm1
4328 pcmpeqb %xmm1, %xmm0
4329 pslldq $3, %xmm2
4330 + TOLOWER (%xmm1, %xmm2)
4331 pcmpeqb %xmm1, %xmm2
4332 psubb %xmm0, %xmm2
4333 pmovmskb %xmm2, %r9d
4334 @@ -1732,6 +1904,7 @@ LABEL(gobble_ashr_13):
4335 # else
4336 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
4337 # endif
4338 + TOLOWER (%xmm1, %xmm2)
4339
4340 pcmpeqb %xmm1, %xmm0
4341 pcmpeqb %xmm2, %xmm1
4342 @@ -1740,7 +1913,7 @@ LABEL(gobble_ashr_13):
4343 sub $0xffff, %edx
4344 jnz LABEL(exit)
4345
4346 -# ifdef USE_AS_STRNCMP
4347 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4348 sub $16, %r11
4349 jbe LABEL(strcmp_exitz)
4350 # endif
4351 @@ -1762,6 +1935,7 @@ LABEL(gobble_ashr_13):
4352 # else
4353 palignr $13, %xmm3, %xmm2 /* merge into one 16byte value */
4354 # endif
4355 + TOLOWER (%xmm1, %xmm2)
4356
4357 pcmpeqb %xmm1, %xmm0
4358 pcmpeqb %xmm2, %xmm1
4359 @@ -1770,7 +1944,7 @@ LABEL(gobble_ashr_13):
4360 sub $0xffff, %edx
4361 jnz LABEL(exit)
4362
4363 -# ifdef USE_AS_STRNCMP
4364 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4365 sub $16, %r11
4366 jbe LABEL(strcmp_exitz)
4367 # endif
4368 @@ -1786,8 +1960,8 @@ LABEL(nibble_ashr_13):
4369 test $0xe000, %edx
4370 jnz LABEL(ashr_13_exittail)
4371
4372 -# ifdef USE_AS_STRNCMP
4373 - cmp $2, %r11
4374 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4375 + cmp $3, %r11
4376 jbe LABEL(ashr_13_exittail)
4377 # endif
4378
4379 @@ -1814,6 +1988,7 @@ LABEL(ashr_14):
4380 movdqa (%rsi), %xmm1
4381 pcmpeqb %xmm1, %xmm0
4382 pslldq $2, %xmm2
4383 + TOLOWER (%xmm1, %xmm2)
4384 pcmpeqb %xmm1, %xmm2
4385 psubb %xmm0, %xmm2
4386 pmovmskb %xmm2, %r9d
4387 @@ -1854,6 +2029,7 @@ LABEL(gobble_ashr_14):
4388 # else
4389 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
4390 # endif
4391 + TOLOWER (%xmm1, %xmm2)
4392
4393 pcmpeqb %xmm1, %xmm0
4394 pcmpeqb %xmm2, %xmm1
4395 @@ -1862,7 +2038,7 @@ LABEL(gobble_ashr_14):
4396 sub $0xffff, %edx
4397 jnz LABEL(exit)
4398
4399 -# ifdef USE_AS_STRNCMP
4400 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4401 sub $16, %r11
4402 jbe LABEL(strcmp_exitz)
4403 # endif
4404 @@ -1884,6 +2060,7 @@ LABEL(gobble_ashr_14):
4405 # else
4406 palignr $14, %xmm3, %xmm2 /* merge into one 16byte value */
4407 # endif
4408 + TOLOWER (%xmm1, %xmm2)
4409
4410 pcmpeqb %xmm1, %xmm0
4411 pcmpeqb %xmm2, %xmm1
4412 @@ -1892,7 +2069,7 @@ LABEL(gobble_ashr_14):
4413 sub $0xffff, %edx
4414 jnz LABEL(exit)
4415
4416 -# ifdef USE_AS_STRNCMP
4417 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4418 sub $16, %r11
4419 jbe LABEL(strcmp_exitz)
4420 # endif
4421 @@ -1908,8 +2085,8 @@ LABEL(nibble_ashr_14):
4422 test $0xc000, %edx
4423 jnz LABEL(ashr_14_exittail)
4424
4425 -# ifdef USE_AS_STRNCMP
4426 - cmp $1, %r11
4427 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4428 + cmp $2, %r11
4429 jbe LABEL(ashr_14_exittail)
4430 # endif
4431
4432 @@ -1936,6 +2113,7 @@ LABEL(ashr_15):
4433 movdqa (%rsi), %xmm1
4434 pcmpeqb %xmm1, %xmm0
4435 pslldq $1, %xmm2
4436 + TOLOWER (%xmm1, %xmm2)
4437 pcmpeqb %xmm1, %xmm2
4438 psubb %xmm0, %xmm2
4439 pmovmskb %xmm2, %r9d
4440 @@ -1978,6 +2156,7 @@ LABEL(gobble_ashr_15):
4441 # else
4442 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
4443 # endif
4444 + TOLOWER (%xmm1, %xmm2)
4445
4446 pcmpeqb %xmm1, %xmm0
4447 pcmpeqb %xmm2, %xmm1
4448 @@ -1986,7 +2165,7 @@ LABEL(gobble_ashr_15):
4449 sub $0xffff, %edx
4450 jnz LABEL(exit)
4451
4452 -# ifdef USE_AS_STRNCMP
4453 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4454 sub $16, %r11
4455 jbe LABEL(strcmp_exitz)
4456 # endif
4457 @@ -2008,6 +2187,7 @@ LABEL(gobble_ashr_15):
4458 # else
4459 palignr $15, %xmm3, %xmm2 /* merge into one 16byte value */
4460 # endif
4461 + TOLOWER (%xmm1, %xmm2)
4462
4463 pcmpeqb %xmm1, %xmm0
4464 pcmpeqb %xmm2, %xmm1
4465 @@ -2016,7 +2196,7 @@ LABEL(gobble_ashr_15):
4466 sub $0xffff, %edx
4467 jnz LABEL(exit)
4468
4469 -# ifdef USE_AS_STRNCMP
4470 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4471 sub $16, %r11
4472 jbe LABEL(strcmp_exitz)
4473 # endif
4474 @@ -2032,9 +2212,9 @@ LABEL(nibble_ashr_15):
4475 test $0x8000, %edx
4476 jnz LABEL(ashr_15_exittail)
4477
4478 -# ifdef USE_AS_STRNCMP
4479 - test %r11, %r11
4480 - je LABEL(ashr_15_exittail)
4481 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4482 + cmpq $1, %r11
4483 + jbe LABEL(ashr_15_exittail)
4484 # endif
4485
4486 pxor %xmm0, %xmm0
4487 @@ -2049,6 +2229,7 @@ LABEL(ashr_15_exittail):
4488
4489 .p2align 4
4490 LABEL(aftertail):
4491 + TOLOWER (%xmm1, %xmm3)
4492 pcmpeqb %xmm3, %xmm1
4493 psubb %xmm0, %xmm1
4494 pmovmskb %xmm1, %edx
4495 @@ -2069,13 +2250,19 @@ LABEL(ret):
4496 LABEL(less16bytes):
4497 bsf %rdx, %rdx /* find and store bit index in %rdx */
4498
4499 -# ifdef USE_AS_STRNCMP
4500 +# if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
4501 sub %rdx, %r11
4502 jbe LABEL(strcmp_exitz)
4503 # endif
4504 movzbl (%rsi, %rdx), %ecx
4505 movzbl (%rdi, %rdx), %eax
4506
4507 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
4508 + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
4509 + movl (%rdx,%rcx,4), %ecx
4510 + movl (%rdx,%rax,4), %eax
4511 +# endif
4512 +
4513 sub %ecx, %eax
4514 ret
4515
4516 @@ -2088,6 +2275,12 @@ LABEL(Byte0):
4517 movzx (%rsi), %ecx
4518 movzx (%rdi), %eax
4519
4520 +# if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
4521 + leaq _nl_C_LC_CTYPE_tolower+128*4(%rip), %rdx
4522 + movl (%rdx,%rcx,4), %ecx
4523 + movl (%rdx,%rax,4), %eax
4524 +# endif
4525 +
4526 sub %ecx, %eax
4527 ret
4528 END (BP_SYM (STRCMP))
4529 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase.S
4530 ===================================================================
4531 --- /dev/null
4532 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase.S
4533 @@ -0,0 +1 @@
4534 +/* In strncase_l.S. */
4535 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase_l-nonascii.c
4536 ===================================================================
4537 --- /dev/null
4538 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase_l-nonascii.c
4539 @@ -0,0 +1,8 @@
4540 +#include <string.h>
4541 +
4542 +extern int __strncasecmp_l_nonascii (__const char *__s1, __const char *__s2,
4543 + size_t __n, __locale_t __loc); /* declared before the #define below maps __strncasecmp_l onto this name */
4544 +
4545 +#define __strncasecmp_l __strncasecmp_l_nonascii /* every __strncasecmp_l token in the include below becomes __strncasecmp_l_nonascii */
4546 +#define USE_IN_EXTENDED_LOCALE_MODEL 1 /* NOTE(review): presumably selects the locale-taking variant in strncase.c -- confirm there */
4547 +#include <string/strncase.c>
4548 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase_l.S
4549 ===================================================================
4550 --- /dev/null
4551 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strncase_l.S
4552 @@ -0,0 +1,6 @@
4553 +#define STRCMP __strncasecmp_l /* symbol name strcmp.S uses for its entry point, e.g. END (BP_SYM (STRCMP)) */
4554 +#define USE_AS_STRNCASECMP_L /* enables the blocks in strcmp.S guarded by defined USE_AS_STRNCASECMP_L */
4555 +#include "strcmp.S" /* the whole implementation comes from this include */
4556 +
4557 +weak_alias (__strncasecmp_l, strncasecmp_l)
4558 +libc_hidden_def (strncasecmp_l)
4559 Index: glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strnlen.S
4560 ===================================================================
4561 --- /dev/null
4562 +++ glibc-2.12-2-gc4ccff1/sysdeps/x86_64/strnlen.S
4563 @@ -0,0 +1,64 @@
4564 +/* strnlen(str,maxlen) -- determine the length of the string STR up to MAXLEN.
4565 + Copyright (C) 2010 Free Software Foundation, Inc.
4566 + Contributed by Ulrich Drepper <drepper@redhat.com>.
4567 + This file is part of the GNU C Library.
4568 +
4569 + The GNU C Library is free software; you can redistribute it and/or
4570 + modify it under the terms of the GNU Lesser General Public
4571 + License as published by the Free Software Foundation; either
4572 + version 2.1 of the License, or (at your option) any later version.
4573 +
4574 + The GNU C Library is distributed in the hope that it will be useful,
4575 + but WITHOUT ANY WARRANTY; without even the implied warranty of
4576 + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
4577 + Lesser General Public License for more details.
4578 +
4579 + You should have received a copy of the GNU Lesser General Public
4580 + License along with the GNU C Library; if not, write to the Free
4581 + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
4582 + 02111-1307 USA. */
4583 +
4584 +#include <sysdep.h>
4585 +
4586 +
4587 + .text
4588 +ENTRY(__strnlen) /* header says strnlen(str,maxlen); the code reads %rdi as the pointer and %rsi as the limit */
4589 + movq %rsi, %rax /* preload the result with the limit */
4590 + testq %rsi, %rsi
4591 + jz 3f /* limit is 0: return it unchanged */
4592 + pxor %xmm2, %xmm2 /* %xmm2 = sixteen zero bytes */
4593 + movq %rdi, %rcx
4594 + movq %rdi, %r8 /* keep the original pointer for the final length computation */
4595 + movq $16, %r9
4596 + andq $~15, %rdi /* round the pointer down to a 16-byte boundary */
4597 + movdqa %xmm2, %xmm1 /* spare zero vector; %xmm2 is overwritten by the next compare */
4598 + pcmpeqb (%rdi), %xmm2 /* 0xff in every byte position of the first aligned block that holds NUL */
4599 + orl $0xffffffff, %r10d /* %r10d = all ones */
4600 + subq %rdi, %rcx /* %rcx = offset of the string inside the aligned block */
4601 + shll %cl, %r10d /* clear the mask bits for bytes before the string start */
4602 + subq %rcx, %r9 /* %r9 = 16 - offset, the string bytes covered by this block */
4603 + pmovmskb %xmm2, %edx /* one bit per byte, set where a NUL was seen */
4604 + andl %r10d, %edx /* drop matches that precede the string */
4605 + jnz 1f /* NUL found in the first block */
4606 + subq %r9, %rsi /* limit left after the first block */
4607 + jbe 3f /* limit used up: %rax still holds the limit */
4608 +
4609 +2: movdqa 16(%rdi), %xmm0 /* main loop: load the next aligned 16 bytes */
4610 + leaq 16(%rdi), %rdi /* advance the block pointer */
4611 + pcmpeqb %xmm1, %xmm0 /* compare against the zero vector */
4612 + pmovmskb %xmm0, %edx
4613 + testl %edx, %edx
4614 + jnz 1f /* NUL found in this block */
4615 + subq $16, %rsi
4616 + jnbe 2b /* loop only while the limit neither hit zero nor wrapped */
4617 +3: ret /* %rax = limit */
4618 +
4619 +1: subq %r8, %rdi /* current block base minus the original pointer */
4620 + bsfl %edx, %edx /* bit index of the first NUL within the block */
4621 + addq %rdi, %rdx /* %rdx = distance from the string start to that NUL */
4622 + cmpq %rdx, %rax
4623 + cmovnbq %rdx, %rax /* result = smaller of the limit and the distance to the NUL */
4624 + ret
4625 +END(__strnlen)
4626 +weak_alias (__strnlen, strnlen)
4627 +libc_hidden_def (strnlen)
4628 Index: glibc-2.12-2-gc4ccff1/wcsmbs/wcsatcliff.c
4629 ===================================================================
4630 --- glibc-2.12-2-gc4ccff1.orig/wcsmbs/wcsatcliff.c
4631 +++ glibc-2.12-2-gc4ccff1/wcsmbs/wcsatcliff.c
4632 @@ -16,6 +16,8 @@
4633 #define MEMCPY wmemcpy
4634 #define MEMPCPY wmempcpy
4635 #define MEMCHR wmemchr
4636 +#define STRCMP wcscmp
4637 +#define STRNCMP wcsncmp
4638
4639
4640 #include "../string/stratcliff.c"