]>
Commit | Line | Data |
---|---|---|
dff8da6b | 1 | /* Copyright (C) 2001-2024 Free Software Foundation, Inc. |
d8f00d46 UD |
2 | This file is part of the GNU C Library. |
3 | ||
4 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
d8f00d46 UD |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 12 | Lesser General Public License for more details. |
d8f00d46 | 13 | |
41bdb6e2 | 14 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 15 | License along with the GNU C Library; if not, see |
5a82c748 | 16 | <https://www.gnu.org/licenses/>. */ |
d8f00d46 | 17 | |
d8f00d46 UD |
18 | #include <assert.h> |
19 | #include <errno.h> | |
20 | #include <error.h> | |
21 | #include <fcntl.h> | |
bb3f4825 | 22 | #include <getopt.h> |
d8f00d46 UD |
23 | #include <iconv.h> |
24 | #include <locale.h> | |
25 | #include <mcheck.h> | |
ceaa9889 | 26 | #include <stdint.h> |
d8f00d46 UD |
27 | #include <stdio.h> |
28 | #include <stdlib.h> | |
29 | #include <string.h> | |
30 | #include <time.h> | |
31 | #include <unistd.h> | |
32 | #include <sys/stat.h> | |
33 | #include <sys/types.h> | |
34 | #include <regex.h> | |
466f2be6 | 35 | #include <support/support.h> |
d8f00d46 UD |
36 | |
37 | ||
#if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
/* Clock obtained with clock_getcpuclockid in do_test, and a flag telling
   whether that clock can (still) be used for timing.  */
static clockid_t cl;
static int use_clock;
#endif
/* Conversion descriptor from UTF-8 to ISO-8859-1, opened in do_test.  */
static iconv_t cd;
/* Content of the input file as read from disk, and its length.  */
static char *mem;
static size_t memlen;
/* The same content converted to ISO-8859-1, and its length.  */
static char *umem;
static size_t umemlen;
/* Set to 1 when the --timing command line option is given.  */
static int timing;

static int test_expr (const char *expr, int expected, int expectedicase);
static int run_test (const char *expr, const char *mem, size_t memlen,
                     int icase, int expected);
static int run_test_backwards (const char *expr, const char *mem,
                               size_t memlen, int icase, int expected);
d8f00d46 UD |
54 | |
55 | ||
7166d23f UD |
56 | static int |
57 | do_test (void) | |
d8f00d46 UD |
58 | { |
59 | const char *file; | |
d8f00d46 UD |
60 | int fd; |
61 | struct stat st; | |
466f2be6 | 62 | int result = 0; |
d8f00d46 UD |
63 | char *inmem; |
64 | char *outmem; | |
65 | size_t inlen; | |
66 | size_t outlen; | |
d8f00d46 UD |
67 | |
68 | mtrace (); | |
69 | ||
70 | /* Make the content of the file available in memory. */ | |
3a9d025f | 71 | file = "./tst-regex.input"; |
d8f00d46 UD |
72 | fd = open (file, O_RDONLY); |
73 | if (fd == -1) | |
74 | error (EXIT_FAILURE, errno, "cannot open %s", basename (file)); | |
75 | ||
76 | if (fstat (fd, &st) != 0) | |
77 | error (EXIT_FAILURE, errno, "cannot stat %s", basename (file)); | |
78 | memlen = st.st_size; | |
79 | ||
80 | mem = (char *) malloc (memlen + 1); | |
81 | if (mem == NULL) | |
82 | error (EXIT_FAILURE, errno, "while allocating buffer"); | |
83 | ||
3c0fb574 | 84 | if ((size_t) read (fd, mem, memlen) != memlen) |
d8f00d46 UD |
85 | error (EXIT_FAILURE, 0, "cannot read entire file"); |
86 | mem[memlen] = '\0'; | |
87 | ||
88 | close (fd); | |
89 | ||
a22a582e PE |
90 | /* We have to convert a few things from UTF-8 to Latin-1. */ |
91 | cd = iconv_open ("ISO-8859-1", "UTF-8"); | |
d8f00d46 UD |
92 | if (cd == (iconv_t) -1) |
93 | error (EXIT_FAILURE, errno, "cannot get conversion descriptor"); | |
94 | ||
a22a582e PE |
95 | /* For the second test we have to convert the file content to Latin-1. |
96 | This cannot grow the data. */ | |
97 | umem = (char *) malloc (memlen + 1); | |
2f07975d UD |
98 | if (umem == NULL) |
99 | error (EXIT_FAILURE, errno, "while allocating buffer"); | |
100 | ||
d8f00d46 UD |
101 | inmem = mem; |
102 | inlen = memlen; | |
103 | outmem = umem; | |
a22a582e | 104 | outlen = memlen; |
d8f00d46 | 105 | iconv (cd, &inmem, &inlen, &outmem, &outlen); |
3c0fb574 | 106 | umemlen = outmem - umem; |
d8f00d46 UD |
107 | if (inlen != 0) |
108 | error (EXIT_FAILURE, errno, "cannot convert buffer"); | |
a22a582e | 109 | umem[umemlen] = '\0'; |
d8f00d46 | 110 | |
bf7c04cd UD |
111 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
112 | # if _POSIX_CPUTIME == 0 | |
113 | if (sysconf (_SC_CPUTIME) < 0) | |
114 | use_clock = 0; | |
115 | else | |
116 | # endif | |
117 | /* See whether we can use the CPU clock. */ | |
118 | use_clock = clock_getcpuclockid (0, &cl) == 0; | |
73111f03 UD |
119 | #endif |
120 | ||
2f07975d UD |
121 | #ifdef DEBUG |
122 | re_set_syntax (RE_DEBUG); | |
123 | #endif | |
124 | ||
73111f03 UD |
125 | /* Run the actual tests. All tests are run in a single-byte and a |
126 | multi-byte locale. */ | |
466f2be6 | 127 | result |= test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4); |
3c0fb574 UD |
128 | result |= test_expr ("G.ran", 2, 3); |
129 | result |= test_expr ("G.\\{1\\}ran", 2, 3); | |
7f0d9e61 | 130 | result |= test_expr ("G.*ran", 3, 43); |
a22a582e | 131 | result |= test_expr ("[äáàâ]", 0, 0); |
bb3f4825 UD |
132 | result |= test_expr ("Uddeborg", 2, 2); |
133 | result |= test_expr (".Uddeborg", 2, 2); | |
73111f03 UD |
134 | |
135 | /* Free the resources. */ | |
2f07975d | 136 | free (umem); |
73111f03 UD |
137 | iconv_close (cd); |
138 | free (mem); | |
139 | ||
140 | return result; | |
141 | } | |
142 | ||
143 | ||
144 | static int | |
3c0fb574 | 145 | test_expr (const char *expr, int expected, int expectedicase) |
73111f03 | 146 | { |
466f2be6 | 147 | int result = 0; |
73111f03 UD |
148 | char *inmem; |
149 | char *outmem; | |
150 | size_t inlen; | |
151 | size_t outlen; | |
152 | char *uexpr; | |
153 | ||
466f2be6 CD |
154 | /* First test: search with basic C.UTF-8 locale. */ |
155 | printf ("INFO: Testing C.UTF-8.\n"); | |
156 | xsetlocale (LC_ALL, "C.UTF-8"); | |
73111f03 | 157 | |
a22a582e | 158 | printf ("\nTest \"%s\" with multi-byte locale\n", expr); |
466f2be6 CD |
159 | result |= run_test (expr, mem, memlen, 0, expected); |
160 | printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr); | |
161 | result |= run_test (expr, mem, memlen, 1, expectedicase); | |
162 | printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr); | |
163 | result |= run_test_backwards (expr, mem, memlen, 0, expected); | |
164 | printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n", | |
165 | expr); | |
166 | result |= run_test_backwards (expr, mem, memlen, 1, expectedicase); | |
167 | ||
168 | /* Second test: search with an UTF-8 locale. */ | |
169 | printf ("INFO: Testing de_DE.UTF-8.\n"); | |
170 | xsetlocale (LC_ALL, "de_DE.UTF-8"); | |
171 | ||
172 | printf ("\nTest \"%s\" with multi-byte locale\n", expr); | |
173 | result |= run_test (expr, mem, memlen, 0, expected); | |
a22a582e | 174 | printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr); |
3c0fb574 | 175 | result |= run_test (expr, mem, memlen, 1, expectedicase); |
a22a582e | 176 | printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr); |
3c0fb574 | 177 | result |= run_test_backwards (expr, mem, memlen, 0, expected); |
a22a582e | 178 | printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n", |
3c0fb574 UD |
179 | expr); |
180 | result |= run_test_backwards (expr, mem, memlen, 1, expectedicase); | |
73111f03 | 181 | |
a22a582e | 182 | /* Second test: search with an ISO-8859-1 locale. */ |
466f2be6 CD |
183 | printf ("INFO: Testing de_DE.ISO-8859-1.\n"); |
184 | xsetlocale (LC_ALL, "de_DE.ISO-8859-1"); | |
73111f03 UD |
185 | |
186 | inmem = (char *) expr; | |
d8f00d46 | 187 | inlen = strlen (expr); |
a22a582e | 188 | outlen = inlen; |
d8f00d46 | 189 | outmem = uexpr = alloca (outlen + 1); |
73111f03 | 190 | memset (outmem, '\0', outlen + 1); |
d8f00d46 UD |
191 | iconv (cd, &inmem, &inlen, &outmem, &outlen); |
192 | if (inlen != 0) | |
193 | error (EXIT_FAILURE, errno, "cannot convert expression"); | |
194 | ||
d8f00d46 | 195 | /* Run the tests. */ |
a22a582e | 196 | printf ("\nTest \"%s\" with 8-bit locale\n", expr); |
3c0fb574 | 197 | result |= run_test (uexpr, umem, umemlen, 0, expected); |
a22a582e | 198 | printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr); |
3c0fb574 | 199 | result |= run_test (uexpr, umem, umemlen, 1, expectedicase); |
a22a582e | 200 | printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr); |
3c0fb574 | 201 | result |= run_test_backwards (uexpr, umem, umemlen, 0, expected); |
a22a582e | 202 | printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n", |
3c0fb574 UD |
203 | expr); |
204 | result |= run_test_backwards (uexpr, umem, umemlen, 1, expectedicase); | |
d8f00d46 UD |
205 | |
206 | return result; | |
207 | } | |
208 | ||
209 | ||
210 | static int | |
3c0fb574 UD |
211 | run_test (const char *expr, const char *mem, size_t memlen, int icase, |
212 | int expected) | |
d8f00d46 | 213 | { |
bf7c04cd | 214 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
d8f00d46 UD |
215 | struct timespec start; |
216 | struct timespec finish; | |
217 | #endif | |
218 | regex_t re; | |
219 | int err; | |
220 | size_t offset; | |
221 | int cnt; | |
222 | ||
bf7c04cd | 223 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
bb3f4825 | 224 | if (use_clock && !timing) |
d8f00d46 UD |
225 | use_clock = clock_gettime (cl, &start) == 0; |
226 | #endif | |
227 | ||
3c0fb574 | 228 | err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0)); |
d8f00d46 UD |
229 | if (err != REG_NOERROR) |
230 | { | |
231 | char buf[200]; | |
232 | regerror (err, &re, buf, sizeof buf); | |
233 | error (EXIT_FAILURE, 0, "cannot compile expression: %s", buf); | |
234 | } | |
235 | ||
236 | cnt = 0; | |
237 | offset = 0; | |
238 | assert (mem[memlen] == '\0'); | |
239 | while (offset < memlen) | |
240 | { | |
241 | regmatch_t ma[1]; | |
242 | const char *sp; | |
243 | const char *ep; | |
244 | ||
245 | err = regexec (&re, mem + offset, 1, ma, 0); | |
246 | if (err == REG_NOMATCH) | |
247 | break; | |
248 | ||
249 | if (err != REG_NOERROR) | |
250 | { | |
251 | char buf[200]; | |
252 | regerror (err, &re, buf, sizeof buf); | |
253 | error (EXIT_FAILURE, 0, "cannot use expression: %s", buf); | |
254 | } | |
255 | ||
256 | assert (ma[0].rm_so >= 0); | |
257 | sp = mem + offset + ma[0].rm_so; | |
73111f03 | 258 | while (sp > mem && sp[-1] != '\n') |
d8f00d46 UD |
259 | --sp; |
260 | ||
261 | ep = mem + offset + ma[0].rm_so; | |
262 | while (*ep != '\0' && *ep != '\n') | |
263 | ++ep; | |
264 | ||
265 | printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp); | |
266 | ||
267 | offset = ep + 1 - mem; | |
268 | } | |
269 | ||
270 | regfree (&re); | |
271 | ||
bf7c04cd | 272 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
bb3f4825 | 273 | if (use_clock && !timing) |
d8f00d46 UD |
274 | { |
275 | use_clock = clock_gettime (cl, &finish) == 0; | |
276 | if (use_clock) | |
277 | { | |
278 | if (finish.tv_nsec < start.tv_nsec) | |
279 | { | |
280 | finish.tv_nsec -= start.tv_nsec - 1000000000; | |
281 | finish.tv_sec -= 1 + start.tv_sec; | |
282 | } | |
283 | else | |
284 | { | |
285 | finish.tv_nsec -= start.tv_nsec; | |
286 | finish.tv_sec -= start.tv_sec; | |
287 | } | |
288 | ||
6490d945 L |
289 | printf ("elapsed time: %jd.%09jd sec\n", |
290 | (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec); | |
d8f00d46 UD |
291 | } |
292 | } | |
bb3f4825 UD |
293 | |
294 | if (use_clock && timing) | |
295 | { | |
296 | struct timespec mintime = { .tv_sec = 24 * 60 * 60 }; | |
297 | ||
298 | for (int i = 0; i < 10; ++i) | |
299 | { | |
300 | offset = 0; | |
301 | use_clock = clock_gettime (cl, &start) == 0; | |
302 | ||
303 | if (!use_clock) | |
304 | continue; | |
305 | ||
306 | err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0)); | |
307 | if (err != REG_NOERROR) | |
308 | continue; | |
309 | ||
310 | while (offset < memlen) | |
311 | { | |
312 | regmatch_t ma[1]; | |
313 | ||
314 | err = regexec (&re, mem + offset, 1, ma, 0); | |
315 | if (err != REG_NOERROR) | |
316 | break; | |
317 | ||
318 | offset += ma[0].rm_eo; | |
319 | } | |
320 | ||
321 | regfree (&re); | |
322 | ||
323 | use_clock = clock_gettime (cl, &finish) == 0; | |
324 | if (use_clock) | |
325 | { | |
326 | if (finish.tv_nsec < start.tv_nsec) | |
327 | { | |
328 | finish.tv_nsec -= start.tv_nsec - 1000000000; | |
329 | finish.tv_sec -= 1 + start.tv_sec; | |
330 | } | |
331 | else | |
332 | { | |
333 | finish.tv_nsec -= start.tv_nsec; | |
334 | finish.tv_sec -= start.tv_sec; | |
335 | } | |
336 | if (finish.tv_sec < mintime.tv_sec | |
337 | || (finish.tv_sec == mintime.tv_sec | |
338 | && finish.tv_nsec < mintime.tv_nsec)) | |
339 | mintime = finish; | |
340 | } | |
341 | } | |
6490d945 L |
342 | printf ("elapsed time: %jd.%09jd sec\n", |
343 | (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec); | |
bb3f4825 | 344 | } |
d8f00d46 UD |
345 | #endif |
346 | ||
347 | /* Return an error if the number of matches found is not match we | |
348 | expect. */ | |
73111f03 | 349 | return cnt != expected; |
d8f00d46 | 350 | } |
3c0fb574 UD |
351 | |
352 | ||
353 | static int | |
354 | run_test_backwards (const char *expr, const char *mem, size_t memlen, | |
355 | int icase, int expected) | |
356 | { | |
bf7c04cd | 357 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
3c0fb574 UD |
358 | struct timespec start; |
359 | struct timespec finish; | |
360 | #endif | |
361 | struct re_pattern_buffer re; | |
362 | const char *err; | |
363 | size_t offset; | |
364 | int cnt; | |
365 | ||
bf7c04cd | 366 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
bb3f4825 | 367 | if (use_clock && !timing) |
3c0fb574 UD |
368 | use_clock = clock_gettime (cl, &start) == 0; |
369 | #endif | |
370 | ||
371 | re_set_syntax ((RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE) | |
372 | | RE_HAT_LISTS_NOT_NEWLINE | |
373 | | (icase ? RE_ICASE : 0)); | |
374 | ||
375 | memset (&re, 0, sizeof (re)); | |
376 | re.fastmap = malloc (256); | |
377 | if (re.fastmap == NULL) | |
378 | error (EXIT_FAILURE, errno, "cannot allocate fastmap"); | |
379 | ||
380 | err = re_compile_pattern (expr, strlen (expr), &re); | |
381 | if (err != NULL) | |
382 | error (EXIT_FAILURE, 0, "cannot compile expression: %s", err); | |
383 | ||
384 | if (re_compile_fastmap (&re)) | |
385 | error (EXIT_FAILURE, 0, "couldn't compile fastmap"); | |
386 | ||
387 | cnt = 0; | |
388 | offset = memlen; | |
389 | assert (mem[memlen] == '\0'); | |
390 | while (offset <= memlen) | |
391 | { | |
392 | int start; | |
393 | const char *sp; | |
394 | const char *ep; | |
395 | ||
396 | start = re_search (&re, mem, memlen, offset, -offset, NULL); | |
397 | if (start == -1) | |
398 | break; | |
399 | ||
400 | if (start == -2) | |
401 | error (EXIT_FAILURE, 0, "internal error in re_search"); | |
402 | ||
403 | sp = mem + start; | |
404 | while (sp > mem && sp[-1] != '\n') | |
405 | --sp; | |
406 | ||
407 | ep = mem + start; | |
408 | while (*ep != '\0' && *ep != '\n') | |
409 | ++ep; | |
410 | ||
411 | printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp); | |
412 | ||
413 | offset = sp - 1 - mem; | |
414 | } | |
415 | ||
416 | regfree (&re); | |
417 | ||
bf7c04cd | 418 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
bb3f4825 | 419 | if (use_clock && !timing) |
3c0fb574 UD |
420 | { |
421 | use_clock = clock_gettime (cl, &finish) == 0; | |
422 | if (use_clock) | |
423 | { | |
424 | if (finish.tv_nsec < start.tv_nsec) | |
425 | { | |
426 | finish.tv_nsec -= start.tv_nsec - 1000000000; | |
427 | finish.tv_sec -= 1 + start.tv_sec; | |
428 | } | |
429 | else | |
430 | { | |
431 | finish.tv_nsec -= start.tv_nsec; | |
432 | finish.tv_sec -= start.tv_sec; | |
433 | } | |
434 | ||
6490d945 L |
435 | printf ("elapsed time: %jd.%09jd sec\n", |
436 | (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec); | |
3c0fb574 UD |
437 | } |
438 | } | |
bb3f4825 UD |
439 | |
440 | if (use_clock && timing) | |
441 | { | |
442 | struct timespec mintime = { .tv_sec = 24 * 60 * 60 }; | |
443 | ||
444 | for (int i = 0; i < 10; ++i) | |
445 | { | |
446 | offset = memlen; | |
447 | use_clock = clock_gettime (cl, &start) == 0; | |
448 | ||
449 | if (!use_clock) | |
450 | continue; | |
451 | ||
452 | memset (&re, 0, sizeof (re)); | |
453 | re.fastmap = malloc (256); | |
454 | if (re.fastmap == NULL) | |
455 | continue; | |
456 | ||
457 | err = re_compile_pattern (expr, strlen (expr), &re); | |
458 | if (err != NULL) | |
459 | continue; | |
460 | ||
461 | if (re_compile_fastmap (&re)) | |
462 | { | |
463 | regfree (&re); | |
464 | continue; | |
465 | } | |
466 | ||
467 | while (offset <= memlen) | |
468 | { | |
469 | int start; | |
470 | const char *sp; | |
471 | ||
472 | start = re_search (&re, mem, memlen, offset, -offset, NULL); | |
473 | if (start < -1) | |
474 | break; | |
475 | ||
476 | sp = mem + start; | |
477 | while (sp > mem && sp[-1] != '\n') | |
478 | --sp; | |
479 | ||
480 | offset = sp - 1 - mem; | |
481 | } | |
482 | ||
483 | regfree (&re); | |
484 | ||
485 | use_clock = clock_gettime (cl, &finish) == 0; | |
486 | if (use_clock) | |
487 | { | |
488 | if (finish.tv_nsec < start.tv_nsec) | |
489 | { | |
490 | finish.tv_nsec -= start.tv_nsec - 1000000000; | |
491 | finish.tv_sec -= 1 + start.tv_sec; | |
492 | } | |
493 | else | |
494 | { | |
495 | finish.tv_nsec -= start.tv_nsec; | |
496 | finish.tv_sec -= start.tv_sec; | |
497 | } | |
498 | if (finish.tv_sec < mintime.tv_sec | |
499 | || (finish.tv_sec == mintime.tv_sec | |
500 | && finish.tv_nsec < mintime.tv_nsec)) | |
501 | mintime = finish; | |
502 | } | |
503 | } | |
6490d945 L |
504 | printf ("elapsed time: %jd.%09jd sec\n", |
505 | (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec); | |
bb3f4825 | 506 | } |
3c0fb574 UD |
507 | #endif |
508 | ||
509 | /* Return an error if the number of matches found is not match we | |
510 | expect. */ | |
511 | return cnt != expected; | |
512 | } | |
7166d23f | 513 | |
7f0d9e61 | 514 | /* If --timing is used we will need a larger timeout. */ |
7166d23f UD |
515 | #define TIMEOUT 50 |
516 | #define CMDLINE_OPTIONS \ | |
517 | {"timing", no_argument, &timing, 1 }, | |
518 | #define TEST_FUNCTION do_test () | |
519 | #include "../test-skeleton.c" |