]>
Commit | Line | Data |
---|---|---|
2b778ceb | 1 | /* Copyright (C) 2001-2021 Free Software Foundation, Inc. |
d8f00d46 UD |
2 | This file is part of the GNU C Library. |
3 | ||
4 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either | |
7 | version 2.1 of the License, or (at your option) any later version. | |
d8f00d46 UD |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 12 | Lesser General Public License for more details. |
d8f00d46 | 13 | |
41bdb6e2 | 14 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 | 15 | License along with the GNU C Library; if not, see |
5a82c748 | 16 | <https://www.gnu.org/licenses/>. */ |
d8f00d46 | 17 | |
d8f00d46 UD |
18 | #include <assert.h> |
19 | #include <errno.h> | |
20 | #include <error.h> | |
21 | #include <fcntl.h> | |
bb3f4825 | 22 | #include <getopt.h> |
d8f00d46 UD |
23 | #include <iconv.h> |
24 | #include <locale.h> | |
25 | #include <mcheck.h> | |
ceaa9889 | 26 | #include <stdint.h> |
d8f00d46 UD |
27 | #include <stdio.h> |
28 | #include <stdlib.h> | |
29 | #include <string.h> | |
30 | #include <time.h> | |
31 | #include <unistd.h> | |
32 | #include <sys/stat.h> | |
33 | #include <sys/types.h> | |
34 | #include <regex.h> | |
35 | ||
36 | ||
#if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
/* CPU-time clock used for the timing output, and a flag telling
   whether that clock can be used.  */
static clockid_t cl;
static int use_clock;
#endif

/* Conversion descriptor opened in do_test (UTF-8 to ISO-8859-1).  */
static iconv_t cd;

/* Content of the input file exactly as read, and its length.  */
static char *mem;
static size_t memlen;

/* The same content after conversion through CD, and its length.  */
static char *umem;
static size_t umemlen;

/* Set to 1 by the --timing command line option.  */
static int timing;

/* Run EXPR against both buffers, forwards and backwards.  */
static int test_expr (const char *expr, int expected, int expectedicase);

/* Forward scan with regcomp/regexec.  */
static int run_test (const char *expr, const char *mem, size_t memlen,
                     int icase, int expected);

/* Backward scan with re_compile_pattern/re_search.  */
static int run_test_backwards (const char *expr, const char *mem,
                               size_t memlen, int icase, int expected);
d8f00d46 UD |
53 | |
54 | ||
7166d23f UD |
55 | static int |
56 | do_test (void) | |
d8f00d46 UD |
57 | { |
58 | const char *file; | |
d8f00d46 UD |
59 | int fd; |
60 | struct stat st; | |
d8f00d46 UD |
61 | int result; |
62 | char *inmem; | |
63 | char *outmem; | |
64 | size_t inlen; | |
65 | size_t outlen; | |
d8f00d46 UD |
66 | |
67 | mtrace (); | |
68 | ||
69 | /* Make the content of the file available in memory. */ | |
3a9d025f | 70 | file = "./tst-regex.input"; |
d8f00d46 UD |
71 | fd = open (file, O_RDONLY); |
72 | if (fd == -1) | |
73 | error (EXIT_FAILURE, errno, "cannot open %s", basename (file)); | |
74 | ||
75 | if (fstat (fd, &st) != 0) | |
76 | error (EXIT_FAILURE, errno, "cannot stat %s", basename (file)); | |
77 | memlen = st.st_size; | |
78 | ||
79 | mem = (char *) malloc (memlen + 1); | |
80 | if (mem == NULL) | |
81 | error (EXIT_FAILURE, errno, "while allocating buffer"); | |
82 | ||
3c0fb574 | 83 | if ((size_t) read (fd, mem, memlen) != memlen) |
d8f00d46 UD |
84 | error (EXIT_FAILURE, 0, "cannot read entire file"); |
85 | mem[memlen] = '\0'; | |
86 | ||
87 | close (fd); | |
88 | ||
a22a582e PE |
89 | /* We have to convert a few things from UTF-8 to Latin-1. */ |
90 | cd = iconv_open ("ISO-8859-1", "UTF-8"); | |
d8f00d46 UD |
91 | if (cd == (iconv_t) -1) |
92 | error (EXIT_FAILURE, errno, "cannot get conversion descriptor"); | |
93 | ||
a22a582e PE |
94 | /* For the second test we have to convert the file content to Latin-1. |
95 | This cannot grow the data. */ | |
96 | umem = (char *) malloc (memlen + 1); | |
2f07975d UD |
97 | if (umem == NULL) |
98 | error (EXIT_FAILURE, errno, "while allocating buffer"); | |
99 | ||
d8f00d46 UD |
100 | inmem = mem; |
101 | inlen = memlen; | |
102 | outmem = umem; | |
a22a582e | 103 | outlen = memlen; |
d8f00d46 | 104 | iconv (cd, &inmem, &inlen, &outmem, &outlen); |
3c0fb574 | 105 | umemlen = outmem - umem; |
d8f00d46 UD |
106 | if (inlen != 0) |
107 | error (EXIT_FAILURE, errno, "cannot convert buffer"); | |
a22a582e | 108 | umem[umemlen] = '\0'; |
d8f00d46 | 109 | |
bf7c04cd UD |
110 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
111 | # if _POSIX_CPUTIME == 0 | |
112 | if (sysconf (_SC_CPUTIME) < 0) | |
113 | use_clock = 0; | |
114 | else | |
115 | # endif | |
116 | /* See whether we can use the CPU clock. */ | |
117 | use_clock = clock_getcpuclockid (0, &cl) == 0; | |
73111f03 UD |
118 | #endif |
119 | ||
2f07975d UD |
120 | #ifdef DEBUG |
121 | re_set_syntax (RE_DEBUG); | |
122 | #endif | |
123 | ||
73111f03 UD |
124 | /* Run the actual tests. All tests are run in a single-byte and a |
125 | multi-byte locale. */ | |
a22a582e | 126 | result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4); |
3c0fb574 UD |
127 | result |= test_expr ("G.ran", 2, 3); |
128 | result |= test_expr ("G.\\{1\\}ran", 2, 3); | |
129 | result |= test_expr ("G.*ran", 3, 44); | |
a22a582e | 130 | result |= test_expr ("[äáàâ]", 0, 0); |
bb3f4825 UD |
131 | result |= test_expr ("Uddeborg", 2, 2); |
132 | result |= test_expr (".Uddeborg", 2, 2); | |
73111f03 UD |
133 | |
134 | /* Free the resources. */ | |
2f07975d | 135 | free (umem); |
73111f03 UD |
136 | iconv_close (cd); |
137 | free (mem); | |
138 | ||
139 | return result; | |
140 | } | |
141 | ||
142 | ||
143 | static int | |
3c0fb574 | 144 | test_expr (const char *expr, int expected, int expectedicase) |
73111f03 UD |
145 | { |
146 | int result; | |
147 | char *inmem; | |
148 | char *outmem; | |
149 | size_t inlen; | |
150 | size_t outlen; | |
151 | char *uexpr; | |
152 | ||
a22a582e PE |
153 | /* First test: search with an UTF-8 locale. */ |
154 | if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL) | |
155 | error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8"); | |
73111f03 | 156 | |
a22a582e | 157 | printf ("\nTest \"%s\" with multi-byte locale\n", expr); |
3c0fb574 | 158 | result = run_test (expr, mem, memlen, 0, expected); |
a22a582e | 159 | printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr); |
3c0fb574 | 160 | result |= run_test (expr, mem, memlen, 1, expectedicase); |
a22a582e | 161 | printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr); |
3c0fb574 | 162 | result |= run_test_backwards (expr, mem, memlen, 0, expected); |
a22a582e | 163 | printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n", |
3c0fb574 UD |
164 | expr); |
165 | result |= run_test_backwards (expr, mem, memlen, 1, expectedicase); | |
73111f03 | 166 | |
a22a582e PE |
167 | /* Second test: search with an ISO-8859-1 locale. */ |
168 | if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL) | |
169 | error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1"); | |
73111f03 UD |
170 | |
171 | inmem = (char *) expr; | |
d8f00d46 | 172 | inlen = strlen (expr); |
a22a582e | 173 | outlen = inlen; |
d8f00d46 | 174 | outmem = uexpr = alloca (outlen + 1); |
73111f03 | 175 | memset (outmem, '\0', outlen + 1); |
d8f00d46 UD |
176 | iconv (cd, &inmem, &inlen, &outmem, &outlen); |
177 | if (inlen != 0) | |
178 | error (EXIT_FAILURE, errno, "cannot convert expression"); | |
179 | ||
d8f00d46 | 180 | /* Run the tests. */ |
a22a582e | 181 | printf ("\nTest \"%s\" with 8-bit locale\n", expr); |
3c0fb574 | 182 | result |= run_test (uexpr, umem, umemlen, 0, expected); |
a22a582e | 183 | printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr); |
3c0fb574 | 184 | result |= run_test (uexpr, umem, umemlen, 1, expectedicase); |
a22a582e | 185 | printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr); |
3c0fb574 | 186 | result |= run_test_backwards (uexpr, umem, umemlen, 0, expected); |
a22a582e | 187 | printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n", |
3c0fb574 UD |
188 | expr); |
189 | result |= run_test_backwards (uexpr, umem, umemlen, 1, expectedicase); | |
d8f00d46 UD |
190 | |
191 | return result; | |
192 | } | |
193 | ||
194 | ||
195 | static int | |
3c0fb574 UD |
196 | run_test (const char *expr, const char *mem, size_t memlen, int icase, |
197 | int expected) | |
d8f00d46 | 198 | { |
bf7c04cd | 199 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
d8f00d46 UD |
200 | struct timespec start; |
201 | struct timespec finish; | |
202 | #endif | |
203 | regex_t re; | |
204 | int err; | |
205 | size_t offset; | |
206 | int cnt; | |
207 | ||
bf7c04cd | 208 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
bb3f4825 | 209 | if (use_clock && !timing) |
d8f00d46 UD |
210 | use_clock = clock_gettime (cl, &start) == 0; |
211 | #endif | |
212 | ||
3c0fb574 | 213 | err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0)); |
d8f00d46 UD |
214 | if (err != REG_NOERROR) |
215 | { | |
216 | char buf[200]; | |
217 | regerror (err, &re, buf, sizeof buf); | |
218 | error (EXIT_FAILURE, 0, "cannot compile expression: %s", buf); | |
219 | } | |
220 | ||
221 | cnt = 0; | |
222 | offset = 0; | |
223 | assert (mem[memlen] == '\0'); | |
224 | while (offset < memlen) | |
225 | { | |
226 | regmatch_t ma[1]; | |
227 | const char *sp; | |
228 | const char *ep; | |
229 | ||
230 | err = regexec (&re, mem + offset, 1, ma, 0); | |
231 | if (err == REG_NOMATCH) | |
232 | break; | |
233 | ||
234 | if (err != REG_NOERROR) | |
235 | { | |
236 | char buf[200]; | |
237 | regerror (err, &re, buf, sizeof buf); | |
238 | error (EXIT_FAILURE, 0, "cannot use expression: %s", buf); | |
239 | } | |
240 | ||
241 | assert (ma[0].rm_so >= 0); | |
242 | sp = mem + offset + ma[0].rm_so; | |
73111f03 | 243 | while (sp > mem && sp[-1] != '\n') |
d8f00d46 UD |
244 | --sp; |
245 | ||
246 | ep = mem + offset + ma[0].rm_so; | |
247 | while (*ep != '\0' && *ep != '\n') | |
248 | ++ep; | |
249 | ||
250 | printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp); | |
251 | ||
252 | offset = ep + 1 - mem; | |
253 | } | |
254 | ||
255 | regfree (&re); | |
256 | ||
bf7c04cd | 257 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
bb3f4825 | 258 | if (use_clock && !timing) |
d8f00d46 UD |
259 | { |
260 | use_clock = clock_gettime (cl, &finish) == 0; | |
261 | if (use_clock) | |
262 | { | |
263 | if (finish.tv_nsec < start.tv_nsec) | |
264 | { | |
265 | finish.tv_nsec -= start.tv_nsec - 1000000000; | |
266 | finish.tv_sec -= 1 + start.tv_sec; | |
267 | } | |
268 | else | |
269 | { | |
270 | finish.tv_nsec -= start.tv_nsec; | |
271 | finish.tv_sec -= start.tv_sec; | |
272 | } | |
273 | ||
6490d945 L |
274 | printf ("elapsed time: %jd.%09jd sec\n", |
275 | (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec); | |
d8f00d46 UD |
276 | } |
277 | } | |
bb3f4825 UD |
278 | |
279 | if (use_clock && timing) | |
280 | { | |
281 | struct timespec mintime = { .tv_sec = 24 * 60 * 60 }; | |
282 | ||
283 | for (int i = 0; i < 10; ++i) | |
284 | { | |
285 | offset = 0; | |
286 | use_clock = clock_gettime (cl, &start) == 0; | |
287 | ||
288 | if (!use_clock) | |
289 | continue; | |
290 | ||
291 | err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0)); | |
292 | if (err != REG_NOERROR) | |
293 | continue; | |
294 | ||
295 | while (offset < memlen) | |
296 | { | |
297 | regmatch_t ma[1]; | |
298 | ||
299 | err = regexec (&re, mem + offset, 1, ma, 0); | |
300 | if (err != REG_NOERROR) | |
301 | break; | |
302 | ||
303 | offset += ma[0].rm_eo; | |
304 | } | |
305 | ||
306 | regfree (&re); | |
307 | ||
308 | use_clock = clock_gettime (cl, &finish) == 0; | |
309 | if (use_clock) | |
310 | { | |
311 | if (finish.tv_nsec < start.tv_nsec) | |
312 | { | |
313 | finish.tv_nsec -= start.tv_nsec - 1000000000; | |
314 | finish.tv_sec -= 1 + start.tv_sec; | |
315 | } | |
316 | else | |
317 | { | |
318 | finish.tv_nsec -= start.tv_nsec; | |
319 | finish.tv_sec -= start.tv_sec; | |
320 | } | |
321 | if (finish.tv_sec < mintime.tv_sec | |
322 | || (finish.tv_sec == mintime.tv_sec | |
323 | && finish.tv_nsec < mintime.tv_nsec)) | |
324 | mintime = finish; | |
325 | } | |
326 | } | |
6490d945 L |
327 | printf ("elapsed time: %jd.%09jd sec\n", |
328 | (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec); | |
bb3f4825 | 329 | } |
d8f00d46 UD |
330 | #endif |
331 | ||
332 | /* Return an error if the number of matches found is not match we | |
333 | expect. */ | |
73111f03 | 334 | return cnt != expected; |
d8f00d46 | 335 | } |
3c0fb574 UD |
336 | |
337 | ||
338 | static int | |
339 | run_test_backwards (const char *expr, const char *mem, size_t memlen, | |
340 | int icase, int expected) | |
341 | { | |
bf7c04cd | 342 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
3c0fb574 UD |
343 | struct timespec start; |
344 | struct timespec finish; | |
345 | #endif | |
346 | struct re_pattern_buffer re; | |
347 | const char *err; | |
348 | size_t offset; | |
349 | int cnt; | |
350 | ||
bf7c04cd | 351 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
bb3f4825 | 352 | if (use_clock && !timing) |
3c0fb574 UD |
353 | use_clock = clock_gettime (cl, &start) == 0; |
354 | #endif | |
355 | ||
356 | re_set_syntax ((RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE) | |
357 | | RE_HAT_LISTS_NOT_NEWLINE | |
358 | | (icase ? RE_ICASE : 0)); | |
359 | ||
360 | memset (&re, 0, sizeof (re)); | |
361 | re.fastmap = malloc (256); | |
362 | if (re.fastmap == NULL) | |
363 | error (EXIT_FAILURE, errno, "cannot allocate fastmap"); | |
364 | ||
365 | err = re_compile_pattern (expr, strlen (expr), &re); | |
366 | if (err != NULL) | |
367 | error (EXIT_FAILURE, 0, "cannot compile expression: %s", err); | |
368 | ||
369 | if (re_compile_fastmap (&re)) | |
370 | error (EXIT_FAILURE, 0, "couldn't compile fastmap"); | |
371 | ||
372 | cnt = 0; | |
373 | offset = memlen; | |
374 | assert (mem[memlen] == '\0'); | |
375 | while (offset <= memlen) | |
376 | { | |
377 | int start; | |
378 | const char *sp; | |
379 | const char *ep; | |
380 | ||
381 | start = re_search (&re, mem, memlen, offset, -offset, NULL); | |
382 | if (start == -1) | |
383 | break; | |
384 | ||
385 | if (start == -2) | |
386 | error (EXIT_FAILURE, 0, "internal error in re_search"); | |
387 | ||
388 | sp = mem + start; | |
389 | while (sp > mem && sp[-1] != '\n') | |
390 | --sp; | |
391 | ||
392 | ep = mem + start; | |
393 | while (*ep != '\0' && *ep != '\n') | |
394 | ++ep; | |
395 | ||
396 | printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp); | |
397 | ||
398 | offset = sp - 1 - mem; | |
399 | } | |
400 | ||
401 | regfree (&re); | |
402 | ||
bf7c04cd | 403 | #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0 |
bb3f4825 | 404 | if (use_clock && !timing) |
3c0fb574 UD |
405 | { |
406 | use_clock = clock_gettime (cl, &finish) == 0; | |
407 | if (use_clock) | |
408 | { | |
409 | if (finish.tv_nsec < start.tv_nsec) | |
410 | { | |
411 | finish.tv_nsec -= start.tv_nsec - 1000000000; | |
412 | finish.tv_sec -= 1 + start.tv_sec; | |
413 | } | |
414 | else | |
415 | { | |
416 | finish.tv_nsec -= start.tv_nsec; | |
417 | finish.tv_sec -= start.tv_sec; | |
418 | } | |
419 | ||
6490d945 L |
420 | printf ("elapsed time: %jd.%09jd sec\n", |
421 | (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec); | |
3c0fb574 UD |
422 | } |
423 | } | |
bb3f4825 UD |
424 | |
425 | if (use_clock && timing) | |
426 | { | |
427 | struct timespec mintime = { .tv_sec = 24 * 60 * 60 }; | |
428 | ||
429 | for (int i = 0; i < 10; ++i) | |
430 | { | |
431 | offset = memlen; | |
432 | use_clock = clock_gettime (cl, &start) == 0; | |
433 | ||
434 | if (!use_clock) | |
435 | continue; | |
436 | ||
437 | memset (&re, 0, sizeof (re)); | |
438 | re.fastmap = malloc (256); | |
439 | if (re.fastmap == NULL) | |
440 | continue; | |
441 | ||
442 | err = re_compile_pattern (expr, strlen (expr), &re); | |
443 | if (err != NULL) | |
444 | continue; | |
445 | ||
446 | if (re_compile_fastmap (&re)) | |
447 | { | |
448 | regfree (&re); | |
449 | continue; | |
450 | } | |
451 | ||
452 | while (offset <= memlen) | |
453 | { | |
454 | int start; | |
455 | const char *sp; | |
456 | ||
457 | start = re_search (&re, mem, memlen, offset, -offset, NULL); | |
458 | if (start < -1) | |
459 | break; | |
460 | ||
461 | sp = mem + start; | |
462 | while (sp > mem && sp[-1] != '\n') | |
463 | --sp; | |
464 | ||
465 | offset = sp - 1 - mem; | |
466 | } | |
467 | ||
468 | regfree (&re); | |
469 | ||
470 | use_clock = clock_gettime (cl, &finish) == 0; | |
471 | if (use_clock) | |
472 | { | |
473 | if (finish.tv_nsec < start.tv_nsec) | |
474 | { | |
475 | finish.tv_nsec -= start.tv_nsec - 1000000000; | |
476 | finish.tv_sec -= 1 + start.tv_sec; | |
477 | } | |
478 | else | |
479 | { | |
480 | finish.tv_nsec -= start.tv_nsec; | |
481 | finish.tv_sec -= start.tv_sec; | |
482 | } | |
483 | if (finish.tv_sec < mintime.tv_sec | |
484 | || (finish.tv_sec == mintime.tv_sec | |
485 | && finish.tv_nsec < mintime.tv_nsec)) | |
486 | mintime = finish; | |
487 | } | |
488 | } | |
6490d945 L |
489 | printf ("elapsed time: %jd.%09jd sec\n", |
490 | (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec); | |
bb3f4825 | 491 | } |
3c0fb574 UD |
492 | #endif |
493 | ||
494 | /* Return an error if the number of matches found is not match we | |
495 | expect. */ | |
496 | return cnt != expected; | |
497 | } | |
7166d23f UD |
498 | |
499 | /* If --timing is used we will need a larger timeout. */ | |
500 | #define TIMEOUT 50 | |
501 | #define CMDLINE_OPTIONS \ | |
502 | {"timing", no_argument, &timing, 1 }, | |
503 | #define TEST_FUNCTION do_test () | |
504 | #include "../test-skeleton.c" |