]> git.ipfire.org Git - thirdparty/glibc.git/blob - posix/tst-regex.c
posix: remove some iso-8859-encoded characters
[thirdparty/glibc.git] / posix / tst-regex.c
1 /* Copyright (C) 2001-2021 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
17
18 #include <assert.h>
19 #include <errno.h>
20 #include <error.h>
21 #include <fcntl.h>
22 #include <getopt.h>
23 #include <iconv.h>
24 #include <locale.h>
25 #include <mcheck.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <time.h>
31 #include <unistd.h>
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <regex.h>
35
36
/* Test input.  MEM holds the content of the input file as read from
   disk (UTF-8); UMEM holds the same content converted to ISO-8859-1.
   MEMLEN and UMEMLEN are the respective lengths in bytes, not counting
   the terminating NUL byte.  */
static char *mem;
static size_t memlen;
static char *umem;
static size_t umemlen;

/* Conversion descriptor used to convert from UTF-8 to ISO-8859-1.  */
static iconv_t cd;

/* Nonzero when the --timing command line option was given.  */
static int timing;

#if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
/* CPU-time clock used for the measurements; USE_CLOCK is nonzero as
   long as that clock can be queried successfully.  */
static clockid_t cl;
static int use_clock;
#endif

/* Run EXPR against the test input in a multi-byte and in a single-byte
   locale, forwards and backwards, with and without case folding.  */
static int test_expr (const char *expr, int expected, int expectedicase);

/* Search forwards using the POSIX regcomp/regexec interface.  */
static int run_test (const char *expr, const char *mem, size_t memlen,
                     int icase, int expected);

/* Search backwards using the GNU re_search interface.  */
static int run_test_backwards (const char *expr, const char *mem,
                               size_t memlen, int icase, int expected);
53
54
55 static int
56 do_test (void)
57 {
58 const char *file;
59 int fd;
60 struct stat st;
61 int result;
62 char *inmem;
63 char *outmem;
64 size_t inlen;
65 size_t outlen;
66
67 mtrace ();
68
69 /* Make the content of the file available in memory. */
70 file = "./tst-regex.input";
71 fd = open (file, O_RDONLY);
72 if (fd == -1)
73 error (EXIT_FAILURE, errno, "cannot open %s", basename (file));
74
75 if (fstat (fd, &st) != 0)
76 error (EXIT_FAILURE, errno, "cannot stat %s", basename (file));
77 memlen = st.st_size;
78
79 mem = (char *) malloc (memlen + 1);
80 if (mem == NULL)
81 error (EXIT_FAILURE, errno, "while allocating buffer");
82
83 if ((size_t) read (fd, mem, memlen) != memlen)
84 error (EXIT_FAILURE, 0, "cannot read entire file");
85 mem[memlen] = '\0';
86
87 close (fd);
88
89 /* We have to convert a few things from UTF-8 to Latin-1. */
90 cd = iconv_open ("ISO-8859-1", "UTF-8");
91 if (cd == (iconv_t) -1)
92 error (EXIT_FAILURE, errno, "cannot get conversion descriptor");
93
94 /* For the second test we have to convert the file content to Latin-1.
95 This cannot grow the data. */
96 umem = (char *) malloc (memlen + 1);
97 if (umem == NULL)
98 error (EXIT_FAILURE, errno, "while allocating buffer");
99
100 inmem = mem;
101 inlen = memlen;
102 outmem = umem;
103 outlen = memlen;
104 iconv (cd, &inmem, &inlen, &outmem, &outlen);
105 umemlen = outmem - umem;
106 if (inlen != 0)
107 error (EXIT_FAILURE, errno, "cannot convert buffer");
108 umem[umemlen] = '\0';
109
110 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
111 # if _POSIX_CPUTIME == 0
112 if (sysconf (_SC_CPUTIME) < 0)
113 use_clock = 0;
114 else
115 # endif
116 /* See whether we can use the CPU clock. */
117 use_clock = clock_getcpuclockid (0, &cl) == 0;
118 #endif
119
120 #ifdef DEBUG
121 re_set_syntax (RE_DEBUG);
122 #endif
123
124 /* Run the actual tests. All tests are run in a single-byte and a
125 multi-byte locale. */
126 result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
127 result |= test_expr ("G.ran", 2, 3);
128 result |= test_expr ("G.\\{1\\}ran", 2, 3);
129 result |= test_expr ("G.*ran", 3, 44);
130 result |= test_expr ("[äáàâ]", 0, 0);
131 result |= test_expr ("Uddeborg", 2, 2);
132 result |= test_expr (".Uddeborg", 2, 2);
133
134 /* Free the resources. */
135 free (umem);
136 iconv_close (cd);
137 free (mem);
138
139 return result;
140 }
141
142
143 static int
144 test_expr (const char *expr, int expected, int expectedicase)
145 {
146 int result;
147 char *inmem;
148 char *outmem;
149 size_t inlen;
150 size_t outlen;
151 char *uexpr;
152
153 /* First test: search with an UTF-8 locale. */
154 if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
155 error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8");
156
157 printf ("\nTest \"%s\" with multi-byte locale\n", expr);
158 result = run_test (expr, mem, memlen, 0, expected);
159 printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
160 result |= run_test (expr, mem, memlen, 1, expectedicase);
161 printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
162 result |= run_test_backwards (expr, mem, memlen, 0, expected);
163 printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
164 expr);
165 result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
166
167 /* Second test: search with an ISO-8859-1 locale. */
168 if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
169 error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1");
170
171 inmem = (char *) expr;
172 inlen = strlen (expr);
173 outlen = inlen;
174 outmem = uexpr = alloca (outlen + 1);
175 memset (outmem, '\0', outlen + 1);
176 iconv (cd, &inmem, &inlen, &outmem, &outlen);
177 if (inlen != 0)
178 error (EXIT_FAILURE, errno, "cannot convert expression");
179
180 /* Run the tests. */
181 printf ("\nTest \"%s\" with 8-bit locale\n", expr);
182 result |= run_test (uexpr, umem, umemlen, 0, expected);
183 printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr);
184 result |= run_test (uexpr, umem, umemlen, 1, expectedicase);
185 printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr);
186 result |= run_test_backwards (uexpr, umem, umemlen, 0, expected);
187 printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n",
188 expr);
189 result |= run_test_backwards (uexpr, umem, umemlen, 1, expectedicase);
190
191 return result;
192 }
193
194
195 static int
196 run_test (const char *expr, const char *mem, size_t memlen, int icase,
197 int expected)
198 {
199 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
200 struct timespec start;
201 struct timespec finish;
202 #endif
203 regex_t re;
204 int err;
205 size_t offset;
206 int cnt;
207
208 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
209 if (use_clock && !timing)
210 use_clock = clock_gettime (cl, &start) == 0;
211 #endif
212
213 err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0));
214 if (err != REG_NOERROR)
215 {
216 char buf[200];
217 regerror (err, &re, buf, sizeof buf);
218 error (EXIT_FAILURE, 0, "cannot compile expression: %s", buf);
219 }
220
221 cnt = 0;
222 offset = 0;
223 assert (mem[memlen] == '\0');
224 while (offset < memlen)
225 {
226 regmatch_t ma[1];
227 const char *sp;
228 const char *ep;
229
230 err = regexec (&re, mem + offset, 1, ma, 0);
231 if (err == REG_NOMATCH)
232 break;
233
234 if (err != REG_NOERROR)
235 {
236 char buf[200];
237 regerror (err, &re, buf, sizeof buf);
238 error (EXIT_FAILURE, 0, "cannot use expression: %s", buf);
239 }
240
241 assert (ma[0].rm_so >= 0);
242 sp = mem + offset + ma[0].rm_so;
243 while (sp > mem && sp[-1] != '\n')
244 --sp;
245
246 ep = mem + offset + ma[0].rm_so;
247 while (*ep != '\0' && *ep != '\n')
248 ++ep;
249
250 printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);
251
252 offset = ep + 1 - mem;
253 }
254
255 regfree (&re);
256
257 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
258 if (use_clock && !timing)
259 {
260 use_clock = clock_gettime (cl, &finish) == 0;
261 if (use_clock)
262 {
263 if (finish.tv_nsec < start.tv_nsec)
264 {
265 finish.tv_nsec -= start.tv_nsec - 1000000000;
266 finish.tv_sec -= 1 + start.tv_sec;
267 }
268 else
269 {
270 finish.tv_nsec -= start.tv_nsec;
271 finish.tv_sec -= start.tv_sec;
272 }
273
274 printf ("elapsed time: %jd.%09jd sec\n",
275 (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec);
276 }
277 }
278
279 if (use_clock && timing)
280 {
281 struct timespec mintime = { .tv_sec = 24 * 60 * 60 };
282
283 for (int i = 0; i < 10; ++i)
284 {
285 offset = 0;
286 use_clock = clock_gettime (cl, &start) == 0;
287
288 if (!use_clock)
289 continue;
290
291 err = regcomp (&re, expr, REG_NEWLINE | (icase ? REG_ICASE : 0));
292 if (err != REG_NOERROR)
293 continue;
294
295 while (offset < memlen)
296 {
297 regmatch_t ma[1];
298
299 err = regexec (&re, mem + offset, 1, ma, 0);
300 if (err != REG_NOERROR)
301 break;
302
303 offset += ma[0].rm_eo;
304 }
305
306 regfree (&re);
307
308 use_clock = clock_gettime (cl, &finish) == 0;
309 if (use_clock)
310 {
311 if (finish.tv_nsec < start.tv_nsec)
312 {
313 finish.tv_nsec -= start.tv_nsec - 1000000000;
314 finish.tv_sec -= 1 + start.tv_sec;
315 }
316 else
317 {
318 finish.tv_nsec -= start.tv_nsec;
319 finish.tv_sec -= start.tv_sec;
320 }
321 if (finish.tv_sec < mintime.tv_sec
322 || (finish.tv_sec == mintime.tv_sec
323 && finish.tv_nsec < mintime.tv_nsec))
324 mintime = finish;
325 }
326 }
327 printf ("elapsed time: %jd.%09jd sec\n",
328 (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec);
329 }
330 #endif
331
332 /* Return an error if the number of matches found is not match we
333 expect. */
334 return cnt != expected;
335 }
336
337
338 static int
339 run_test_backwards (const char *expr, const char *mem, size_t memlen,
340 int icase, int expected)
341 {
342 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
343 struct timespec start;
344 struct timespec finish;
345 #endif
346 struct re_pattern_buffer re;
347 const char *err;
348 size_t offset;
349 int cnt;
350
351 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
352 if (use_clock && !timing)
353 use_clock = clock_gettime (cl, &start) == 0;
354 #endif
355
356 re_set_syntax ((RE_SYNTAX_POSIX_BASIC & ~RE_DOT_NEWLINE)
357 | RE_HAT_LISTS_NOT_NEWLINE
358 | (icase ? RE_ICASE : 0));
359
360 memset (&re, 0, sizeof (re));
361 re.fastmap = malloc (256);
362 if (re.fastmap == NULL)
363 error (EXIT_FAILURE, errno, "cannot allocate fastmap");
364
365 err = re_compile_pattern (expr, strlen (expr), &re);
366 if (err != NULL)
367 error (EXIT_FAILURE, 0, "cannot compile expression: %s", err);
368
369 if (re_compile_fastmap (&re))
370 error (EXIT_FAILURE, 0, "couldn't compile fastmap");
371
372 cnt = 0;
373 offset = memlen;
374 assert (mem[memlen] == '\0');
375 while (offset <= memlen)
376 {
377 int start;
378 const char *sp;
379 const char *ep;
380
381 start = re_search (&re, mem, memlen, offset, -offset, NULL);
382 if (start == -1)
383 break;
384
385 if (start == -2)
386 error (EXIT_FAILURE, 0, "internal error in re_search");
387
388 sp = mem + start;
389 while (sp > mem && sp[-1] != '\n')
390 --sp;
391
392 ep = mem + start;
393 while (*ep != '\0' && *ep != '\n')
394 ++ep;
395
396 printf ("match %d: \"%.*s\"\n", ++cnt, (int) (ep - sp), sp);
397
398 offset = sp - 1 - mem;
399 }
400
401 regfree (&re);
402
403 #if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
404 if (use_clock && !timing)
405 {
406 use_clock = clock_gettime (cl, &finish) == 0;
407 if (use_clock)
408 {
409 if (finish.tv_nsec < start.tv_nsec)
410 {
411 finish.tv_nsec -= start.tv_nsec - 1000000000;
412 finish.tv_sec -= 1 + start.tv_sec;
413 }
414 else
415 {
416 finish.tv_nsec -= start.tv_nsec;
417 finish.tv_sec -= start.tv_sec;
418 }
419
420 printf ("elapsed time: %jd.%09jd sec\n",
421 (intmax_t) finish.tv_sec, (intmax_t) finish.tv_nsec);
422 }
423 }
424
425 if (use_clock && timing)
426 {
427 struct timespec mintime = { .tv_sec = 24 * 60 * 60 };
428
429 for (int i = 0; i < 10; ++i)
430 {
431 offset = memlen;
432 use_clock = clock_gettime (cl, &start) == 0;
433
434 if (!use_clock)
435 continue;
436
437 memset (&re, 0, sizeof (re));
438 re.fastmap = malloc (256);
439 if (re.fastmap == NULL)
440 continue;
441
442 err = re_compile_pattern (expr, strlen (expr), &re);
443 if (err != NULL)
444 continue;
445
446 if (re_compile_fastmap (&re))
447 {
448 regfree (&re);
449 continue;
450 }
451
452 while (offset <= memlen)
453 {
454 int start;
455 const char *sp;
456
457 start = re_search (&re, mem, memlen, offset, -offset, NULL);
458 if (start < -1)
459 break;
460
461 sp = mem + start;
462 while (sp > mem && sp[-1] != '\n')
463 --sp;
464
465 offset = sp - 1 - mem;
466 }
467
468 regfree (&re);
469
470 use_clock = clock_gettime (cl, &finish) == 0;
471 if (use_clock)
472 {
473 if (finish.tv_nsec < start.tv_nsec)
474 {
475 finish.tv_nsec -= start.tv_nsec - 1000000000;
476 finish.tv_sec -= 1 + start.tv_sec;
477 }
478 else
479 {
480 finish.tv_nsec -= start.tv_nsec;
481 finish.tv_sec -= start.tv_sec;
482 }
483 if (finish.tv_sec < mintime.tv_sec
484 || (finish.tv_sec == mintime.tv_sec
485 && finish.tv_nsec < mintime.tv_nsec))
486 mintime = finish;
487 }
488 }
489 printf ("elapsed time: %jd.%09jd sec\n",
490 (intmax_t) mintime.tv_sec, (intmax_t) mintime.tv_nsec);
491 }
492 #endif
493
494 /* Return an error if the number of matches found is not match we
495 expect. */
496 return cnt != expected;
497 }
498
499 /* If --timing is used we will need a larger timout. */
500 #define TIMEOUT 50
501 #define CMDLINE_OPTIONS \
502 {"timing", no_argument, &timing, 1 },
503 #define TEST_FUNCTION do_test ()
504 #include "../test-skeleton.c"