]>
Commit | Line | Data |
---|---|---|
78c81ab7 UD |
1 | /* Regular expression tests. |
2 | Copyright (C) 2003 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | Contributed by Jakub Jelinek <jakub@redhat.com>, 2003. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
17 | License along with the GNU C Library; if not, write to the Free | |
18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA. */ | |
20 | ||
21 | #include <sys/types.h> | |
22 | #include <mcheck.h> | |
23 | #include <regex.h> | |
24 | #include <stdio.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
27 | #include <locale.h> | |
28 | #include <getopt.h> | |
29 | ||
/* Expand the test file's escape letters in STR, in place: 'N' becomes a
   newline, 'T' a tab, 'S' a space and 'Z' a NUL byte.  The scan resumes
   one byte past every replacement, so it also continues behind a NUL
   that was produced from 'Z' and only stops when strpbrk finds no
   further escape letter.  */
static void
replace_special_chars (char *str)
{
  char *hit = strpbrk (str, "NTSZ");

  while (hit != NULL)
    {
      if (*hit == 'N')
        *hit = '\n';
      else if (*hit == 'T')
        *hit = '\t';
      else if (*hit == 'S')
        *hit = ' ';
      else if (*hit == 'Z')
        *hit = '\0';

      hit = strpbrk (hit + 1, "NTSZ");
    }
}
42 | ||
/* Rewrite the word-boundary bracket syntax in STR to the backslash
   form, in place: every "[[:<:]]" becomes "\<" and every "[[:>:]]"
   becomes "\>".  Any other text starting with "[[:" is left alone.  */
static void
glibc_re_syntax (char *str)
{
  /* One past the terminating NUL, so each move below carries the
     terminator along with the tail.  */
  char *limit = str + strlen (str) + 1;
  char *cur = strstr (str, "[[:");

  while (cur != NULL)
    {
      int is_boundary = ((cur[3] == '<' || cur[3] == '>')
                         && strncmp (cur + 4, ":]]", 3) == 0);

      if (!is_boundary)
        cur += 3;
      else
        {
          cur[0] = '\\';
          cur[1] = cur[3];
          /* The replacement is five bytes shorter; pull the tail up.  */
          memmove (cur + 2, cur + 7, limit - cur - 7);
          limit -= 5;
          cur += 2;
        }

      cur = strstr (cur, "[[:");
    }
}
61 | ||
/* Write at DST the two-byte sequence that stands in for the ASCII
   letter C and return the position just behind it.  Only the letters
   a-d and A-D have a replacement; for any other C nothing is written
   and DST is returned unchanged.  */
static char *
mb_replace (char *dst, const char c)
{
  static const struct
  {
    char ascii;
    char bytes[2];
  } map[] =
    {
      { 'a', { '\xc3', '\xa1' } },      /* a -> \'a */
      { 'A', { '\xc3', '\x81' } },      /* A -> \'A */
      { 'b', { '\xc4', '\x8d' } },      /* b -> \v{c} */
      { 'B', { '\xc4', '\x8c' } },      /* B -> \v{C} */
      { 'c', { '\xc4', '\x8f' } },      /* c -> \v{d} */
      { 'C', { '\xc4', '\x8e' } },      /* C -> \v{D} */
      { 'd', { '\xc3', '\xa9' } },      /* d -> \'e */
      { 'D', { '\xc3', '\x89' } }       /* D -> \'E */
    };

  for (size_t i = 0; i < sizeof map / sizeof map[0]; ++i)
    if (map[i].ascii == c)
      {
        *dst++ = map[i].bytes[0];
        *dst++ = map[i].bytes[1];
        break;
      }

  return dst;
}
106 | ||
/* Return a malloc'ed copy of STR in which every character that occurs
   in LETTERS has been passed through mb_replace; all other characters
   are copied unchanged.  Returns NULL if STR is NULL or the allocation
   fails.  */
static char *
mb_frob_string (const char *str, const char *letters)
{
  if (str == NULL)
    return NULL;

  /* Room for two output bytes per input byte plus the terminator.  */
  char *result = malloc (2 * strlen (str) + 1);
  if (result == NULL)
    return NULL;

  char *out = result;
  const char *in = str;
  while (*in != '\0')
    {
      if (strchr (letters, *in) != NULL)
        out = mb_replace (out, *in);
      else
        *out++ = *in;
      ++in;
    }
  *out = '\0';

  return result;
}
128 | ||
/* Variant of mb_frob_string for regex patterns: characters between
   [: and :], [. and .] or [= and =] are copied verbatim instead of
   being replaced.  Returns a malloc'ed string, or NULL if STR is NULL
   or the allocation fails.  */

static char *
mb_frob_pattern (const char *str, const char *letters)
{
  if (str == NULL)
    return NULL;

  char *result = malloc (2 * strlen (str) + 1);
  if (result == NULL)
    return NULL;

  char *out = result;
  int inside = 0;       /* Nonzero while between "[x" and "x]".  */

  for (const char *in = str; *in != '\0'; ++in)
    {
      if (!inside && strchr (letters, *in) != NULL)
        {
          out = mb_replace (out, *in);
          continue;
        }

      if (inside)
        {
          /* in[-1] is safe: INSIDE is only set after an earlier '['.  */
          if (*in == ']' && strchr (":.=", in[-1]) != NULL)
            inside = 0;
        }
      else if (*in == '[' && strchr (":.=", in[1]) != NULL)
        inside = 1;

      *out++ = *in;
    }
  *out = '\0';

  return result;
}
160 | ||
/* Compare sub-match IDX of RM, which refers to offsets in STRING,
   against the expectation MATCH.  Returns 0 if they agree; otherwise
   prints a line starting with FAIL and returns 1.  MATCH is one of:

     "-"      the entry must be unset (both offsets -1);
     "@text"  the match must be empty and STRING must continue with
              "text" at that offset; a lone "@" requires the offset
              to be at the terminating NUL of STRING;
     other    the matched substring must be exactly MATCH.  */
static int
check_match (regmatch_t *rm, int idx, const char *string,
             const char *match, const char *fail)
{
  const regmatch_t *m = &rm[idx];
  int so_unset = m->rm_so == -1;
  int eo_unset = m->rm_eo == -1;

  if (strcmp (match, "-") == 0)
    {
      if (!(so_unset && eo_unset))
        {
          printf ("%s rm[%d] unexpectedly matched\n", fail, idx);
          return 1;
        }
      return 0;
    }

  if (so_unset || eo_unset)
    {
      printf ("%s rm[%d] unexpectedly did not match\n", fail, idx);
      return 1;
    }

  const char *start = string + m->rm_so;

  if (match[0] == '@')
    {
      const char *text = match + 1;

      if (m->rm_so != m->rm_eo)
        {
          printf ("%s rm[%d] not empty\n", fail, idx);
          return 1;
        }

      /* Compare at least one byte so that an empty TEXT is checked
         against the end of STRING.  */
      size_t want = strlen (text);
      if (want == 0)
        want = 1;

      if (strncmp (start, text, want) != 0)
        {
          printf ("%s rm[%d] not matching %s\n", fail, idx, match);
          return 1;
        }
      return 0;
    }

  size_t got = m->rm_eo - m->rm_so;
  if (got != strlen (match) || strncmp (start, match, got) != 0)
    {
      printf ("%s rm[%d] not matching %s\n", fail, idx, match);
      return 1;
    }

  return 0;
}
205 | ||
/* Run one test case: compile PATTERN with CFLAGS and check the outcome.

   If EFLAGS is -1 the case expects regcomp to fail and STRING holds the
   expected error name without its "REG_" prefix; when regcomp succeeds
   instead, only the expected name "EMPTY" is tolerated.  Otherwise
   STRING is matched with regexec using EFLAGS.  A NULL EXPECT means
   that no match is expected; else EXPECT describes the whole match and
   MATCHES, if not NULL, is a comma-separated list of expectations for
   sub-matches 1 to 9, with missing trailing entries treated as "-".
   MATCHES is split in place but every comma is put back.  Sub-matches
   are not checked when CFLAGS contains REG_NOSUB.

   Diagnostics are printed to stdout, each starting with FAIL.  Returns
   0 if the case passed and 1 otherwise.  */
static int
test (const char *pattern, int cflags, const char *string, int eflags,
      char *expect, char *matches, const char *fail)
{
  static const struct { reg_errcode_t code; const char *name; } codes[] =
    {
      { REG_NOERROR, "NOERROR" },   { REG_NOMATCH, "NOMATCH" },
      { REG_BADPAT, "BADPAT" },     { REG_ECOLLATE, "ECOLLATE" },
      { REG_ECTYPE, "ECTYPE" },     { REG_EESCAPE, "EESCAPE" },
      { REG_ESUBREG, "ESUBREG" },   { REG_EBRACK, "EBRACK" },
      { REG_EPAREN, "EPAREN" },     { REG_EBRACE, "EBRACE" },
      { REG_BADBR, "BADBR" },       { REG_ERANGE, "ERANGE" },
      { REG_ESPACE, "ESPACE" },     { REG_BADRPT, "BADRPT" }
    };
  regex_t re;
  regmatch_t rm[10];
  int want_comp_error = eflags == -1;
  int rc = regcomp (&re, pattern, cflags);

  if (rc != 0 && !want_comp_error)
    {
      char buf[500];
      regerror (rc, &re, buf, sizeof (buf));
      printf ("%s regcomp failed: %s\n", fail, buf);
      return 1;
    }

  if (rc != 0)
    {
      /* A compile error was expected; it must be the right one.  */
      for (size_t i = 0; i < sizeof (codes) / sizeof (codes[0]); ++i)
        {
          if (rc != codes[i].code)
            continue;
          if (strcmp (string, codes[i].name) == 0)
            return 0;
          printf ("%s regcomp returned REG_%s (expected REG_%s)\n",
                  fail, codes[i].name, string);
          return 1;
        }

      printf ("%s regcomp return value REG_%d\n", fail, rc);
      return 1;
    }

  if (want_comp_error)
    {
      regfree (&re);

      /* The test case file expects regcomp to return REG_EMPTY for
         empty expressions, which only the rxspencer implementation
         guarantees.  This implementation does not, so that particular
         expectation is ignored.  */
      if (strcmp (string, "EMPTY") == 0)
        return 0;

      printf ("%s regcomp unexpectedly succeeded\n", fail);
      return 1;
    }

  int matched = regexec (&re, string, 10, rm, eflags) == 0;
  regfree (&re);

  if (!matched)
    {
      if (expect == NULL)
        return 0;
      printf ("%s regexec failed\n", fail);
      return 1;
    }

  if (expect == NULL)
    {
      printf ("%s regexec unexpectedly succeeded\n", fail);
      return 1;
    }

  if (cflags & REG_NOSUB)
    return 0;

  int ret = check_match (rm, 0, string, expect, fail);
  if (matches == NULL)
    return ret;

  for (int sub = 1; ret == 0 && sub < 10; ++sub)
    {
      /* Cut the current entry out of the list for the comparison.  */
      char *comma = matches != NULL ? strchr (matches, ',') : NULL;
      if (comma != NULL)
        *comma = '\0';

      ret = check_match (rm, sub, string,
                         matches != NULL ? matches : "-", fail);

      if (comma != NULL)
        {
          *comma = ',';
          matches = comma + 1;
        }
      else
        matches = NULL;
    }

  return ret;
}
309 | ||
/* Run one test case with the characters listed in LETTERS replaced by
   their multi-byte stand-ins.  PATTERN is converted with
   mb_frob_pattern; EXPECT, MATCHES and STRING are converted with
   mb_frob_string, except that STRING is passed on unchanged when
   EFLAGS is -1.  The converted copies are handed to test and freed
   afterwards.  Returns the result of test, or 1 after printing FAIL
   and the %m error text if a conversion failed.  */
static int
mb_test (const char *pattern, int cflags, const char *string, int eflags,
         char *expect, const char *matches, const char *letters,
         const char *fail)
{
  int convert_string = eflags != -1;
  char *pattern_mb = mb_frob_pattern (pattern, letters);
  char *string_copy
    = convert_string ? mb_frob_string (string, letters) : NULL;
  const char *string_mb = convert_string ? string_copy : string;
  char *expect_mb = mb_frob_string (expect, letters);
  char *matches_mb = mb_frob_string (matches, letters);
  int ret;

  /* A NULL result is only an error where the input was not NULL.  */
  int converted = (pattern_mb != NULL
                   && string_mb != NULL
                   && (expect == NULL || expect_mb != NULL)
                   && (matches == NULL || matches_mb != NULL));

  if (converted)
    ret = test (pattern_mb, cflags, string_mb, eflags, expect_mb,
                matches_mb, fail);
  else
    {
      printf ("%s %m", fail);
      ret = 1;
    }

  free (matches_mb);
  free (expect_mb);
  free (string_copy);
  free (pattern_mb);
  return ret;
}
339 | ||
/* Run mb_test once for each of the 15 non-empty subsets of the letter
   pairs a/A, b/B, c/C and d/D and return the OR of the results.
   Patterns containing "[:xdigit:]" are skipped and count as passed.  */
static int
mb_tests (const char *pattern, int cflags, const char *string, int eflags,
          char *expect, const char *matches)
{
  static const char pairs[4][2]
    = { { 'a', 'A' }, { 'b', 'B' }, { 'c', 'C' }, { 'd', 'D' } };
  /* LETTERS holds at most 8 characters; FAIL is exactly large enough
     for "UTF-8 " (6) + 8 letters + " FAIL" (5) + NUL.  */
  char letters[9], fail[20];
  int ret = 0;

  /* The tests aren't supposed to work with xdigit, since a-dA-D are
     hex digits while their multi-byte replacements are not.  */
  if (strstr (pattern, "[:xdigit:]") != NULL)
    return 0;

  for (int mask = 1; mask < 16; ++mask)
    {
      size_t used = 0;

      /* Bit N of MASK selects pair N.  */
      for (int bit = 0; bit < 4; ++bit)
        if (mask & (1 << bit))
          {
            letters[used++] = pairs[bit][0];
            letters[used++] = pairs[bit][1];
          }
      letters[used] = '\0';

      sprintf (fail, "UTF-8 %s FAIL", letters);
      ret |= mb_test (pattern, cflags, string, eflags, expect, matches,
                      letters, fail);
    }

  return ret;
}
371 | ||
/* Test driver.  Usage: PROGRAM [--utf8] TESTFILE

   Every line of TESTFILE that is neither empty nor a '#' comment is
   echoed to stdout and split into tab-separated fields:

     pattern  flags  string  [expect  [matches]]

   A pattern or string written as "" stands for the empty string.  The
   flag characters are:
     -        no effect
     b        compile without REG_EXTENDED
     &        run the case with CFLAGS as is, then without REG_EXTENDED
     C        regcomp is expected to fail (eflags becomes -1 and STRING
              is then the expected error name)
     i s n    add REG_ICASE, REG_NOSUB, REG_NEWLINE to the compile flags
     ^ $      add REG_NOTBOL, REG_NOTEOL to the exec flags
     m p #    unsupported; the line is skipped
   Lines with fewer than three fields are skipped as well.

   Each case runs in the C locale.  With --utf8, a case that passed
   there is repeated in the cs_CZ.UTF-8 locale and then through
   mb_tests.  Returns 0 if every case passed, 1 otherwise.  */
int
main (int argc, char **argv)
{
  int ret = 0;
  char *line = NULL;
  size_t line_len = 0;
  ssize_t len;
  FILE *f;
  static int test_utf8 = 0;
  static const struct option options[] =
    {
      {"utf8", no_argument, &test_utf8, 1},
      {NULL, 0, NULL, 0 }
    };

  /* There are only long options; --utf8 stores 1 in test_utf8.  */
  while (getopt_long (argc, argv, "", options, NULL) >= 0);

  if (optind + 1 != argc)
    {
      fprintf (stderr, "Missing test filename\n");
      return 1;
    }

  f = fopen (argv[optind], "r");
  if (f == NULL)
    {
      fprintf (stderr, "Couldn't open %s\n", argv[optind]);
      return 1;
    }

  while ((len = getline (&line, &line_len, f)) > 0)
    {
      char *pattern, *flagstr, *string, *expect, *matches, *p;
      int cflags = REG_EXTENDED, eflags = 0, try_bre_ere = 0;

      if (line[len - 1] == '\n')
        line[len - 1] = '\0';

      /* Skip comments and empty lines.  */
      if (*line == '#' || *line == '\0')
        continue;

      puts (line);
      fflush (stdout);

      pattern = strtok (line, "\t");
      if (pattern == NULL)
        continue;

      /* A literal "" denotes the empty pattern.  */
      if (strcmp (pattern, "\"\"") == 0)
        pattern += 2;

      flagstr = strtok (NULL, "\t");
      if (flagstr == NULL)
        continue;

      string = strtok (NULL, "\t");
      if (string == NULL)
        continue;

      if (strcmp (string, "\"\"") == 0)
        string += 2;

      for (p = flagstr; *p; ++p)
        switch (*p)
          {
          case '-':
            break;
          case 'b':
            cflags &= ~REG_EXTENDED;
            break;
          case '&':
            try_bre_ere = 1;
            break;
          case 'C':
            eflags = -1;
            break;
          case 'i':
            cflags |= REG_ICASE;
            break;
          case 's':
            cflags |= REG_NOSUB;
            break;
          case 'n':
            cflags |= REG_NEWLINE;
            break;
          case '^':
            eflags |= REG_NOTBOL;
            break;
          case '$':
            eflags |= REG_NOTEOL;
            break;
          case 'm':
          case 'p':
          case '#':
            /* Not supported.  */
            flagstr = NULL;
            break;
          }

      if (flagstr == NULL)
        continue;

      replace_special_chars (pattern);
      glibc_re_syntax (pattern);
      /* With eflags == -1 STRING is an error name, not test input.  */
      if (eflags != -1)
        replace_special_chars (string);

      expect = strtok (NULL, "\t");
      matches = NULL;
      if (expect != NULL)
        {
          replace_special_chars (expect);
          matches = strtok (NULL, "\t");
          if (matches != NULL)
            replace_special_chars (matches);
        }

      setlocale (LC_ALL, "C");
      if (test (pattern, cflags, string, eflags, expect, matches, "FAIL")
          || (try_bre_ere
              && test (pattern, cflags & ~REG_EXTENDED, string, eflags,
                       expect, matches, "FAIL")))
        ret = 1;
      else if (test_utf8)
        {
          if (setlocale (LC_ALL, "cs_CZ.UTF-8") == NULL)
            {
              /* Without the locale the runs below would execute in the
                 C locale and report misleading results, so fail the
                 case explicitly instead.  */
              puts ("UTF-8 FAIL setlocale (LC_ALL, \"cs_CZ.UTF-8\") failed");
              ret = 1;
            }
          else if (test (pattern, cflags, string, eflags, expect, matches,
                         "UTF-8 FAIL")
                   || (try_bre_ere
                       && test (pattern, cflags & ~REG_EXTENDED, string,
                                eflags, expect, matches, "UTF-8 FAIL")))
            ret = 1;
          else if (mb_tests (pattern, cflags, string, eflags, expect, matches)
                   || (try_bre_ere
                       && mb_tests (pattern, cflags & ~REG_EXTENDED, string,
                                    eflags, expect, matches)))
            ret = 1;
        }
    }

  fclose (f);
  /* getline allocated the line buffer; release it.  */
  free (line);
  return ret;
}