]>
Commit | Line | Data |
---|---|---|
6c805a2b UD |
1 | ; |
2 | ; | |
3 | ; this file contains a script of tests to run through regress.exe | |
4 | ; | |
5 | ; comments start with a semicolon and proceed to the end of the line | |
6 | ; | |
7 | ; changes to regular expression compile flags start with a "-" as the first | |
8 | ; non-whitespace character and consist of a list of the printable names | |
9 | ; of the flags, for example "match_default" | |
10 | ; | |
11 | ; Other lines contain a test to perform using the current flag status | |
12 | ; the first token contains the expression to compile, the second the string | |
13 | ; to match it against. If the second string is "!" then the expression should | |
14 | ; not compile, that is the first string is an invalid regular expression. | |
15 | ; This is then followed by a list of integers that specify what should match, | |
16 | ; each pair represents the starting and ending positions of a subexpression | |
17 | ; starting with the zeroth subexpression (the whole match). | |
18 | ; A value of -1 indicates that the subexpression should not take part in the | |
19 | ; match at all, if the first value is -1 then no part of the expression should | |
20 | ; match the string. | |
21 | ; | |
22 | ; Tests taken from BOOST testsuite and adapted to glibc regex. | |
23 | ; | |
24 | ; Boost Software License - Version 1.0 - August 17th, 2003 | |
25 | ; | |
26 | ; Permission is hereby granted, free of charge, to any person or organization | |
27 | ; obtaining a copy of the software and accompanying documentation covered by | |
28 | ; this license (the "Software") to use, reproduce, display, distribute, | |
29 | ; execute, and transmit the Software, and to prepare derivative works of the | |
30 | ; Software, and to permit third-parties to whom the Software is furnished to | |
31 | ; do so, all subject to the following: | |
32 | ; | |
33 | ; The copyright notices in the Software and this entire statement, including | |
34 | ; the above license grant, this restriction and the following disclaimer, | |
35 | ; must be included in all copies of the Software, in whole or in part, and | |
36 | ; all derivative works of the Software, unless such copies or derivative | |
37 | ; works are solely in the form of machine-executable object code generated by | |
38 | ; a source language processor. | |
39 | ; | |
40 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
41 | ; IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
42 | ; FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT | |
43 | ; SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE | |
44 | ; FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, | |
45 | ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER | |
46 | ; DEALINGS IN THE SOFTWARE. | |
47 | ; | |
48 | ||
49 | - match_default normal REG_EXTENDED | |
50 | ||
51 | ; | |
52 | ; try some really simple literals: | |
53 | a a 0 1 | |
54 | Z Z 0 1 | |
55 | Z aaa -1 -1 | |
56 | Z xxxxZZxxx 4 5 | |
57 | ||
58 | ; and some simple brackets: | |
59 | (a) zzzaazz 3 4 3 4 | |
60 | () zzz 0 0 0 0 | |
61 | () "" 0 0 0 0 | |
62 | ( ! | |
63 | ) ) 0 1 | |
64 | (aa ! | |
65 | aa) baa)b 1 4 | |
66 | a b -1 -1 | |
67 | \(\) () 0 2 | |
68 | \(a\) (a) 0 3 | |
69 | \() () 0 2 | |
70 | (\) ! | |
71 | p(a)rameter ABCparameterXYZ 3 12 4 5 | |
72 | [pq](a)rameter ABCparameterXYZ 3 12 4 5 | |
73 | ||
74 | ; now try escaped brackets: | |
75 | - match_default bk_parens REG_BASIC | |
76 | \(a\) zzzaazz 3 4 3 4 | |
77 | \(\) zzz 0 0 0 0 | |
78 | \(\) "" 0 0 0 0 | |
79 | \( ! | |
80 | \) ! | |
81 | \(aa ! | |
82 | aa\) ! | |
83 | () () 0 2 | |
84 | (a) (a) 0 3 | |
85 | (\) ! | |
86 | \() ! | |
87 | ||
88 | ; now move on to "." wildcards | |
89 | - match_default normal REG_EXTENDED REG_STARTEND | |
90 | . a 0 1 | |
91 | . \n 0 1 | |
92 | . \r 0 1 | |
93 | . \0 0 1 | |
94 | ||
95 | ; | |
7f0d9e61 | 96 | ; now move on to the repetition ops, |
6c805a2b UD |
97 | ; starting with operator * |
98 | - match_default normal REG_EXTENDED | |
99 | a* b 0 0 | |
100 | ab* a 0 1 | |
101 | ab* ab 0 2 | |
102 | ab* sssabbbbbbsss 3 10 | |
103 | ab*c* a 0 1 | |
104 | ab*c* abbb 0 4 | |
105 | ab*c* accc 0 4 | |
106 | ab*c* abbcc 0 5 | |
107 | *a ! | |
108 | \<* ! | |
109 | \>* ! | |
110 | \n* \n\n 0 2 | |
111 | \** ** 0 2 | |
112 | \* * 0 1 | |
113 | ||
114 | ; now try operator + | |
115 | ab+ a -1 -1 | |
116 | ab+ ab 0 2 | |
117 | ab+ sssabbbbbbsss 3 10 | |
118 | ab+c+ a -1 -1 | |
119 | ab+c+ abbb -1 -1 | |
120 | ab+c+ accc -1 -1 | |
121 | ab+c+ abbcc 0 5 | |
122 | +a ! | |
123 | \<+ ! | |
124 | \>+ ! | |
125 | \n+ \n\n 0 2 | |
126 | \+ + 0 1 | |
127 | \+ ++ 0 1 | |
128 | \++ ++ 0 2 | |
129 | ||
130 | ; now try operator ? | |
131 | - match_default normal REG_EXTENDED | |
132 | a? b 0 0 | |
133 | ab? a 0 1 | |
134 | ab? ab 0 2 | |
135 | ab? sssabbbbbbsss 3 5 | |
136 | ab?c? a 0 1 | |
137 | ab?c? abbb 0 2 | |
138 | ab?c? accc 0 2 | |
139 | ab?c? abcc 0 3 | |
140 | ?a ! | |
141 | \<? ! | |
142 | \>? ! | |
143 | \n? \n\n 0 1 | |
144 | \? ? 0 1 | |
145 | \? ?? 0 1 | |
146 | \?? ?? 0 1 | |
147 | ||
148 | ; now try operator {} | |
149 | - match_default normal REG_EXTENDED | |
150 | a{2} a -1 -1 | |
151 | a{2} aa 0 2 | |
152 | a{2} aaa 0 2 | |
153 | a{2,} a -1 -1 | |
154 | a{2,} aa 0 2 | |
155 | a{2,} aaaaa 0 5 | |
156 | a{2,4} a -1 -1 | |
157 | a{2,4} aa 0 2 | |
158 | a{2,4} aaa 0 3 | |
159 | a{2,4} aaaa 0 4 | |
160 | a{2,4} aaaaa 0 4 | |
161 | a{} ! | |
162 | a{2 ! | |
163 | a} a} 0 2 | |
164 | \{\} {} 0 2 | |
165 | ||
166 | - match_default normal REG_BASIC | |
167 | a\{2\} a -1 -1 | |
168 | a\{2\} aa 0 2 | |
169 | a\{2\} aaa 0 2 | |
170 | a\{2,\} a -1 -1 | |
171 | a\{2,\} aa 0 2 | |
172 | a\{2,\} aaaaa 0 5 | |
173 | a\{2,4\} a -1 -1 | |
174 | a\{2,4\} aa 0 2 | |
175 | a\{2,4\} aaa 0 3 | |
176 | a\{2,4\} aaaa 0 4 | |
177 | a\{2,4\} aaaaa 0 4 | |
178 | {} {} 0 2 | |
179 | ||
180 | ; now test the alternation operator | | |
181 | - match_default normal REG_EXTENDED | |
182 | a|b a 0 1 | |
183 | a|b b 0 1 | |
184 | a(b|c) ab 0 2 1 2 | |
185 | a(b|c) ac 0 2 1 2 | |
186 | a(b|c) ad -1 -1 -1 -1 | |
187 | a\| a| 0 2 | |
188 | ||
189 | ; now test the set operator [] | |
190 | - match_default normal REG_EXTENDED | |
191 | ; try some literals first | |
192 | [abc] a 0 1 | |
193 | [abc] b 0 1 | |
194 | [abc] c 0 1 | |
195 | [abc] d -1 -1 | |
196 | [^bcd] a 0 1 | |
197 | [^bcd] b -1 -1 | |
198 | [^bcd] d -1 -1 | |
199 | [^bcd] e 0 1 | |
200 | a[b]c abc 0 3 | |
201 | a[ab]c abc 0 3 | |
202 | a[^ab]c adc 0 3 | |
203 | a[]b]c a]c 0 3 | |
204 | a[[b]c a[c 0 3 | |
205 | a[-b]c a-c 0 3 | |
206 | a[^]b]c adc 0 3 | |
207 | a[^-b]c adc 0 3 | |
208 | a[b-]c a-c 0 3 | |
209 | a[b ! | |
210 | a[] ! | |
211 | ||
212 | ; then some ranges | |
213 | [b-e] a -1 -1 | |
214 | [b-e] b 0 1 | |
215 | [b-e] e 0 1 | |
216 | [b-e] f -1 -1 | |
217 | [^b-e] a 0 1 | |
218 | [^b-e] b -1 -1 | |
219 | [^b-e] e -1 -1 | |
220 | [^b-e] f 0 1 | |
221 | a[1-3]c a2c 0 3 | |
222 | a[3-1]c ! | |
223 | a[1-3-5]c ! | |
224 | a[1- ! | |
225 | ||
226 | ; and some classes | |
227 | a[[:alpha:]]c abc 0 3 | |
228 | a[[:unknown:]]c ! | |
229 | a[[: ! | |
230 | a[[:alpha ! | |
231 | a[[:alpha:] ! | |
232 | a[[:alpha,:] ! | |
233 | a[[:]:]]b ! | |
234 | a[[:-:]]b ! | |
235 | a[[:alph:]] ! | |
236 | a[[:alphabet:]] ! | |
237 | [[:alnum:]]+ -%@a0X_- 3 6 | |
238 | [[:alpha:]]+ -%@aX_0- 3 5 | |
239 | [[:blank:]]+ "a \tb" 1 4 | |
240 | [[:cntrl:]]+ a\n\tb 1 3 | |
241 | [[:digit:]]+ a019b 1 4 | |
242 | [[:graph:]]+ " a%b " 1 4 | |
243 | [[:lower:]]+ AabC 1 3 | |
244 | ; This test fails with STLPort, so it is disabled for now as this is a corner case anyway... | |
245 | ;[[:print:]]+ "\na b\n" 1 4 | |
246 | [[:punct:]]+ " %-&\t" 1 4 | |
247 | [[:space:]]+ "a \n\t\rb" 1 5 | |
248 | [[:upper:]]+ aBCd 1 3 | |
249 | [[:xdigit:]]+ p0f3Cx 1 5 | |
250 | ||
251 | ; now test flag settings: | |
252 | - escape_in_lists REG_NO_POSIX_TEST | |
253 | [\n] \n 0 1 | |
254 | - REG_NO_POSIX_TEST | |
255 | ||
256 | ; line anchors | |
257 | - match_default normal REG_EXTENDED | |
258 | ^ab ab 0 2 | |
259 | ^ab xxabxx -1 -1 | |
260 | ab$ ab 0 2 | |
261 | ab$ abxx -1 -1 | |
262 | - match_default match_not_bol match_not_eol normal REG_EXTENDED REG_NOTBOL REG_NOTEOL | |
263 | ^ab ab -1 -1 | |
264 | ^ab xxabxx -1 -1 | |
265 | ab$ ab -1 -1 | |
266 | ab$ abxx -1 -1 | |
267 | ||
268 | ; back references | |
269 | - match_default normal REG_PERL | |
270 | a(b)\2c ! | |
271 | a(b\1)c ! | |
272 | a(b*)c\1d abbcbbd 0 7 1 3 | |
273 | a(b*)c\1d abbcbd -1 -1 | |
274 | a(b*)c\1d abbcbbbd -1 -1 | |
275 | ^(.)\1 abc -1 -1 | |
276 | a([bc])\1d abcdabbd 4 8 5 6 | |
277 | ; strictly speaking this is at best ambiguous, at worst wrong, this is what most | |
7f0d9e61 | 278 | ; re implementations will match though. |
6c805a2b UD |
279 | a(([bc])\2)*d abbccd 0 6 3 5 3 4 |
280 | ||
281 | a(([bc])\2)*d abbcbd -1 -1 | |
282 | a((b)*\2)*d abbbd 0 5 1 4 2 3 | |
283 | ; perl only: | |
284 | (ab*)[ab]*\1 ababaaa 0 7 0 1 | |
285 | (a)\1bcd aabcd 0 5 0 1 | |
286 | (a)\1bc*d aabcd 0 5 0 1 | |
287 | (a)\1bc*d aabd 0 4 0 1 | |
288 | (a)\1bc*d aabcccd 0 7 0 1 | |
289 | (a)\1bc*[ce]d aabcccd 0 7 0 1 | |
290 | ^(a)\1b(c)*cd$ aabcccd 0 7 0 1 4 5 | |
291 | ||
292 | ; posix only: | |
293 | - match_default extended REG_EXTENDED | |
294 | (ab*)[ab]*\1 ababaaa 0 7 0 1 | |
295 | ||
296 | ; | |
297 | ; word operators: | |
298 | \w a 0 1 | |
299 | \w z 0 1 | |
300 | \w A 0 1 | |
301 | \w Z 0 1 | |
302 | \w _ 0 1 | |
303 | \w } -1 -1 | |
304 | \w ` -1 -1 | |
305 | \w [ -1 -1 | |
306 | \w @ -1 -1 | |
307 | ; non-word: | |
308 | \W a -1 -1 | |
309 | \W z -1 -1 | |
310 | \W A -1 -1 | |
311 | \W Z -1 -1 | |
312 | \W _ -1 -1 | |
313 | \W } 0 1 | |
314 | \W ` 0 1 | |
315 | \W [ 0 1 | |
316 | \W @ 0 1 | |
317 | ; word start: | |
318 | \<abcd " abcd" 2 6 | |
319 | \<ab cab -1 -1 | |
320 | \<ab "\nab" 1 3 | |
321 | \<tag ::tag 2 5 | |
322 | ; word end: | |
323 | abc\> abc 0 3 | |
324 | abc\> abcd -1 -1 | |
325 | abc\> abc\n 0 3 | |
326 | abc\> abc:: 0 3 | |
327 | ; word boundary: | |
328 | \babcd " abcd" 2 6 | |
329 | \bab cab -1 -1 | |
330 | \bab "\nab" 1 3 | |
331 | \btag ::tag 2 5 | |
332 | abc\b abc 0 3 | |
333 | abc\b abcd -1 -1 | |
334 | abc\b abc\n 0 3 | |
335 | abc\b abc:: 0 3 | |
336 | ; within word: | |
337 | \B ab 1 1 | |
338 | a\Bb ab 0 2 | |
339 | a\B ab 0 1 | |
340 | a\B a -1 -1 | |
341 | a\B "a " -1 -1 | |
342 | ||
343 | ; | |
344 | ; buffer operators: | |
345 | \`abc abc 0 3 | |
346 | \`abc \nabc -1 -1 | |
347 | \`abc " abc" -1 -1 | |
348 | abc\' abc 0 3 | |
349 | abc\' abc\n -1 -1 | |
350 | abc\' "abc " -1 -1 | |
351 | ||
352 | ; | |
353 | ; now follow various complex expressions designed to try and bust the matcher: | |
354 | a(((b)))c abc 0 3 1 2 1 2 1 2 | |
355 | a(b|(c))d abd 0 3 1 2 -1 -1 | |
356 | a(b|(c))d acd 0 3 1 2 1 2 | |
357 | a(b*|c)d abbd 0 4 1 3 | |
358 | ; just gotta have one DFA-buster, of course | |
359 | a[ab]{20} aaaaabaaaabaaaabaaaab 0 21 | |
360 | ; and an inline expansion in case somebody gets tricky | |
361 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab] aaaaabaaaabaaaabaaaab 0 21 | |
362 | ; and in case somebody just slips in an NFA... | |
363 | a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night) aaaaabaaaabaaaabaaaabweeknights 0 31 21 24 24 31 | |
364 | ; one really big one | |
365 | 1234567890123456789012345678901234567890123456789012345678901234567890 a1234567890123456789012345678901234567890123456789012345678901234567890b 1 71 | |
366 | ; fish for problems as brackets go past 8 | |
367 | [ab][cd][ef][gh][ij][kl][mn] xacegikmoq 1 8 | |
368 | [ab][cd][ef][gh][ij][kl][mn][op] xacegikmoq 1 9 | |
369 | [ab][cd][ef][gh][ij][kl][mn][op][qr] xacegikmoqy 1 10 | |
370 | [ab][cd][ef][gh][ij][kl][mn][op][q] xacegikmoqy 1 10 | |
371 | ; and as parentheses go past 9: | |
372 | (a)(b)(c)(d)(e)(f)(g)(h) zabcdefghi 1 9 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 | |
373 | (a)(b)(c)(d)(e)(f)(g)(h)(i) zabcdefghij 1 10 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 | |
374 | (a)(b)(c)(d)(e)(f)(g)(h)(i)(j) zabcdefghijk 1 11 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 | |
375 | (a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k) zabcdefghijkl 1 12 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 | |
376 | (a)d|(b)c abc 1 3 -1 -1 1 2 | |
377 | _+((www)|(ftp)|(mailto)):_* "_wwwnocolon _mailto:" 12 20 13 19 -1 -1 -1 -1 13 19 | |
378 | ||
379 | ; subtleties of matching | |
380 | ;a(b)?c\1d acd 0 3 -1 -1 | |
381 | ; POSIX mode expects the following result for this test instead (no match): | |
382 | a(b)?c\1d acd -1 -1 -1 -1 | |
383 | a(b?c)+d accd 0 4 2 3 | |
384 | (wee|week)(knights|night) weeknights 0 10 0 3 3 10 | |
385 | .* abc 0 3 | |
386 | a(b|(c))d abd 0 3 1 2 -1 -1 | |
387 | a(b|(c))d acd 0 3 1 2 1 2 | |
388 | a(b*|c|e)d abbd 0 4 1 3 | |
389 | a(b*|c|e)d acd 0 3 1 2 | |
390 | a(b*|c|e)d ad 0 2 1 1 | |
391 | a(b?)c abc 0 3 1 2 | |
392 | a(b?)c ac 0 2 1 1 | |
393 | a(b+)c abc 0 3 1 2 | |
394 | a(b+)c abbbc 0 5 1 4 | |
395 | a(b*)c ac 0 2 1 1 | |
396 | (a|ab)(bc([de]+)f|cde) abcdef 0 6 0 1 1 6 3 5 | |
397 | a([bc]?)c abc 0 3 1 2 | |
398 | a([bc]?)c ac 0 2 1 1 | |
399 | a([bc]+)c abc 0 3 1 2 | |
400 | a([bc]+)c abcc 0 4 1 3 | |
401 | a([bc]+)bc abcbc 0 5 1 3 | |
402 | a(bb+|b)b abb 0 3 1 2 | |
403 | a(bbb+|bb+|b)b abb 0 3 1 2 | |
404 | a(bbb+|bb+|b)b abbb 0 4 1 3 | |
405 | a(bbb+|bb+|b)bb abbb 0 4 1 2 | |
406 | (.*).* abcdef 0 6 0 6 | |
407 | (a*)* bc 0 0 0 0 | |
408 | xyx*xz xyxxxxyxxxz 5 11 | |
409 | ||
410 | ; do we get the right subexpression when it is used more than once? | |
411 | a(b|c)*d ad 0 2 -1 -1 | |
412 | a(b|c)*d abcd 0 4 2 3 | |
413 | a(b|c)+d abd 0 3 1 2 | |
414 | a(b|c)+d abcd 0 4 2 3 | |
415 | a(b|c?)+d ad 0 2 1 1 | |
416 | a(b|c){0,0}d ad 0 2 -1 -1 | |
417 | a(b|c){0,1}d ad 0 2 -1 -1 | |
418 | a(b|c){0,1}d abd 0 3 1 2 | |
419 | a(b|c){0,2}d ad 0 2 -1 -1 | |
420 | a(b|c){0,2}d abcd 0 4 2 3 | |
421 | a(b|c){0,}d ad 0 2 -1 -1 | |
422 | a(b|c){0,}d abcd 0 4 2 3 | |
423 | a(b|c){1,1}d abd 0 3 1 2 | |
424 | a(b|c){1,2}d abd 0 3 1 2 | |
425 | a(b|c){1,2}d abcd 0 4 2 3 | |
426 | a(b|c){1,}d abd 0 3 1 2 | |
427 | a(b|c){1,}d abcd 0 4 2 3 | |
428 | a(b|c){2,2}d acbd 0 4 2 3 | |
429 | a(b|c){2,2}d abcd 0 4 2 3 | |
430 | a(b|c){2,4}d abcd 0 4 2 3 | |
431 | a(b|c){2,4}d abcbd 0 5 3 4 | |
432 | a(b|c){2,4}d abcbcd 0 6 4 5 | |
433 | a(b|c){2,}d abcd 0 4 2 3 | |
434 | a(b|c){2,}d abcbd 0 5 3 4 | |
6b6557e8 UD |
435 | ; perl only: these conflict with the POSIX test below |
436 | ;a(b|c?)+d abcd 0 4 3 3 | |
437 | ;a(b+|((c)*))+d abd 0 3 2 2 2 2 -1 -1 | |
438 | ;a(b+|((c)*))+d abcd 0 4 3 3 3 3 2 3 | |
6c805a2b UD |
439 | |
440 | ; posix only: | |
441 | - match_default extended REG_EXTENDED REG_STARTEND | |
442 | ||
6b6557e8 UD |
443 | a(b|c?)+d abcd 0 4 2 3 |
444 | a(b|((c)*))+d abcd 0 4 2 3 2 3 2 3 | |
445 | a(b+|((c)*))+d abd 0 3 1 2 -1 -1 -1 -1 | |
446 | a(b+|((c)*))+d abcd 0 4 2 3 2 3 2 3 | |
6c805a2b | 447 | a(b|((c)*))+d ad 0 2 1 1 1 1 -1 -1 |
6b6557e8 UD |
448 | a(b|((c)*))*d abcd 0 4 2 3 2 3 2 3 |
449 | a(b+|((c)*))*d abd 0 3 1 2 -1 -1 -1 -1 | |
450 | a(b+|((c)*))*d abcd 0 4 2 3 2 3 2 3 | |
6c805a2b UD |
451 | a(b|((c)*))*d ad 0 2 1 1 1 1 -1 -1 |
452 | ||
453 | - match_default normal REG_PERL | |
454 | ; try to match C++ syntax elements: | |
455 | ; line comment: | |
456 | //[^\n]* "++i //here is a line comment\n" 4 28 | |
457 | ; block comment: | |
458 | /\*([^*]|\*+[^*/])*\*+/ "/* here is a block comment */" 0 29 26 27 | |
459 | /\*([^*]|\*+[^*/])*\*+/ "/**/" 0 4 -1 -1 | |
460 | /\*([^*]|\*+[^*/])*\*+/ "/***/" 0 5 -1 -1 | |
461 | /\*([^*]|\*+[^*/])*\*+/ "/****/" 0 6 -1 -1 | |
462 | /\*([^*]|\*+[^*/])*\*+/ "/*****/" 0 7 -1 -1 | |
463 | /\*([^*]|\*+[^*/])*\*+/ "/*****/*/" 0 7 -1 -1 | |
464 | ; preprocessor directives: | |
465 | ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol" 0 19 -1 -1 | |
466 | ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) #x" 0 25 -1 -1 | |
467 | ; perl only: | |
468 | ^[[:blank:]]*#([^\n]*\\[[:space:]]+)*[^\n]* "#define some_symbol(x) \\ \r\n foo();\\\r\n printf(#x);" 0 53 30 42 | |
469 | ; literals: | |
470 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFF 0 4 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 | |
471 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 35 0 2 0 2 -1 -1 0 2 -1 -1 -1 -1 -1 -1 | |
472 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFu 0 5 0 4 0 4 -1 -1 -1 -1 -1 -1 -1 -1 | |
473 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFL 0 5 0 4 0 4 -1 -1 4 5 -1 -1 -1 -1 | |
474 | ((0x[[:xdigit:]]+)|([[:digit:]]+))u?((int(8|16|32|64))|L)? 0xFFFFFFFFFFFFFFFFuint64 0 24 0 18 0 18 -1 -1 19 24 19 24 22 24 | |
475 | ; strings: | |
476 | '([^\\']|\\.)*' '\\x3A' 0 6 4 5 | |
477 | '([^\\']|\\.)*' '\\'' 0 4 1 3 | |
478 | '([^\\']|\\.)*' '\\n' 0 4 1 3 | |
479 | ||
480 | ; finally try some case insensitive matches: | |
481 | - match_default normal REG_EXTENDED REG_ICASE | |
482 | ; upper and lower have no meaning here so they fail, however these | |
483 | ; may compile with other libraries... | |
484 | ;[[:lower:]] ! | |
485 | ;[[:upper:]] ! | |
486 | 0123456789@abcdefghijklmnopqrstuvwxyz\[\\\]\^_`ABCDEFGHIJKLMNOPQRSTUVWXYZ\{\|\} 0123456789@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]\^_`abcdefghijklmnopqrstuvwxyz\{\|\} 0 72 | |
487 | ||
488 | ; known and suspected bugs: | |
489 | - match_default normal REG_EXTENDED | |
490 | \( ( 0 1 | |
491 | \) ) 0 1 | |
492 | \$ $ 0 1 | |
493 | \^ ^ 0 1 | |
494 | \. . 0 1 | |
495 | \* * 0 1 | |
496 | \+ + 0 1 | |
497 | \? ? 0 1 | |
498 | \[ [ 0 1 | |
499 | \] ] 0 1 | |
500 | \| | 0 1 | |
501 | \\ \\ 0 1 | |
502 | # # 0 1 | |
503 | \# # 0 1 | |
504 | a- a- 0 2 | |
505 | \- - 0 1 | |
506 | \{ { 0 1 | |
507 | \} } 0 1 | |
508 | 0 0 0 1 | |
509 | 1 1 0 1 | |
510 | 9 9 0 1 | |
511 | b b 0 1 | |
512 | B B 0 1 | |
513 | < < 0 1 | |
514 | > > 0 1 | |
515 | w w 0 1 | |
516 | W W 0 1 | |
517 | ` ` 0 1 | |
518 | ' ' 0 1 | |
519 | \n \n 0 1 | |
520 | , , 0 1 | |
521 | a a 0 1 | |
522 | f f 0 1 | |
523 | n n 0 1 | |
524 | r r 0 1 | |
525 | t t 0 1 | |
526 | v v 0 1 | |
527 | c c 0 1 | |
528 | x x 0 1 | |
529 | : : 0 1 | |
530 | (\.[[:alnum:]]+){2} "w.a.b " 1 5 3 5 | |
531 | ||
532 | - match_default normal REG_EXTENDED REG_ICASE | |
533 | a A 0 1 | |
534 | A a 0 1 | |
535 | [abc]+ abcABC 0 6 | |
536 | [ABC]+ abcABC 0 6 | |
537 | [a-z]+ abcABC 0 6 | |
538 | [A-Z]+ abzANZ 0 6 | |
539 | [a-Z]+ abzABZ 0 6 | |
540 | [A-z]+ abzABZ 0 6 | |
541 | [[:lower:]]+ abyzABYZ 0 8 | |
542 | [[:upper:]]+ abzABZ 0 6 | |
543 | [[:alpha:]]+ abyzABYZ 0 8 | |
544 | [[:alnum:]]+ 09abyzABYZ 0 10 | |
545 | ||
546 | ; word start: | |
547 | \<abcd " abcd" 2 6 | |
548 | \<ab cab -1 -1 | |
549 | \<ab "\nab" 1 3 | |
550 | \<tag ::tag 2 5 | |
551 | ; word end: | |
552 | abc\> abc 0 3 | |
553 | abc\> abcd -1 -1 | |
554 | abc\> abc\n 0 3 | |
555 | abc\> abc:: 0 3 | |
556 | ||
557 | ; collating elements and rewritten set code: | |
558 | - match_default normal REG_EXTENDED REG_STARTEND | |
559 | ;[[.zero.]] 0 0 1 | |
560 | ;[[.one.]] 1 0 1 | |
561 | ;[[.two.]] 2 0 1 | |
562 | ;[[.three.]] 3 0 1 | |
563 | [[.a.]] baa 1 2 | |
564 | ;[[.right-curly-bracket.]] } 0 1 | |
565 | ;[[.NUL.]] \0 0 1 | |
566 | [[:<:]z] ! | |
567 | [a[:>:]] ! | |
568 | [[=a=]] a 0 1 | |
569 | ;[[=right-curly-bracket=]] } 0 1 | |
570 | - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE | |
571 | [[.A.]] A 0 1 | |
572 | [[.A.]] a 0 1 | |
573 | [[.A.]-b]+ AaBb 0 4 | |
574 | [A-[.b.]]+ AaBb 0 4 | |
575 | [[.a.]-B]+ AaBb 0 4 | |
576 | [a-[.B.]]+ AaBb 0 4 | |
577 | - match_default normal REG_EXTENDED REG_STARTEND | |
578 | [[.a.]-c]+ abcd 0 3 | |
579 | [a-[.c.]]+ abcd 0 3 | |
580 | [[:alpha:]-a] ! | |
581 | [a-[:alpha:]] ! | |
582 | ||
583 | ; try multi-character ligatures: | |
584 | ;[[.ae.]] ae 0 2 | |
585 | ;[[.ae.]] aE -1 -1 | |
586 | ;[[.AE.]] AE 0 2 | |
587 | ;[[.Ae.]] Ae 0 2 | |
588 | ;[[.ae.]-b] a -1 -1 | |
589 | ;[[.ae.]-b] b 0 1 | |
590 | ;[[.ae.]-b] ae 0 2 | |
591 | ;[a-[.ae.]] a 0 1 | |
592 | ;[a-[.ae.]] b -1 -1 | |
593 | ;[a-[.ae.]] ae 0 2 | |
594 | - match_default normal REG_EXTENDED REG_STARTEND REG_ICASE | |
595 | ;[[.ae.]] AE 0 2 | |
596 | ;[[.ae.]] Ae 0 2 | |
597 | ;[[.AE.]] Ae 0 2 | |
598 | ;[[.Ae.]] aE 0 2 | |
599 | ;[[.AE.]-B] a -1 -1 | |
600 | ;[[.Ae.]-b] b 0 1 | |
601 | ;[[.Ae.]-b] B 0 1 | |
602 | ;[[.ae.]-b] AE 0 2 | |
603 | ||
604 | - match_default normal REG_EXTENDED REG_STARTEND REG_NO_POSIX_TEST | |
605 | \s+ "ab ab" 2 5 | |
606 | \S+ " abc " 2 5 | |
607 | ||
608 | - match_default normal REG_EXTENDED REG_STARTEND | |
609 | \`abc abc 0 3 | |
610 | \`abc aabc -1 -1 | |
611 | abc\' abc 0 3 | |
612 | abc\' abcd -1 -1 | |
613 | abc\' abc\n\n -1 -1 | |
614 | abc\' abc 0 3 | |
615 | ||
616 | ; extended repeat checking to exercise new algorithms: | |
617 | ab.*xy abxy_ 0 4 | |
618 | ab.*xy ab_xy_ 0 5 | |
619 | ab.*xy abxy 0 4 | |
620 | ab.*xy ab_xy 0 5 | |
621 | ab.* ab 0 2 | |
622 | ab.* ab__ 0 4 | |
623 | ||
624 | ab.{2,5}xy ab__xy_ 0 6 | |
625 | ab.{2,5}xy ab____xy_ 0 8 | |
626 | ab.{2,5}xy ab_____xy_ 0 9 | |
627 | ab.{2,5}xy ab__xy 0 6 | |
628 | ab.{2,5}xy ab_____xy 0 9 | |
629 | ab.{2,5} ab__ 0 4 | |
630 | ab.{2,5} ab_______ 0 7 | |
631 | ab.{2,5}xy ab______xy -1 -1 | |
632 | ab.{2,5}xy ab_xy -1 -1 | |
633 | ||
634 | ab.*?xy abxy_ 0 4 | |
635 | ab.*?xy ab_xy_ 0 5 | |
636 | ab.*?xy abxy 0 4 | |
637 | ab.*?xy ab_xy 0 5 | |
638 | ab.*? ab 0 2 | |
639 | ab.*? ab__ 0 4 | |
640 | ||
641 | ab.{2,5}?xy ab__xy_ 0 6 | |
642 | ab.{2,5}?xy ab____xy_ 0 8 | |
643 | ab.{2,5}?xy ab_____xy_ 0 9 | |
644 | ab.{2,5}?xy ab__xy 0 6 | |
645 | ab.{2,5}?xy ab_____xy 0 9 | |
646 | ab.{2,5}? ab__ 0 4 | |
647 | ab.{2,5}? ab_______ 0 7 | |
648 | ab.{2,5}?xy ab______xy -1 -1 | |
649 | ab.{2,5}xy ab_xy -1 -1 | |
650 | ||
651 | ; again but with slower algorithm variant: | |
652 | - match_default REG_EXTENDED | |
653 | ; now again for single character repeats: | |
654 | ||
655 | ab_*xy abxy_ 0 4 | |
656 | ab_*xy ab_xy_ 0 5 | |
657 | ab_*xy abxy 0 4 | |
658 | ab_*xy ab_xy 0 5 | |
659 | ab_* ab 0 2 | |
660 | ab_* ab__ 0 4 | |
661 | ||
662 | ab_{2,5}xy ab__xy_ 0 6 | |
663 | ab_{2,5}xy ab____xy_ 0 8 | |
664 | ab_{2,5}xy ab_____xy_ 0 9 | |
665 | ab_{2,5}xy ab__xy 0 6 | |
666 | ab_{2,5}xy ab_____xy 0 9 | |
667 | ab_{2,5} ab__ 0 4 | |
668 | ab_{2,5} ab_______ 0 7 | |
669 | ab_{2,5}xy ab______xy -1 -1 | |
670 | ab_{2,5}xy ab_xy -1 -1 | |
671 | ||
672 | ab_*?xy abxy_ 0 4 | |
673 | ab_*?xy ab_xy_ 0 5 | |
674 | ab_*?xy abxy 0 4 | |
675 | ab_*?xy ab_xy 0 5 | |
676 | ab_*? ab 0 2 | |
677 | ab_*? ab__ 0 4 | |
678 | ||
679 | ab_{2,5}?xy ab__xy_ 0 6 | |
680 | ab_{2,5}?xy ab____xy_ 0 8 | |
681 | ab_{2,5}?xy ab_____xy_ 0 9 | |
682 | ab_{2,5}?xy ab__xy 0 6 | |
683 | ab_{2,5}?xy ab_____xy 0 9 | |
684 | ab_{2,5}? ab__ 0 4 | |
685 | ab_{2,5}? ab_______ 0 7 | |
686 | ab_{2,5}?xy ab______xy -1 -1 | |
687 | ab_{2,5}xy ab_xy -1 -1 | |
688 | ||
689 | ; and again for sets: | |
690 | ab[_,;]*xy abxy_ 0 4 | |
691 | ab[_,;]*xy ab_xy_ 0 5 | |
692 | ab[_,;]*xy abxy 0 4 | |
693 | ab[_,;]*xy ab_xy 0 5 | |
694 | ab[_,;]* ab 0 2 | |
695 | ab[_,;]* ab__ 0 4 | |
696 | ||
697 | ab[_,;]{2,5}xy ab__xy_ 0 6 | |
698 | ab[_,;]{2,5}xy ab____xy_ 0 8 | |
699 | ab[_,;]{2,5}xy ab_____xy_ 0 9 | |
700 | ab[_,;]{2,5}xy ab__xy 0 6 | |
701 | ab[_,;]{2,5}xy ab_____xy 0 9 | |
702 | ab[_,;]{2,5} ab__ 0 4 | |
703 | ab[_,;]{2,5} ab_______ 0 7 | |
704 | ab[_,;]{2,5}xy ab______xy -1 -1 | |
705 | ab[_,;]{2,5}xy ab_xy -1 -1 | |
706 | ||
707 | ab[_,;]*?xy abxy_ 0 4 | |
708 | ab[_,;]*?xy ab_xy_ 0 5 | |
709 | ab[_,;]*?xy abxy 0 4 | |
710 | ab[_,;]*?xy ab_xy 0 5 | |
711 | ab[_,;]*? ab 0 2 | |
712 | ab[_,;]*? ab__ 0 4 | |
713 | ||
714 | ab[_,;]{2,5}?xy ab__xy_ 0 6 | |
715 | ab[_,;]{2,5}?xy ab____xy_ 0 8 | |
716 | ab[_,;]{2,5}?xy ab_____xy_ 0 9 | |
717 | ab[_,;]{2,5}?xy ab__xy 0 6 | |
718 | ab[_,;]{2,5}?xy ab_____xy 0 9 | |
719 | ab[_,;]{2,5}? ab__ 0 4 | |
720 | ab[_,;]{2,5}? ab_______ 0 7 | |
721 | ab[_,;]{2,5}?xy ab______xy -1 -1 | |
722 | ab[_,;]{2,5}xy ab_xy -1 -1 | |
723 | ||
724 | ; and again for tricky sets with digraphs: | |
725 | ;ab[_[.ae.]]*xy abxy_ 0 4 | |
726 | ;ab[_[.ae.]]*xy ab_xy_ 0 5 | |
727 | ;ab[_[.ae.]]*xy abxy 0 4 | |
728 | ;ab[_[.ae.]]*xy ab_xy 0 5 | |
729 | ;ab[_[.ae.]]* ab 0 2 | |
730 | ;ab[_[.ae.]]* ab__ 0 4 | |
731 | ||
732 | ;ab[_[.ae.]]{2,5}xy ab__xy_ 0 6 | |
733 | ;ab[_[.ae.]]{2,5}xy ab____xy_ 0 8 | |
734 | ;ab[_[.ae.]]{2,5}xy ab_____xy_ 0 9 | |
735 | ;ab[_[.ae.]]{2,5}xy ab__xy 0 6 | |
736 | ;ab[_[.ae.]]{2,5}xy ab_____xy 0 9 | |
737 | ;ab[_[.ae.]]{2,5} ab__ 0 4 | |
738 | ;ab[_[.ae.]]{2,5} ab_______ 0 7 | |
739 | ;ab[_[.ae.]]{2,5}xy ab______xy -1 -1 | |
740 | ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 | |
741 | ||
742 | ;ab[_[.ae.]]*?xy abxy_ 0 4 | |
743 | ;ab[_[.ae.]]*?xy ab_xy_ 0 5 | |
744 | ;ab[_[.ae.]]*?xy abxy 0 4 | |
745 | ;ab[_[.ae.]]*?xy ab_xy 0 5 | |
746 | ;ab[_[.ae.]]*? ab 0 2 | |
747 | ;ab[_[.ae.]]*? ab__ 0 2 | |
748 | ||
749 | ;ab[_[.ae.]]{2,5}?xy ab__xy_ 0 6 | |
750 | ;ab[_[.ae.]]{2,5}?xy ab____xy_ 0 8 | |
751 | ;ab[_[.ae.]]{2,5}?xy ab_____xy_ 0 9 | |
752 | ;ab[_[.ae.]]{2,5}?xy ab__xy 0 6 | |
753 | ;ab[_[.ae.]]{2,5}?xy ab_____xy 0 9 | |
754 | ;ab[_[.ae.]]{2,5}? ab__ 0 4 | |
755 | ;ab[_[.ae.]]{2,5}? ab_______ 0 4 | |
756 | ;ab[_[.ae.]]{2,5}?xy ab______xy -1 -1 | |
757 | ;ab[_[.ae.]]{2,5}xy ab_xy -1 -1 | |
758 | ||
759 | ; new bugs detected in spring 2003: | |
760 | - normal match_continuous REG_NO_POSIX_TEST | |
761 | b abc 1 2 | |
762 | ||
763 | () abc 0 0 0 0 | |
764 | ^() abc 0 0 0 0 | |
765 | ^()+ abc 0 0 0 0 | |
766 | ^(){1} abc 0 0 0 0 | |
767 | ^(){2} abc 0 0 0 0 | |
768 | ^((){2}) abc 0 0 0 0 0 0 | |
769 | () "" 0 0 0 0 | |
770 | ()\1 "" 0 0 0 0 | |
771 | ()\1 a 0 0 0 0 | |
772 | a()\1b ab 0 2 1 1 | |
773 | a()b\1 ab 0 2 1 1 | |
774 | ||
775 | ; subtleties of matching with no sub-expressions marked | |
776 | - normal match_nosubs REG_NO_POSIX_TEST | |
777 | a(b?c)+d accd 0 4 | |
778 | (wee|week)(knights|night) weeknights 0 10 | |
779 | .* abc 0 3 | |
780 | a(b|(c))d abd 0 3 | |
781 | a(b|(c))d acd 0 3 | |
782 | a(b*|c|e)d abbd 0 4 | |
783 | a(b*|c|e)d acd 0 3 | |
784 | a(b*|c|e)d ad 0 2 | |
785 | a(b?)c abc 0 3 | |
786 | a(b?)c ac 0 2 | |
787 | a(b+)c abc 0 3 | |
788 | a(b+)c abbbc 0 5 | |
789 | a(b*)c ac 0 2 | |
790 | (a|ab)(bc([de]+)f|cde) abcdef 0 6 | |
791 | a([bc]?)c abc 0 3 | |
792 | a([bc]?)c ac 0 2 | |
793 | a([bc]+)c abc 0 3 | |
794 | a([bc]+)c abcc 0 4 | |
795 | a([bc]+)bc abcbc 0 5 | |
796 | a(bb+|b)b abb 0 3 | |
797 | a(bbb+|bb+|b)b abb 0 3 | |
798 | a(bbb+|bb+|b)b abbb 0 4 | |
799 | a(bbb+|bb+|b)bb abbb 0 4 | |
800 | (.*).* abcdef 0 6 | |
801 | (a*)* bc 0 0 | |
802 | ||
803 | - normal nosubs REG_NO_POSIX_TEST | |
804 | a(b?c)+d accd 0 4 | |
805 | (wee|week)(knights|night) weeknights 0 10 | |
806 | .* abc 0 3 | |
807 | a(b|(c))d abd 0 3 | |
808 | a(b|(c))d acd 0 3 | |
809 | a(b*|c|e)d abbd 0 4 | |
810 | a(b*|c|e)d acd 0 3 | |
811 | a(b*|c|e)d ad 0 2 | |
812 | a(b?)c abc 0 3 | |
813 | a(b?)c ac 0 2 | |
814 | a(b+)c abc 0 3 | |
815 | a(b+)c abbbc 0 5 | |
816 | a(b*)c ac 0 2 | |
817 | (a|ab)(bc([de]+)f|cde) abcdef 0 6 | |
818 | a([bc]?)c abc 0 3 | |
819 | a([bc]?)c ac 0 2 | |
820 | a([bc]+)c abc 0 3 | |
821 | a([bc]+)c abcc 0 4 | |
822 | a([bc]+)bc abcbc 0 5 | |
823 | a(bb+|b)b abb 0 3 | |
824 | a(bbb+|bb+|b)b abb 0 3 | |
825 | a(bbb+|bb+|b)b abbb 0 4 | |
826 | a(bbb+|bb+|b)bb abbb 0 4 | |
827 | (.*).* abcdef 0 6 | |
828 | (a*)* bc 0 0 | |
829 |