]> git.ipfire.org Git - thirdparty/gcc.git/blob - libphobos/src/std/regex/internal/tests.d
Add D front-end, libphobos library, and D2 testsuite.
[thirdparty/gcc.git] / libphobos / src / std / regex / internal / tests.d
1 /*
2 Regular expressions package test suite.
3 */
4 module std.regex.internal.tests;
5
6 package(std.regex):
7
8 import std.conv, std.exception, std.meta, std.range,
9 std.typecons, std.regex;
10
11 import std.regex.internal.parser : Escapables; // characters that need escaping
12
13 alias Sequence(int B, int E) = staticIota!(B, E);
14
@safe unittest
{
    // Smoke test: pattern construction, both run-time matchers, Regex.empty.

    // These only have to be accepted by the parser.
    regex("(a|b)*");
    regex(`(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*(.*)\s*#`);
    regex("abc|edf|ighrg");

    auto literal = regex("abc");
    auto grouped = regex("(gylba)");

    // match
    assert("abcdef".match(literal).hit == "abc");
    assert(!"wida".match(grouped));
    // bmatch
    assert("abcdef".bmatch(literal).hit == "abc");
    assert(!"wida".bmatch(grouped));

    // A mutable char[] is accepted where a pattern string is expected.
    char[] mutablePattern = "abc".dup;
    assert(match("abc", mutablePattern));
    assert(bmatch("abc", mutablePattern));

    // A default-initialised Regex is empty until a pattern is assigned.
    Regex!char holder;
    assert(holder.empty);
    holder = regex("test");
    assert(!holder.empty);
}
33
34 /* The test vectors in this file are altered from Henry Spencer's regexp
35 test code. His copyright notice is:
36
37 Copyright (c) 1986 by University of Toronto.
38 Written by Henry Spencer. Not derived from licensed software.
39
40 Permission is granted to anyone to use this software for any
41 purpose on any computer system, and to redistribute it freely,
42 subject to the following restrictions:
43
44 1. The author is not responsible for the consequences of use of
45 this software, no matter how awful, even if they arise
46 from defects in it.
47
48 2. The origin of this software must not be misrepresented, either
49 by explicit claim or by omission.
50
51 3. Altered versions must be plainly marked as such, and must not
52 be misrepresented as being the original software.
53
54
55 */
56
57 @safe unittest
58 {
// One row of the test table. Fields, in positional order:
//   pattern - regular expression source
//   input   - text the pattern is matched against
//   result  - first character selects the expectation:
//             'c' the pattern must fail to compile,
//             'y' the pattern must match, anything else it must not match
//   format  - format string expanded against the captures of the match
//   replace - expected expansion of `format` (compared only for 'y' rows)
//   flags   - flags passed to regex(); left at its default when omitted
struct TestVectors
{
    string pattern, input, result, format, replace, flags;
}
68
69 static immutable TestVectors[] tv = [
70 TestVectors( "a\\b", "a", "y", "$&", "a" ),
71 TestVectors( "(a)b\\1", "abaab","y", "$&", "aba" ),
72 TestVectors( "()b\\1", "aaab", "y", "$&", "b" ),
73 TestVectors( "abc", "abc", "y", "$&", "abc" ),
74 TestVectors( "abc", "xbc", "n", "-", "-" ),
75 TestVectors( "abc", "axc", "n", "-", "-" ),
76 TestVectors( "abc", "abx", "n", "-", "-" ),
77 TestVectors( "abc", "xabcy","y", "$&", "abc" ),
78 TestVectors( "abc", "ababc","y", "$&", "abc" ),
79 TestVectors( "ab*c", "abc", "y", "$&", "abc" ),
80 TestVectors( "ab*bc", "abc", "y", "$&", "abc" ),
81 TestVectors( "ab*bc", "abbc", "y", "$&", "abbc" ),
82 TestVectors( "ab*bc", "abbbbc","y", "$&", "abbbbc" ),
83 TestVectors( "ab+bc", "abbc", "y", "$&", "abbc" ),
84 TestVectors( "ab+bc", "abc", "n", "-", "-" ),
85 TestVectors( "ab+bc", "abq", "n", "-", "-" ),
86 TestVectors( "ab+bc", "abbbbc","y", "$&", "abbbbc" ),
87 TestVectors( "ab?bc", "abbc", "y", "$&", "abbc" ),
88 TestVectors( "ab?bc", "abc", "y", "$&", "abc" ),
89 TestVectors( "ab?bc", "abbbbc","n", "-", "-" ),
90 TestVectors( "ab?c", "abc", "y", "$&", "abc" ),
91 TestVectors( "^abc$", "abc", "y", "$&", "abc" ),
92 TestVectors( "^abc$", "abcc", "n", "-", "-" ),
93 TestVectors( "^abc", "abcc", "y", "$&", "abc" ),
94 TestVectors( "^abc$", "aabc", "n", "-", "-" ),
95 TestVectors( "abc$", "aabc", "y", "$&", "abc" ),
96 TestVectors( "^", "abc", "y", "$&", "" ),
97 TestVectors( "$", "abc", "y", "$&", "" ),
98 TestVectors( "a.c", "abc", "y", "$&", "abc" ),
99 TestVectors( "a.c", "axc", "y", "$&", "axc" ),
100 TestVectors( "a.*c", "axyzc","y", "$&", "axyzc" ),
101 TestVectors( "a.*c", "axyzd","n", "-", "-" ),
102 TestVectors( "a[bc]d", "abc", "n", "-", "-" ),
103 TestVectors( "a[bc]d", "abd", "y", "$&", "abd" ),
104 TestVectors( "a[b-d]e", "abd", "n", "-", "-" ),
105 TestVectors( "a[b-d]e", "ace", "y", "$&", "ace" ),
106 TestVectors( "a[b-d]", "aac", "y", "$&", "ac" ),
107 TestVectors( "a[-b]", "a-", "y", "$&", "a-" ),
108 TestVectors( "a[b-]", "a-", "y", "$&", "a-" ),
109 TestVectors( "a[b-a]", "-", "c", "-", "-" ),
110 TestVectors( "a[]b", "-", "c", "-", "-" ),
111 TestVectors( "a[", "-", "c", "-", "-" ),
112 TestVectors( "a]", "a]", "y", "$&", "a]" ),
113 TestVectors( "a[\\]]b", "a]b", "y", "$&", "a]b" ),
114 TestVectors( "a[^bc]d", "aed", "y", "$&", "aed" ),
115 TestVectors( "a[^bc]d", "abd", "n", "-", "-" ),
116 TestVectors( "a[^-b]c", "adc", "y", "$&", "adc" ),
117 TestVectors( "a[^-b]c", "a-c", "n", "-", "-" ),
118 TestVectors( "a[^\\]b]c", "adc", "y", "$&", "adc" ),
119 TestVectors( "ab|cd", "abc", "y", "$&", "ab" ),
120 TestVectors( "ab|cd", "abcd", "y", "$&", "ab" ),
121 TestVectors( "()ef", "def", "y", "$&-$1", "ef-" ),
122 TestVectors( "()*", "-", "y", "-", "-" ),
123 TestVectors( "*a", "-", "c", "-", "-" ),
124 TestVectors( "^*", "-", "y", "-", "-" ),
125 TestVectors( "$*", "-", "y", "-", "-" ),
126 TestVectors( "(*)b", "-", "c", "-", "-" ),
127 TestVectors( "$b", "b", "n", "-", "-" ),
128 TestVectors( "a\\", "-", "c", "-", "-" ),
129 TestVectors( "a\\(b", "a(b", "y", "$&-$1", "a(b-" ),
130 TestVectors( "a\\(*b", "ab", "y", "$&", "ab" ),
131 TestVectors( "a\\(*b", "a((b", "y", "$&", "a((b" ),
132 TestVectors( "a\\\\b", "a\\b", "y", "$&", "a\\b" ),
133 TestVectors( "abc)", "-", "c", "-", "-" ),
134 TestVectors( "(abc", "-", "c", "-", "-" ),
135 TestVectors( "((a))", "abc", "y", "$&-$1-$2", "a-a-a" ),
136 TestVectors( "(a)b(c)", "abc", "y", "$&-$1-$2", "abc-a-c" ),
137 TestVectors( "a+b+c", "aabbabc","y", "$&", "abc" ),
138 TestVectors( "a**", "-", "c", "-", "-" ),
139 TestVectors( "a*?a", "aa", "y", "$&", "a" ),
140 TestVectors( "(a*)*", "aaa", "y", "-", "-" ),
141 TestVectors( "(a*)+", "aaa", "y", "-", "-" ),
142 TestVectors( "(a|)*", "-", "y", "-", "-" ),
143 TestVectors( "(a*|b)*", "aabb", "y", "-", "-" ),
144 TestVectors( "(a|b)*", "ab", "y", "$&-$1", "ab-b" ),
145 TestVectors( "(a+|b)*", "ab", "y", "$&-$1", "ab-b" ),
146 TestVectors( "(a+|b)+", "ab", "y", "$&-$1", "ab-b" ),
147 TestVectors( "(a+|b)?", "ab", "y", "$&-$1", "a-a" ),
148 TestVectors( "[^ab]*", "cde", "y", "$&", "cde" ),
149 TestVectors( "(^)*", "-", "y", "-", "-" ),
150 TestVectors( "(ab|)*", "-", "y", "-", "-" ),
151 TestVectors( ")(", "-", "c", "-", "-" ),
152 TestVectors( "", "abc", "y", "$&", "" ),
153 TestVectors( "abc", "", "n", "-", "-" ),
154 TestVectors( "a*", "", "y", "$&", "" ),
155 TestVectors( "([abc])*d", "abbbcd", "y", "$&-$1", "abbbcd-c" ),
156 TestVectors( "([abc])*bcd", "abcd", "y", "$&-$1", "abcd-a" ),
157 TestVectors( "a|b|c|d|e", "e", "y", "$&", "e" ),
158 TestVectors( "(a|b|c|d|e)f", "ef", "y", "$&-$1", "ef-e" ),
159 TestVectors( "((a*|b))*", "aabb", "y", "-", "-" ),
160 TestVectors( "abcd*efg", "abcdefg", "y", "$&", "abcdefg" ),
161 TestVectors( "ab*", "xabyabbbz", "y", "$&", "ab" ),
162 TestVectors( "ab*", "xayabbbz", "y", "$&", "a" ),
163 TestVectors( "(ab|cd)e", "abcde", "y", "$&-$1", "cde-cd" ),
164 TestVectors( "[abhgefdc]ij", "hij", "y", "$&", "hij" ),
165 TestVectors( "^(ab|cd)e", "abcde", "n", "x$1y", "xy" ),
166 TestVectors( "(abc|)ef", "abcdef", "y", "$&-$1", "ef-" ),
167 TestVectors( "(a|b)c*d", "abcd", "y", "$&-$1", "bcd-b" ),
168 TestVectors( "(ab|ab*)bc", "abc", "y", "$&-$1", "abc-a" ),
169 TestVectors( "a([bc]*)c*", "abc", "y", "$&-$1", "abc-bc" ),
170 TestVectors( "a([bc]*)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
171 TestVectors( "a([bc]+)(c*d)", "abcd", "y", "$&-$1-$2", "abcd-bc-d" ),
172 TestVectors( "a([bc]*)(c+d)", "abcd", "y", "$&-$1-$2", "abcd-b-cd" ),
173 TestVectors( "a[bcd]*dcdcde", "adcdcde", "y", "$&", "adcdcde" ),
174 TestVectors( "a[bcd]+dcdcde", "adcdcde", "n", "-", "-" ),
175 TestVectors( "(ab|a)b*c", "abc", "y", "$&-$1", "abc-ab" ),
176 TestVectors( "((a)(b)c)(d)", "abcd", "y", "$1-$2-$3-$4", "abc-a-b-d" ),
177 TestVectors( "[a-zA-Z_][a-zA-Z0-9_]*", "alpha", "y", "$&", "alpha" ),
178 TestVectors( "^a(bc+|b[eh])g|.h$", "abh", "y", "$&-$1", "bh-" ),
179 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effgz", "y", "$&-$1-$2", "effgz-effgz-" ),
180 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "ij", "y", "$&-$1-$2", "ij-ij-j" ),
181 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "effg", "n", "-", "-" ),
182 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "bcdd", "n", "-", "-" ),
183 TestVectors( "(bc+d$|ef*g.|h?i(j|k))", "reffgz", "y", "$&-$1-$2", "effgz-effgz-" ),
184 TestVectors( "(((((((((a)))))))))", "a", "y", "$&", "a" ),
185 TestVectors( "multiple words of text", "uh-uh", "n", "-", "-" ),
186 TestVectors( "multiple words", "multiple words, yeah", "y", "$&", "multiple words" ),
187 TestVectors( "(.*)c(.*)", "abcde", "y", "$&-$1-$2", "abcde-ab-de" ),
188 TestVectors( "\\((.*), (.*)\\)", "(a, b)", "y", "($2, $1)", "(b, a)" ),
189 TestVectors( "abcd", "abcd", "y", "$&-&-$$$&", "abcd-&-$abcd" ),
190 TestVectors( "a(bc)d", "abcd", "y", "$1-$$1-$$$1", "bc-$1-$bc" ),
191 TestVectors( "[k]", "ab", "n", "-", "-" ),
192 TestVectors( "[ -~]*", "abc", "y", "$&", "abc" ),
193 TestVectors( "[ -~ -~]*", "abc", "y", "$&", "abc" ),
194 TestVectors( "[ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
195 TestVectors( "[ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
196 TestVectors( "[ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
197 TestVectors( "[ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
198 TestVectors( "[ -~ -~ -~ -~ -~ -~ -~]*", "abc", "y", "$&", "abc" ),
199 TestVectors( "a{2}", "candy", "n", "", "" ),
200 TestVectors( "a{2}", "caandy", "y", "$&", "aa" ),
201 TestVectors( "a{2}", "caaandy", "y", "$&", "aa" ),
202 TestVectors( "a{2,}", "candy", "n", "", "" ),
203 TestVectors( "a{2,}", "caandy", "y", "$&", "aa" ),
204 TestVectors( "a{2,}", "caaaaaandy", "y", "$&", "aaaaaa" ),
205 TestVectors( "a{1,3}", "cndy", "n", "", "" ),
206 TestVectors( "a{1,3}", "candy", "y", "$&", "a" ),
207 TestVectors( "a{1,3}", "caandy", "y", "$&", "aa" ),
208 TestVectors( "a{1,3}", "caaaaaandy", "y", "$&", "aaa" ),
209 TestVectors( "e?le?", "angel", "y", "$&", "el" ),
210 TestVectors( "e?le?", "angle", "y", "$&", "le" ),
211 TestVectors( "\\bn\\w", "noonday", "y", "$&", "no" ),
212 TestVectors( "\\wy\\b", "possibly yesterday", "y", "$&", "ly" ),
213 TestVectors( "\\w\\Bn", "noonday", "y", "$&", "on" ),
214 TestVectors( "y\\B\\w", "possibly yesterday", "y", "$&", "ye" ),
215 TestVectors( "\\cJ", "abc\ndef", "y", "$&", "\n" ),
216 TestVectors( "\\d", "B2 is", "y", "$&", "2" ),
217 TestVectors( "\\D", "B2 is", "y", "$&", "B" ),
218 TestVectors( "\\s\\w*", "foo bar", "y", "$&", " bar" ),
219 TestVectors( "\\S\\w*", "foo bar", "y", "$&", "foo" ),
220 TestVectors( "abc", "ababc", "y", "$&", "abc" ),
221 TestVectors( "apple(,)\\sorange\\1", "apple, orange, cherry, peach", "y", "$&", "apple, orange," ),
222 TestVectors( "(\\w+)\\s(\\w+)", "John Smith", "y", "$2, $1", "Smith, John" ),
223 TestVectors( "\\n\\f\\r\\t\\v", "abc\n\f\r\t\vdef", "y", "$&", "\n\f\r\t\v" ),
224 TestVectors( ".*c", "abcde", "y", "$&", "abc" ),
225 TestVectors( "^\\w+((;|=)\\w+)+$", "some=host=tld", "y", "$&-$1-$2", "some=host=tld-=tld-=" ),
226 TestVectors( "^\\w+((\\.|-)\\w+)+$", "some.host.tld", "y", "$&-$1-$2", "some.host.tld-.tld-." ),
227 TestVectors( "q(a|b)*q", "xxqababqyy", "y", "$&-$1", "qababq-b" ),
228 TestVectors( "^(a)(b){0,1}(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
229 TestVectors( "^(a)((b){0,1})(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
230 TestVectors( "^(a)(b)?(c*)", "abcc", "y", "$1 $2 $3", "a b cc" ),
231 TestVectors( "^(a)((b)?)(c*)", "abcc", "y", "$1 $2 $3", "a b b" ),
232 TestVectors( "^(a)(b){0,1}(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
233 TestVectors( "^(a)((b){0,1})(c*)", "acc", "y", "$1 $2 $3", "a " ),
234 TestVectors( "^(a)(b)?(c*)", "acc", "y", "$1 $2 $3", "a cc" ),
235 TestVectors( "^(a)((b)?)(c*)", "acc", "y", "$1 $2 $3", "a " ),
236 TestVectors( "(?:ab){3}", "_abababc","y", "$&-$1", "ababab-" ),
237 TestVectors( "(?:a(?:x)?)+", "aaxaxx", "y", "$&-$1-$2", "aaxax--" ),
238 TestVectors( `\W\w\W`, "aa b!ca", "y", "$&", " b!"),
239 //more repetitions:
240 TestVectors( "(?:a{2,4}b{1,3}){1,2}", "aaabaaaabbb", "y", "$&", "aaabaaaabbb" ),
241 TestVectors( "(?:a{2,4}b{1,3}){1,2}?", "aaabaaaabbb", "y", "$&", "aaab" ),
242 //groups:
243 TestVectors( "(abc)|(edf)|(xyz)", "xyz", "y", "$1-$2-$3","--xyz"),
244 TestVectors( "(?P<q>\\d+)/(?P<d>\\d+)", "2/3", "y", "${d}/${q}", "3/2"),
245 //set operations:
246 TestVectors( "[a-z--d-f]", " dfa", "y", "$&", "a"),
247 TestVectors( "[abc[pq--acq]]{2}", "bqpaca", "y", "$&", "pa"),
248 TestVectors( "[a-z9&&abc0-9]{3}", "z90a0abc", "y", "$&", "abc"),
249 TestVectors( "[0-9a-f~~0-5a-z]{2}", "g0a58x", "y", "$&", "8x"),
250 TestVectors( "[abc[pq]xyz[rs]]{4}", "cqxr", "y", "$&", "cqxr"),
251 TestVectors( "[abcdf--[ab&&[bcd]][acd]]", "abcdefgh", "y", "$&", "f"),
252 TestVectors( "[a-c||d-f]+", "abcdef", "y", "$&", "abcdef"),
253 TestVectors( "[a-f--a-c]+", "abcdef", "y", "$&", "def"),
254 TestVectors( "[a-c&&b-f]+", "abcdef", "y", "$&", "bc"),
255 TestVectors( "[a-c~~b-f]+", "abcdef", "y", "$&", "a"),
256 //unicode blocks & properties:
257 TestVectors( `\P{Inlatin1suppl ement}`, "\u00c2!", "y", "$&", "!"),
258 TestVectors( `\p{InLatin-1 Supplement}\p{in-mathematical-operators}\P{Inlatin1suppl ement}`,
259 "\u00c2\u2200\u00c3\u2203.", "y", "$&", "\u00c3\u2203."),
260 TestVectors( `[-+*/\p{in-mathematical-operators}]{2}`, "a+\u2212", "y", "$&", "+\u2212"),
261 TestVectors( `\p{Ll}+`, "XabcD", "y", "$&", "abc"),
262 TestVectors( `\p{Lu}+`, "абвГДЕ", "y", "$&", "ГДЕ"),
263 TestVectors( `^\p{Currency Symbol}\p{Sc}`, "$₤", "y", "$&", "$₤"),
264 TestVectors( `\p{Common}\p{Thai}`, "!ฆ", "y", "$&", "!ฆ"),
265 TestVectors( `[\d\s]*\D`, "12 \t3\U00001680\u0F20_2", "y", "$&", "12 \t3\U00001680\u0F20_"),
266 TestVectors( `[c-wф]фф`, "ффф", "y", "$&", "ффф"),
267 //case insensitive:
268 TestVectors( `^abcdEf$`, "AbCdEF", "y", "$&", "AbCdEF", "i"),
269 TestVectors( `Русский язык`, "рУсскИй ЯзЫк", "y", "$&", "рУсскИй ЯзЫк", "i"),
270 TestVectors( `ⒶⒷⓒ` , "ⓐⓑⒸ", "y", "$&", "ⓐⓑⒸ", "i"),
271 TestVectors( "\U00010400{2}", "\U00010428\U00010400 ", "y", "$&", "\U00010428\U00010400", "i"),
272 TestVectors( `[adzУ-Я]{4}`, "DzюЯ", "y", "$&", "DzюЯ", "i"),
273 TestVectors( `\p{L}\p{Lu}{10}`, "абвгдеЖЗИКЛ", "y", "$&", "абвгдеЖЗИКЛ", "i"),
274 TestVectors( `(?:Dåb){3}`, "DåbDÅBdÅb", "y", "$&", "DåbDÅBdÅb", "i"),
275 //escapes:
276 TestVectors( `\u0041\u005a\U00000065\u0001`, "AZe\u0001", "y", "$&", "AZe\u0001"),
277 TestVectors( `\u`, "", "c", "-", "-"),
278 TestVectors( `\U`, "", "c", "-", "-"),
279 TestVectors( `\u003`, "", "c", "-", "-"),
280 TestVectors( `[\x00-\x7f]{4}`, "\x00\x09ab", "y", "$&", "\x00\x09ab"),
281 TestVectors( `[\cJ\cK\cA-\cD]{3}\cQ`, "\x01\x0B\x0A\x11", "y", "$&", "\x01\x0B\x0A\x11"),
282 TestVectors( `\r\n\v\t\f\\`, "\r\n\v\t\f\\", "y", "$&", "\r\n\v\t\f\\"),
283 TestVectors( `[\u0003\u0001]{2}`, "\u0001\u0003", "y", "$&", "\u0001\u0003"),
284 TestVectors( `^[\u0020-\u0080\u0001\n-\r]{8}`, "abc\u0001\v\f\r\n", "y", "$&", "abc\u0001\v\f\r\n"),
285 TestVectors( `\w+\S\w+`, "ab7!44c", "y", "$&", "ab7!44c"),
286 TestVectors( `\b\w+\b`, " abde4 ", "y", "$&", "abde4"),
287 TestVectors( `\b\w+\b`, " abde4", "y", "$&", "abde4"),
288 TestVectors( `\b\w+\b`, "abde4 ", "y", "$&", "abde4"),
289 TestVectors( `\pL\pS`, "a\u02DA", "y", "$&", "a\u02DA"),
290 TestVectors( `\pX`, "", "c", "-", "-"),
291 // ^, $, \b, \B, multiline :
292 TestVectors( `\r.*?$`, "abc\r\nxy", "y", "$&", "\r\nxy", "sm"),
293 TestVectors( `^a$^b$`, "a\r\nb\n", "n", "$&", "-", "m"),
294 TestVectors( `^a$\r\n^b$`,"a\r\nb\n", "y", "$&", "a\r\nb", "m"),
295 TestVectors( `^$`, "\r\n", "y", "$&", "", "m"),
296 TestVectors( `^a$\nx$`, "a\nx\u2028","y", "$&", "a\nx", "m"),
297 TestVectors( `^a$\nx$`, "a\nx\u2029","y", "$&", "a\nx", "m"),
298 TestVectors( `^a$\nx$`, "a\nx\u0085","y", "$&", "a\nx","m"),
299 TestVectors( `^x$`, "\u2028x", "y", "$&", "x", "m"),
300 TestVectors( `^x$`, "\u2029x", "y", "$&", "x", "m"),
301 TestVectors( `^x$`, "\u0085x", "y", "$&", "x", "m"),
302 TestVectors( `\b^.`, "ab", "y", "$&", "a"),
303 TestVectors( `\B^.`, "ab", "n", "-", "-"),
304 TestVectors( `^ab\Bc\B`, "\r\nabcd", "y", "$&", "abc", "m"),
305 TestVectors( `^.*$`, "12345678", "y", "$&", "12345678"),
306
307 // luckily obtained regression on incremental matching in backtracker
308 TestVectors( `^(?:(?:([0-9A-F]+)\.\.([0-9A-F]+)|([0-9A-F]+))\s*;\s*([^ ]*)\s*#|# (?:\w|_)+=((?:\w|_)+))`,
309 "0020 ; White_Space # ", "y", "$1-$2-$3", "--0020"),
310 //lookahead
311 TestVectors( "(foo.)(?=(bar))", "foobar foodbar", "y", "$&-$1-$2", "food-food-bar" ),
312 TestVectors( `\b(\d+)[a-z](?=\1)`, "123a123", "y", "$&-$1", "123a-123" ),
313 TestVectors( `\$(?!\d{3})\w+`, "$123 $abc", "y", "$&", "$abc"),
314 TestVectors( `(abc)(?=(ed(f))\3)`, "abcedff", "y", "-", "-"),
315 TestVectors( `\b[A-Za-z0-9.]+(?=(@(?!gmail)))`, "a@gmail,x@com", "y", "$&-$1", "x-@"),
316 TestVectors( `x()(abc)(?=(d)(e)(f)\2)`, "xabcdefabc", "y", "$&", "xabc"),
317 TestVectors( `x()(abc)(?=(d)(e)(f)()\3\4\5)`, "xabcdefdef", "y", "$&", "xabc"),
318 //lookback
319 TestVectors( `(?<=(ab))\d`, "12ba3ab4", "y", "$&-$1", "4-ab", "i"),
320 TestVectors( `\w(?<!\d)\w`, "123ab24", "y", "$&", "ab"),
321 TestVectors( `(?<=Dåb)x\w`, "DåbDÅBxdÅb", "y", "$&", "xd", "i"),
322 TestVectors( `(?<=(ab*c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
323 TestVectors( `(?<=(ab*?c))x`, "abbbbcxac", "y", "$&-$1", "x-abbbbc"),
324 TestVectors( `(?<=(a.*?c))x`, "ababbcxac", "y", "$&-$1", "x-abbc"),
325 TestVectors( `(?<=(a{2,4}b{1,3}))x`, "yyaaaabx", "y", "$&-$1", "x-aaaab"),
326 TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}))x`, "aabbbaaaabx", "y", "$&-$1", "x-aabbbaaaab"),
327 TestVectors( `(?<=((?:a{2,4}b{1,3}){1,2}?))x`, "aabbbaaaabx", "y", "$&-$1", "x-aaaab"),
328 TestVectors( `(?<=(abc|def|aef))x`, "abcx", "y", "$&-$1", "x-abc"),
329 TestVectors( `(?<=(abc|def|aef))x`, "aefx", "y", "$&-$1", "x-aef"),
330 TestVectors( `(?<=(abc|dabc))(x)`, "dabcx", "y", "$&-$1-$2", "x-abc-x"),
331 TestVectors( `(?<=(|abc))x`, "dabcx", "y", "$&-$1", "x-"),
332 TestVectors( `(?<=((ab|da)*))x`, "abdaabx", "y", "$&-$2-$1", "x-ab-abdaab"),
333 TestVectors( `a(?<=(ba(?<=(aba)(?<=aaba))))`, "aabaa", "y", "$&-$1-$2", "a-ba-aba"),
334 TestVectors( `.(?<!b).`, "bax", "y", "$&", "ax"),
335 TestVectors( `(?<=b(?<!ab)).`, "abbx", "y", "$&", "x"),
336 TestVectors( `(?<=\.|[!?]+)X`, "Hey?!X", "y", "$&", "X"),
337 TestVectors( `(?<=\.|[!?]+)a{3}`, ".Nope.aaaX", "y", "$&", "aaa"),
338 //mixed lookaround
339 TestVectors( `a(?<=a(?=b))b`, "ab", "y", "$&", "ab"),
340 TestVectors( `a(?<=a(?!b))c`, "ac", "y", "$&", "ac"),
341 TestVectors( `a(?i)bc`, "aBc", "y", "$&", "aBc"),
342 TestVectors( `a(?i)bc`, "Abc", "n", "$&", "-"),
343 TestVectors( `(?i)a(?-i)bc`, "aBcAbc", "y", "$&", "Abc"),
344 TestVectors( `(?s).(?-s).`, "\n\n\na", "y", "$&", "\na"),
345 TestVectors( `(?m)^a(?-m)$`, "\na", "y", "$&", "a")
346 ];
// Expands the format string `fmt` against the captures of `m` through
// replaceFmt and returns the accumulated text.
string produceExpected(M,String)(auto ref M m, String fmt)
{
    auto sink = appender!String();
    replaceFmt(fmt, m.captures, sink, true);
    return sink.data;
}
// Drives every row of `tv` through the run-time matcher `matchFn`, once for
// each of char, wchar and dchar.
void run_tests(alias matchFn)()
{
    foreach (Char; AliasSeq!( char, wchar, dchar))
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        alias String = immutable(Char)[];

        // Same as the outer helper, but yields a string of the current width.
        String produceExpected(M,Range)(auto ref M m, Range fmt)
        {
            auto sink = appender!(String)();
            replaceFmt(fmt, m.captures, sink, true);
            return sink.data;
        }

        Regex!(Char) r;
        foreach (a, tvd; tv)
        {
            immutable char kind = tvd.result[0];
            immutable bool wantCompileError = kind == 'c';
            debug(std_regex_test) writeln(" Test #", a, " pattern: ", tvd.pattern, " with Char = ", Char.stringof);

            bool compiled = true;
            try
            {
                r = regex(to!(String)(tvd.pattern), tvd.flags);
            }
            catch (RegexException e)
            {
                compiled = false;
                debug(std_regex_test) writeln(e.msg);
            }

            // 'c' rows must be rejected, every other row must compile.
            assert(compiled != wantCompileError, "failed to compile pattern "~tvd.pattern);

            if (wantCompileError)
                continue;

            auto m = matchFn(to!(String)(tvd.input), r);
            immutable bool wantMatch = kind == 'y';
            immutable bool matched = !m.empty;
            assert(
                matched == wantMatch,
                text(matchFn.stringof ~": failed to match pattern #", a ,": ", tvd.pattern)
            );

            if (wantMatch)
            {
                auto result = produceExpected(m, to!(String)(tvd.format));
                assert(result == to!String(tvd.replace),
                    text(matchFn.stringof ~": mismatch pattern #", a, ": ", tvd.pattern," expected: ",
                        tvd.replace, " vs ", result));
            }
        }
    }();
    debug(std_regex_test) writeln("!!! FReD bulk test done "~matchFn.stringof~" !!!");
}
403
404
// Checks ctRegex against a compile-time selected slice of `tv`.
//
// The slice is chosen by the std_regex_ct1..std_regex_ct4 version
// identifiers; without any of them a reduced default subset is used.
// For each selected row:
//   * 'c' rows: regex() on the pattern must not be evaluable at compile time;
//   * other rows: the ctRegex IR must equal the run-time regex IR, and
//     matching tvd.input must succeed exactly when the row is marked 'y'.
// Diagnostics report `v`, the row's index in `tv`, so that the number agrees
// with the one printed by run_tests for the same row.
void ct_tests()
{
    import std.algorithm.comparison : equal;
    version (std_regex_ct1)
    {
        pragma(msg, "Testing 1st part of ctRegex");
        alias Tests = Sequence!(0, 155);
    }
    else version (std_regex_ct2)
    {
        pragma(msg, "Testing 2nd part of ctRegex");
        alias Tests = Sequence!(155, 174);
    }
    //FIXME: #174-178 contains CTFE parser bug
    else version (std_regex_ct3)
    {
        pragma(msg, "Testing 3rd part of ctRegex");
        alias Tests = Sequence!(178, 220);
    }
    else version (std_regex_ct4)
    {
        pragma(msg, "Testing 4th part of ctRegex");
        alias Tests = Sequence!(220, tv.length);
    }
    else
        alias Tests = AliasSeq!(Sequence!(0, 30), Sequence!(235, tv.length-5));
    foreach (v; Tests)
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        enum tvd = tv[v];
        static if (tvd.result == "c")
        {
            static assert(!__traits(compiles, (){
                enum r = regex(tvd.pattern, tvd.flags);
            }), "errornously compiles regex pattern: " ~ tvd.pattern);
        }
        else
        {
            //BUG: tv[v] is fine but tvd is not known at compile time?!
            auto r = ctRegex!(tv[v].pattern, tv[v].flags);
            auto nr = regex(tvd.pattern, tvd.flags);
            assert(equal(r.ir, nr.ir),
                text("!C-T regex! failed to compile pattern #", v ,": ", tvd.pattern));
            auto m = match(tvd.input, r);
            auto c = tvd.result[0];
            // true when "expected to match" and "did match" agree
            bool ok = (c == 'y') ^ m.empty;
            assert(ok, text("ctRegex: failed to match pattern #",
                v ,": ", tvd.pattern));
            if (c == 'y')
            {
                import std.stdio;
                auto result = produceExpected(m, tvd.format);
                // NOTE(review): a capture mismatch is only printed, not
                // asserted; the reason is not visible here, so left as-is.
                if (result != tvd.replace)
                    writeln("ctRegex mismatch pattern #", v, ": ", tvd.pattern," expected: ",
                        tvd.replace, " vs ", result);
            }
        }
    }();
    debug(std_regex_test) writeln("!!! FReD C-T test done !!!");
}
464
465 ct_tests();
466 run_tests!bmatch(); //backtracker
467 run_tests!match(); //thompson VM
468 }
469
@safe unittest
{
    // ctRegex patterns run through bmatch: literal, then a star.
    auto literal = ctRegex!"abc";
    assert("abc".bmatch(literal).hit == "abc");

    auto starred = ctRegex!"ab*c";
    assert("abbbbc".bmatch(starred).hit == "abbbbc");
}
@safe unittest
{
    // ctRegex with ^ / $ anchors and with \b / \B assertions.
    auto anchored = ctRegex!"^abc$";
    assert("abc".bmatch(anchored).hit == "abc");

    auto bounded = ctRegex!`\b(a\B[a-z]b)\b`;
    auto caps = "azb".match(bounded).captures;
    assert(array(caps) == ["azb", "azb"]);
}
484
@safe unittest
{
    // Counted repetition in ctRegex: greedy on string, lazy on wstring.
    auto greedyRep = ctRegex!"(?:a{2,4}b{1,3}){1,2}";
    assert("aaabaaaabbb".bmatch(greedyRep).hit == "aaabaaaabbb");

    auto lazyRep = ctRegex!("(?:a{2,4}b{1,3}){1,2}?"w);
    assert("aaabaaaabbb"w.bmatch(lazyRep).hit == "aaab"w);
}
492
@safe unittest
{
    // Lazy quantifiers in ctRegex, with and without the "sm" flags.
    auto crToEol = ctRegex!(`\r.*?$`, "sm");
    assert("abc\r\nxy".bmatch(crToEol).hit == "\r\nxy");

    auto onePacket = ctRegex!"<packet.*?/packet>";
    auto found = "<packet>text</packet><packet>text</packet>".bmatch(onePacket);
    assert(found.hit == "<packet>text</packet>");
}
501
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Capture groups filled in by a ctRegex, including an optional group.
    auto optGroup = ctRegex!"^(a)(b)?(c*)";
    auto optHit = bmatch("abcc", optGroup);
    assert(!optHit.empty);
    auto caps = optHit.captures;
    assert(caps[1] == "a");
    assert(caps[2] == "b");
    assert(caps[3] == "cc");

    // A repeated group reports its last iteration.
    auto repGroup = ctRegex!"q(a|b)*q";
    auto repHit = match("xxqababqyy", repGroup);
    assert(!repHit.empty);
    assert(equal(bmatch("xxqababqyy", repGroup).captures, ["qababq", "b"]));
}
516
@safe unittest
{
    import std.algorithm.comparison : equal;

    // regex() evaluated at compile time (enum) must yield the same IR as
    // the identical call made at run time.
    enum plainCT = regex("a|b|c");
    auto plainRT = regex("a|b|c");
    assert(equal(plainRT.ir, plainCT.ir));

    //CTFE parser BUG is triggered by group
    //in the middle of alternation (at least not first and not last)
    enum groupCT = regex(`abc|(edf)|xyz`);
    auto groupRT = regex(`abc|(edf)|xyz`);
    assert(equal(groupCT.ir, groupRT.ir));
}
529
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;

    // alternation inside a capturing group
    enum oneOf = ctRegex!"(A|B|C)";
    auto oneOfHit = match("B", oneOf);
    assert(oneOfHit);
    assert(equal(oneOfHit.captures, [ "B", "B"]));

    // starred alternation
    enum starAlt = ctRegex!"(A|B)*";
    assert(match("BAAA", starAlt));

    // counted repetition, case-insensitive
    enum counted = ctRegex!("a{3,4}", "i");
    auto countedHit = match("AaA", counted);
    assert(countedHit);
    assert(countedHit.captures[0] == "AaA");

    // lazy counted repetition followed by a character class
    enum lazyCounted = ctRegex!(`^a{3,4}?[a-zA-Z0-9~]{1,2}`, "i");
    auto lazyHit = match("aaaabc", lazyCounted);
    assert(lazyHit);
    assert(lazyHit.captures[0] == "aaaab");

    // optional group
    auto optGroup = ctRegex!"(a)(b)?(c*)";
    auto optHit = bmatch("abcc", optGroup);
    assert(optHit);
    auto optCaps = optHit.captures;
    assert(optCaps[1] == "a");
    assert(optCaps[2] == "b");
    assert(optCaps[3] == "cc");

    // global multi-line matching on input that contains \r
    auto eol = ctRegex!(".*$", "gm");
    auto lines = match("First\rSecond", eol);
    assert(lines);
    assert(equal(map!"a.hit"(lines), ["First", "", "Second"]));
}
560
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    // Flag handling ("g", "x", "gm"), executed once per run-time matcher.
    // Every check goes through `matchFn`, so bmatch and match are both
    // exercised on each pattern.
    void test_body(alias matchFn)()
    {
        //global matching
        string s = "a quick brown fox jumps over a lazy dog";
        auto r1 = regex("\\b[a-z]+\\b","g");
        string[] test;
        foreach (m; matchFn(s, r1))
            test ~= m.hit;
        assert(equal(test, [ "a", "quick", "brown", "fox", "jumps", "over", "a", "lazy", "dog"]));
        // free-form pattern: compiled with the "x" flag
        auto free_reg = regex(`

abc
\s+
"
(
[^"]+
| \\ "
)+
"
z
`, "x");
        auto m = matchFn(`abc "quoted string with \" inside"z`,free_reg);
        assert(m);
        // lookbehind combined with global matching
        string mails = " hey@you.com no@spam.net ";
        auto rm = regex(`@(?<=\S+@)\S+`,"g");
        assert(equal(map!"a[0]"(matchFn(mails, rm)), ["@you.com", "@spam.net"]));
        // $ in multi-line mode: greedy star, greedy plus, lazy plus
        auto m2 = matchFn("First line\nSecond line",regex(".*$","gm"));
        assert(equal(map!"a[0]"(m2), ["First line", "", "Second line"]));
        auto m2a = matchFn("First line\nSecond line",regex(".+$","gm"));
        assert(equal(map!"a[0]"(m2a), ["First line", "Second line"]));
        auto m2b = matchFn("First line\nSecond line",regex(".+?$","gm"));
        assert(equal(map!"a[0]"(m2b), ["First line", "Second line"]));
        debug(std_regex_test) writeln("!!! FReD FLAGS test done "~matchFn.stringof~" !!!");
    }
    test_body!bmatch();
    test_body!match();
}
602
//tests for accumulated std.regex issues and other regressions
@safe unittest
{
    import std.algorithm.comparison : equal;
    import std.algorithm.iteration : map;
    // Executed once per run-time matcher; every check goes through
    // `matchFn` so that both engines see each regression case.
    void test_body(alias matchFn)()
    {
        //issue 5857
        //matching goes out of control if ... in (...){x} has .*/.+
        auto c = matchFn("axxxzayyyyyzd",regex("(a.*z){2}d")).captures;
        assert(c[0] == "axxxzayyyyyzd");
        assert(c[1] == "ayyyyyz");
        auto c2 = matchFn("axxxayyyyyd",regex("(a.*){2}d")).captures;
        assert(c2[0] == "axxxayyyyyd");
        assert(c2[1] == "ayyyyy");
        //issue 2108
        //greedy vs non-greedy
        auto nogreed = regex("<packet.*?/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", nogreed).hit
            == "<packet>text</packet>");
        auto greed = regex("<packet.*/packet>");
        assert(matchFn("<packet>text</packet><packet>text</packet>", greed).hit
            == "<packet>text</packet><packet>text</packet>");
        //issue 4574
        //empty successful match still advances the input
        string[] pres, posts;
        foreach (m; matchFn("abcabc", regex("","g")))
        {
            pres ~= m.pre;
            posts ~= m.post;
            assert(m.hit.empty);
        }
        auto heads = [
            "abcabc",
            "abcab",
            "abca",
            "abc",
            "ab",
            "a",
            ""
        ];
        auto tails = [
            "abcabc",
            "bcabc",
            "cabc",
            "abc",
            "bc",
            "c",
            ""
        ];
        // prefixes grow as the match position advances, hence the reversal
        assert(pres == array(retro(heads)));
        assert(posts == tails);
        //issue 6076
        //regression on .*
        auto re = regex("c.*|d");
        auto m = matchFn("mm", re);
        assert(!m);
        debug(std_regex_test) writeln("!!! FReD REGRESSION test done "~matchFn.stringof~" !!!");
        auto rprealloc = regex(`((.){5}.{1,10}){5}`);
        auto arr = array(repeat('0',100));
        auto m2 = matchFn(arr, rprealloc);
        assert(m2);
        // this pattern must be accepted by the parser
        assert(collectException(
            regex(r"^(import|file|binary|config)\s+([^\(]+)\(?([^\)]*)\)?\s*$")
            ) is null);
        // every escapable character, backslash-escaped inside a class:
        // alone, negated, and as both ends of a range
        foreach (ch; [Escapables])
        {
            assert(matchFn(to!string(ch),regex(`[\`~ch~`]`)));
            assert(!matchFn(to!string(ch),regex(`[^\`~ch~`]`)));
            assert(matchFn(to!string(ch),regex(`[\`~ch~`-\`~ch~`]`)));
        }
        //bugzilla 7718
        string strcmd = "./myApp.rb -os OSX -path \"/GIT/Ruby Apps/sec\" -conf 'notimer'";
        auto reStrCmd = regex (`(".*")|('.*')`, "g");
        assert(equal(map!"a[0]"(matchFn(strcmd, reStrCmd)),
            [`"/GIT/Ruby Apps/sec"`, `'notimer'`]));
    }
    test_body!bmatch();
    test_body!match();
}
684
// tests for replace
@safe unittest
{
    // Runs the replace checks for string, wstring and dstring with the
    // given matcher.
    void test(alias matchFn)()
    {
        import std.uni : toUpper;

        foreach (String; AliasSeq!(string, wstring, dstring))
        {
            // replacement callback: upper-cases whatever was matched
            auto baz(Cap)(Cap m)
            if (is(Cap == Captures!(Cap.String)))
            {
                return toUpper(m.hit);
            }

            auto ark = to!String("ark rapacity");

            // without "g" only the first occurrence is replaced
            auto once = std.regex.replace!(matchFn)(ark, regex(to!String("r")), to!String("c"));
            assert(once == to!String("ack rapacity"));

            // with "g" every occurrence is replaced
            auto every = std.regex.replace!(matchFn)(ark, regex(to!String("r"), "g"), to!String("c"));
            assert(every == to!String("ack capacity"));

            // $& in the format
            auto wrapped = std.regex.replace!(matchFn)(to!String("noon"), regex(to!String("^n")), to!String("[$&]"));
            assert(wrapped == to!String("[n]oon"));

            // $` and $' in the format
            auto swapped = std.regex.replace!(matchFn)(
                to!String("test1 test2"), regex(to!String(`\w+`),"g"), to!String("$`:$'")
            );
            assert(swapped == to!String(": test2 test1 :"));

            // callback-driven replacement
            auto s = std.regex.replace!(baz!(Captures!(String)))(to!String("Strap a rocket engine on a chicken."),
                    regex(to!String("[ar]"), "g"));
            assert(s == "StRAp A Rocket engine on A chicken.");
        }
        debug(std_regex_test) writeln("!!! Replace test done "~matchFn.stringof~" !!!");
    }
    test!(bmatch)();
    test!(match)();
}
718
// tests for splitter
@safe unittest
{
    import std.algorithm.comparison : equal;

    // separator at both ends: an empty piece at the front and at the back
    auto bothEnds = ", abc, de, fg, hi, ";
    auto piecesA = splitter(bothEnds, regex(", *"));
    auto expectedA = ["", "abc", "de", "fg", "hi", ""];
    assert(equal(piecesA, expectedA));

    // separator only at the front
    auto frontOnly = ", abc, de, fg, hi";
    auto piecesB = splitter(frontOnly, regex(", *"));
    auto expectedB = ["", "abc", "de", "fg", "hi"];

    // element-wise walk with foreach ...
    uint idx;
    foreach (piece; piecesB)
    {
        assert(expectedB[idx] == piece);
        ++idx;
    }
    // ... and the same range compared again as a whole
    assert(equal(piecesB, expectedB));
}
739
@safe unittest
{
    // splitter has to accept a mutable char[]; only the call is checked.
    char[] mutableText = ", abc, de, fg, hi, ".dup;
    auto pieces = splitter(mutableText, regex(", *"));
}
745
@safe unittest
{
    import std.algorithm.comparison : equal;

    // split on the same input used for splitter
    auto text = ", abc, de, fg, hi, ";
    auto expected = ["", "abc", "de", "fg", "hi", ""];
    auto pieces = split(text, regex(", *"));
    assert(equal(pieces, expected[]));
}
753
@safe unittest
{
    // bugzilla 7141
    // escaped '-' followed by a range operator inside a class
    string dashClass = `[a\--b]`;
    assert("-".match(dashClass));
    assert("b".match(dashClass));

    // range that starts at '&'
    string ampClass = `[&-z]`;
    assert("b".match(ampClass));
}
@safe unittest
{
    //bugzilla 7111
    // "^" matches on empty input
    auto lineStart = regex("^");
    assert(!match("", lineStart).empty);
}
@safe unittest
{
    //bugzilla 7300
    // a pattern longer than the input yields no match (dstring overload)
    auto attempt = match("a"d, "aa"d);
    assert(attempt.empty);
}
770
// bugzilla 7551
@safe unittest
{
    // ']' directly after '[' is a literal member of the class
    auto withBracket = regex("[]abc]*");
    auto hitA = matchFirst("]ab", withBracket);
    assert(hitA.hit == "]ab");

    // "[]" on its own is rejected
    assertThrown(regex("[]"));

    // the same leading ']' combined with set difference
    auto bracketMinus = regex("[]abc--ab]*");
    auto hitB = matchFirst("]ac", bracketMinus);
    assert(hitB.hit == "]");
}
780
@safe unittest
{
    //bugzilla 7674
    // "$$" in the format yields a single '$'
    auto dollar = replace("1234", regex("^"), "$$");
    assert(dollar == "$1234");

    // backslashes in the format string
    auto single = replace("hello?", regex(r"\?", "g"), r"\?");
    assert(single == r"hello\?");
    auto doubled = replace("hello?", regex(r"\?", "g"), r"\\?");
    assert(doubled != r"hello\?");
}
@safe unittest
{
    // bugzilla 7679
    import std.algorithm.comparison : equal;
    foreach (S; AliasSeq!(string, wstring, dstring))
    (){ // avoid slow optimizations for large functions @@@BUG@@@ 2396
        // ctRegex built from each string width, used by splitter and split
        enum dot = ctRegex!(to!S(r"\."));
        auto subject = to!S("a.b");
        auto expected = [to!S("a"), to!S("b")];
        assert(equal(std.regex.splitter(subject, dot), expected));
        assert(split(subject, dot) == expected);
    }();
}
@safe unittest
{// bugzilla 8203: repeatedly iterating a multi-line global match must not fail
    string text = "
NAME = XPAW01_STA:STATION
NAME = XPAW01_STA
";
    auto source = text;
    auto nameRe = regex(
        r"^NAME = (?P<comp>[a-zA-Z0-9_]+):*(?P<blk>[a-zA-Z0-9_]*)","gm");
    auto matches = match(source, nameRe);
    // Walk the same match range twenty times; nothing is asserted beyond
    // the iteration completing.
    foreach (pass; 0 .. 20)
        foreach (hit; matches) {}
    //a second issue with same symptoms
    auto cyrillicRe = regex(`([а-яА-Я\-_]+\s*)+(?<=[\s\.,\^])`);
    match("аллея Театральная", cyrillicRe);
}
@safe unittest
{// bugzilla 8637 purity of enforce: a match result can be passed to enforce
    auto found = "hello world".match(regex("world"));
    enforce(found);
}
819
// bugzilla 8725: negative lookahead inside a free-form ("x" flag) pattern
@safe unittest
{
    // Statically initialised regex; the "x" flag makes the line breaks
    // inside the pattern insignificant.
    static emphasis = regex( r"\*
(?!\s+)
(.*?)
(?!\s+)
\*", "gx" );
    string text = "this * is* interesting, *very* interesting";
    auto rendered = replace(text, emphasis, "<i>$1</i>");
    assert(rendered ==
        "this * is* interesting, <i>very</i> interesting");
}
832
// bugzilla 8349: ctRegex with an alternation of non-capturing groups
@safe unittest
{
    enum peakPattern = r"\>(wgEncode.*Tfbs.*\.(?:narrow)|(?:broad)Peak.gz)</a>";
    enum compiledPeak = ctRegex!(peakPattern);
    //note that the regex pattern itself is probably bogus
    auto found = match(r"\>wgEncode-blah-Tfbs.narrow</a>", compiledPeak);
    assert(found);
}
841
// bugzilla 9211: a repeated capture group keeps its last iteration
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto expected = ["1234", "3", "4"];

    // Same shape twice: once with \w, once with an explicit digit class.
    auto wordForm = regex(r"^(\w)*(\d)");
    assert(equal(match("1234", wordForm).front, expected));

    auto digitForm = regex(r"^([0-9])*(\d)");
    assert(equal(match("1234", digitForm).front, expected));
}
853
// bugzilla 9280: named captures on a statically initialised regex
@safe unittest
{
    static userMask = regex(r"^(?P<nick>.*?)!(?P<ident>.*?)@(?P<host>.*?)$");
    string subject = "a!b@c";
    auto found = subject.match(userMask);
    assert(found);
    // The first group is reachable both by index and by name.
    auto groups = found.captures;
    assert(groups[1] == "a");
    assert(groups["nick"] == "a");
}
865
866
// bugzilla 9579: replace with a char[] input and a string format
@safe unittest
{
    char[] input = "abc".dup;
    string format = "($1)";
    // used to give a compile error:
    auto wrapped = replace(input, regex(`(a)`, "g"), format);
    assert(wrapped == "(a)bc");
}
877
// bugzilla 9634: ctRegex with a quantifier inside a non-capturing group
@safe unittest
{
    auto run = ctRegex!"(?:a+)";
    auto found = "aaaa".match(run);
    assert(found.hit == "aaaa");
}
884
// bugzilla 10798: set difference inside a ctRegex character class
@safe unittest
{
    auto withoutC = ctRegex!("[abcd--c]*");
    auto found = match("abc", withoutC);
    assert(found);
    // 'c' is excluded from the class, so the hit stops in front of it.
    assert(found.hit == "ab");
}
893
// bugzilla 10913: replace!fun works with both @system and @safe callbacks
@system unittest
{
    // Callback explicitly marked @system.
    @system static string systemCopy(const(char)[] text)
    {
        return text.dup;
    }
    // Same body, marked @safe.
    @safe static string safeCopy(const(char)[] text)
    {
        return text.dup;
    }
    // Each lambda carries the attribute of the callback it wraps.
    () @system {
        replace!((m) => systemCopy(m.hit))("blah", regex(`a`));
    }();
    () @safe {
        replace!((m) => safeCopy(m.hit))("blah", regex(`a`));
    }();
}
912
// bugzilla 11262: replace with a ctRegex held in an enum
@safe unittest
{
    enum comma = ctRegex!(r",", "g");
    auto text = "This,List";
    text = replace(text, comma, "-");
    assert(text == "This-List");
}
921
// bugzilla 11775: a counted repetition with min > max is rejected
@safe unittest
{
    auto failure = collectException(regex("a{1,0}"));
    assert(failure);
}
927
// bugzilla 11839: which identifiers are accepted as named-group names
@safe unittest
{
    import std.algorithm.comparison : equal;

    // Accepted names: letters followed by digits, underscores, non-ASCII letters.
    assert(regex(`(?P<var1>\w+)`).namedCaptures.equal(["var1"]));
    assert(regex(`(?P<v1>\w+)`).namedCaptures.equal(["v1"]));
    assert(regex(`(?P<__>\w+)`).namedCaptures.equal(["__"]));
    assert(regex(`(?P<я>\w+)`).namedCaptures.equal(["я"]));

    // A name that is only a digit is rejected.
    auto digitName = collectException(regex(`(?P<1>\w+)`));
    assert(digitName);
}
938
// bugzilla 12076: ctRegex with a variable-length negative lookbehind
@safe unittest
{
    auto lookbehindRe = ctRegex!(r"(?<!x[a-z]+)\s([a-z]+)");
    string words = "one two";
    // Only compiling and running the match is exercised; the result is unchecked.
    auto result = words.match(lookbehindRe);
}
946
// bugzilla 12105: lazy vs. greedy '.*' in front of a negative lookahead
@safe unittest
{
    // Lazy: stops at the first position not followed by 'a'.
    auto lazyRe = ctRegex!`.*?(?!a)`;
    assert(matchFirst("aaab", lazyRe).hit == "aaa");

    // Greedy: takes the whole input.
    auto greedyRe = ctRegex!`.*(?!a)`;
    assert(matchFirst("aaab", greedyRe).hit == "aaab");
}
955
// bugzilla 11784: intersection with a negated nested class
@safe unittest
{
    // First letter of the alphabet that is not one of a, e, i, u, o.
    auto firstHit = matchFirst("abcdefghijklmnopqrstuvwxyz", "[a-z&&[^aeiuo]]");
    assert(firstHit.hit == "b");
}
962
// bugzilla 12366: ctRegex with nested lookaheads and backreferences
@safe unittest
{
    auto nested = ctRegex!(`^((?=(xx+?)\2+$)((?=\2+$)(?=(x+)(\4+$))\5){2})*x?$`);
    // Eight x's: no match; four x's: match.
    auto eight = match("xxxxxxxx", nested);
    assert(eight.empty);
    auto four = match("xxxx", nested);
    assert(!four.empty);
}
970
// bugzilla 12582: indexing captures by an unknown group name throws
@safe unittest
{
    auto onlyA = regex(`(?P<a>abc)`);
    auto failure = collectException(matchFirst("abc", onlyA)["b"]);
    assert(failure);
}
977
// bugzilla 12691: bmatch on a group with an empty alternative under '*'
@safe unittest
{
    // Runtime pattern given as a plain string.
    auto viaString = bmatch("e@", "^([a-z]|)*$");
    assert(viaString.empty);

    // Same pattern compiled at compile time.
    auto viaCt = bmatch("e@", ctRegex!`^([a-z]|)*$`);
    assert(viaCt.empty);
}
984
// bugzilla 12713: a malformed pattern with unbalanced '[' throws
@safe unittest
{
    enum malformed = "[[a-z]([a-z]|(([[a-z])))";
    assertThrown(regex(malformed));
}
990
// bugzilla 12747: backreferences to a group that is not yet closed are rejected
@safe unittest
{
    foreach (pattern; [`^x(\1)`, `^(x(\1))`, `^((x)(?=\1))`])
        assertThrown(regex(pattern));
}
998
// bugzilla 14504: ctRegex must compile a long run of optional atoms
@safe unittest
{
    enum optionalPart = "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?";
    enum requiredPart = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
    // Compilation alone is the test; the result is never matched against.
    auto compiled = ctRegex!(optionalPart ~ requiredPart);
}
1005
// bugzilla 14529: case-insensitive character class matches both cases
@safe unittest
{
    auto anyCase = regex(r"^[CDF]$", "i");
    foreach (letter; ["C", "c", "D", "d", "F", "f"])
    {
        auto hits = matchAll(letter, anyCase);
        assert(hits.front.hit == letter);
    }
}
1013
// bugzilla 14615: replaceFirst/replaceFirstInto/replaceAllInto must emit the
// input unchanged when the pattern does not match
@safe unittest
{
    import std.array : appender;
    // Dropped the unused `std.stdio : writeln` import and listed
    // replaceAllInto explicitly, since this block calls it.
    import std.regex : replaceFirst, replaceFirstInto, replaceAllInto, regex;

    auto example = "Hello, world!";
    auto pattern = regex("^Hello, (bug)"); // won't find this one
    auto result = replaceFirst(example, pattern, "$1 Sponge Bob");
    assert(result == "Hello, world!"); // Ok.

    // The sink-based variants append the untouched input to the sink.
    auto sink = appender!string;
    replaceFirstInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!");
    // The sink is not cleared in between, so it now holds the input twice.
    replaceAllInto(sink, example, pattern, "$1 Sponge Bob");
    assert(sink.data == "Hello, world!Hello, world!");
}
1032
// bugzilla 15573: a space inside a character class still counts under the "x" flag
@safe unittest
{
    auto classWithSpace = regex("[c d]", "x");
    // The only character of "a b" that is in the class is the space.
    auto found = matchFirst("a b", classWithSpace);
    assert(found);
}
1039
// bugzilla 15864: this pattern must compile without throwing
@safe unittest
{
    enum hrefPattern = `(<a (?:(?:\w+=\"[^"]*\")?\s*)*href="\.\.?)"`;
    regex(hrefPattern);
}
1045
// (?# ...) comment groups are skipped; an unterminated one is an error
@safe unittest
{
    auto commented = regex("(?# comment)abc(?# comment2)");
    assert(matchFirst("abc", commented));
    assertThrown(regex("(?#..."));
}
1052
// bugzilla 17075: a failing ctRegex match on a very long input must complete
@safe unittest
{
    enum titlePattern = `<title>(.+)</title>`;
    static titleRegex = ctRegex!titlePattern;
    // Opening tag followed by 100_000 '<' characters and no closing tag.
    string haystack = "<title>" ~ "<".repeat(100_000).join;
    auto found = matchFirst(haystack, titleRegex);
    assert(found.empty);
}
1061
// bugzilla 17212: whitespace around a character class is ignored under the "x" flag
@safe unittest
{
    auto padded = regex(" [a] ", "x");
    auto found = matchFirst("a", padded);
    assert(found);
}
1068
// bugzilla 17157: groups of non-matching alternatives are reported as empty
@safe unittest
{
    import std.algorithm.comparison : equal;

    auto compileTimeRe = ctRegex!"(a)|(b)|(c)|(d)";
    auto runTimeRe = regex("(a)|(b)|(c)|(d)", "g");
    auto subject = "--a--b--c--d--";

    // One row per match: the whole hit, then groups 1 to 4.
    auto expected = [
        ["a", "a", "", "", ""],
        ["b", "", "b", "", ""],
        ["c", "", "", "c", ""],
        ["d", "", "", "", "d"]
    ];

    auto fromCt = subject.matchAll(compileTimeRe);
    assert(equal!equal(fromCt, expected));
    auto fromBacktracker = subject.bmatch(runTimeRe);
    assert(equal!equal(fromBacktracker, expected));
}
1085
// bugzilla 17667: error messages for malformed patterns passed as a pattern array
@safe unittest
{
    import std.algorithm.searching : canFind;

    // Compiles `arg` and checks that doing so throws an exception whose
    // message contains `msg`. `line` is the call site, used to label failures.
    void willThrow(T, size_t line = __LINE__)(T arg, string msg)
    {
        auto e = collectException(regex(arg));
        // collectException returns null when nothing was thrown; check that
        // first so the failure is a readable assertion, not a null dereference.
        assert(e !is null, to!string(line) ~ ": no exception thrown, expected: " ~ msg);
        assert(e.msg.canFind(msg), to!string(line) ~ ": " ~ e.msg);
    }
    willThrow([r".", r"[\(\{[\]\}\)]"], "no matching ']' found while parsing character class");
    willThrow([r"[\", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-", r"123"], "no matching ']' found while parsing character class");
    willThrow([r"[a-\", r"123"], "invalid escape sequence");
    willThrow([r"\", r"123"], "invalid escape sequence");
}
1101
// bugzilla 17668: "[^]" reports a missing operand for '^'
@safe unittest
{
    import std.algorithm.searching;
    auto e = collectException!RegexException(regex(q"<[^]>"));
    // collectException returns null when nothing was thrown; fail with a clear
    // message instead of dereferencing null below.
    assert(e !is null, "regex(\"[^]\") did not throw RegexException");
    assert(e.msg.canFind("no operand for '^'"));
}
1109
// bugzilla 17673: whichPattern reports the matching alternative of a pattern array
@safe unittest
{
    string subject = `<">`;
    string[] alternatives = ["abc", "\"|x"];
    auto combined = regex(alternatives);
    auto found = subject.matchFirst(combined);
    assert(found);
    // The quote is matched by the second pattern.
    assert(found.whichPattern == 2);
}
1120