]> git.ipfire.org Git - people/ms/putty.git/blob - wildcard.c
Add search to connection list box.
[people/ms/putty.git] / wildcard.c
1 /*
2 * Wildcard matching engine for use with SFTP-based file transfer
3 * programs (PSFTP, new-look PSCP): since SFTP has no notion of
4 * getting the remote side to do globbing (and rightly so) we have
5 * to do it locally, by retrieving all the filenames in a directory
6 * and checking each against the wildcard pattern.
7 */
8
9 #include <assert.h>
10 #include <stdlib.h>
11 #include <string.h>
12
13 #include "putty.h"
14
15 /*
16 * Definition of wildcard syntax:
17 *
18 * - * matches any sequence of characters, including zero.
19 * - ? matches exactly one character which can be anything.
20 * - [abc] matches exactly one character which is a, b or c.
21 * - [a-f] matches anything from a through f.
22 * - [^a-f] matches anything _except_ a through f.
23 * - [-_] matches - or _; [^-_] matches anything else. (The - is
24 * non-special if it occurs immediately after the opening
25 * bracket or ^.)
26 * - [a^] matches an a or a ^. (The ^ is non-special if it does
27 * _not_ occur immediately after the opening bracket.)
28 * - \*, \?, \[, \], \\ match the single characters *, ?, [, ], \.
29 * - All other characters are non-special and match themselves.
30 */
31
32 /*
33 * Some notes on differences from POSIX globs (IEEE Std 1003.1, 2003 ed.):
34 * - backslashes act as escapes even within [] bracket expressions
35 * - does not support [!...] for non-matching list (POSIX are weird);
36 * NB POSIX allows [^...] as well via "A bracket expression starting
37 * with an unquoted circumflex character produces unspecified
38 * results". If we wanted to allow [!...] we might want to define
39 * [^!] as having its literal meaning (match '^' or '!').
40 * - none of the scary [[:class:]] stuff, etc
41 */
42
43 /*
44 * The wildcard matching technique we use is very simple and
45 * potentially O(N^2) in running time, but I don't anticipate it
46 * being that bad in reality (particularly since N will be the size
47 * of a filename, which isn't all that much). Perhaps one day, once
48 * PuTTY has grown a regexp matcher for some other reason, I might
49 * come back and reimplement wildcards by translating them into
50 * regexps or directly into NFAs; but for the moment, in the
51 * absence of any other need for the NFA->DFA translation engine,
52 * anything more than the simplest possible wildcard matcher is
53 * vast code-size overkill.
54 *
55 * Essentially, these wildcards are much simpler than regexps in
56 * that they consist of a sequence of rigid fragments (? and [...]
57 * can never match more or less than one character) separated by
58 * asterisks. It is therefore extremely simple to look at a rigid
59 * fragment and determine whether or not it begins at a particular
60 * point in the test string; so we can search along the string
61 * until we find each fragment, then search for the next. As long
62 * as we find each fragment in the _first_ place it occurs, there
63 * will never be a danger of having to backpedal and try to find it
64 * again somewhere else.
65 */
66
67 enum {
68 WC_TRAILINGBACKSLASH = 1,
69 WC_UNCLOSEDCLASS,
70 WC_INVALIDRANGE
71 };
72
73 /*
74 * Error reporting is done by returning various negative values
75 * from the wildcard routines. Passing any such value to wc_error
76 * will give a human-readable message.
77 */
78 const char *wc_error(int value)
79 {
80 value = abs(value);
81 switch (value) {
82 case WC_TRAILINGBACKSLASH:
83 return "'\' occurred at end of string (expected another character)";
84 case WC_UNCLOSEDCLASS:
85 return "expected ']' to close character class";
86 case WC_INVALIDRANGE:
87 return "character range was not terminated (']' just after '-')";
88 }
89 return "INTERNAL ERROR: unrecognised wildcard error number";
90 }
91
92 /*
93 * This is the routine that tests a target string to see if an
94 * initial substring of it matches a fragment. If successful, it
95 * returns 1, and advances both `fragment' and `target' past the
96 * fragment and matching substring respectively. If unsuccessful it
97 * returns zero. If the wildcard fragment suffers a syntax error,
98 * it returns <0 and the precise value indexes into wc_error.
99 */
100 static int wc_match_fragment(const char **fragment, const char **target)
101 {
102 const char *f, *t;
103
104 f = *fragment;
105 t = *target;
106 /*
107 * The fragment terminates at either the end of the string, or
108 * the first (unescaped) *.
109 */
110 while (*f && *f != '*' && *t) {
111 /*
112 * Extract one character from t, and one character's worth
113 * of pattern from f, and step along both. Return 0 if they
114 * fail to match.
115 */
116 if (*f == '\\') {
117 /*
118 * Backslash, which means f[1] is to be treated as a
119 * literal character no matter what it is. It may not
120 * be the end of the string.
121 */
122 if (!f[1])
123 return -WC_TRAILINGBACKSLASH; /* error */
124 if (f[1] != *t)
125 return 0; /* failed to match */
126 f += 2;
127 } else if (*f == '?') {
128 /*
129 * Question mark matches anything.
130 */
131 f++;
132 } else if (*f == '[') {
133 int invert = 0;
134 int matched = 0;
135 /*
136 * Open bracket introduces a character class.
137 */
138 f++;
139 if (*f == '^') {
140 invert = 1;
141 f++;
142 }
143 while (*f != ']') {
144 if (*f == '\\')
145 f++; /* backslashes still work */
146 if (!*f)
147 return -WC_UNCLOSEDCLASS; /* error again */
148 if (f[1] == '-') {
149 int lower, upper, ourchr;
150 lower = (unsigned char) *f++;
151 f++; /* eat the minus */
152 if (*f == ']')
153 return -WC_INVALIDRANGE; /* different error! */
154 if (*f == '\\')
155 f++; /* backslashes _still_ work */
156 if (!*f)
157 return -WC_UNCLOSEDCLASS; /* error again */
158 upper = (unsigned char) *f++;
159 ourchr = (unsigned char) *t;
160 if (lower > upper) {
161 int t = lower; lower = upper; upper = t;
162 }
163 if (ourchr >= lower && ourchr <= upper)
164 matched = 1;
165 } else {
166 matched |= (*t == *f++);
167 }
168 }
169 if (invert == matched)
170 return 0; /* failed to match character class */
171 f++; /* eat the ] */
172 } else {
173 /*
174 * Non-special character matches itself.
175 */
176 if (*f != *t)
177 return 0;
178 f++;
179 }
180 /*
181 * Now we've done that, increment t past the character we
182 * matched.
183 */
184 t++;
185 }
186 if (!*f || *f == '*') {
187 /*
188 * We have reached the end of f without finding a mismatch;
189 * so we're done. Update the caller pointers and return 1.
190 */
191 *fragment = f;
192 *target = t;
193 return 1;
194 }
195 /*
196 * Otherwise, we must have reached the end of t before we
197 * reached the end of f; so we've failed. Return 0.
198 */
199 return 0;
200 }
201
202 /*
203 * This is the real wildcard matching routine. It returns 1 for a
204 * successful match, 0 for an unsuccessful match, and <0 for a
205 * syntax error in the wildcard.
206 */
207 int wc_match(const char *wildcard, const char *target)
208 {
209 int ret;
210
211 /*
212 * Every time we see a '*' _followed_ by a fragment, we just
213 * search along the string for a location at which the fragment
214 * matches. The only special case is when we see a fragment
215 * right at the start, in which case we just call the matching
216 * routine once and give up if it fails.
217 */
218 if (*wildcard != '*') {
219 ret = wc_match_fragment(&wildcard, &target);
220 if (ret <= 0)
221 return ret; /* pass back failure or error alike */
222 }
223
224 while (*wildcard) {
225 assert(*wildcard == '*');
226 while (*wildcard == '*')
227 wildcard++;
228
229 /*
230 * It's possible we've just hit the end of the wildcard
231 * after seeing a *, in which case there's no need to
232 * bother searching any more because we've won.
233 */
234 if (!*wildcard)
235 return 1;
236
237 /*
238 * Now `wildcard' points at the next fragment. So we
239 * attempt to match it against `target', and if that fails
240 * we increment `target' and try again, and so on. When we
241 * find we're about to try matching against the empty
242 * string, we give up and return 0.
243 */
244 ret = 0;
245 while (*target) {
246 const char *save_w = wildcard, *save_t = target;
247
248 ret = wc_match_fragment(&wildcard, &target);
249
250 if (ret < 0)
251 return ret; /* syntax error */
252
253 if (ret > 0 && !*wildcard && *target) {
254 /*
255 * Final special case - literally.
256 *
257 * This situation arises when we are matching a
258 * _terminal_ fragment of the wildcard (that is,
259 * there is nothing after it, e.g. "*a"), and it
260 * has matched _too early_. For example, matching
261 * "*a" against "parka" will match the "a" fragment
262 * against the _first_ a, and then (if it weren't
263 * for this special case) matching would fail
264 * because we're at the end of the wildcard but not
265 * at the end of the target string.
266 *
267 * In this case what we must do is measure the
268 * length of the fragment in the target (which is
269 * why we saved `target'), jump straight to that
270 * distance from the end of the string using
271 * strlen, and match the same fragment again there
272 * (which is why we saved `wildcard'). Then we
273 * return whatever that operation returns.
274 */
275 target = save_t + strlen(save_t) - (target - save_t);
276 wildcard = save_w;
277 return wc_match_fragment(&wildcard, &target);
278 }
279
280 if (ret > 0)
281 break;
282 target++;
283 }
284 if (ret > 0)
285 continue;
286 return 0;
287 }
288
289 /*
290 * If we reach here, it must be because we successfully matched
291 * a fragment and then found ourselves right at the end of the
292 * wildcard. Hence, we return 1 if and only if we are also
293 * right at the end of the target.
294 */
295 return (*target ? 0 : 1);
296 }
297
298 /*
299 * Another utility routine that translates a non-wildcard string
300 * into its raw equivalent by removing any escaping backslashes.
301 * Expects a target string buffer of anything up to the length of
302 * the original wildcard. You can also pass NULL as the output
303 * buffer if you're only interested in the return value.
304 *
305 * Returns 1 on success, or 0 if a wildcard character was
306 * encountered. In the latter case the output string MAY not be
307 * zero-terminated and you should not use it for anything!
308 */
309 int wc_unescape(char *output, const char *wildcard)
310 {
311 while (*wildcard) {
312 if (*wildcard == '\\') {
313 wildcard++;
314 /* We are lenient about trailing backslashes in non-wildcards. */
315 if (*wildcard) {
316 if (output)
317 *output++ = *wildcard;
318 wildcard++;
319 }
320 } else if (*wildcard == '*' || *wildcard == '?' ||
321 *wildcard == '[' || *wildcard == ']') {
322 return 0; /* it's a wildcard! */
323 } else {
324 if (output)
325 *output++ = *wildcard;
326 wildcard++;
327 }
328 }
329 *output = '\0';
330 return 1; /* it's clean */
331 }
332
333 #ifdef TESTMODE
334
335 struct test {
336 const char *wildcard;
337 const char *target;
338 int expected_result;
339 };
340
341 const struct test fragment_tests[] = {
342 /*
343 * We exhaustively unit-test the fragment matching routine
344 * itself, which should save us the need to test all its
345 * intricacies during the full wildcard tests.
346 */
347 {"abc", "abc", 1},
348 {"abc", "abd", 0},
349 {"abc", "abcd", 1},
350 {"abcd", "abc", 0},
351 {"ab[cd]", "abc", 1},
352 {"ab[cd]", "abd", 1},
353 {"ab[cd]", "abe", 0},
354 {"ab[^cd]", "abc", 0},
355 {"ab[^cd]", "abd", 0},
356 {"ab[^cd]", "abe", 1},
357 {"ab\\", "abc", -WC_TRAILINGBACKSLASH},
358 {"ab\\*", "ab*", 1},
359 {"ab\\?", "ab*", 0},
360 {"ab?", "abc", 1},
361 {"ab?", "ab", 0},
362 {"ab[", "abc", -WC_UNCLOSEDCLASS},
363 {"ab[c-", "abb", -WC_UNCLOSEDCLASS},
364 {"ab[c-]", "abb", -WC_INVALIDRANGE},
365 {"ab[c-e]", "abb", 0},
366 {"ab[c-e]", "abc", 1},
367 {"ab[c-e]", "abd", 1},
368 {"ab[c-e]", "abe", 1},
369 {"ab[c-e]", "abf", 0},
370 {"ab[e-c]", "abb", 0},
371 {"ab[e-c]", "abc", 1},
372 {"ab[e-c]", "abd", 1},
373 {"ab[e-c]", "abe", 1},
374 {"ab[e-c]", "abf", 0},
375 {"ab[^c-e]", "abb", 1},
376 {"ab[^c-e]", "abc", 0},
377 {"ab[^c-e]", "abd", 0},
378 {"ab[^c-e]", "abe", 0},
379 {"ab[^c-e]", "abf", 1},
380 {"ab[^e-c]", "abb", 1},
381 {"ab[^e-c]", "abc", 0},
382 {"ab[^e-c]", "abd", 0},
383 {"ab[^e-c]", "abe", 0},
384 {"ab[^e-c]", "abf", 1},
385 {"ab[a^]", "aba", 1},
386 {"ab[a^]", "ab^", 1},
387 {"ab[a^]", "abb", 0},
388 {"ab[^a^]", "aba", 0},
389 {"ab[^a^]", "ab^", 0},
390 {"ab[^a^]", "abb", 1},
391 {"ab[-c]", "ab-", 1},
392 {"ab[-c]", "abc", 1},
393 {"ab[-c]", "abd", 0},
394 {"ab[^-c]", "ab-", 0},
395 {"ab[^-c]", "abc", 0},
396 {"ab[^-c]", "abd", 1},
397 {"ab[\\[-\\]]", "abZ", 0},
398 {"ab[\\[-\\]]", "ab[", 1},
399 {"ab[\\[-\\]]", "ab\\", 1},
400 {"ab[\\[-\\]]", "ab]", 1},
401 {"ab[\\[-\\]]", "ab^", 0},
402 {"ab[^\\[-\\]]", "abZ", 1},
403 {"ab[^\\[-\\]]", "ab[", 0},
404 {"ab[^\\[-\\]]", "ab\\", 0},
405 {"ab[^\\[-\\]]", "ab]", 0},
406 {"ab[^\\[-\\]]", "ab^", 1},
407 {"ab[a-fA-F]", "aba", 1},
408 {"ab[a-fA-F]", "abF", 1},
409 {"ab[a-fA-F]", "abZ", 0},
410 };
411
412 const struct test full_tests[] = {
413 {"a", "argh", 0},
414 {"a", "ba", 0},
415 {"a", "a", 1},
416 {"a*", "aardvark", 1},
417 {"a*", "badger", 0},
418 {"*a", "park", 0},
419 {"*a", "pArka", 1},
420 {"*a", "parka", 1},
421 {"*a*", "park", 1},
422 {"*a*", "perk", 0},
423 {"?b*r?", "abracadabra", 1},
424 {"?b*r?", "abracadabr", 0},
425 {"?b*r?", "abracadabzr", 0},
426 };
427
428 int main(void)
429 {
430 int i;
431 int fails, passes;
432
433 fails = passes = 0;
434
435 for (i = 0; i < sizeof(fragment_tests)/sizeof(*fragment_tests); i++) {
436 const char *f, *t;
437 int eret, aret;
438 f = fragment_tests[i].wildcard;
439 t = fragment_tests[i].target;
440 eret = fragment_tests[i].expected_result;
441 aret = wc_match_fragment(&f, &t);
442 if (aret != eret) {
443 printf("failed test: /%s/ against /%s/ returned %d not %d\n",
444 fragment_tests[i].wildcard, fragment_tests[i].target,
445 aret, eret);
446 fails++;
447 } else
448 passes++;
449 }
450
451 for (i = 0; i < sizeof(full_tests)/sizeof(*full_tests); i++) {
452 const char *f, *t;
453 int eret, aret;
454 f = full_tests[i].wildcard;
455 t = full_tests[i].target;
456 eret = full_tests[i].expected_result;
457 aret = wc_match(f, t);
458 if (aret != eret) {
459 printf("failed test: /%s/ against /%s/ returned %d not %d\n",
460 full_tests[i].wildcard, full_tests[i].target,
461 aret, eret);
462 fails++;
463 } else
464 passes++;
465 }
466
467 printf("passed %d, failed %d\n", passes, fails);
468
469 return 0;
470 }
471
472 #endif