[people/ms/putty.git] / wildcard.c

/*
 * Wildcard matching engine for use with SFTP-based file transfer
 * programs (PSFTP, new-look PSCP): since SFTP has no notion of
 * getting the remote side to do globbing (and rightly so) we have
 * to do it locally, by retrieving all the filenames in a directory
 * and checking each against the wildcard pattern.
 */

#include <assert.h>
#include <stdlib.h>
#include <string.h>

#include "putty.h"

/*
 * Definition of wildcard syntax:
 * 
 *  - * matches any sequence of characters, including zero.
 *  - ? matches exactly one character which can be anything.
 *  - [abc] matches exactly one character which is a, b or c.
 *  - [a-f] matches anything from a through f.
 *  - [^a-f] matches anything _except_ a through f.
 *  - [-_] matches - or _; [^-_] matches anything else. (The - is
 *    non-special if it occurs immediately after the opening
 *    bracket or ^.)
 *  - [a^] matches an a or a ^. (The ^ is non-special if it does
 *    _not_ occur immediately after the opening bracket.)
 *  - \*, \?, \[, \], \\ match the single characters *, ?, [, ], \.
 *  - All other characters are non-special and match themselves.
 */

/*
 * Some notes on differences from POSIX globs (IEEE Std 1003.1, 2003 ed.):
 *  - backslashes act as escapes even within [] bracket expressions
 *  - does not support [!...] for non-matching list (POSIX are weird);
 *    NB POSIX allows [^...] as well via "A bracket expression starting
 *    with an unquoted circumflex character produces unspecified
 *    results". If we wanted to allow [!...] we might want to define
 *    [^!] as having its literal meaning (match '^' or '!').
 *  - none of the scary [[:class:]] stuff, etc
 */

/*
 * The wildcard matching technique we use is very simple and
 * potentially O(N^2) in running time, but I don't anticipate it
 * being that bad in reality (particularly since N will be the size
 * of a filename, which isn't all that much). Perhaps one day, once
 * PuTTY has grown a regexp matcher for some other reason, I might
 * come back and reimplement wildcards by translating them into
 * regexps or directly into NFAs; but for the moment, in the
 * absence of any other need for the NFA->DFA translation engine,
 * anything more than the simplest possible wildcard matcher is
 * vast code-size overkill.
 * 
 * Essentially, these wildcards are much simpler than regexps in
 * that they consist of a sequence of rigid fragments (? and [...]
 * can never match more or less than one character) separated by
 * asterisks. It is therefore extremely simple to look at a rigid
 * fragment and determine whether or not it begins at a particular
 * point in the test string; so we can search along the string
 * until we find each fragment, then search for the next. As long
 * as we find each fragment in the _first_ place it occurs, there
 * will never be a danger of having to backpedal and try to find it
 * again somewhere else.
 */

enum {
    WC_TRAILINGBACKSLASH = 1,
    WC_UNCLOSEDCLASS,
    WC_INVALIDRANGE
};

/*
 * Error reporting is done by returning various negative values
 * from the wildcard routines. Passing any such value to wc_error
 * will give a human-readable message.
 */
const char *wc_error(int value)
{
    value = abs(value);
    switch (value) {
      case WC_TRAILINGBACKSLASH:
	return "'\' occurred at end of string (expected another character)";
      case WC_UNCLOSEDCLASS:
	return "expected ']' to close character class";
      case WC_INVALIDRANGE:
	return "character range was not terminated (']' just after '-')";
    }
    return "INTERNAL ERROR: unrecognised wildcard error number";
}

/*
 * This is the routine that tests a target string to see if an
 * initial substring of it matches a fragment. If successful, it
 * returns 1, and advances both `fragment' and `target' past the
 * fragment and matching substring respectively. If unsuccessful it
 * returns zero. If the wildcard fragment suffers a syntax error,
 * it returns <0 and the precise value indexes into wc_error.
 */
static int wc_match_fragment(const char **fragment, const char **target)
{
    const char *f, *t;

    f = *fragment;
    t = *target;
    /*
     * The fragment terminates at either the end of the string, or
     * the first (unescaped) *.
     */
    while (*f && *f != '*' && *t) {
	/*
	 * Extract one character from t, and one character's worth
	 * of pattern from f, and step along both. Return 0 if they
	 * fail to match.
	 */
	if (*f == '\\') {
	    /*
	     * Backslash, which means f[1] is to be treated as a
	     * literal character no matter what it is. It may not
	     * be the end of the string.
	     */
	    if (!f[1])
		return -WC_TRAILINGBACKSLASH;   /* error */
	    if (f[1] != *t)
		return 0;	       /* failed to match */
	    f += 2;
	} else if (*f == '?') {
	    /*
	     * Question mark matches anything.
	     */
	    f++;
	} else if (*f == '[') {
	    int invert = 0;
	    int matched = 0;
	    /*
	     * Open bracket introduces a character class.
	     */
	    f++;
	    if (*f == '^') {
		invert = 1;
		f++;
	    }
	    while (*f != ']') {
		if (*f == '\\')
		    f++;	       /* backslashes still work */
		if (!*f)
		    return -WC_UNCLOSEDCLASS;   /* error again */
		if (f[1] == '-') {
		    int lower, upper, ourchr;
		    lower = (unsigned char) *f++;
		    f++;	       /* eat the minus */
		    if (*f == ']')
			return -WC_INVALIDRANGE;   /* different error! */
		    if (*f == '\\')
			f++;	       /* backslashes _still_ work */
		    if (!*f)
			return -WC_UNCLOSEDCLASS;   /* error again */
		    upper = (unsigned char) *f++;
		    ourchr = (unsigned char) *t;
		    if (lower > upper) {
			int t = lower; lower = upper; upper = t;
		    }
		    if (ourchr >= lower && ourchr <= upper)
			matched = 1;
		} else {
		    matched |= (*t == *f++);
		}
	    }
	    if (invert == matched)
		return 0;	       /* failed to match character class */
	    f++;		       /* eat the ] */
	} else {
	    /*
	     * Non-special character matches itself.
	     */
	    if (*f != *t)
		return 0;
	    f++;
	}
	/*
	 * Now we've done that, increment t past the character we
	 * matched.
	 */
	t++;
    }
    if (!*f || *f == '*') {
	/*
	 * We have reached the end of f without finding a mismatch;
	 * so we're done. Update the caller pointers and return 1.
	 */
	*fragment = f;
	*target = t;
	return 1;
    }
    /*
     * Otherwise, we must have reached the end of t before we
     * reached the end of f; so we've failed. Return 0. 
     */
    return 0;
}

/*
 * This is the real wildcard matching routine. It returns 1 for a
 * successful match, 0 for an unsuccessful match, and <0 for a
 * syntax error in the wildcard.
 */
int wc_match(const char *wildcard, const char *target)
{
    int ret;

    /*
     * Every time we see a '*' _followed_ by a fragment, we just
     * search along the string for a location at which the fragment
     * matches. The only special case is when we see a fragment
     * right at the start, in which case we just call the matching
     * routine once and give up if it fails.
     */
    if (*wildcard != '*') {
	ret = wc_match_fragment(&wildcard, &target);
	if (ret <= 0)
	    return ret;		       /* pass back failure or error alike */
    }

    while (*wildcard) {
	assert(*wildcard == '*');
	while (*wildcard == '*')
	    wildcard++;

	/*
	 * It's possible we've just hit the end of the wildcard
	 * after seeing a *, in which case there's no need to
	 * bother searching any more because we've won.
	 */
	if (!*wildcard)
	    return 1;

	/*
	 * Now `wildcard' points at the next fragment. So we
	 * attempt to match it against `target', and if that fails
	 * we increment `target' and try again, and so on. When we
	 * find we're about to try matching against the empty
	 * string, we give up and return 0.
	 */
	ret = 0;
	while (*target) {
	    const char *save_w = wildcard, *save_t = target;

	    ret = wc_match_fragment(&wildcard, &target);

	    if (ret < 0)
		return ret;	       /* syntax error */

	    if (ret > 0 && !*wildcard && *target) {
		/*
		 * Final special case - literally.
		 * 
		 * This situation arises when we are matching a
		 * _terminal_ fragment of the wildcard (that is,
		 * there is nothing after it, e.g. "*a"), and it
		 * has matched _too early_. For example, matching
		 * "*a" against "parka" will match the "a" fragment
		 * against the _first_ a, and then (if it weren't
		 * for this special case) matching would fail
		 * because we're at the end of the wildcard but not
		 * at the end of the target string.
		 * 
		 * In this case what we must do is measure the
		 * length of the fragment in the target (which is
		 * why we saved `target'), jump straight to that
		 * distance from the end of the string using
		 * strlen, and match the same fragment again there
		 * (which is why we saved `wildcard'). Then we
		 * return whatever that operation returns.
		 */
		target = save_t + strlen(save_t) - (target - save_t);
		wildcard = save_w;
		return wc_match_fragment(&wildcard, &target);
	    }

	    if (ret > 0)
		break;
	    target++;
	}
	if (ret > 0)
	    continue;
	return 0;
    }

    /*
     * If we reach here, it must be because we successfully matched
     * a fragment and then found ourselves right at the end of the
     * wildcard. Hence, we return 1 if and only if we are also
     * right at the end of the target.
     */
    return (*target ? 0 : 1);
}

/*
 * Another utility routine that translates a non-wildcard string
 * into its raw equivalent by removing any escaping backslashes.
 * Expects a target string buffer of anything up to the length of
 * the original wildcard. You can also pass NULL as the output
 * buffer if you're only interested in the return value.
 * 
 * Returns 1 on success, or 0 if a wildcard character was
 * encountered. In the latter case the output string MAY not be
 * zero-terminated and you should not use it for anything!
 */
int wc_unescape(char *output, const char *wildcard)
{
    while (*wildcard) {
	if (*wildcard == '\\') {
	    wildcard++;
	    /* We are lenient about trailing backslashes in non-wildcards. */
	    if (*wildcard) {
		if (output)
		    *output++ = *wildcard;
		wildcard++;
	    }
	} else if (*wildcard == '*' || *wildcard == '?' ||
		   *wildcard == '[' || *wildcard == ']') {
	    return 0;		       /* it's a wildcard! */
	} else {
	    if (output)
		*output++ = *wildcard;
	    wildcard++;
	}
    }
    *output = '\0';
    return 1;			       /* it's clean */
}

#ifdef TESTMODE

struct test {
    const char *wildcard;
    const char *target;
    int expected_result;
};

const struct test fragment_tests[] = {
    /*
     * We exhaustively unit-test the fragment matching routine
     * itself, which should save us the need to test all its
     * intricacies during the full wildcard tests.
     */
    {"abc", "abc", 1},
    {"abc", "abd", 0},
    {"abc", "abcd", 1},
    {"abcd", "abc", 0},
    {"ab[cd]", "abc", 1},
    {"ab[cd]", "abd", 1},
    {"ab[cd]", "abe", 0},
    {"ab[^cd]", "abc", 0},
    {"ab[^cd]", "abd", 0},
    {"ab[^cd]", "abe", 1},
    {"ab\\", "abc", -WC_TRAILINGBACKSLASH},
    {"ab\\*", "ab*", 1},
    {"ab\\?", "ab*", 0},
    {"ab?", "abc", 1},
    {"ab?", "ab", 0},
    {"ab[", "abc", -WC_UNCLOSEDCLASS},
    {"ab[c-", "abb", -WC_UNCLOSEDCLASS},
    {"ab[c-]", "abb", -WC_INVALIDRANGE},
    {"ab[c-e]", "abb", 0},
    {"ab[c-e]", "abc", 1},
    {"ab[c-e]", "abd", 1},
    {"ab[c-e]", "abe", 1},
    {"ab[c-e]", "abf", 0},
    {"ab[e-c]", "abb", 0},
    {"ab[e-c]", "abc", 1},
    {"ab[e-c]", "abd", 1},
    {"ab[e-c]", "abe", 1},
    {"ab[e-c]", "abf", 0},
    {"ab[^c-e]", "abb", 1},
    {"ab[^c-e]", "abc", 0},
    {"ab[^c-e]", "abd", 0},
    {"ab[^c-e]", "abe", 0},
    {"ab[^c-e]", "abf", 1},
    {"ab[^e-c]", "abb", 1},
    {"ab[^e-c]", "abc", 0},
    {"ab[^e-c]", "abd", 0},
    {"ab[^e-c]", "abe", 0},
    {"ab[^e-c]", "abf", 1},
    {"ab[a^]", "aba", 1},
    {"ab[a^]", "ab^", 1},
    {"ab[a^]", "abb", 0},
    {"ab[^a^]", "aba", 0},
    {"ab[^a^]", "ab^", 0},
    {"ab[^a^]", "abb", 1},
    {"ab[-c]", "ab-", 1},
    {"ab[-c]", "abc", 1},
    {"ab[-c]", "abd", 0},
    {"ab[^-c]", "ab-", 0},
    {"ab[^-c]", "abc", 0},
    {"ab[^-c]", "abd", 1},
    {"ab[\\[-\\]]", "abZ", 0},
    {"ab[\\[-\\]]", "ab[", 1},
    {"ab[\\[-\\]]", "ab\\", 1},
    {"ab[\\[-\\]]", "ab]", 1},
    {"ab[\\[-\\]]", "ab^", 0},
    {"ab[^\\[-\\]]", "abZ", 1},
    {"ab[^\\[-\\]]", "ab[", 0},
    {"ab[^\\[-\\]]", "ab\\", 0},
    {"ab[^\\[-\\]]", "ab]", 0},
    {"ab[^\\[-\\]]", "ab^", 1},
    {"ab[a-fA-F]", "aba", 1},
    {"ab[a-fA-F]", "abF", 1},
    {"ab[a-fA-F]", "abZ", 0},
};

const struct test full_tests[] = {
    {"a", "argh", 0},
    {"a", "ba", 0},
    {"a", "a", 1},
    {"a*", "aardvark", 1},
    {"a*", "badger", 0},
    {"*a", "park", 0},
    {"*a", "pArka", 1},
    {"*a", "parka", 1},
    {"*a*", "park", 1},
    {"*a*", "perk", 0},
    {"?b*r?", "abracadabra", 1},
    {"?b*r?", "abracadabr", 0},
    {"?b*r?", "abracadabzr", 0},
};

int main(void)
{
    int i;
    int fails, passes;

    fails = passes = 0;

    for (i = 0; i < sizeof(fragment_tests)/sizeof(*fragment_tests); i++) {
	const char *f, *t;
	int eret, aret;
	f = fragment_tests[i].wildcard;
	t = fragment_tests[i].target;
	eret = fragment_tests[i].expected_result;
	aret = wc_match_fragment(&f, &t);
	if (aret != eret) {
	    printf("failed test: /%s/ against /%s/ returned %d not %d\n",
		   fragment_tests[i].wildcard, fragment_tests[i].target,
		   aret, eret);
	    fails++;
	} else
	    passes++;
    }

    for (i = 0; i < sizeof(full_tests)/sizeof(*full_tests); i++) {
	const char *f, *t;
	int eret, aret;
	f = full_tests[i].wildcard;
	t = full_tests[i].target;
	eret = full_tests[i].expected_result;
	aret = wc_match(f, t);
	if (aret != eret) {
	    printf("failed test: /%s/ against /%s/ returned %d not %d\n",
		   full_tests[i].wildcard, full_tests[i].target,
		   aret, eret);
	    fails++;
	} else
	    passes++;
    }

    printf("passed %d, failed %d\n", passes, fails);

    return 0;
}

#endif
Commit	Line	Data
1c1af145	1	/*
	2	* Wildcard matching engine for use with SFTP-based file transfer
	3	* programs (PSFTP, new-look PSCP): since SFTP has no notion of
	4	* getting the remote side to do globbing (and rightly so) we have
	5	* to do it locally, by retrieving all the filenames in a directory
	6	* and checking each against the wildcard pattern.
	7	*/
	8
	9	#include <assert.h>
	10	#include <stdlib.h>
	11	#include <string.h>
	12
	13	#include "putty.h"
	14
	15	/*
	16	* Definition of wildcard syntax:
	17	*
	18	* - * matches any sequence of characters, including zero.
	19	* - ? matches exactly one character which can be anything.
	20	* - [abc] matches exactly one character which is a, b or c.
	21	* - [a-f] matches anything from a through f.
	22	* - [^a-f] matches anything _except_ a through f.
	23	* - [-_] matches - or _; [^-_] matches anything else. (The - is
	24	* non-special if it occurs immediately after the opening
	25	* bracket or ^.)
	26	* - [a^] matches an a or a ^. (The ^ is non-special if it does
	27	* _not_ occur immediately after the opening bracket.)
	28	* - \, \?, \[, \], \\ match the single characters , ?, [, ], \.
	29	* - All other characters are non-special and match themselves.
	30	*/
	31
	32	/*
	33	* Some notes on differences from POSIX globs (IEEE Std 1003.1, 2003 ed.):
	34	* - backslashes act as escapes even within [] bracket expressions
	35	* - does not support [!...] for non-matching list (POSIX are weird);
	36	* NB POSIX allows [^...] as well via "A bracket expression starting
	37	* with an unquoted circumflex character produces unspecified
	38	* results". If we wanted to allow [!...] we might want to define
	39	* [^!] as having its literal meaning (match '^' or '!').
	40	* - none of the scary [[:class:]] stuff, etc
	41	*/
	42
	43	/*
	44	* The wildcard matching technique we use is very simple and
	45	* potentially O(N^2) in running time, but I don't anticipate it
	46	* being that bad in reality (particularly since N will be the size
	47	* of a filename, which isn't all that much). Perhaps one day, once
	48	* PuTTY has grown a regexp matcher for some other reason, I might
	49	* come back and reimplement wildcards by translating them into
	50	* regexps or directly into NFAs; but for the moment, in the
	51	* absence of any other need for the NFA->DFA translation engine,
	52	* anything more than the simplest possible wildcard matcher is
	53	* vast code-size overkill.
	54	*
	55	* Essentially, these wildcards are much simpler than regexps in
	56	* that they consist of a sequence of rigid fragments (? and [...]
	57	* can never match more or less than one character) separated by
	58	* asterisks. It is therefore extremely simple to look at a rigid
	59	* fragment and determine whether or not it begins at a particular
	60	* point in the test string; so we can search along the string
	61	* until we find each fragment, then search for the next. As long
	62	* as we find each fragment in the _first_ place it occurs, there
	63	* will never be a danger of having to backpedal and try to find it
	64	* again somewhere else.
65	*/
66
67	enum {
68	WC_TRAILINGBACKSLASH = 1,
69	WC_UNCLOSEDCLASS,
70	WC_INVALIDRANGE
71	};
72
73	/*
74	* Error reporting is done by returning various negative values
75	* from the wildcard routines. Passing any such value to wc_error
76	* will give a human-readable message.
77	*/
78	const char *wc_error(int value)
79	{
80	value = abs(value);
81	switch (value) {
82	case WC_TRAILINGBACKSLASH:
83	return "'\' occurred at end of string (expected another character)";
84	case WC_UNCLOSEDCLASS:
85	return "expected ']' to close character class";
86	case WC_INVALIDRANGE:
87	return "character range was not terminated (']' just after '-')";
88	}
89	return "INTERNAL ERROR: unrecognised wildcard error number";
90	}
91
92	/*
93	* This is the routine that tests a target string to see if an
94	* initial substring of it matches a fragment. If successful, it
95	* returns 1, and advances both `fragment' and `target' past the
96	* fragment and matching substring respectively. If unsuccessful it
97	* returns zero. If the wildcard fragment suffers a syntax error,
98	* it returns <0 and the precise value indexes into wc_error.
99	*/
100	static int wc_match_fragment(const char fragment, const char target)
101	{
102	const char f, t;
103
104	f = *fragment;
105	t = *target;
106	/*
107	* The fragment terminates at either the end of the string, or
108	* the first (unescaped) *.
109	*/
110	while (f && f != '' && t) {
111	/*
112	* Extract one character from t, and one character's worth
113	* of pattern from f, and step along both. Return 0 if they
114	* fail to match.
115	*/
116	if (*f == '\\') {
117	/*
118	* Backslash, which means f[1] is to be treated as a
119	* literal character no matter what it is. It may not
120	* be the end of the string.
121	*/
122	if (!f[1])
123	return -WC_TRAILINGBACKSLASH; /* error */
124	if (f[1] != *t)
125	return 0; /* failed to match */
126	f += 2;
127	} else if (*f == '?') {
128	/*
129	* Question mark matches anything.
130	*/
131	f++;
132	} else if (*f == '[') {
133	int invert = 0;
134	int matched = 0;
135	/*
136	* Open bracket introduces a character class.
137	*/
138	f++;
139	if (*f == '^') {
140	invert = 1;
141	f++;
142	}
143	while (*f != ']') {
144	if (*f == '\\')
145	f++; /* backslashes still work */
146	if (!*f)
147	return -WC_UNCLOSEDCLASS; /* error again */
148	if (f[1] == '-') {
149	int lower, upper, ourchr;
150	lower = (unsigned char) *f++;
151	f++; /* eat the minus */
152	if (*f == ']')
153	return -WC_INVALIDRANGE; /* different error! */
154	if (*f == '\\')
155	f++; /* backslashes _still_ work */
156	if (!*f)
157	return -WC_UNCLOSEDCLASS; /* error again */
158	upper = (unsigned char) *f++;
159	ourchr = (unsigned char) *t;
160	if (lower > upper) {
161	int t = lower; lower = upper; upper = t;
162	}
163	if (ourchr >= lower && ourchr <= upper)
164	matched = 1;
165	} else {
166	matched \|= (t == f++);
167	}
168	}
169	if (invert == matched)
170	return 0; /* failed to match character class */
171	f++; /* eat the ] */
172	} else {
173	/*
174	* Non-special character matches itself.
175	*/
176	if (f != t)
177	return 0;
178	f++;
179	}
180	/*
181	* Now we've done that, increment t past the character we
182	* matched.
183	*/
184	t++;
185	}
186	if (!f \|\| f == '*') {
187	/*
188	* We have reached the end of f without finding a mismatch;
189	* so we're done. Update the caller pointers and return 1.
190	*/
191	*fragment = f;
192	*target = t;
193	return 1;
194	}
195	/*
196	* Otherwise, we must have reached the end of t before we
197	* reached the end of f; so we've failed. Return 0.
198	*/
199	return 0;
200	}
201
202	/*
203	* This is the real wildcard matching routine. It returns 1 for a
204	* successful match, 0 for an unsuccessful match, and <0 for a
205	* syntax error in the wildcard.
206	*/
207	int wc_match(const char wildcard, const char target)
208	{
209	int ret;
210
211	/*
212	* Every time we see a '*' _followed_ by a fragment, we just
213	* search along the string for a location at which the fragment
214	* matches. The only special case is when we see a fragment
215	* right at the start, in which case we just call the matching
216	* routine once and give up if it fails.
217	*/
218	if (wildcard != '') {
219	ret = wc_match_fragment(&wildcard, &target);
220	if (ret <= 0)
221	return ret; /* pass back failure or error alike */
222	}
223
224	while (*wildcard) {
225	assert(wildcard == '');
226	while (wildcard == '')
227	wildcard++;
228
229	/*
230	* It's possible we've just hit the end of the wildcard
231	* after seeing a *, in which case there's no need to
232	* bother searching any more because we've won.
233	*/
234	if (!*wildcard)
235	return 1;
236
237	/*
238	* Now `wildcard' points at the next fragment. So we
239	* attempt to match it against `target', and if that fails
240	* we increment `target' and try again, and so on. When we
241	* find we're about to try matching against the empty
242	* string, we give up and return 0.
243	*/
244	ret = 0;
245	while (*target) {
246	const char save_w = wildcard, save_t = target;
247
248	ret = wc_match_fragment(&wildcard, &target);
249
250	if (ret < 0)
251	return ret; /* syntax error */
252
253	if (ret > 0 && !wildcard && target) {
254	/*
255	* Final special case - literally.
256	*
257	* This situation arises when we are matching a
258	* _terminal_ fragment of the wildcard (that is,
259	* there is nothing after it, e.g. "*a"), and it
260	* has matched _too early_. For example, matching
261	* "*a" against "parka" will match the "a" fragment
262	* against the _first_ a, and then (if it weren't
263	* for this special case) matching would fail
264	* because we're at the end of the wildcard but not
265	* at the end of the target string.
266	*
267	* In this case what we must do is measure the
268	* length of the fragment in the target (which is
269	* why we saved `target'), jump straight to that
270	* distance from the end of the string using
271	* strlen, and match the same fragment again there
272	* (which is why we saved `wildcard'). Then we
273	* return whatever that operation returns.
274	*/
275	target = save_t + strlen(save_t) - (target - save_t);
276	wildcard = save_w;
277	return wc_match_fragment(&wildcard, &target);
278	}
279
280	if (ret > 0)
281	break;
282	target++;
283	}
284	if (ret > 0)
285	continue;
286	return 0;
287	}
288
289	/*
290	* If we reach here, it must be because we successfully matched
291	* a fragment and then found ourselves right at the end of the
292	* wildcard. Hence, we return 1 if and only if we are also
293	* right at the end of the target.
294	*/
295	return (*target ? 0 : 1);
296	}
297
298	/*
299	* Another utility routine that translates a non-wildcard string
300	* into its raw equivalent by removing any escaping backslashes.
301	* Expects a target string buffer of anything up to the length of
302	* the original wildcard. You can also pass NULL as the output
303	* buffer if you're only interested in the return value.
304	*
305	* Returns 1 on success, or 0 if a wildcard character was
306	* encountered. In the latter case the output string MAY not be
307	* zero-terminated and you should not use it for anything!
308	*/
309	int wc_unescape(char output, const char wildcard)
310	{
311	while (*wildcard) {
312	if (*wildcard == '\\') {
313	wildcard++;
314	/* We are lenient about trailing backslashes in non-wildcards. */
315	if (*wildcard) {
316	if (output)
317	output++ = wildcard;
318	wildcard++;
319	}
320	} else if (wildcard == '' \|\| *wildcard == '?' \|\|
321	wildcard == '[' \|\| wildcard == ']') {
322	return 0; /* it's a wildcard! */
323	} else {
324	if (output)
325	output++ = wildcard;
326	wildcard++;
327	}
328	}
329	*output = '\0';
330	return 1; /* it's clean */
331	}
332
333	#ifdef TESTMODE
334
335	struct test {
336	const char *wildcard;
337	const char *target;
338	int expected_result;
339	};
340
341	const struct test fragment_tests[] = {
342	/*
343	* We exhaustively unit-test the fragment matching routine
344	* itself, which should save us the need to test all its
345	* intricacies during the full wildcard tests.
346	*/
347	{"abc", "abc", 1},
348	{"abc", "abd", 0},
349	{"abc", "abcd", 1},
350	{"abcd", "abc", 0},
351	{"ab[cd]", "abc", 1},
352	{"ab[cd]", "abd", 1},
353	{"ab[cd]", "abe", 0},
354	{"ab[^cd]", "abc", 0},
355	{"ab[^cd]", "abd", 0},
356	{"ab[^cd]", "abe", 1},
357	{"ab\\", "abc", -WC_TRAILINGBACKSLASH},
358	{"ab\\", "ab", 1},
359	{"ab\\?", "ab*", 0},
360	{"ab?", "abc", 1},
361	{"ab?", "ab", 0},
362	{"ab[", "abc", -WC_UNCLOSEDCLASS},
363	{"ab[c-", "abb", -WC_UNCLOSEDCLASS},
364	{"ab[c-]", "abb", -WC_INVALIDRANGE},
365	{"ab[c-e]", "abb", 0},
366	{"ab[c-e]", "abc", 1},
367	{"ab[c-e]", "abd", 1},
368	{"ab[c-e]", "abe", 1},
369	{"ab[c-e]", "abf", 0},
370	{"ab[e-c]", "abb", 0},
371	{"ab[e-c]", "abc", 1},
372	{"ab[e-c]", "abd", 1},
373	{"ab[e-c]", "abe", 1},
374	{"ab[e-c]", "abf", 0},
375	{"ab[^c-e]", "abb", 1},
376	{"ab[^c-e]", "abc", 0},
377	{"ab[^c-e]", "abd", 0},
378	{"ab[^c-e]", "abe", 0},
379	{"ab[^c-e]", "abf", 1},
380	{"ab[^e-c]", "abb", 1},
381	{"ab[^e-c]", "abc", 0},
382	{"ab[^e-c]", "abd", 0},
383	{"ab[^e-c]", "abe", 0},
384	{"ab[^e-c]", "abf", 1},
385	{"ab[a^]", "aba", 1},
386	{"ab[a^]", "ab^", 1},
387	{"ab[a^]", "abb", 0},
388	{"ab[^a^]", "aba", 0},
389	{"ab[^a^]", "ab^", 0},
390	{"ab[^a^]", "abb", 1},
391	{"ab[-c]", "ab-", 1},
392	{"ab[-c]", "abc", 1},
393	{"ab[-c]", "abd", 0},
394	{"ab[^-c]", "ab-", 0},
395	{"ab[^-c]", "abc", 0},
396	{"ab[^-c]", "abd", 1},
397	{"ab[\\[-\\]]", "abZ", 0},
398	{"ab[\\[-\\]]", "ab[", 1},
399	{"ab[\\[-\\]]", "ab\\", 1},
400	{"ab[\\[-\\]]", "ab]", 1},
401	{"ab[\\[-\\]]", "ab^", 0},
402	{"ab[^\\[-\\]]", "abZ", 1},
403	{"ab[^\\[-\\]]", "ab[", 0},
404	{"ab[^\\[-\\]]", "ab\\", 0},
405	{"ab[^\\[-\\]]", "ab]", 0},
406	{"ab[^\\[-\\]]", "ab^", 1},
407	{"ab[a-fA-F]", "aba", 1},
408	{"ab[a-fA-F]", "abF", 1},
409	{"ab[a-fA-F]", "abZ", 0},
410	};
411
412	const struct test full_tests[] = {
413	{"a", "argh", 0},
414	{"a", "ba", 0},
415	{"a", "a", 1},
416	{"a*", "aardvark", 1},
417	{"a*", "badger", 0},
418	{"*a", "park", 0},
419	{"*a", "pArka", 1},
420	{"*a", "parka", 1},
421	{"a", "park", 1},
422	{"a", "perk", 0},
423	{"?b*r?", "abracadabra", 1},
424	{"?b*r?", "abracadabr", 0},
425	{"?b*r?", "abracadabzr", 0},
426	};
427
428	int main(void)
429	{
430	int i;
431	int fails, passes;
432
433	fails = passes = 0;
434
435	for (i = 0; i < sizeof(fragment_tests)/sizeof(*fragment_tests); i++) {
436	const char f, t;
437	int eret, aret;
438	f = fragment_tests[i].wildcard;
439	t = fragment_tests[i].target;
440	eret = fragment_tests[i].expected_result;
441	aret = wc_match_fragment(&f, &t);
442	if (aret != eret) {
443	printf("failed test: /%s/ against /%s/ returned %d not %d\n",
444	fragment_tests[i].wildcard, fragment_tests[i].target,
445	aret, eret);
446	fails++;
447	} else
448	passes++;
449	}
450
451	for (i = 0; i < sizeof(full_tests)/sizeof(*full_tests); i++) {
452	const char f, t;
453	int eret, aret;
454	f = full_tests[i].wildcard;
455	t = full_tests[i].target;
456	eret = full_tests[i].expected_result;
457	aret = wc_match(f, t);
458	if (aret != eret) {
459	printf("failed test: /%s/ against /%s/ returned %d not %d\n",
460	full_tests[i].wildcard, full_tests[i].target,
461	aret, eret);
462	fails++;
463	} else
464	passes++;
465	}
466
467	printf("passed %d, failed %d\n", passes, fails);
468
469	return 0;
470	}
471
472	#endif