-/* $OpenBSD: match.c,v 1.45 2024/09/06 02:30:44 djm Exp $ */
+/* $OpenBSD: match.c,v 1.46 2026/05/31 04:19:16 djm Exp $ */
/*
* Author: Tatu Ylonen <ylo@cs.hut.fi>
* Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
*/
/*
* Copyright (c) 2000 Markus Friedl. All rights reserved.
+ * Copyright (c) 2026 Damien Miller. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#include "match.h"
#include "misc.h"
+/*
+ * Computes the epsilon closure of an NFA set.
+ * In our wildcard grammar, epsilon transitions only exist for '*' wildcards,
+ * allowing us to transition from state i to i+1 without consuming input.
+ *
+ * This function modifies 'states' in place.
+ */
+static void
+epsilon_closure(char *states, const char *pattern, size_t M)
+{
+ size_t i;
+
+ /* only need a forward pass as there are no back jumps in our grammar */
+ for (i = 0; i < M; i++) {
+ if (!states[i] || pattern[i] != '*')
+ continue;
+ /*
+ * State i is active, and pattern[i] is '*', so we can
+ * epsilon-transition to i+1.
+ */
+ states[i + 1] = 1;
+ }
+}
+
/*
* Returns true if the given string matches the pattern (which may contain ?
- * and * as wildcards), and zero if it does not match.
+ * and * as wildcards), and zero if it does not match. Uses an NFA internally.
*/
int
match_pattern(const char *s, const char *pattern)
{
- for (;;) {
- /* If at end of pattern, accept if also at end of string. */
- if (!*pattern)
- return !*s;
-
- if (*pattern == '*') {
- /* Skip this and any consecutive asterisks. */
- while (*pattern == '*')
- pattern++;
-
- /* If at end of pattern, accept immediately. */
- if (!*pattern)
- return 1;
-
- /* If next character in pattern is known, optimize. */
- if (*pattern != '?' && *pattern != '*') {
+ size_t M;
+ size_t i;
+ char *states, *next_states, *tmp;
+ int active, matched = 0;
+
+ /* trivial case: empty pattern vs empty input */
+ if ((M = strlen(pattern)) == 0)
+ return *s == '\0';
+
+ /* A state for each pattern character, plus one final accepting state */
+ states = xcalloc(M + 1, sizeof(*states));
+ next_states = xcalloc(M + 1, sizeof(*next_states));
+
+ /* Initial state: state 0 is active */
+ states[0] = 1;
+ /* Other states might be reachable now if the pattern starts with '*' */
+ epsilon_closure(states, pattern, M);
+
+ for (; *s; s++) {
+ memset(next_states, 0, M + 1);
+
+ /* Calculate the reachable next states given the input char */
+ for (i = 0; i < M; i++) {
+ if (!states[i])
+ continue;
+ if (pattern[i] == '*') {
+ /*
+ * '*' matches any character, so we can
+ * stay in state i
+ */
+ next_states[i] = 1;
+ } else if (pattern[i] == '?' || pattern[i] == *s) {
/*
- * Look instances of the next character in
- * pattern, and try to match starting from
- * those.
+ * '?' matches any character, or we have
+ * a literal match.
*/
- for (; *s; s++)
- if (*s == *pattern &&
- match_pattern(s + 1, pattern + 1))
- return 1;
- /* Failed. */
- return 0;
+ next_states[i + 1] = 1;
}
- /*
- * Move ahead one character at a time and try to
- * match at each position.
- */
- for (; *s; s++)
- if (match_pattern(s, pattern))
- return 1;
- /* Failed. */
- return 0;
}
- /*
- * There must be at least one more character in the string.
- * If we are at the end, fail.
- */
- if (!*s)
- return 0;
- /* Check if the next character of the string is acceptable. */
- if (*pattern != '?' && *pattern != *s)
- return 0;
+ /* Expand the reachable next states with epsilon transitions */
+ epsilon_closure(next_states, pattern, M);
- /* Move to the next character, both in string and in pattern. */
- s++;
- pattern++;
+ /* Swap states and next_states */
+ tmp = states;
+ states = next_states;
+ next_states = tmp;
+
+ /* Check if we have any active pattern states left */
+ active = 0;
+ for (i = 0; i <= M; i++) {
+ if (states[i]) {
+ active = 1;
+ break;
+ }
+ }
+ if (!active)
+ goto out; /* No active states, fail early */
}
- /* NOTREACHED */
+ /*
+ * We matched only if we ended up in the final, accepting state
+ * after consuming all the input.
+ */
+ matched = states[M];
+ out:
+ free(states);
+ free(next_states);
+ return matched;
}
/*