]> git.ipfire.org Git - thirdparty/openssh-portable.git/commitdiff
upstream: Replace the old recursive match_pattern() with an
authordjm@openbsd.org <djm@openbsd.org>
Sun, 31 May 2026 04:19:16 +0000 (04:19 +0000)
committerDamien Miller <djm@mindrot.org>
Sun, 31 May 2026 05:03:54 +0000 (15:03 +1000)
implementation that uses an NFA for matching. This avoids the exponential
worst-case behaviour of the old implementation.

ok markus@

OpenBSD-Commit-ID: fc6b75a52f4c0acb52b7900658c8d25ff873cbae

match.c

diff --git a/match.c b/match.c
index 3ef5369310c664f601fce0e1355555a1b4166b36..b8f350df17e3e4fb77a39cb374be973044295f9e 100644 (file)
--- a/match.c
+++ b/match.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: match.c,v 1.45 2024/09/06 02:30:44 djm Exp $ */
+/* $OpenBSD: match.c,v 1.46 2026/05/31 04:19:16 djm Exp $ */
 /*
  * Author: Tatu Ylonen <ylo@cs.hut.fi>
  * Copyright (c) 1995 Tatu Ylonen <ylo@cs.hut.fi>, Espoo, Finland
@@ -13,6 +13,7 @@
  */
 /*
  * Copyright (c) 2000 Markus Friedl.  All rights reserved.
+ * Copyright (c) 2026 Damien Miller.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
 #include "match.h"
 #include "misc.h"
 
+/*
+ * Computes the epsilon closure of an NFA set.
+ * In our wildcard grammar, epsilon transitions only exist for '*' wildcards,
+ * allowing us to transition from state i to i+1 without consuming input.
+ *
+ * 'states' holds one flag per NFA state and must have room for M + 1
+ * entries (indices 0..M), because an active '*' at pattern[M - 1]
+ * activates state M. 'pattern' is only read at indices 0..M-1.
+ *
+ * This function modifies 'states' in place.
+ */
+static void
+epsilon_closure(char *states, const char *pattern, size_t M)
+{
+       size_t i;
+
+       /* only need a forward pass as there are no back jumps in our grammar */
+       for (i = 0; i < M; i++) {
+               if (!states[i] || pattern[i] != '*')
+                       continue;
+               /*
+                * State i is active, and pattern[i] is '*', so we can
+                * epsilon-transition to i+1. State i+1 is visited on the
+                * next iteration, so a run of consecutive '*' is fully
+                * expanded by this single pass.
+                */
+               states[i + 1] = 1;
+       }
+}
+
 /*
  * Returns true if the given string matches the pattern (which may contain ?
- * and * as wildcards), and zero if it does not match.
+ * and * as wildcards), and zero if it does not match. Uses an NFA internally.
+ *
+ * State i (0 <= i < M, where M is the pattern length) means the matcher is
+ * positioned at pattern[i]; state M is the accepting state, reached once
+ * the whole pattern has been matched. The input is scanned once and each
+ * input character does O(M) work, so the running time is
+ * O(strlen(s) * M). Two M + 1 byte state arrays are allocated per call
+ * and freed before returning.
  */
 int
 match_pattern(const char *s, const char *pattern)
 {
-       for (;;) {
-               /* If at end of pattern, accept if also at end of string. */
-               if (!*pattern)
-                       return !*s;
-
-               if (*pattern == '*') {
-                       /* Skip this and any consecutive asterisks. */
-                       while (*pattern == '*')
-                               pattern++;
-
-                       /* If at end of pattern, accept immediately. */
-                       if (!*pattern)
-                               return 1;
-
-                       /* If next character in pattern is known, optimize. */
-                       if (*pattern != '?' && *pattern != '*') {
+       size_t M;
+       size_t i;
+       char *states, *next_states, *tmp;
+       int active, matched = 0;
+
+       /* trivial case: empty pattern vs empty input */
+       if ((M = strlen(pattern)) == 0)
+               return *s == '\0';
+
+       /* A state for each pattern character, plus one final accepting state */
+       states = xcalloc(M + 1, sizeof(*states));
+       next_states = xcalloc(M + 1, sizeof(*next_states));
+
+       /* Initial state: state 0 is active */
+       states[0] = 1;
+       /* Other states might be reachable now if the pattern starts with '*' */
+       epsilon_closure(states, pattern, M);
+
+       for (; *s; s++) {
+               memset(next_states, 0, M + 1);
+
+               /* Calculate the reachable next states given the input char */
+               for (i = 0; i < M; i++) {
+                       if (!states[i])
+                               continue;
+                       if (pattern[i] == '*') {
+                               /*
+                                * '*' matches any character, so we can
+                                * stay in state i
+                                */
+                               next_states[i] = 1;
+                       } else if (pattern[i] == '?' || pattern[i] == *s) {
                                /*
-                                * Look instances of the next character in
-                                * pattern, and try to match starting from
-                                * those.
+                                * '?' matches any character, or we have
+                                * a literal match.
                                 */
-                               for (; *s; s++)
-                                       if (*s == *pattern &&
-                                           match_pattern(s + 1, pattern + 1))
-                                               return 1;
-                               /* Failed. */
-                               return 0;
+                               next_states[i + 1] = 1;
                        }
-                       /*
-                        * Move ahead one character at a time and try to
-                        * match at each position.
-                        */
-                       for (; *s; s++)
-                               if (match_pattern(s, pattern))
-                                       return 1;
-                       /* Failed. */
-                       return 0;
                }
-               /*
-                * There must be at least one more character in the string.
-                * If we are at the end, fail.
-                */
-               if (!*s)
-                       return 0;
 
-               /* Check if the next character of the string is acceptable. */
-               if (*pattern != '?' && *pattern != *s)
-                       return 0;
+               /* Expand the reachable next states with epsilon transitions */
+               epsilon_closure(next_states, pattern, M);
 
-               /* Move to the next character, both in string and in pattern. */
-               s++;
-               pattern++;
+               /* Swap states and next_states */
+               tmp = states;
+               states = next_states;
+               next_states = tmp;
+
+               /* Check if we have any active pattern states left */
+               active = 0;
+               for (i = 0; i <= M; i++) {
+                       if (states[i]) {
+                               active = 1;
+                               break;
+                       }
+               }
+               if (!active)
+                       goto out; /* No active states, fail early */
        }
-       /* NOTREACHED */
+       /*
+        * We matched only if we ended up in the final, accepting state
+        * after consuming all the input.
+        */
+       matched = states[M];
+ out:
+       free(states);
+       free(next_states);
+       return matched;
 }
 
 /*