From 4083a52f4173b87a883537a1e2957413c2aebd9e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 16 Oct 2015 15:52:12 -0400 Subject: [PATCH] Miscellaneous cleanup of regular-expression compiler. Revert our previous addition of "all" flags to copyins() and copyouts(); they're no longer needed, and were never anything but an unsightly hack. Improve a couple of infelicities in the REG_DEBUG code for dumping the NFA data structure, including adding code to count the total number of states and arcs. Add a couple of missed error checks. Add some more documentation in the README file, and some regression tests illustrating cases that exceeded the state-count limit and/or took unreasonable amounts of time before this set of patches. Back-patch to all supported branches. --- src/backend/regex/regc_nfa.c | 55 +++++++++++------------------------- src/backend/regex/regcomp.c | 11 ++++---- 2 files changed, 23 insertions(+), 43 deletions(-) diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index fe8a4a12d17..6f04321cd35 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -823,14 +823,11 @@ moveins(struct nfa * nfa, /* * copyins - copy in arcs of a state to another state - * - * Either all arcs, or only non-empty ones as determined by all value. */ static void copyins(struct nfa * nfa, struct state * oldState, - struct state * newState, - int all) + struct state * newState) { assert(oldState != newState); @@ -840,8 +837,7 @@ copyins(struct nfa * nfa, struct arc *a; for (a = oldState->ins; a != NULL; a = a->inchain) - if (all || a->type != EMPTY) - cparc(nfa, a, a->from, newState); + cparc(nfa, a, a->from, newState); } else { @@ -873,12 +869,6 @@ copyins(struct nfa * nfa, { struct arc *a = oa; - if (!all && a->type == EMPTY) - { - oa = oa->inchain; - continue; - } - switch (sortins_cmp(&oa, &na)) { case -1: @@ -904,12 +894,6 @@ copyins(struct nfa * nfa, /* newState does not have anything matching oa */ struct arc *a = oa; - if (!all && a->type == EMPTY) - { - oa = oa->inchain; - continue; - } - oa = oa->inchain; createarc(nfa, a->type, a->co, a->from, newState); } @@ -1107,14 +1091,11 @@ moveouts(struct nfa * nfa, /* * copyouts - copy out arcs of a state to another state - * - * Either all arcs, or only non-empty ones as determined by all value. */ static void copyouts(struct nfa * nfa, struct state * oldState, - struct state * newState, - int all) + struct state * newState) { assert(oldState != newState); @@ -1124,8 +1105,7 @@ copyouts(struct nfa * nfa, struct arc *a; for (a = oldState->outs; a != NULL; a = a->outchain) - if (all || a->type != EMPTY) - cparc(nfa, a, newState, a->to); + cparc(nfa, a, newState, a->to); } else { @@ -1157,12 +1137,6 @@ copyouts(struct nfa * nfa, { struct arc *a = oa; - if (!all && a->type == EMPTY) - { - oa = oa->outchain; - continue; - } - switch (sortouts_cmp(&oa, &na)) { case -1: @@ -1188,12 +1162,6 @@ copyouts(struct nfa * nfa, /* newState does not have anything matching oa */ struct arc *a = oa; - if (!all && a->type == EMPTY) - { - oa = oa->outchain; - continue; - } - oa = oa->outchain; createarc(nfa, a->type, a->co, newState, a->to); } @@ -1452,6 +1420,10 @@ optimize(struct nfa * nfa, fprintf(f, "\nfinal cleanup:\n"); #endif cleanup(nfa); /* final tidying */ +#ifdef REG_DEBUG + if (verbose) + dumpnfa(nfa, f); +#endif return analyze(nfa); /* and analysis */ } @@ -1568,7 +1540,7 @@ pull(struct nfa * nfa, s = newstate(nfa); if (NISERR()) return 0; - copyins(nfa, from, s, 1); /* duplicate inarcs */ + copyins(nfa, from, s); /* duplicate inarcs */ cparc(nfa, con, s, to); /* move constraint arc */ freearc(nfa, con); if (NISERR()) @@ -1735,7 +1707,7 @@ push(struct nfa * nfa, s = newstate(nfa); if (NISERR()) return 0; - copyouts(nfa, to, s, 1); /* duplicate outarcs */ + copyouts(nfa, to, s); /* duplicate outarcs */ cparc(nfa, con, from, s); /* move constraint arc */ freearc(nfa, con); if (NISERR()) @@ -2952,6 +2924,8 @@ dumpnfa(struct nfa * nfa, { #ifdef REG_DEBUG struct state *s; + int nstates = 0; + int narcs = 0; fprintf(f, "pre %d, post %d", nfa->pre->no, nfa->post->no); if (nfa->bos[0] != COLORLESS) @@ -2964,7 +2938,12 @@ dumpnfa(struct nfa * nfa, fprintf(f, ", eol [%ld]", (long) nfa->eos[1]); fprintf(f, "\n"); for (s = nfa->states; s != NULL; s = s->next) + { dumpstate(s, f); + nstates++; + narcs += s->nouts; + } + fprintf(f, "total of %d states, %d arcs\n", nstates, narcs); if (nfa->parent == NULL) dumpcolors(nfa->cm, f); fflush(f); diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 9e04a4f298a..f4eebf84d7b 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -136,10 +136,10 @@ static int sortins_cmp(const void *, const void *); static void sortouts(struct nfa *, struct state *); static int sortouts_cmp(const void *, const void *); static void moveins(struct nfa *, struct state *, struct state *); -static void copyins(struct nfa *, struct state *, struct state *, int); +static void copyins(struct nfa *, struct state *, struct state *); static void mergeins(struct nfa *, struct state *, struct arc **, int); static void moveouts(struct nfa *, struct state *, struct state *); -static void copyouts(struct nfa *, struct state *, struct state *, int); +static void copyouts(struct nfa *, struct state *, struct state *); static void cloneouts(struct nfa *, struct state *, struct state *, struct state *, int); static void delsub(struct nfa *, struct state *, struct state *); static void deltraverse(struct nfa *, struct state *, struct state *); @@ -181,7 +181,6 @@ static void dumpnfa(struct nfa *, FILE *); #ifdef REG_DEBUG static void dumpstate(struct state *, FILE *); static void dumparcs(struct state *, FILE *); -static int dumprarcs(struct arc *, struct state *, FILE *, int); static void dumparc(struct arc *, struct state *, FILE *); static void dumpcnfa(struct cnfa *, FILE *); static void dumpcstate(int, struct cnfa *, FILE *); @@ -613,7 +612,9 @@ makesearch(struct vars * v, for (s = slist; s != NULL; s = s2) { s2 = newstate(nfa); - copyouts(nfa, s, s2, 1); + NOERR(); + copyouts(nfa, s, s2); + NOERR(); for (a = s->ins; a != NULL; a = b) { b = a->inchain; @@ -1997,7 +1998,7 @@ dump(regex_t *re, dumpcolors(&g->cmap, f); if (!NULLCNFA(g->search)) { - printf("\nsearch:\n"); + fprintf(f, "\nsearch:\n"); dumpcnfa(&g->search, f); } for (i = 1; i < g->nlacons; i++) -- 2.39.5