From 97746240626ac9f0d9732b2dc927d655c936b9ec Mon Sep 17 00:00:00 2001 From: Jim Meyering Date: Sun, 12 Apr 2026 23:07:09 -0700 Subject: [PATCH] regex: small simplification * lib/regexec.c (re_search_internal): Use only one copy of the 5-line reg-initialization code, removing that else block, at the tiny cost of moving a small test into the loop. --- ChangeLog | 5 ++ lib/regexec.c | 136 ++++++++++++++++++++++++-------------------------- 2 files changed, 69 insertions(+), 72 deletions(-) diff --git a/ChangeLog b/ChangeLog index b1bd9d66eb..3860cadfd1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2026-04-12 Jim Meyering + regex: small simplification + * lib/regexec.c (re_search_internal): Use only one copy + of the 5-line reg-initialization code, removing that else block, + at the tiny cost of moving a small test into the loop. + regex: fix missed short match with backrefs With a backref pattern like ^(.?)(.?).?\2\1 (no $), the engine could miss valid short matches. For example, "ab" should match diff --git a/lib/regexec.c b/lib/regexec.c index 76ef80ab07..259dfd36c6 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -848,87 +848,79 @@ re_search_internal (const regex_t *preg, const char *string, Idx length, { Idx reg_idx; - if (!preg->no_sub && nmatch > 1) + /* When set_regs fails for a backref pattern, the structural + match at match_last has no valid register assignment. Try + shorter match lengths, since a valid shorter match may + exist (e.g., all groups matching empty). */ + for (;;) { - /* When set_regs fails for a backref pattern, the structural - match at match_last has no valid register assignment. Try - shorter match lengths, since a valid shorter match may - exist (e.g., all groups matching empty). */ - for (;;) - { - /* Initialize registers. */ - for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) - pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = mctx.match_last; - - err = set_regs (preg, &mctx, nmatch, pmatch, - dfa->has_plural_match && dfa->nbackref > 0); - if (__glibc_likely (err == REG_NOERROR) - || save_state_log == NULL - || err != REG_NOMATCH) - break; + /* Initialize registers. */ + for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) + pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; + pmatch[0].rm_so = 0; + pmatch[0].rm_eo = mctx.match_last; - /* set_regs failed; try a shorter match_last. */ - Idx ml = mctx.match_last; - re_free (mctx.state_log); - do - { - --ml; - if (ml < 0) - break; - } - while (save_state_log[ml] == NULL - || !save_state_log[ml]->halt - || !check_halt_state_context - (&mctx, save_state_log[ml], ml)); + if (preg->no_sub || nmatch <= 1) + break; + + err = set_regs (preg, &mctx, nmatch, pmatch, + dfa->has_plural_match && dfa->nbackref > 0); + if (__glibc_likely (err == REG_NOERROR) + || save_state_log == NULL + || err != REG_NOMATCH) + break; + + /* set_regs failed; try a shorter match_last. */ + Idx ml = mctx.match_last; + re_free (mctx.state_log); + do + { + --ml; if (ml < 0) + break; + } + while (save_state_log[ml] == NULL + || !save_state_log[ml]->halt + || !check_halt_state_context + (&mctx, save_state_log[ml], ml)); + if (ml < 0) + { + err = REG_NOMATCH; + mctx.state_log = save_state_log; + save_state_log = NULL; + break; + } + mctx.state_log + = re_malloc (re_dfastate_t *, ml + 1); + if (__glibc_unlikely (mctx.state_log == NULL)) + { + mctx.state_log = save_state_log; + save_state_log = NULL; + err = REG_ESPACE; + break; + } + memcpy (mctx.state_log, save_state_log, + sizeof (re_dfastate_t *) * (ml + 1)); + mctx.match_last = ml; + mctx.last_node + = check_halt_state_context + (&mctx, save_state_log[ml], ml); + err = prune_impossible_nodes (&mctx); + if (__glibc_unlikely (err != REG_NOERROR)) + { + if (err == REG_NOMATCH) { - err = REG_NOMATCH; - mctx.state_log = save_state_log; - save_state_log = NULL; - break; - } - mctx.state_log - = re_malloc (re_dfastate_t *, ml + 1); - if (__glibc_unlikely (mctx.state_log == NULL)) - { + re_free (mctx.state_log); mctx.state_log = save_state_log; save_state_log = NULL; - err = REG_ESPACE; - break; - } - memcpy (mctx.state_log, save_state_log, - sizeof (re_dfastate_t *) * (ml + 1)); - mctx.match_last = ml; - mctx.last_node - = check_halt_state_context - (&mctx, save_state_log[ml], ml); - err = prune_impossible_nodes (&mctx); - if (__glibc_unlikely (err != REG_NOERROR)) - { - if (err == REG_NOMATCH) - { - re_free (mctx.state_log); - mctx.state_log = save_state_log; - save_state_log = NULL; - } - break; } + break; } - re_free (save_state_log); - save_state_log = NULL; - if (__glibc_unlikely (err != REG_NOERROR)) - goto free_return; - } - else - { - /* Initialize registers. */ - for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) - pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = mctx.match_last; } + re_free (save_state_log); + save_state_log = NULL; + if (__glibc_unlikely (err != REG_NOERROR)) + goto free_return; /* At last, add the offset to each register, since we slid the buffers so that we could assume that the matching starts -- 2.47.3