From b7d36ffca02c23f545d6e098d78180e6e72dfd8d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 21 Sep 2016 20:24:14 +0200 Subject: [PATCH] regex: use regexec_buf() The new regexec_buf() function operates on buffers with an explicitly specified length, rather than NUL-terminated strings. We need to use this function whenever the buffer we want to pass to regexec(3) may have been mmap(2)ed (and is hence not NUL-terminated). Note: the original motivation for this patch was to fix a bug where `git diff -G ` would crash. This patch converts more callers, though, some of which allocated to construct NUL-terminated strings, or worse, modified buffers to temporarily insert NULs while calling regexec(3). By converting them to use regexec_buf(), the code has become much cleaner. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- diff.c | 3 ++- diffcore-pickaxe.c | 18 ++++++++---------- grep.c | 14 ++------------ t/t4062-diff-pickaxe.sh | 2 +- xdiff-interface.c | 13 ++++--------- 5 files changed, 17 insertions(+), 33 deletions(-) diff --git a/diff.c b/diff.c index 059123c5dc..f77324e9e0 100644 --- a/diff.c +++ b/diff.c @@ -941,7 +941,8 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex, { if (word_regex && *begin < buffer->size) { regmatch_t match[1]; - if (!regexec(word_regex, buffer->ptr + *begin, 1, match, 0)) { + if (!regexec_buf(word_regex, buffer->ptr + *begin, + buffer->size - *begin, 1, match, 0)) { char *p = memchr(buffer->ptr + *begin + match[0].rm_so, '\n', match[0].rm_eo - match[0].rm_so); *end = p ? p - buffer->ptr : match[0].rm_eo + *begin; diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c index 7715c13ec4..8413d76582 100644 --- a/diffcore-pickaxe.c +++ b/diffcore-pickaxe.c @@ -21,7 +21,6 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len) { struct diffgrep_cb *data = priv; regmatch_t regmatch; - int hold; if (line[0] != '+' && line[0] != '-') return; @@ -31,11 +30,8 @@ static void diffgrep_consume(void *priv, char *line, unsigned long len) * caller early. */ return; - /* Yuck -- line ought to be "const char *"! */ - hold = line[len]; - line[len] = '\0'; - data->hit = !regexec(data->regexp, line + 1, 1, ®match, 0); - line[len] = hold; + data->hit = !regexec_buf(data->regexp, line + 1, len - 1, 1, + ®match, 0); } static int diff_grep(mmfile_t *one, mmfile_t *two, @@ -48,9 +44,11 @@ static int diff_grep(mmfile_t *one, mmfile_t *two, xdemitconf_t xecfg; if (!one) - return !regexec(regexp, two->ptr, 1, ®match, 0); + return !regexec_buf(regexp, two->ptr, two->size, + 1, ®match, 0); if (!two) - return !regexec(regexp, one->ptr, 1, ®match, 0); + return !regexec_buf(regexp, one->ptr, one->size, + 1, ®match, 0); /* * We have both sides; need to run textual diff and see if @@ -81,8 +79,8 @@ static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws) regmatch_t regmatch; int flags = 0; - assert(data[sz] == '\0'); - while (*data && !regexec(regexp, data, 1, ®match, flags)) { + while (*data && + !regexec_buf(regexp, data, sz, 1, ®match, flags)) { flags |= REG_NOTBOL; data += regmatch.rm_eo; if (*data && regmatch.rm_so == regmatch.rm_eo) diff --git a/grep.c b/grep.c index 528b652f71..8ed56236f0 100644 --- a/grep.c +++ b/grep.c @@ -848,17 +848,6 @@ static int fixmatch(struct grep_pat *p, char *line, char *eol, } } -static int regmatch(const regex_t *preg, char *line, char *eol, - regmatch_t *match, int eflags) -{ -#ifdef REG_STARTEND - match->rm_so = 0; - match->rm_eo = eol - line; - eflags |= REG_STARTEND; -#endif - return regexec(preg, line, 1, match, eflags); -} - static int patmatch(struct grep_pat *p, char *line, char *eol, regmatch_t *match, int eflags) { @@ -869,7 +858,8 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, else if (p->pcre_regexp) hit = !pcrematch(p, line, eol, match, eflags); else - hit = !regmatch(&p->regexp, line, eol, match, eflags); + hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, + eflags); return hit; } diff --git a/t/t4062-diff-pickaxe.sh b/t/t4062-diff-pickaxe.sh index 5929f2eabb..f0bf50bda7 100755 --- a/t/t4062-diff-pickaxe.sh +++ b/t/t4062-diff-pickaxe.sh @@ -14,7 +14,7 @@ test_expect_success setup ' test_tick && git commit -m "A 4k file" ' -test_expect_failure '-G matches' ' +test_expect_success '-G matches' ' git diff --name-only -G "^0{4096}$" HEAD^ >out && test 4096-zeroes.txt = "$(cat out)" ' diff --git a/xdiff-interface.c b/xdiff-interface.c index 54236f24b9..08a7313e6a 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -216,11 +216,10 @@ struct ff_regs { static long ff_regexp(const char *line, long len, char *buffer, long buffer_size, void *priv) { - char *line_buffer; struct ff_regs *regs = priv; regmatch_t pmatch[2]; int i; - int result = -1; + int result; /* Exclude terminating newline (and cr) from matching */ if (len > 0 && line[len-1] == '\n') { @@ -230,18 +229,16 @@ static long ff_regexp(const char *line, long len, len--; } - line_buffer = xstrndup(line, len); /* make NUL terminated */ - for (i = 0; i < regs->nr; i++) { struct ff_reg *reg = regs->array + i; - if (!regexec(®->re, line_buffer, 2, pmatch, 0)) { + if (!regexec_buf(®->re, line, len, 2, pmatch, 0)) { if (reg->negate) - goto fail; + return -1; break; } } if (regs->nr <= i) - goto fail; + return -1; i = pmatch[1].rm_so >= 0 ? 1 : 0; line += pmatch[i].rm_so; result = pmatch[i].rm_eo - pmatch[i].rm_so; @@ -250,8 +247,6 @@ static long ff_regexp(const char *line, long len, while (result > 0 && (isspace(line[result - 1]))) result--; memcpy(buffer, line, result); - fail: - free(line_buffer); return result; } -- 2.39.2