git.ipfire.org Git - thirdparty/git.git/commitdiff
word diff: handle zero length matches
author Phillip Wood <phillip.wood@dunelm.org.uk>
Tue, 4 May 2021 09:27:34 +0000 (09:27 +0000)
committer Junio C Hamano <gitster@pobox.com>
Wed, 5 May 2021 09:53:42 +0000 (18:53 +0900)
If find_word_boundaries() encounters a zero-length match (which can be
caused by matching a newline or by using '*' instead of '+' in the
regex), we stop splitting the input into words, which generates an
inaccurate diff. To fix this, increment the start point when there is
a zero-length match and try a new match. This is safe because POSIX
regular expressions always return the longest available match, so a
zero-length match means that no non-empty match is available at the
current position.

Commit bf82940dbf1 (color-words: enable REG_NEWLINE to help user,
2009-01-17) prevented matching newlines in negated character classes,
but it is still possible for the user to have an explicit newline
match in the regex, which could cause a zero-length match.

One could argue that having explicit newline matches or using '*'
rather than '+' are user errors, but it seems better to work around
them than to produce inaccurate diffs.

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
diff.c
t/t4034-diff-words.sh

diff --git a/diff.c b/diff.c
index 4acccd9d7edbb9e097a1d5b8b3bc2ff0ebf3acdb..c8b1d724349cf84073a9d9d9158e76288622569b 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -2053,7 +2053,7 @@ static void fn_out_diff_words_aux(void *priv,
 static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
                int *begin, int *end)
 {
-       if (word_regex && *begin < buffer->size) {
+       while (word_regex && *begin < buffer->size) {
                regmatch_t match[1];
                if (!regexec_buf(word_regex, buffer->ptr + *begin,
                                 buffer->size - *begin, 1, match, 0)) {
@@ -2061,9 +2061,13 @@ static int find_word_boundaries(mmfile_t *buffer, regex_t *word_regex,
                                        '\n', match[0].rm_eo - match[0].rm_so);
                        *end = p ? p - buffer->ptr : match[0].rm_eo + *begin;
                        *begin += match[0].rm_so;
-                       return *begin >= *end;
+                       if (*begin == *end)
+                               (*begin)++;
+                       else
+                               return *begin > *end;
+               } else {
+                       return -1;
                }
-               return -1;
        }
 
        /* find the next word */
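diff --git a/t/t4034-diff-words.sh b/t/t4034-diff-words.sh
index 56f1e62a97bff51505956e672b023d4f681aaf1d..17ceba9f6105de499b9391b01b0ffc97ea5f6204 100755 (executable)
--- a/t/t4034-diff-words.sh
+++ b/t/t4034-diff-words.sh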
@@ -184,6 +184,11 @@ test_expect_success 'word diff with a regular expression' '
        word_diff --color-words="[a-z]+"
 '
 
+test_expect_success 'word diff with zero length matches' '
+       cp expect.letter-runs-are-words expect &&
+       word_diff --color-words="[a-z${LF}]*"
+'
+
 test_expect_success 'set up a diff driver' '
        git config diff.testdriver.wordRegex "[^[:space:]]" &&
        cat <<-\EOF >.gitattributes