From: HarshK97 Date: Mon, 16 Mar 2026 19:23:45 +0000 (+0000) Subject: patch 9.2.0174: diff: inline word-diffs can be fragmented X-Git-Tag: v9.2.0174^0 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=42c6686c78d39843f71dba989a8ea59bc6975132;p=thirdparty%2Fvim.git patch 9.2.0174: diff: inline word-diffs can be fragmented Problem: When using 'diffopt=inline:word', lines were excessively fragmented with punctuation creating separate highlight blocks, making it harder to read the diffs. Solution: Added 'diff_refine_inline_word_highlight()' to merge adjacent diff blocks that are separated by small gaps of non-word characters (up to 5 bytes by default) (HarshK97). When using inline:word diff mode, adjacent changed words separated by punctuation or whitespace are now merged into a single highlight block if the gap between them is at most 5 bytes long and contains only non-word characters. This creates more readable diffs and closely matches GitHub's own diff display. closes: #19098 Signed-off-by: HarshK97 Signed-off-by: Christian Brabandt --- diff --git a/runtime/doc/options.txt b/runtime/doc/options.txt index 245089a3b2..bddfa99211 100644 --- a/runtime/doc/options.txt +++ b/runtime/doc/options.txt @@ -1,4 +1,4 @@ -*options.txt* For Vim version 9.2. Last change: 2026 Mar 13 +*options.txt* For Vim version 9.2. Last change: 2026 Mar 16 VIM REFERENCE MANUAL by Bram Moolenaar @@ -3210,7 +3210,10 @@ A jump table for the options with a short description can be found at |Q_op|. difference. Non-alphanumeric multi-byte characters such as emoji and CJK characters are considered - individual words. + individual words. Small gaps of + non-word characters (5 bytes or less) + between changes are merged into a + single highlight block. internal Use the internal diff library. This is ignored when 'diffexpr' is set. 
*E960*
diff --git a/runtime/doc/version9.txt b/runtime/doc/version9.txt
index cee00a282d..3a722afde5 100644
--- a/runtime/doc/version9.txt
+++ b/runtime/doc/version9.txt
@@ -1,4 +1,4 @@
-*version9.txt*  For Vim version 9.2.  Last change: 2026 Mar 13
+*version9.txt*  For Vim version 9.2.  Last change: 2026 Mar 16
 
 
 		  VIM REFERENCE MANUAL    by Bram Moolenaar
@@ -52588,6 +52588,13 @@ Popups ~
 - 'previewpopup' supports the same values as 'completepopup' (except for
   "align").
 
+Diff mode ~
+---------
+When using inline word diffs, adjacent changes are now merged if the gap
+between them is small (5 bytes or less) and contains only non-word characters.
+This prevents fragmented highlighting when only whitespace or punctuation
+separates changes.
+
 Other ~
 -----
 - The new |xdg.vim| script for full XDG compatibility is included.
diff --git a/src/diff.c b/src/diff.c
index 2c359a92f5..176394724d 100644
--- a/src/diff.c
+++ b/src/diff.c
@@ -49,6 +49,7 @@ static int diff_need_update = FALSE; // ex_diffupdate needs to be called
 static int diff_flags = DIFF_INTERNAL | DIFF_FILLER | DIFF_CLOSE_OFF | DIFF_INLINE_CHAR;
 
 static long diff_algorithm = XDF_INDENT_HEURISTIC;
+static int diff_word_gap = 5; // gap threshold for inline:word
 
 # define LBUFLEN 50	    // length of line in diff file
 
@@ -3391,6 +3392,155 @@ diff_refine_inline_char_highlight(diff_T *dp_orig, garray_T *linemap, int idx1)
     } while (pass++ < 4); // use limited number of passes to avoid excessive looping
 }
 
+/*
+ * Refine inline word diff blocks by merging blocks that are only separated
+ * by whitespace or punctuation.  This creates more coherent highlighting.
+ *
+ * "dp_orig" is the first inline diff block to consider; a block merged into
+ * its predecessor is unlinked from the list and passed to clear_diffblock().
+ * "linemap" has one array of linemap_entry_T per diff buffer; a block covers
+ * entries df_lnum - 1 up to df_lnum + df_count - 2.
+ * "idx1" selects the buffer in which the gap between blocks is measured.
+ * "start_lnum" is the buffer line that has a "lineoff" of zero.
+ *
+ * Two neighbouring blocks are merged when the gap between them is on a single
+ * line, is 1 to "diff_word_gap" bytes long, holds no word characters and the
+ * changed bytes of both blocks, summed over all diff buffers, are at least
+ * twice the gap size.
+ */
+    static void
+diff_refine_inline_word_highlight(diff_T *dp_orig, garray_T *linemap, int idx1,
+	linenr_T start_lnum)
+{
+    int pass = 1;
+    do
+    {
+	diff_T *dp = dp_orig;
+
+	while (dp != NULL && dp->df_next != NULL)
+	{
+	    // Index of the last token of this block and of the first token of
+	    // the next block in the linemap of buffer "idx1".
+	    linenr_T last1 = dp->df_lnum[idx1] + dp->df_count[idx1] - 2;
+	    linenr_T first2 = dp->df_next->df_lnum[idx1] - 1;
+
+	    // Both tokens must be inside the linemap.  "last1" is -1 when an
+	    // empty block sits before the first token; there is nothing in front
+	    // of the gap to measure from then.
+	    if (last1 < 0 || first2 < 0
+		    || last1 + 1 >= linemap[idx1].ga_len
+		    || first2 >= linemap[idx1].ga_len)
+	    {
+		dp = dp->df_next;
+		continue;
+	    }
+
+	    linemap_entry_T *entries = (linemap_entry_T *)linemap[idx1].ga_data;
+	    linemap_entry_T *entry1 = &entries[last1];
+	    linemap_entry_T *entry2 = &entries[first2];
+
+	    // Skip if blocks are on different lines
+	    if (entry1->lineoff != entry2->lineoff)
+	    {
+		dp = dp->df_next;
+		continue;
+	    }
+
+	    // Calculate the gap between blocks
+	    int gap_start = entry1->byte_start + entry1->num_bytes;
+	    int gap_end = entry2->byte_start;
+	    int gap_size = gap_end - gap_start;
+
+	    // Merge adjacent diff blocks separated by small gaps to reduce visual
+	    // fragmentation.  The threshold "diff_word_gap" handles most common
+	    // separators (spaces, punctuation) while still preserving visually
+	    // distinct changes.
+	    if (gap_size <= 0 || gap_size > diff_word_gap)
+	    {
+		dp = dp->df_next;
+		continue;
+	    }
+
+	    // Get the text between the two blocks
+	    char_u *line = ml_get_buf(curtab->tp_diffbuf[idx1],
+		    start_lnum + entry1->lineoff, FALSE);
+	    char_u *gap_text = line + gap_start;
+
+	    // Check if gap contains only whitespace and/or punctuation.  Step by
+	    // whole characters: a trail byte of a multi-byte character must not
+	    // be classified as if it were a character of its own.
+	    bool only_non_word = true;
+	    bool has_content = false;
+
+	    for (int i = 0; i < gap_size && gap_text[i] != NUL;
+		    i += (*mb_ptr2len)(gap_text + i))
+	    {
+		has_content = true;
+		int char_class = mb_get_class_buf(gap_text + i,
+			curtab->tp_diffbuf[idx1]);
+		// class 2 is word characters, if we find any, don't merge
+		if (char_class == 2)
+		{
+		    only_non_word = false;
+		    break;
+		}
+	    }
+
+	    // Merge if the gap is small and contains only non-word characters
+	    if (has_content && only_non_word)
+	    {
+		long total_change_bytes = 0;
+		for (int i = 0; i < DB_COUNT; i++)
+		{
+		    if (curtab->tp_diffbuf[i] != NULL)
+		    {
+			// count bytes in the first block
+			for (int k = 0; k < dp->df_count[i]; k++)
+			{
+			    int idx = dp->df_lnum[i] + k - 1;
+			    if (idx < linemap[i].ga_len)
+				total_change_bytes +=
+				    ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes;
+			}
+			// count bytes in the next block
+			for (int k = 0; k < dp->df_next->df_count[i]; k++)
+			{
+			    int idx = dp->df_next->df_lnum[i] + k - 1;
+			    if (idx < linemap[i].ga_len)
+				total_change_bytes +=
+				    ((linemap_entry_T *)linemap[i].ga_data)[idx].num_bytes;
+			}
+		    }
+		}
+
+		// Only merge when the changes outweigh the unchanged gap that
+		// would get highlighted along with them.
+		if (total_change_bytes >= gap_size * 2)
+		{
+		    // Merge the blocks by extending the first block to include the next
+		    for (int i = 0; i < DB_COUNT; i++)
+		    {
+			if (curtab->tp_diffbuf[i] != NULL)
+			{
+			    dp->df_count[i] = dp->df_next->df_lnum[i]
+				+ dp->df_next->df_count[i] - dp->df_lnum[i];
+			}
+		    }
+
+		    diff_T *dp_next = dp->df_next;
+		    dp->df_next = dp_next->df_next;
+		    clear_diffblock(dp_next);
+
+		    // Don't advance dp, check if can merge with the next block too
+		    continue;
+		}
+	    }
+
+	    dp = dp->df_next;
+	}
+    } while (pass++ < 4); // use limited number of passes to avoid excessive looping
+}
+
 /*
  * Find the inline difference within a diff block among different buffers.  Do
  * this by splitting each block's content into characters or words, and then
@@ -3656,7 +3806,9 @@ diff_find_change_inline_diff(
     }
 
     diff_T *new_diff = curtab->tp_first_diff;
-    if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1)
+    if (diff_flags & DIFF_INLINE_WORD && file1_idx != -1)
+	diff_refine_inline_word_highlight(new_diff, linemap, file1_idx, dp->df_lnum[file1_idx]);
+    else if (diff_flags & DIFF_INLINE_CHAR && file1_idx != -1)
 	diff_refine_inline_char_highlight(new_diff, linemap, file1_idx);
 
     // After the diff, use the linemap to obtain the original line/col of the
diff --git a/src/testdir/dumps/Test_diff_inline_04.dump b/src/testdir/dumps/Test_diff_inline_04.dump
index ed4f5a2a1c..e009998b8a 100644
--- a/src/testdir/dumps/Test_diff_inline_04.dump
+++ b/src/testdir/dumps/Test_diff_inline_04.dump
@@ -1,4 +1,4 @@
-| +0#0000e05#a8a8a8255@1|a+2#0000000#ff404010|b|c|d|e|f| +0&#ffd7ff255|g+2&#ff404010|h|i| +0&#ffd7ff255|j+2&#ff404010|k| +0&#ffd7ff255|n| @19||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|a+2#0000000#ff404010|B|c|e|f| +0&#ffd7ff255|g+2&#ff404010|H|i| +0&#ffd7ff255|l+2&#ff404010|m| +0&#ffd7ff255|n| @20 +| +0#0000e05#a8a8a8255@1|a+2#0000000#ff404010|b|c|d|e|f| |g|h|i| |j|k| +0&#ffd7ff255|n| @19||+1&#ffffff0| +0#0000e05#a8a8a8255@1|a+2#0000000#ff404010|B|c|e|f| |g|H|i| |l|m| +0&#ffd7ff255|n| @20 | +0#0000e05#a8a8a8255@1|x+0#0000000#5fd7ff255| @33||+1&#ffffff0| +0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34 | +0#0000e05#a8a8a8255@1|y+0#0000000#ffffff0| @33||+1&&| +0#0000e05#a8a8a8255@1|y+0#0000000#ffffff0| @33 | +0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34||+1#0000000#ffffff0| +0#0000e05#a8a8a8255@1|z+0#0000000#5fd7ff255| @33 diff --git a/src/testdir/dumps/Test_diff_inline_multibuffer_04.dump b/src/testdir/dumps/Test_diff_inline_multibuffer_04.dump index 3da504b6bf..da6ee8415c 100644 --- a/src/testdir/dumps/Test_diff_inline_multibuffer_04.dump +++ b/src/testdir/dumps/Test_diff_inline_multibuffer_04.dump @@ -1,4 +1,4 @@ -| +0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|i|s|+|i|s|=+0&#ffd7ff255|a+2&#ff404010|-+0&#ffd7ff255|s|e|t|e|n|c|e| @5||+1&#ffffff0| +0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|i|s|+|i|s|=+0&#ffd7ff255|a+2&#ff404010|n|o|t|h|e|r|-+0&#ffd7ff255|s|e|t|e|n|c||+1&#ffffff0| +0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|a|t|+|i|s|=+0&#ffd7ff255|a+2&#ff404010|-+0&#ffd7ff255|s|e|t|e|n|c|e| @4 +| +0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|i|s|+|i|s|=|a|-+0&#ffd7ff255|s|e|t|e|n|c|e| @5||+1&#ffffff0| +0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|i|s|+|i|s|=|a|n|o|t|h|e|r|-+0&#ffd7ff255|s|e|t|e|n|c||+1&#ffffff0| +0#0000e05#a8a8a8255@1|T+2#0000000#ff404010|h|a|t|+|i|s|=|a|-+0&#ffd7ff255|s|e|t|e|n|c|e| @4 |~+0#4040ff13#ffffff0| @23||+1#0000000&|~+0#4040ff13&| @22||+1#0000000&|~+0#4040ff13&| @22 |~| @23||+1#0000000&|~+0#4040ff13&| @22||+1#0000000&|~+0#4040ff13&| @22 |~| @23||+1#0000000&|~+0#4040ff13&| @22||+1#0000000&|~+0#4040ff13&| @22 diff --git a/src/testdir/dumps/Test_diff_inline_multiline_02.dump 
b/src/testdir/dumps/Test_diff_inline_multiline_02.dump index c7bf4cb6ed..9a0893842b 100644 --- a/src/testdir/dumps/Test_diff_inline_multiline_02.dump +++ b/src/testdir/dumps/Test_diff_inline_multiline_02.dump @@ -1,6 +1,6 @@ -| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| @2|i|s| @25||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| |i|s| |s+2&#ff404010|o|m|e| +0&#ffd7ff255|t+2&#ff404010|e|s|t| +0&#ffd7ff255@17 -| +0#0000e05#a8a8a8255@1|s+2#0000000#ff404010|o|m|e|t|e|s|t| +0&#ffd7ff255|t+2&#ff404010|e|x|t| +0&#ffd7ff255|f+2&#ff404010|o@1| +0&#ffd7ff255@17||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+2#0000000#ff404010|e|x|t|s| +0&#ffd7ff255@29 -| +0#0000e05#a8a8a8255@1|b+2#0000000#ff404010|a|z| +0&#ffd7ff255|a+2&#ff404010|b|c| +0&#ffd7ff255|d+2&#ff404010|e|f| +0&#ffd7ff255@23||+1&#ffffff0| +0#0000e05#a8a8a8255@1|f+2#0000000#ff404010|o@1| +0&#ffd7ff255|b+2&#ff404010|a|r| +0&#ffd7ff255|a+2&#ff404010|b|X| +0&#ffd7ff255|Y+2&#ff404010|e|f| +0&#ffd7ff255@19 +| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| @2|i|s| @25||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| |i|s| |s+2&#ff404010|o|m|e| |t|e|s|t| +0&#ffd7ff255@17 +| +0#0000e05#a8a8a8255@1|s+2#0000000#ff404010|o|m|e|t|e|s|t| |t|e|x|t| |f|o@1| +0&#ffd7ff255@17||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+2#0000000#ff404010|e|x|t|s| +0&#ffd7ff255@29 +| +0#0000e05#a8a8a8255@1|b+2#0000000#ff404010|a|z| |a|b|c| |d|e|f| +0&#ffd7ff255@23||+1&#ffffff0| +0#0000e05#a8a8a8255@1|f+2#0000000#ff404010|o@1| |b|a|r| |a|b|X| +0&#ffd7ff255|Y+2&#ff404010|e|f| +0&#ffd7ff255@19 | +0#0000e05#a8a8a8255@1|o+2#0000000#ff404010|n|e| +0&#ffd7ff255@31||+1&#ffffff0| +0#0000e05#a8a8a8255@1|o+2#0000000#ff404010|n|e|w|o|r|d| +0&#ffd7ff255|a|n|o|t|h|e|r| |w|o|r|d| @14 | +0#0000e05#a8a8a8255@1|w+2#0000000#ff404010|o|r|d| +0&#ffd7ff255|a|n|o|t|h|e|r| |w|o|r|d| @17||+1&#ffffff0| +0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34 | +0#0000e05#a8a8a8255@1|a+0#0000000#5fd7ff255|d@1|i|t|i|o|n|a|l| |l|i|n|e| 
@19||+1&#ffffff0| +0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34 diff --git a/src/testdir/dumps/Test_diff_inline_multiline_04.dump b/src/testdir/dumps/Test_diff_inline_multiline_04.dump index 4aef70cb37..3ab991a53d 100644 --- a/src/testdir/dumps/Test_diff_inline_multiline_04.dump +++ b/src/testdir/dumps/Test_diff_inline_multiline_04.dump @@ -1,6 +1,6 @@ -| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| | +0࿈ff13@1|i+0&#ffd7ff255|s| @25||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| |i|s| |s+2&#ff404010|o|m|e| +0&#ffd7ff255|t+2&#ff404010|e|s|t| +0&#ffd7ff255@17 -| +0#0000e05#a8a8a8255@1|s+2#0000000#ff404010|o|m|e|t|e|s|t| +0&#ffd7ff255|t+2&#ff404010|e|x|t| +0&#ffd7ff255|f|o@1| @17||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+2#0000000#ff404010|e|x|t|s| +0&#ffd7ff255@29 -| +0#0000e05#a8a8a8255@1|b+2#0000000#ff404010|a|z| +0&#ffd7ff255|a+2&#ff404010|b|c| +0&#ffd7ff255|d+2&#ff404010|e|f| +0&#ffd7ff255@23||+1&#ffffff0| +0#0000e05#a8a8a8255@1|f+0#0000000#ffd7ff255|o@1| |b+2&#ff404010|a|r| +0&#ffd7ff255|a+2&#ff404010|b|X| +0&#ffd7ff255|Y+2&#ff404010|e|f| +0&#ffd7ff255@19 +| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| | +0࿈ff13@1|i+0&#ffd7ff255|s| @25||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+0#0000000#ffd7ff255|h|i|s| |i|s| |s+2&#ff404010|o|m|e| |t|e|s|t| +0&#ffd7ff255@17 +| +0#0000e05#a8a8a8255@1|s+2#0000000#ff404010|o|m|e|t|e|s|t| |t|e|x|t| +0&#ffd7ff255|f|o@1| @17||+1&#ffffff0| +0#0000e05#a8a8a8255@1|t+2#0000000#ff404010|e|x|t|s| +0&#ffd7ff255@29 +| +0#0000e05#a8a8a8255@1|b+2#0000000#ff404010|a|z| |a|b|c| |d|e|f| +0&#ffd7ff255@23||+1&#ffffff0| +0#0000e05#a8a8a8255@1|f+0#0000000#ffd7ff255|o@1| |b+2&#ff404010|a|r| |a|b|X| |Y|e|f| +0&#ffd7ff255@19 | +0#0000e05#a8a8a8255@1|o+2#0000000#ff404010|n|e| +0&#ffd7ff255@31||+1&#ffffff0| +0#0000e05#a8a8a8255@1|o+2#0000000#ff404010|n|e|w|o|r|d| +0&#ffd7ff255|a|n|o|t|h|e|r| |w|o|r|d| @14 | +0#0000e05#a8a8a8255@1|w+2#0000000#ff404010|o|r|d| +0&#ffd7ff255|a|n|o|t|h|e|r| |w|o|r|d| @17||+1&#ffffff0| 
+0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34 | +0#0000e05#a8a8a8255@1|a+0#0000000#5fd7ff255|d@1|i|t|i|o|n|a|l| |l|i|n|e| @19||+1&#ffffff0| +0#0000e05#a8a8a8255@1|-+0#4040ff13#afffff255@34 diff --git a/src/testdir/dumps/Test_diff_inline_word_03.dump b/src/testdir/dumps/Test_diff_inline_word_03.dump index 30efaed3a6..a04cf50fab 100644 --- a/src/testdir/dumps/Test_diff_inline_word_03.dump +++ b/src/testdir/dumps/Test_diff_inline_word_03.dump @@ -1,4 +1,4 @@ -| +0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|⛵️*2&#ff404010|一*0&#ffd7ff255|二|三*2&#ff404010|ひ*0&#ffd7ff255|ら|が*0࿈ff13|な*0&#ffd7ff255|Δ+2&#ff404010|έ|λ|τ|α| +0&#ffd7ff255|Δ+2&#ff404010|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a||+1&#ffffff0| +0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|🛸*2&#ff404010|一*0&#ffd7ff255|二|四*2&#ff404010|ひ*0&#ffd7ff255|ら|な|δ+2&#ff404010|έ|λ|τ|α| +0&#ffd7ff255|δ+2&#ff404010|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a|r| +| +0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|⛵️*2&#ff404010|一*0&#ffd7ff255|二|三*2&#ff404010|ひ*0&#ffd7ff255|ら|が*2&#ff404010|な|Δ+&|έ|λ|τ|α| |Δ|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a||+1&#ffffff0| +0#0000e05#a8a8a8255@1|🚀*0#0000000#ffd7ff255|🛸*2&#ff404010|一*0&#ffd7ff255|二|四*2&#ff404010|ひ*0&#ffd7ff255|ら|な*2&#ff404010|δ+&|έ|λ|τ|α| |δ|e|l|t|a| +0&#ffd7ff255|f|o@1|b|a|r| |~+0#4040ff13#ffffff0| @35||+1#0000000&|~+0#4040ff13&| @35 |~| @35||+1#0000000&|~+0#4040ff13&| @35 |~| @35||+1#0000000&|~+0#4040ff13&| @35 diff --git a/src/version.c b/src/version.c index 50f92520a3..d40a5f1476 100644 --- a/src/version.c +++ b/src/version.c @@ -734,6 +734,8 @@ static char *(features[]) = static int included_patches[] = { /* Add new patch number below this line */ +/**/ + 174, /**/ 173, /**/