git.ipfire.org Git - thirdparty/git.git/commitdiff
blame: consult diff process for no-hunk detection
author: Michael Montalbo <mmontalbo@gmail.com>
Fri, 29 May 2026 20:48:19 +0000 (20:48 +0000)
committer: Junio C Hamano <gitster@pobox.com>
Fri, 29 May 2026 23:14:32 +0000 (08:14 +0900)
When a diff process is configured via diff.<driver>.process,
consult it during blame's per-commit diffing.  If the process
returns no hunks for a commit's changes to a file, treat the
commit as having no changes, causing blame to attribute lines
to earlier commits.

The consultation happens at the pass_blame_to_parent() callsite
using diff_process_fill_hunks(), matching how builtin_diff() in
diff.c uses the same function.  A new diff_hunks_xpp() variant
accepts a pre-populated xpparam_t for this callsite, while the
existing diff_hunks() retains its original signature and behavior.
The copy-detection callsite is unaffected since it does not use
the diff process.

The subprocess is long-running (one startup cost amortized
across the blame traversal), but each commit in the file's
history incurs a round-trip to the tool.

Signed-off-by: Michael Montalbo <mmontalbo@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
blame.c
t/t4080-diff-process.sh

diff --git a/blame.c b/blame.c
index 977cbb70974f8c345356c0c930223c6cd8383337..354e6c15f4eba3ad424beb7d48880ed81c3c2f32 100644 (file)
--- a/blame.c
+++ b/blame.c
@@ -19,6 +19,8 @@
 #include "tag.h"
 #include "trace2.h"
 #include "blame.h"
+#include "diff-process.h"
+#include "xdiff-interface.h"
 #include "alloc.h"
 #include "commit-slab.h"
 #include "bloom.h"
@@ -314,17 +316,25 @@ static struct commit *fake_working_tree_commit(struct repository *r,
 
 
 
-static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
-                     xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, int xdl_opts)
+static int diff_hunks_xpp(mmfile_t *file_a, mmfile_t *file_b,
+                         xdl_emit_hunk_consume_func_t hunk_func,
+                         void *cb_data, xpparam_t *xpp)
 {
-       xpparam_t xpp = {0};
        xdemitconf_t xecfg = {0};
        xdemitcb_t ecb = {NULL};
 
-       xpp.flags = xdl_opts;
        xecfg.hunk_func = hunk_func;
        ecb.priv = cb_data;
-       return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
+       return xdi_diff(file_a, file_b, xpp, &xecfg, &ecb);
+}
+
+static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
+                     xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, int xdl_opts)
+{
+       xpparam_t xpp = {0};
+
+       xpp.flags = xdl_opts;
+       return diff_hunks_xpp(file_a, file_b, hunk_func, cb_data, &xpp);
 }
 
 static const char *get_next_line(const char *start, const char *end)
@@ -1943,6 +1953,7 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
                                 struct blame_origin *parent, int ignore_diffs)
 {
        mmfile_t file_p, file_o;
+       xpparam_t xpp = {0};
        struct blame_chunk_cb_data d;
        struct blame_entry *newdest = NULL;
 
@@ -1961,10 +1972,21 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
                         &sb->num_read_blob, ignore_diffs);
        sb->num_get_patch++;
 
-       if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
-               die("unable to generate diff (%s -> %s)",
-                   oid_to_hex(&parent->commit->object.oid),
-                   oid_to_hex(&target->commit->object.oid));
+       xpp.flags = sb->xdl_opts;
+       /*
+        * If the diff process considers the files equivalent,
+        * skip the diff so blame looks past this commit.
+        */
+       if (diff_process_fill_hunks(&sb->revs->diffopt, target->path,
+                                   &file_p, &file_o, &xpp)
+           != DIFF_PROCESS_EQUIVALENT) {
+               if (diff_hunks_xpp(&file_p, &file_o, blame_chunk_cb,
+                                  &d, &xpp))
+                       die("unable to generate diff (%s -> %s)",
+                           oid_to_hex(&parent->commit->object.oid),
+                           oid_to_hex(&target->commit->object.oid));
+       }
+       free(xpp.external_hunks);
        /* The rest are the same as the parent */
        blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, 0,
                    parent, target, 0);
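diff --git a/t/t4080-diff-process.sh b/t/t4080-diff-process.sh
index ee0c306abd17e7da72420a0389813b836f10ff79..fdf6da1c341e676adc4d8a3091e9123e4a8e5256 100755 (executable)
--- a/t/t4080-diff-process.sh
+++ b/t/t4080-diff-process.sh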
@@ -551,4 +551,110 @@ test_expect_success PYTHON 'diff process fallback on overlapping hunks' '
        test_grep "NEW5" actual
 '
 
+#
+# Blame integration.
+#
+
+test_expect_success PYTHON 'blame uses tool-provided hunks' '
+       cat >blame-hunk.c <<-\EOF &&
+       line1
+       line2
+       line3
+       line4
+       original5
+       original6
+       line7
+       line8
+       line9
+       line10
+       EOF
+       git add blame-hunk.c &&
+       git commit -m "add blame-hunk.c" &&
+       ORIG=$(git rev-parse --short HEAD) &&
+
+       cat >blame-hunk.c <<-\EOF &&
+       line1
+       line2
+       line3
+       line4
+       changed5
+       changed6
+       line7
+       line8
+       changed9
+       changed10
+       EOF
+       git add blame-hunk.c &&
+       git commit -m "change blame-hunk.c" &&
+       CHANGE=$(git rev-parse --short HEAD) &&
+
+       # With fixed-hunk mode the tool reports only lines 5-6 as changed,
+       # so blame should attribute lines 9-10 to the original commit
+       # even though the builtin diff would show them as changed.
+       git -c diff.cdiff.process="$BACKEND --mode=fixed-hunk" \
+               blame blame-hunk.c >actual &&
+       sed -n "9p" actual >line9 &&
+       sed -n "10p" actual >line10 &&
+       test_grep "$ORIG" line9 &&
+       test_grep "$ORIG" line10 &&
+       sed -n "5p" actual >line5 &&
+       sed -n "6p" actual >line6 &&
+       test_grep "$CHANGE" line5 &&
+       test_grep "$CHANGE" line6
+'
+
+test_expect_success PYTHON 'blame skips commits with no hunks from diff process' '
+       cat >blame.c <<-\EOF &&
+       int main(void)
+       {
+           return 0;
+       }
+       EOF
+       git add blame.c &&
+       git commit -m "add blame.c" &&
+       ORIG_COMMIT=$(git rev-parse --short HEAD) &&
+
+       cat >blame.c <<-\EOF &&
+       int main(void)
+       {
+               return 0;
+       }
+       EOF
+       git add blame.c &&
+       git commit -m "reformat blame.c" &&
+       BLAME_COMMIT=$(git rev-parse --short HEAD) &&
+
+       # Without no-hunks mode, blame attributes the change.
+       git blame blame.c >without &&
+       test_grep "$BLAME_COMMIT" without &&
+
+       # With no-hunks mode, the process considers the files equivalent
+       # and blame skips the reformat commit, attributing to the original.
+       git -c diff.cdiff.process="$BACKEND --mode=no-hunks" \
+               blame blame.c >with &&
+       test_grep ! "$BLAME_COMMIT" with &&
+       test_grep "$ORIG_COMMIT" with
+'
+
+test_expect_success PYTHON 'blame --no-ext-diff bypasses diff process' '
+       rm -f backend.log &&
+       git -c diff.cdiff.process="$BACKEND --mode=no-hunks --log=backend.log" \
+               blame --no-ext-diff blame.c >actual &&
+       # Without the process, blame attributes the reformat commit normally.
+       test_grep "$BLAME_COMMIT" actual &&
+       test_path_is_missing backend.log
+'
+
+test_expect_success PYTHON 'blame --no-ext-diff uses builtin hunks' '
+       # fixed-hunk mode would narrow blame to lines 5-6, but
+       # --no-ext-diff should bypass it and use the builtin diff.
+       rm -f backend.log &&
+       git -c diff.cdiff.process="$BACKEND --mode=fixed-hunk --log=backend.log" \
+               blame --no-ext-diff blame-hunk.c >actual &&
+       # Builtin diff attributes lines 9-10 to the change commit.
+       sed -n "9p" actual >line9 &&
+       test_grep "$CHANGE" line9 &&
+       test_path_is_missing backend.log
+'
+
 test_done