]> git.ipfire.org Git - thirdparty/git.git/commitdiff
blame: make diff algorithm configurable
authorAntonin Delpeuch <antonin@delpeuch.eu>
Sat, 1 Nov 2025 21:57:32 +0000 (21:57 +0000)
committerJunio C Hamano <gitster@pobox.com>
Mon, 3 Nov 2025 18:23:01 +0000 (10:23 -0800)
The diff algorithm used in 'git-blame(1)' is set to 'myers',
without the possibility to change it aside from the `--minimal` option.

There has been long-standing interest in changing the default diff
algorithm to "histogram", and Git 3.0 was floated as a possible occasion
for taking some steps towards that:

https://lore.kernel.org/git/xmqqed873vgn.fsf@gitster.g/

As a preparation for this move, it is worth making sure that the diff
algorithm is configurable where useful.

Make it configurable in the `git-blame(1)` command by introducing the
`--diff-algorithm` option and make it honor the `diff.algorithm` config
variable. Keep Myers diff as the default.

Signed-off-by: Antonin Delpeuch <antonin@delpeuch.eu>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/diff-algorithm-option.adoc [new file with mode: 0644]
Documentation/diff-options.adoc
Documentation/git-blame.adoc
builtin/blame.c
t/meson.build
t/t8015-blame-diff-algorithm.sh [new file with mode: 0755]

diff --git a/Documentation/diff-algorithm-option.adoc b/Documentation/diff-algorithm-option.adoc
new file mode 100644 (file)
index 0000000..8e3a0b6
--- /dev/null
@@ -0,0 +1,20 @@
+`--diff-algorithm=(patience|minimal|histogram|myers)`::
+       Choose a diff algorithm. The variants are as follows:
++
+--
+   `default`;;
+   `myers`;;
+       The basic greedy diff algorithm. Currently, this is the default.
+   `minimal`;;
+       Spend extra time to make sure the smallest possible diff is
+       produced.
+   `patience`;;
+       Use "patience diff" algorithm when generating patches.
+   `histogram`;;
+       This algorithm extends the patience algorithm to "support
+       low-occurrence common elements".
+--
++
+For instance, if you configured the `diff.algorithm` variable to a
+non-default value and want to use the default one, then you
+have to use `--diff-algorithm=default` option.
index ae31520f7f1d130cefbe9f637715b4a96b9b2c66..9cdad6f72a0c7d9186166c3d8789c531732b74ea 100644 (file)
@@ -197,26 +197,7 @@ and starts with _<text>_, this algorithm attempts to prevent it from
 appearing as a deletion or addition in the output. It uses the "patience
 diff" algorithm internally.
 
-`--diff-algorithm=(patience|minimal|histogram|myers)`::
-       Choose a diff algorithm. The variants are as follows:
-+
---
-   `default`;;
-   `myers`;;
-       The basic greedy diff algorithm. Currently, this is the default.
-   `minimal`;;
-       Spend extra time to make sure the smallest possible diff is
-       produced.
-   `patience`;;
-       Use "patience diff" algorithm when generating patches.
-   `histogram`;;
-       This algorithm extends the patience algorithm to "support
-       low-occurrence common elements".
---
-+
-For instance, if you configured the `diff.algorithm` variable to a
-non-default value and want to use the default one, then you
-have to use `--diff-algorithm=default` option.
+include::diff-algorithm-option.adoc[]
 
 `--stat[=<width>[,<name-width>[,<count>]]]`::
        Generate a diffstat. By default, as much space as necessary
index e438d28625882669b7cb615fac7e6580479c0dbe..adcbb6f5dc97a3f42f5b619a04bb4924de13b906 100644 (file)
@@ -85,6 +85,8 @@ include::blame-options.adoc[]
        Ignore whitespace when comparing the parent's version and
        the child's to find where the lines came from.
 
+include::diff-algorithm-option.adoc[]
+
 --abbrev=<n>::
        Instead of using the default 7+1 hexadecimal digits as the
        abbreviated object name, use <m>+1 digits, where <m> is at
index 2703820258d5f9b2df07598cb03dd7d43640dc71..888ce708a696efec4939d984bf642190d26e0d7d 100644 (file)
@@ -779,6 +779,19 @@ static int git_blame_config(const char *var, const char *value,
                }
        }
 
+       if (!strcmp(var, "diff.algorithm")) {
+               long diff_algorithm;
+               if (!value)
+                       return config_error_nonbool(var);
+               diff_algorithm = parse_algorithm_value(value);
+               if (diff_algorithm < 0)
+                       return error(_("unknown value for config '%s': %s"),
+                                    var, value);
+               xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
+               xdl_opts |= diff_algorithm;
+               return 0;
+       }
+
        if (git_diff_heuristic_config(var, value, cb) < 0)
                return -1;
        if (userdiff_config(var, value) < 0)
@@ -824,6 +837,38 @@ static int blame_move_callback(const struct option *option, const char *arg, int
        return 0;
 }
 
+static int blame_diff_algorithm_minimal(const struct option *option,
+                                       const char *arg, int unset)
+{
+       int *opt = option->value;
+
+       BUG_ON_OPT_ARG(arg);
+       /* also drop XDF_NEED_MINIMAL so that --no-minimal really unsets it */
+       *opt &= ~(XDF_NEED_MINIMAL | XDF_DIFF_ALGORITHM_MASK);
+       if (!unset)
+               *opt |= XDF_NEED_MINIMAL;
+
+       return 0;
+}
+
+static int blame_diff_algorithm_callback(const struct option *option,
+                                        const char *arg, int unset)
+{
+       int *opt = option->value;
+       long value = parse_algorithm_value(arg);
+
+       BUG_ON_OPT_NEG(unset);
+
+       if (value < 0)
+               return error(_("option diff-algorithm accepts \"myers\", "
+                              "\"minimal\", \"patience\" and \"histogram\""));
+
+       *opt &= ~(XDF_NEED_MINIMAL | XDF_DIFF_ALGORITHM_MASK);
+       *opt |= value;
+
+       return 0;
+}
+
 static int is_a_rev(const char *name)
 {
        struct object_id oid;
@@ -915,11 +960,16 @@ int cmd_blame(int argc,
                OPT_BIT('s', NULL, &output_option, N_("suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
                OPT_BIT('e', "show-email", &output_option, N_("show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
                OPT_BIT('w', NULL, &xdl_opts, N_("ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
+               OPT_CALLBACK_F(0, "diff-algorithm", &xdl_opts, N_("<algorithm>"),
+                              N_("choose a diff algorithm"),
+                              PARSE_OPT_NONEG, blame_diff_algorithm_callback),
                OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("ignore <rev> when blaming")),
                OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("ignore revisions from <file>")),
                OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
                OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
-               OPT_BIT(0, "minimal", &xdl_opts, N_("spend extra cycles to find better match"), XDF_NEED_MINIMAL),
+               OPT_CALLBACK_F(0, "minimal", &xdl_opts, NULL,
+                              N_("spend extra cycles to find a better match"),
+                              PARSE_OPT_NOARG, blame_diff_algorithm_minimal),
                OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from <file> instead of calling git-rev-list")),
                OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use <file>'s contents as the final image")),
                OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback),
index 401b24e50e0499877673733d3e15bedc0da8bb9c..9f2fe7af8ba4c9082e6142bf10570d264e504e7d 100644 (file)
@@ -955,6 +955,7 @@ integration_tests = [
   't8012-blame-colors.sh',
   't8013-blame-ignore-revs.sh',
   't8014-blame-ignore-fuzzy.sh',
+  't8015-blame-diff-algorithm.sh',
   't8020-last-modified.sh',
   't9001-send-email.sh',
   't9002-column.sh',
diff --git a/t/t8015-blame-diff-algorithm.sh b/t/t8015-blame-diff-algorithm.sh
new file mode 100755 (executable)
index 0000000..5318e18
--- /dev/null
@@ -0,0 +1,203 @@
+#!/bin/sh
+
+test_description='git blame with specific diff algorithm'
+
+. ./test-lib.sh
+
+test_expect_success setup '
+       cat >file.c <<-\EOF &&
+       int f(int x, int y)
+       {
+         if (x == 0)
+         {
+           return y;
+         }
+         return x;
+       }
+
+       int g(size_t u)
+       {
+         while (u < 30)
+         {
+           u++;
+         }
+         return u;
+       }
+       EOF
+       test_write_lines x x x x >file.txt &&
+       git add file.c file.txt &&
+       GIT_AUTHOR_NAME=Commit_1 git commit -m Commit_1 &&
+
+       cat >file.c <<-\EOF &&
+       int g(size_t u)
+       {
+         while (u < 30)
+         {
+           u++;
+         }
+         return u;
+       }
+
+       int h(int x, int y, int z)
+       {
+         if (z == 0)
+         {
+           return x;
+         }
+         return y;
+       }
+       EOF
+       test_write_lines x x x A B C D x E F G >file.txt &&
+       git add file.c file.txt &&
+       GIT_AUTHOR_NAME=Commit_2 git commit -m Commit_2
+'
+
+test_expect_success 'blame uses Myers diff algorithm by default' '
+       cat >expected <<-\EOF &&
+       Commit_2 int g(size_t u)
+       Commit_1 {
+       Commit_2   while (u < 30)
+       Commit_1   {
+       Commit_2     u++;
+       Commit_1   }
+       Commit_2   return u;
+       Commit_1 }
+       Commit_1
+       Commit_2 int h(int x, int y, int z)
+       Commit_1 {
+       Commit_2   if (z == 0)
+       Commit_1   {
+       Commit_2     return x;
+       Commit_1   }
+       Commit_2   return y;
+       Commit_1 }
+       EOF
+
+       git blame file.c > output &&
+       sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > without_varying_parts &&
+       sed -e "s/ *$//g" without_varying_parts > actual &&
+       test_cmp expected actual
+'
+
+test_expect_success 'blame honors --diff-algorithm option' '
+       cat >expected <<-\EOF &&
+       Commit_1 int g(size_t u)
+       Commit_1 {
+       Commit_1   while (u < 30)
+       Commit_1   {
+       Commit_1     u++;
+       Commit_1   }
+       Commit_1   return u;
+       Commit_1 }
+       Commit_2
+       Commit_2 int h(int x, int y, int z)
+       Commit_2 {
+       Commit_2   if (z == 0)
+       Commit_2   {
+       Commit_2     return x;
+       Commit_2   }
+       Commit_2   return y;
+       Commit_2 }
+       EOF
+
+       git blame file.c --diff-algorithm histogram > output &&
+       sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > without_varying_parts &&
+       sed -e "s/ *$//g" without_varying_parts > actual &&
+       test_cmp expected actual
+'
+
+test_expect_success 'blame honors diff.algorithm config variable' '
+       cat >expected <<-\EOF &&
+       Commit_1 int g(size_t u)
+       Commit_1 {
+       Commit_1   while (u < 30)
+       Commit_1   {
+       Commit_1     u++;
+       Commit_1   }
+       Commit_1   return u;
+       Commit_1 }
+       Commit_2
+       Commit_2 int h(int x, int y, int z)
+       Commit_2 {
+       Commit_2   if (z == 0)
+       Commit_2   {
+       Commit_2     return x;
+       Commit_2   }
+       Commit_2   return y;
+       Commit_2 }
+       EOF
+
+       git -c diff.algorithm=histogram blame file.c > output &&
+       sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > without_varying_parts &&
+       sed -e "s/ *$//g" without_varying_parts > actual &&
+       test_cmp expected actual
+'
+
+test_expect_success 'blame gives priority to --diff-algorithm over diff.algorithm' '
+       cat >expected <<-\EOF &&
+       Commit_1 int g(size_t u)
+       Commit_1 {
+       Commit_1   while (u < 30)
+       Commit_1   {
+       Commit_1     u++;
+       Commit_1   }
+       Commit_1   return u;
+       Commit_1 }
+       Commit_2
+       Commit_2 int h(int x, int y, int z)
+       Commit_2 {
+       Commit_2   if (z == 0)
+       Commit_2   {
+       Commit_2     return x;
+       Commit_2   }
+       Commit_2   return y;
+       Commit_2 }
+       EOF
+
+       git -c diff.algorithm=myers blame file.c --diff-algorithm histogram > output &&
+       sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > without_varying_parts &&
+       sed -e "s/ *$//g" without_varying_parts > actual &&
+       test_cmp expected actual
+'
+
+test_expect_success 'blame honors --minimal option' '
+       cat >expected <<-\EOF &&
+       Commit_1 x
+       Commit_1 x
+       Commit_1 x
+       Commit_2 A
+       Commit_2 B
+       Commit_2 C
+       Commit_2 D
+       Commit_1 x
+       Commit_2 E
+       Commit_2 F
+       Commit_2 G
+       EOF
+
+       git blame file.txt --minimal > output &&
+       sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > actual &&
+       test_cmp expected actual
+'
+
+test_expect_success 'blame respects the order of diff options' '
+       cat >expected <<-\EOF &&
+       Commit_1 x
+       Commit_1 x
+       Commit_1 x
+       Commit_2 A
+       Commit_2 B
+       Commit_2 C
+       Commit_2 D
+       Commit_2 x
+       Commit_2 E
+       Commit_2 F
+       Commit_2 G
+       EOF
+
+       git blame file.txt --minimal --diff-algorithm myers > output &&
+       sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > actual &&
+       test_cmp expected actual
+'
+
+test_done