git.ipfire.org Git - thirdparty/git.git/commitdiff
diff histogram: intern strings
author: Phillip Wood <phillip.wood@dunelm.org.uk>
Wed, 17 Nov 2021 11:20:23 +0000 (11:20 +0000)
committer: Junio C Hamano <gitster@pobox.com>
Fri, 19 Nov 2021 06:23:31 +0000 (22:23 -0800)
Histogram is the only diff algorithm not to call
xdl_classify_record(). xdl_classify_record() ensures that the hash
values of two strings that are not equal differ, which means that it is
not necessary to use xdl_recmatch() when comparing lines; all that is
necessary is to compare the hash values. Calling it for the histogram
algorithm as well gives a 7% reduction in the runtime of
"git log --patch" when using the histogram diff algorithm.

Test                                  HEAD^             HEAD
-----------------------------------------------------------------------------
4000.1: log -3000 (baseline)          0.18(0.14+0.04)   0.19(0.17+0.02) +5.6%
4000.2: log --raw -3000 (tree-only)   0.99(0.77+0.21)   0.98(0.78+0.20) -1.0%
4000.3: log -p -3000 (Myers)          4.84(4.31+0.51)   4.81(4.15+0.64) -0.6%
4000.4: log -p -3000 --histogram      6.34(5.86+0.46)   5.87(5.19+0.66) -7.4%
4000.5: log -p -3000 --patience       5.39(4.60+0.76)   5.35(4.60+0.73) -0.7%

Signed-off-by: Phillip Wood <phillip.wood@dunelm.org.uk>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
xdiff/xhistogram.c
xdiff/xprepare.c

index e694bfd9e31d54f1925a730a75b0ef6d9a4e6d95..6c1c88a69a1c99672f2e6064abb1477cc1dac36e 100644 (file)
@@ -91,9 +91,8 @@ struct region {
 static int cmp_recs(xpparam_t const *xpp,
        xrecord_t *r1, xrecord_t *r2)
 {
-       return r1->ha == r2->ha &&
-               xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size,
-                           xpp->flags);
+       return r1->ha == r2->ha;
+
 }
 
 #define CMP_ENV(xpp, env, s1, l1, s2, l2) \
index abeb8fb84e6d73086d612b831963a227e35743b8..7fae0727a026774ac3b48c6fe589fd4cfd5fae5f 100644 (file)
@@ -181,15 +181,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
        if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
                goto abort;
 
-       if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
-               hbits = hsize = 0;
-       else {
-               hbits = xdl_hashbits((unsigned int) narec);
-               hsize = 1 << hbits;
-               if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
-                       goto abort;
-               memset(rhash, 0, hsize * sizeof(xrecord_t *));
-       }
+       hbits = xdl_hashbits((unsigned int) narec);
+       hsize = 1 << hbits;
+       if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
+               goto abort;
+       memset(rhash, 0, hsize * sizeof(xrecord_t *));
 
        nrec = 0;
        if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
@@ -208,9 +204,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
                        crec->size = (long) (cur - prev);
                        crec->ha = hav;
                        recs[nrec++] = crec;
-
-                       if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-                           xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
+                       if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
                                goto abort;
                }
        }
@@ -279,8 +273,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
        enl1 = xdl_guess_lines(mf1, sample) + 1;
        enl2 = xdl_guess_lines(mf2, sample) + 1;
 
-       if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF &&
-           xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
+       if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
                return -1;
 
        if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
@@ -305,8 +298,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
                return -1;
        }
 
-       if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)
-               xdl_free_classifier(&cf);
+       xdl_free_classifier(&cf);
 
        return 0;
 }