xdiff: remove dependence on xdlclassifier from xdl_cleanup_records()

author Ezekiel Newren <ezekielnewren@gmail.com>

Fri, 2 Jan 2026 18:52:23 +0000 (18:52 +0000)

committer Junio C Hamano <gitster@pobox.com>

Sun, 4 Jan 2026 02:44:52 +0000 (11:44 +0900)
author Ezekiel Newren <ezekielnewren@gmail.com>
Fri, 2 Jan 2026 18:52:23 +0000 (18:52 +0000)
committer Junio C Hamano <gitster@pobox.com>
Sun, 4 Jan 2026 02:44:52 +0000 (11:44 +0900)
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c

index d3cdb6ac02fb589170b6ed9f7bbaeb62329ba766..b53a3b80c4dae4f3479006ee60c29817e100be8e 100644 (file)
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -21,6 +21,7 @@
   */
  
  #include "xinclude.h"
+#include "compat/ivec.h"
  
  
  #define XDL_KPDIS_RUN 4
@@ -35,7 +36,6 @@ typedef struct s_xdlclass {
         struct s_xdlclass *next;
         xrecord_t rec;
         long idx;
-       long len1, len2;
  } xdlclass_t;
  
  typedef struct s_xdlclassifier {
@@ -92,7 +92,7 @@ static void xdl_free_classifier(xdlclassifier_t *cf) {
  }
  
  
-static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec) {
+static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t *rec) {
         size_t hi;
         xdlclass_t *rcrec;
  
@@ -113,13 +113,10 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
                                 return -1;
                 cf->rcrecs[rcrec->idx] = rcrec;
                 rcrec->rec = *rec;
-               rcrec->len1 = rcrec->len2 = 0;
                 rcrec->next = cf->rchash[hi];
                 cf->rchash[hi] = rcrec;
         }
  
-       (pass == 1) ? rcrec->len1++ : rcrec->len2++;
-
         rec->minimal_perfect_hash = (size_t)rcrec->idx;
  
         return 0;
@@ -253,22 +250,44 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) {
         return rpdis1 * XDL_KPDIS_RUN < (rpdis1 + rdis1);
  }
  
+struct xoccurrence
+{
+       size_t file1, file2;
+};
+
+
+DEFINE_IVEC_TYPE(struct xoccurrence, xoccurrence);
+
  
  /*
   * Try to reduce the problem complexity, discard records that have no
   * matches on the other file. Also, lines that have multiple matches
   * might be potentially discarded if they appear in a run of discardable.
   */
-static int xdl_cleanup_records(xdlclassifier_t *cf, xdfenv_t *xe) {
-       long i, nm, mlim;
+static int xdl_cleanup_records(xdfenv_t *xe, uint64_t flags) {
+       long i;
+       size_t nm, mlim;
         xrecord_t *recs;
-       xdlclass_t *rcrec;
         uint8_t *action1 = NULL, *action2 = NULL;
-       bool need_min = !!(cf->flags & XDF_NEED_MINIMAL);
+       struct IVec_xoccurrence occ;
+       bool need_min = !!(flags & XDF_NEED_MINIMAL);
         int ret = 0;
         ptrdiff_t dend1 = xe->xdf1.nrec - 1 - xe->delta_end;
         ptrdiff_t dend2 = xe->xdf2.nrec - 1 - xe->delta_end;
  
+       IVEC_INIT(occ);
+       ivec_zero(&occ, xe->mph_size);
+
+       for (size_t j = 0; j < xe->xdf1.nrec; j++) {
+               size_t mph1 = xe->xdf1.recs[j].minimal_perfect_hash;
+               occ.ptr[mph1].file1 += 1;
+       }
+
+       for (size_t j = 0; j < xe->xdf2.nrec; j++) {
+               size_t mph2 = xe->xdf2.recs[j].minimal_perfect_hash;
+               occ.ptr[mph2].file2 += 1;
+       }
+
         /*
          * Create temporary arrays that will help us decide if
          * changed[i] should remain false, or become true.
@@ -288,16 +307,14 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfenv_t *xe) {
         if ((mlim = xdl_bogosqrt((long)xe->xdf1.nrec)) > XDL_MAX_EQLIMIT)
                 mlim = XDL_MAX_EQLIMIT;
         for (i = xe->delta_start, recs = &xe->xdf1.recs[xe->delta_start]; i <= dend1; i++, recs++) {
-               rcrec = cf->rcrecs[recs->minimal_perfect_hash];
-               nm = rcrec ? rcrec->len2 : 0;
+               nm = occ.ptr[recs->minimal_perfect_hash].file2;
                 action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
         }
  
         if ((mlim = xdl_bogosqrt((long)xe->xdf2.nrec)) > XDL_MAX_EQLIMIT)
                 mlim = XDL_MAX_EQLIMIT;
         for (i = xe->delta_start, recs = &xe->xdf2.recs[xe->delta_start]; i <= dend2; i++, recs++) {
-               rcrec = cf->rcrecs[recs->minimal_perfect_hash];
-               nm = rcrec ? rcrec->len1 : 0;
+               nm = occ.ptr[recs->minimal_perfect_hash].file1;
                 action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
         }
  
@@ -332,6 +349,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfenv_t *xe) {
  cleanup:
         xdl_free(action1);
         xdl_free(action2);
+       ivec_free(&occ);
  
         return ret;
  }
@@ -387,18 +405,20 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
  
         for (size_t i = 0; i < xe->xdf1.nrec; i++) {
                 xrecord_t *rec = &xe->xdf1.recs[i];
-               xdl_classify_record(1, &cf, rec);
+               xdl_classify_record(&cf, rec);
         }
  
         for (size_t i = 0; i < xe->xdf2.nrec; i++) {
                 xrecord_t *rec = &xe->xdf2.recs[i];
-               xdl_classify_record(2, &cf, rec);
+               xdl_classify_record(&cf, rec);
         }
  
+       xe->mph_size = cf.count;
+
         xdl_trim_ends(xe);
         if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
             (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
-           xdl_cleanup_records(&cf, xe) < 0) {
+           xdl_cleanup_records(xe, xpp->flags) < 0) {
  
                 xdl_free_ctx(&xe->xdf2);
                 xdl_free_ctx(&xe->xdf1);
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h

index a9393960645d631977f8161ccb741ff1811af3b8..2528bd37e89d4a091d408f6f74b45503f6ba3314 100644 (file)
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -56,6 +56,7 @@ typedef struct s_xdfile {
  typedef struct s_xdfenv {
         xdfile_t xdf1, xdf2;
         size_t delta_start, delta_end;
+       size_t mph_size;
  } xdfenv_t;
author	Ezekiel Newren <ezekielnewren@gmail.com>
	Fri, 2 Jan 2026 18:52:23 +0000 (18:52 +0000)
committer	Junio C Hamano <gitster@pobox.com>
	Sun, 4 Jan 2026 02:44:52 +0000 (11:44 +0900)
xdiff/xprepare.c		patch \| blob \| blame \| history
xdiff/xtypes.h		patch \| blob \| blame \| history