Make generic: New macro EXTRA_CONTEXT_FIELDS, NOTE_DELETE, NOTE_INSERT.

author Bruno Haible <bruno@clisp.org>

Mon, 18 Dec 2006 13:21:20 +0000 (13:21 +0000)

committer Bruno Haible <bruno@clisp.org>

Mon, 18 Dec 2006 13:21:20 +0000 (13:21 +0000)
author Bruno Haible <bruno@clisp.org>
Mon, 18 Dec 2006 13:21:20 +0000 (13:21 +0000)
committer Bruno Haible <bruno@clisp.org>
Mon, 18 Dec 2006 13:21:20 +0000 (13:21 +0000)
diff --git a/lib/analyze.c b/lib/analyze.c

index 772925b297a7fdecddd53355ff7939d1cdea005a..1832a6ebc041c2e6349f04a9e780348b76adcafd 100644 (file)
--- a/lib/analyze.c
+++ b/lib/analyze.c
@@ -31,6 +31,9 @@
  #define ELEMENT lin
  #define EQUAL(x,y) ((x) == (y))
  #define OFFSET lin
+#define EXTRA_CONTEXT_FIELDS /* none */
+#define NOTE_DELETE(ctxt, xoff) files[0].changed[files[0].realindexes[xoff]] = 1
+#define NOTE_INSERT(ctxt, yoff) files[1].changed[files[1].realindexes[yoff]] = 1
  #define USE_HEURISTIC 1
  #include "diffseq.h"
  \f
diff --git a/lib/diffseq.h b/lib/diffseq.h

index 387578b6c60257c46465053555d40c6659da79c1..0f9d6eb56a4cab013780314b5b499c3191a24825 100644 (file)
--- a/lib/diffseq.h
+++ b/lib/diffseq.h
@@ -18,7 +18,7 @@
  
  /* The basic idea is to consider two vectors as similar if, when
     transforming the first vector into the second vector through a
-   sequence of edits (inserts and deletes of one character each),
+   sequence of edits (inserts and deletes of one element each),
     this sequence is short - or equivalently, if the ordered list
     of elements that are untouched by these edits is long.  For a
     good introduction to the subject, read about the "Levenshtein
@@ -45,6 +45,9 @@
       OFFSET                  A signed integer type sufficient to hold the
                               difference between two indices. Usually
                               something like ssize_t.
+     EXTRA_CONTEXT_FIELDS    Declarations of fields for 'struct context'.
+     NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff].
+     NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff].
       USE_HEURISTIC           (Optional) Define if you want to support the
                               heuristic for large vectors.  */
  
@@ -70,6 +73,9 @@ struct context
    const ELEMENT *xvec;
    const ELEMENT *yvec;
  
+  /* Extra fields. */
+  EXTRA_CONTEXT_FIELDS
+
    /* Vector, indexed by diagonal, containing 1 + the X coordinate of the point
       furthest along the given diagonal in the forward search of the edit
       matrix. */
@@ -440,10 +446,16 @@ compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
    /* Handle simple cases. */
    if (xoff == xlim)
      while (yoff < ylim)
-      files[1].changed[files[1].realindexes[yoff++]] = 1;
+      {
+       NOTE_INSERT (ctxt, yoff);
+       yoff++;
+      }
    else if (yoff == ylim)
      while (xoff < xlim)
-      files[0].changed[files[0].realindexes[xoff++]] = 1;
+      {
+       NOTE_DELETE (ctxt, xoff);
+       xoff++;
+      }
    else
      {
        struct partition part;
@@ -461,3 +473,6 @@ compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
  #undef EQUAL
  #undef OFFSET
  #undef OFFSET_MAX
+#undef EXTRA_CONTEXT_FIELDS
+#undef NOTE_DELETE
+#undef NOTE_INSERT
diff --git a/lib/fstrcmp.c b/lib/fstrcmp.c

index 217f8fc37a51593ef85aa87097a188eed79dc398..721cd6d1ed83df68c8ab26cb35619d2569b77c83 100644 (file)
--- a/lib/fstrcmp.c
+++ b/lib/fstrcmp.c
@@ -64,6 +64,12 @@
  #define ELEMENT char
  #define EQUAL(x,y) ((x) == (y))
  #define OFFSET int
+#define EXTRA_CONTEXT_FIELDS \
+  /* The number of elements inserted or deleted. */ \
+  int xvec_edit_count; \
+  int yvec_edit_count;
+#define NOTE_DELETE(ctxt, xoff) ctxt->xvec_edit_count++
+#define NOTE_INSERT(ctxt, yoff) ctxt->yvec_edit_count++
  /* We don't need USE_HEURISTIC, since it is unlikely in typical uses of
     fstrcmp().  */
  
@@ -74,6 +80,9 @@
       OFFSET                  A signed integer type sufficient to hold the
                               difference between two indices. Usually
                               something like ssize_t.
+     EXTRA_CONTEXT_FIELDS    Declarations of fields for 'struct context'.
+     NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff].
+     NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff].
       USE_HEURISTIC           (Optional) Define if you want to support the
                               heuristic for large vectors.  */
  
@@ -95,21 +104,13 @@
   */
  struct context
  {
-  /*
-   * Data on one input string being compared.
-   */
-  struct string_data
-  {
-    /* The string to be compared. */
-    const ELEMENT *data;
-
-    /* The length of the string to be compared. */
-    int data_length;
-
-    /* The number of elements inserted or deleted. */
-    int edit_count;
-  }
-  string[2];
+  /* Vectors being compared. */
+  const ELEMENT *xvec;
+  const ELEMENT *yvec;
+
+  /* The number of elements inserted or deleted. */
+  int xvec_edit_count;
+  int yvec_edit_count;
  
    /* Vector, indexed by diagonal, containing 1 + the X coordinate of the point
       furthest along the given diagonal in the forward search of the edit
@@ -182,8 +183,8 @@ diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal,
  {
    OFFSET *const fd = ctxt->fdiag;      /* Give the compiler a chance. */
    OFFSET *const bd = ctxt->bdiag;      /* Additional help for the compiler. */
-  const ELEMENT *const xv = ctxt->string[0].data;      /* Still more help for the compiler. */
-  const ELEMENT *const yv = ctxt->string[1].data;      /* And more and more . . . */
+  const ELEMENT *const xv = ctxt->xvec;        /* Still more help for the compiler. */
+  const ELEMENT *const yv = ctxt->yvec;        /* And more and more . . . */
    const OFFSET dmin = xoff - ylim;     /* Minimum valid diagonal. */
    const OFFSET dmax = xlim - yoff;     /* Maximum valid diagonal. */
    const OFFSET fmid = xoff - yoff;     /* Center diagonal of top-down search. */
@@ -462,8 +463,8 @@ static void
  compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
             bool find_minimal, struct context *ctxt)
  {
-  const ELEMENT *const xv = ctxt->string[0].data;      /* Help the compiler.  */
-  const ELEMENT *const yv = ctxt->string[1].data;
+  const ELEMENT *const xv = ctxt->xvec;        /* Help the compiler.  */
+  const ELEMENT *const yv = ctxt->yvec;
  
    /* Slide down the bottom initial diagonal. */
    while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
@@ -481,21 +482,17 @@ compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim,
  
    /* Handle simple cases. */
    if (xoff == xlim)
-    {
-      while (yoff < ylim)
-       {
-         ctxt->string[1].edit_count++;
-         ++yoff;
-       }
-    }
+    while (yoff < ylim)
+      {
+       NOTE_INSERT (ctxt, yoff);
+       yoff++;
+      }
    else if (yoff == ylim)
-    {
-      while (xoff < xlim)
-       {
-         ctxt->string[0].edit_count++;
-         ++xoff;
-       }
-    }
+    while (xoff < xlim)
+      {
+       NOTE_DELETE (ctxt, xoff);
+       xoff++;
+      }
    else
      {
        struct partition part;
@@ -553,6 +550,8 @@ double
  fstrcmp (const char *string1, const char *string2)
  {
    struct context ctxt;
+  int xvec_length;
+  int yvec_length;
    int i;
  
    size_t fdiag_len;
@@ -560,21 +559,21 @@ fstrcmp (const char *string1, const char *string2)
    size_t bufmax;
  
    /* set the info for each string.  */
-  ctxt.string[0].data = string1;
-  ctxt.string[0].data_length = strlen (string1);
-  ctxt.string[1].data = string2;
-  ctxt.string[1].data_length = strlen (string2);
+  ctxt.xvec = string1;
+  xvec_length = strlen (string1);
+  ctxt.yvec = string2;
+  yvec_length = strlen (string2);
  
    /* short-circuit obvious comparisons */
-  if (ctxt.string[0].data_length == 0 && ctxt.string[1].data_length == 0)
+  if (xvec_length == 0 && yvec_length == 0)
      return 1.0;
-  if (ctxt.string[0].data_length == 0 || ctxt.string[1].data_length == 0)
+  if (xvec_length == 0 || yvec_length == 0)
      return 0.0;
  
    /* Set TOO_EXPENSIVE to be approximate square root of input size,
       bounded below by 256.  */
    ctxt.too_expensive = 1;
-  for (i = ctxt.string[0].data_length + ctxt.string[1].data_length;
+  for (i = xvec_length + yvec_length;
         i != 0;
         i >>= 2)
      ctxt.too_expensive <<= 1;
@@ -582,7 +581,7 @@ fstrcmp (const char *string1, const char *string2)
      ctxt.too_expensive = 256;
  
    /* Allocate memory for fdiag and bdiag from a thread-local pool.  */
-  fdiag_len = ctxt.string[0].data_length + ctxt.string[1].data_length + 3;
+  fdiag_len = xvec_length + yvec_length + 3;
    gl_once (keys_init_once, keys_init);
    buffer = (int *) gl_tls_get (buffer_key);
    bufmax = (size_t) (uintptr_t) gl_tls_get (bufmax_key);
@@ -600,20 +599,20 @@ fstrcmp (const char *string1, const char *string2)
        gl_tls_set (buffer_key, buffer);
        gl_tls_set (bufmax_key, (void *) (uintptr_t) bufmax);
      }
-  ctxt.fdiag = buffer + ctxt.string[1].data_length + 1;
+  ctxt.fdiag = buffer + yvec_length + 1;
    ctxt.bdiag = ctxt.fdiag + fdiag_len;
  
    /* Now do the main comparison algorithm */
-  ctxt.string[0].edit_count = 0;
-  ctxt.string[1].edit_count = 0;
-  compareseq (0, ctxt.string[0].data_length, 0, ctxt.string[1].data_length, 0,
+  ctxt.xvec_edit_count = 0;
+  ctxt.yvec_edit_count = 0;
+  compareseq (0, xvec_length, 0, yvec_length, 0,
               &ctxt);
  
    /* The result is
         ((number of chars in common) / (average length of the strings)).
       This is admittedly biased towards finding that the strings are
       similar, however it does produce meaningful results.  */
-  return ((double) (ctxt.string[0].data_length + ctxt.string[1].data_length
-                   - ctxt.string[1].edit_count - ctxt.string[0].edit_count)
-         / (ctxt.string[0].data_length + ctxt.string[1].data_length));
+  return ((double) (xvec_length + yvec_length
+                   - ctxt.yvec_edit_count - ctxt.xvec_edit_count)
+         / (xvec_length + yvec_length));
  }
author	Bruno Haible <bruno@clisp.org>
	Mon, 18 Dec 2006 13:21:20 +0000 (13:21 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Mon, 18 Dec 2006 13:21:20 +0000 (13:21 +0000)
lib/analyze.c		patch \| blob \| blame \| history
lib/diffseq.h		patch \| blob \| blame \| history
lib/fstrcmp.c		patch \| blob \| blame \| history