Make fstrcmp multithread-safe.

author Bruno Haible <bruno@clisp.org>

Thu, 27 Jul 2006 12:17:23 +0000 (12:17 +0000)

committer Bruno Haible <bruno@clisp.org>

Tue, 23 Jun 2009 10:13:42 +0000 (12:13 +0200)
author Bruno Haible <bruno@clisp.org>
Thu, 27 Jul 2006 12:17:23 +0000 (12:17 +0000)
committer Bruno Haible <bruno@clisp.org>
Tue, 23 Jun 2009 10:13:42 +0000 (12:13 +0200)
diff --git a/gettext-tools/lib/ChangeLog b/gettext-tools/lib/ChangeLog

index 6c724ca7dc1d04a4de47bd1bbe3e39d44a83a499..d98d57734285b4a177a0110ab96e070eae29fb81 100644 (file)
--- a/gettext-tools/lib/ChangeLog
+++ b/gettext-tools/lib/ChangeLog
@@ -1,3 +1,20 @@
+2006-07-23  Bruno Haible  <bruno@clisp.org>
+
+       Make fstrcmp multithread-safe.
+       * lock.h: New file, from gnulib.
+       * lock.c: New file, from gnulib.
+       * tls.h: New file, from gnulib.
+       * tls.c: New file, from gnulib.
+       * Makefile.am (libgettextlib_la_SOURCES): Add them.
+       * fstrcmp.c: Include lock.h, tls.h.
+       (uintptr_t): New macro.
+       (struct context): New structure, grouping all variables.
+       (diag, compareseq): Add context argument.
+       (buffer_key, bufmax_key): New variables.
+       (keys_init): New functions.
+       (keys_init_once): New variable.
+       (fstrcmp): Establish a struct context. Allocate memory per-thread.
+
  2006-07-22  Bruno Haible  <bruno@clisp.org>
  
         * gl_anyhash_list1.h: New file, from gnulib.
diff --git a/gettext-tools/lib/Makefile.am b/gettext-tools/lib/Makefile.am

index d93b51de6c27579a337297b9989d368d6ff9e0c1..bc9bc2ae55b7d8e70b61509086fa1159068fb39a 100644 (file)
--- a/gettext-tools/lib/Makefile.am
+++ b/gettext-tools/lib/Makefile.am
@@ -66,6 +66,7 @@ libgettextlib_la_SOURCES = \
    javaexec.h javaexec.c \
    javaversion.h javaversion.c \
    linebreak.h linebreak.c lbrkprop.h utf8-ucs4.h utf16-ucs4.h \
+  lock.h lock.c \
    mbswidth.h mbswidth.c \
    minmax.h \
    obstack.h obstack.c \
@@ -80,6 +81,7 @@ libgettextlib_la_SOURCES = \
    safe-write.h safe-write.c \
    sh-quote.h sh-quote.c \
    size_max.h \
+  tls.h tls.c \
    tmpdir.h tmpdir.c \
    ucs4-utf8.h ucs4-utf16.h \
    unlocked-io.h \
diff --git a/gettext-tools/lib/fstrcmp.c b/gettext-tools/lib/fstrcmp.c

index 6907c054e0370312e6049f6fef25d7941756fe6a..dadc82b4076358e62e585a57b5b75379c9aa114f 100644 (file)
--- a/gettext-tools/lib/fstrcmp.c
+++ b/gettext-tools/lib/fstrcmp.c
@@ -52,56 +52,66 @@
  
  #include <string.h>
  #include <stdio.h>
+#include <stdlib.h>
  #include <limits.h>
  
+#include "lock.h"
+#include "tls.h"
  #include "xalloc.h"
  
+#ifndef uintptr_t
+# define uintptr_t unsigned long
+#endif
+
  
  /*
- * Data on one input string being compared.
+ * Context of comparison operation.
   */
-struct string_data
+struct context
  {
-  /* The string to be compared. */
-  const char *data;
-
-  /* The length of the string to be compared. */
-  int data_length;
-
-  /* The number of characters inserted or deleted. */
-  int edit_count;
-};
+  /*
+   * Data on one input string being compared.
+   */
+  struct string_data
+  {
+    /* The string to be compared. */
+    const char *data;
  
-static struct string_data string[2];
+    /* The length of the string to be compared. */
+    int data_length;
  
+    /* The number of characters inserted or deleted. */
+    int edit_count;
+  }
+  string[2];
  
-#ifdef MINUS_H_FLAG
+  #ifdef MINUS_H_FLAG
  
-/* This corresponds to the diff -H flag.  With this heuristic, for
-   strings with a constant small density of changes, the algorithm is
-   linear in the strings size.  This is unlikely in typical uses of
-   fstrcmp, and so is usually compiled out.  Besides, there is no
-   interface to set it true.  */
-static int heuristic;
-
-#endif
+  /* This corresponds to the diff -H flag.  With this heuristic, for
+     strings with a constant small density of changes, the algorithm is
+     linear in the strings size.  This is unlikely in typical uses of
+     fstrcmp, and so is usually compiled out.  Besides, there is no
+     interface to set it true.  */
+  int heuristic;
  
+  #endif
  
-/* Vector, indexed by diagonal, containing 1 + the X coordinate of the
-   point furthest along the given diagonal in the forward search of the
-   edit matrix.  */
-static int *fdiag;
+  /* Vector, indexed by diagonal, containing 1 + the X coordinate of the
+     point furthest along the given diagonal in the forward search of the
+     edit matrix.  */
+  int *fdiag;
  
-/* Vector, indexed by diagonal, containing the X coordinate of the point
-   furthest along the given diagonal in the backward search of the edit
-   matrix.  */
-static int *bdiag;
+  /* Vector, indexed by diagonal, containing the X coordinate of the point
+     furthest along the given diagonal in the backward search of the edit
+     matrix.  */
+  int *bdiag;
  
-/* Edit scripts longer than this are too expensive to compute.  */
-static int too_expensive;
+  /* Edit scripts longer than this are too expensive to compute.  */
+  int too_expensive;
  
-/* Snakes bigger than this are considered `big'.  */
-#define SNAKE_LIMIT    20
+  /* Snakes bigger than this are considered `big'.  */
+  #define SNAKE_LIMIT  20
+};
  
  struct partition
  {
@@ -121,7 +131,7 @@ struct partition
  
     SYNOPSIS
         int diag(int xoff, int xlim, int yoff, int ylim, int minimal,
-                struct partition *part);
+                struct partition *part, struct context *ctxt);
  
     DESCRIPTION
         Find the midpoint of the shortest edit script for a specified
@@ -162,12 +172,12 @@ struct partition
  
  static int
  diag (int xoff, int xlim, int yoff, int ylim, int minimal,
-      struct partition *part)
+      struct partition *part, struct context *ctxt)
  {
-  int *const fd = fdiag;       /* Give the compiler a chance. */
-  int *const bd = bdiag;       /* Additional help for the compiler. */
-  const char *const xv = string[0].data;       /* Still more help for the compiler. */
-  const char *const yv = string[1].data;       /* And more and more . . . */
+  int *const fd = ctxt->fdiag; /* Give the compiler a chance. */
+  int *const bd = ctxt->bdiag; /* Additional help for the compiler. */
+  const char *const xv = ctxt->string[0].data; /* Still more help for the compiler. */
+  const char *const yv = ctxt->string[1].data; /* And more and more . . . */
    const int dmin = xoff - ylim;        /* Minimum valid diagonal. */
    const int dmax = xlim - yoff;        /* Maximum valid diagonal. */
    const int fmid = xoff - yoff;        /* Center diagonal of top-down search. */
@@ -286,7 +296,7 @@ diag (int xoff, int xlim, int yoff, int ylim, int minimal,
  
           With this heuristic, for strings with a constant small density
           of changes, the algorithm is linear in the strings size.  */
-      if (c > 200 && big_snake && heuristic)
+      if (c > 200 && big_snake && ctxt->heuristic)
         {
           int best;
  
@@ -387,7 +397,7 @@ diag (int xoff, int xlim, int yoff, int ylim, int minimal,
  
        /* Heuristic: if we've gone well beyond the call of duty, give up
          and report halfway between our best results so far.  */
-      if (c >= too_expensive)
+      if (c >= ctxt->too_expensive)
         {
           int fxybest;
           int fxbest;
@@ -465,7 +475,8 @@ diag (int xoff, int xlim, int yoff, int ylim, int minimal,
         compareseq - find edit sequence
  
     SYNOPSIS
-       void compareseq(int xoff, int xlim, int yoff, int ylim, int minimal);
+       void compareseq(int xoff, int xlim, int yoff, int ylim, int minimal,
+                       struct context *ctxt);
  
     DESCRIPTION
         Compare in detail contiguous subsequences of the two strings
@@ -481,10 +492,11 @@ diag (int xoff, int xlim, int yoff, int ylim, int minimal,
         expensive it is.  */
  
  static void
-compareseq (int xoff, int xlim, int yoff, int ylim, int minimal)
+compareseq (int xoff, int xlim, int yoff, int ylim, int minimal,
+           struct context *ctxt)
  {
-  const char *const xv = string[0].data;       /* Help the compiler.  */
-  const char *const yv = string[1].data;
+  const char *const xv = ctxt->string[0].data; /* Help the compiler.  */
+  const char *const yv = ctxt->string[1].data;
  
    /* Slide down the bottom initial diagonal. */
    while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
@@ -505,7 +517,7 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal)
      {
        while (yoff < ylim)
         {
-         ++string[1].edit_count;
+         ctxt->string[1].edit_count++;
           ++yoff;
         }
      }
@@ -513,7 +525,7 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal)
      {
        while (xoff < xlim)
         {
-         ++string[0].edit_count;
+         ctxt->string[0].edit_count++;
           ++xoff;
         }
      }
@@ -523,7 +535,7 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal)
        struct partition part;
  
        /* Find a point of correspondence in the middle of the strings.  */
-      c = diag (xoff, xlim, yoff, ylim, minimal, &part);
+      c = diag (xoff, xlim, yoff, ylim, minimal, &part, ctxt);
        if (c == 1)
         {
  #if 0
@@ -536,21 +548,43 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal)
           /* The two subsequences differ by a single insert or delete;
              record it and we are done.  */
           if (part.xmid - part.ymid < xoff - yoff)
-           ++string[1].edit_count;
+           ctxt->string[1].edit_count++;
           else
-           ++string[0].edit_count;
+           ctxt->string[0].edit_count++;
  #endif
         }
        else
         {
           /* Use the partitions to split this problem into subproblems.  */
-         compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
-         compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
+         compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt);
+         compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt);
         }
      }
  }
  
  
+/* Because fstrcmp is typically called multiple times, attempt to minimize
+   the number of memory allocations performed.  Thus, let a call reuse the
+   memory already allocated by the previous call, if it is sufficient.
+   To make it multithread-safe, without need for a lock that protects the
+   already allocated memory, store the allocated memory per thread.  Free
+   it only when the thread exits.  */
+
+static gl_tls_key_t buffer_key;        /* TLS key for a 'int *' */
+static gl_tls_key_t bufmax_key;        /* TLS key for a 'size_t' */
+
+static void
+keys_init (void)
+{
+  gl_tls_key_init (buffer_key, free);
+  gl_tls_key_init (bufmax_key, NULL);
+  /* The per-thread initial values are NULL and 0, respectively.  */
+}
+
+/* Ensure that keys_init is called once only.  */
+gl_once_define(static, keys_init_once);
+
+
  /* NAME
         fstrcmp - fuzzy string compare
  
@@ -571,55 +605,68 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal)
  double
  fstrcmp (const char *string1, const char *string2)
  {
+  struct context ctxt;
    int i;
  
    size_t fdiag_len;
-  static int *fdiag_buf;
-  static size_t fdiag_max;
+  int *buffer;
+  size_t bufmax;
  
    /* set the info for each string.  */
-  string[0].data = string1;
-  string[0].data_length = strlen (string1);
-  string[1].data = string2;
-  string[1].data_length = strlen (string2);
+  ctxt.string[0].data = string1;
+  ctxt.string[0].data_length = strlen (string1);
+  ctxt.string[1].data = string2;
+  ctxt.string[1].data_length = strlen (string2);
  
    /* short-circuit obvious comparisons */
-  if (string[0].data_length == 0 && string[1].data_length == 0)
+  if (ctxt.string[0].data_length == 0 && ctxt.string[1].data_length == 0)
      return 1.0;
-  if (string[0].data_length == 0 || string[1].data_length == 0)
+  if (ctxt.string[0].data_length == 0 || ctxt.string[1].data_length == 0)
      return 0.0;
  
    /* Set TOO_EXPENSIVE to be approximate square root of input size,
       bounded below by 256.  */
-  too_expensive = 1;
-  for (i = string[0].data_length + string[1].data_length; i != 0; i >>= 2)
-    too_expensive <<= 1;
-  if (too_expensive < 256)
-    too_expensive = 256;
-
-  /* Because fstrcmp is typically called multiple times, while scanning
-     symbol tables, etc, attempt to minimize the number of memory
-     allocations performed.  Thus, we use a static buffer for the
-     diagonal vectors, and never free them.  */
-  fdiag_len = string[0].data_length + string[1].data_length + 3;
-  if (fdiag_len > fdiag_max)
+  ctxt.too_expensive = 1;
+  for (i = ctxt.string[0].data_length + ctxt.string[1].data_length;
+       i != 0;
+       i >>= 2)
+    ctxt.too_expensive <<= 1;
+  if (ctxt.too_expensive < 256)
+    ctxt.too_expensive = 256;
+
+  /* Allocate memory for fdiag and bdiag from a thread-local pool.  */
+  fdiag_len = ctxt.string[0].data_length + ctxt.string[1].data_length + 3;
+  gl_once (keys_init_once, keys_init);
+  buffer = (int *) gl_tls_get (buffer_key);
+  bufmax = (size_t) (uintptr_t) gl_tls_get (bufmax_key);
+  if (fdiag_len > bufmax)
      {
-      fdiag_max = fdiag_len;
-      fdiag_buf = xrealloc (fdiag_buf, fdiag_max * (2 * sizeof (int)));
+      /* Need more memory.  */
+      bufmax = 2 * bufmax;
+      if (fdiag_len > bufmax)
+       bufmax = fdiag_len;
+      /* Calling xrealloc would be a waste: buffer's contents does not need
+        to be preserved.  */
+      if (buffer != NULL)
+       free (buffer);
+      buffer = (int *) xmalloc (bufmax * (2 * sizeof (int)));
+      gl_tls_set (buffer_key, buffer);
+      gl_tls_set (bufmax_key, (void *) (uintptr_t) bufmax);
      }
-  fdiag = fdiag_buf + string[1].data_length + 1;
-  bdiag = fdiag + fdiag_len;
+  ctxt.fdiag = buffer + ctxt.string[1].data_length + 1;
+  ctxt.bdiag = ctxt.fdiag + fdiag_len;
  
    /* Now do the main comparison algorithm */
-  string[0].edit_count = 0;
-  string[1].edit_count = 0;
-  compareseq (0, string[0].data_length, 0, string[1].data_length, 0);
+  ctxt.string[0].edit_count = 0;
+  ctxt.string[1].edit_count = 0;
+  compareseq (0, ctxt.string[0].data_length, 0, ctxt.string[1].data_length, 0,
+             &ctxt);
  
    /* The result is
         ((number of chars in common) / (average length of the strings)).
       This is admittedly biased towards finding that the strings are
       similar, however it does produce meaningful results.  */
-  return ((double) (string[0].data_length + string[1].data_length
-                   - string[1].edit_count - string[0].edit_count)
-         / (string[0].data_length + string[1].data_length));
+  return ((double) (ctxt.string[0].data_length + ctxt.string[1].data_length
+                   - ctxt.string[1].edit_count - ctxt.string[0].edit_count)
+         / (ctxt.string[0].data_length + ctxt.string[1].data_length));
  }
author	Bruno Haible <bruno@clisp.org>
	Thu, 27 Jul 2006 12:17:23 +0000 (12:17 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Tue, 23 Jun 2009 10:13:42 +0000 (12:13 +0200)
gettext-tools/lib/ChangeLog		patch \| blob \| blame \| history
gettext-tools/lib/Makefile.am		patch \| blob \| blame \| history
gettext-tools/lib/fstrcmp.c		patch \| blob \| blame \| history