From: Bruno Haible Date: Thu, 27 Jul 2006 12:17:23 +0000 (+0000) Subject: Make fstrcmp multithread-safe. X-Git-Tag: 0.16.x-branchpoint~313 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4f78848eaabc880ec1a73ec45def7af504935f29;p=thirdparty%2Fgettext.git Make fstrcmp multithread-safe. --- diff --git a/gettext-tools/lib/ChangeLog b/gettext-tools/lib/ChangeLog index 6c724ca7d..d98d57734 100644 --- a/gettext-tools/lib/ChangeLog +++ b/gettext-tools/lib/ChangeLog @@ -1,3 +1,20 @@ +2006-07-23 Bruno Haible + + Make fstrcmp multithread-safe. + * lock.h: New file, from gnulib. + * lock.c: New file, from gnulib. + * tls.h: New file, from gnulib. + * tls.c: New file, from gnulib. + * Makefile.am (libgettextlib_la_SOURCES): Add them. + * fstrcmp.c: Include lock.h, tls.h. + (uintptr_t): New macro. + (struct context): New structure, grouping all variables. + (diag, compareseq): Add context argument. + (buffer_key, bufmax_key): New variables. + (keys_init): New functions. + (keys_init_once): New variable. + (fstrcmp): Establish a struct context. Allocate memory per-thread. + 2006-07-22 Bruno Haible * gl_anyhash_list1.h: New file, from gnulib. diff --git a/gettext-tools/lib/Makefile.am b/gettext-tools/lib/Makefile.am index d93b51de6..bc9bc2ae5 100644 --- a/gettext-tools/lib/Makefile.am +++ b/gettext-tools/lib/Makefile.am @@ -66,6 +66,7 @@ libgettextlib_la_SOURCES = \ javaexec.h javaexec.c \ javaversion.h javaversion.c \ linebreak.h linebreak.c lbrkprop.h utf8-ucs4.h utf16-ucs4.h \ + lock.h lock.c \ mbswidth.h mbswidth.c \ minmax.h \ obstack.h obstack.c \ @@ -80,6 +81,7 @@ libgettextlib_la_SOURCES = \ safe-write.h safe-write.c \ sh-quote.h sh-quote.c \ size_max.h \ + tls.h tls.c \ tmpdir.h tmpdir.c \ ucs4-utf8.h ucs4-utf16.h \ unlocked-io.h \ diff --git a/gettext-tools/lib/fstrcmp.c b/gettext-tools/lib/fstrcmp.c index 6907c054e..dadc82b40 100644 --- a/gettext-tools/lib/fstrcmp.c +++ b/gettext-tools/lib/fstrcmp.c @@ -52,56 +52,66 @@ #include #include +#include #include +#include "lock.h" +#include "tls.h" #include "xalloc.h" +#ifndef uintptr_t +# define uintptr_t unsigned long +#endif + /* - * Data on one input string being compared. + * Context of comparison operation. */ -struct string_data +struct context { - /* The string to be compared. */ - const char *data; - - /* The length of the string to be compared. */ - int data_length; - - /* The number of characters inserted or deleted. */ - int edit_count; -}; + /* + * Data on one input string being compared. + */ + struct string_data + { + /* The string to be compared. */ + const char *data; -static struct string_data string[2]; + /* The length of the string to be compared. */ + int data_length; + /* The number of characters inserted or deleted. */ + int edit_count; + } + string[2]; -#ifdef MINUS_H_FLAG + #ifdef MINUS_H_FLAG -/* This corresponds to the diff -H flag. With this heuristic, for - strings with a constant small density of changes, the algorithm is - linear in the strings size. This is unlikely in typical uses of - fstrcmp, and so is usually compiled out. Besides, there is no - interface to set it true. */ -static int heuristic; - -#endif + /* This corresponds to the diff -H flag. With this heuristic, for + strings with a constant small density of changes, the algorithm is + linear in the strings size. This is unlikely in typical uses of + fstrcmp, and so is usually compiled out. Besides, there is no + interface to set it true. */ + int heuristic; + #endif -/* Vector, indexed by diagonal, containing 1 + the X coordinate of the - point furthest along the given diagonal in the forward search of the - edit matrix. */ -static int *fdiag; + /* Vector, indexed by diagonal, containing 1 + the X coordinate of the + point furthest along the given diagonal in the forward search of the + edit matrix. */ + int *fdiag; -/* Vector, indexed by diagonal, containing the X coordinate of the point - furthest along the given diagonal in the backward search of the edit - matrix. */ -static int *bdiag; + /* Vector, indexed by diagonal, containing the X coordinate of the point + furthest along the given diagonal in the backward search of the edit + matrix. */ + int *bdiag; -/* Edit scripts longer than this are too expensive to compute. */ -static int too_expensive; + /* Edit scripts longer than this are too expensive to compute. */ + int too_expensive; -/* Snakes bigger than this are considered `big'. */ -#define SNAKE_LIMIT 20 + /* Snakes bigger than this are considered `big'. */ + #define SNAKE_LIMIT 20 +}; struct partition { @@ -121,7 +131,7 @@ struct partition SYNOPSIS int diag(int xoff, int xlim, int yoff, int ylim, int minimal, - struct partition *part); + struct partition *part, struct context *ctxt); DESCRIPTION Find the midpoint of the shortest edit script for a specified @@ -162,12 +172,12 @@ struct partition static int diag (int xoff, int xlim, int yoff, int ylim, int minimal, - struct partition *part) + struct partition *part, struct context *ctxt) { - int *const fd = fdiag; /* Give the compiler a chance. */ - int *const bd = bdiag; /* Additional help for the compiler. */ - const char *const xv = string[0].data; /* Still more help for the compiler. */ - const char *const yv = string[1].data; /* And more and more . . . */ + int *const fd = ctxt->fdiag; /* Give the compiler a chance. */ + int *const bd = ctxt->bdiag; /* Additional help for the compiler. */ + const char *const xv = ctxt->string[0].data; /* Still more help for the compiler. */ + const char *const yv = ctxt->string[1].data; /* And more and more . . . */ const int dmin = xoff - ylim; /* Minimum valid diagonal. */ const int dmax = xlim - yoff; /* Maximum valid diagonal. */ const int fmid = xoff - yoff; /* Center diagonal of top-down search. */ @@ -286,7 +296,7 @@ diag (int xoff, int xlim, int yoff, int ylim, int minimal, With this heuristic, for strings with a constant small density of changes, the algorithm is linear in the strings size. */ - if (c > 200 && big_snake && heuristic) + if (c > 200 && big_snake && ctxt->heuristic) { int best; @@ -387,7 +397,7 @@ diag (int xoff, int xlim, int yoff, int ylim, int minimal, /* Heuristic: if we've gone well beyond the call of duty, give up and report halfway between our best results so far. */ - if (c >= too_expensive) + if (c >= ctxt->too_expensive) { int fxybest; int fxbest; @@ -465,7 +475,8 @@ diag (int xoff, int xlim, int yoff, int ylim, int minimal, compareseq - find edit sequence SYNOPSIS - void compareseq(int xoff, int xlim, int yoff, int ylim, int minimal); + void compareseq(int xoff, int xlim, int yoff, int ylim, int minimal, + struct context *ctxt); DESCRIPTION Compare in detail contiguous subsequences of the two strings @@ -481,10 +492,11 @@ diag (int xoff, int xlim, int yoff, int ylim, int minimal, expensive it is. */ static void -compareseq (int xoff, int xlim, int yoff, int ylim, int minimal) +compareseq (int xoff, int xlim, int yoff, int ylim, int minimal, + struct context *ctxt) { - const char *const xv = string[0].data; /* Help the compiler. */ - const char *const yv = string[1].data; + const char *const xv = ctxt->string[0].data; /* Help the compiler. */ + const char *const yv = ctxt->string[1].data; /* Slide down the bottom initial diagonal. */ while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff]) @@ -505,7 +517,7 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal) { while (yoff < ylim) { - ++string[1].edit_count; + ctxt->string[1].edit_count++; ++yoff; } } @@ -513,7 +525,7 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal) { while (xoff < xlim) { - ++string[0].edit_count; + ctxt->string[0].edit_count++; ++xoff; } } @@ -523,7 +535,7 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal) struct partition part; /* Find a point of correspondence in the middle of the strings. */ - c = diag (xoff, xlim, yoff, ylim, minimal, &part); + c = diag (xoff, xlim, yoff, ylim, minimal, &part, ctxt); if (c == 1) { #if 0 @@ -536,21 +548,43 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal) /* The two subsequences differ by a single insert or delete; record it and we are done. */ if (part.xmid - part.ymid < xoff - yoff) - ++string[1].edit_count; + ctxt->string[1].edit_count++; else - ++string[0].edit_count; + ctxt->string[0].edit_count++; #endif } else { /* Use the partitions to split this problem into subproblems. */ - compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal); - compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal); + compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt); + compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt); } } } +/* Because fstrcmp is typically called multiple times, attempt to minimize + the number of memory allocations performed. Thus, let a call reuse the + memory already allocated by the previous call, if it is sufficient. + To make it multithread-safe, without need for a lock that protects the + already allocated memory, store the allocated memory per thread. Free + it only when the thread exits. */ + +static gl_tls_key_t buffer_key; /* TLS key for a 'int *' */ +static gl_tls_key_t bufmax_key; /* TLS key for a 'size_t' */ + +static void +keys_init (void) +{ + gl_tls_key_init (buffer_key, free); + gl_tls_key_init (bufmax_key, NULL); + /* The per-thread initial values are NULL and 0, respectively. */ +} + +/* Ensure that keys_init is called once only. */ +gl_once_define(static, keys_init_once); + + /* NAME fstrcmp - fuzzy string compare @@ -571,55 +605,68 @@ compareseq (int xoff, int xlim, int yoff, int ylim, int minimal) double fstrcmp (const char *string1, const char *string2) { + struct context ctxt; int i; size_t fdiag_len; - static int *fdiag_buf; - static size_t fdiag_max; + int *buffer; + size_t bufmax; /* set the info for each string. */ - string[0].data = string1; - string[0].data_length = strlen (string1); - string[1].data = string2; - string[1].data_length = strlen (string2); + ctxt.string[0].data = string1; + ctxt.string[0].data_length = strlen (string1); + ctxt.string[1].data = string2; + ctxt.string[1].data_length = strlen (string2); /* short-circuit obvious comparisons */ - if (string[0].data_length == 0 && string[1].data_length == 0) + if (ctxt.string[0].data_length == 0 && ctxt.string[1].data_length == 0) return 1.0; - if (string[0].data_length == 0 || string[1].data_length == 0) + if (ctxt.string[0].data_length == 0 || ctxt.string[1].data_length == 0) return 0.0; /* Set TOO_EXPENSIVE to be approximate square root of input size, bounded below by 256. */ - too_expensive = 1; - for (i = string[0].data_length + string[1].data_length; i != 0; i >>= 2) - too_expensive <<= 1; - if (too_expensive < 256) - too_expensive = 256; - - /* Because fstrcmp is typically called multiple times, while scanning - symbol tables, etc, attempt to minimize the number of memory - allocations performed. Thus, we use a static buffer for the - diagonal vectors, and never free them. */ - fdiag_len = string[0].data_length + string[1].data_length + 3; - if (fdiag_len > fdiag_max) + ctxt.too_expensive = 1; + for (i = ctxt.string[0].data_length + ctxt.string[1].data_length; + i != 0; + i >>= 2) + ctxt.too_expensive <<= 1; + if (ctxt.too_expensive < 256) + ctxt.too_expensive = 256; + + /* Allocate memory for fdiag and bdiag from a thread-local pool. */ + fdiag_len = ctxt.string[0].data_length + ctxt.string[1].data_length + 3; + gl_once (keys_init_once, keys_init); + buffer = (int *) gl_tls_get (buffer_key); + bufmax = (size_t) (uintptr_t) gl_tls_get (bufmax_key); + if (fdiag_len > bufmax) { - fdiag_max = fdiag_len; - fdiag_buf = xrealloc (fdiag_buf, fdiag_max * (2 * sizeof (int))); + /* Need more memory. */ + bufmax = 2 * bufmax; + if (fdiag_len > bufmax) + bufmax = fdiag_len; + /* Calling xrealloc would be a waste: buffer's contents does not need + to be preserved. */ + if (buffer != NULL) + free (buffer); + buffer = (int *) xmalloc (bufmax * (2 * sizeof (int))); + gl_tls_set (buffer_key, buffer); + gl_tls_set (bufmax_key, (void *) (uintptr_t) bufmax); } - fdiag = fdiag_buf + string[1].data_length + 1; - bdiag = fdiag + fdiag_len; + ctxt.fdiag = buffer + ctxt.string[1].data_length + 1; + ctxt.bdiag = ctxt.fdiag + fdiag_len; /* Now do the main comparison algorithm */ - string[0].edit_count = 0; - string[1].edit_count = 0; - compareseq (0, string[0].data_length, 0, string[1].data_length, 0); + ctxt.string[0].edit_count = 0; + ctxt.string[1].edit_count = 0; + compareseq (0, ctxt.string[0].data_length, 0, ctxt.string[1].data_length, 0, + &ctxt); /* The result is ((number of chars in common) / (average length of the strings)). This is admittedly biased towards finding that the strings are similar, however it does produce meaningful results. */ - return ((double) (string[0].data_length + string[1].data_length - - string[1].edit_count - string[0].edit_count) - / (string[0].data_length + string[1].data_length)); + return ((double) (ctxt.string[0].data_length + ctxt.string[1].data_length + - ctxt.string[1].edit_count - ctxt.string[0].edit_count) + / (ctxt.string[0].data_length + ctxt.string[1].data_length)); }