From: Bruno Haible Date: Sun, 14 Sep 2008 21:41:30 +0000 (+0000) Subject: Shortcut fstrcmp computations by taking into account the best known match so X-Git-Tag: v0.18~347 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2df5c44aa8b410ec9ff638ed6de8c2fc334f14ab;p=thirdparty%2Fgettext.git Shortcut fstrcmp computations by taking into account the best known match so far. --- diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 15ed384b9..68c260030 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,15 @@ +2008-09-14 Ralf Wildenhues + Bruno Haible + + * message.h (fuzzy_search_goal_function): Add 'lower_bound' argument. + * message.c (fuzzy_search_goal_function): Likewise. Use fstrcmp_bounded + instead of fstrcmp. + (message_list_search_fuzzy_inner): Pass fuzzy_search_goal_function the + best weight known so far, to shortcut computations. + * msgl-fsearch.c (message_fuzzy_index_search): Likewise. + * msgmerge.c (definitions_search_fuzzy): Update + fuzzy_search_goal_function calls. + 2008-09-14 Ralf Wildenhues Bruno Haible diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c index cfe808a35..75fae5e0b 100644 --- a/gettext-tools/src/message.c +++ b/gettext-tools/src/message.c @@ -1,5 +1,5 @@ /* GNU gettext - internationalization aids - Copyright (C) 1995-1998, 2000-2007 Free Software Foundation, Inc. + Copyright (C) 1995-1998, 2000-2008 Free Software Foundation, Inc. This file was written by Peter Miller @@ -531,22 +531,37 @@ message_list_search (message_list_ty *mlp, double fuzzy_search_goal_function (const message_ty *mp, - const char *msgctxt, const char *msgid) -{ - /* The use of 'volatile' guarantees that excess precision bits are dropped - before the addition and before the following comparison at the caller's - site. It is necessary on x86 systems where double-floats are not IEEE - compliant by default, to avoid that msgmerge results become platform and - compiler option dependent. 'volatile' is a portable alternative to gcc's - -ffloat-store option. */ - volatile double weight = fstrcmp (msgid, mp->msgid); + const char *msgctxt, const char *msgid, + double lower_bound) +{ + double bonus = 0.0; /* A translation for a context is a good proposal also for another. But give mp a small advantage if mp is valid regardless of any context or has the same context as the one being looked up. */ if (mp->msgctxt == NULL || (msgctxt != NULL && strcmp (msgctxt, mp->msgctxt) == 0)) - weight += 0.00001; - return weight; + { + bonus = 0.00001; + /* Since we will consider (weight + bonus) at the end, we are only + interested in weights that are >= lower_bound - bonus. Subtract + a little more than the bonus, in order to avoid trouble due to + rounding errors. */ + lower_bound -= bonus * 1.01; + } + + { + /* The use of 'volatile' guarantees that excess precision bits are dropped + before the addition and before the following comparison at the caller's + site. It is necessary on x86 systems where double-floats are not IEEE + compliant by default, to avoid that msgmerge results become platform and + compiler option dependent. 'volatile' is a portable alternative to + gcc's -ffloat-store option. */ + volatile double weight = fstrcmp_bounded (msgid, mp->msgid, lower_bound); + + weight += bonus; + + return weight; + } } @@ -567,7 +582,8 @@ message_list_search_fuzzy_inner (message_list_ty *mlp, if (mp->msgstr != NULL && mp->msgstr[0] != '\0') { - double weight = fuzzy_search_goal_function (mp, msgctxt, msgid); + double weight = + fuzzy_search_goal_function (mp, msgctxt, msgid, *best_weight_p); if (weight > *best_weight_p) { *best_weight_p = weight; diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h index d58b0a54a..763ef34a5 100644 --- a/gettext-tools/src/message.h +++ b/gettext-tools/src/message.h @@ -1,5 +1,5 @@ /* GNU gettext - internationalization aids - Copyright (C) 1995-1998, 2000-2007 Free Software Foundation, Inc. + Copyright (C) 1995-1998, 2000-2008 Free Software Foundation, Inc. This file was written by Peter Miller @@ -317,10 +317,13 @@ extern message_ty * /* The goal function used in fuzzy search. - Higher values indicate a closer match. */ + Higher values indicate a closer match. + If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can + be returned. */ extern double fuzzy_search_goal_function (const message_ty *mp, - const char *msgctxt, const char *msgid); + const char *msgctxt, const char *msgid, + double lower_bound); /* The threshold for fuzzy-searching. A message is considered only if fstrcmp (msg, given) > FUZZY_THRESHOLD. */ diff --git a/gettext-tools/src/msgl-fsearch.c b/gettext-tools/src/msgl-fsearch.c index 1935a86c1..8d7d5410e 100644 --- a/gettext-tools/src/msgl-fsearch.c +++ b/gettext-tools/src/msgl-fsearch.c @@ -1,5 +1,5 @@ /* Fast fuzzy searching among messages. - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2008 Free Software Foundation, Inc. Written by Bruno Haible , 2006. This program is free software: you can redistribute it and/or modify @@ -553,7 +553,8 @@ message_fuzzy_index_search (message_fuzzy_index_ty *findex, { message_ty *mp = findex->messages[ptr->index]; double weight = - fuzzy_search_goal_function (mp, msgctxt, msgid); + fuzzy_search_goal_function (mp, msgctxt, msgid, + best_weight); if (weight > best_weight) { @@ -598,7 +599,8 @@ message_fuzzy_index_search (message_fuzzy_index_ty *findex, for (j = 0; j < mlp->nitems; j++) { message_ty *mp = mlp->item[j]; - double weight = fuzzy_search_goal_function (mp, msgctxt, msgid); + double weight = + fuzzy_search_goal_function (mp, msgctxt, msgid, best_weight); if (weight > best_weight) { diff --git a/gettext-tools/src/msgmerge.c b/gettext-tools/src/msgmerge.c index d9d9676b6..576e0c3fd 100644 --- a/gettext-tools/src/msgmerge.c +++ b/gettext-tools/src/msgmerge.c @@ -776,8 +776,8 @@ definitions_search_fuzzy (definitions_ty *definitions, /* Choose the best among mp1, mp2. */ if (mp1 == NULL || (mp2 != NULL - && (fuzzy_search_goal_function (mp2, msgctxt, msgid) - > fuzzy_search_goal_function (mp1, msgctxt, msgid)))) + && (fuzzy_search_goal_function (mp2, msgctxt, msgid, 0.0) + > fuzzy_search_goal_function (mp1, msgctxt, msgid, 0.0)))) mp1 = mp2; }