From 30334ea852046cf3fb129cf2d8ce293c1627f323 Mon Sep 17 00:00:00 2001 From: Bruno Haible <bruno@clisp.org> Date: Mon, 15 Sep 2008 00:48:04 +0000 Subject: [PATCH] Use the result of the fuzzy search in the current list as a lower bound for the fuzzy search in the compendiums. --- gettext-tools/src/ChangeLog | 9 +++++++++ gettext-tools/src/msgl-fsearch.c | 11 +++++++---- gettext-tools/src/msgl-fsearch.h | 9 ++++++--- gettext-tools/src/msgmerge.c | 17 ++++++++++++++--- 4 files changed, 36 insertions(+), 10 deletions(-) diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog index 68e3bd515..bfed4904b 100644 --- a/gettext-tools/src/ChangeLog +++ b/gettext-tools/src/ChangeLog @@ -1,3 +1,12 @@ +2008-09-14 Bruno Haible <bruno@clisp.org> + + * msgl-fsearch.h (message_fuzzy_index_search): Add 'lower_bound' + argument. + * msgl-fsearch.c (message_fuzzy_index_search): Likewise. + * msgmerge.c (definitions_search_fuzzy): Use the result of the fuzzy + search in the current list as a lower bound for the fuzzy search in the + compendiums. + 2008-09-14 Bruno Haible <bruno@clisp.org> * message.h (message_list_search_fuzzy, FUZZY_THRESHOLD): Clarify diff --git a/gettext-tools/src/msgl-fsearch.c b/gettext-tools/src/msgl-fsearch.c index 8d7d5410e..e612f0172 100644 --- a/gettext-tools/src/msgl-fsearch.c +++ b/gettext-tools/src/msgl-fsearch.c @@ -484,10 +484,13 @@ mult_index_list_free (struct mult_index_list *accu) } /* Find a good match for the given msgctxt and msgid in the given fuzzy index. - The match does not need to be optimal. */ + The match does not need to be optimal. + Ignore matches for which the fuzzy_search_goal_function is < LOWER_BOUND. + LOWER_BOUND must be >= FUZZY_THRESHOLD. 
*/ message_ty * message_fuzzy_index_search (message_fuzzy_index_ty *findex, - const char *msgctxt, const char *msgid) + const char *msgctxt, const char *msgid, + double lower_bound) { const char *str = msgid; @@ -547,7 +550,7 @@ message_fuzzy_index_search (message_fuzzy_index_ty *findex, if (count > accu.nitems) count = accu.nitems; - best_weight = FUZZY_THRESHOLD; + best_weight = lower_bound; best_mp = NULL; for (ptr = accu.item; count > 0; ptr++, count--) { @@ -589,7 +592,7 @@ message_fuzzy_index_search (message_fuzzy_index_ty *findex, if (!(lmax <= SHORT_MSG_MAX)) abort (); - best_weight = FUZZY_THRESHOLD; + best_weight = lower_bound; best_mp = NULL; for (l = lmin; l <= lmax; l++) { diff --git a/gettext-tools/src/msgl-fsearch.h b/gettext-tools/src/msgl-fsearch.h index 6793cefef..d5f796251 100644 --- a/gettext-tools/src/msgl-fsearch.h +++ b/gettext-tools/src/msgl-fsearch.h @@ -1,5 +1,5 @@ /* Fast fuzzy searching among messages. - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2008 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2006. This program is free software: you can redistribute it and/or modify @@ -39,10 +39,13 @@ extern message_fuzzy_index_ty * const char *canon_charset); /* Find a good match for the given msgctxt and msgid in the given fuzzy index. - The match does not need to be optimal. */ + The match does not need to be optimal. + Ignore matches for which the fuzzy_search_goal_function is < LOWER_BOUND. + LOWER_BOUND must be >= FUZZY_THRESHOLD. */ extern message_ty * message_fuzzy_index_search (message_fuzzy_index_ty *findex, - const char *msgctxt, const char *msgid); + const char *msgctxt, const char *msgid, + double lower_bound); /* Free a fuzzy index. 
*/ extern void diff --git a/gettext-tools/src/msgmerge.c b/gettext-tools/src/msgmerge.c index b8df6a913..be765e904 100644 --- a/gettext-tools/src/msgmerge.c +++ b/gettext-tools/src/msgmerge.c @@ -767,20 +767,31 @@ definitions_search_fuzzy (definitions_ty *definitions, msgctxt, msgid); if (compendiums != NULL) { + double lower_bound_for_mp2; message_ty *mp2; + lower_bound_for_mp2 = + (mp1 != NULL + ? fuzzy_search_goal_function (mp1, msgctxt, msgid, 0.0) + : FUZZY_THRESHOLD); + /* This lower bound must be >= FUZZY_THRESHOLD. */ + if (!(lower_bound_for_mp2 >= FUZZY_THRESHOLD)) + abort (); + /* Create the fuzzy index lazily. */ if (definitions->comp_findex == NULL) definitions_init_comp_findex (definitions); mp2 = message_fuzzy_index_search (definitions->comp_findex, - msgctxt, msgid); + msgctxt, msgid, + lower_bound_for_mp2); /* Choose the best among mp1, mp2. */ if (mp1 == NULL || (mp2 != NULL - && (fuzzy_search_goal_function (mp2, msgctxt, msgid, 0.0) - > fuzzy_search_goal_function (mp1, msgctxt, msgid, 0.0)))) + && (fuzzy_search_goal_function (mp2, msgctxt, msgid, + lower_bound_for_mp2) + > lower_bound_for_mp2))) mp1 = mp2; } -- 2.47.2