From: Jean Abou Samra Date: Sun, 12 Feb 2023 01:46:30 +0000 (+0100) Subject: In fuzzy matching, also .lower().strip() fuzzy candidates X-Git-Tag: v2.12.0~3^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=63bb71a5acd0d49a2ceee15098485bc34b0e8864;p=thirdparty%2Fbabel.git In fuzzy matching, also .lower().strip() fuzzy candidates This seems intended at easing fuzzy matching with trivial edits in the msgstr (changing case and adding whitespace), but it was only done on the new msgstr, not on the old msgstr candidates, so it was possible for merging catalogs to miss messages. --- diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index dead4aac..19026434 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -803,10 +803,13 @@ class Catalog: # Prepare for fuzzy matching fuzzy_candidates = [] if not no_fuzzy_matching: - fuzzy_candidates = { - self._key_for(msgid): messages[msgid].context - for msgid in messages if msgid and messages[msgid].string - } + fuzzy_candidates = {} + for msgid in messages: + if msgid and messages[msgid].string: + key = self._key_for(msgid) + ctxt = messages[msgid].context + modified_key = key.lower().strip() + fuzzy_candidates[modified_key] = (key, ctxt) fuzzy_matches = set() def _merge(message: Message, oldkey: tuple[str, str] | str, newkey: tuple[str, str] | str) -> None: @@ -861,8 +864,8 @@ class Catalog: matches = get_close_matches(matchkey.lower().strip(), fuzzy_candidates.keys(), 1) if matches: - newkey = matches[0] - newctxt = fuzzy_candidates[newkey] + modified_key = matches[0] + newkey, newctxt = fuzzy_candidates[modified_key] if newctxt is not None: newkey = newkey, newctxt _merge(message, newkey, key) diff --git a/tests/messages/test_catalog.py b/tests/messages/test_catalog.py index 273c83f5..c2e7aeda 100644 --- a/tests/messages/test_catalog.py +++ b/tests/messages/test_catalog.py @@ -121,16 +121,16 @@ class CatalogTestCase(unittest.TestCase): def 
test_update_fuzzy_matching_with_case_change(self): cat = catalog.Catalog() - cat.add('foo', 'Voh') + cat.add('FOO', 'Voh') cat.add('bar', 'Bahr') tmpl = catalog.Catalog() - tmpl.add('Foo') + tmpl.add('foo') cat.update(tmpl) assert len(cat.obsolete) == 1 - assert 'foo' not in cat + assert 'FOO' not in cat - assert cat['Foo'].string == 'Voh' - assert cat['Foo'].fuzzy is True + assert cat['foo'].string == 'Voh' + assert cat['foo'].fuzzy is True def test_update_fuzzy_matching_with_char_change(self): cat = catalog.Catalog()