From: Denis Moyogo Jacquerye
Date: Tue, 1 Nov 2022 09:14:58 +0000 (+0100)
Subject: Test languages exemplars canonical duplicates
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0cc89cfc410207dfc4654b0a4a2bb135b3af1231;p=thirdparty%2Fgoogle%2Ffonts.git

Test languages exemplars canonical duplicates
---

diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py
index 9c30318be6..b15262ae02 100644
--- a/tests/test_data_languages.py
+++ b/tests/test_data_languages.py
@@ -14,8 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from collections import Counter
+from collections import defaultdict, Counter
 import re
+import unicodedata
 
 from gflanguages import LoadLanguages, languages_public_pb2, LoadScripts
 import pytest
@@ -41,6 +42,25 @@ SKIP_EXEMPLARS = {
 }
 
 
+@pytest.mark.parametrize("lang_code", LANGUAGES)
+@pytest.mark.parametrize(
+    "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"]
+)
+def test_languages_exemplars_canonical_duplicates(lang_code, exemplar_name):
+    lang = LANGUAGES[lang_code]
+    exemplar = getattr(lang.exemplar_chars, exemplar_name).split()
+    normalized = defaultdict(set)
+
+    for g in exemplar:
+        if g[0] == "{" and g[-1] == "}":
+            g = g.lstrip("{").rstrip("}")
+        normalized[unicodedata.normalize("NFC", g)].add(g)
+
+    result = [(len(gs), gs) for n, gs in normalized.items()]
+    expected = [(1, {n}) for n, gs in normalized.items()]
+    assert result == expected
+
+
 @pytest.mark.parametrize("lang_code", LANGUAGES)
 @pytest.mark.parametrize(
     "exemplar_name", ["base", "auxiliary", "marks", "numerals", "punctuation", "index"]