From: Simon Cozens
Date: Wed, 1 May 2024 09:13:32 +0000 (+0100)
Subject: Merge branch 'main' into test-out-of-script
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5bd360f2f4ea47bc330c5808c63d62bba73d6f64;p=thirdparty%2Fgoogle%2Ffonts.git

Merge branch 'main' into test-out-of-script
---

5bd360f2f4ea47bc330c5808c63d62bba73d6f64
diff --cc tests/test_data_languages.py
index 7dcd6f4dcc,17870d442e..315275d953
--- a/tests/test_data_languages.py
+++ b/tests/test_data_languages.py
@@@ -195,53 -182,15 +201,67 @@@ def test_exemplars_are_in_script(lang_c
      )
  
  
 +@pytest.mark.parametrize("lang_code", LANGUAGES.keys())
 +def test_sample_texts_are_in_script(lang_code):
 +    if lang_code in [
 +        "mak_Maka",
 +        "orv_Cyrl",
 +        "cu_Cyrl",
 +        "ff_Adlm",
 +        "idu_Latn",
 +        "ban_Bali",
 +    ]:
 +        pytest.xfail("These languages have known issues with their sample text")
 +        return
 +    lang = LANGUAGES[lang_code]
 +    script_name = SCRIPTS[lang.script].name
 +    script_name = CLDR_SCRIPT_TO_UCD_SCRIPT.get(script_name, script_name)
 +    if not lang.sample_text.ListFields():
 +        pytest.skip("No sample text for language " + lang_code)
 +        return
 +    if lang.id in SKIP_EXEMPLARS:
 +        pytest.skip(SKIP_EXEMPLARS[lang.id])
 +        return
 +    out_of_script = defaultdict(set)
 +    for field in SampleText.fields:
 +        if field.name == "note":
 +            continue
 +        samples = getattr(lang.sample_text, field.name)
 +        chars = set(samples)
 +        for char in chars:
 +            char_script = (
 +                youseedee.ucd_data(ord(char)).get("Script", "").replace("_", " ")
 +            )
 +            if char_script == "Common" or char_script == "Inherited":
 +                continue
 +            if char_script != script_name:
 +                extensions = (
 +                    youseedee.ucd_data(ord(char))
 +                    .get("Script_Extensions", "")
 +                    .split(" ")
 +                )
 +                if any(ext == lang.script for ext in extensions):
 +                    continue
 +                out_of_script[char_script].add(char)
 +                break
 +    msg = []
 +    for script, chars in out_of_script.items():
 +        msg.append(f"'{''.join(chars)}' ({script} != {script_name})")
 +    assert not out_of_script, (
 +        f"{lang_code} sample text contained out-of-script characters"
 +        f": {', '.join(msg)}"
 +    )
++
++
+ def test_exemplar_parser():
+     bases = "a A ā Ā {a̍} {A̍} {kl}"
+     parsed_bases = parse(bases)
+     assert parsed_bases == {
+         "a",
+         "A",
+         "ā",
+         "Ā",
+         "k",
+         "l",
+         "̍",
+     }