From: nathan-williams Date: Mon, 22 Jul 2024 04:43:01 +0000 (-0400) Subject: Expand character class for language names X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=640c050f19b10245ef27ebea400f4bf445843c02;p=thirdparty%2Fgoogle%2Ffonts.git Expand character class for language names --- diff --git a/Lib/gflanguages/data/languages/bdh_Latn.textproto b/Lib/gflanguages/data/languages/bdh_Latn.textproto index 7e7ccf6dda..ecbbb850b3 100644 --- a/Lib/gflanguages/data/languages/bdh_Latn.textproto +++ b/Lib/gflanguages/data/languages/bdh_Latn.textproto @@ -1,7 +1,7 @@ id: "bdh_Latn" language: "bdh" script: "Latn" -name: "Baka (DRC/South Sudan)" +name: "Baka, DRC/South Sudan" autonym: "Tara Baká" population: 60000 region: "CD" diff --git a/Lib/gflanguages/data/languages/bkc_Latn.textproto b/Lib/gflanguages/data/languages/bkc_Latn.textproto index 02b8bca30d..08da4d26da 100644 --- a/Lib/gflanguages/data/languages/bkc_Latn.textproto +++ b/Lib/gflanguages/data/languages/bkc_Latn.textproto @@ -1,7 +1,7 @@ id: "bkc_Latn" language: "bkc" script: "Latn" -name: "Baka (Cameroon/Gabon)" +name: "Baka, Cameroon/Gabon" population: 71000 region: "CM" region: "GA" diff --git a/Lib/gflanguages/data/languages/bm_Nkoo.textproto b/Lib/gflanguages/data/languages/bm_Nkoo.textproto index 2734cdca76..1136be516c 100644 --- a/Lib/gflanguages/data/languages/bm_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/bm_Nkoo.textproto @@ -1,6 +1,6 @@ id: "bm_Nkoo" language: "bm" script: "Nkoo" -name: "Bambara (Nko)" +name: "Bambara (N’Ko)" population: 16000000 region: "ML" diff --git a/Lib/gflanguages/data/languages/bsq_Bass.textproto b/Lib/gflanguages/data/languages/bsq_Bass.textproto index 3d76b53b1d..e2984fefbb 100644 --- a/Lib/gflanguages/data/languages/bsq_Bass.textproto +++ b/Lib/gflanguages/data/languages/bsq_Bass.textproto @@ -1,7 +1,7 @@ id: "bsq_Bass" language: "bsq" script: "Bass" -name: "Bassa (Vah)" +name: "Bassa (Bassa Vah)" population: 410000 region: "LR" region: "SL" diff --git a/Lib/gflanguages/data/languages/cbk_Latn.textproto b/Lib/gflanguages/data/languages/cbk_Latn.textproto index 7053e530f9..748d555e18 100644 --- a/Lib/gflanguages/data/languages/cbk_Latn.textproto +++ b/Lib/gflanguages/data/languages/cbk_Latn.textproto @@ -1,7 +1,7 @@ id: "cbk_Latn" language: "cbk" script: "Latn" -name: "Chavacano, Latin, Philippines" +name: "Chavacano, Philippines (Latin)" region: "PH" sample_text { masthead_full: "TtOo" diff --git a/Lib/gflanguages/data/languages/chn_Dupl.textproto b/Lib/gflanguages/data/languages/chn_Dupl.textproto index d402556fcb..2e8e8213cb 100644 --- a/Lib/gflanguages/data/languages/chn_Dupl.textproto +++ b/Lib/gflanguages/data/languages/chn_Dupl.textproto @@ -1,6 +1,6 @@ id: "chn_Dupl" language: "chn" script: "Dupl" -name: "Chinook Jargon (Duployan shorthand)" +name: "Chinook Jargon (Duployan)" region: "US" region: "CA" diff --git a/Lib/gflanguages/data/languages/de_Dupl.textproto b/Lib/gflanguages/data/languages/de_Dupl.textproto index 7910e1de42..718b6fd747 100644 --- a/Lib/gflanguages/data/languages/de_Dupl.textproto +++ b/Lib/gflanguages/data/languages/de_Dupl.textproto @@ -1,5 +1,5 @@ id: "de_Dupl" language: "de" script: "Dupl" -name: "German (Duployan shorthand)" +name: "German (Duployan)" region: "DE" diff --git a/Lib/gflanguages/data/languages/dyu_Nkoo.textproto b/Lib/gflanguages/data/languages/dyu_Nkoo.textproto index 7edc157ecd..eb891abd2d 100644 --- a/Lib/gflanguages/data/languages/dyu_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/dyu_Nkoo.textproto @@ -1,5 +1,5 @@ id: "dyu_Nkoo" language: "dyu" script: "Nkoo" -name: "Dyula (Nko)" +name: "Dyula (N’Ko)" region: "CI" diff --git a/Lib/gflanguages/data/languages/eto_Latn.textproto b/Lib/gflanguages/data/languages/eto_Latn.textproto index d016fd39f9..2da83f6b07 100644 --- a/Lib/gflanguages/data/languages/eto_Latn.textproto +++ b/Lib/gflanguages/data/languages/eto_Latn.textproto @@ -1,7 +1,7 @@ id: "eto_Latn" language: "eto" script: "Latn" -name: "Eton (Cameroon)" +name: "Eton, Cameroon" population: 400000 region: "CM" exemplar_chars { diff --git a/Lib/gflanguages/data/languages/fr_Dupl.textproto b/Lib/gflanguages/data/languages/fr_Dupl.textproto index 584d4cbda7..0093d282a8 100644 --- a/Lib/gflanguages/data/languages/fr_Dupl.textproto +++ b/Lib/gflanguages/data/languages/fr_Dupl.textproto @@ -1,5 +1,5 @@ id: "fr_Dupl" language: "fr" script: "Dupl" -name: "French (Duployan shorthand)" +name: "French (Duployan)" historical: true diff --git a/Lib/gflanguages/data/languages/gcf_Latn.textproto b/Lib/gflanguages/data/languages/gcf_Latn.textproto index 56401d24d8..b36c6e6044 100644 --- a/Lib/gflanguages/data/languages/gcf_Latn.textproto +++ b/Lib/gflanguages/data/languages/gcf_Latn.textproto @@ -1,7 +1,7 @@ id: "gcf_Latn" language: "gcf" script: "Latn" -name: "Guadeloupean Creole French, Latin, Martinique" +name: "Guadeloupean Creole French, Martinique (Latin)" region: "GP" region: "MQ" sample_text { diff --git a/Lib/gflanguages/data/languages/man_Nkoo.textproto b/Lib/gflanguages/data/languages/man_Nkoo.textproto index 2ba2349b18..4e9166d020 100644 --- a/Lib/gflanguages/data/languages/man_Nkoo.textproto +++ b/Lib/gflanguages/data/languages/man_Nkoo.textproto @@ -1,5 +1,5 @@ id: "man_Nkoo" language: "man" script: "Nkoo" -name: "Mandingo (Nko)" +name: "Mandingo (N’Ko)" region: "GN" diff --git a/Lib/gflanguages/data/scripts/Beng.textproto b/Lib/gflanguages/data/scripts/Beng.textproto index d583c7ead8..d68be9e4d7 100644 --- a/Lib/gflanguages/data/scripts/Beng.textproto +++ b/Lib/gflanguages/data/scripts/Beng.textproto @@ -1,3 +1,2 @@ id: "Beng" -name: "Bangla" - +name: "Bengali" diff --git a/tests/test_data_languages.py b/tests/test_data_languages.py index c05b7d8a57..a859cf78fe 100644 --- a/tests/test_data_languages.py +++ b/tests/test_data_languages.py @@ -84,7 +84,15 @@ SKIP_REGION = { } # "ʼ" allowed as last character in language name for Metaʼ -LANGUAGE_NAME_REGEX = "^[-A-Za-zÀ-ÿ ]+(ʼ)?(, [-A-Za-zÀ-ÿ ]+)?( [(][-A-Za-zÀ-ÿ ]+[)])?$" +LANGUAGE_NAME_REGEX = "^[-’A-Za-zÀ-ÿ ]+(ʼ)?(, [-’A-Za-zÀ-ÿ/ ]+)?( [(][-’A-Za-zÀ-ÿ ]+[)])?$" +# Some scripts have abbreviated names for reference in language names that are +# sufficient in context. If an alternate is listed here, it should be used +# universally and consistently across all language names. +ALTERNATE_SCRIPT_NAMES = { + "Dupl": "Duployan", + "Hans": "Simplified", + "Hant": "Traditional", +} @pytest.mark.parametrize("lang_code", LANGUAGES) @@ -291,7 +299,7 @@ def test_language_uniqueness(): def test_language_name_structure(): languages_with_bad_name_structure = {} for lang in LANGUAGES.values(): - script_name = SCRIPTS[lang.script].name + script_name = SCRIPTS[lang.script].name if lang.script not in ALTERNATE_SCRIPT_NAMES else ALTERNATE_SCRIPT_NAMES[lang.script] names = [["name", lang.name]] if lang.preferred_name: names += [["preferred_name", lang.preferred_name]]