From b4ba84382f3ce7bdf0e5a68e7108a21f4e8e7926 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 7 Aug 2024 11:14:34 +0300 Subject: [PATCH] Do not allow substituting alternates or drafts in derived locales (#1113) * download_import_cldr.py: pass remaining arguments through to import_cldr * Do not allow substituting alternates or drafts in derived locales For more coverage, we've allowed using alternate or draft values when no officially accepted values have been present. However, non-global locales (e.g. `de_CH`) may have an alternate spelling for what would be an alternate in the parent locale (and that alternate hasn't been imported into the parent), and the import would have then accepted the alternate from the child locale as a non-alternate. Refs #1112 --- scripts/download_import_cldr.py | 4 +++- scripts/import_cldr.py | 36 ++++++++++++++++++++++----------- tests/test_core.py | 14 +++++++++++++ 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/scripts/download_import_cldr.py b/scripts/download_import_cldr.py index 4a9805dd..cf670ed9 100755 --- a/scripts/download_import_cldr.py +++ b/scripts/download_import_cldr.py @@ -79,7 +79,9 @@ def main(): subprocess.check_call([ sys.executable, os.path.join(scripts_path, 'import_cldr.py'), - common_path]) + common_path, + *sys.argv[1:], + ]) if __name__ == '__main__': diff --git a/scripts/import_cldr.py b/scripts/import_cldr.py index ee481440..e8cc0310 100755 --- a/scripts/import_cldr.py +++ b/scripts/import_cldr.py @@ -417,7 +417,8 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False): if locale_id in day_period_rules: data["day_period_rules"] = day_period_rules[locale_id] - parse_locale_display_names(data, tree) + is_global = ("_" not in locale_id) + parse_locale_display_names(data, tree, is_global=is_global) parse_list_patterns(data, tree) parse_dates(data, tree, sup, regions, territory) @@ -489,43 +490,54 @@ def _should_skip_elem(elem, type=None, dest=None): :param
dest: Destination dict. May be elided to skip the dict check. :return: skip boolean """ - if 'draft' in elem.attrib or 'alt' in elem.attrib: + if _is_draft_or_alt(elem): if dest is None or type in dest: return True -def _import_type_text(dest, elem, type=None): +def _is_draft_or_alt(elem) -> bool: + return 'draft' in elem.attrib or 'alt' in elem.attrib + + +def _import_type_text(dest, elem, type=None, *, allow_variant_and_draft_fallback=True) -> None: """ Conditionally import the element's inner text(s) into the `dest` dict. - The condition being, namely, that the element isn't a draft/alternate version - of a pre-existing element. + If `allow_variant_and_draft_fallback` is True, then the element may be imported + if there otherwise isn't a pre-existing element of the same type. :param dest: Destination dict :param elem: XML element. :param type: Override type. (By default, the `type` attr of the element.) - :return: + :param allow_variant_and_draft_fallback: See above. + :return: Nothing. """ if type is None: type = elem.attrib['type'] - if _should_skip_elem(elem, type, dest): + + # Already have this, nothing to do. + if type in dest: + return + + if not allow_variant_and_draft_fallback and _is_draft_or_alt(elem): + # Not allowed to use a draft/alternate here. 
return dest[type] = _text(elem) -def parse_locale_display_names(data, tree): +def parse_locale_display_names(data, tree, *, is_global: bool): territories = data.setdefault('territories', {}) for elem in tree.findall('.//territories/territory'): - _import_type_text(territories, elem) + _import_type_text(territories, elem, allow_variant_and_draft_fallback=is_global) languages = data.setdefault('languages', {}) for elem in tree.findall('.//languages/language'): - _import_type_text(languages, elem) + _import_type_text(languages, elem, allow_variant_and_draft_fallback=is_global) variants = data.setdefault('variants', {}) for elem in tree.findall('.//variants/variant'): - _import_type_text(variants, elem) + _import_type_text(variants, elem, allow_variant_and_draft_fallback=is_global) scripts = data.setdefault('scripts', {}) for elem in tree.findall('.//scripts/script'): - _import_type_text(scripts, elem) + _import_type_text(scripts, elem, allow_variant_and_draft_fallback=is_global) def parse_list_patterns(data, tree): diff --git a/tests/test_core.py b/tests/test_core.py index 1bec2155..57f1a89c 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -348,3 +348,17 @@ def test_issue_814(): loc = Locale.parse('ca_ES_valencia') assert loc.variant == "VALENCIA" assert loc.get_display_name() == 'català (Espanya, valencià)' + + +def test_issue_1112(): + """ + Test that an alternate spelling of `Türkei` doesn't inadvertently + get imported from `de_AT` to replace the parent's non-alternate spelling. + """ + assert ( + Locale.parse('de').territories['TR'] == + Locale.parse('de_AT').territories['TR'] == + Locale.parse('de_CH').territories['TR'] == + Locale.parse('de_DE').territories['TR'] == + 'Türkei' + ) -- 2.47.2