git.ipfire.org Git - thirdparty/babel.git/commitdiff
Do not allow substituting alternates or drafts in derived locales (#1113)
author: Aarni Koskela <akx@iki.fi>
Wed, 7 Aug 2024 08:14:34 +0000 (11:14 +0300)
committer: GitHub <noreply@github.com>
Wed, 7 Aug 2024 08:14:34 +0000 (11:14 +0300)
* download_import_cldr.py: pass remaining arguments through to import_cldr

* Do not allow substituting alternates or drafts in derived locales

For more coverage, we've allowed using alternate or draft values
when no officially accepted values have been present.

However, non-global locales (e.g. `de_CH`) may have an alternate
spelling for what would be an alternate in the parent locale (and
that alternate hasn't been imported into the parent), and the import
would then have accepted the alternate from the child locale as a
non-alternate.

Refs #1112

scripts/download_import_cldr.py
scripts/import_cldr.py
tests/test_core.py

index 4a9805dde6596f7835987ac99ba8767754af0a53..cf670ed98c47e12de7dc8d7109a641bd85d5ea42 100755 (executable)
@@ -79,7 +79,9 @@ def main():
     subprocess.check_call([
         sys.executable,
         os.path.join(scripts_path, 'import_cldr.py'),
-        common_path])
+        common_path,
+        *sys.argv[1:],
+    ])
 
 
 if __name__ == '__main__':
index ee481440fcee113f82c9622d5167f46947f1af82..e8cc03106cf224638820e71b5c4c38a68d98c510 100755 (executable)
@@ -417,7 +417,8 @@ def _process_local_datas(sup, srcdir, destdir, force=False, dump_json=False):
         if locale_id in day_period_rules:
             data["day_period_rules"] = day_period_rules[locale_id]
 
-        parse_locale_display_names(data, tree)
+        is_global = ("_" not in locale_id)
+        parse_locale_display_names(data, tree, is_global=is_global)
         parse_list_patterns(data, tree)
         parse_dates(data, tree, sup, regions, territory)
 
@@ -489,43 +490,54 @@ def _should_skip_elem(elem, type=None, dest=None):
     :param dest: Destination dict. May be elided to skip the dict check.
     :return: skip boolean
     """
-    if 'draft' in elem.attrib or 'alt' in elem.attrib:
+    if _is_draft_or_alt(elem):
         if dest is None or type in dest:
             return True
 
 
-def _import_type_text(dest, elem, type=None):
+def _is_draft_or_alt(elem) -> bool:
+    return 'draft' in elem.attrib or 'alt' in elem.attrib
+
+
+def _import_type_text(dest, elem, type=None, *, allow_variant_and_draft_fallback=True) -> None:
     """
     Conditionally import the element's inner text(s) into the `dest` dict.
 
-    The condition being, namely, that the element isn't a draft/alternate version
-    of a pre-existing element.
+    If `allow_variant_and_draft_fallback` is True, then the element may be imported
+    if there otherwise isn't a pre-existing element of the same type.
 
     :param dest: Destination dict
     :param elem: XML element.
     :param type: Override type. (By default, the `type` attr of the element.)
-    :return:
+    :param allow_variant_and_draft_fallback: See above.
+    :return: Nothing.
     """
     if type is None:
         type = elem.attrib['type']
-    if _should_skip_elem(elem, type, dest):
+
+    # Already have this, nothing to do.
+    if type in dest:
+        return
+
+    if not allow_variant_and_draft_fallback and _is_draft_or_alt(elem):
+        # Not allowed to use a draft/alternate here.
         return
     dest[type] = _text(elem)
 
 
-def parse_locale_display_names(data, tree):
+def parse_locale_display_names(data, tree, *, is_global: bool):
     territories = data.setdefault('territories', {})
     for elem in tree.findall('.//territories/territory'):
-        _import_type_text(territories, elem)
+        _import_type_text(territories, elem, allow_variant_and_draft_fallback=is_global)
     languages = data.setdefault('languages', {})
     for elem in tree.findall('.//languages/language'):
-        _import_type_text(languages, elem)
+        _import_type_text(languages, elem, allow_variant_and_draft_fallback=is_global)
     variants = data.setdefault('variants', {})
     for elem in tree.findall('.//variants/variant'):
-        _import_type_text(variants, elem)
+        _import_type_text(variants, elem, allow_variant_and_draft_fallback=is_global)
     scripts = data.setdefault('scripts', {})
     for elem in tree.findall('.//scripts/script'):
-        _import_type_text(scripts, elem)
+        _import_type_text(scripts, elem, allow_variant_and_draft_fallback=is_global)
 
 
 def parse_list_patterns(data, tree):
index 1bec2155d51487c78ec6010dc24f8d3b0e5e6d5d..57f1a89c64ee752c38fb8c4ca24a57371f6186f3 100644 (file)
@@ -348,3 +348,17 @@ def test_issue_814():
     loc = Locale.parse('ca_ES_valencia')
     assert loc.variant == "VALENCIA"
     assert loc.get_display_name() == 'català (Espanya, valencià)'
+
+
+def test_issue_1112():
+    """
+    Test that an alternate spelling of `Türkei` doesn't inadvertently
+    get imported from `de_AT` to replace the parent's non-alternate spelling.
+    """
+    assert (
+        Locale.parse('de').territories['TR'] ==
+        Locale.parse('de_AT').territories['TR'] ==
+        Locale.parse('de_CH').territories['TR'] ==
+        Locale.parse('de_DE').territories['TR'] ==
+        'Türkei'
+    )