Support list format fallbacks

author Aarni Koskela <akx@iki.fi>
Thu, 11 Jul 2024 11:54:16 +0000 (14:54 +0300)
committer Aarni Koskela <akx@iki.fi>
Fri, 12 Jul 2024 05:51:28 +0000 (08:51 +0300)
diff --git a/babel/lists.py b/babel/lists.py

index 376bc963edeb315400a739e3fc82695f97c397d1..6c34cb0993e25d2eab91c049a13025aae5f60b12 100644 (file)
--- a/babel/lists.py
+++ b/babel/lists.py
@@ -26,9 +26,11 @@ if TYPE_CHECKING:
  DEFAULT_LOCALE = default_locale()
  
  
-def format_list(lst: Sequence[str],
-                style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard',
-                locale: Locale | str | None = DEFAULT_LOCALE) -> str:
+def format_list(
+    lst: Sequence[str],
+    style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard',
+    locale: Locale | str | None = DEFAULT_LOCALE,
+) -> str:
      """
      Format the items in `lst` as a list.
  
@@ -39,7 +41,11 @@ def format_list(lst: Sequence[str],
      >>> format_list(['omena', 'peruna', 'aplari'], style='or', locale='fi')
      u'omena, peruna tai aplari'
  
-    These styles are defined, but not all are necessarily available in all locales.
+    Not all styles are necessarily available in all locales.
+    The function will attempt to fall back to replacement styles according to the rules
+    set forth in the CLDR root XML file, and raise a ValueError if no suitable replacement
+    can be found.
+
      The following text is verbatim from the Unicode TR35-49 spec [1].
  
      * standard:
@@ -76,14 +82,9 @@ def format_list(lst: Sequence[str],
      if len(lst) == 1:
          return lst[0]
  
-    if style not in locale.list_patterns:
-        raise ValueError(
-            f'Locale {locale} does not support list formatting style {style!r} '
-            f'(supported are {sorted(locale.list_patterns)})',
-        )
-    patterns = locale.list_patterns[style]
+    patterns = _resolve_list_style(locale, style)
  
-    if len(lst) == 2:
+    if len(lst) == 2 and '2' in patterns:
          return patterns['2'].format(*lst)
  
      result = patterns['start'].format(lst[0], lst[1])
@@ -92,3 +93,31 @@ def format_list(lst: Sequence[str],
      result = patterns['end'].format(result, lst[-1])
  
      return result
+
+
+# Fallback chains based on the `<alias>` elements in CLDR 45's root.xml.
+# Each key maps to the styles tried, in order, when a locale has no
+# patterns for the key itself (see `_resolve_list_style`).
+# The root file defines both `standard` and `or`, so they're always available.
+# TODO: It would likely be better to use the babel.localedata.Alias
+#       mechanism for this, but I'm not quite sure how it's supposed to
+#       work with inheritance and data in the root.
+_style_fallbacks = {
+    "or-narrow": ["or-short", "or"],
+    "or-short": ["or"],
+    "standard-narrow": ["standard-short", "standard"],
+    "standard-short": ["standard"],
+    "unit": ["unit-short", "standard"],
+    "unit-narrow": ["unit-short", "unit", "standard"],
+    "unit-short": ["standard"],
+}
+
+
+def _resolve_list_style(locale: Locale, style: str):
+    """
+    Return the list patterns of `locale` for `style` or its first available fallback.
+
+    The requested style is tried first, followed by the replacement styles
+    listed for it in `_style_fallbacks`, in order.
+
+    :param locale: the locale whose `list_patterns` are consulted
+    :param style: the requested list formatting style
+    :return: the `locale.list_patterns` entry of the first style that is available
+    :raise ValueError: if neither the style nor any of its fallbacks is available
+    """
+    # Use a separate loop variable so the error message below reports the style
+    # the caller asked for rather than the last fallback that was tried.
+    for candidate in (style, *_style_fallbacks.get(style, ())):
+        if candidate in locale.list_patterns:
+            return locale.list_patterns[candidate]
+    raise ValueError(
+        f"Locale {locale} does not support list formatting style {style!r} "
+        f"(supported are {sorted(locale.list_patterns)})",
+    )
diff --git a/tests/test_lists.py b/tests/test_lists.py

index 2b2453bb8e43b96a06a2714dc1f97197982467c4..46ca10d0264a3878a2094130bc39558347006952 100644 (file)
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -1,6 +1,6 @@
  import pytest
  
-from babel import lists
+from babel import lists, units
  
  
  @pytest.mark.parametrize(('list', 'locale', 'expected'), [
@@ -18,3 +18,15 @@ def test_format_list(list, locale, expected):
  def test_format_list_error():
      with pytest.raises(ValueError):
          lists.format_list(['a', 'b', 'c'], style='orange', locale='en')
+
+
+def test_issue_1098():
+    measurements = [
+        units.format_unit(value, unit, length="short", locale="zh_CN")
+        for value, unit in ((1, "length-foot"), (5, "length-inch"))
+    ]
+    # zh-CN does not specify the "unit" style, so we fall back to "unit-short" style.
+    via_unit = lists.format_list(measurements, style="unit", locale="zh_CN")
+    via_unit_short = lists.format_list(measurements, style="unit-short", locale="zh_CN")
+    # Translation verified using Google Translate. It would add more spacing, but the glyphs are correct.
+    expected = "1英尺5英寸"
+    # Both styles must agree with each other and with the expected rendering.
+    assert via_unit == via_unit_short
+    assert via_unit_short == expected
author	Aarni Koskela <akx@iki.fi>
	Thu, 11 Jul 2024 11:54:16 +0000 (14:54 +0300)
committer	Aarni Koskela <akx@iki.fi>
	Fri, 12 Jul 2024 05:51:28 +0000 (08:51 +0300)
babel/lists.py		patch \| blob \| blame \| history
tests/test_lists.py		patch \| blob \| blame \| history