[3.12] gh-126727: Fix locale.nl_langinfo(locale.ERA) (GH-126730) (GH-127098)

author Serhiy Storchaka <storchaka@gmail.com>

Thu, 21 Nov 2024 11:44:37 +0000 (13:44 +0200)

committer GitHub <noreply@github.com>

Thu, 21 Nov 2024 11:44:37 +0000 (11:44 +0000)
author Serhiy Storchaka <storchaka@gmail.com>
Thu, 21 Nov 2024 11:44:37 +0000 (13:44 +0200)
committer GitHub <noreply@github.com>
Thu, 21 Nov 2024 11:44:37 +0000 (11:44 +0000)
diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst

index a81879a2fe48dcba5d1ae665432d2f054a040836..fee5aba7ee3e3275d9d953a7336d459bd84d7aa7 100644 (file)
--- a/Doc/library/locale.rst
+++ b/Doc/library/locale.rst
@@ -281,7 +281,8 @@ The :mod:`locale` module defines the following exception and functions:
  
     .. data:: ERA
  
-      Get a string that represents the era used in the current locale.
+      Get a string which describes how years are counted and displayed for
+      each era in a locale.
  
        Most locales do not define this value.  An example of a locale which does
        define this value is the Japanese one.  In Japan, the traditional
@@ -290,9 +291,10 @@ The :mod:`locale` module defines the following exception and functions:
  
        Normally it should not be necessary to use this value directly. Specifying
        the ``E`` modifier in their format strings causes the :func:`time.strftime`
-      function to use this information.  The format of the returned string is not
-      specified, and therefore you should not assume knowledge of it on different
-      systems.
+      function to use this information.
+      The format of the returned string is specified in *The Open Group Base
+      Specifications Issue 8*, paragraph `7.3.5.2 LC_TIME C-Language Access
+      <https://pubs.opengroup.org/onlinepubs/9799919799/basedefs/V1_chap07.html#tag_07_03_05_02>`_.
  
     .. data:: ERA_D_T_FMT
  
diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py

index a680e6edb63c0e391f80ba105ab8468af30019d6..89c203250557f0a67a0af57708aee82fd26a4127 100644 (file)
--- a/Lib/test/test__locale.py
+++ b/Lib/test/test__locale.py
@@ -90,6 +90,14 @@ known_alt_digits = {
      'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}),
  }
  
+known_era = {
+    'C': (0, ''),
+    'en_US': (0, ''),
+    'ja_JP': (11, '+:1:2019/05/01:2019/12/31:令和:%EC元年'),
+    'zh_TW': (3, '+:1:1912/01/01:1912/12/31:民國:%EC元年'),
+    'th_TW': (1, '+:1:-543/01/01:+*:พ.ศ.:%EC %Ey'),
+}
+
  if sys.platform == 'win32':
      # ps_AF doesn't work on Windows: see bpo-38324 (msg361830)
      del known_numerics['ps_AF']
@@ -228,6 +236,44 @@ class _LocaleTests(unittest.TestCase):
          if not tested:
              self.skipTest('no suitable locales')
  
+    @unittest.skipUnless(nl_langinfo, "nl_langinfo is not available")
+    @unittest.skipUnless(hasattr(locale, 'ERA'), "requires locale.ERA")
+    @unittest.skipIf(
+        support.is_emscripten or support.is_wasi,
+        "musl libc issue on Emscripten, bpo-46390"
+    )
+    def test_era_nl_langinfo(self):
+        # Check nl_langinfo(ERA) for every candidate locale that can be set.
+        tested = False  # set once at least one locale has been checked
+        for loc in candidate_locales:
+            with self.subTest(locale=loc):
+                try:
+                    setlocale(LC_TIME, loc)
+                    setlocale(LC_CTYPE, loc)
+                except Error:
+                    self.skipTest(f'no locale {loc!r}')
+                    continue  # not reached: skipTest() raises and ends this subtest
+
+                with self.subTest(locale=loc):
+                    era = nl_langinfo(locale.ERA)
+                    self.assertIsInstance(era, str)
+                    if era:  # non-empty: total colons must be 5 per ';'-separated segment
+                        self.assertEqual(era.count(':'), (era.count(';') + 1) * 5, era)
+
+                    loc1 = loc.split('.', 1)[0]  # locale name up to the first '.'
+                    if loc1 in known_era:
+                        count, sample = known_era[loc1]  # minimum segment count, expected substring
+                        if count:
+                            if not era:
+                                self.skipTest(f'ERA is not set for locale {loc!r} on this platform')
+                            self.assertGreaterEqual(era.count(';') + 1, count)
+                            self.assertIn(sample, era)
+                        else:  # count == 0: this locale is expected to define no era
+                            self.assertEqual(era, '')
+                    tested = True
+        if not tested:
+            self.skipTest('no suitable locales')
+
      def test_float_parsing(self):
          # Bug #1391872: Test whether float parsing is okay on European
          # locales.
diff --git a/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst

new file mode 100644 (file)

index 0000000..7bec8a6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst
@@ -0,0 +1,3 @@
+``locale.nl_langinfo(locale.ERA)`` now returns multiple era description
+segments separated by semicolons. Previously it only returned the first
+segment on platforms with Glibc.
diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c

index 53ebb57d23ae07a79df11633b9157472e6034220..db8194372dae49532d01a4d54452a1927ce5a98a 100644 (file)
--- a/Modules/_localemodule.c
+++ b/Modules/_localemodule.c
@@ -595,6 +595,37 @@ static struct langinfo_constant{
      {0, 0}
  };
  
+#ifdef __GLIBC__
+#if defined(ALT_DIGITS) || defined(ERA)
+static PyObject *
+decode_strings(const char *result, size_t max_count)
+{
+    /* Convert a sequence of NUL-separated C strings (at most max_count, ending at the
+     * first empty string) to a Python str of semicolon-separated items; NULL on error. */
+    size_t i = 0;  /* byte offset into result */
+    size_t count = 0;  /* number of items found */
+    for (; count < max_count && result[i]; count++) {
+        i += strlen(result + i) + 1;  /* step over the item and its NUL */
+    }
+    char *buf = PyMem_Malloc(i);  /* i bytes: every counted item plus its NUL */
+    if (buf == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+    memcpy(buf, result, i);
+    /* Replace all NULs with semicolons, except the last one which terminates buf. */
+    i = 0;
+    while (--count) {  /* needs count >= 1 (size_t would wrap at 0); visible callers check *result */
+        i += strlen(buf + i);
+        buf[i++] = ';';
+    }
+    PyObject *pyresult = PyUnicode_DecodeLocale(buf, NULL);
+    PyMem_Free(buf);
+    return pyresult;
+}
+#endif
+#endif
+
  /*[clinic input]
  _locale.nl_langinfo
  
@@ -620,32 +651,18 @@ _locale_nl_langinfo_impl(PyObject *module, int item)
              result = result != NULL ? result : "";
              PyObject *pyresult;
  #ifdef __GLIBC__
+            /* According to the POSIX specification the result must be
+             * a sequence of semicolon-separated strings.
+             * But in Glibc they are NUL-separated. */
  #ifdef ALT_DIGITS
              if (item == ALT_DIGITS && *result) {
-                /* According to the POSIX specification the result must be
-                 * a sequence of up to 100 semicolon-separated strings.
-                 * But in Glibc they are NUL-separated. */
-                Py_ssize_t i = 0;
-                int count = 0;
-                for (; count < 100 && result[i]; count++) {
-                    i += strlen(result + i) + 1;
-                }
-                char *buf = PyMem_Malloc(i);
-                if (buf == NULL) {
-                    PyErr_NoMemory();
-                    pyresult = NULL;
-                }
-                else {
-                    memcpy(buf, result, i);
-                    /* Replace all NULs with semicolons. */
-                    i = 0;
-                    while (--count) {
-                        i += strlen(buf + i);
-                        buf[i++] = ';';
-                    }
-                    pyresult = PyUnicode_DecodeLocale(buf, NULL);
-                    PyMem_Free(buf);
-                }
+                pyresult = decode_strings(result, 100);
+            }
+            else
+#endif
+#ifdef ERA
+            if (item == ERA && *result) {
+                pyresult = decode_strings(result, SIZE_MAX);
              }
              else
  #endif
author	Serhiy Storchaka <storchaka@gmail.com>
	Thu, 21 Nov 2024 11:44:37 +0000 (13:44 +0200)
committer	GitHub <noreply@github.com>
	Thu, 21 Nov 2024 11:44:37 +0000 (11:44 +0000)
Doc/library/locale.rst		patch \| blob \| blame \| history
Lib/test/test__locale.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2024-11-12-13-14-47.gh-issue-126727.5Eqfqd.rst	[new file with mode: 0644]	patch \| blob
Modules/_localemodule.c		patch \| blob \| blame \| history