gh-137729: Fix support for locales with @-modifiers (GH-137253)

author Serhiy Storchaka <storchaka@gmail.com>

Mon, 18 Aug 2025 07:11:15 +0000 (10:11 +0300)

committer GitHub <noreply@github.com>

Mon, 18 Aug 2025 07:11:15 +0000 (10:11 +0300)
author Serhiy Storchaka <storchaka@gmail.com>
Mon, 18 Aug 2025 07:11:15 +0000 (10:11 +0300)
committer GitHub <noreply@github.com>
Mon, 18 Aug 2025 07:11:15 +0000 (10:11 +0300)
diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst

index d48ea04077f366fdb6321024f7762579ad40b5ba..0800b3e5677c93188a8dbaab156df16298547f24 100644 (file)
--- a/Doc/library/locale.rst
+++ b/Doc/library/locale.rst
@@ -42,7 +42,7 @@ The :mod:`locale` module defines the following exception and functions:
     If *locale* is a pair, it is converted to a locale name using
     the locale aliasing engine.
     The language code has the same format as a :ref:`locale name <locale_name>`,
-   but without encoding and ``@``-modifier.
+   but without encoding.
     The language code and encoding can be ``None``.
  
     If *locale* is omitted or ``None``, the current setting for *category* is
@@ -58,6 +58,9 @@ The :mod:`locale` module defines the following exception and functions:
     specified in the :envvar:`LANG` environment variable).  If the locale is not
     changed thereafter, using multithreading should not cause problems.
  
+   .. versionchanged:: next
+      Support language codes with ``@``-modifiers.
+
  
  .. function:: localeconv()
  
@@ -366,11 +369,15 @@ The :mod:`locale` module defines the following exception and functions:
     values except :const:`LC_ALL`.  It defaults to :const:`LC_CTYPE`.
  
     The language code has the same format as a :ref:`locale name <locale_name>`,
-   but without encoding and ``@``-modifier.
+   but without encoding.
     The language code and encoding may be ``None`` if their values cannot be
     determined.
     The "C" locale is represented as ``(None, None)``.
  
+   .. versionchanged:: next
+      ``@``-modifiers are no longer silently removed, but included in
+      the language code.
+
  
  .. function:: getpreferredencoding(do_setlocale=True)
  
diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst

index 252d8966b7450fc384146ed28b635b122f81db9f..407606da961c16a4db717f1293b82135c0986387 100644 (file)
--- a/Doc/whatsnew/3.15.rst
+++ b/Doc/whatsnew/3.15.rst
@@ -274,6 +274,15 @@ http.cookies
    (Contributed by Nick Burns and Senthil Kumaran in :gh:`92936`.)
  
  
+locale
+------
+
+* :func:`~locale.setlocale` now supports language codes with ``@``-modifiers.
+  ``@``-modifiers are no longer silently removed in :func:`~locale.getlocale`,
+  but included in the language code.
+  (Contributed by Serhiy Storchaka in :gh:`137729`.)
+
+
  math
  ----
  
diff --git a/Lib/locale.py b/Lib/locale.py

index 0bde7ed51c66c18783f5a8d42978bf4049cfdf02..37cafb4a601b3c2c153d9763c4ec95a01709c328 100644 (file)
--- a/Lib/locale.py
+++ b/Lib/locale.py
@@ -375,12 +375,14 @@ def _replace_encoding(code, encoding):
  def _append_modifier(code, modifier):
      if modifier == 'euro':
          if '.' not in code:
-            return code + '.ISO8859-15'
+            # Linux appears to require keeping the "@euro" modifier in place,
+            # even when using the ".ISO8859-15" encoding.
+            return code + '.ISO8859-15@euro'
          _, _, encoding = code.partition('.')
-        if encoding in ('ISO8859-15', 'UTF-8'):
+        if encoding == 'UTF-8':
              return code
          if encoding == 'ISO8859-1':
-            return _replace_encoding(code, 'ISO8859-15')
+            code = _replace_encoding(code, 'ISO8859-15')
      return code + '@' + modifier
  
  def normalize(localename):
@@ -485,13 +487,18 @@ def _parse_localename(localename):
          # Deal with locale modifiers
          code, modifier = code.split('@', 1)
          if modifier == 'euro' and '.' not in code:
-            # Assume Latin-9 for @euro locales. This is bogus,
-            # since some systems may use other encodings for these
-            # locales. Also, we ignore other modifiers.
-            return code, 'iso-8859-15'
+            # Assume ISO8859-15 for @euro locales. Do note that some systems
+            # may use other encodings for these locales, so this may not always
+            # be correct.
+            return code + '@euro', 'ISO8859-15'
+    else:
+        modifier = ''
  
      if '.' in code:
-        return tuple(code.split('.')[:2])
+        code, encoding = code.split('.')[:2]
+        if modifier:
+            code += '@' + modifier
+        return code, encoding
      elif code == 'C':
          return None, None
      elif code == 'UTF-8':
@@ -516,7 +523,14 @@ def _build_localename(localetuple):
          if encoding is None:
              return language
          else:
-            return language + '.' + encoding
+            if '@' in language:
+                language, modifier = language.split('@', 1)
+            else:
+                modifier = ''
+            localename = language + '.' + encoding
+            if modifier:
+                localename += '@' + modifier
+            return localename
      except (TypeError, ValueError):
          raise TypeError('Locale must be None, a string, or an iterable of '
                          'two strings -- language code, encoding.') from None
@@ -888,6 +902,12 @@ del k, v
  # SS 2025-06-10:
  # Remove 'c.utf8' -> 'en_US.UTF-8' because 'en_US.UTF-8' does not exist
  # on all platforms.
+#
+# SS 2025-07-30:
+# Remove conflicts with GNU libc.
+#
+#    removed 'el_gr@euro'
+#    removed 'uz_uz@cyrillic'
  
  locale_alias = {
      'a3':                                   'az_AZ.KOI8-C',
@@ -1021,7 +1041,6 @@ locale_alias = {
      'el':                                   'el_GR.ISO8859-7',
      'el_cy':                                'el_CY.ISO8859-7',
      'el_gr':                                'el_GR.ISO8859-7',
-    'el_gr@euro':                           'el_GR.ISO8859-15',
      'en':                                   'en_US.ISO8859-1',
      'en_ag':                                'en_AG.UTF-8',
      'en_au':                                'en_AU.ISO8859-1',
@@ -1456,7 +1475,6 @@ locale_alias = {
      'ur_pk':                                'ur_PK.CP1256',
      'uz':                                   'uz_UZ.UTF-8',
      'uz_uz':                                'uz_UZ.UTF-8',
-    'uz_uz@cyrillic':                       'uz_UZ.UTF-8',
      've':                                   've_ZA.UTF-8',
      've_za':                                've_ZA.UTF-8',
      'vi':                                   'vi_VN.TCVN',
diff --git a/Lib/test/test_locale.py b/Lib/test/test_locale.py

index 698e137e3e8abd6234f3ae048091f50e083e3455..01b1e754d0421918b96166db454a7654b507ad22 100644 (file)
--- a/Lib/test/test_locale.py
+++ b/Lib/test/test_locale.py
@@ -1,4 +1,5 @@
  from decimal import Decimal
+from test import support
  from test.support import cpython_only, verbose, is_android, linked_to_musl, os_helper
  from test.support.warnings_helper import check_warnings
  from test.support.import_helper import ensure_lazy_imports, import_fresh_module
@@ -425,8 +426,8 @@ class NormalizeTest(unittest.TestCase):
          self.check('cs_CZ.ISO8859-2', 'cs_CZ.ISO8859-2')
  
      def test_euro_modifier(self):
-        self.check('de_DE@euro', 'de_DE.ISO8859-15')
-        self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15')
+        self.check('de_DE@euro', 'de_DE.ISO8859-15@euro')
+        self.check('en_US.ISO8859-15@euro', 'en_US.ISO8859-15@euro')
          self.check('de_DE.utf8@euro', 'de_DE.UTF-8')
  
      def test_latin_modifier(self):
@@ -534,6 +535,105 @@ class TestRealLocales(unittest.TestCase):
          with self.assertRaises(locale.Error):
              locale.setlocale(locale.LC_ALL, loc2)
  
+    @support.subTests('localename,localetuple', [
+        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso885915')),
+        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'iso88591')),
+        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
+        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-1')),
+        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', None)),
+        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso885915')),
+        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'iso88591')),
+        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
+        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-1')),
+        ('de_DE.ISO8859-15@euro', ('de_DE@euro', None)),
+        ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'iso88597')),
+        ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
+        ('el_GR.ISO8859-7@euro', ('el_GR@euro', None)),
+        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso885915')),
+        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'iso88591')),
+        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
+        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-1')),
+        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', None)),
+        ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'utf8')),
+        ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
+        ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', None)),
+        ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'utf8')),
+        ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
+        ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', None)),
+        ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'utf8')),
+        ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
+        ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', None)),
+        ('be_BY.UTF-8@latin', ('be_BY@latin', 'utf8')),
+        ('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
+        ('be_BY.UTF-8@latin', ('be_BY@latin', None)),
+        ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'utf8')),
+        ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
+        ('sr_RS.UTF-8@latin', ('sr_RS@latin', None)),
+        ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'utf8')),
+        ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
+        ('ug_CN.UTF-8@latin', ('ug_CN@latin', None)),
+        ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'utf8')),
+        ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
+        ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', None)),
+    ])
+    def test_setlocale_with_modifier(self, localename, localetuple):
+        try:
+            locale.setlocale(locale.LC_CTYPE, localename)
+        except locale.Error as exc:
+            self.skipTest(str(exc))
+        loc = locale.setlocale(locale.LC_CTYPE, localetuple)
+        self.assertEqual(loc, localename)
+
+        loctuple = locale.getlocale(locale.LC_CTYPE)
+        loc = locale.setlocale(locale.LC_CTYPE, loctuple)
+        self.assertEqual(loc, localename)
+
+    @support.subTests('localename,localetuple', [
+        ('fr_FR.iso885915@euro', ('fr_FR@euro', 'ISO8859-15')),
+        ('fr_FR.ISO8859-15@euro', ('fr_FR@euro', 'ISO8859-15')),
+        ('fr_FR@euro', ('fr_FR@euro', 'ISO8859-15')),
+        ('de_DE.iso885915@euro', ('de_DE@euro', 'ISO8859-15')),
+        ('de_DE.ISO8859-15@euro', ('de_DE@euro', 'ISO8859-15')),
+        ('de_DE@euro', ('de_DE@euro', 'ISO8859-15')),
+        ('el_GR.iso88597@euro', ('el_GR@euro', 'ISO8859-7')),
+        ('el_GR.ISO8859-7@euro', ('el_GR@euro', 'ISO8859-7')),
+        ('el_GR@euro', ('el_GR@euro', 'ISO8859-7')),
+        ('ca_ES.iso885915@euro', ('ca_ES@euro', 'ISO8859-15')),
+        ('ca_ES.ISO8859-15@euro', ('ca_ES@euro', 'ISO8859-15')),
+        ('ca_ES@euro', ('ca_ES@euro', 'ISO8859-15')),
+        ('ca_ES.utf8@valencia', ('ca_ES@valencia', 'UTF-8')),
+        ('ca_ES.UTF-8@valencia', ('ca_ES@valencia', 'UTF-8')),
+        ('ca_ES@valencia', ('ca_ES@valencia', 'UTF-8')),
+        ('ks_IN.utf8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
+        ('ks_IN.UTF-8@devanagari', ('ks_IN@devanagari', 'UTF-8')),
+        ('ks_IN@devanagari', ('ks_IN@devanagari', 'UTF-8')),
+        ('sd_IN.utf8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
+        ('sd_IN.UTF-8@devanagari', ('sd_IN@devanagari', 'UTF-8')),
+        ('sd_IN@devanagari', ('sd_IN@devanagari', 'UTF-8')),
+        ('be_BY.utf8@latin', ('be_BY@latin', 'UTF-8')),
+        ('be_BY.UTF-8@latin', ('be_BY@latin', 'UTF-8')),
+        ('be_BY@latin', ('be_BY@latin', 'UTF-8')),
+        ('sr_RS.utf8@latin', ('sr_RS@latin', 'UTF-8')),
+        ('sr_RS.UTF-8@latin', ('sr_RS@latin', 'UTF-8')),
+        ('sr_RS@latin', ('sr_RS@latin', 'UTF-8')),
+        ('ug_CN.utf8@latin', ('ug_CN@latin', 'UTF-8')),
+        ('ug_CN.UTF-8@latin', ('ug_CN@latin', 'UTF-8')),
+        ('ug_CN@latin', ('ug_CN@latin', 'UTF-8')),
+        ('uz_UZ.utf8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
+        ('uz_UZ.UTF-8@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
+        ('uz_UZ@cyrillic', ('uz_UZ@cyrillic', 'UTF-8')),
+    ])
+    def test_getlocale_with_modifier(self, localename, localetuple):
+        try:
+            locale.setlocale(locale.LC_CTYPE, localename)
+        except locale.Error as exc:
+            self.skipTest(str(exc))
+        loctuple = locale.getlocale(locale.LC_CTYPE)
+        self.assertEqual(loctuple, localetuple)
+
+        locale.setlocale(locale.LC_CTYPE, loctuple)
+        self.assertEqual(locale.getlocale(locale.LC_CTYPE), localetuple)
+
  
  class TestMiscellaneous(unittest.TestCase):
      def test_defaults_UTF8(self):
diff --git a/Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst b/Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst

new file mode 100644 (file)

index 0000000..b324a42
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst
@@ -0,0 +1,3 @@
+:func:`locale.setlocale` now supports language codes with ``@``-modifiers.
+``@``-modifiers are no longer silently removed in :func:`locale.getlocale`,
+but included in the language code.
diff --git a/Tools/i18n/makelocalealias.py b/Tools/i18n/makelocalealias.py

index 02af1caff7d499813f8846c50422617d5e783ea1..7f001abc09745dc8d2c6d975d8a4de2af16d2b4e 100755 (executable)
--- a/Tools/i18n/makelocalealias.py
+++ b/Tools/i18n/makelocalealias.py
@@ -44,6 +44,13 @@ def parse(filename):
          # Ignore one letter locale mappings (except for 'c')
          if len(locale) == 1 and locale != 'c':
              continue
+        if '@' in locale and '@' not in alias:
+            # Do not simply remove the "@euro" modifier.
+            # Glibc generates separate locales with the "@euro" modifier,
+            # and does not always generate a locale without it with the same
+            # encoding.  It can also affect collation.
+            if locale.endswith('@euro') and not locale.endswith('.utf-8@euro'):
+                alias += '@euro'
          # Normalize encoding, if given
          if '.' in locale:
              lang, encoding = locale.split('.')[:2]
@@ -51,6 +58,10 @@ def parse(filename):
              encoding = encoding.replace('_', '')
              locale = lang + '.' + encoding
          data[locale] = alias
+    # Conflict with glibc.
+    data.pop('el_gr@euro', None)
+    data.pop('uz_uz@cyrillic', None)
+    data.pop('uz_uz.utf8@cyrillic', None)
      return data
  
  def parse_glibc_supported(filename):
@@ -81,7 +92,7 @@ def parse_glibc_supported(filename):
          # Add an encoding to alias
          alias, _, modifier = alias.partition('@')
          alias = _locale._replace_encoding(alias, alias_encoding)
-        if modifier and not (modifier == 'euro' and alias_encoding == 'ISO-8859-15'):
+        if modifier:
              alias += '@' + modifier
          data[locale] = alias
      return data
author	Serhiy Storchaka <storchaka@gmail.com>
	Mon, 18 Aug 2025 07:11:15 +0000 (10:11 +0300)
committer	GitHub <noreply@github.com>
	Mon, 18 Aug 2025 07:11:15 +0000 (10:11 +0300)
Doc/library/locale.rst		patch \| blob \| blame \| history
Doc/whatsnew/3.15.rst		patch \| blob \| blame \| history
Lib/locale.py		patch \| blob \| blame \| history
Lib/test/test_locale.py		patch \| blob \| blame \| history
Misc/NEWS.d/next/Library/2025-08-14-00-00-12.gh-issue-137729.i9NSKP.rst	[new file with mode: 0644]	patch \| blob
Tools/i18n/makelocalealias.py		patch \| blob \| blame \| history