]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
bpo-34527: POSIX locale enables the UTF-8 Mode (GH-8972)
authorVictor Stinner <vstinner@redhat.com>
Tue, 28 Aug 2018 10:35:44 +0000 (12:35 +0200)
committerGitHub <noreply@github.com>
Tue, 28 Aug 2018 10:35:44 +0000 (12:35 +0200)
* The UTF-8 Mode is now also enabled by the "POSIX" locale, not only
  by the "C" locale.
* On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also force
  the ASCII encoding if the LC_CTYPE locale is "POSIX", not only if
  the LC_CTYPE locale is "C".
* test_utf8_mode.test_cmd_line() also checks that the command line
  arguments are decoded from UTF-8 when the UTF-8 Mode is enabled
  with POSIX locale or C locale.

Lib/test/test_utf8_mode.py
Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst [new file with mode: 0644]
Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst [new file with mode: 0644]
Python/coreconfig.c
Python/fileutils.c

index 3e918fd54ce3ca8ca99f208402830c55661c0399..df988c1fc9e20cc881684aa582e1ea8316963984 100644 (file)
@@ -12,6 +12,7 @@ from test.support.script_helper import assert_python_ok, assert_python_failure
 
 
 MS_WINDOWS = (sys.platform == 'win32')
+POSIX_LOCALES = ('C', 'POSIX')
 
 
 class UTF8ModeTests(unittest.TestCase):
@@ -23,7 +24,7 @@ class UTF8ModeTests(unittest.TestCase):
 
     def posix_locale(self):
         loc = locale.setlocale(locale.LC_CTYPE, None)
-        return (loc == 'C')
+        return (loc in POSIX_LOCALES)
 
     def get_output(self, *args, failure=False, **kw):
         kw = dict(self.DEFAULT_ENV, **kw)
@@ -39,8 +40,10 @@ class UTF8ModeTests(unittest.TestCase):
     def test_posix_locale(self):
         code = 'import sys; print(sys.flags.utf8_mode)'
 
-        out = self.get_output('-c', code, LC_ALL='C')
-        self.assertEqual(out, '1')
+        for loc in POSIX_LOCALES:
+            with self.subTest(LC_ALL=loc):
+                out = self.get_output('-c', code, LC_ALL=loc)
+                self.assertEqual(out, '1')
 
     def test_xoption(self):
         code = 'import sys; print(sys.flags.utf8_mode)'
@@ -201,8 +204,10 @@ class UTF8ModeTests(unittest.TestCase):
         out = self.get_output('-X', 'utf8', '-c', code)
         self.assertEqual(out, 'UTF-8 UTF-8')
 
-        out = self.get_output('-X', 'utf8', '-c', code, LC_ALL='C')
-        self.assertEqual(out, 'UTF-8 UTF-8')
+        for loc in POSIX_LOCALES:
+            with self.subTest(LC_ALL=loc):
+                out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
+                self.assertEqual(out, 'UTF-8 UTF-8')
 
     @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
     def test_cmd_line(self):
@@ -217,13 +222,19 @@ class UTF8ModeTests(unittest.TestCase):
             self.assertEqual(args, ascii(expected), out)
 
         check('utf8', [arg_utf8])
+        for loc in POSIX_LOCALES:
+            with self.subTest(LC_ALL=loc):
+                check('utf8', [arg_utf8], LC_ALL=loc)
+
         if sys.platform == 'darwin' or support.is_android:
             c_arg = arg_utf8
         elif sys.platform.startswith("aix"):
             c_arg = arg.decode('iso-8859-1')
         else:
             c_arg = arg_ascii
-        check('utf8=0', [c_arg], LC_ALL='C')
+        for loc in POSIX_LOCALES:
+            with self.subTest(LC_ALL=loc):
+                check('utf8=0', [c_arg], LC_ALL=loc)
 
     def test_optim_level(self):
         # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-52-13.bpo-34527.sh5MQJ.rst
new file mode 100644 (file)
index 0000000..280a892
--- /dev/null
@@ -0,0 +1,2 @@
+The UTF-8 Mode is now also enabled by the "POSIX" locale, not only by the "C"
+locale.
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-11-53-39.bpo-34527.aBEX9b.rst
new file mode 100644 (file)
index 0000000..9fce794
--- /dev/null
@@ -0,0 +1,3 @@
+On FreeBSD, Py_DecodeLocale() and Py_EncodeLocale() now also forces the
+ASCII encoding if the LC_CTYPE locale is "POSIX", not only if the LC_CTYPE
+locale is "C".
index 1b9e26e50a2b231a55013c107ff6ee48b98bbfb0..acf46451f15ab5b721d36e2013c8b31a81e578d9 100644 (file)
@@ -1,5 +1,6 @@
 #include "Python.h"
 #include "internal/pystate.h"
+#include <locale.h>
 
 
 #define DECODE_LOCALE_ERR(NAME, LEN) \
@@ -828,14 +829,21 @@ static void
 config_init_locale(_PyCoreConfig *config)
 {
     if (_Py_LegacyLocaleDetected()) {
-        /* POSIX locale: enable C locale coercion and UTF-8 Mode */
-        if (config->utf8_mode < 0) {
-            config->utf8_mode = 1;
-        }
+        /* The C locale enables the C locale coercion (PEP 538) */
         if (config->coerce_c_locale < 0) {
             config->coerce_c_locale = 1;
         }
     }
+#ifndef MS_WINDOWS
+    const char *ctype_loc = setlocale(LC_CTYPE, NULL);
+    if (ctype_loc != NULL
+       && (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0)) {
+        /* The C locale and the POSIX locale enable the UTF-8 Mode (PEP 540) */
+        if (config->utf8_mode < 0) {
+            config->utf8_mode = 1;
+        }
+    }
+#endif
 }
 
 
index 35869c81ac9f746f3c242f9d3733eb4b9f825b6e..b413f4e1e68207346e04691f5e6baab6ce2729fc 100644 (file)
@@ -128,7 +128,7 @@ check_force_ascii(void)
     loc = setlocale(LC_CTYPE, NULL);
     if (loc == NULL)
         goto error;
-    if (strcmp(loc, "C") != 0) {
+    if (strcmp(loc, "C") != 0 && strcmp(loc, "POSIX") != 0) {
         /* the LC_CTYPE locale is different than C */
         return 0;
     }