git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
[3.7] bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986) (GH-8987)
author: Victor Stinner <vstinner@redhat.com>
Wed, 29 Aug 2018 09:01:33 +0000 (11:01 +0200)
committer: GitHub <noreply@github.com>
Wed, 29 Aug 2018 09:01:33 +0000 (11:01 +0200)
* bpo-34485: stdout uses surrogateescape on POSIX locale (GH-8986)

Standard streams like sys.stdout now use the "surrogateescape" error
handler, instead of "strict", on the POSIX locale (when the C locale is not
coerced and the UTF-8 Mode is disabled).

Add tests on sys.stdout.errors with LC_ALL=POSIX.

Fix the error handler of standard streams like sys.stdout:
PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
"strict".

(cherry picked from commit 315877dc361d554bec34b4b62c270479ad36a1be)

Lib/test/test_sys.py
Lib/test/test_utf8_mode.py
Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst [new file with mode: 0644]
Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst [new file with mode: 0644]
Python/pylifecycle.c

index 336ae447a8de5e5aad9c89886cd40a168f3d5200..27f75901c63fda4fbacaf5443e84dedecda60a85 100644 (file)
@@ -654,10 +654,10 @@ class SysModuleTest(unittest.TestCase):
             expected = None
         self.check_fsencoding(fs_encoding, expected)
 
-    def c_locale_get_error_handler(self, isolated=False, encoding=None):
+    def c_locale_get_error_handler(self, locale, isolated=False, encoding=None):
         # Force the POSIX locale
         env = os.environ.copy()
-        env["LC_ALL"] = "C"
+        env["LC_ALL"] = locale
         env["PYTHONCOERCECLOCALE"] = "0"
         code = '\n'.join((
             'import sys',
@@ -683,44 +683,50 @@ class SysModuleTest(unittest.TestCase):
         stdout, stderr = p.communicate()
         return stdout
 
-    def test_c_locale_surrogateescape(self):
-        out = self.c_locale_get_error_handler(isolated=True)
+    def check_locale_surrogateescape(self, locale):
+        out = self.c_locale_get_error_handler(locale, isolated=True)
         self.assertEqual(out,
                          'stdin: surrogateescape\n'
                          'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
 
         # replace the default error handler
-        out = self.c_locale_get_error_handler(encoding=':ignore')
+        out = self.c_locale_get_error_handler(locale, encoding=':ignore')
         self.assertEqual(out,
                          'stdin: ignore\n'
                          'stdout: ignore\n'
                          'stderr: backslashreplace\n')
 
         # force the encoding
-        out = self.c_locale_get_error_handler(encoding='iso8859-1')
+        out = self.c_locale_get_error_handler(locale, encoding='iso8859-1')
         self.assertEqual(out,
                          'stdin: strict\n'
                          'stdout: strict\n'
                          'stderr: backslashreplace\n')
-        out = self.c_locale_get_error_handler(encoding='iso8859-1:')
+        out = self.c_locale_get_error_handler(locale, encoding='iso8859-1:')
         self.assertEqual(out,
                          'stdin: strict\n'
                          'stdout: strict\n'
                          'stderr: backslashreplace\n')
 
         # have no any effect
-        out = self.c_locale_get_error_handler(encoding=':')
+        out = self.c_locale_get_error_handler(locale, encoding=':')
         self.assertEqual(out,
-                         'stdin: strict\n'
-                         'stdout: strict\n'
+                         'stdin: surrogateescape\n'
+                         'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
-        out = self.c_locale_get_error_handler(encoding='')
+        out = self.c_locale_get_error_handler(locale, encoding='')
         self.assertEqual(out,
                          'stdin: surrogateescape\n'
                          'stdout: surrogateescape\n'
                          'stderr: backslashreplace\n')
 
+    def test_c_locale_surrogateescape(self):
+        self.check_locale_surrogateescape('C')
+
+    def test_posix_locale_surrogateescape(self):
+        self.check_locale_surrogateescape('POSIX')
+
     def test_implementation(self):
         # This test applies to all implementations equally.
 
index 4a16b73046894f8c4e2dc8811cde22cdd3065dee..554abfab3163159ea021c6d181bcef8374bffa56 100644 (file)
@@ -146,9 +146,9 @@ class UTF8ModeTests(unittest.TestCase):
         out = self.get_output('-X', 'utf8', '-c', code,
                               PYTHONIOENCODING=":namereplace")
         self.assertEqual(out.splitlines(),
-                         ['stdin: UTF-8/namereplace',
-                          'stdout: UTF-8/namereplace',
-                          'stderr: UTF-8/backslashreplace'])
+                         ['stdin: utf-8/namereplace',
+                          'stdout: utf-8/namereplace',
+                          'stderr: utf-8/backslashreplace'])
 
     def test_io(self):
         code = textwrap.dedent('''
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-28-23-01-14.bpo-34485.dq1Kqk.rst
new file mode 100644 (file)
index 0000000..5ca373a
--- /dev/null
@@ -0,0 +1,3 @@
+Fix the error handler of standard streams like sys.stdout:
+PYTHONIOENCODING=":" is now ignored instead of setting the error handler to
+"strict".
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst b/Misc/NEWS.d/next/Core and Builtins/2018-08-29-09-27-47.bpo-34485.5aJCmw.rst
new file mode 100644 (file)
index 0000000..893e4f5
--- /dev/null
@@ -0,0 +1,3 @@
+Standard streams like sys.stdout now use the "surrogateescape" error
+handler, instead of "strict", on the POSIX locale (when the C locale is not
+coerced and the UTF-8 Mode is disabled).
index fc4ee06f144f3aa05ec862c84d2de95cd71435b4..539d62a2f0f4584a77e04768d14aaafc59f535d7 100644 (file)
@@ -423,13 +423,13 @@ get_default_standard_stream_error_handler(void)
 {
     const char *ctype_loc = setlocale(LC_CTYPE, NULL);
     if (ctype_loc != NULL) {
-        /* "surrogateescape" is the default in the legacy C locale */
-        if (strcmp(ctype_loc, "C") == 0) {
+        /* surrogateescape is the default in the legacy C and POSIX locales */
+        if (strcmp(ctype_loc, "C") == 0 || strcmp(ctype_loc, "POSIX") == 0) {
             return "surrogateescape";
         }
 
 #ifdef PY_COERCE_C_LOCALE
-        /* "surrogateescape" is the default in locale coercion target locales */
+        /* surrogateescape is the default in locale coercion target locales */
         const _LocaleCoercionTarget *target = NULL;
         for (target = _TARGET_LOCALES; target->locale_name; target++) {
             if (strcmp(ctype_loc, target->locale_name) == 0) {
@@ -440,7 +440,7 @@ get_default_standard_stream_error_handler(void)
    }
 
    /* Otherwise return NULL to request the typical default error handler */
-   return NULL;
+   return "strict";
 }
 
 #ifdef PY_COERCE_C_LOCALE
@@ -1851,20 +1851,42 @@ init_sys_streams(PyInterpreterState *interp)
             if (err) {
                 *err = '\0';
                 err++;
-                if (*err && !errors) {
-                    errors = err;
+                if (!err[0]) {
+                    err = NULL;
                 }
             }
-            if (*pythonioencoding && !encoding) {
-                encoding = pythonioencoding;
+
+            /* Does PYTHONIOENCODING contain an encoding? */
+            if (pythonioencoding[0]) {
+                if (!encoding) {
+                    encoding = pythonioencoding;
+                }
+
+                /* If the encoding is set but not the error handler,
+                   use "strict" error handler by default.
+                   PYTHONIOENCODING=latin1 behaves as
+                   PYTHONIOENCODING=latin1:strict. */
+                if (!err) {
+                    err = "strict";
+                }
+            }
+
+            if (!errors && err != NULL) {
+                errors = err;
             }
         }
-        else if (interp->core_config.utf8_mode) {
-            encoding = "utf-8";
-            errors = "surrogateescape";
+
+        if (interp->core_config.utf8_mode) {
+            if (!encoding) {
+                encoding = "utf-8";
+            }
+            if (!errors) {
+                errors = "surrogateescape";
+            }
         }
 
-        if (!errors && !pythonioencoding) {
+
+        if (!errors) {
             /* Choose the default error handler based on the current locale */
             errors = get_default_standard_stream_error_handler();
         }