]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
New environment variable PYTHONIOENCODING.
authorMartin v. Löwis <martin@v.loewis.de>
Sun, 1 Jun 2008 07:20:46 +0000 (07:20 +0000)
committerMartin v. Löwis <martin@v.loewis.de>
Sun, 1 Jun 2008 07:20:46 +0000 (07:20 +0000)
Doc/c-api/file.rst
Doc/library/stdtypes.rst
Doc/using/cmdline.rst
Include/fileobject.h
Lib/test/test_sys.py
Misc/NEWS
Modules/main.c
Objects/fileobject.c
Python/pythonrun.c
Python/sysmodule.c

index e107268af724a38473d1d93739d66427877a5933..aa753e5699528f204f511737bfd56708a5816132 100644 (file)
@@ -130,6 +130,14 @@ change in future releases of Python.
    .. versionadded:: 2.3
 
 
+.. cfunction:: int PyFile_SetEncodingAndErrors(PyFileObject *p, const char *enc, char *errors)
+
+   Set the file's encoding for Unicode output to *enc*, and its error
+   mode to *errors*. Return 1 on success and 0 on failure.
+
+   .. versionadded:: 2.6
+
+
 .. cfunction:: int PyFile_SoftSpace(PyObject *p, int newflag)
 
    .. index:: single: softspace (file attribute)
index c6679fd53132f603393bc26031c98d9f7b643d02..c96ef10d7bb2cf666fe7b3e417ff3e44b96bd5c4 100644 (file)
@@ -2165,6 +2165,13 @@ the particular object.
    .. versionadded:: 2.3
 
 
+.. attribute:: file.errors
+
+   The Unicode error handler used along with the encoding.
+
+   .. versionadded:: 2.6
+
+
 .. attribute:: file.mode
 
    The I/O mode for the file.  If the file was created using the :func:`open`
index a6ab7adad25778e4266742846232db4a03fc354c..7d9ecadb551dcf6581c736c1fe2aa97016e91d31 100644 (file)
@@ -481,6 +481,13 @@ These environment variables influence Python's behavior.
 
    .. versionadded:: 2.6
 
+.. envvar:: PYTHONIOENCODING
+
+   Overrides the encoding used for stdin/stdout/stderr, in the syntax
+   encodingname:errorhandler, with the :errorhandler part being optional.
+
+   .. versionadded:: 2.6
+
 
 .. envvar:: PYTHONNOUSERSITE
 
index 56fae81fe49a5c45306043b3f1d9b1a3d25248b0..56cf40a364725107f80f5745e9f128bba6db73c9 100644 (file)
@@ -24,6 +24,7 @@ typedef struct {
        int f_newlinetypes;     /* Types of newlines seen */
        int f_skipnextlf;       /* Skip next \n */
        PyObject *f_encoding;
+       PyObject *f_errors;
        PyObject *weakreflist; /* List of weak references */
        int unlocked_count;     /* Num. currently running sections of code
                                   using f_fp with the GIL released. */
@@ -37,6 +38,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type;
 PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
 PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
 PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
+PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors);
 PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
                                              int (*)(FILE *));
 PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);
index 975795a8ef7d109bfa38c2c0bc77564d2c48df5d..a4d8a72c61a437d77afa4f59dd6b8be138f8c1b6 100644 (file)
@@ -385,6 +385,26 @@ class SysModuleTest(unittest.TestCase):
 ##        self.assert_(r[0][2] > 100, r[0][2])
 ##        self.assert_(r[1][2] > 100, r[1][2])
 
+    def test_ioencoding(self):
+        import subprocess,os
+        env = dict(os.environ)
+
+        # Test character: cent sign, encoded as 0x4A (ASCII J) in CP424,
+        # not representable in ASCII.
+
+        env["PYTHONIOENCODING"] = "cp424"
+        p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+                             stdout = subprocess.PIPE, env=env)
+        out = p.stdout.read().strip()
+        self.assertEqual(out, unichr(0xa2).encode("cp424"))
+
+        env["PYTHONIOENCODING"] = "ascii:replace"
+        p = subprocess.Popen([sys.executable, "-c", 'print unichr(0xa2)'],
+                             stdout = subprocess.PIPE, env=env)
+        out = p.stdout.read().strip()
+        self.assertEqual(out, '?')
+
+
 def test_main():
     test.test_support.run_unittest(SysModuleTest)
 
index 21465f225dfcd935584c66bcb0ef016604fa59d2..790e980c74aa050760883e7af2c33d8cddb7ee70 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,8 @@ What's New in Python 2.6 beta 1?
 Core and Builtins
 -----------------
 
+- New environment variable PYTHONIOENCODING.
+
 - Patch #2488: Add sys.maxsize.
 
 - Issue #2353: file.xreadlines() now emits a Py3k warning.
index dc94a092ef28b8bb01092c2a601b91d0ecea1d3b..c1c4b45e9cdf06c7383ca5b25a1c2ef2fca94b86 100644 (file)
@@ -99,6 +99,7 @@ static char *usage_5 = "\
 PYTHONHOME   : alternate <prefix> directory (or <prefix>%c<exec_prefix>).\n\
                The default module search path uses %s.\n\
 PYTHONCASEOK : ignore case in 'import' statements (Windows).\n\
+PYTHONIOENCODING: Encoding[:errors] used for stdin/stdout/stderr.\n\
 ";
 
 
index 86f3a1420d3114ca5f9900a66367b59debc3608f..4ea97f5de9b8abc1e1c0db82bc583b385583c05c 100644 (file)
@@ -155,6 +155,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
        Py_DECREF(f->f_name);
        Py_DECREF(f->f_mode);
        Py_DECREF(f->f_encoding);
+       Py_DECREF(f->f_errors);
 
         Py_INCREF(name);
         f->f_name = name;
@@ -170,6 +171,8 @@ fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
        f->f_skipnextlf = 0;
        Py_INCREF(Py_None);
        f->f_encoding = Py_None;
+       Py_INCREF(Py_None);
+       f->f_errors = Py_None;
 
        if (f->f_mode == NULL)
                return NULL;
@@ -435,19 +438,38 @@ PyFile_SetBufSize(PyObject *f, int bufsize)
 }
 
 /* Set the encoding used to output Unicode strings.
-   Returh 1 on success, 0 on failure. */
+   Return 1 on success, 0 on failure. */
 
 int
 PyFile_SetEncoding(PyObject *f, const char *enc)
+{
+       return PyFile_SetEncodingAndErrors(f, enc, NULL);
+}
+
+int
+PyFile_SetEncodingAndErrors(PyObject *f, const char *enc, char* errors)
 {
        PyFileObject *file = (PyFileObject*)f;
-       PyObject *str = PyBytes_FromString(enc);
+       PyObject *str, *oerrors;
 
        assert(PyFile_Check(f));
+       str = PyBytes_FromString(enc);
        if (!str)
                return 0;
+       if (errors) {
+               oerrors = PyString_FromString(errors);
+               if (!oerrors) {
+                       Py_DECREF(str);
+                       return 0;
+               }
+       } else {
+               oerrors = Py_None;
+               Py_INCREF(Py_None);
+       }
        Py_DECREF(file->f_encoding);
        file->f_encoding = str;
+       Py_DECREF(file->f_errors);
+       file->f_errors = oerrors;
        return 1;
 }
 
@@ -491,6 +513,7 @@ file_dealloc(PyFileObject *f)
        Py_XDECREF(f->f_name);
        Py_XDECREF(f->f_mode);
        Py_XDECREF(f->f_encoding);
+       Py_XDECREF(f->f_errors);
        drop_readahead(f);
        Py_TYPE(f)->tp_free((PyObject *)f);
 }
@@ -1879,6 +1902,8 @@ static PyMemberDef file_memberlist[] = {
         "file name"},
        {"encoding",    T_OBJECT,       OFF(f_encoding),        RO,
         "file encoding"},
+       {"errors",      T_OBJECT,       OFF(f_errors),  RO,
+        "Unicode error handler"},
        /* getattr(f, "closed") is implemented without this table */
        {NULL}  /* Sentinel */
 };
@@ -2093,6 +2118,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
                ((PyFileObject *)self)->f_mode = not_yet_string;
                Py_INCREF(Py_None);
                ((PyFileObject *)self)->f_encoding = Py_None;
+               Py_INCREF(Py_None);
+               ((PyFileObject *)self)->f_errors = Py_None;
                ((PyFileObject *)self)->weakreflist = NULL;
                ((PyFileObject *)self)->unlocked_count = 0;
        }
@@ -2295,7 +2322,9 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
                 if ((flags & Py_PRINT_RAW) &&
                    PyUnicode_Check(v) && enc != Py_None) {
                        char *cenc = PyBytes_AS_STRING(enc);
-                       value = PyUnicode_AsEncodedString(v, cenc, "strict");
+                       char *errors = fobj->f_errors == Py_None ? 
+                         "strict" : PyBytes_AS_STRING(fobj->f_errors);
+                       value = PyUnicode_AsEncodedString(v, cenc, errors);
                        if (value == NULL)
                                return -1;
                } else {
index 6a9cb2532932c62077805fb4b8fee6f6f2c8e581..c30bf652cafae07455f220d8e64441388ae23d52 100644 (file)
@@ -132,10 +132,19 @@ Py_InitializeEx(int install_sigs)
        PyThreadState *tstate;
        PyObject *bimod, *sysmod;
        char *p;
+       char *icodeset; /* On Windows, input codeset may theoretically 
+                          differ from output codeset. */
+       char *codeset = NULL;
+       char *errors = NULL;
+       int free_codeset = 0;
+       int overridden = 0;
 #if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
-       char *codeset;
-       char *saved_locale;
+       char *saved_locale, *loc_codeset;
        PyObject *sys_stream, *sys_isatty;
+#endif
+#ifdef MS_WINDOWS
+       char ibuf[128];
+       char buf[128];
 #endif
        extern void _Py_ReadyTypes(void);
 
@@ -238,38 +247,75 @@ Py_InitializeEx(int install_sigs)
        _PyGILState_Init(interp, tstate);
 #endif /* WITH_THREAD */
 
+       if ((p = Py_GETENV("PYTHONIOENCODING")) && *p != '\0') {
+               p = icodeset = codeset = strdup(p);
+               free_codeset = 1;
+               errors = strchr(p, ':');
+               if (errors) {
+                       *errors = '\0';
+                       errors++;
+               }
+               overridden = 1;
+       }
+
 #if defined(Py_USING_UNICODE) && defined(HAVE_LANGINFO_H) && defined(CODESET)
        /* On Unix, set the file system encoding according to the
           user's preference, if the CODESET names a well-known
           Python codec, and Py_FileSystemDefaultEncoding isn't
           initialized by other means. Also set the encoding of
-          stdin and stdout if these are terminals.  */
-
-       saved_locale = strdup(setlocale(LC_CTYPE, NULL));
-       setlocale(LC_CTYPE, "");
-       codeset = nl_langinfo(CODESET);
-       if (codeset && *codeset) {
-               PyObject *enc = PyCodec_Encoder(codeset);
-               if (enc) {
-                       codeset = strdup(codeset);
-                       Py_DECREF(enc);
-               } else {
-                       codeset = NULL;
-                       PyErr_Clear();
+          stdin and stdout if these are terminals, unless overridden.  */
+
+       if (!overridden || !Py_FileSystemDefaultEncoding) {
+               saved_locale = strdup(setlocale(LC_CTYPE, NULL));
+               setlocale(LC_CTYPE, "");
+               loc_codeset = nl_langinfo(CODESET);
+               if (loc_codeset && *loc_codeset) {
+                       PyObject *enc = PyCodec_Encoder(loc_codeset);
+                       if (enc) {
+                               loc_codeset = strdup(loc_codeset);
+                               Py_DECREF(enc);
+                       } else {
+                               loc_codeset = NULL;
+                               PyErr_Clear();
+                       }
+               } else
+                       loc_codeset = NULL;
+               setlocale(LC_CTYPE, saved_locale);
+               free(saved_locale);
+
+               if (!overridden) {
+                       codeset = icodeset = loc_codeset;
+                       free_codeset = 1;
+               }
+
+               /* Initialize Py_FileSystemDefaultEncoding from
+                  locale even if PYTHONIOENCODING is set. */
+               if (!Py_FileSystemDefaultEncoding) {
+                       Py_FileSystemDefaultEncoding = loc_codeset;
+                       if (!overridden)
+                               free_codeset = 0;
                }
-       } else
-               codeset = NULL;
-       setlocale(LC_CTYPE, saved_locale);
-       free(saved_locale);
+       }
+#endif
+
+#ifdef MS_WINDOWS
+       if (!overridden) {
+               icodeset = ibuf;
+               encoding = buf;
+               sprintf(ibuf, "cp%d", GetConsoleCP());
+               sprintf(buf, "cp%d", GetConsoleOutputCP());
+       }
+#endif
 
        if (codeset) {
                sys_stream = PySys_GetObject("stdin");
                sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
                if (!sys_isatty)
                        PyErr_Clear();
-               if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+               if ((overridden ||
+                    (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
                   PyFile_Check(sys_stream)) {
-                       if (!PyFile_SetEncoding(sys_stream, codeset))
+                       if (!PyFile_SetEncodingAndErrors(sys_stream, icodeset, errors))
                                Py_FatalError("Cannot set codeset of stdin");
                }
                Py_XDECREF(sys_isatty);
@@ -278,9 +324,10 @@ Py_InitializeEx(int install_sigs)
                sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
                if (!sys_isatty)
                        PyErr_Clear();
-               if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+               if ((overridden || 
+                    (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
                   PyFile_Check(sys_stream)) {
-                       if (!PyFile_SetEncoding(sys_stream, codeset))
+                       if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
                                Py_FatalError("Cannot set codeset of stdout");
                }
                Py_XDECREF(sys_isatty);
@@ -289,19 +336,17 @@ Py_InitializeEx(int install_sigs)
                sys_isatty = PyObject_CallMethod(sys_stream, "isatty", "");
                if (!sys_isatty)
                        PyErr_Clear();
-               if(sys_isatty && PyObject_IsTrue(sys_isatty) &&
+               if((overridden || 
+                   (sys_isatty && PyObject_IsTrue(sys_isatty))) &&
                   PyFile_Check(sys_stream)) {
-                       if (!PyFile_SetEncoding(sys_stream, codeset))
+                       if (!PyFile_SetEncodingAndErrors(sys_stream, codeset, errors))
                                Py_FatalError("Cannot set codeset of stderr");
                }
                Py_XDECREF(sys_isatty);
 
-               if (!Py_FileSystemDefaultEncoding)
-                       Py_FileSystemDefaultEncoding = codeset;
-               else
+               if (free_codeset)
                        free(codeset);
        }
-#endif
 }
 
 void
index b24842968e214cd07dc20933f0226b964f94c3b9..e4fcc506d6cb3ce2bad11e0e75bd537b6eab6b00 100644 (file)
@@ -1232,9 +1232,6 @@ _PySys_Init(void)
        PyObject *m, *v, *sysdict;
        PyObject *sysin, *sysout, *syserr;
        char *s;
-#ifdef MS_WINDOWS
-       char buf[128];
-#endif
 
        m = Py_InitModule3("sys", sys_methods, sys_doc);
        if (m == NULL)
@@ -1272,23 +1269,6 @@ _PySys_Init(void)
        syserr = PyFile_FromFile(stderr, "<stderr>", "w", _check_and_flush);
        if (PyErr_Occurred())
                return NULL;
-#ifdef MS_WINDOWS
-       if(isatty(_fileno(stdin)) && PyFile_Check(sysin)) {
-               sprintf(buf, "cp%d", GetConsoleCP());
-               if (!PyFile_SetEncoding(sysin, buf))
-                       return NULL;
-       }
-       if(isatty(_fileno(stdout)) && PyFile_Check(sysout)) {
-               sprintf(buf, "cp%d", GetConsoleOutputCP());
-               if (!PyFile_SetEncoding(sysout, buf))
-                       return NULL;
-       }
-       if(isatty(_fileno(stderr)) && PyFile_Check(syserr)) {
-               sprintf(buf, "cp%d", GetConsoleOutputCP());
-               if (!PyFile_SetEncoding(syserr, buf))
-                       return NULL;
-       }
-#endif
 
        PyDict_SetItemString(sysdict, "stdin", sysin);
        PyDict_SetItemString(sysdict, "stdout", sysout);