Patch # 1302 by Christian Heimes (with some love from me :-).

author Guido van Rossum <guido@python.org>
Thu, 25 Oct 2007 23:47:33 +0000 (23:47 +0000)
committer Guido van Rossum <guido@python.org>
Thu, 25 Oct 2007 23:47:33 +0000 (23:47 +0000)
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py

index a98af8760a37f052c84c235cfaa94ac6f2bf536c..91728ef241e1bf3bf71dbaec584b89a06054628b 100755 (executable)
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -1119,6 +1119,15 @@ class _ExpectedSkips:
              if not os.path.supports_unicode_filenames:
                  self.expected.add('test_pep277')
  
+            # doctest, profile and cProfile tests fail when the codec for the fs
+            # encoding isn't built in because PyUnicode_Decode() adds two calls
+            # into Python.
+            encs = ("utf-8", "latin-1", "ascii", "mbcs", "utf-16", "utf-32")
+            if sys.getfilesystemencoding().lower() not in encs:
+                self.expected.add('test_profile')
+                self.expected.add('test_cProfile')
+                self.expected.add('test_doctest')
+
              try:
                  from test import test_socket_ssl
              except ImportError:
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index a24cdba41b918d7bff8db593b37e2116c3c35ac1..c77751de657030b5fb759048fafeb0d8523060ad 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -41,6 +41,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  
  #define PY_SSIZE_T_CLEAN
  #include "Python.h"
+#include "bytes_methods.h"
  
  #include "unicodeobject.h"
  #include "ucnhash.h"
@@ -592,9 +593,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                 if (*f == '%') {
                         const char* p = f;
                         width = 0;
-                       while (isdigit(Py_CHARMASK(*f)))
+                       while (ISDIGIT(*f))
                                 width = (width*10) + *f++ - '0';
-                       while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
+                       while (*++f && *f != '%' && !ISALPHA(*f))
                                 ;
  
                         /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
@@ -755,12 +756,12 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
                         zeropad = (*f == '0');
                         /* parse the width.precision part */
                         width = 0;
-                       while (isdigit(Py_CHARMASK(*f)))
+                       while (ISDIGIT(*f))
                                 width = (width*10) + *f++ - '0';
                         precision = 0;
                         if (*f == '.') {
                                 f++;
-                               while (isdigit(Py_CHARMASK(*f)))
+                               while (ISDIGIT(*f))
                                         precision = (precision*10) + *f++ - '0';
                         }
                         /* handle the long flag, but only for %ld and %lu.
@@ -1056,21 +1057,47 @@ PyObject *PyUnicode_Decode(const char *s,
  {
      PyObject *buffer = NULL, *unicode;
      Py_buffer info;
+    char lower[20];  /* Enough for any encoding name we recognize */
+    char *l;
+    const char *e;
  
      if (encoding == NULL)
-       encoding = PyUnicode_GetDefaultEncoding();
+        encoding = PyUnicode_GetDefaultEncoding();
+
+    /* Convert encoding to lower case and replace '_' with '-' in order to
+       catch e.g. UTF_8 */
+    e = encoding;
+    l = lower;
+    while (*e && l < &lower[(sizeof lower) - 2]) {
+        if (ISUPPER(*e)) {
+            *l++ = TOLOWER(*e++);
+        }
+        else if (*e == '_') {
+            *l++ = '-';
+            e++;
+        }
+        else {
+            *l++ = *e++;
+        }
+    }
+    *l = '\0';
  
      /* Shortcuts for common default encodings */
-    if (strcmp(encoding, "utf-8") == 0)
+    if (strcmp(lower, "utf-8") == 0)
          return PyUnicode_DecodeUTF8(s, size, errors);
-    else if (strcmp(encoding, "latin-1") == 0)
+    else if ((strcmp(lower, "latin-1") == 0) ||
+             (strcmp(lower, "iso-8859-1") == 0))
          return PyUnicode_DecodeLatin1(s, size, errors);
  #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
-    else if (strcmp(encoding, "mbcs") == 0)
+    else if (strcmp(lower, "mbcs") == 0)
          return PyUnicode_DecodeMBCS(s, size, errors);
  #endif
-    else if (strcmp(encoding, "ascii") == 0)
+    else if (strcmp(lower, "ascii") == 0)
          return PyUnicode_DecodeASCII(s, size, errors);
+    else if (strcmp(lower, "utf-16") == 0)
+        return PyUnicode_DecodeUTF16(s, size, errors, 0);
+    else if (strcmp(lower, "utf-32") == 0)
+        return PyUnicode_DecodeUTF32(s, size, errors, 0);
  
      /* Decode via the codec registry */
      buffer = NULL;
@@ -1470,7 +1497,7 @@ char utf7_special[128] = {
  #define B64(n)  \
      ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f])
  #define B64CHAR(c) \
-    (isalnum(c) || (c) == '+' || (c) == '/')
+    (ISALNUM(c) || (c) == '+' || (c) == '/')
  #define UB64(c) \
      ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ?                   \
       (c) - 71 : (c) >= 'A' ? (c) - 65 : (c) + 4 )
@@ -2703,7 +2730,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
              }
              for (i = 0; i < digits; ++i) {
                  c = (unsigned char) s[i];
-                if (!isxdigit(c)) {
+                if (!ISXDIGIT(c)) {
                      endinpos = (s+i+1)-starts;
                      if (unicode_decode_call_errorhandler(
                          errors, &errorHandler,
@@ -3077,7 +3104,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
         outpos = p-PyUnicode_AS_UNICODE(v);
         for (x = 0, i = 0; i < count; ++i, ++s) {
             c = (unsigned char)*s;
-           if (!isxdigit(c)) {
+           if (!ISXDIGIT(c)) {
                 endinpos = s-starts;
                 if (unicode_decode_call_errorhandler(
                     errors, &errorHandler,
author	Guido van Rossum <guido@python.org>
	Thu, 25 Oct 2007 23:47:33 +0000 (23:47 +0000)
committer	Guido van Rossum <guido@python.org>
	Thu, 25 Oct 2007 23:47:33 +0000 (23:47 +0000)
Lib/test/regrtest.py		patch | blob | blame | history
Objects/unicodeobject.c		patch | blob | blame | history