use Py_UNICODE_WIDE instead of USE_UCS4_STORAGE and Py_UNICODE_SIZE

author Fredrik Lundh <fredrik@pythonware.com>

Wed, 27 Jun 2001 18:59:43 +0000 (18:59 +0000)

committer Fredrik Lundh <fredrik@pythonware.com>

Wed, 27 Jun 2001 18:59:43 +0000 (18:59 +0000)
author Fredrik Lundh <fredrik@pythonware.com>
Wed, 27 Jun 2001 18:59:43 +0000 (18:59 +0000)
committer Fredrik Lundh <fredrik@pythonware.com>
Wed, 27 Jun 2001 18:59:43 +0000 (18:59 +0000)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index d89537fc91edbba59156c4939f2d2b30cdd19803..205b8bbdabab2bdae5a98ca709cc22d635c79708 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -66,10 +66,11 @@ Copyright (c) Corporation for National Research Initiatives.
  #error Must define Py_UNICODE_SIZE
  #endif
  
-/* experimental UCS-4 support.  enable at your own risk! */
-#undef USE_UCS4_STORAGE
-#if Py_UNICODE_SIZE == 4
-#define USE_UCS4_STORAGE
+/* Setting Py_UNICODE_WIDE enables UCS-4 storage.  Otherwise, Unicode
+   strings are stored as UCS-2 (with limited support for UTF-16) */
+
+#if Py_UNICODE_SIZE >= 4
+#define Py_UNICODE_WIDE
  #endif
  
  /* Set these flags if the platform has "wchar.h", "wctype.h" and the
@@ -81,12 +82,12 @@ Copyright (c) Corporation for National Research Initiatives.
  #ifndef PY_UNICODE_TYPE
  
  /* Windows has a usable wchar_t type (unless we're using UCS-4) */
-# if defined(MS_WIN32) && !defined(USE_UCS4_STORAGE)
+# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
  #  define HAVE_USABLE_WCHAR_T
  #  define PY_UNICODE_TYPE wchar_t
  # endif
  
-# if defined(USE_UCS4_STORAGE)
+# if defined(Py_UNICODE_WIDE)
  #  define PY_UNICODE_TYPE Py_UCS4
  # endif
  
diff --git a/Modules/sre.h b/Modules/sre.h

index 61a020896ab0b19119412c165953654abfbf1dca..632f47efcaef6f1c3b6c8104efa928596c0261ab 100644 (file)
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -13,8 +13,9 @@
  
  #include "sre_constants.h"
  
-/* size of a code word (must be unsigned short or larger) */
-#ifdef USE_UCS4_STORAGE
+/* size of a code word (must be unsigned short or larger, and
+   large enough to hold a Py_UNICODE character) */
+#ifdef Py_UNICODE_WIDE
  #define SRE_CODE unsigned long
  #else
  #define SRE_CODE unsigned short
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c

index 13fc6128c146bb32517f8b754c294dad258e1dc0..c1b5a0d4122e6c32ab95703f8dbd178303f69eb7 100644 (file)
--- a/Objects/unicodectype.c
+++ b/Objects/unicodectype.c
@@ -68,7 +68,7 @@ Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
      else
         ch += ctype->upper;
  
-#ifdef USE_UCS4_STORAGE
+#ifdef Py_UNICODE_WIDE
      /* The database assumes that the values wrap around at 0x10000. */
      if (ch > 0x10000)
         ch -= 0x10000;
@@ -360,7 +360,7 @@ Py_UNICODE _PyUnicode_ToUppercase(register Py_UNICODE ch)
      const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
  
      ch += ctype->upper;
-#ifdef USE_UCS4_STORAGE
+#ifdef Py_UNICODE_WIDE
      /* The database assumes that the values wrap around at 0x10000. */
      if (ch > 0x10000)
         ch -= 0x10000;
@@ -376,7 +376,7 @@ Py_UNICODE _PyUnicode_ToLowercase(register Py_UNICODE ch)
      const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
  
      ch += ctype->lower;
-#ifdef USE_UCS4_STORAGE
+#ifdef Py_UNICODE_WIDE
      /* The database assumes that the values wrap around at 0x10000. */
      if (ch > 0x10000)
         ch -= 0x10000;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 2f66c3cf93ea6d1fcb50e0870d5bed6b89bfb6f8..08e80894d841a6e4d13b1d25979423d5fe58c1a8 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -106,7 +106,7 @@ static char unicode_default_encoding[100];
  Py_UNICODE
  PyUnicode_GetMax()
  {
-#ifdef USE_UCS4_STORAGE
+#ifdef Py_UNICODE_WIDE
         return 0x10FFFF;
  #else
         /* This is actually an illegal character, so it should
@@ -791,7 +791,7 @@ PyObject *PyUnicode_DecodeUTF8(const char *s,
                  errmsg = "illegal encoding";
                 goto utf8Error;
             }
-#if Py_UNICODE_SIZE == 4
+#ifdef Py_UNICODE_WIDE
             *p++ = (Py_UNICODE)ch;
  #else
              /*  compute and append the two surrogates: */
@@ -1080,7 +1080,7 @@ PyObject *PyUnicode_DecodeUTF16(const char *s,
                     ch2 = (ch2 >> 8) | (ch2 << 8);
  #endif
             if (0xDC00 <= ch2 && ch2 <= 0xDFFF) {
-#if Py_UNICODE_SIZE == 2
+#ifndef Py_UNICODE_WIDE
                 /* This is valid data (a UTF-16 surrogate pair), but
                    we are not able to store this information since our
                    Py_UNICODE type only has 16 bits... this might
@@ -1326,7 +1326,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
                  *p++ = (Py_UNICODE) chr;
              else if (chr <= 0x10ffff) {
                  /* UCS-4 character. Either store directly, or as surrogate pair. */
-#if Py_UNICODE_SIZE == 4
+#ifdef Py_UNICODE_WIDE
                  *p++ = chr;
  #else
                  chr -= 0x10000L;
diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c

index 8917f450efcc8db2b0879df614397ec9937a4915..11d6f4c41cb7234ba5dce0250a6b0ae38b8f74c1 100644 (file)
--- a/Python/bltinmodule.c
+++ b/Python/bltinmodule.c
@@ -325,7 +325,7 @@ builtin_unichr(PyObject *self, PyObject *args)
                 return PyUnicode_FromUnicode(s, 1);
         }
         else {
-#if Py_UNICODE_SIZE == 2
+#ifndef Py_UNICODE_WIDE
                 /* UCS-4 character.  store as two surrogate characters */
                 x -= 0x10000L;
                 s[0] = 0xD800 + (Py_UNICODE) (x >> 10);
author	Fredrik Lundh <fredrik@pythonware.com>
	Wed, 27 Jun 2001 18:59:43 +0000 (18:59 +0000)
committer	Fredrik Lundh <fredrik@pythonware.com>
	Wed, 27 Jun 2001 18:59:43 +0000 (18:59 +0000)
Include/unicodeobject.h		patch | blob | blame | history
Modules/sre.h		patch | blob | blame | history
Objects/unicodectype.c		patch | blob | blame | history
Objects/unicodeobject.c		patch | blob | blame | history
Python/bltinmodule.c		patch | blob | blame | history