On 17-Mar-2000, Marc-Andre Lemburg said:

author Barry Warsaw <barry@python.org>

Mon, 20 Mar 2000 16:36:48 +0000 (16:36 +0000)

committer Barry Warsaw <barry@python.org>

Mon, 20 Mar 2000 16:36:48 +0000 (16:36 +0000)
author Barry Warsaw <barry@python.org>
Mon, 20 Mar 2000 16:36:48 +0000 (16:36 +0000)
committer Barry Warsaw <barry@python.org>
Mon, 20 Mar 2000 16:36:48 +0000 (16:36 +0000)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index 37f2b0d3f479a86281e4f32ce88d8d65c9164bc8..770ecab52c41a882e39b286e22d9b2f1a45e9f4c 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -1,8 +1,5 @@
  #ifndef Py_UNICODEOBJECT_H
  #define Py_UNICODEOBJECT_H
-#ifdef __cplusplus
-extern "C" {
-#endif
  
  /*
  
@@ -109,8 +106,9 @@ typedef unsigned short Py_UNICODE;
  /* --- Internal Unicode Operations ---------------------------------------- */
  
  /* If you want Python to use the compiler's wctype.h functions instead
-   of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS.
-   This reduces the interpreter's code size. */
+   of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
+   configure Python using --with-ctype-functions.  This reduces the
+   interpreter's code size. */
  
  #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
  
@@ -169,6 +167,10 @@ typedef unsigned short Py_UNICODE;
      (!memcmp((string)->str + (offset), (substring)->str,\
               (substring)->length*sizeof(Py_UNICODE)))
  
+#ifdef __cplusplus
+extern "C" {
+#endif
+
  /* --- Unicode Type ------------------------------------------------------- */
  
  typedef struct {
@@ -647,7 +649,7 @@ extern DL_IMPORT(int) PyUnicode_Find(
      int direction              /* Find direction: +1 forward, -1 backward */
      );
  
-/* Count the number of occurances of substr in str[start:end]. */
+/* Count the number of occurrences of substr in str[start:end]. */
  
  extern DL_IMPORT(int) PyUnicode_Count(
      PyObject *str,             /* String */ 
@@ -656,7 +658,7 @@ extern DL_IMPORT(int) PyUnicode_Count(
      int end                    /* Stop index */
      );
  
-/* Replace at most maxcount occurances of substr in str with replstr
+/* Replace at most maxcount occurrences of substr in str with replstr
     and return the resulting Unicode object. */
  
  extern DL_IMPORT(PyObject *) PyUnicode_Replace(
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py

index c33b822a5b20dbbc2991ce50ab5e268bf6b42257..cd5876e7df683a529b749108d6fee97ddc6ef9dc 100644 (file)
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -30,13 +30,13 @@ Written by Marc-Andre Lemburg (mal@lemburg.com).
  import string,codecs,aliases
  
  _cache = {}
-_unkown = '--unkown--'
+_unknown = '--unknown--'
  
  def search_function(encoding):
      
      # Cache lookup
-    entry = _cache.get(encoding,_unkown)
-    if entry is not _unkown:
+    entry = _cache.get(encoding,_unknown)
+    if entry is not _unknown:
          return entry
  
      # Import the module
diff --git a/Lib/test/test_string.py b/Lib/test/test_string.py

index bb6d035f9b6c78d9b77741213dc1b20211b3c0fe..4a3e474cd87367907581f6f4ccf5c28ee8a8bffb 100644 (file)
--- a/Lib/test/test_string.py
+++ b/Lib/test/test_string.py
@@ -143,6 +143,7 @@ test('translate', 'abc', 'Abc', table)
  test('translate', 'xyz', 'xyz', table)
  
  test('replace', 'one!two!three!', 'one@two!three!', '!', '@', 1)
+test('replace', 'one!two!three!', 'onetwothree', '!', '')
  test('replace', 'one!two!three!', 'one@two@three!', '!', '@', 2)
  test('replace', 'one!two!three!', 'one@two@three@', '!', '@', 3)
  test('replace', 'one!two!three!', 'one@two@three@', '!', '@', 4)
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py

index 64b8ff8bd369ea8b85899f55e86ec65881fa15c3..69d4273ace8fa2fa60cdb64a14cebdf507442d2a 100644 (file)
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@@ -108,6 +108,7 @@ if 0:
      test('translate', u'xyz', u'xyz', table)
  
  test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1)
+test('replace', u'one!two!three!', u'onetwothree', '!', '')
  test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2)
  test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3)
  test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4)
diff --git a/Misc/unicode.txt b/Misc/unicode.txt

index b31beefcfcd52d069533611d2cfdc3f1b8087e30..9a4832afce8ddba28421bc9686f700d0fff702e7 100644 (file)
--- a/Misc/unicode.txt
+++ b/Misc/unicode.txt
@@ -743,8 +743,9 @@ For explicit handling of files using Unicode, the standard
  stream codecs as available through the codecs module should 
  be used.
  
-XXX There should be a short-cut open(filename,mode,encoding) available which
-    also assures that mode contains the 'b' character when needed.
+The codecs module should provide a short-cut open(filename,mode,encoding)
+available which also assures that mode contains the 'b' character when
+needed.
  
  
  File/Stream Input:
@@ -810,6 +811,10 @@ Unicode-Mappings:
  Introduction to Unicode (a little outdated by still nice to read):
          http://www.nada.kth.se/i18n/ucs/unicode-iso10646-oview.html
  
+For comparison:
+       Introducing Unicode to ECMAScript --
+       http://www-4.ibm.com/software/developer/library/internationalization-support.html
+
  Encodings:
  
      Overview:
@@ -832,7 +837,7 @@ Encodings:
  
  History of this Proposal:
  -------------------------
-1.2: 
+1.2: Removed POD about codecs.open()
  1.1: Added note about comparisons and hash values. Added note about
       case mapping algorithms. Changed stream codecs .read() and
       .write() method to match the standard file-like object methods
diff --git a/Modules/stropmodule.c b/Modules/stropmodule.c

index a0d8b9a1dc3b51d6862e42f50dbe6c026e49c8c6..4c9ee765389f7fb3157ba3eee119803344f8b104 100644 (file)
--- a/Modules/stropmodule.c
+++ b/Modules/stropmodule.c
@@ -1054,7 +1054,7 @@ strop_translate(self, args)
  
    strstr replacement for arbitrary blocks of memory.
  
-  Locates the first occurance in the memory pointed to by MEM of the
+  Locates the first occurrence in the memory pointed to by MEM of the
    contents of memory pointed to by PAT.  Returns the index into MEM if
    found, or -1 if not found.  If len of PAT is greater than length of
    MEM, the function returns -1.
diff --git a/Objects/stringobject.c b/Objects/stringobject.c

index 2d404b92bb97f8823a19ec70e246ddf3e4a47b08..10257f7562dada2fd60acb9a5fd53a78888704ce 100644 (file)
--- a/Objects/stringobject.c
+++ b/Objects/stringobject.c
@@ -1395,7 +1395,7 @@ string_translate(self, args)
  
    strstr replacement for arbitrary blocks of memory.
  
-  Locates the first occurance in the memory pointed to by MEM of the
+  Locates the first occurrence in the memory pointed to by MEM of the
    contents of memory pointed to by PAT.  Returns the index into MEM if
    found, or -1 if not found.  If len of PAT is greater than length of
    MEM, the function returns -1.
@@ -1578,7 +1578,7 @@ string_replace(self, args)
                 return NULL;
  
         if (sub_len <= 0) {
-               PyErr_SetString(PyExc_ValueError, "empty replacement string");
+               PyErr_SetString(PyExc_ValueError, "empty pattern string");
                 return NULL;
         }
         new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index da12da264481ab30cd62d7ae9ac4ffacfbd7e2a0..d63165ea05b64c1e483dfb6c6934ef9c330f3a97 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -83,7 +83,7 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
     all objects on the free list having a size less than this
     limit. This reduces malloc() overhead for small Unicode objects.  
  
-   At worse this will result in MAX_UNICODE_FREELIST_SIZE *
+   At worst this will result in MAX_UNICODE_FREELIST_SIZE *
     (sizeof(PyUnicodeObject) + STAYALIVE_SIZE_LIMIT +
     malloc()-overhead) bytes of unused garbage.
  
@@ -180,7 +180,7 @@ PyUnicodeObject *_PyUnicode_New(int length)
          unicode_freelist = *(PyUnicodeObject **)unicode_freelist;
          unicode_freelist_size--;
          unicode->ob_type = &PyUnicode_Type;
-        _Py_NewReference(unicode);
+        _Py_NewReference((PyObject *)unicode);
         if (unicode->str) {
             if (unicode->length < length &&
                 _PyUnicode_Resize(unicode, length)) {
@@ -199,16 +199,19 @@ PyUnicodeObject *_PyUnicode_New(int length)
         unicode->str = PyMem_NEW(Py_UNICODE, length + 1);
      }
  
-    if (!unicode->str) {
-        PyMem_DEL(unicode);
-        PyErr_NoMemory();
-        return NULL;
-    }
+    if (!unicode->str) 
+       goto onError;
      unicode->str[length] = 0;
      unicode->length = length;
      unicode->hash = -1;
      unicode->utf8str = NULL;
      return unicode;
+
+ onError:
+    _Py_ForgetReference((PyObject *)unicode);
+    PyMem_DEL(unicode);
+    PyErr_NoMemory();
+    return NULL;
  }
  
  static
@@ -224,7 +227,6 @@ void _PyUnicode_Free(register PyUnicodeObject *unicode)
          *(PyUnicodeObject **)unicode = unicode_freelist;
          unicode_freelist = unicode;
          unicode_freelist_size++;
-        _Py_ForgetReference(unicode);
      }
      else {
         free(unicode->str);
@@ -489,7 +491,7 @@ int utf8_decoding_error(const char **source,
      }
      else {
          PyErr_Format(PyExc_ValueError,
-                     "UTF-8 decoding error; unkown error handling code: %s",
+                     "UTF-8 decoding error; unknown error handling code: %s",
                       errors);
          return -1;
      }
@@ -611,7 +613,7 @@ int utf8_encoding_error(const Py_UNICODE **source,
      else {
         PyErr_Format(PyExc_ValueError,
                      "UTF-8 encoding error; "
-                    "unkown error handling code: %s",
+                    "unknown error handling code: %s",
                      errors);
         return -1;
      }
@@ -733,7 +735,7 @@ int utf16_decoding_error(const Py_UNICODE **source,
      }
      else {
          PyErr_Format(PyExc_ValueError,
-                     "UTF-16 decoding error; unkown error handling code: %s",
+                     "UTF-16 decoding error; unknown error handling code: %s",
                       errors);
          return -1;
      }
@@ -921,7 +923,7 @@ int unicodeescape_decoding_error(const char **source,
      else {
          PyErr_Format(PyExc_ValueError,
                       "Unicode-Escape decoding error; "
-                     "unkown error handling code: %s",
+                     "unknown error handling code: %s",
                       errors);
          return -1;
      }
@@ -1051,6 +1053,10 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
  
  */
  
+static const Py_UNICODE *findchar(const Py_UNICODE *s,
+                                 int size,
+                                 Py_UNICODE ch);
+
  static
  PyObject *unicodeescape_string(const Py_UNICODE *s,
                                 int size,
@@ -1069,9 +1075,6 @@ PyObject *unicodeescape_string(const Py_UNICODE *s,
      p = q = PyString_AS_STRING(repr);
  
      if (quotes) {
-        static const Py_UNICODE *findchar(const Py_UNICODE *s,
-                                         int size,
-                                         Py_UNICODE ch);
          *p++ = 'u';
          *p++ = (findchar(s, size, '\'') && 
                  !findchar(s, size, '"')) ? '"' : '\'';
@@ -1298,7 +1301,7 @@ int latin1_encoding_error(const Py_UNICODE **source,
      else {
         PyErr_Format(PyExc_ValueError,
                      "Latin-1 encoding error; "
-                    "unkown error handling code: %s",
+                    "unknown error handling code: %s",
                      errors);
         return -1;
      }
@@ -1369,7 +1372,7 @@ int ascii_decoding_error(const char **source,
      else {
         PyErr_Format(PyExc_ValueError,
                      "ASCII decoding error; "
-                    "unkown error handling code: %s",
+                    "unknown error handling code: %s",
                      errors);
         return -1;
      }
@@ -1431,7 +1434,7 @@ int ascii_encoding_error(const Py_UNICODE **source,
      else {
         PyErr_Format(PyExc_ValueError,
                      "ASCII encoding error; "
-                    "unkown error handling code: %s",
+                    "unknown error handling code: %s",
                      errors);
         return -1;
      }
@@ -1502,7 +1505,7 @@ int charmap_decoding_error(const char **source,
      else {
         PyErr_Format(PyExc_ValueError,
                      "charmap decoding error; "
-                    "unkown error handling code: %s",
+                    "unknown error handling code: %s",
                      errors);
         return -1;
      }
@@ -1618,7 +1621,7 @@ int charmap_encoding_error(const Py_UNICODE **source,
      else {
         PyErr_Format(PyExc_ValueError,
                      "charmap encoding error; "
-                    "unkown error handling code: %s",
+                    "unknown error handling code: %s",
                      errors);
         return -1;
      }
@@ -1750,7 +1753,7 @@ int translate_error(const Py_UNICODE **source,
      else {
         PyErr_Format(PyExc_ValueError,
                      "translate error; "
-                    "unkown error handling code: %s",
+                    "unknown error handling code: %s",
                      errors);
         return -1;
      }
diff --git a/Python/codecs.c b/Python/codecs.c

index 5075a20d666e502857a8d1617515f24d1d5011b4..2d493776008706912a54176bb98e4718c9d86c19 100644 (file)
--- a/Python/codecs.c
+++ b/Python/codecs.c
@@ -93,9 +93,14 @@ PyObject *lowercasestring(const char *string)
  
  PyObject *_PyCodec_Lookup(const char *encoding)
  {
-    PyObject *result, *args = NULL, *v;
+    PyObject *result, *args = NULL, *v = NULL;
      int i, len;
  
+    if (_PyCodec_SearchCache == NULL || _PyCodec_SearchPath == NULL) {
+       PyErr_SetString(PyExc_SystemError,
+                       "codec module not properly initialized");
+       goto onError;
+    }
      if (!import_encodings_called)
         import_encodings();
  
@@ -109,6 +114,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
      result = PyDict_GetItem(_PyCodec_SearchCache, v);
      if (result != NULL) {
         Py_INCREF(result);
+       Py_DECREF(v);
         return result;
      }
      
@@ -121,6 +127,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
      if (args == NULL)
         goto onError;
      PyTuple_SET_ITEM(args,0,v);
+    v = NULL;
  
      for (i = 0; i < len; i++) {
         PyObject *func;
@@ -146,7 +153,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
      if (i == len) {
         /* XXX Perhaps we should cache misses too ? */
         PyErr_SetString(PyExc_LookupError,
-                       "unkown encoding");
+                       "unknown encoding");
         goto onError;
      }
  
@@ -156,6 +163,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
      return result;
  
   onError:
+    Py_XDECREF(v);
      Py_XDECREF(args);
      return NULL;
  }
@@ -378,5 +386,7 @@ void _PyCodecRegistry_Init()
  void _PyCodecRegistry_Fini()
  {
      Py_XDECREF(_PyCodec_SearchPath);
+    _PyCodec_SearchPath = NULL;
      Py_XDECREF(_PyCodec_SearchCache);
+    _PyCodec_SearchCache = NULL;
  }
author	Barry Warsaw <barry@python.org>
	Mon, 20 Mar 2000 16:36:48 +0000 (16:36 +0000)
committer	Barry Warsaw <barry@python.org>
	Mon, 20 Mar 2000 16:36:48 +0000 (16:36 +0000)
Include/unicodeobject.h		patch | blob | blame | history
Lib/encodings/__init__.py		patch | blob | blame | history
Lib/test/test_string.py		patch | blob | blame | history
Lib/test/test_unicode.py		patch | blob | blame | history
Misc/unicode.txt		patch | blob | blame | history
Modules/stropmodule.c		patch | blob | blame | history
Objects/stringobject.c		patch | blob | blame | history
Objects/unicodeobject.c		patch | blob | blame | history
Python/codecs.c		patch | blob | blame | history