Document utf8_length and wstr_length states

author Victor Stinner <victor.stinner@haypocalc.com>

Mon, 3 Oct 2011 23:05:08 +0000 (01:05 +0200)

committer Victor Stinner <victor.stinner@haypocalc.com>

Mon, 3 Oct 2011 23:05:08 +0000 (01:05 +0200)
author Victor Stinner <victor.stinner@haypocalc.com>
Mon, 3 Oct 2011 23:05:08 +0000 (01:05 +0200)
committer Victor Stinner <victor.stinner@haypocalc.com>
Mon, 3 Oct 2011 23:05:08 +0000 (01:05 +0200)
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h

index 8e19ebc0ad3acc57bde5fe122bc11bac11416ae0..3dee11f3441b427ad8d915fa2fdca49cec03a8fe 100644 (file)
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -226,9 +226,11 @@ typedef struct {
           * ready = 1
           * ascii = 0
           * utf8 != data
-         * wstr is shared with data if kind=PyUnicode_2BYTE_KIND
-           and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and
-           sizeof(wchar_4)=4
+         * utf8_length = 0 if utf8 is NULL
+         * wstr is shared with data and wstr_length=length
+           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
+         * wstr_length = 0 if wstr is NULL
  
         - legacy string, not ready:
  
@@ -239,6 +241,7 @@ typedef struct {
           * wstr is not NULL
           * data.any is NULL
           * utf8 is NULL
+         * utf8_length = 0
           * interned = SSTATE_NOT_INTERNED
           * ascii = 0
  
@@ -250,10 +253,12 @@ typedef struct {
           * compact = 0
           * ready = 1
           * data.any is not NULL
-         * utf8 is shared with data.any if ascii = 1
-         * wstr is shared with data.any if kind=PyUnicode_2BYTE_KIND
-           and sizeof(wchar_t)=2 or if kind=PyUnicode_4BYTE_KIND and
-           sizeof(wchar_4)=4
+         * utf8 is shared with data.any and utf8_length = length if ascii = 1
+         * utf8_length = 0 if utf8 is NULL
+         * wstr is shared with data.any and wstr_length = length
+           if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+           or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
+         * wstr_length = 0 if wstr is NULL
  
         Compact strings use only one memory block (structure + characters),
         whereas legacy strings use one block for the structure and one block
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 42d061ac425a983e85193551ca57900c7a978c3b..84c8dcadee273ba6e9511cd45737e11971ef5970 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -300,50 +300,47 @@ _PyUnicode_CheckConsistency(void *op)
          assert(kind == PyUnicode_1BYTE_KIND);
          assert(ascii->state.ready == 1);
      }
-    else if (ascii->state.compact == 1) {
+    else {
          PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
          void *data;
-        assert(kind == PyUnicode_1BYTE_KIND
-               || kind == PyUnicode_2BYTE_KIND
-               || kind == PyUnicode_4BYTE_KIND);
-        assert(ascii->state.ascii == 0);
-        assert(ascii->state.ready == 1);
-        data = compact + 1;
-        assert (compact->utf8 != data);
-        if (
-#if SIZEOF_WCHAR_T == 2
-            kind == PyUnicode_2BYTE_KIND
-#else
-            kind == PyUnicode_4BYTE_KIND
-#endif
-           )
-            assert(ascii->wstr == data);
-        else
-            assert(ascii->wstr != data);
-    } else {
-        PyCompactUnicodeObject *compact = (PyCompactUnicodeObject *)op;
-        PyUnicodeObject *unicode = (PyUnicodeObject *)op;
  
-        if (kind == PyUnicode_WCHAR_KIND) {
-            assert(ascii->state.compact == 0);
-            assert(ascii->state.ascii == 0);
-            assert(ascii->state.ready == 0);
-            assert(ascii->wstr != NULL);
-            assert(unicode->data.any == NULL);
-            assert(compact->utf8 == NULL);
-            assert(ascii->state.interned == SSTATE_NOT_INTERNED);
-        }
-        else {
+        if (ascii->state.compact == 1) {
+            data = compact + 1;
              assert(kind == PyUnicode_1BYTE_KIND
                     || kind == PyUnicode_2BYTE_KIND
                     || kind == PyUnicode_4BYTE_KIND);
-            assert(ascii->state.compact == 0);
+            assert(ascii->state.ascii == 0);
              assert(ascii->state.ready == 1);
-            assert(unicode->data.any != NULL);
-            if (ascii->state.ascii)
-                assert (compact->utf8 == unicode->data.any);
-            else
-                assert (compact->utf8 != unicode->data.any);
+            assert (compact->utf8 != data);
+        } else {
+            PyUnicodeObject *unicode = (PyUnicodeObject *)op;
+
+            data = unicode->data.any;
+            if (kind == PyUnicode_WCHAR_KIND) {
+                assert(ascii->state.compact == 0);
+                assert(ascii->state.ascii == 0);
+                assert(ascii->state.ready == 0);
+                assert(ascii->wstr != NULL);
+                assert(data == NULL);
+                assert(compact->utf8 == NULL);
+                assert(ascii->state.interned == SSTATE_NOT_INTERNED);
+            }
+            else {
+                assert(kind == PyUnicode_1BYTE_KIND
+                       || kind == PyUnicode_2BYTE_KIND
+                       || kind == PyUnicode_4BYTE_KIND);
+                assert(ascii->state.compact == 0);
+                assert(ascii->state.ready == 1);
+                assert(data != NULL);
+                if (ascii->state.ascii) {
+                    assert (compact->utf8 == data);
+                    assert (compact->utf8_length == ascii->length);
+                }
+                else
+                    assert (compact->utf8 != data);
+            }
+        }
+        if (kind != PyUnicode_WCHAR_KIND) {
              if (
  #if SIZEOF_WCHAR_T == 2
                  kind == PyUnicode_2BYTE_KIND
@@ -351,10 +348,17 @@ _PyUnicode_CheckConsistency(void *op)
                  kind == PyUnicode_4BYTE_KIND
  #endif
                 )
-                assert(ascii->wstr == unicode->data.any);
-            else
-                assert(ascii->wstr != unicode->data.any);
+            {
+                assert(ascii->wstr == data);
+                assert(compact->wstr_length == ascii->length);
+            } else
+                assert(ascii->wstr != data);
          }
+
+        if (compact->utf8 == NULL)
+            assert(compact->utf8_length == 0);
+        if (ascii->wstr == NULL)
+            assert(compact->wstr_length == 0);
      }
      return 1;
  }
author	Victor Stinner <victor.stinner@haypocalc.com>
	Mon, 3 Oct 2011 23:05:08 +0000 (01:05 +0200)
committer	Victor Stinner <victor.stinner@haypocalc.com>
	Mon, 3 Oct 2011 23:05:08 +0000 (01:05 +0200)
Include/unicodeobject.h		patch \| blob \| blame \| history
Objects/unicodeobject.c		patch \| blob \| blame \| history