bpo-40521: Disable Unicode caches in isolated subinterpreters (GH-19933)

author Victor Stinner <vstinner@python.org>

Tue, 5 May 2020 16:50:30 +0000 (18:50 +0200)

committer GitHub <noreply@github.com>

Tue, 5 May 2020 16:50:30 +0000 (18:50 +0200)
author Victor Stinner <vstinner@python.org>
Tue, 5 May 2020 16:50:30 +0000 (18:50 +0200)
committer GitHub <noreply@github.com>
Tue, 5 May 2020 16:50:30 +0000 (18:50 +0200)
diff --git a/Objects/typeobject.c b/Objects/typeobject.c

index db0ae970090ba98b47486d0638008ca1d79e00ee..1565b90898605e970e9afa67c60c2b5029affb8d 100644 (file)
--- a/Objects/typeobject.c
+++ b/Objects/typeobject.c
@@ -56,6 +56,11 @@ static size_t method_cache_misses = 0;
  static size_t method_cache_collisions = 0;
  #endif
  
+/* bpo-40521: Interned strings are shared by all subinterpreters */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+#  define INTERN_NAME_STRINGS
+#endif
+
  /* alphabetical order */
  _Py_IDENTIFIER(__abstractmethods__);
  _Py_IDENTIFIER(__class__);
@@ -3418,6 +3423,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
              if (name == NULL)
                  return -1;
          }
+#ifdef INTERN_NAME_STRINGS
          if (!PyUnicode_CHECK_INTERNED(name)) {
              PyUnicode_InternInPlace(&name);
              if (!PyUnicode_CHECK_INTERNED(name)) {
@@ -3427,6 +3433,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
                  return -1;
              }
          }
+#endif
      }
      else {
          /* Will fail in _PyObject_GenericSetAttrWithDict. */
@@ -7531,10 +7538,17 @@ _PyTypes_InitSlotDefs(void)
      for (slotdef *p = slotdefs; p->name; p++) {
          /* Slots must be ordered by their offset in the PyHeapTypeObject. */
          assert(!p[1].name || p->offset <= p[1].offset);
+#ifdef INTERN_NAME_STRINGS
          p->name_strobj = PyUnicode_InternFromString(p->name);
          if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
              return _PyStatus_NO_MEMORY();
          }
+#else
+        p->name_strobj = PyUnicode_FromString(p->name);
+        if (!p->name_strobj) {
+            return _PyStatus_NO_MEMORY();
+        }
+#endif
      }
      slotdefs_initialized = 1;
      return _PyStatus_OK();
@@ -7559,7 +7573,9 @@ update_slot(PyTypeObject *type, PyObject *name)
      int offset;
  
      assert(PyUnicode_CheckExact(name));
+#ifdef INTERN_NAME_STRINGS
      assert(PyUnicode_CHECK_INTERNED(name));
+#endif
  
      assert(slotdefs_initialized);
      pp = ptrs;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index aba7407533c4ed28977a7f6d7591fdbf514710e0..18b9458721de18df8ad254ca13065a762f2a862e 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -198,6 +198,11 @@ extern "C" {
  #  define OVERALLOCATE_FACTOR 4
  #endif
  
+/* bpo-40521: Interned strings are shared by all interpreters. */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+#  define INTERNED_STRINGS
+#endif
+
  /* This dictionary holds all interned unicode strings.  Note that references
     to strings in this dictionary are *not* counted in the string's ob_refcnt.
     When the interned string reaches a refcnt of 0 the string deallocation
@@ -206,7 +211,9 @@ extern "C" {
     Another way to look at this is to say that the actual reference
     count of a string is:  s->ob_refcnt + (s->state ? 2 : 0)
  */
+#ifdef INTERNED_STRINGS
  static PyObject *interned = NULL;
+#endif
  
  /* The empty Unicode object is shared to improve performance. */
  static PyObject *unicode_empty = NULL;
@@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
  /* List of static strings. */
  static _Py_Identifier *static_strings = NULL;
  
+/* bpo-40521: Latin1 singletons are shared by all interpreters. */
+#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
+#  define LATIN1_SINGLETONS
+#endif
+
+#ifdef LATIN1_SINGLETONS
  /* Single character Unicode strings in the Latin-1 range are being
     shared as well. */
  static PyObject *unicode_latin1[256] = {NULL};
+#endif
  
  /* Fast detection of the most frequent whitespace characters */
  const unsigned char _Py_ascii_whitespace[] = {
@@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)
          return unicode_empty;
      }
  
+#ifdef LATIN1_SINGLETONS
      if (length == 1) {
          const void *data = PyUnicode_DATA(unicode);
          int kind = PyUnicode_KIND(unicode);
@@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)
              }
          }
      }
+#endif
  
      assert(_PyUnicode_CheckConsistency(unicode, 1));
      return unicode;
@@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)
      case SSTATE_INTERNED_MORTAL:
          /* revive dead object temporarily for DelItem */
          Py_SET_REFCNT(unicode, 3);
+#ifdef INTERNED_STRINGS
          if (PyDict_DelItem(interned, unicode) != 0) {
              _PyErr_WriteUnraisableMsg("deletion of interned string failed",
                                        NULL);
          }
+#endif
          break;
  
      case SSTATE_INTERNED_IMMORTAL:
@@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)
  static int
  unicode_is_singleton(PyObject *unicode)
  {
-    PyASCIIObject *ascii = (PyASCIIObject *)unicode;
-    if (unicode == unicode_empty)
+    if (unicode == unicode_empty) {
          return 1;
+    }
+#ifdef LATIN1_SINGLETONS
+    PyASCIIObject *ascii = (PyASCIIObject *)unicode;
      if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
      {
          Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
          if (ch < 256 && unicode_latin1[ch] == unicode)
              return 1;
      }
+#endif
      return 0;
  }
  #endif
@@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
  static PyObject*
  get_latin1_char(unsigned char ch)
  {
-    PyObject *unicode = unicode_latin1[ch];
+    PyObject *unicode;
+
+#ifdef LATIN1_SINGLETONS
+    unicode = unicode_latin1[ch];
+    if (unicode) {
+        Py_INCREF(unicode);
+        return unicode;
+    }
+#endif
+
+    unicode = PyUnicode_New(1, ch);
      if (!unicode) {
-        unicode = PyUnicode_New(1, ch);
-        if (!unicode)
-            return NULL;
-        PyUnicode_1BYTE_DATA(unicode)[0] = ch;
-        assert(_PyUnicode_CheckConsistency(unicode, 1));
-        unicode_latin1[ch] = unicode;
+        return NULL;
      }
+
+    PyUnicode_1BYTE_DATA(unicode)[0] = ch;
+    assert(_PyUnicode_CheckConsistency(unicode, 1));
+
+#ifdef LATIN1_SINGLETONS
      Py_INCREF(unicode);
+    unicode_latin1[ch] = unicode;
+#endif
      return unicode;
  }
  
@@ -11270,7 +11303,6 @@ int
  _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
  {
      PyObject *right_uni;
-    Py_hash_t hash;
  
      assert(_PyUnicode_CHECK(left));
      assert(right->string);
@@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
      if (PyUnicode_CHECK_INTERNED(left))
          return 0;
  
+#ifdef INTERNED_STRINGS
      assert(_PyUnicode_HASH(right_uni) != -1);
-    hash = _PyUnicode_HASH(left);
+    Py_hash_t hash = _PyUnicode_HASH(left);
      if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
          return 0;
+#endif
  
      return unicode_compare_eq(left, right_uni);
  }
@@ -15487,20 +15521,26 @@ void
  PyUnicode_InternInPlace(PyObject **p)
  {
      PyObject *s = *p;
-    PyObject *t;
  #ifdef Py_DEBUG
      assert(s != NULL);
      assert(_PyUnicode_CHECK(s));
  #else
-    if (s == NULL || !PyUnicode_Check(s))
+    if (s == NULL || !PyUnicode_Check(s)) {
          return;
+    }
  #endif
+
      /* If it's a subclass, we don't really know what putting
         it in the interned dict might do. */
-    if (!PyUnicode_CheckExact(s))
+    if (!PyUnicode_CheckExact(s)) {
          return;
-    if (PyUnicode_CHECK_INTERNED(s))
+    }
+
+    if (PyUnicode_CHECK_INTERNED(s)) {
          return;
+    }
+
+#ifdef INTERNED_STRINGS
      if (interned == NULL) {
          interned = PyDict_New();
          if (interned == NULL) {
@@ -15508,22 +15548,28 @@ PyUnicode_InternInPlace(PyObject **p)
              return;
          }
      }
+
+    PyObject *t;
      Py_ALLOW_RECURSION
      t = PyDict_SetDefault(interned, s, s);
      Py_END_ALLOW_RECURSION
+
      if (t == NULL) {
          PyErr_Clear();
          return;
      }
+
      if (t != s) {
          Py_INCREF(t);
          Py_SETREF(*p, t);
          return;
      }
+
      /* The two references in interned are not counted by refcnt.
         The deallocator will take care of this */
      Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
      _PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
+#endif
  }
  
  void
@@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
  
          Py_CLEAR(unicode_empty);
  
+#ifdef LATIN1_SINGLETONS
          for (Py_ssize_t i = 0; i < 256; i++) {
              Py_CLEAR(unicode_latin1[i]);
          }
+#endif
          _PyUnicode_ClearStaticStrings();
      }
author	Victor Stinner <vstinner@python.org>
	Tue, 5 May 2020 16:50:30 +0000 (18:50 +0200)
committer	GitHub <noreply@github.com>
	Tue, 5 May 2020 16:50:30 +0000 (18:50 +0200)
Objects/typeobject.c		patch | blob | blame | history
Objects/unicodeobject.c		patch | blob | blame | history