bpo-40521: Optimize PyBytes_FromStringAndSize(str, 0) (GH-21142)

author Victor Stinner <vstinner@python.org>

Thu, 25 Jun 2020 12:07:40 +0000 (14:07 +0200)

committer GitHub <noreply@github.com>

Thu, 25 Jun 2020 12:07:40 +0000 (14:07 +0200)
author Victor Stinner <vstinner@python.org>
Thu, 25 Jun 2020 12:07:40 +0000 (14:07 +0200)
committer GitHub <noreply@github.com>
Thu, 25 Jun 2020 12:07:40 +0000 (14:07 +0200)
diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h

index bf1769e5ce2c2451df93c7e5a1f1900ab47b8248..cfc27470c80411e9d94418989ac2265efc347614 100644 (file)
--- a/Include/internal/pycore_interp.h
+++ b/Include/internal/pycore_interp.h
@@ -66,13 +66,13 @@ struct _Py_unicode_fs_codec {
  };
  
  struct _Py_bytes_state {
+    PyObject *empty_string;
      PyBytesObject *characters[256];
-    PyBytesObject *empty_string;
  };
  
  struct _Py_unicode_state {
      // The empty Unicode object is a singleton to improve performance.
-    PyObject *empty;
+    PyObject *empty_string;
      /* Single character Unicode strings in the Latin-1 range are being
         shared as well. */
      PyObject *latin1[256];
diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h

index 3b2173787118f92097f3a61f3747e5e672ed0019..bffc95b27e946cd4133ff5b6941ac5a3b9f5b6e2 100644 (file)
--- a/Include/internal/pycore_pylifecycle.h
+++ b/Include/internal/pycore_pylifecycle.h
@@ -32,6 +32,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
  /* Various one-time initializers */
  
  extern PyStatus _PyUnicode_Init(PyThreadState *tstate);
+extern PyStatus _PyBytes_Init(PyThreadState *tstate);
  extern int _PyStructSequence_Init(void);
  extern int _PyLong_Init(PyThreadState *tstate);
  extern PyStatus _PyTuple_Init(PyThreadState *tstate);
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c

index ce006e15dce9ea1db70f115f7d0224f5ee244752..782bc8e1fa0b7d3ca56a7ac84e54d0adc65181b3 100644 (file)
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -4,8 +4,9 @@
  
  #include "Python.h"
  #include "pycore_abstract.h"      // _PyIndex_Check()
-#include "pycore_bytes_methods.h"
-#include "pycore_object.h"
+#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
+#include "pycore_initconfig.h"    // _PyStatus_OK()
+#include "pycore_object.h"        // _PyObject_GC_TRACK
  #include "pycore_pymem.h"         // PYMEM_CLEANBYTE
  
  #include "pystrhex.h"
@@ -41,6 +42,44 @@ get_bytes_state(void)
  }
  
  
+// Return a borrowed reference to the empty bytes string singleton (no Py_INCREF() is done, so the caller must not Py_DECREF() it).
+static inline PyObject* bytes_get_empty(void)
+{
+    struct _Py_bytes_state *state = get_bytes_state();
+    // bytes_get_empty() must not be called before _PyBytes_Init()
+    // or after _PyBytes_Fini()
+    assert(state->empty_string != NULL);  // only verified when assertions are enabled
+    return state->empty_string;  // returned as-is: the reference count is left untouched
+}
+
+
+// Return a strong reference to the empty bytes string singleton (the caller owns the returned reference).
+static inline PyObject* bytes_new_empty(void)
+{
+    PyObject *empty = bytes_get_empty();  // borrowed reference; asserts that the singleton exists
+    Py_INCREF(empty);  // take a reference on behalf of the caller
+    return (PyObject *)empty;  // the cast is a no-op: empty is already a PyObject *
+}
+
+
+static int
+bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
+{
+    // Create the empty bytes string singleton and store it in state->empty_string; return 0 on success, -1 if PyObject_Malloc() fails.
+    PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
+    if (op == NULL) {
+        return -1;  // no exception is set here; the caller decides how to report the failure
+    }
+    _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);  // size 0: the object holds no payload bytes
+    op->ob_shash = -1;  // NOTE(review): presumably -1 means "hash not computed yet" -- confirm
+    op->ob_sval[0] = '\0';  // terminating NUL byte
+
+    assert(state->empty_string == NULL);  // the singleton must not have been created already
+    state->empty_string = (PyObject *)op;  // pointer is stored directly, without an extra Py_INCREF()
+    return 0;
+}
+
+
  /*
     For PyBytes_FromString(), the parameter `str' points to a null-terminated
     string containing exactly `size' bytes.
@@ -70,12 +109,7 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
      assert(size >= 0);
  
      if (size == 0) {
-        struct _Py_bytes_state *state = get_bytes_state();
-        op = state->empty_string;
-        if (op != NULL) {
-            Py_INCREF(op);
-            return (PyObject *)op;
-        }
+        return bytes_new_empty();
      }
  
      if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
@@ -94,13 +128,8 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
      }
      _PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
      op->ob_shash = -1;
-    if (!use_calloc)
+    if (!use_calloc) {
          op->ob_sval[size] = '\0';
-    /* empty byte string singleton */
-    if (size == 0) {
-        struct _Py_bytes_state *state = get_bytes_state();
-        Py_INCREF(op);
-        state->empty_string = op;
      }
      return (PyObject *) op;
  }
@@ -122,6 +151,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
              return (PyObject *)op;
          }
      }
+    if (size == 0) {
+        return bytes_new_empty();
+    }
  
      op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
      if (op == NULL)
@@ -155,11 +187,7 @@ PyBytes_FromString(const char *str)
  
      struct _Py_bytes_state *state = get_bytes_state();
      if (size == 0) {
-        op = state->empty_string;
-        if (op != NULL) {
-            Py_INCREF(op);
-            return (PyObject *)op;
-        }
+        return bytes_new_empty();
      }
      else if (size == 1) {
          op = state->characters[*str & UCHAR_MAX];
@@ -178,11 +206,8 @@ PyBytes_FromString(const char *str)
      op->ob_shash = -1;
      memcpy(op->ob_sval, str, size+1);
      /* share short strings */
-    if (size == 0) {
-        Py_INCREF(op);
-        state->empty_string = op;
-    }
-    else if (size == 1) {
+    if (size == 1) {
+        assert(state->characters[*str & UCHAR_MAX] == NULL);
          Py_INCREF(op);
          state->characters[*str & UCHAR_MAX] = op;
      }
@@ -1272,7 +1297,7 @@ PyBytes_AsStringAndSize(PyObject *obj,
  /* -------------------------------------------------------------------- */
  /* Methods */
  
-#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string
+#define STRINGLIB_GET_EMPTY() bytes_get_empty()
  
  #include "stringlib/stringdefs.h"
  
@@ -3053,9 +3078,9 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
          goto error;
      }
      if (newsize == 0) {
-        *pv = _PyBytes_FromSize(0, 0);
+        *pv = bytes_new_empty();
          Py_DECREF(v);
-        return (*pv == NULL) ? -1 : 0;
+        return 0;
      }
      /* XXX UNREF/NEWREF interface should be more symmetrical */
  #ifdef Py_REF_DEBUG
@@ -3084,6 +3109,18 @@ error:
      return -1;
  }
  
+
+PyStatus
+_PyBytes_Init(PyThreadState *tstate)
+{
+    struct _Py_bytes_state *state = &tstate->interp->bytes;  // bytes state of the interpreter that tstate belongs to
+    if (bytes_create_empty_string_singleton(state) < 0) {  // fills in state->empty_string
+        return _PyStatus_NO_MEMORY();  // the helper's only failure mode is a failed allocation
+    }
+    return _PyStatus_OK();
+}
+
+
  void
  _PyBytes_Fini(PyThreadState *tstate)
  {
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c

index 5ba99514d29691a6a9399be7025402d28a3c1389..55c886727ba2edd9886bbb32b5f3475686f4cff4 100644 (file)
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -41,16 +41,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  #define PY_SSIZE_T_CLEAN
  #include "Python.h"
  #include "pycore_abstract.h"       // _PyIndex_Check()
-#include "pycore_bytes_methods.h"
-#include "pycore_fileutils.h"
-#include "pycore_initconfig.h"
+#include "pycore_bytes_methods.h"  // _Py_bytes_lower()
+#include "pycore_initconfig.h"     // _PyStatus_OK()
  #include "pycore_interp.h"         // PyInterpreterState.fs_codec
-#include "pycore_object.h"
-#include "pycore_pathconfig.h"
-#include "pycore_pylifecycle.h"
+#include "pycore_object.h"         // _PyObject_GC_TRACK()
+#include "pycore_pathconfig.h"     // _Py_DumpPathConfig()
+#include "pycore_pylifecycle.h"    // _Py_SetFileSystemEncoding()
  #include "pycore_pystate.h"        // _PyInterpreterState_GET()
-#include "ucnhash.h"
-#include "stringlib/eq.h"
+#include "ucnhash.h"               // _PyUnicode_Name_CAPI
+#include "stringlib/eq.h"          // unicode_eq()
  
  #ifdef MS_WINDOWS
  #include <windows.h>
@@ -236,10 +235,12 @@ static inline PyObject* unicode_get_empty(void)
      struct _Py_unicode_state *state = get_unicode_state();
      // unicode_get_empty() must not be called before _PyUnicode_Init()
      // or after _PyUnicode_Fini()
-    assert(state->empty != NULL);
-    return state->empty;
+    assert(state->empty_string != NULL);
+    return state->empty_string;
  }
  
+
+// Return a strong reference to the empty string singleton.
  static inline PyObject* unicode_new_empty(void)
  {
      PyObject *empty = unicode_get_empty();
@@ -1385,6 +1386,26 @@ _PyUnicode_Dump(PyObject *op)
  }
  #endif
  
+static int
+unicode_create_empty_string_singleton(struct _Py_unicode_state *state)
+{
+    // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
+    // optimized to always use state->empty_string without having to check if
+    // it is NULL or not.
+    PyObject *empty = PyUnicode_New(1, 0);
+    if (empty == NULL) {
+        return -1;  // _PyUnicode_Init() reports this as _PyStatus_NO_MEMORY()
+    }
+    PyUnicode_1BYTE_DATA(empty)[0] = 0;  // store a zero in the first (and only) character slot
+    _PyUnicode_LENGTH(empty) = 0;  // then shrink the recorded length from 1 to 0
+    assert(_PyUnicode_CheckConsistency(empty, 1));
+
+    assert(state->empty_string == NULL);  // the singleton must not have been created already
+    state->empty_string = empty;  // stored without Py_INCREF(); _PyUnicode_Fini() releases it with Py_CLEAR()
+    return 0;  // 0 on success, -1 on failure (see above)
+}
+
+
  PyObject *
  PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
  {
@@ -1972,7 +1993,7 @@ static int
  unicode_is_singleton(PyObject *unicode)
  {
      struct _Py_unicode_state *state = get_unicode_state();
-    if (unicode == state->empty) {
+    if (unicode == state->empty_string) {
          return 1;
      }
      PyASCIIObject *ascii = (PyASCIIObject *)unicode;
@@ -15542,20 +15563,10 @@ _PyUnicode_Init(PyThreadState *tstate)
          0x2029, /* PARAGRAPH SEPARATOR */
      };
  
-    // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
-    // optimized to always use state->empty without having to check if it is
-    // NULL or not.
-    PyObject *empty = PyUnicode_New(1, 0);
-    if (empty == NULL) {
+    struct _Py_unicode_state *state = &tstate->interp->unicode;
+    if (unicode_create_empty_string_singleton(state) < 0) {
          return _PyStatus_NO_MEMORY();
      }
-    PyUnicode_1BYTE_DATA(empty)[0] = 0;
-    _PyUnicode_LENGTH(empty) = 0;
-    assert(_PyUnicode_CheckConsistency(empty, 1));
-
-    struct _Py_unicode_state *state = &tstate->interp->unicode;
-    assert(state->empty == NULL);
-    state->empty = empty;
  
      if (_Py_IsMainInterpreter(tstate)) {
          /* initialize the linebreak bloom filter */
@@ -16223,7 +16234,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
  #endif /* __INSURE__ */
      }
  
-    Py_CLEAR(state->empty);
+    Py_CLEAR(state->empty_string);
  
      for (Py_ssize_t i = 0; i < 256; i++) {
          Py_CLEAR(state->latin1[i]);
diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c

index 4b658f847bc12b1379f457815bf150547982270a..cd993ea13418ffa62b03bf75b4caea765b35ce81 100644 (file)
--- a/Python/pylifecycle.c
+++ b/Python/pylifecycle.c
@@ -607,6 +607,11 @@ pycore_init_types(PyThreadState *tstate)
          return status;
      }
  
+    status = _PyBytes_Init(tstate);
+    if (_PyStatus_EXCEPTION(status)) {
+        return status;
+    }
+
      status = _PyExc_Init(tstate);
      if (_PyStatus_EXCEPTION(status)) {
          return status;
author	Victor Stinner <vstinner@python.org>
	Thu, 25 Jun 2020 12:07:40 +0000 (14:07 +0200)
committer	GitHub <noreply@github.com>
	Thu, 25 Jun 2020 12:07:40 +0000 (14:07 +0200)
Include/internal/pycore_interp.h		patch | blob | blame | history
Include/internal/pycore_pylifecycle.h		patch | blob | blame | history
Objects/bytesobject.c		patch | blob | blame | history
Objects/unicodeobject.c		patch | blob | blame | history
Python/pylifecycle.c		patch | blob | blame | history