Optimize pickle.load() and pickle.loads()

author Victor Stinner <victor.stinner@gmail.com>

Fri, 20 May 2016 09:42:37 +0000 (11:42 +0200)

committer Victor Stinner <victor.stinner@gmail.com>

Fri, 20 May 2016 09:42:37 +0000 (11:42 +0200)
author Victor Stinner <victor.stinner@gmail.com>
Fri, 20 May 2016 09:42:37 +0000 (11:42 +0200)
committer Victor Stinner <victor.stinner@gmail.com>
Fri, 20 May 2016 09:42:37 +0000 (11:42 +0200)
diff --git a/Doc/whatsnew/3.6.rst b/Doc/whatsnew/3.6.rst

index 52be1342aa3a8dc8e4deb26aee52255623b644a3..67fd50f486eb0301c6c79995efad733b5fd735f2 100644 (file)
--- a/Doc/whatsnew/3.6.rst
+++ b/Doc/whatsnew/3.6.rst
@@ -467,6 +467,9 @@ Optimizations
    with a short lifetime, and use :c:func:`malloc` for larger memory blocks.
    (Contributed by Victor Stinner in :issue:`26249`).
  
+* :func:`pickle.load` and :func:`pickle.loads` are now up to 10% faster when
+  deserializing many small objects.
+  (Contributed by Victor Stinner in :issue:`27056`).
  
  Build and C API Changes
  =======================
diff --git a/Misc/NEWS b/Misc/NEWS

index 94e508f4cee1ff8d018203c45d4457a008cec166..ba66c4e5d59852195c0192750fe154d096761a39 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -16,6 +16,9 @@ Core and Builtins
  Library
  -------
  
+- Issue #27056: Optimize pickle.load() and pickle.loads(): they are now up to
+  10% faster when deserializing many small objects.
+
  
  What's New in Python 3.6.0 alpha 1?
  ===================================
@@ -341,7 +344,7 @@ Library
  - Issue #26977: Removed unnecessary, and ignored, call to sum of squares helper
    in statistics.pvariance.
  
-- Issue #26002: Use bisect in statistics.median instead of a linear search. 
+- Issue #26002: Use bisect in statistics.median instead of a linear search.
    Patch by Upendra Kumar.
  
  - Issue #25974: Make use of new Decimal.as_integer_ratio() method in statistics
diff --git a/Modules/_pickle.c b/Modules/_pickle.c

index fdd60e01559b49b8eab400a4a851f341fa3cfcae..e3aa7c50ef497146feafe11965a0681bd754ace0 100644 (file)
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -1197,21 +1197,9 @@ _Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
      return read_size;
  }
  
-/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
-
-   This should be used for all data reads, rather than accessing the unpickler's
-   input buffer directly. This method deals correctly with reading from input
-   streams, which the input buffer doesn't deal with.
-
-   Note that when reading from a file-like object, self->next_read_idx won't
-   be updated (it should remain at 0 for the entire unpickling process). You
-   should use this function's return value to know how many bytes you can
-   consume.
-
-   Returns -1 (with an exception set) on failure. On success, return the
-   number of chars read. */
+/* Don't call it directly: use _Unpickler_Read() */
  static Py_ssize_t
-_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
+_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
  {
      Py_ssize_t num_read;
  
@@ -1222,11 +1210,10 @@ _Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
                          "read would overflow (invalid bytecode)");
          return -1;
      }
-    if (self->next_read_idx + n <= self->input_len) {
-        *s = self->input_buffer + self->next_read_idx;
-        self->next_read_idx += n;
-        return n;
-    }
+
+    /* This case is handled by the _Unpickler_Read() macro for efficiency */
+    assert(self->next_read_idx + n > self->input_len);
+
      if (!self->read) {
          PyErr_Format(PyExc_EOFError, "Ran out of input");
          return -1;
@@ -1243,6 +1230,26 @@ _Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
      return n;
  }
  
+/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
+
+   This should be used for all data reads, rather than accessing the unpickler's
+   input buffer directly. This method deals correctly with reading from input
+   streams, which the input buffer doesn't deal with.
+
+   Note that when reading from a file-like object, self->next_read_idx won't
+   be updated (it should remain at 0 for the entire unpickling process). You
+   should use this function's return value to know how many bytes you can
+   consume.
+
+   This is a macro: `self` and `n` are evaluated more than once, so pass
+   expressions without side effects. When the request fits in the bytes
+   still buffered it is served inline; otherwise _Unpickler_ReadImpl() is
+   called.
+
+   The fast-path test is written as `n <= input_len - next_read_idx` instead
+   of `next_read_idx + n <= input_len`: `n` may come straight from the pickle
+   stream, and the addition could overflow before _Unpickler_ReadImpl() gets
+   a chance to reject it. (Assumes 0 <= next_read_idx <= input_len, so the
+   subtraction itself cannot overflow -- TODO confirm against the callers.)
+
+   Returns -1 (with an exception set) on failure. On success, return the
+   number of chars read. */
+#define _Unpickler_Read(self, s, n) \
+    (((n) <= (self)->input_len - (self)->next_read_idx)      \
+     ? (*(s) = (self)->input_buffer + (self)->next_read_idx, \
+        (self)->next_read_idx += (n),                        \
+        (n))                                                 \
+     : _Unpickler_ReadImpl((self), (s), (n)))
+
  static Py_ssize_t
  _Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
                      char **result)
author	Victor Stinner <victor.stinner@gmail.com>
	Fri, 20 May 2016 09:42:37 +0000 (11:42 +0200)
committer	Victor Stinner <victor.stinner@gmail.com>
	Fri, 20 May 2016 09:42:37 +0000 (11:42 +0200)
Doc/whatsnew/3.6.rst		patch \| blob \| blame \| history
Misc/NEWS		patch \| blob \| blame \| history
Modules/_pickle.c		patch \| blob \| blame \| history