]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Issue #12911: Fix memory consumption when calculating the repr() of huge tuples or...
authorAntoine Pitrou <solipsis@pitrou.net>
Thu, 6 Oct 2011 16:57:27 +0000 (18:57 +0200)
committerAntoine Pitrou <solipsis@pitrou.net>
Thu, 6 Oct 2011 16:57:27 +0000 (18:57 +0200)
This introduces a small private API for this common pattern.
The issue has been discovered thanks to Martin's huge-mem buildbot.

13 files changed:
Include/Python.h
Include/accu.h [new file with mode: 0644]
Lib/test/test_list.py
Lib/test/test_tuple.py
Makefile.pre.in
Misc/NEWS
Objects/accu.c [new file with mode: 0644]
Objects/listobject.c
Objects/tupleobject.c
PC/VC6/pythoncore.dsp
PC/VS7.1/pythoncore.vcproj
PC/VS8.0/pythoncore.vcproj
PCbuild/pythoncore.vcproj

index db33a76cd4e4e48c454aecd616d0aa0c09ced60f..5972ffa5923241cf668ebd50ddf20e3c2279ed61 100644 (file)
 #include "warnings.h"
 #include "weakrefobject.h"
 #include "structseq.h"
-
+#include "accu.h"
 
 #include "codecs.h"
 #include "pyerrors.h"
@@ -141,7 +141,7 @@ PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name);
 #endif
 
 /* Argument must be a char or an int in [-128, 127] or [0, 255]. */
-#define Py_CHARMASK(c)         ((unsigned char)((c) & 0xff))
+#define Py_CHARMASK(c)          ((unsigned char)((c) & 0xff))
 
 #include "pyfpe.h"
 
diff --git a/Include/accu.h b/Include/accu.h
new file mode 100644 (file)
index 0000000..9655d37
--- /dev/null
@@ -0,0 +1,35 @@
+#ifndef Py_LIMITED_API
+#ifndef Py_ACCU_H
+#define Py_ACCU_H
+
+/*** This is a private API for use by the interpreter and the stdlib.
+ *** Its definition may be changed or removed at any moment.
+ ***/
+
+/*
+ * A two-level accumulator of unicode objects that avoids both the overhead
+ * of keeping a huge number of small separate objects, and the quadratic
+ * behaviour of using a naive repeated concatenation scheme.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    PyObject *large;  /* A list of previously accumulated large strings */
+    PyObject *small;  /* Pending small strings */
+} _PyAccu;
+
+PyAPI_FUNC(int) _PyAccu_Init(_PyAccu *acc);
+PyAPI_FUNC(int) _PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode);
+PyAPI_FUNC(PyObject *) _PyAccu_FinishAsList(_PyAccu *acc);
+PyAPI_FUNC(PyObject *) _PyAccu_Finish(_PyAccu *acc);
+PyAPI_FUNC(void) _PyAccu_Destroy(_PyAccu *acc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* Py_ACCU_H */
+#endif /* Py_LIMITED_API */
index 3510bd5febe04f390d54dd1caa356d150816e95e..d7e6629ba7c16366717fa783db067c0f5d13c15c 100644 (file)
@@ -59,6 +59,17 @@ class ListTest(list_tests.CommonTest):
         self.assertRaises((MemoryError, OverflowError), mul, lst, n)
         self.assertRaises((MemoryError, OverflowError), imul, lst, n)
 
+    def test_repr_large(self):
+        # Check the repr of large list objects
+        def check(n):
+            l = [0] * n
+            s = repr(l)
+            self.assertEqual(s,
+                '[' + ', '.join(['0'] * n) + ']')
+        check(10)       # check our checking code
+        check(1000000)
+
+
 def test_main(verbose=None):
     support.run_unittest(ListTest)
 
index 75fbe45ec4b755e0982a383b6115aa94c9ed8cea..1464a0e50f491611d6ba0a077424d75833d1aa83 100644 (file)
@@ -154,6 +154,16 @@ class TupleTest(seq_tests.CommonTest):
         # Trying to untrack an unfinished tuple could crash Python
         self._not_tracked(tuple(gc.collect() for i in range(101)))
 
+    def test_repr_large(self):
+        # Check the repr of large list objects
+        def check(n):
+            l = (0,) * n
+            s = repr(l)
+            self.assertEqual(s,
+                '(' + ', '.join(['0'] * n) + ')')
+        check(10)       # check our checking code
+        check(1000000)
+
 def test_main():
     support.run_unittest(TupleTest)
 
index a2b14a943349e61a79da1d7b493268ef6788eb32..cde3af0c420e653196961d20abf13536ebb4745a 100644 (file)
@@ -342,6 +342,7 @@ PYTHON_OBJS=        \
 # Objects
 OBJECT_OBJS=   \
                Objects/abstract.o \
+               Objects/accu.o \
                Objects/boolobject.o \
                Objects/bytes_methods.o \
                Objects/bytearrayobject.o \
@@ -664,6 +665,7 @@ PYTHON_HEADERS= \
                Include/Python-ast.h \
                Include/Python.h \
                Include/abstract.h \
+               Include/accu.h \
                Include/asdl.h \
                Include/ast.h \
                 Include/bltinmodule.h \
index c4aba0dfea50f9cd455f1abf9b37c550e9c1f379..67227b118c27b5e6f589e92ccb3a5b1a59c7c63a 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@ What's New in Python 3.2.3?
 Core and Builtins
 -----------------
 
+- Issue #12911: Fix memory consumption when calculating the repr() of huge
+  tuples or lists.
+
 - Issue #7732: Don't open a directory as a file anymore while importing a
   module. Ignore the direcotry if its name matchs the module name (e.g.
   "__init__.py") and raise a ImportError instead.
diff --git a/Objects/accu.c b/Objects/accu.c
new file mode 100644 (file)
index 0000000..88e8f08
--- /dev/null
@@ -0,0 +1,114 @@
+/* Accumulator struct implementation */
+
+#include "Python.h"
+
+static PyObject *
+join_list_unicode(PyObject *lst)
+{
+    /* return ''.join(lst) */
+    PyObject *sep, *ret;
+    sep = PyUnicode_FromStringAndSize("", 0);
+    ret = PyUnicode_Join(sep, lst);
+    Py_DECREF(sep);
+    return ret;
+}
+
+int
+_PyAccu_Init(_PyAccu *acc)
+{
+    /* Lazily allocated */
+    acc->large = NULL;
+    acc->small = PyList_New(0);
+    if (acc->small == NULL)
+        return -1;
+    return 0;
+}
+
+static int
+flush_accumulator(_PyAccu *acc)
+{
+    Py_ssize_t nsmall = PyList_GET_SIZE(acc->small);
+    if (nsmall) {
+        int ret;
+        PyObject *joined;
+        if (acc->large == NULL) {
+            acc->large = PyList_New(0);
+            if (acc->large == NULL)
+                return -1;
+        }
+        joined = join_list_unicode(acc->small);
+        if (joined == NULL)
+            return -1;
+        if (PyList_SetSlice(acc->small, 0, nsmall, NULL)) {
+            Py_DECREF(joined);
+            return -1;
+        }
+        ret = PyList_Append(acc->large, joined);
+        Py_DECREF(joined);
+        return ret;
+    }
+    return 0;
+}
+
+int
+_PyAccu_Accumulate(_PyAccu *acc, PyObject *unicode)
+{
+    Py_ssize_t nsmall;
+    assert(PyUnicode_Check(unicode));
+
+    if (PyList_Append(acc->small, unicode))
+        return -1;
+    nsmall = PyList_GET_SIZE(acc->small);
+    /* Each item in a list of unicode objects has an overhead (in 64-bit
+     * builds) of:
+     *   - 8 bytes for the list slot
+     *   - 56 bytes for the header of the unicode object
+     * that is, 64 bytes.  100000 such objects waste more than 6MB
+     * compared to a single concatenated string.
+     */
+    if (nsmall < 100000)
+        return 0;
+    return flush_accumulator(acc);
+}
+
+PyObject *
+_PyAccu_FinishAsList(_PyAccu *acc)
+{
+    int ret;
+    PyObject *res;
+
+    ret = flush_accumulator(acc);
+    Py_CLEAR(acc->small);
+    if (ret) {
+        Py_CLEAR(acc->large);
+        return NULL;
+    }
+    res = acc->large;
+    acc->large = NULL;
+    return res;
+}
+
+PyObject *
+_PyAccu_Finish(_PyAccu *acc)
+{
+    PyObject *list, *res;
+    if (acc->large == NULL) {
+        list = acc->small;
+        acc->small = NULL;
+    }
+    else {
+        list = _PyAccu_FinishAsList(acc);
+        if (!list)
+            return NULL;
+    }
+    res = join_list_unicode(list);
+    Py_DECREF(list);
+    return res;
+}
+
+void
+_PyAccu_Destroy(_PyAccu *acc)
+{
+    Py_CLEAR(acc->small);
+    Py_CLEAR(acc->large);
+}
index 36f8b9dc70b9cdd0e2277440317bb67398d168d5..00de597e5686221b25563cbf51d05be87a9bb22f 100644 (file)
@@ -321,70 +321,59 @@ static PyObject *
 list_repr(PyListObject *v)
 {
     Py_ssize_t i;
-    PyObject *s, *temp;
-    PyObject *pieces = NULL, *result = NULL;
+    PyObject *s = NULL;
+    _PyAccu acc;
+    static PyObject *sep = NULL;
+
+    if (Py_SIZE(v) == 0) {
+        return PyUnicode_FromString("[]");
+    }
+
+    if (sep == NULL) {
+        sep = PyUnicode_FromString(", ");
+        if (sep == NULL)
+            return NULL;
+    }
 
     i = Py_ReprEnter((PyObject*)v);
     if (i != 0) {
         return i > 0 ? PyUnicode_FromString("[...]") : NULL;
     }
 
-    if (Py_SIZE(v) == 0) {
-        result = PyUnicode_FromString("[]");
-        goto Done;
-    }
+    if (_PyAccu_Init(&acc))
+        goto error;
 
-    pieces = PyList_New(0);
-    if (pieces == NULL)
-        goto Done;
+    s = PyUnicode_FromString("[");
+    if (s == NULL || _PyAccu_Accumulate(&acc, s))
+        goto error;
+    Py_CLEAR(s);
 
     /* Do repr() on each element.  Note that this may mutate the list,
        so must refetch the list size on each iteration. */
     for (i = 0; i < Py_SIZE(v); ++i) {
-        int status;
         if (Py_EnterRecursiveCall(" while getting the repr of a list"))
-            goto Done;
+            goto error;
         s = PyObject_Repr(v->ob_item[i]);
         Py_LeaveRecursiveCall();
-        if (s == NULL)
-            goto Done;
-        status = PyList_Append(pieces, s);
-        Py_DECREF(s);  /* append created a new ref */
-        if (status < 0)
-            goto Done;
+        if (i > 0 && _PyAccu_Accumulate(&acc, sep))
+            goto error;
+        if (s == NULL || _PyAccu_Accumulate(&acc, s))
+            goto error;
+        Py_CLEAR(s);
     }
+    s = PyUnicode_FromString("]");
+    if (s == NULL || _PyAccu_Accumulate(&acc, s))
+        goto error;
+    Py_CLEAR(s);
 
-    /* Add "[]" decorations to the first and last items. */
-    assert(PyList_GET_SIZE(pieces) > 0);
-    s = PyUnicode_FromString("[");
-    if (s == NULL)
-        goto Done;
-    temp = PyList_GET_ITEM(pieces, 0);
-    PyUnicode_AppendAndDel(&s, temp);
-    PyList_SET_ITEM(pieces, 0, s);
-    if (s == NULL)
-        goto Done;
+    Py_ReprLeave((PyObject *)v);
+    return _PyAccu_Finish(&acc);
 
-    s = PyUnicode_FromString("]");
-    if (s == NULL)
-        goto Done;
-    temp = PyList_GET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1);
-    PyUnicode_AppendAndDel(&temp, s);
-    PyList_SET_ITEM(pieces, PyList_GET_SIZE(pieces) - 1, temp);
-    if (temp == NULL)
-        goto Done;
-
-    /* Paste them all together with ", " between. */
-    s = PyUnicode_FromString(", ");
-    if (s == NULL)
-        goto Done;
-    result = PyUnicode_Join(s, pieces);
-    Py_DECREF(s);
-
-Done:
-    Py_XDECREF(pieces);
+error:
+    _PyAccu_Destroy(&acc);
+    Py_XDECREF(s);
     Py_ReprLeave((PyObject *)v);
-    return result;
+    return NULL;
 }
 
 static Py_ssize_t
index 72b79c917a8667b662a4b339f66b72ba48d26f0b..8aacd121143fdcc4ed7557759a5b40ccd11500c1 100644 (file)
@@ -240,13 +240,20 @@ static PyObject *
 tuplerepr(PyTupleObject *v)
 {
     Py_ssize_t i, n;
-    PyObject *s, *temp;
-    PyObject *pieces, *result = NULL;
+    PyObject *s = NULL;
+    _PyAccu acc;
+    static PyObject *sep = NULL;
 
     n = Py_SIZE(v);
     if (n == 0)
         return PyUnicode_FromString("()");
 
+    if (sep == NULL) {
+        sep = PyUnicode_FromString(", ");
+        if (sep == NULL)
+            return NULL;
+    }
+
     /* While not mutable, it is still possible to end up with a cycle in a
        tuple through an object that stores itself within a tuple (and thus
        infinitely asks for the repr of itself). This should only be
@@ -256,52 +263,42 @@ tuplerepr(PyTupleObject *v)
         return i > 0 ? PyUnicode_FromString("(...)") : NULL;
     }
 
-    pieces = PyTuple_New(n);
-    if (pieces == NULL)
-        return NULL;
+    if (_PyAccu_Init(&acc))
+        goto error;
+
+    s = PyUnicode_FromString("(");
+    if (s == NULL || _PyAccu_Accumulate(&acc, s))
+        goto error;
+    Py_CLEAR(s);
 
     /* Do repr() on each element. */
     for (i = 0; i < n; ++i) {
         if (Py_EnterRecursiveCall(" while getting the repr of a tuple"))
-            goto Done;
+            goto error;
         s = PyObject_Repr(v->ob_item[i]);
         Py_LeaveRecursiveCall();
-        if (s == NULL)
-            goto Done;
-        PyTuple_SET_ITEM(pieces, i, s);
+        if (i > 0 && _PyAccu_Accumulate(&acc, sep))
+            goto error;
+        if (s == NULL || _PyAccu_Accumulate(&acc, s))
+            goto error;
+        Py_CLEAR(s);
     }
+    if (n > 1)
+        s = PyUnicode_FromString(")");
+    else
+        s = PyUnicode_FromString(",)");
+    if (s == NULL || _PyAccu_Accumulate(&acc, s))
+        goto error;
+    Py_CLEAR(s);
 
-    /* Add "()" decorations to the first and last items. */
-    assert(n > 0);
-    s = PyUnicode_FromString("(");
-    if (s == NULL)
-        goto Done;
-    temp = PyTuple_GET_ITEM(pieces, 0);
-    PyUnicode_AppendAndDel(&s, temp);
-    PyTuple_SET_ITEM(pieces, 0, s);
-    if (s == NULL)
-        goto Done;
-
-    s = PyUnicode_FromString(n == 1 ? ",)" : ")");
-    if (s == NULL)
-        goto Done;
-    temp = PyTuple_GET_ITEM(pieces, n-1);
-    PyUnicode_AppendAndDel(&temp, s);
-    PyTuple_SET_ITEM(pieces, n-1, temp);
-    if (temp == NULL)
-        goto Done;
-
-    /* Paste them all together with ", " between. */
-    s = PyUnicode_FromString(", ");
-    if (s == NULL)
-        goto Done;
-    result = PyUnicode_Join(s, pieces);
-    Py_DECREF(s);
-
-Done:
-    Py_DECREF(pieces);
     Py_ReprLeave((PyObject *)v);
-    return result;
+    return _PyAccu_Finish(&acc);
+
+error:
+    _PyAccu_Destroy(&acc);
+    Py_XDECREF(s);
+    Py_ReprLeave((PyObject *)v);
+    return NULL;
 }
 
 /* The addend 82520, was selected from the range(0, 1000000) for
index f8a2c81ec24f3dc91d1941bcd11065bf3a7363a7..2afa3221667408332114846f54640e9b231587c0 100644 (file)
@@ -205,6 +205,10 @@ SOURCE=..\..\Objects\abstract.c
 # End Source File\r
 # Begin Source File\r
 \r
+SOURCE=..\..\Objects\accu.c\r
+# End Source File\r
+# Begin Source File\r
+\r
 SOURCE=..\..\Parser\acceler.c\r
 # End Source File\r
 # Begin Source File\r
index a6f897ae64431d10117f86e830b20c8feefb8ebc..b629df423a287ab95540612d3a7c1778c031c9b5 100644 (file)
                <File\r
                        RelativePath="..\..\Objects\abstract.c">\r
                </File>\r
+               <File\r
+                       RelativePath="..\..\Objects\accu.c">\r
+               </File>\r
                <File\r
                        RelativePath="..\..\Parser\acceler.c">\r
                </File>\r
index 7252bcd603f2c02d14f39518fd7c9451076feca9..5e1ef1ac19ce30f9aa490c72eaf55906f7bc574d 100644 (file)
                                RelativePath="..\..\Include\abstract.h"\r
                                >\r
                        </File>\r
+                       <File\r
+                               RelativePath="..\..\Include\accu.h"\r
+                               >\r
+                       </File>\r
                        <File\r
                                RelativePath="..\..\Include\asdl.h"\r
                                >\r
                                RelativePath="..\..\Objects\abstract.c"\r
                                >\r
                        </File>\r
+                       <File\r
+                               RelativePath="..\..\Objects\accu.c"\r
+                               >\r
+                       </File>\r
                        <File\r
                                RelativePath="..\..\Objects\boolobject.c"\r
                                >\r
index b8187618bba13d369d9829e604469568f1a171c8..f83f6b18954fc40e7c572f777c1f1bc576072377 100644 (file)
                                RelativePath="..\Include\abstract.h"\r
                                >\r
                        </File>\r
+                       <File\r
+                               RelativePath="..\Include\accu.h"\r
+                               >\r
+                       </File>\r
                        <File\r
                                RelativePath="..\Include\asdl.h"\r
                                >\r
                                RelativePath="..\Objects\abstract.c"\r
                                >\r
                        </File>\r
+                       <File\r
+                               RelativePath="..\Objects\accu.c"\r
+                               >\r
+                       </File>\r
                        <File\r
                                RelativePath="..\Objects\boolobject.c"\r
                                >\r