]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Issue #17804: New function ``struct.iter_unpack`` allows for streaming struct unpacking.
authorAntoine Pitrou <solipsis@pitrou.net>
Fri, 26 Apr 2013 22:20:04 +0000 (00:20 +0200)
committerAntoine Pitrou <solipsis@pitrou.net>
Fri, 26 Apr 2013 22:20:04 +0000 (00:20 +0200)
Doc/library/struct.rst
Lib/struct.py
Lib/test/test_struct.py
Misc/NEWS
Modules/_struct.c

index 994506c2fe70e1f7609af1c9d6a8054456e6f5bc..f2ea3619294d263b9a34f49ffa23116a99cda095 100644 (file)
@@ -66,6 +66,19 @@ The module defines the following exception and functions:
    format (``len(buffer[offset:])`` must be at least ``calcsize(fmt)``).
 
 
+.. function:: iter_unpack(fmt, buffer)
+
+   Iteratively unpack from the buffer *buffer* according to the format
+   string *fmt*.  This function returns an iterator which will read
+   equally-sized chunks from the buffer until all its contents have been
+   consumed.  The buffer's size in bytes must be a multiple of the amount
+   of data required by the format, as reflected by :func:`calcsize`.
+
+   Each iteration yields a tuple as specified by the format string.
+
+   .. versionadded:: 3.4
+
+
 .. function:: calcsize(fmt)
 
    Return the size of the struct (and hence of the bytes object produced by
@@ -388,6 +401,13 @@ The :mod:`struct` module also defines the following type:
       (``len(buffer[offset:])`` must be at least :attr:`self.size`).
 
 
+   .. method:: iter_unpack(buffer)
+
+      Identical to the :func:`iter_unpack` function, using the compiled format.
+      (``len(buffer)`` must be a multiple of :attr:`self.size`).
+
+      .. versionadded:: 3.4
+
    .. attribute:: format
 
       The format string used to construct this Struct object.
index 9bfc23f8d5b75b10bb3bf9705ddb7272da14bd85..d6bba58863649898a3f075cdda51cade9dc07f06 100644 (file)
@@ -1,6 +1,7 @@
 __all__ = [
     # Functions
     'calcsize', 'pack', 'pack_into', 'unpack', 'unpack_from',
+    'iter_unpack',
 
     # Classes
     'Struct',
index eb97a2cc7a8a226e5ec4f6b915bffbc932319318..8ffa7e6144a580a4c8c1e7387ac273f30a990398 100644 (file)
@@ -1,4 +1,6 @@
+from collections import abc
 import array
+import operator
 import unittest
 import struct
 import sys
@@ -593,8 +595,78 @@ class StructTest(unittest.TestCase):
         self.check_sizeof('0s', 1)
         self.check_sizeof('0c', 0)
 
+
+class UnpackIteratorTest(unittest.TestCase):
+    """
+    Tests for iterative unpacking (struct.Struct.iter_unpack).
+    """
+
+    def test_construct(self):
+        def _check_iterator(it):
+            self.assertIsInstance(it, abc.Iterator)
+            self.assertIsInstance(it, abc.Iterable)
+        s = struct.Struct('>ibcp')
+        it = s.iter_unpack(b"")
+        _check_iterator(it)
+        it = s.iter_unpack(b"1234567")
+        _check_iterator(it)
+        # Wrong bytes length
+        with self.assertRaises(struct.error):
+            s.iter_unpack(b"123456")
+        with self.assertRaises(struct.error):
+            s.iter_unpack(b"12345678")
+        # Zero-length struct
+        s = struct.Struct('>')
+        with self.assertRaises(struct.error):
+            s.iter_unpack(b"")
+        with self.assertRaises(struct.error):
+            s.iter_unpack(b"12")
+
+    def test_iterate(self):
+        s = struct.Struct('>IB')
+        b = bytes(range(1, 16))
+        it = s.iter_unpack(b)
+        self.assertEqual(next(it), (0x01020304, 5))
+        self.assertEqual(next(it), (0x06070809, 10))
+        self.assertEqual(next(it), (0x0b0c0d0e, 15))
+        self.assertRaises(StopIteration, next, it)
+        self.assertRaises(StopIteration, next, it)
+
+    def test_arbitrary_buffer(self):
+        s = struct.Struct('>IB')
+        b = bytes(range(1, 11))
+        it = s.iter_unpack(memoryview(b))
+        self.assertEqual(next(it), (0x01020304, 5))
+        self.assertEqual(next(it), (0x06070809, 10))
+        self.assertRaises(StopIteration, next, it)
+        self.assertRaises(StopIteration, next, it)
+
+    def test_length_hint(self):
+        lh = operator.length_hint
+        s = struct.Struct('>IB')
+        b = bytes(range(1, 16))
+        it = s.iter_unpack(b)
+        self.assertEqual(lh(it), 3)
+        next(it)
+        self.assertEqual(lh(it), 2)
+        next(it)
+        self.assertEqual(lh(it), 1)
+        next(it)
+        self.assertEqual(lh(it), 0)
+        self.assertRaises(StopIteration, next, it)
+        self.assertEqual(lh(it), 0)
+
+    def test_module_func(self):
+        # Sanity check for the global struct.iter_unpack()
+        it = struct.iter_unpack('>IB', bytes(range(1, 11)))
+        self.assertEqual(next(it), (0x01020304, 5))
+        self.assertEqual(next(it), (0x06070809, 10))
+        self.assertRaises(StopIteration, next, it)
+        self.assertRaises(StopIteration, next, it)
+
+
 def test_main():
-    support.run_unittest(StructTest)
+    support.run_unittest(__name__)
 
 if __name__ == '__main__':
     test_main()
index 273f11c92bae14e78c3a0cf8f299a665a22d259f..5c1016df62eec6ce07d6aa5c94bdd2ee72767e81 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -49,6 +49,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #17804: New function ``struct.iter_unpack`` allows for streaming
+  struct unpacking.
+
 - Issue #17830: When keyword.py is used to update a keyword file, it now
   preserves the line endings of the original file.
 
index 208559cf1c2c1ea32d39dbe861be34e0055dc1a2..2dec4ed6d74b24bc3a6ca78c8c22a7887b833a6f 100644 (file)
@@ -1247,6 +1247,9 @@ align(Py_ssize_t size, char c, const formatdef *e)
     return size;
 }
 
+/*
+ * Struct object implementation.
+ */
 
 /* calculate the size of a format string */
 
@@ -1556,6 +1559,142 @@ s_unpack_from(PyObject *self, PyObject *args, PyObject *kwds)
 }
 
 
+/* Unpack iterator type */
+
+typedef struct {
+    PyObject_HEAD
+    PyStructObject *so;
+    Py_buffer buf;
+    Py_ssize_t index;
+} unpackiterobject;
+
+static void
+unpackiter_dealloc(unpackiterobject *self)
+{
+    Py_XDECREF(self->so);
+    PyBuffer_Release(&self->buf);
+    PyObject_GC_Del(self);
+}
+
+static int
+unpackiter_traverse(unpackiterobject *self, visitproc visit, void *arg)
+{
+    Py_VISIT(self->so);
+    Py_VISIT(self->buf.obj);
+    return 0;
+}
+
+static PyObject *
+unpackiter_len(unpackiterobject *self)
+{
+    Py_ssize_t len;
+    if (self->so == NULL)
+        len = 0;
+    else
+        len = (self->buf.len - self->index) / self->so->s_size;
+    return PyLong_FromSsize_t(len);
+}
+
+static PyMethodDef unpackiter_methods[] = {
+    {"__length_hint__", (PyCFunction) unpackiter_len, METH_NOARGS, NULL},
+    {NULL,              NULL}           /* sentinel */
+};
+
+static PyObject *
+unpackiter_iternext(unpackiterobject *self)
+{
+    PyObject *result;
+    if (self->so == NULL)
+        return NULL;
+    if (self->index >= self->buf.len) {
+        /* Iterator exhausted */
+        Py_CLEAR(self->so);
+        PyBuffer_Release(&self->buf);
+        return NULL;
+    }
+    assert(self->index + self->so->s_size <= self->buf.len);
+    result = s_unpack_internal(self->so,
+                               (char*) self->buf.buf + self->index);
+    self->index += self->so->s_size;
+    return result;
+}
+
+PyTypeObject unpackiter_type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "unpack_iterator",                          /* tp_name */
+    sizeof(unpackiterobject),                   /* tp_basicsize */
+    0,                                          /* tp_itemsize */
+    (destructor)unpackiter_dealloc,             /* tp_dealloc */
+    0,                                          /* tp_print */
+    0,                                          /* tp_getattr */
+    0,                                          /* tp_setattr */
+    0,                                          /* tp_reserved */
+    0,                                          /* tp_repr */
+    0,                                          /* tp_as_number */
+    0,                                          /* tp_as_sequence */
+    0,                                          /* tp_as_mapping */
+    0,                                          /* tp_hash */
+    0,                                          /* tp_call */
+    0,                                          /* tp_str */
+    PyObject_GenericGetAttr,                    /* tp_getattro */
+    0,                                          /* tp_setattro */
+    0,                                          /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
+    0,                                          /* tp_doc */
+    (traverseproc)unpackiter_traverse,          /* tp_traverse */
+    0,                                          /* tp_clear */
+    0,                                          /* tp_richcompare */
+    0,                                          /* tp_weaklistoffset */
+    PyObject_SelfIter,                          /* tp_iter */
+    (iternextfunc)unpackiter_iternext,          /* tp_iternext */
+    unpackiter_methods                          /* tp_methods */
+};
+
+PyDoc_STRVAR(s_iter_unpack__doc__,
+"S.iter_unpack(buffer) -> iterator(v1, v2, ...)\n\
+\n\
+Return an iterator yielding tuples unpacked from the given bytes\n\
+source, like a repeated invocation of unpack_from().  Requires\n\
+that the bytes length be a multiple of the struct size.");
+
+static PyObject *
+s_iter_unpack(PyObject *_so, PyObject *input)
+{
+    PyStructObject *so = (PyStructObject *) _so;
+    unpackiterobject *self;
+
+    assert(PyStruct_Check(_so));
+    assert(so->s_codes != NULL);
+
+    if (so->s_size == 0) {
+        PyErr_Format(StructError,
+                     "cannot iteratively unpack with a struct of length 0");
+        return NULL;
+    }
+
+    self = (unpackiterobject *) PyType_GenericAlloc(&unpackiter_type, 0);
+    if (self == NULL)
+        return NULL;
+
+    if (PyObject_GetBuffer(input, &self->buf, PyBUF_SIMPLE) < 0) {
+        Py_DECREF(self);
+        return NULL;
+    }
+    if (self->buf.len % so->s_size != 0) {
+        PyErr_Format(StructError,
+                     "iterative unpacking requires a bytes length "
+                     "multiple of %zd",
+                     so->s_size);
+        Py_DECREF(self);
+        return NULL;
+    }
+    Py_INCREF(so);
+    self->so = so;
+    self->index = 0;
+    return (PyObject *) self;
+}
+
+
 /*
  * Guts of the pack function.
  *
@@ -1776,6 +1915,7 @@ s_sizeof(PyStructObject *self, void *unused)
 /* List of functions */
 
 static struct PyMethodDef s_methods[] = {
+    {"iter_unpack",     s_iter_unpack,  METH_O, s_iter_unpack__doc__},
     {"pack",            s_pack,         METH_VARARGS, s_pack__doc__},
     {"pack_into",       s_pack_into,    METH_VARARGS, s_pack_into__doc__},
     {"unpack",          s_unpack,       METH_O, s_unpack__doc__},
@@ -2025,9 +2165,34 @@ unpack_from(PyObject *self, PyObject *args, PyObject *kwds)
     return result;
 }
 
+PyDoc_STRVAR(iter_unpack_doc,
+"iter_unpack(fmt, buffer) -> iterator(v1, v2, ...)\n\
+\n\
+Return an iterator yielding tuples unpacked from the given bytes\n\
+source according to the format string, like a repeated invocation of\n\
+unpack_from().  Requires that the bytes length be a multiple of the\n\
+format struct size.");
+
+static PyObject *
+iter_unpack(PyObject *self, PyObject *args)
+{
+    PyObject *s_object, *fmt, *input, *result;
+
+    if (!PyArg_ParseTuple(args, "OO:iter_unpack", &fmt, &input))
+        return NULL;
+
+    s_object = cache_struct(fmt);
+    if (s_object == NULL)
+        return NULL;
+    result = s_iter_unpack(s_object, input);
+    Py_DECREF(s_object);
+    return result;
+}
+
 static struct PyMethodDef module_functions[] = {
     {"_clearcache",     (PyCFunction)clearcache,        METH_NOARGS,    clearcache_doc},
     {"calcsize",        calcsize,       METH_O, calcsize_doc},
+    {"iter_unpack",     iter_unpack,    METH_VARARGS,   iter_unpack_doc},
     {"pack",            pack,           METH_VARARGS,   pack_doc},
     {"pack_into",       pack_into,      METH_VARARGS,   pack_into_doc},
     {"unpack",          unpack, METH_VARARGS,   unpack_doc},