]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Issue #2389: Implement a portable mechanism for pickling array objects.
authorAlexandre Vassalotti <alexandre@peadrop.com>
Wed, 15 Jul 2009 17:49:23 +0000 (17:49 +0000)
committerAlexandre Vassalotti <alexandre@peadrop.com>
Wed, 15 Jul 2009 17:49:23 +0000 (17:49 +0000)
Reviewed by: Martin v. Löwis

Lib/test/test_array.py
Misc/NEWS
Modules/arraymodule.c

index 93c6cc0e6a637fdba523e4b5d94df19284134864..d94883c32f6bd4458cb25f906f4de400ac9b6f7d 100755 (executable)
@@ -5,10 +5,16 @@
 
 import unittest
 from test import support
-from weakref import proxy
-import array, io, math
-from pickle import loads, dumps, HIGHEST_PROTOCOL
+import weakref
+import pickle
 import operator
+import io
+import math
+import struct
+
+import array
+from array import _array_reconstructor as array_reconstructor
+
 
 class ArraySubclass(array.array):
     pass
@@ -30,6 +36,123 @@ class BadConstructorTest(unittest.TestCase):
 
 tests.append(BadConstructorTest)
 
+# Machine format codes.
+#
+# Search for "enum machine_format_code" in Modules/arraymodule.c to get the
+# authoritative values.
+UNKNOWN_FORMAT = -1
+UNSIGNED_INT8 = 0
+SIGNED_INT8 = 1
+UNSIGNED_INT16_LE = 2
+UNSIGNED_INT16_BE = 3
+SIGNED_INT16_LE = 4
+SIGNED_INT16_BE = 5
+UNSIGNED_INT32_LE = 6
+UNSIGNED_INT32_BE = 7
+SIGNED_INT32_LE = 8
+SIGNED_INT32_BE = 9
+UNSIGNED_INT64_LE = 10
+UNSIGNED_INT64_BE = 11
+SIGNED_INT64_LE = 12
+SIGNED_INT64_BE = 13
+IEEE_754_FLOAT_LE = 14
+IEEE_754_FLOAT_BE = 15
+IEEE_754_DOUBLE_LE = 16
+IEEE_754_DOUBLE_BE = 17
+UTF16_LE = 18
+UTF16_BE = 19
+UTF32_LE = 20
+UTF32_BE = 21
+
+class ArrayReconstructorTest(unittest.TestCase):
+
+    def test_error(self):
+        self.assertRaises(TypeError, array_reconstructor,
+                          "", "b", 0, b"")
+        self.assertRaises(TypeError, array_reconstructor,
+                          str, "b", 0, b"")
+        self.assertRaises(TypeError, array_reconstructor,
+                          array.array, "b", '', b"")
+        self.assertRaises(TypeError, array_reconstructor,
+                          array.array, "b", 0, "")
+        self.assertRaises(ValueError, array_reconstructor,
+                          array.array, "?", 0, b"")
+        self.assertRaises(ValueError, array_reconstructor,
+                          array.array, "b", UNKNOWN_FORMAT, b"")
+        self.assertRaises(ValueError, array_reconstructor,
+                          array.array, "b", 22, b"")
+        self.assertRaises(ValueError, array_reconstructor,
+                          array.array, "d", 16, b"a")
+
+    def test_numbers(self):
+        testcases = (
+            (['B', 'H', 'I', 'L'], UNSIGNED_INT8, '=BBBB',
+             [0x80, 0x7f, 0, 0xff]),
+            (['b', 'h', 'i', 'l'], SIGNED_INT8, '=bbb',
+             [-0x80, 0x7f, 0]),
+            (['H', 'I', 'L'], UNSIGNED_INT16_LE, '<HHHH',
+             [0x8000, 0x7fff, 0, 0xffff]),
+            (['H', 'I', 'L'], UNSIGNED_INT16_BE, '>HHHH',
+             [0x8000, 0x7fff, 0, 0xffff]),
+            (['h', 'i', 'l'], SIGNED_INT16_LE, '<hhh',
+             [-0x8000, 0x7fff, 0]),
+            (['h', 'i', 'l'], SIGNED_INT16_BE, '>hhh',
+             [-0x8000, 0x7fff, 0]),
+            (['I', 'L'], UNSIGNED_INT32_LE, '<IIII',
+             [1<<31, (1<<31)-1, 0, (1<<32)-1]),
+            (['I', 'L'], UNSIGNED_INT32_BE, '>IIII',
+             [1<<31, (1<<31)-1, 0, (1<<32)-1]),
+            (['i', 'l'], SIGNED_INT32_LE, '<iii',
+             [-1<<31, (1<<31)-1, 0]),
+            (['i', 'l'], SIGNED_INT32_BE, '>iii',
+             [-1<<31, (1<<31)-1, 0]),
+            (['L'], UNSIGNED_INT64_LE, '<QQQQ',
+             [1<<63, (1<<63)-1, 0, (1<<64)-1]),
+            (['L'], UNSIGNED_INT64_BE, '>QQQQ',
+             [1<<63, (1<<63)-1, 0, (1<<64)-1]),
+            (['l'], SIGNED_INT64_LE, '<qqq',
+             [-1<<63, (1<<63)-1, 0]),
+            (['l'], SIGNED_INT64_BE, '>qqq',
+             [-1<<63, (1<<63)-1, 0]),
+            (['f'], IEEE_754_FLOAT_LE, '<ffff',
+             [16711938.0, float('inf'), float('-inf'), -0.0]),
+            (['f'], IEEE_754_FLOAT_BE, '>ffff',
+             [16711938.0, float('inf'), float('-inf'), -0.0]),
+            (['d'], IEEE_754_DOUBLE_LE, '<dddd',
+             [9006104071832581.0, float('inf'), float('-inf'), -0.0]),
+            (['d'], IEEE_754_DOUBLE_BE, '>dddd',
+             [9006104071832581.0, float('inf'), float('-inf'), -0.0])
+        )
+        for testcase in testcases:
+            valid_typecodes, mformat_code, struct_fmt, values = testcase
+            arraystr = struct.pack(struct_fmt, *values)
+            for typecode in valid_typecodes:
+                a = array.array(typecode, values)
+                b = array_reconstructor(
+                    array.array, typecode, mformat_code, arraystr)
+                self.assertEqual(a, b,
+                    msg="{0!r} != {1!r}; testcase={2!r}".format(a, b, testcase))
+
+    def test_unicode(self):
+        teststr = "Bonne Journ\xe9e \U0002030a\U00020347"
+        testcases = (
+            (UTF16_LE, "UTF-16-LE"),
+            (UTF16_BE, "UTF-16-BE"),
+            (UTF32_LE, "UTF-32-LE"),
+            (UTF32_BE, "UTF-32-BE")
+        )
+        for testcase in testcases:
+            mformat_code, encoding = testcase
+            a = array.array('u', teststr)
+            b = array_reconstructor(
+                array.array, 'u', mformat_code, teststr.encode(encoding))
+            self.assertEqual(a, b,
+                msg="{0!r} != {1!r}; testcase={2!r}".format(a, b, testcase))
+
+
+tests.append(ArrayReconstructorTest)
+
+
 class BaseTest(unittest.TestCase):
     # Required class attributes (provided by subclasses
     # typecode: the typecode to test
@@ -97,31 +220,38 @@ class BaseTest(unittest.TestCase):
         self.assertNotEqual(id(a), id(b))
         self.assertEqual(a, b)
 
+    def test_reduce_ex(self):
+        a = array.array(self.typecode, self.example)
+        for protocol in range(3):
+            self.assert_(a.__reduce_ex__(protocol)[0] is array.array)
+        for protocol in range(3, pickle.HIGHEST_PROTOCOL):
+            self.assert_(a.__reduce_ex__(protocol)[0] is array_reconstructor)
+
     def test_pickle(self):
-        for protocol in range(HIGHEST_PROTOCOL + 1):
+        for protocol in range(pickle.HIGHEST_PROTOCOL + 1):
             a = array.array(self.typecode, self.example)
-            b = loads(dumps(a, protocol))
+            b = pickle.loads(pickle.dumps(a, protocol))
             self.assertNotEqual(id(a), id(b))
             self.assertEqual(a, b)
 
             a = ArraySubclass(self.typecode, self.example)
             a.x = 10
-            b = loads(dumps(a, protocol))
+            b = pickle.loads(pickle.dumps(a, protocol))
             self.assertNotEqual(id(a), id(b))
             self.assertEqual(a, b)
             self.assertEqual(a.x, b.x)
             self.assertEqual(type(a), type(b))
 
     def test_pickle_for_empty_array(self):
-        for protocol in range(HIGHEST_PROTOCOL + 1):
+        for protocol in range(pickle.HIGHEST_PROTOCOL + 1):
             a = array.array(self.typecode)
-            b = loads(dumps(a, protocol))
+            b = pickle.loads(pickle.dumps(a, protocol))
             self.assertNotEqual(id(a), id(b))
             self.assertEqual(a, b)
 
             a = ArraySubclass(self.typecode)
             a.x = 10
-            b = loads(dumps(a, protocol))
+            b = pickle.loads(pickle.dumps(a, protocol))
             self.assertNotEqual(id(a), id(b))
             self.assertEqual(a, b)
             self.assertEqual(a.x, b.x)
@@ -757,7 +887,7 @@ class BaseTest(unittest.TestCase):
 
     def test_weakref(self):
         s = array.array(self.typecode, self.example)
-        p = proxy(s)
+        p = weakref.proxy(s)
         self.assertEqual(p.tostring(), s.tostring())
         s = None
         self.assertRaises(ReferenceError, len, p)
index a740b33c7635cf0c20941e5b3521043419f6ccf3..f94ff17a34583f853554eae4471e43231d34919b 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -61,10 +61,12 @@ Library
 Extension Modules
 -----------------
 
-- Issue #4509: array.array objects are no longer modified after an operation
+- Issue #4509: Array objects are no longer modified after an operation
   failing due to the resize restriction in-place when the object has exported
   buffers.
 
+- Issue #2389: Array objects are now pickled in a portable manner.
+
 Build
 -----
 
index b24b4c9f13e4b9b514b83704c7c0f995a0714de0..72b2043f0837c8792fa6feb9423534bd2e51c43c 100644 (file)
@@ -27,6 +27,8 @@ struct arraydescr {
        PyObject * (*getitem)(struct arrayobject *, Py_ssize_t);
        int (*setitem)(struct arrayobject *, Py_ssize_t, PyObject *);
         char *formats;
+       int is_integer_type;
+       int is_signed;
 };
 
 typedef struct arrayobject {
@@ -389,20 +391,24 @@ d_setitem(arrayobject *ap, Py_ssize_t i, PyObject *v)
 }
 
 
-/* Description of types */
+/* Description of types.
+ *
+ * Don't forget to update typecode_to_mformat_code() if you add a new
+ * typecode.
+ */
 static struct arraydescr descriptors[] = {
-       {'b', 1, b_getitem, b_setitem, "b"},
-       {'B', 1, BB_getitem, BB_setitem, "B"},
-       {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u"},
-       {'h', sizeof(short), h_getitem, h_setitem, "h"},
-       {'H', sizeof(short), HH_getitem, HH_setitem, "H"},
-       {'i', sizeof(int), i_getitem, i_setitem, "i"},
-       {'I', sizeof(int), II_getitem, II_setitem, "I"},
-       {'l', sizeof(long), l_getitem, l_setitem, "l"},
-       {'L', sizeof(long), LL_getitem, LL_setitem, "L"},
-       {'f', sizeof(float), f_getitem, f_setitem, "f"},
-       {'d', sizeof(double), d_getitem, d_setitem, "d"},
-       {'\0', 0, 0, 0, 0} /* Sentinel */
+       {'b', 1, b_getitem, b_setitem, "b", 1, 1},
+       {'B', 1, BB_getitem, BB_setitem, "B", 1, 0},
+       {'u', sizeof(Py_UNICODE), u_getitem, u_setitem, "u", 0, 0},
+       {'h', sizeof(short), h_getitem, h_setitem, "h", 1, 1},
+       {'H', sizeof(short), HH_getitem, HH_setitem, "H", 1, 0},
+       {'i', sizeof(int), i_getitem, i_setitem, "i", 1, 1},
+       {'I', sizeof(int), II_getitem, II_setitem, "I", 1, 0},
+       {'l', sizeof(long), l_getitem, l_setitem, "l", 1, 1},
+       {'L', sizeof(long), LL_getitem, LL_setitem, "L", 1, 0},
+       {'f', sizeof(float), f_getitem, f_setitem, "f", 0, 0},
+       {'d', sizeof(double), d_getitem, d_setitem, "d", 0, 0},
+       {'\0', 0, 0, 0, 0, 0, 0} /* Sentinel */
 };
 
 /****************************************************************************
@@ -1135,40 +1141,6 @@ PyDoc_STRVAR(byteswap_doc,
 Byteswap all items of the array.  If the items in the array are not 1, 2,\n\
 4, or 8 bytes in size, RuntimeError is raised.");
 
-static PyObject *
-array_reduce(arrayobject *array)
-{
-       PyObject *dict, *result;
-
-       dict = PyObject_GetAttrString((PyObject *)array, "__dict__");
-       if (dict == NULL) {
-               PyErr_Clear();
-               dict = Py_None;
-               Py_INCREF(dict);
-       }
-       if (Py_SIZE(array) > 0) {
-               if (array->ob_descr->itemsize 
-                               > PY_SSIZE_T_MAX / Py_SIZE(array)) {
-                       return PyErr_NoMemory();
-               }
-               result = Py_BuildValue("O(Cy#)O",
-                       Py_TYPE(array), 
-                       array->ob_descr->typecode,
-                       array->ob_item,
-                       Py_SIZE(array) * array->ob_descr->itemsize,
-                       dict);
-       } else {
-               result = Py_BuildValue("O(C)O",
-                       Py_TYPE(array), 
-                       array->ob_descr->typecode,
-                       dict);
-       }
-       Py_DECREF(dict);
-       return result;
-}
-
-PyDoc_STRVAR(array_doc, "Return state information for pickling.");
-
 static PyObject *
 array_reverse(arrayobject *self, PyObject *unused)
 {
@@ -1475,6 +1447,498 @@ an array of some other type.");
 
 
 
+/*********************** Pickling support ************************/
+
+enum machine_format_code {
+       UNKNOWN_FORMAT = -1,
+       /* UNKNOWN_FORMAT is used to indicate that the machine format for an
+        * array type code cannot be interpreted. When this occurs, a list of
+        * Python objects is used to represent the content of the array
+        * instead of using the memory content of the array directly. In that
+        * case, the array_reconstructor mechanism is bypassed completely, and
+        * the standard array constructor is used instead.
+        *
+        * This is will most likely occur when the machine doesn't use IEEE
+        * floating-point numbers.
+        */
+
+       UNSIGNED_INT8 = 0,
+       SIGNED_INT8 = 1,
+       UNSIGNED_INT16_LE = 2,
+       UNSIGNED_INT16_BE = 3,
+       SIGNED_INT16_LE = 4,
+       SIGNED_INT16_BE = 5,
+       UNSIGNED_INT32_LE = 6,
+       UNSIGNED_INT32_BE = 7,
+       SIGNED_INT32_LE = 8,
+       SIGNED_INT32_BE = 9,
+       UNSIGNED_INT64_LE = 10,
+       UNSIGNED_INT64_BE = 11,
+       SIGNED_INT64_LE = 12,
+       SIGNED_INT64_BE = 13,
+       IEEE_754_FLOAT_LE = 14,
+       IEEE_754_FLOAT_BE = 15,
+       IEEE_754_DOUBLE_LE = 16,
+       IEEE_754_DOUBLE_BE = 17,
+       UTF16_LE = 18,
+       UTF16_BE = 19,
+       UTF32_LE = 20,
+       UTF32_BE = 21
+};
+#define MACHINE_FORMAT_CODE_MIN 0
+#define MACHINE_FORMAT_CODE_MAX 21
+
+static const struct mformatdescr {
+       size_t size;
+       int is_signed;
+       int is_big_endian;
+} mformat_descriptors[] = {
+       {1, 0, 0},              /* 0: UNSIGNED_INT8 */
+       {1, 1, 0},              /* 1: SIGNED_INT8 */
+       {2, 0, 0},              /* 2: UNSIGNED_INT16_LE */
+       {2, 0, 1},              /* 3: UNSIGNED_INT16_BE */
+       {2, 1, 0},              /* 4: SIGNED_INT16_LE */
+       {2, 1, 1},              /* 5: SIGNED_INT16_BE */
+       {4, 0, 0},              /* 6: UNSIGNED_INT32_LE */
+       {4, 0, 1},              /* 7: UNSIGNED_INT32_BE */
+       {4, 1, 0},              /* 8: SIGNED_INT32_LE */
+       {4, 1, 1},              /* 9: SIGNED_INT32_BE */
+       {8, 0, 0},              /* 10: UNSIGNED_INT64_LE */
+       {8, 0, 1},              /* 11: UNSIGNED_INT64_BE */
+       {8, 1, 0},              /* 12: SIGNED_INT64_LE */
+       {8, 1, 1},              /* 13: SIGNED_INT64_BE */
+       {4, 0, 0},              /* 14: IEEE_754_FLOAT_LE */
+       {4, 0, 1},              /* 15: IEEE_754_FLOAT_BE */
+       {8, 0, 0},              /* 16: IEEE_754_DOUBLE_LE */
+       {8, 0, 1},              /* 17: IEEE_754_DOUBLE_BE */
+       {4, 0, 0},              /* 18: UTF16_LE */
+       {4, 0, 1},              /* 19: UTF16_BE */
+       {8, 0, 0},              /* 20: UTF32_LE */
+       {8, 0, 1}               /* 21: UTF32_BE */
+};
+
+
+/*
+ * Internal: This function is used to find the machine format of a given
+ * array type code. This returns UNKNOWN_FORMAT when the machine format cannot
+ * be found.
+ */
+static enum machine_format_code
+typecode_to_mformat_code(int typecode)
+{
+#ifdef BYTEORDER_IS_BIG_ENDIAN
+       const int is_big_endian = 1;
+#else
+       const int is_big_endian = 0;
+#endif
+       size_t intsize;
+       int is_signed;
+
+       switch (typecode) {
+       case 'b':
+               return SIGNED_INT8;
+       case 'B':
+               return UNSIGNED_INT8;
+
+       case 'u':
+               if (sizeof(Py_UNICODE) == 2) {
+                       return UTF16_LE + is_big_endian;
+               }
+               if (sizeof(Py_UNICODE) == 4) {
+                       return UTF32_LE + is_big_endian;
+               }
+               return UNKNOWN_FORMAT;
+
+       case 'f':
+               if (sizeof(float) == 4) {
+                       const float y = 16711938.0;
+                       if (memcmp(&y, "\x4b\x7f\x01\x02", 4) == 0)
+                               return IEEE_754_FLOAT_BE;
+                       if (memcmp(&y, "\x02\x01\x7f\x4b", 4) == 0)
+                               return IEEE_754_FLOAT_LE;
+               }
+               return UNKNOWN_FORMAT;
+
+       case 'd':
+               if (sizeof(double) == 8) {
+                       const double x = 9006104071832581.0;
+                       if (memcmp(&x, "\x43\x3f\xff\x01\x02\x03\x04\x05", 8) == 0)
+                               return IEEE_754_DOUBLE_BE;
+                       if (memcmp(&x, "\x05\x04\x03\x02\x01\xff\x3f\x43", 8) == 0)
+                               return IEEE_754_DOUBLE_LE;
+               }
+               return UNKNOWN_FORMAT;
+
+       /* Integers */
+       case 'h':
+               intsize = sizeof(short);
+               is_signed = 1;
+               break;
+       case 'H':
+               intsize = sizeof(short);
+               is_signed = 0;
+               break;
+       case 'i':
+               intsize = sizeof(int);
+               is_signed = 1;
+               break;
+       case 'I':
+               intsize = sizeof(int);
+               is_signed = 0;
+               break;
+       case 'l':
+               intsize = sizeof(long);
+               is_signed = 1;
+               break;
+       case 'L':
+               intsize = sizeof(long);
+               is_signed = 0;
+               break;
+       default:
+               return UNKNOWN_FORMAT;
+       }
+       switch (intsize) {
+       case 2:
+               return UNSIGNED_INT16_LE + is_big_endian + (2 * is_signed);
+       case 4:
+               return UNSIGNED_INT32_LE + is_big_endian + (2 * is_signed);
+       case 8:
+               return UNSIGNED_INT64_LE + is_big_endian + (2 * is_signed);
+       default:
+               return UNKNOWN_FORMAT;
+       }
+}
+
+/* Forward declaration. */
+static PyObject *array_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
+
+/*
+ * Internal: This function wraps the array constructor--i.e., array_new()--to
+ * allow the creation of array objects from C code without having to deal
+ * directly the tuple argument of array_new(). The typecode argument is a
+ * Unicode character value, like 'i' or 'f' for example, representing an array
+ * type code. The items argument is a bytes or a list object from which
+ * contains the initial value of the array.
+ *  
+ * On success, this functions returns the array object created. Otherwise,
+ * NULL is returned to indicate a failure.
+ */
+static PyObject *
+make_array(PyTypeObject *arraytype, int typecode, PyObject *items)
+{
+       PyObject *new_args;
+       PyObject *array_obj;
+       PyObject *typecode_obj;
+       Py_UNICODE typecode_str[1] = {typecode};
+
+       assert(arraytype != NULL);
+       assert(items != NULL);
+
+       typecode_obj = PyUnicode_FromUnicode(typecode_str, 1);
+       if (typecode_obj == NULL)
+               return NULL;
+
+       new_args = PyTuple_New(2);
+       if (new_args == NULL)
+               return NULL;
+       Py_INCREF(items);
+       PyTuple_SET_ITEM(new_args, 0, typecode_obj);
+       PyTuple_SET_ITEM(new_args, 1, items);
+
+       array_obj = array_new(arraytype, new_args, NULL);
+       Py_DECREF(new_args);
+       if (array_obj == NULL)
+               return NULL;
+
+       return array_obj;
+}
+
+/*
+ * This functions is a special constructor used when unpickling an array. It
+ * provides a portable way to rebuild an array from its memory representation.
+ */
+static PyObject *
+array_reconstructor(PyObject *self, PyObject *args)
+{
+       PyTypeObject *arraytype;
+       PyObject *items;
+       PyObject *converted_items;
+       PyObject *result;
+       int typecode;
+       enum machine_format_code mformat_code;
+       struct arraydescr *descr;
+
+       if (!PyArg_ParseTuple(args, "OCiO:array._array_reconstructor",
+                       &arraytype, &typecode, &mformat_code, &items))
+               return NULL;
+
+       if (!PyType_Check(arraytype)) {
+               PyErr_Format(PyExc_TypeError,
+                       "first argument must a type object, not %.200s",
+                       Py_TYPE(arraytype)->tp_name);
+               return NULL;
+       }
+       if (!PyType_IsSubtype(arraytype, &Arraytype)) {
+               PyErr_Format(PyExc_TypeError,
+                       "%.200s is not a subtype of %.200s",
+                       arraytype->tp_name, Arraytype.tp_name);
+               return NULL;
+       }
+       for (descr = descriptors; descr->typecode != '\0'; descr++) {
+               if (descr->typecode == typecode)
+                       break;
+       }
+       if (descr->typecode == '\0') {
+               PyErr_SetString(PyExc_ValueError,
+                               "second argument must be a valid type code");
+               return NULL;
+       }
+       if (mformat_code < MACHINE_FORMAT_CODE_MIN ||
+           mformat_code > MACHINE_FORMAT_CODE_MAX) {
+               PyErr_SetString(PyExc_ValueError,
+                       "third argument must be a valid machine format code.");
+               return NULL;
+       }
+       if (!PyBytes_Check(items)) {
+               PyErr_Format(PyExc_TypeError,
+                       "fourth argument should be bytes, not %.200s",
+                       Py_TYPE(items)->tp_name);
+               return NULL;
+       }
+
+       /* Fast path: No decoding has to be done. */
+       if (mformat_code == typecode_to_mformat_code(typecode) ||
+           mformat_code == UNKNOWN_FORMAT) {
+               return make_array(arraytype, typecode, items);
+       }
+
+       /* Slow path: Decode the byte string according to the given machine
+        * format code. This occurs when the computer unpickling the array 
+        * object is architecturally different from the one that pickled the
+        * array.
+        */
+       if (Py_SIZE(items) % mformat_descriptors[mformat_code].size != 0) {
+               PyErr_SetString(PyExc_ValueError,
+                               "string length not a multiple of item size");
+               return NULL;
+       }
+       switch (mformat_code) {
+       case IEEE_754_FLOAT_LE:
+       case IEEE_754_FLOAT_BE: {
+               int i;
+               int le = (mformat_code == IEEE_754_FLOAT_LE) ? 1 : 0;
+               Py_ssize_t itemcount = Py_SIZE(items) / 4;
+               const unsigned char *memstr =
+                       (unsigned char *)PyBytes_AS_STRING(items);
+
+               converted_items = PyList_New(itemcount);
+               if (converted_items == NULL)
+                       return NULL;
+               for (i = 0; i < itemcount; i++) {
+                       PyObject *pyfloat = PyFloat_FromDouble(
+                               _PyFloat_Unpack4(&memstr[i * 4], le));
+                       if (pyfloat == NULL) {
+                               Py_DECREF(converted_items);
+                               return NULL;
+                       }
+                       PyList_SET_ITEM(converted_items, i, pyfloat);
+               }
+               break;
+       }
+       case IEEE_754_DOUBLE_LE:
+       case IEEE_754_DOUBLE_BE: {
+               int i;
+               int le = (mformat_code == IEEE_754_DOUBLE_LE) ? 1 : 0;
+               Py_ssize_t itemcount = Py_SIZE(items) / 8;
+               const unsigned char *memstr =
+                       (unsigned char *)PyBytes_AS_STRING(items);
+
+               converted_items = PyList_New(itemcount);
+               if (converted_items == NULL)
+                       return NULL;
+               for (i = 0; i < itemcount; i++) {
+                       PyObject *pyfloat = PyFloat_FromDouble(
+                               _PyFloat_Unpack8(&memstr[i * 8], le));
+                       if (pyfloat == NULL) {
+                               Py_DECREF(converted_items);
+                               return NULL;
+                       }
+                       PyList_SET_ITEM(converted_items, i, pyfloat);
+               }
+               break;
+       }
+       case UTF16_LE:
+       case UTF16_BE: {
+               int byteorder = (mformat_code == UTF16_LE) ? -1 : 1;
+               converted_items = PyUnicode_DecodeUTF16(
+                       PyBytes_AS_STRING(items), Py_SIZE(items),
+                       "strict", &byteorder);
+               if (converted_items == NULL)
+                       return NULL;
+               break;
+       }
+       case UTF32_LE:
+       case UTF32_BE: {
+               int byteorder = (mformat_code == UTF32_LE) ? -1 : 1;
+               converted_items = PyUnicode_DecodeUTF32(
+                       PyBytes_AS_STRING(items), Py_SIZE(items),
+                       "strict", &byteorder);
+               if (converted_items == NULL)
+                       return NULL;
+               break;
+       }
+
+       case UNSIGNED_INT8:
+       case SIGNED_INT8:
+       case UNSIGNED_INT16_LE:
+       case UNSIGNED_INT16_BE:
+       case SIGNED_INT16_LE:
+       case SIGNED_INT16_BE:
+       case UNSIGNED_INT32_LE:
+       case UNSIGNED_INT32_BE:
+       case SIGNED_INT32_LE:
+       case SIGNED_INT32_BE:
+       case UNSIGNED_INT64_LE:
+       case UNSIGNED_INT64_BE:
+       case SIGNED_INT64_LE:
+       case SIGNED_INT64_BE: {
+               int i;
+               const struct mformatdescr mf_descr =
+                       mformat_descriptors[mformat_code];
+               Py_ssize_t itemcount = Py_SIZE(items) / mf_descr.size;
+               const unsigned char *memstr =
+                       (unsigned char *)PyBytes_AS_STRING(items);
+               struct arraydescr *descr;
+
+               /* If possible, try to pack array's items using a data type
+                * that fits better. This may result in an array with narrower
+                * or wider elements.
+                *
+                * For example, if a 32-bit machine pickles a L-code array of
+                * unsigned longs, then the array will be unpickled by 64-bit
+                * machine as an I-code array of unsigned ints.
+                *
+                * XXX: Is it possible to write a unit test for this?
+                */
+               for (descr = descriptors; descr->typecode != '\0'; descr++) {
+                       if (descr->is_integer_type &&
+                           descr->itemsize == mf_descr.size &&
+                           descr->is_signed == mf_descr.is_signed)
+                               typecode = descr->typecode;
+               }
+
+               converted_items = PyList_New(itemcount);
+               if (converted_items == NULL)
+                       return NULL;
+               for (i = 0; i < itemcount; i++) {
+                       PyObject *pylong;
+
+                       pylong = _PyLong_FromByteArray(
+                               &memstr[i * mf_descr.size],
+                               mf_descr.size,
+                               !mf_descr.is_big_endian,
+                               mf_descr.is_signed);
+                       if (pylong == NULL) {
+                               Py_DECREF(converted_items);
+                               return NULL;
+                       }
+                       PyList_SET_ITEM(converted_items, i, pylong);
+               }
+               break;
+       }
+       case UNKNOWN_FORMAT:
+               /* Impossible, but needed to shut up GCC about the unhandled
+                * enumeration value.
+                */
+               return NULL;
+       }
+
+       result = make_array(arraytype, typecode, converted_items);
+       Py_DECREF(converted_items);
+       return result;
+}
+
+static PyObject *
+array_reduce_ex(arrayobject *array, PyObject *value)
+{
+       PyObject *dict;
+       PyObject *result;
+       PyObject *array_str;
+       int typecode = array->ob_descr->typecode;
+       int mformat_code;
+       static PyObject *array_reconstructor = NULL;
+       long protocol;
+
+       if (array_reconstructor == NULL) {
+               PyObject *array_module = PyImport_ImportModule("array");
+               if (array_module == NULL)
+                       return NULL;
+               array_reconstructor = PyObject_GetAttrString(
+                       array_module,
+                       "_array_reconstructor");
+               Py_DECREF(array_module);
+               if (array_reconstructor == NULL)
+                       return NULL;
+       }
+
+       if (!PyLong_Check(value)) {
+               PyErr_SetString(PyExc_TypeError,
+                               "__reduce_ex__ argument should an integer");
+               return NULL;
+       }
+       protocol = PyLong_AsLong(value);
+       if (protocol == -1 && PyErr_Occurred())
+               return NULL;
+
+       dict = PyObject_GetAttrString((PyObject *)array, "__dict__");
+       if (dict == NULL) {
+               if (!PyErr_ExceptionMatches(PyExc_AttributeError))
+                       return NULL;
+               PyErr_Clear();
+               dict = Py_None;
+               Py_INCREF(dict);
+       }
+
+       mformat_code = typecode_to_mformat_code(typecode);
+       if (mformat_code == UNKNOWN_FORMAT || protocol < 3) {
+               /* Convert the array to a list if we got something weird
+                * (e.g., non-IEEE floats), or we are pickling the array using
+                * a Python 2.x compatible protocol.
+                *
+                * It is necessary to a list representation for Python 2.x
+                * compatible pickle protocol, since Python 2's str objects
+                * are unpickled as unicode by Python 3. Thus it is impossible
+                * to make arrays unpicklable by Python 3 by using their memory
+                * representation, unless we resort to ugly hacks such as
+                * coercing unicode objects to bytes in array_reconstructor.
+                */
+               PyObject *list;
+               list = array_tolist(array, NULL);
+               if (list == NULL) {
+                       Py_DECREF(dict);
+                       return NULL;
+               }
+               result = Py_BuildValue(
+                       "O(CO)O", Py_TYPE(array), typecode, list, dict);
+               Py_DECREF(list);
+               Py_DECREF(dict);
+               return result;
+       }
+
+       array_str = array_tostring(array, NULL);
+       if (array_str == NULL) {
+               Py_DECREF(dict);
+               return NULL;
+       }
+       result = Py_BuildValue(
+               "O(OCiN)O", array_reconstructor, Py_TYPE(array), typecode,
+               mformat_code, array_str, dict);
+       Py_DECREF(dict);
+       return result;
+}
+
+PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
+
 static PyObject *
 array_get_typecode(arrayobject *a, void *closure)
 {
@@ -1525,8 +1989,8 @@ static PyMethodDef array_methods[] = {
         insert_doc},
        {"pop",         (PyCFunction)array_pop,         METH_VARARGS,
         pop_doc},
-       {"__reduce__",  (PyCFunction)array_reduce,      METH_NOARGS,
-        array_doc},
+       {"__reduce_ex__", (PyCFunction)array_reduce_ex, METH_O,
+        reduce_doc},
        {"remove",      (PyCFunction)array_remove,      METH_O,
         remove_doc},
        {"reverse",     (PyCFunction)array_reverse,     METH_NOARGS,
@@ -2167,6 +2631,8 @@ static PyTypeObject PyArrayIter_Type = {
 
 /* No functions in array module. */
 static PyMethodDef a_methods[] = {
+    {"_array_reconstructor", array_reconstructor, METH_VARARGS,
+     PyDoc_STR("Internal. Used for pickling support.")},
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };