]> git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
Added q/Q standard (x-platform 8-byte ints) mode in struct module.
author: Tim Peters <tim.peters@gmail.com>
Tue, 12 Jun 2001 01:22:22 +0000 (01:22 +0000)
committer: Tim Peters <tim.peters@gmail.com>
Tue, 12 Jun 2001 01:22:22 +0000 (01:22 +0000)
This completes the q/Q project.

longobject.c _PyLong_AsByteArray:  The original code had a gross bug:
the most-significant Python digit doesn't necessarily have SHIFT
significant bits, and you really need to count how many copies of the sign
bit it has else spurious overflow errors result.

test_struct.py:  This now does exhaustive std q/Q testing at, and on both
sides of, all relevant power-of-2 boundaries, both positive and negative.

NEWS:  Added brief dict news while I was at it.

Doc/lib/libstruct.tex
Lib/test/test_struct.py
Misc/NEWS
Modules/structmodule.c
Objects/longobject.c

index 9a1942da608e148eb133b1367f2c3224e2b040b0..f8056a2af9ed88e43a5c6d6ba74c0caa3d02b151 100644 (file)
@@ -72,7 +72,8 @@ Notes:
 \item[(1)]
   The \character{q} and \character{Q} conversion codes are available in
   native mode only if the platform C compiler supports C \ctype{long long},
-  or, on Windows, \ctype{__int64}.
+  or, on Windows, \ctype{__int64}.  They're always available in standard
+  modes.
 \end{description}
 
 
@@ -100,8 +101,8 @@ passed in to \function{pack()} is too long, the stored representation
 is truncated.  If the string is too short, padding is used to ensure
 that exactly enough bytes are used to satisfy the count.
 
-For the \character{I} and \character{L} format characters, the return
-value is a Python long integer.
+For the \character{I}, \character{L}, \character{q} and \character{Q}
+format characters, the return value is a Python long integer.
 
 For the \character{P} format character, the return value is a Python
 integer or long integer, depending on the size needed to hold a
@@ -139,10 +140,12 @@ Native size and alignment are determined using the C compiler's
 order.
 
 Standard size and alignment are as follows: no alignment is required
-for any type (so you have to use pad bytes); \ctype{short} is 2 bytes;
-\ctype{int} and \ctype{long} are 4 bytes.  \ctype{float} and
-\ctype{double} are 32-bit and 64-bit IEEE floating point numbers,
-respectively.
+for any type (so you have to use pad bytes);
+\ctype{short} is 2 bytes;
+\ctype{int} and \ctype{long} are 4 bytes;
+\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes;
+\ctype{float} and \ctype{double} are 32-bit and 64-bit
+IEEE floating point numbers, respectively.
 
 Note the difference between \character{@} and \character{=}: both use
 native byte order, but the size and alignment of the latter is
index c977913dfcfcd3b25f18659b4863f0929ee8facc..e6c8bb24c69ee49eb94c3905f150c6d1bbff0507 100644 (file)
@@ -12,6 +12,16 @@ def simple_err(func, *args):
             func.__name__, args)
 ##      pdb.set_trace()
 
+def any_err(func, *args):  # Call func(*args); fail the test unless it raises one of the errors below.
+    try:
+        apply(func, args)
+    except (struct.error, OverflowError, TypeError):  # any of these counts as a correct rejection
+        pass
+    else:
+        raise TestFailed, "%s%s did not raise error" % (
+            func.__name__, args)
+##      pdb.set_trace()
+
 simple_err(struct.calcsize, 'Z')
 
 sz = struct.calcsize('i')
@@ -113,7 +123,8 @@ for fmt, arg, big, lil, asy in tests:
             raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % (
                 `fmt`, `res`, `rev`, `arg`)
 
-# Some q/Q sanity checks.
+###########################################################################
+# q/Q tests.
 
 has_native_qQ = 1
 try:
@@ -124,18 +135,22 @@ except struct.error:
 if verbose:
     print "Platform has native q/Q?", has_native_qQ and "Yes." or "No."
 
-simple_err(struct.pack, "Q", -1)   # can't pack -1 as unsigned regardless
+any_err(struct.pack, "Q", -1)   # can't pack -1 as unsigned regardless
 simple_err(struct.pack, "q", "a")  # can't pack string as 'q' regardless
 simple_err(struct.pack, "Q", "a")  # ditto, but 'Q'
 
+def string_reverse(s):  # Return a new string holding the characters of s in reverse order.
+    chars = list(s)
+    chars.reverse()  # reverses the list in place
+    return "".join(chars)
+
 def bigendian_to_native(value):
     if isbigendian:
         return value
-    chars = list(value)
-    chars.reverse()
-    return "".join(chars)
+    else:
+        return string_reverse(value)
 
-if has_native_qQ:
+def test_native_qQ():
     bytes = struct.calcsize('q')
     # The expected values here are in big-endian format, primarily because
     # I'm on a little-endian machine and so this is the clearest way (for
@@ -156,3 +171,147 @@ if has_native_qQ:
         verify(retrieved == input,
                "%r-unpack of %r gave %r, not %r" %
                     (format, got, retrieved, input))
+
+if has_native_qQ:
+    test_native_qQ()
+
+# Standard q/Q (8 bytes; should work on all platforms).
+
+MIN_Q, MAX_Q = 0, 2L**64 - 1
+MIN_q, MAX_q = -(2L**63), 2L**63 - 1
+
+import binascii
+def test_one_qQ(x, pack=struct.pack,
+                   unpack=struct.unpack,
+                   unhexlify=binascii.unhexlify):  # Check std-mode q/Q pack and unpack of x in both byte orders.
+    if verbose:
+        print "trying std q/Q on", x, "==", hex(x)
+
+    # Try 'q'.
+    if MIN_q <= x <= MAX_q:
+        # Try '>q'.
+        expected = long(x)
+        if x < 0:
+            expected += 1L << 64  # 64-bit two's-complement value of a negative x
+            assert expected > 0
+        expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+        if len(expected) & 1:  # unhexlify is given an even number of hex digits
+            expected = "0" + expected
+        expected = unhexlify(expected)
+        expected = "\x00" * (8 - len(expected)) + expected  # left-pad with zero bytes to 8 bytes
+
+        # Does '>q' pack work?
+        got = pack(">q", x)
+        verify(got == expected,
+               "'>q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # Does '>q' unpack work?
+        retrieved = unpack(">q", got)[0]
+        verify(x == retrieved,
+               "'>q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # Prepending an extra byte gives 9 bytes; unpack should raise an error.
+        any_err(unpack, ">q", '\x01' + got)
+
+        # Try '<q'.
+        expected = string_reverse(expected)
+
+        # Does '<q' pack work?
+        got = pack("<q", x)
+        verify(got == expected,
+               "'<q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # Does '<q' unpack work?
+        retrieved = unpack("<q", got)[0]
+        verify(x == retrieved,
+               "'<q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # Prepending an extra byte gives 9 bytes; unpack should raise an error.
+        any_err(unpack, "<q", '\x01' + got)
+
+    else:
+        # x is out of q's range -- verify pack realizes that.
+        any_err(pack, '>q', x)
+        any_err(pack, '<q', x)
+
+    # Much the same for 'Q'.
+    if MIN_Q <= x <= MAX_Q:
+        # Try '>Q'.
+        expected = long(x)
+        expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+        if len(expected) & 1:  # unhexlify is given an even number of hex digits
+            expected = "0" + expected
+        expected = unhexlify(expected)
+        expected = "\x00" * (8 - len(expected)) + expected  # left-pad with zero bytes to 8 bytes
+
+        # Does '>Q' pack work?
+        got = pack(">Q", x)
+        verify(got == expected,
+               "'>Q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # Does '>Q' unpack work?
+        retrieved = unpack(">Q", got)[0]
+        verify(x == retrieved,
+               "'>Q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # Prepending an extra byte gives 9 bytes; unpack should raise an error.
+        any_err(unpack, ">Q", '\x01' + got)
+
+        # Try '<Q'.
+        expected = string_reverse(expected)
+
+        # Does '<Q' pack work?
+        got = pack("<Q", x)
+        verify(got == expected,
+               "'<Q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # Does '<Q' unpack work?
+        retrieved = unpack("<Q", got)[0]
+        verify(x == retrieved,
+               "'<Q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # Prepending an extra byte gives 9 bytes; unpack should raise an error.
+        any_err(unpack, "<Q", '\x01' + got)
+
+    else:
+        # x is out of Q's range -- verify pack realizes that.
+        any_err(pack, '>Q', x)
+        any_err(pack, '<Q', x)
+
+def test_std_qQ():  # Run test_one_qQ over power-of-2 boundary values and random 64-bit values.
+    from random import randrange
+
+    # Create all interesting powers of 2.
+    values = []
+    for exp in range(70):  # 2**0 .. 2**69, so some values exceed 64 bits
+        values.append(1L << exp)
+
+    # Add some random 64-bit values.
+    for i in range(50):
+        val = 0L
+        for j in range(8):  # build the value one random byte at a time
+            val = (val << 8) | randrange(256)
+        values.append(val)
+
+    # Try all those, and their negations, and +-1 from them.  Note
+    # that this tests all power-of-2 boundaries in range, and a few out
+    # of range, plus +-(2**n +- 1).
+    for base in values:
+        for val in -base, base:
+            for incr in -1, 0, 1:
+                x = val + incr
+                try:
+                    x = int(x)  # use a plain int when x fits in one
+                except OverflowError:
+                    pass  # otherwise keep x as a long
+                test_one_qQ(x)
+
+test_std_qQ()
index 18a87e9cb215b9fbf8c5262da1b6bb71b585f7a7..16850aac686cebcb55e8497db46ca8a28963706f 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -84,6 +84,9 @@ Core
   sortdict(dict) function for a simple way to display a dict in sorted
   order.
 
+- Many other small changes to dicts were made, resulting in faster
+  operation along the most common code paths.
+
 - Dictionary objects now support the "in" operator: "x in dict" means
   the same as dict.has_key(x).
 
@@ -119,7 +122,7 @@ Core
 
 - Collisions in dicts are resolved via a new approach, which can help
   dramatically in bad cases.  For example, looking up every key in a dict
-  d with d.keys() = [i << 16 for i in range(20000)] is approximately 500x
+  d with d.keys() == [i << 16 for i in range(20000)] is approximately 500x
   faster now.  Thanks to Christian Tismer for pointing out the cause and
   the nature of an effective cure (last December! better late than never).
 
@@ -145,8 +148,8 @@ Library
   native mode, these can be used only when the platform C compiler supports
   these types (when HAVE_LONG_LONG is #define'd by the Python config
   process), and then they inherit the sizes and alignments of the C types.
-  XXX TODO In standard mode, 'q' and 'Q' are supported on all platforms, and
-  XXX TODO are 8-byte integral types.
+  In standard mode, 'q' and 'Q' are supported on all platforms, and are
+  8-byte integral types.
 
 Tests
 
index 9b799781814484bd9fab31bdd7dffb1c8df5768d..4a8886f8be9ac0e132e79187775c677122e36605 100644 (file)
@@ -80,6 +80,34 @@ typedef struct { char c; LONG_LONG x; } s_long_long;
 #pragma options align=reset
 #endif
 
+/* Helper to get a PyLongObject by hook or by crook.  Caller should decref.
+
+   v must not be NULL.  A Python int is converted to a new long; a Python
+   long is returned as-is with its reference count incremented; any other
+   object is converted through its type's nb_long slot when it has one.
+
+   Returns NULL on failure.  StructError is set here when v has no nb_long
+   slot or when the slot returns something that is not a long.  A NULL
+   returned by nb_long itself is passed through unchanged (presumably with
+   the slot's own exception already set). */
+
+static PyObject *
+get_pylong(PyObject *v)
+{
+       PyNumberMethods *m;
+
+       assert(v != NULL);
+       if (PyInt_Check(v))
+               return PyLong_FromLong(PyInt_AS_LONG(v));
+       if (PyLong_Check(v)) {
+               Py_INCREF(v); /* caller always decrefs the result */
+               return v;
+       }
+       m = v->ob_type->tp_as_number;
+       if (m != NULL && m->nb_long != NULL) {
+               v = m->nb_long(v); /* v now refers to the slot's result, not the argument */
+               if (v == NULL)
+                       return NULL;
+               if (PyLong_Check(v))
+                       return v;
+               Py_DECREF(v); /* slot returned a non-long: drop it and report below */
+       }
+       PyErr_SetString(StructError,
+                       "cannot convert argument to long");
+       return NULL;
+}
+
 /* Helper routine to get a Python integer and raise the appropriate error
    if it isn't one */
 
@@ -123,33 +151,13 @@ static int
 get_longlong(PyObject *v, LONG_LONG *p)
 {
        LONG_LONG x;
-       int v_needs_decref = 0;
 
-       if (PyInt_Check(v)) {
-               x = (LONG_LONG)PyInt_AS_LONG(v);
-               *p = x;
-               return 0;
-       }
-       if (!PyLong_Check(v)) {
-               PyNumberMethods *m = v->ob_type->tp_as_number;
-               if (m != NULL && m->nb_long != NULL) {
-                       v = m->nb_long(v);
-                       if (v == NULL)
-                               return -1;
-                       v_needs_decref = 1;
-               }
-               if (!PyLong_Check(v)) {
-                       PyErr_SetString(StructError,
-                                       "cannot convert argument to long");
-                       if (v_needs_decref)
-                               Py_DECREF(v);
-                       return -1;
-               }
-       }
+       v = get_pylong(v);
+       if (v == NULL)
+               return -1;
        assert(PyLong_Check(v));
        x = PyLong_AsLongLong(v);
-       if (v_needs_decref)
-               Py_DECREF(v);
+       Py_DECREF(v);
        if (x == (LONG_LONG)-1 && PyErr_Occurred())
                return -1;
        *p = x;
@@ -162,39 +170,13 @@ static int
 get_ulonglong(PyObject *v, unsigned LONG_LONG *p)
 {
        unsigned LONG_LONG x;
-       int v_needs_decref = 0;
 
-       if (PyInt_Check(v)) {
-               long i = PyInt_AS_LONG(v);
-               if (i < 0) {
-                       PyErr_SetString(StructError, "can't convert negative "
-                                       "int to unsigned");
-                       return -1;
-               }
-               x = (unsigned LONG_LONG)i;
-               *p = x;
-               return 0;
-       }
-       if (!PyLong_Check(v)) {
-               PyNumberMethods *m = v->ob_type->tp_as_number;
-               if (m != NULL && m->nb_long != NULL) {
-                       v = m->nb_long(v);
-                       if (v == NULL)
-                               return -1;
-                       v_needs_decref = 1;
-               }
-               if (!PyLong_Check(v)) {
-                       PyErr_SetString(StructError,
-                                       "cannot convert argument to long");
-                       if (v_needs_decref)
-                               Py_DECREF(v);
-                       return -1;
-               }
-       }
+       v = get_pylong(v);
+       if (v == NULL)
+               return -1;
        assert(PyLong_Check(v));
        x = PyLong_AsUnsignedLongLong(v);
-       if (v_needs_decref)
-               Py_DECREF(v);
+       Py_DECREF(v);
        if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred())
                return -1;
        *p = x;
@@ -500,7 +482,7 @@ typedef struct _formatdef {
    TYPE is one of char, byte, ubyte, etc.
 */
 
-/* Native mode routines. */
+/* Native mode routines. ****************************************************/
 
 static PyObject *
 nu_char(const char *p, const formatdef *f)
@@ -797,6 +779,8 @@ static formatdef native_table[] = {
        {0}
 };
 
+/* Big-endian routines. *****************************************************/
+
 static PyObject *
 bu_int(const char *p, const formatdef *f)
 {
@@ -825,6 +809,24 @@ bu_uint(const char *p, const formatdef *f)
                return PyInt_FromLong((long)x);
 }
 
+static PyObject *
+bu_longlong(const char *p, const formatdef *f)
+{ /* Unpack the 8 bytes at p, stored big-endian, as a signed value; f is unused. */
+       return _PyLong_FromByteArray((const unsigned char *)p,
+                                     8,
+                                     0, /* little-endian: no (big-endian) */
+                                     1  /* signed: yes */);
+}
+
+static PyObject *
+bu_ulonglong(const char *p, const formatdef *f)
+{ /* Unpack the 8 bytes at p, stored big-endian, as an unsigned value; f is unused. */
+       return _PyLong_FromByteArray((const unsigned char *)p,
+                                     8,
+                                     0, /* little-endian: no (big-endian) */
+                                     0  /* signed: no (unsigned) */);
+}
+
 static PyObject *
 bu_float(const char *p, const formatdef *f)
 {
@@ -867,6 +869,34 @@ bp_uint(char *p, PyObject *v, const formatdef *f)
        return 0;
 }
 
+/* Pack v into the 8 bytes at p as a big-endian signed integer.
+   Returns -1 if v cannot be converted to a long (get_pylong has then set
+   the exception); otherwise returns the result of _PyLong_AsByteArray.
+   f is unused. */
+static int
+bp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+       int res;
+       v = get_pylong(v);
+       if (v == NULL)
+               return -1;      /* must not pass NULL on or decref it */
+       res = _PyLong_AsByteArray((PyLongObject *)v,
+                                 (unsigned char *)p,
+                                 8,
+                                 0, /* little_endian: no (big-endian) */
+                                 1  /* signed: yes */);
+       Py_DECREF(v);
+       return res;
+}
+
+/* Pack v into the 8 bytes at p as a big-endian unsigned integer.
+   Returns -1 if v cannot be converted to a long (get_pylong has then set
+   the exception); otherwise returns the result of _PyLong_AsByteArray.
+   f is unused. */
+static int
+bp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+       int res;
+       v = get_pylong(v);
+       if (v == NULL)
+               return -1;      /* must not pass NULL on or decref it */
+       res = _PyLong_AsByteArray((PyLongObject *)v,
+                                 (unsigned char *)p,
+                                 8,
+                                 0, /* little_endian: no (big-endian) */
+                                 0  /* signed: no (unsigned) */);
+       Py_DECREF(v);
+       return res;
+}
+
 static int
 bp_float(char *p, PyObject *v, const formatdef *f)
 {
@@ -904,11 +934,15 @@ static formatdef bigendian_table[] = {
        {'I',   4,              0,              bu_uint,        bp_uint},
        {'l',   4,              0,              bu_int,         bp_int},
        {'L',   4,              0,              bu_uint,        bp_uint},
+       {'q',   8,              0,              bu_longlong,    bp_longlong},
+       {'Q',   8,              0,              bu_ulonglong,   bp_ulonglong},
        {'f',   4,              0,              bu_float,       bp_float},
        {'d',   8,              0,              bu_double,      bp_double},
        {0}
 };
 
+/* Little-endian routines. *****************************************************/
+
 static PyObject *
 lu_int(const char *p, const formatdef *f)
 {
@@ -937,6 +971,24 @@ lu_uint(const char *p, const formatdef *f)
                return PyInt_FromLong((long)x);
 }
 
+static PyObject *
+lu_longlong(const char *p, const formatdef *f)
+{ /* Unpack the 8 bytes at p, stored little-endian, as a signed value; f is unused. */
+       return _PyLong_FromByteArray((const unsigned char *)p,
+                                     8,
+                                     1, /* little-endian: yes */
+                                     1  /* signed: yes */);
+}
+
+static PyObject *
+lu_ulonglong(const char *p, const formatdef *f)
+{ /* Unpack the 8 bytes at p, stored little-endian, as an unsigned value; f is unused. */
+       return _PyLong_FromByteArray((const unsigned char *)p,
+                                     8,
+                                     1, /* little-endian: yes */
+                                     0  /* signed: no (unsigned) */);
+}
+
 static PyObject *
 lu_float(const char *p, const formatdef *f)
 {
@@ -979,6 +1031,34 @@ lp_uint(char *p, PyObject *v, const formatdef *f)
        return 0;
 }
 
+/* Pack v into the 8 bytes at p as a little-endian signed integer.
+   Returns -1 if v cannot be converted to a long (get_pylong has then set
+   the exception); otherwise returns the result of _PyLong_AsByteArray.
+   f is unused. */
+static int
+lp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+       int res;
+       v = get_pylong(v);
+       if (v == NULL)
+               return -1;      /* must not pass NULL on or decref it */
+       res = _PyLong_AsByteArray((PyLongObject*)v,
+                                 (unsigned char *)p,
+                                 8,
+                                 1, /* little_endian: yes */
+                                 1  /* signed: yes */);
+       Py_DECREF(v);
+       return res;
+}
+
+/* Pack v into the 8 bytes at p as a little-endian unsigned integer.
+   Returns -1 if v cannot be converted to a long (get_pylong has then set
+   the exception); otherwise returns the result of _PyLong_AsByteArray.
+   f is unused. */
+static int
+lp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+       int res;
+       v = get_pylong(v);
+       if (v == NULL)
+               return -1;      /* must not pass NULL on or decref it */
+       res = _PyLong_AsByteArray((PyLongObject*)v,
+                                 (unsigned char *)p,
+                                 8,
+                                 1, /* little_endian: yes */
+                                 0  /* signed: no (unsigned) */);
+       Py_DECREF(v);
+       return res;
+}
+
 static int
 lp_float(char *p, PyObject *v, const formatdef *f)
 {
@@ -1016,6 +1096,8 @@ static formatdef lilendian_table[] = {
        {'I',   4,              0,              lu_uint,        lp_uint},
        {'l',   4,              0,              lu_int,         lp_int},
        {'L',   4,              0,              lu_uint,        lp_uint},
+       {'q',   8,              0,              lu_longlong,    lp_longlong},
+       {'Q',   8,              0,              lu_ulonglong,   lp_ulonglong},
        {'f',   4,              0,              lu_float,       lp_float},
        {'d',   8,              0,              lu_double,      lp_double},
        {0}
index 92f8b046a0198ee33c69e1009fc25755afe42de7..fac8bb648b6673f05ccef0f6f1b5ddf02b48c520 100644 (file)
@@ -364,20 +364,33 @@ _PyLong_AsByteArray(PyLongObject* v,
        accumbits = 0;
        carry = do_twos_comp ? 1 : 0;
        for (i = 0; i < ndigits; ++i) {
+               unsigned int oldaccumbits = accumbits;
                twodigits thisdigit = v->ob_digit[i];
                if (do_twos_comp) {
                        thisdigit = (thisdigit ^ MASK) + carry;
                        carry = thisdigit >> SHIFT;
                        thisdigit &= MASK;
                }
+               if (i < ndigits - 1)
+                       accumbits += SHIFT;
+               else {
+                       /* The most-significant digit may be partly empty. */
+                       twodigits bitmask = 1 << (SHIFT - 1);
+                       twodigits signbit = do_twos_comp << (SHIFT - 1);
+                       unsigned int nsignbits = 0;
+                       while ((thisdigit & bitmask) == signbit && bitmask) {
+                               ++nsignbits;
+                               bitmask >>= 1;
+                               signbit >>= 1;
+                       }
+                       accumbits += SHIFT - nsignbits;
+               }
                /* Because we're going LSB to MSB, thisdigit is more
                   significant than what's already in accum, so needs to be
                   prepended to accum. */
-               accum |= thisdigit << accumbits;
-               accumbits += SHIFT;
+               accum |= thisdigit << oldaccumbits;
                /* Store as many bytes as possible. */
-               assert(accumbits >= 8);
-               do {
+               while (accumbits >= 8) {
                        if (j >= n)
                                goto Overflow;
                        ++j;
@@ -385,13 +398,13 @@ _PyLong_AsByteArray(PyLongObject* v,
                        p += pincr;
                        accumbits -= 8;
                        accum >>= 8;
-               } while (accumbits >= 8);
+               }
        }
 
        /* Store the straggler (if any). */
        assert(accumbits < 8);
        assert(carry == 0);  /* else do_twos_comp and *every* digit was 0 */
-       if (accum) {
+       if (accumbits > 0) {
                if (j >= n)
                        goto Overflow;
                ++j;