Added q/Q standard (x-platform 8-byte ints) mode in struct module.

author Tim Peters <tim.peters@gmail.com>

Tue, 12 Jun 2001 01:22:22 +0000 (01:22 +0000)

committer Tim Peters <tim.peters@gmail.com>

Tue, 12 Jun 2001 01:22:22 +0000 (01:22 +0000)
author Tim Peters <tim.peters@gmail.com>
Tue, 12 Jun 2001 01:22:22 +0000 (01:22 +0000)
committer Tim Peters <tim.peters@gmail.com>
Tue, 12 Jun 2001 01:22:22 +0000 (01:22 +0000)
diff --git a/Doc/lib/libstruct.tex b/Doc/lib/libstruct.tex

index 9a1942da608e148eb133b1367f2c3224e2b040b0..f8056a2af9ed88e43a5c6d6ba74c0caa3d02b151 100644 (file)
--- a/Doc/lib/libstruct.tex
+++ b/Doc/lib/libstruct.tex
@@ -72,7 +72,8 @@ Notes:
  \item[(1)]
    The \character{q} and \character{Q} conversion codes are available in
    native mode only if the platform C compiler supports C \ctype{long long},
-  or, on Windows, \ctype{__int64}.
+  or, on Windows, \ctype{__int64}.  They're always available in standard
+  modes.
  \end{description}
  
  
@@ -100,8 +101,8 @@ passed in to \function{pack()} is too long, the stored representation
  is truncated.  If the string is too short, padding is used to ensure
  that exactly enough bytes are used to satisfy the count.
  
-For the \character{I} and \character{L} format characters, the return
-value is a Python long integer.
+For the \character{I}, \character{L}, \character{q} and \character{Q}
+format characters, the return value is a Python long integer.
  
  For the \character{P} format character, the return value is a Python
  integer or long integer, depending on the size needed to hold a
@@ -139,10 +140,12 @@ Native size and alignment are determined using the C compiler's
  order.
  
  Standard size and alignment are as follows: no alignment is required
-for any type (so you have to use pad bytes); \ctype{short} is 2 bytes;
-\ctype{int} and \ctype{long} are 4 bytes.  \ctype{float} and
-\ctype{double} are 32-bit and 64-bit IEEE floating point numbers,
-respectively.
+for any type (so you have to use pad bytes);
+\ctype{short} is 2 bytes;
+\ctype{int} and \ctype{long} are 4 bytes;
+\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes;
+\ctype{float} and \ctype{double} are 32-bit and 64-bit
+IEEE floating point numbers, respectively.
  
  Note the difference between \character{@} and \character{=}: both use
  native byte order, but the size and alignment of the latter is
diff --git a/Lib/test/test_struct.py b/Lib/test/test_struct.py

index c977913dfcfcd3b25f18659b4863f0929ee8facc..e6c8bb24c69ee49eb94c3905f150c6d1bbff0507 100644 (file)
--- a/Lib/test/test_struct.py
+++ b/Lib/test/test_struct.py
@@ -12,6 +12,16 @@ def simple_err(func, *args):
              func.__name__, args)
  ##      pdb.set_trace()
  
+def any_err(func, *args):
+    """Call func(*args) and require that it raises an expected error.
+
+    struct.error, OverflowError and TypeError are all accepted as
+    "an error"; if the call returns normally, TestFailed is raised.
+    """
+    try:
+        apply(func, args)
+    except (struct.error, OverflowError, TypeError):
+        # Any of these counts as the expected failure.
+        pass
+    else:
+        raise TestFailed, "%s%s did not raise error" % (
+            func.__name__, args)
+##      pdb.set_trace()
+
  simple_err(struct.calcsize, 'Z')
  
  sz = struct.calcsize('i')
@@ -113,7 +123,8 @@ for fmt, arg, big, lil, asy in tests:
              raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % (
                  `fmt`, `res`, `rev`, `arg`)
  
-# Some q/Q sanity checks.
+###########################################################################
+# q/Q tests.
  
  has_native_qQ = 1
  try:
@@ -124,18 +135,22 @@ except struct.error:
  if verbose:
      print "Platform has native q/Q?", has_native_qQ and "Yes." or "No."
  
-simple_err(struct.pack, "Q", -1)   # can't pack -1 as unsigned regardless
+any_err(struct.pack, "Q", -1)   # can't pack -1 as unsigned regardless
  simple_err(struct.pack, "q", "a")  # can't pack string as 'q' regardless
  simple_err(struct.pack, "Q", "a")  # ditto, but 'Q'
  
+def string_reverse(s):
+    """Return a new string holding the characters of s in reverse order."""
+    chars = list(s)
+    chars.reverse()
+    return "".join(chars)
+
  def bigendian_to_native(value):
      if isbigendian:
          return value
-    chars = list(value)
-    chars.reverse()
-    return "".join(chars)
+    else:
+        return string_reverse(value)
  
-if has_native_qQ:
+def test_native_qQ():
      bytes = struct.calcsize('q')
      # The expected values here are in big-endian format, primarily because
      # I'm on a little-endian machine and so this is the clearest way (for
@@ -156,3 +171,147 @@ if has_native_qQ:
          verify(retrieved == input,
                 "%r-unpack of %r gave %r, not %r" %
                      (format, got, retrieved, input))
+
+if has_native_qQ:
+    test_native_qQ()
+
+# Standard q/Q (8 bytes; should work on all platforms).
+
+MIN_Q, MAX_Q = 0, 2L**64 - 1
+MIN_q, MAX_q = -(2L**63), 2L**63 - 1
+
+import binascii
+def test_one_qQ(x, pack=struct.pack,
+                   unpack=struct.unpack,
+                   unhexlify=binascii.unhexlify):
+    """Check standard-mode 'q' and 'Q' packing/unpacking of the integer x.
+
+    For each of 'q' and 'Q': if x is within that code's range, the
+    expected 8-byte big-endian string is built independently (via hex()
+    and unhexlify) and compared with pack('>...'), the value is
+    round-tripped through unpack, and the same is repeated for the
+    byte-reversed little-endian form.  A 9-byte string must make unpack
+    fail.  If x is out of range, pack must raise an error instead.
+
+    pack, unpack and unhexlify are bound as default arguments; callers
+    are expected to pass only x.
+    """
+    if verbose:
+        print "trying std q/Q on", x, "==", hex(x)
+
+    # Try 'q'.
+    if MIN_q <= x <= MAX_q:
+        # Try '>q'.
+        expected = long(x)
+        if x < 0:
+            # Map a negative x to its unsigned 64-bit bit pattern.
+            expected += 1L << 64
+            assert expected > 0
+        expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+        if len(expected) & 1:
+            # unhexlify needs an even number of hex digits.
+            expected = "0" + expected
+        expected = unhexlify(expected)
+        # Left-pad with zero bytes to the full 8-byte width.
+        expected = "\x00" * (8 - len(expected)) + expected
+
+        # >q pack work?
+        got = pack(">q", x)
+        verify(got == expected,
+               "'>q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # >q unpack work?
+        retrieved = unpack(">q", got)[0]
+        verify(x == retrieved,
+               "'>q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # Adding any byte should cause a "too big" error.
+        any_err(unpack, ">q", '\x01' + got)
+
+        # Try '<q'.
+        expected = string_reverse(expected)
+
+        # <q pack work?
+        got = pack("<q", x)
+        verify(got == expected,
+               "'<q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # <q unpack work?
+        retrieved = unpack("<q", got)[0]
+        verify(x == retrieved,
+               "'<q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # Adding any byte should cause a "too big" error.
+        any_err(unpack, "<q", '\x01' + got)
+
+    else:
+        # x is out of q's range -- verify pack realizes that.
+        any_err(pack, '>q', x)
+        any_err(pack, '<q', x)
+
+    # Much the same for 'Q'.
+    if MIN_Q <= x <= MAX_Q:
+        # Try '>Q'.
+        expected = long(x)
+        expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+        if len(expected) & 1:
+            # unhexlify needs an even number of hex digits.
+            expected = "0" + expected
+        expected = unhexlify(expected)
+        # Left-pad with zero bytes to the full 8-byte width.
+        expected = "\x00" * (8 - len(expected)) + expected
+
+        # >Q pack work?
+        got = pack(">Q", x)
+        verify(got == expected,
+               "'>Q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # >Q unpack work?
+        retrieved = unpack(">Q", got)[0]
+        verify(x == retrieved,
+               "'>Q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # Adding any byte should cause a "too big" error.
+        any_err(unpack, ">Q", '\x01' + got)
+
+        # Try '<Q'.
+        expected = string_reverse(expected)
+
+        # <Q pack work?
+        got = pack("<Q", x)
+        verify(got == expected,
+               "'<Q'-pack of %r gave %r, not %r" %
+                (x, got, expected))
+
+        # <Q unpack work?
+        retrieved = unpack("<Q", got)[0]
+        verify(x == retrieved,
+               "'<Q'-unpack of %r gave %r, not %r" %
+                (got, retrieved, x))
+
+        # Adding any byte should cause a "too big" error.
+        any_err(unpack, "<Q", '\x01' + got)
+
+    else:
+        # x is out of Q's range -- verify pack realizes that.
+        any_err(pack, '>Q', x)
+        any_err(pack, '<Q', x)
+
+def test_std_qQ():
+    """Run test_one_qQ over power-of-2 boundaries and random 64-bit values.
+
+    The base values are 2**0 .. 2**69 plus 50 random 8-byte values; each
+    base is tried as +base and -base, each with -1, 0 and +1 added.
+    The random values are not seeded, so they differ from run to run.
+    """
+    from random import randrange
+
+    # Create all interesting powers of 2.
+    values = []
+    for exp in range(70):
+        values.append(1L << exp)
+
+    # Add some random 64-bit values.
+    for i in range(50):
+        val = 0L
+        # Assemble the value one random byte at a time.
+        for j in range(8):
+            val = (val << 8) | randrange(256)
+        values.append(val)
+
+    # Try all those, and their negations, and +-1 from them.  Note
+    # that this tests all power-of-2 boundaries in range, and a few out
+    # of range, plus +-(2**n +- 1).
+    for base in values:
+        for val in -base, base:
+            for incr in -1, 0, 1:
+                x = val + incr
+                # Pass a plain int when x fits in one; otherwise keep
+                # the long.
+                try:
+                    x = int(x)
+                except OverflowError:
+                    pass
+                test_one_qQ(x)
+
+test_std_qQ()
diff --git a/Misc/NEWS b/Misc/NEWS

index 18a87e9cb215b9fbf8c5262da1b6bb71b585f7a7..16850aac686cebcb55e8497db46ca8a28963706f 100644 (file)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -84,6 +84,9 @@ Core
    sortdict(dict) function for a simple way to display a dict in sorted
    order.
  
+- Many other small changes to dicts were made, resulting in faster
+  operation along the most common code paths.
+
  - Dictionary objects now support the "in" operator: "x in dict" means
    the same as dict.has_key(x).
  
@@ -119,7 +122,7 @@ Core
  
  - Collisions in dicts are resolved via a new approach, which can help
    dramatically in bad cases.  For example, looking up every key in a dict
-  d with d.keys() = [i << 16 for i in range(20000)] is approximately 500x
+  d with d.keys() == [i << 16 for i in range(20000)] is approximately 500x
    faster now.  Thanks to Christian Tismer for pointing out the cause and
    the nature of an effective cure (last December! better late than never).
  
@@ -145,8 +148,8 @@ Library
    native mode, these can be used only when the platform C compiler supports
    these types (when HAVE_LONG_LONG is #define'd by the Python config
    process), and then they inherit the sizes and alignments of the C types.
-  XXX TODO In standard mode, 'q' and 'Q' are supported on all platforms, and
-  XXX TODO are 8-byte integral types.
+  In standard mode, 'q' and 'Q' are supported on all platforms, and are
+  8-byte integral types.
  
  Tests
  
diff --git a/Modules/structmodule.c b/Modules/structmodule.c

index 9b799781814484bd9fab31bdd7dffb1c8df5768d..4a8886f8be9ac0e132e79187775c677122e36605 100644 (file)
--- a/Modules/structmodule.c
+++ b/Modules/structmodule.c
@@ -80,6 +80,34 @@ typedef struct { char c; LONG_LONG x; } s_long_long;
  #pragma options align=reset
  #endif
  
+/* Helper to get a PyLongObject by hook or by crook.
+
+   On success returns a new reference that the caller must decref:
+   - a Python int is converted to a new long;
+   - a Python long is returned as-is, with its refcount incremented;
+   - any other object is converted through its type's nb_long slot, if
+     it has one.
+   Returns NULL on failure.  StructError is set here when the object has
+   no nb_long slot or nb_long produced a non-long; when nb_long itself
+   returns NULL, no error is set here (nb_long is presumably expected to
+   have set one).
+*/
+
+static PyObject *
+get_pylong(PyObject *v)
+{
+       PyNumberMethods *m;
+
+       assert(v != NULL);
+       if (PyInt_Check(v))
+               return PyLong_FromLong(PyInt_AS_LONG(v));
+       if (PyLong_Check(v)) {
+               /* Already a long: hand back an owned reference. */
+               Py_INCREF(v);
+               return v;
+       }
+       m = v->ob_type->tp_as_number;
+       if (m != NULL && m->nb_long != NULL) {
+               /* From here on v is the conversion result, which we own. */
+               v = m->nb_long(v);
+               if (v == NULL)
+                       return NULL;
+               if (PyLong_Check(v))
+                       return v;
+               /* nb_long returned something that isn't a long: drop it. */
+               Py_DECREF(v);
+       }
+       PyErr_SetString(StructError,
+                       "cannot convert argument to long");
+       return NULL;
+}
+
  /* Helper routine to get a Python integer and raise the appropriate error
     if it isn't one */
  
@@ -123,33 +151,13 @@ static int
  get_longlong(PyObject *v, LONG_LONG *p)
  {
         LONG_LONG x;
-       int v_needs_decref = 0;
  
-       if (PyInt_Check(v)) {
-               x = (LONG_LONG)PyInt_AS_LONG(v);
-               *p = x;
-               return 0;
-       }
-       if (!PyLong_Check(v)) {
-               PyNumberMethods *m = v->ob_type->tp_as_number;
-               if (m != NULL && m->nb_long != NULL) {
-                       v = m->nb_long(v);
-                       if (v == NULL)
-                               return -1;
-                       v_needs_decref = 1;
-               }
-               if (!PyLong_Check(v)) {
-                       PyErr_SetString(StructError,
-                                       "cannot convert argument to long");
-                       if (v_needs_decref)
-                               Py_DECREF(v);
-                       return -1;
-               }
-       }
+       v = get_pylong(v);
+       if (v == NULL)
+               return -1;
         assert(PyLong_Check(v));
         x = PyLong_AsLongLong(v);
-       if (v_needs_decref)
-               Py_DECREF(v);
+       Py_DECREF(v);
         if (x == (LONG_LONG)-1 && PyErr_Occurred())
                 return -1;
         *p = x;
@@ -162,39 +170,13 @@ static int
  get_ulonglong(PyObject *v, unsigned LONG_LONG *p)
  {
         unsigned LONG_LONG x;
-       int v_needs_decref = 0;
  
-       if (PyInt_Check(v)) {
-               long i = PyInt_AS_LONG(v);
-               if (i < 0) {
-                       PyErr_SetString(StructError, "can't convert negative "
-                                       "int to unsigned");
-                       return -1;
-               }
-               x = (unsigned LONG_LONG)i;
-               *p = x;
-               return 0;
-       }
-       if (!PyLong_Check(v)) {
-               PyNumberMethods *m = v->ob_type->tp_as_number;
-               if (m != NULL && m->nb_long != NULL) {
-                       v = m->nb_long(v);
-                       if (v == NULL)
-                               return -1;
-                       v_needs_decref = 1;
-               }
-               if (!PyLong_Check(v)) {
-                       PyErr_SetString(StructError,
-                                       "cannot convert argument to long");
-                       if (v_needs_decref)
-                               Py_DECREF(v);
-                       return -1;
-               }
-       }
+       v = get_pylong(v);
+       if (v == NULL)
+               return -1;
         assert(PyLong_Check(v));
         x = PyLong_AsUnsignedLongLong(v);
-       if (v_needs_decref)
-               Py_DECREF(v);
+       Py_DECREF(v);
         if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred())
                 return -1;
         *p = x;
@@ -500,7 +482,7 @@ typedef struct _formatdef {
     TYPE is one of char, byte, ubyte, etc.
  */
  
-/* Native mode routines. */
+/* Native mode routines. ****************************************************/
  
  static PyObject *
  nu_char(const char *p, const formatdef *f)
@@ -797,6 +779,8 @@ static formatdef native_table[] = {
         {0}
  };
  
+/* Big-endian routines. *****************************************************/
+
  static PyObject *
  bu_int(const char *p, const formatdef *f)
  {
@@ -825,6 +809,24 @@ bu_uint(const char *p, const formatdef *f)
                 return PyInt_FromLong((long)x);
  }
  
+/* Big-endian unpack of a signed 8-byte integer stored at p.
+   Returns whatever _PyLong_FromByteArray returns; f is not used. */
+static PyObject *
+bu_longlong(const char *p, const formatdef *f)
+{
+       return _PyLong_FromByteArray((const unsigned char *)p,
+                                     8,
+                                     0, /* little-endian: no */
+                                     1  /* signed: yes */);
+}
+
+/* Big-endian unpack of an unsigned 8-byte integer stored at p.
+   Returns whatever _PyLong_FromByteArray returns; f is not used. */
+static PyObject *
+bu_ulonglong(const char *p, const formatdef *f)
+{
+       return _PyLong_FromByteArray((const unsigned char *)p,
+                                     8,
+                                     0, /* little-endian: no */
+                                     0  /* signed: no */);
+}
+
  static PyObject *
  bu_float(const char *p, const formatdef *f)
  {
@@ -867,6 +869,34 @@ bp_uint(char *p, PyObject *v, const formatdef *f)
         return 0;
  }
  
+/* Big-endian pack of v as a signed 8-byte integer written at p.
+   v may be any object get_pylong() can convert to a Python long.
+   Returns -1 if that conversion fails; otherwise returns the result of
+   _PyLong_AsByteArray().  f is not used. */
+static int
+bp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+       int res;
+       v = get_pylong(v);
+       /* get_pylong() returns NULL for unconvertible arguments; bail out
+          before the NULL reaches _PyLong_AsByteArray or Py_DECREF. */
+       if (v == NULL)
+               return -1;
+       res = _PyLong_AsByteArray((PyLongObject *)v,
+                                 (unsigned char *)p,
+                                 8,
+                                 0, /* little_endian */
+                                 1  /* signed */);
+       Py_DECREF(v);
+       return res;
+}
+
+/* Big-endian pack of v as an unsigned 8-byte integer written at p.
+   v may be any object get_pylong() can convert to a Python long.
+   Returns -1 if that conversion fails; otherwise returns the result of
+   _PyLong_AsByteArray().  f is not used. */
+static int
+bp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+       int res;
+       v = get_pylong(v);
+       /* get_pylong() returns NULL for unconvertible arguments; bail out
+          before the NULL reaches _PyLong_AsByteArray or Py_DECREF. */
+       if (v == NULL)
+               return -1;
+       res = _PyLong_AsByteArray((PyLongObject *)v,
+                                 (unsigned char *)p,
+                                 8,
+                                 0, /* little_endian */
+                                 0  /* signed */);
+       Py_DECREF(v);
+       return res;
+}
+
  static int
  bp_float(char *p, PyObject *v, const formatdef *f)
  {
@@ -904,11 +934,15 @@ static formatdef bigendian_table[] = {
         {'I',   4,              0,              bu_uint,        bp_uint},
         {'l',   4,              0,              bu_int,         bp_int},
         {'L',   4,              0,              bu_uint,        bp_uint},
+       {'q',   8,              0,              bu_longlong,    bp_longlong},
+       {'Q',   8,              0,              bu_ulonglong,   bp_ulonglong},
         {'f',   4,              0,              bu_float,       bp_float},
         {'d',   8,              0,              bu_double,      bp_double},
         {0}
  };
  
+/* Little-endian routines. *****************************************************/
+
  static PyObject *
  lu_int(const char *p, const formatdef *f)
  {
@@ -937,6 +971,24 @@ lu_uint(const char *p, const formatdef *f)
                 return PyInt_FromLong((long)x);
  }
  
+/* Little-endian unpack of a signed 8-byte integer stored at p.
+   Returns whatever _PyLong_FromByteArray returns; f is not used. */
+static PyObject *
+lu_longlong(const char *p, const formatdef *f)
+{
+       return _PyLong_FromByteArray((const unsigned char *)p,
+                                     8,
+                                     1, /* little-endian: yes */
+                                     1  /* signed: yes */);
+}
+
+/* Little-endian unpack of an unsigned 8-byte integer stored at p.
+   Returns whatever _PyLong_FromByteArray returns; f is not used. */
+static PyObject *
+lu_ulonglong(const char *p, const formatdef *f)
+{
+       return _PyLong_FromByteArray((const unsigned char *)p,
+                                     8,
+                                     1, /* little-endian: yes */
+                                     0  /* signed: no */);
+}
+
  static PyObject *
  lu_float(const char *p, const formatdef *f)
  {
@@ -979,6 +1031,34 @@ lp_uint(char *p, PyObject *v, const formatdef *f)
         return 0;
  }
  
+/* Little-endian pack of v as a signed 8-byte integer written at p.
+   v may be any object get_pylong() can convert to a Python long.
+   Returns -1 if that conversion fails; otherwise returns the result of
+   _PyLong_AsByteArray().  f is not used. */
+static int
+lp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+       int res;
+       v = get_pylong(v);
+       /* get_pylong() returns NULL for unconvertible arguments; bail out
+          before the NULL reaches _PyLong_AsByteArray or Py_DECREF. */
+       if (v == NULL)
+               return -1;
+       res = _PyLong_AsByteArray((PyLongObject*)v,
+                                 (unsigned char *)p,
+                                 8,
+                                 1, /* little_endian */
+                                 1  /* signed */);
+       Py_DECREF(v);
+       return res;
+}
+
+/* Little-endian pack of v as an unsigned 8-byte integer written at p.
+   v may be any object get_pylong() can convert to a Python long.
+   Returns -1 if that conversion fails; otherwise returns the result of
+   _PyLong_AsByteArray().  f is not used. */
+static int
+lp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+       int res;
+       v = get_pylong(v);
+       /* get_pylong() returns NULL for unconvertible arguments; bail out
+          before the NULL reaches _PyLong_AsByteArray or Py_DECREF. */
+       if (v == NULL)
+               return -1;
+       res = _PyLong_AsByteArray((PyLongObject*)v,
+                                 (unsigned char *)p,
+                                 8,
+                                 1, /* little_endian */
+                                 0  /* signed */);
+       Py_DECREF(v);
+       return res;
+}
+
  static int
  lp_float(char *p, PyObject *v, const formatdef *f)
  {
@@ -1016,6 +1096,8 @@ static formatdef lilendian_table[] = {
         {'I',   4,              0,              lu_uint,        lp_uint},
         {'l',   4,              0,              lu_int,         lp_int},
         {'L',   4,              0,              lu_uint,        lp_uint},
+       {'q',   8,              0,              lu_longlong,    lp_longlong},
+       {'Q',   8,              0,              lu_ulonglong,   lp_ulonglong},
         {'f',   4,              0,              lu_float,       lp_float},
         {'d',   8,              0,              lu_double,      lp_double},
         {0}
diff --git a/Objects/longobject.c b/Objects/longobject.c

index 92f8b046a0198ee33c69e1009fc25755afe42de7..fac8bb648b6673f05ccef0f6f1b5ddf02b48c520 100644 (file)
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -364,20 +364,33 @@ _PyLong_AsByteArray(PyLongObject* v,
         accumbits = 0;
         carry = do_twos_comp ? 1 : 0;
         for (i = 0; i < ndigits; ++i) {
+               unsigned int oldaccumbits = accumbits;
                 twodigits thisdigit = v->ob_digit[i];
                 if (do_twos_comp) {
                         thisdigit = (thisdigit ^ MASK) + carry;
                         carry = thisdigit >> SHIFT;
                         thisdigit &= MASK;
                 }
+               if (i < ndigits - 1)
+                       accumbits += SHIFT;
+               else {
+                       /* The most-significant digit may be partly empty. */
+                       twodigits bitmask = 1 << (SHIFT - 1);
+                       twodigits signbit = do_twos_comp << (SHIFT - 1);
+                       unsigned int nsignbits = 0;
+                       while ((thisdigit & bitmask) == signbit && bitmask) {
+                               ++nsignbits;
+                               bitmask >>= 1;
+                               signbit >>= 1;
+                       }
+                       accumbits += SHIFT - nsignbits;
+               }
                 /* Because we're going LSB to MSB, thisdigit is more
                    significant than what's already in accum, so needs to be
                    prepended to accum. */
-               accum |= thisdigit << accumbits;
-               accumbits += SHIFT;
+               accum |= thisdigit << oldaccumbits;
                 /* Store as many bytes as possible. */
-               assert(accumbits >= 8);
-               do {
+               while (accumbits >= 8) {
                         if (j >= n)
                                 goto Overflow;
                         ++j;
@@ -385,13 +398,13 @@ _PyLong_AsByteArray(PyLongObject* v,
                         p += pincr;
                         accumbits -= 8;
                         accum >>= 8;
-               } while (accumbits >= 8);
+               }
         }
  
         /* Store the straggler (if any). */
         assert(accumbits < 8);
         assert(carry == 0);  /* else do_twos_comp and *every* digit was 0 */
-       if (accum) {
+       if (accumbits > 0) {
                 if (j >= n)
                         goto Overflow;
                 ++j;
author	Tim Peters <tim.peters@gmail.com>
	Tue, 12 Jun 2001 01:22:22 +0000 (01:22 +0000)
committer	Tim Peters <tim.peters@gmail.com>
	Tue, 12 Jun 2001 01:22:22 +0000 (01:22 +0000)
Doc/lib/libstruct.tex		patch \| blob \| blame \| history
Lib/test/test_struct.py		patch \| blob \| blame \| history
Misc/NEWS		patch \| blob \| blame \| history
Modules/structmodule.c		patch \| blob \| blame \| history
Objects/longobject.c		patch \| blob \| blame \| history