This completes the q/Q project.
longobject.c _PyLong_AsByteArray: The original code had a gross bug:
the most-significant Python digit doesn't necessarily have SHIFT
significant bits, so you really need to count how many copies of the sign
bit it has; otherwise spurious overflow errors result.
test_struct.py: This now does exhaustive std q/Q testing at, and on both
sides of, all relevant power-of-2 boundaries, both positive and negative.
NEWS: Added brief dict news while I was at it.
\item[(1)]
The \character{q} and \character{Q} conversion codes are available in
native mode only if the platform C compiler supports C \ctype{long long},
- or, on Windows, \ctype{__int64}.
+ or, on Windows, \ctype{__int64}. They're always available in standard
+ modes.
\end{description}
is truncated. If the string is too short, padding is used to ensure
that exactly enough bytes are used to satisfy the count.
-For the \character{I} and \character{L} format characters, the return
-value is a Python long integer.
+For the \character{I}, \character{L}, \character{q} and \character{Q}
+format characters, the return value is a Python long integer.
For the \character{P} format character, the return value is a Python
integer or long integer, depending on the size needed to hold a
order.
Standard size and alignment are as follows: no alignment is required
-for any type (so you have to use pad bytes); \ctype{short} is 2 bytes;
-\ctype{int} and \ctype{long} are 4 bytes. \ctype{float} and
-\ctype{double} are 32-bit and 64-bit IEEE floating point numbers,
-respectively.
+for any type (so you have to use pad bytes);
+\ctype{short} is 2 bytes;
+\ctype{int} and \ctype{long} are 4 bytes;
+\ctype{long long} (\ctype{__int64} on Windows) is 8 bytes;
+\ctype{float} and \ctype{double} are 32-bit and 64-bit
+IEEE floating point numbers, respectively.
Note the difference between \character{@} and \character{=}: both use
native byte order, but the size and alignment of the latter is
func.__name__, args)
## pdb.set_trace()
+def any_err(func, *args):
+ try:
+ apply(func, args)
+ except (struct.error, OverflowError, TypeError):
+ pass
+ else:
+ raise TestFailed, "%s%s did not raise error" % (
+ func.__name__, args)
+## pdb.set_trace()
+
simple_err(struct.calcsize, 'Z')
sz = struct.calcsize('i')
raise TestFailed, "unpack(%s, %s) -> (%s,) # expected (%s,)" % (
`fmt`, `res`, `rev`, `arg`)
-# Some q/Q sanity checks.
+###########################################################################
+# q/Q tests.
has_native_qQ = 1
try:
if verbose:
print "Platform has native q/Q?", has_native_qQ and "Yes." or "No."
-simple_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless
+any_err(struct.pack, "Q", -1) # can't pack -1 as unsigned regardless
simple_err(struct.pack, "q", "a") # can't pack string as 'q' regardless
simple_err(struct.pack, "Q", "a") # ditto, but 'Q'
+def string_reverse(s):
+ chars = list(s)
+ chars.reverse()
+ return "".join(chars)
+
def bigendian_to_native(value):
if isbigendian:
return value
- chars = list(value)
- chars.reverse()
- return "".join(chars)
+ else:
+ return string_reverse(value)
-if has_native_qQ:
+def test_native_qQ():
bytes = struct.calcsize('q')
# The expected values here are in big-endian format, primarily because
# I'm on a little-endian machine and so this is the clearest way (for
verify(retrieved == input,
"%r-unpack of %r gave %r, not %r" %
(format, got, retrieved, input))
+
+if has_native_qQ:
+ test_native_qQ()
+
+# Standard q/Q (8 bytes; should work on all platforms).
+
+MIN_Q, MAX_Q = 0, 2L**64 - 1
+MIN_q, MAX_q = -(2L**63), 2L**63 - 1
+
+import binascii
+def test_one_qQ(x, pack=struct.pack,
+ unpack=struct.unpack,
+ unhexlify=binascii.unhexlify):
+ if verbose:
+ print "trying std q/Q on", x, "==", hex(x)
+
+ # Try 'q'.
+ if MIN_q <= x <= MAX_q:
+ # Try '>q'.
+ expected = long(x)
+ if x < 0:
+ expected += 1L << 64
+ assert expected > 0
+ expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+ if len(expected) & 1:
+ expected = "0" + expected
+ expected = unhexlify(expected)
+ expected = "\x00" * (8 - len(expected)) + expected
+
+ # >q pack work?
+ got = pack(">q", x)
+ verify(got == expected,
+ "'>q'-pack of %r gave %r, not %r" %
+ (x, got, expected))
+
+ # >q unpack work?
+ retrieved = unpack(">q", got)[0]
+ verify(x == retrieved,
+ "'>q'-unpack of %r gave %r, not %r" %
+ (got, retrieved, x))
+
+ # Adding any byte should cause a "too big" error.
+ any_err(unpack, ">q", '\x01' + got)
+
+ # Try '<q'.
+ expected = string_reverse(expected)
+
+ # <q pack work?
+ got = pack("<q", x)
+ verify(got == expected,
+ "'<q'-pack of %r gave %r, not %r" %
+ (x, got, expected))
+
+ # <q unpack work?
+ retrieved = unpack("<q", got)[0]
+ verify(x == retrieved,
+ "'<q'-unpack of %r gave %r, not %r" %
+ (got, retrieved, x))
+
+ # Adding any byte should cause a "too big" error.
+ any_err(unpack, "<q", '\x01' + got)
+
+ else:
+ # x is out of q's range -- verify pack realizes that.
+ any_err(pack, '>q', x)
+ any_err(pack, '<q', x)
+
+ # Much the same for 'Q'.
+ if MIN_Q <= x <= MAX_Q:
+ # Try '>Q'.
+ expected = long(x)
+ expected = hex(expected)[2:-1] # chop "0x" and trailing 'L'
+ if len(expected) & 1:
+ expected = "0" + expected
+ expected = unhexlify(expected)
+ expected = "\x00" * (8 - len(expected)) + expected
+
+ # >Q pack work?
+ got = pack(">Q", x)
+ verify(got == expected,
+ "'>Q'-pack of %r gave %r, not %r" %
+ (x, got, expected))
+
+ # >Q unpack work?
+ retrieved = unpack(">Q", got)[0]
+ verify(x == retrieved,
+ "'>Q'-unpack of %r gave %r, not %r" %
+ (got, retrieved, x))
+
+ # Adding any byte should cause a "too big" error.
+ any_err(unpack, ">Q", '\x01' + got)
+
+ # Try '<Q'.
+ expected = string_reverse(expected)
+
+ # <Q pack work?
+ got = pack("<Q", x)
+ verify(got == expected,
+ "'<Q'-pack of %r gave %r, not %r" %
+ (x, got, expected))
+
+ # <Q unpack work?
+ retrieved = unpack("<Q", got)[0]
+ verify(x == retrieved,
+ "'<Q'-unpack of %r gave %r, not %r" %
+ (got, retrieved, x))
+
+ # Adding any byte should cause a "too big" error.
+ any_err(unpack, "<Q", '\x01' + got)
+
+ else:
+ # x is out of Q's range -- verify pack realizes that.
+ any_err(pack, '>Q', x)
+ any_err(pack, '<Q', x)
+
+def test_std_qQ():
+ from random import randrange
+
+ # Create all interesting powers of 2.
+ values = []
+ for exp in range(70):
+ values.append(1L << exp)
+
+ # Add some random 64-bit values.
+ for i in range(50):
+ val = 0L
+ for j in range(8):
+ val = (val << 8) | randrange(256)
+ values.append(val)
+
+ # Try all those, and their negations, and +-1 from them. Note
+ # that this tests all power-of-2 boundaries in range, and a few out
+ # of range, plus +-(2**n +- 1).
+ for base in values:
+ for val in -base, base:
+ for incr in -1, 0, 1:
+ x = val + incr
+ try:
+ x = int(x)
+ except OverflowError:
+ pass
+ test_one_qQ(x)
+
+test_std_qQ()
sortdict(dict) function for a simple way to display a dict in sorted
order.
+- Many other small changes to dicts were made, resulting in faster
+ operation along the most common code paths.
+
- Dictionary objects now support the "in" operator: "x in dict" means
the same as dict.has_key(x).
- Collisions in dicts are resolved via a new approach, which can help
dramatically in bad cases. For example, looking up every key in a dict
- d with d.keys() = [i << 16 for i in range(20000)] is approximately 500x
+ d with d.keys() == [i << 16 for i in range(20000)] is approximately 500x
faster now. Thanks to Christian Tismer for pointing out the cause and
the nature of an effective cure (last December! better late than never).
native mode, these can be used only when the platform C compiler supports
these types (when HAVE_LONG_LONG is #define'd by the Python config
process), and then they inherit the sizes and alignments of the C types.
- XXX TODO In standard mode, 'q' and 'Q' are supported on all platforms, and
- XXX TODO are 8-byte integral types.
+ In standard mode, 'q' and 'Q' are supported on all platforms, and are
+ 8-byte integral types.
Tests
#pragma options align=reset
#endif
+/* Return a new reference to a Python long holding the value of v, or NULL
+ * on failure.  An int is converted with PyLong_FromLong, a long is handed
+ * back with its reference count bumped, and any other object is asked to
+ * convert itself through its nb_long slot.  If that is impossible, or the
+ * slot returns something that is not a long, StructError is set.
+ * The caller owns the result and must decref it.
+ */
+
+static PyObject *
+get_pylong(PyObject *v)
+{
+    PyNumberMethods *number_slots;
+    PyObject *converted;
+
+    assert(v != NULL);
+    if (PyInt_Check(v))
+        return PyLong_FromLong(PyInt_AS_LONG(v));
+    if (PyLong_Check(v)) {
+        Py_INCREF(v);
+        return v;
+    }
+
+    number_slots = v->ob_type->tp_as_number;
+    if (number_slots == NULL || number_slots->nb_long == NULL)
+        goto cannot_convert;
+
+    converted = number_slots->nb_long(v);
+    if (converted == NULL)
+        return NULL;    /* propagate the failure from nb_long */
+    if (PyLong_Check(converted))
+        return converted;
+    Py_DECREF(converted);   /* nb_long returned a non-long; discard it */
+
+cannot_convert:
+    PyErr_SetString(StructError,
+                    "cannot convert argument to long");
+    return NULL;
+}
+
/* Helper routine to get a Python integer and raise the appropriate error
if it isn't one */
get_longlong(PyObject *v, LONG_LONG *p)
{
LONG_LONG x;
- int v_needs_decref = 0;
- if (PyInt_Check(v)) {
- x = (LONG_LONG)PyInt_AS_LONG(v);
- *p = x;
- return 0;
- }
- if (!PyLong_Check(v)) {
- PyNumberMethods *m = v->ob_type->tp_as_number;
- if (m != NULL && m->nb_long != NULL) {
- v = m->nb_long(v);
- if (v == NULL)
- return -1;
- v_needs_decref = 1;
- }
- if (!PyLong_Check(v)) {
- PyErr_SetString(StructError,
- "cannot convert argument to long");
- if (v_needs_decref)
- Py_DECREF(v);
- return -1;
- }
- }
+ v = get_pylong(v);
+ if (v == NULL)
+ return -1;
assert(PyLong_Check(v));
x = PyLong_AsLongLong(v);
- if (v_needs_decref)
- Py_DECREF(v);
+ Py_DECREF(v);
if (x == (LONG_LONG)-1 && PyErr_Occurred())
return -1;
*p = x;
get_ulonglong(PyObject *v, unsigned LONG_LONG *p)
{
unsigned LONG_LONG x;
- int v_needs_decref = 0;
- if (PyInt_Check(v)) {
- long i = PyInt_AS_LONG(v);
- if (i < 0) {
- PyErr_SetString(StructError, "can't convert negative "
- "int to unsigned");
- return -1;
- }
- x = (unsigned LONG_LONG)i;
- *p = x;
- return 0;
- }
- if (!PyLong_Check(v)) {
- PyNumberMethods *m = v->ob_type->tp_as_number;
- if (m != NULL && m->nb_long != NULL) {
- v = m->nb_long(v);
- if (v == NULL)
- return -1;
- v_needs_decref = 1;
- }
- if (!PyLong_Check(v)) {
- PyErr_SetString(StructError,
- "cannot convert argument to long");
- if (v_needs_decref)
- Py_DECREF(v);
- return -1;
- }
- }
+ v = get_pylong(v);
+ if (v == NULL)
+ return -1;
assert(PyLong_Check(v));
x = PyLong_AsUnsignedLongLong(v);
- if (v_needs_decref)
- Py_DECREF(v);
+ Py_DECREF(v);
if (x == (unsigned LONG_LONG)-1 && PyErr_Occurred())
return -1;
*p = x;
TYPE is one of char, byte, ubyte, etc.
*/
-/* Native mode routines. */
+/* Native mode routines. ****************************************************/
static PyObject *
nu_char(const char *p, const formatdef *f)
{0}
};
+/* Big-endian routines. *****************************************************/
+
static PyObject *
bu_int(const char *p, const formatdef *f)
{
return PyInt_FromLong((long)x);
}
+/* Unpack the 8 bytes at p as a big-endian signed integer and return it as
+ * a Python long.  The formatdef argument f is not used.
+ */
+static PyObject *
+bu_longlong(const char *p, const formatdef *f)
+{
+    const unsigned char *bytes = (const unsigned char *)p;
+    const int little_endian = 0;
+    const int is_signed = 1;
+
+    return _PyLong_FromByteArray(bytes, 8, little_endian, is_signed);
+}
+
+/* Unpack the 8 bytes at p as a big-endian unsigned integer and return it
+ * as a Python long.  The formatdef argument f is not used.
+ */
+static PyObject *
+bu_ulonglong(const char *p, const formatdef *f)
+{
+    const unsigned char *bytes = (const unsigned char *)p;
+    const int little_endian = 0;
+    const int is_signed = 0;
+
+    return _PyLong_FromByteArray(bytes, 8, little_endian, is_signed);
+}
+
static PyObject *
bu_float(const char *p, const formatdef *f)
{
return 0;
}
+/* Pack v into the 8 bytes at p as a big-endian signed integer.
+ * v is first converted to a Python long with get_pylong().  Returns -1
+ * (with the exception left set by get_pylong) if that conversion fails;
+ * otherwise returns the result of _PyLong_AsByteArray.  The formatdef
+ * argument f is not used.
+ */
+static int
+bp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+    int res;
+
+    v = get_pylong(v);
+    /* get_pylong returns NULL on failure; bail out before the NULL can
+       reach _PyLong_AsByteArray or Py_DECREF. */
+    if (v == NULL)
+        return -1;
+    res = _PyLong_AsByteArray((PyLongObject *)v,
+                              (unsigned char *)p,
+                              8,
+                              0, /* little_endian */
+                              1  /* signed */);
+    Py_DECREF(v);
+    return res;
+}
+
+/* Pack v into the 8 bytes at p as a big-endian unsigned integer.
+ * v is first converted to a Python long with get_pylong().  Returns -1
+ * (with the exception left set by get_pylong) if that conversion fails;
+ * otherwise returns the result of _PyLong_AsByteArray.  The formatdef
+ * argument f is not used.
+ */
+static int
+bp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+    int res;
+
+    v = get_pylong(v);
+    /* get_pylong returns NULL on failure; bail out before the NULL can
+       reach _PyLong_AsByteArray or Py_DECREF. */
+    if (v == NULL)
+        return -1;
+    res = _PyLong_AsByteArray((PyLongObject *)v,
+                              (unsigned char *)p,
+                              8,
+                              0, /* little_endian */
+                              0  /* signed */);
+    Py_DECREF(v);
+    return res;
+}
+
static int
bp_float(char *p, PyObject *v, const formatdef *f)
{
{'I', 4, 0, bu_uint, bp_uint},
{'l', 4, 0, bu_int, bp_int},
{'L', 4, 0, bu_uint, bp_uint},
+ {'q', 8, 0, bu_longlong, bp_longlong},
+ {'Q', 8, 0, bu_ulonglong, bp_ulonglong},
{'f', 4, 0, bu_float, bp_float},
{'d', 8, 0, bu_double, bp_double},
{0}
};
+/* Little-endian routines. *****************************************************/
+
static PyObject *
lu_int(const char *p, const formatdef *f)
{
return PyInt_FromLong((long)x);
}
+/* Unpack the 8 bytes at p as a little-endian signed integer and return it
+ * as a Python long.  The formatdef argument f is not used.
+ */
+static PyObject *
+lu_longlong(const char *p, const formatdef *f)
+{
+    const unsigned char *bytes = (const unsigned char *)p;
+    const int little_endian = 1;
+    const int is_signed = 1;
+
+    return _PyLong_FromByteArray(bytes, 8, little_endian, is_signed);
+}
+
+/* Unpack the 8 bytes at p as a little-endian unsigned integer and return
+ * it as a Python long.  The formatdef argument f is not used.
+ */
+static PyObject *
+lu_ulonglong(const char *p, const formatdef *f)
+{
+    const unsigned char *bytes = (const unsigned char *)p;
+    const int little_endian = 1;
+    const int is_signed = 0;
+
+    return _PyLong_FromByteArray(bytes, 8, little_endian, is_signed);
+}
+
static PyObject *
lu_float(const char *p, const formatdef *f)
{
return 0;
}
+/* Pack v into the 8 bytes at p as a little-endian signed integer.
+ * v is first converted to a Python long with get_pylong().  Returns -1
+ * (with the exception left set by get_pylong) if that conversion fails;
+ * otherwise returns the result of _PyLong_AsByteArray.  The formatdef
+ * argument f is not used.
+ */
+static int
+lp_longlong(char *p, PyObject *v, const formatdef *f)
+{
+    int res;
+
+    v = get_pylong(v);
+    /* get_pylong returns NULL on failure; bail out before the NULL can
+       reach _PyLong_AsByteArray or Py_DECREF. */
+    if (v == NULL)
+        return -1;
+    res = _PyLong_AsByteArray((PyLongObject *)v,
+                              (unsigned char *)p,
+                              8,
+                              1, /* little_endian */
+                              1  /* signed */);
+    Py_DECREF(v);
+    return res;
+}
+
+/* Pack v into the 8 bytes at p as a little-endian unsigned integer.
+ * v is first converted to a Python long with get_pylong().  Returns -1
+ * (with the exception left set by get_pylong) if that conversion fails;
+ * otherwise returns the result of _PyLong_AsByteArray.  The formatdef
+ * argument f is not used.
+ */
+static int
+lp_ulonglong(char *p, PyObject *v, const formatdef *f)
+{
+    int res;
+
+    v = get_pylong(v);
+    /* get_pylong returns NULL on failure; bail out before the NULL can
+       reach _PyLong_AsByteArray or Py_DECREF. */
+    if (v == NULL)
+        return -1;
+    res = _PyLong_AsByteArray((PyLongObject *)v,
+                              (unsigned char *)p,
+                              8,
+                              1, /* little_endian */
+                              0  /* signed */);
+    Py_DECREF(v);
+    return res;
+}
+
static int
lp_float(char *p, PyObject *v, const formatdef *f)
{
{'I', 4, 0, lu_uint, lp_uint},
{'l', 4, 0, lu_int, lp_int},
{'L', 4, 0, lu_uint, lp_uint},
+ {'q', 8, 0, lu_longlong, lp_longlong},
+ {'Q', 8, 0, lu_ulonglong, lp_ulonglong},
{'f', 4, 0, lu_float, lp_float},
{'d', 8, 0, lu_double, lp_double},
{0}
accumbits = 0;
carry = do_twos_comp ? 1 : 0;
for (i = 0; i < ndigits; ++i) {
+ unsigned int oldaccumbits = accumbits;
twodigits thisdigit = v->ob_digit[i];
if (do_twos_comp) {
thisdigit = (thisdigit ^ MASK) + carry;
carry = thisdigit >> SHIFT;
thisdigit &= MASK;
}
+ if (i < ndigits - 1)
+ accumbits += SHIFT;
+ else {
+ /* The most-significant digit may be partly empty. */
+ twodigits bitmask = 1 << (SHIFT - 1);
+ twodigits signbit = do_twos_comp << (SHIFT - 1);
+ unsigned int nsignbits = 0;
+ while ((thisdigit & bitmask) == signbit && bitmask) {
+ ++nsignbits;
+ bitmask >>= 1;
+ signbit >>= 1;
+ }
+ accumbits += SHIFT - nsignbits;
+ }
/* Because we're going LSB to MSB, thisdigit is more
significant than what's already in accum, so needs to be
prepended to accum. */
- accum |= thisdigit << accumbits;
- accumbits += SHIFT;
+ accum |= thisdigit << oldaccumbits;
/* Store as many bytes as possible. */
- assert(accumbits >= 8);
- do {
+ while (accumbits >= 8) {
if (j >= n)
goto Overflow;
++j;
p += pincr;
accumbits -= 8;
accum >>= 8;
- } while (accumbits >= 8);
+ }
}
/* Store the straggler (if any). */
assert(accumbits < 8);
assert(carry == 0); /* else do_twos_comp and *every* digit was 0 */
- if (accum) {
+ if (accumbits > 0) {
if (j >= n)
goto Overflow;
++j;