masked); and the dictobject struct required a member to hold the table's
polynomial. In Tim's experiments the current scheme ran faster, produced
equally good collision statistics, needed less code & used less memory.
+
+Theoretical Python 2.5 headache: hash codes are only C "long", but
+sizeof(Py_ssize_t) > sizeof(long) may be possible. In that case, and if a
+dict is genuinely huge, then only the slots directly reachable via indexing
+by a C long can be the first slot in a probe sequence. The probe sequence
+will still eventually reach every slot in the table, but the collision rate
+on initial probes may be much higher than this scheme was designed for.
+Getting a hash code as fat as Py_ssize_t is the only real cure. But in
+practice, this probably won't make a lick of difference for many years (at
+which point everyone will have terabytes of RAM on 64-bit boxes).
*/
/* Object used as dummy key to fill deleted entries */
register Py_ssize_t i;
register size_t perturb;
register dictentry *freeslot;
- register unsigned int mask = mp->ma_mask;
+ register Py_ssize_t mask = mp->ma_mask;
dictentry *ep0 = mp->ma_table;
register dictentry *ep;
register int restore_error;
register Py_ssize_t i;
register size_t perturb;
register dictentry *freeslot;
- register unsigned int mask = mp->ma_mask;
+ register Py_ssize_t mask = mp->ma_mask;
dictentry *ep0 = mp->ma_table;
register dictentry *ep;
Py_DECREF(dummy);
}
ep->me_key = key;
- ep->me_hash = hash;
+ ep->me_hash = (Py_ssize_t)hash;
ep->me_value = value;
mp->ma_used++;
}
actually be smaller than the old one.
*/
static int
-dictresize(dictobject *mp, int minused)
+dictresize(dictobject *mp, Py_ssize_t minused)
{
- int newsize;
+ Py_ssize_t newsize;
dictentry *oldtable, *newtable, *ep;
- int i;
+ Py_ssize_t i;
int is_oldtable_malloced;
dictentry small_copy[PyDict_MINSIZE];
{
register dictobject *mp;
register long hash;
- register int n_used;
+ register Py_ssize_t n_used;
if (!PyDict_Check(op)) {
PyErr_BadInternalCall();
* Quadrupling the size improves average dictionary sparseness
* (reducing collisions) at the cost of some memory and iteration
* speed (which loops over every possible entry). It also halves
- * the number of expensive resize operations in a growing dictionary.
+ * the number of expensive resize operations in a growing dictionary.
*
* Very large dictionaries (over 50K items) use doubling instead.
* This may help applications with severe memory constraints.
*/
if (!(mp->ma_used > n_used && mp->ma_fill*3 >= (mp->ma_mask+1)*2))
return 0;
- return dictresize(mp, (mp->ma_used>50000 ? mp->ma_used*2 : mp->ma_used*4));
+ return dictresize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
}
int
dictobject *mp;
dictentry *ep, *table;
int table_is_malloced;
- int fill;
+ Py_ssize_t fill;
dictentry small_copy[PyDict_MINSIZE];
#ifdef Py_DEBUG
- int i, n;
+ Py_ssize_t i, n;
#endif
if (!PyDict_Check(op))
/*
* Iterate over a dict. Use like so:
*
- * int i;
+ * Py_ssize_t i;
* PyObject *key, *value;
* i = 0; # important! i should not otherwise be changed by you
* while (PyDict_Next(yourdict, &i, &key, &value)) {
PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)
{
register Py_ssize_t i;
- register int mask;
+ register Py_ssize_t mask;
register dictentry *ep;
if (!PyDict_Check(op))
dict_dealloc(register dictobject *mp)
{
register dictentry *ep;
- int fill = mp->ma_fill;
+ Py_ssize_t fill = mp->ma_fill;
PyObject_GC_UnTrack(mp);
Py_TRASHCAN_SAFE_BEGIN(mp)
for (ep = mp->ma_table; fill > 0; ep++) {
static int
dict_print(register dictobject *mp, register FILE *fp, register int flags)
{
- register int i;
- register int any;
+ register Py_ssize_t i;
+ register Py_ssize_t any;
- i = Py_ReprEnter((PyObject*)mp);
+ i = (int)Py_ReprEnter((PyObject*)mp);
if (i != 0) {
if (i < 0)
return i;
PyObject *missing;
static PyObject *missing_str = NULL;
if (missing_str == NULL)
- missing_str =
+ missing_str =
PyString_InternFromString("__missing__");
missing = _PyType_Lookup(mp->ob_type, missing_str);
if (missing != NULL)
dict_keys(register dictobject *mp)
{
register PyObject *v;
- register int i, j;
+ register Py_ssize_t i, j;
dictentry *ep;
- int mask, n;
+ Py_ssize_t mask, n;
again:
n = mp->ma_used;
dict_values(register dictobject *mp)
{
register PyObject *v;
- register int i, j;
+ register Py_ssize_t i, j;
dictentry *ep;
- int mask, n;
+ Py_ssize_t mask, n;
again:
n = mp->ma_used;
dict_items(register dictobject *mp)
{
register PyObject *v;
- register int i, j, n;
- int mask;
+ register Py_ssize_t i, j, n;
+ Py_ssize_t mask;
PyObject *item, *key, *value;
dictentry *ep;
PyDict_MergeFromSeq2(PyObject *d, PyObject *seq2, int override)
{
PyObject *it; /* iter(seq2) */
- int i; /* index into seq2 of current element */
+ Py_ssize_t i; /* index into seq2 of current element */
PyObject *item; /* seq2[i] */
PyObject *fast; /* item as a 2-tuple or 2-list */
i = -1;
Return:
Py_DECREF(it);
- return i;
+ return (int)i;
}
int
PyDict_Merge(PyObject *a, PyObject *b, int override)
{
register PyDictObject *mp, *other;
- register int i;
+ register Py_ssize_t i;
dictentry *entry;
/* We accept for the argument either a concrete dictionary object,
PyDict_GetItem(a, entry->me_key) == NULL)) {
Py_INCREF(entry->me_key);
Py_INCREF(entry->me_value);
- insertdict(mp, entry->me_key, entry->me_hash,
+ insertdict(mp, entry->me_key,
+ (long)entry->me_hash,
entry->me_value);
}
}
{
PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
PyObject *aval = NULL; /* a[akey] */
- int i, cmp;
+ Py_ssize_t i;
+ int cmp;
for (i = 0; i <= a->ma_mask; i++) {
PyObject *thiskey, *thisaval, *thisbval;
* find its associated value anymore; or
* maybe it is but the compare deleted the
* a[thiskey] entry.
- */
+ */
Py_DECREF(thiskey);
continue;
}
static int
dict_equal(dictobject *a, dictobject *b)
{
- int i;
+ Py_ssize_t i;
if (a->ma_used != b->ma_used)
/* can't be equal if # of entries differ */
static PyObject *
dict_popitem(dictobject *mp)
{
- int i = 0;
+ Py_ssize_t i = 0;
dictentry *ep;
PyObject *res;
* happened, the result would be an infinite loop (searching for an
* entry that no longer exists). Note that the usual popitem()
* idiom is "while d: k, v = d.popitem()". so needing to throw the
- * tuple away if the dict *is* empty isn't a significant
+ * tuple away if the dict *is* empty isn't a significant
* inefficiency -- possible, but unlikely in practice.
*/
res = PyTuple_New(2);
* field of slot 0 to hold a search finger:
* If slot 0 has a value, use slot 0.
* Else slot 0 is being used to hold a search finger,
- * and we use its hash value as the first index to look.
+ * and we use its hash value as the first index to look.
*/
ep = &mp->ma_table[0];
if (ep->me_value == NULL) {
- i = (int)ep->me_hash;
+ i = ep->me_hash;
/* The hash field may be a real hash value, or it may be a
* legit search finger, or it may be a once-legit search
* finger that's out of bounds now because it wrapped around
typedef struct {
PyObject_HEAD
dictobject *di_dict; /* Set to NULL when iterator is exhausted */
- int di_used;
- int di_pos;
+ Py_ssize_t di_used;
+ Py_ssize_t di_pos;
PyObject* di_result; /* reusable result tuple for iteritems */
- long len;
+ Py_ssize_t len;
} dictiterobject;
static PyObject *
static PyObject *
dictiter_len(dictiterobject *di)
{
- long len = 0;
+ Py_ssize_t len = 0;
if (di->di_dict != NULL && di->di_used == di->di_dict->ma_used)
len = di->len;
- return PyInt_FromLong(len);
+ return PyInt_FromSize_t(len);
}
PyDoc_STRVAR(length_hint_doc, "Private method returning an estimate of len(list(it)).");
static PyObject *dictiter_iternextkey(dictiterobject *di)
{
PyObject *key;
- register int i, mask;
+ register Py_ssize_t i, mask;
register dictentry *ep;
dictobject *d = di->di_dict;
static PyObject *dictiter_iternextvalue(dictiterobject *di)
{
PyObject *value;
- register int i, mask;
+ register Py_ssize_t i, mask;
register dictentry *ep;
dictobject *d = di->di_dict;
static PyObject *dictiter_iternextitem(dictiterobject *di)
{
PyObject *key, *value, *result = di->di_result;
- register int i, mask;
+ register Py_ssize_t i, mask;
register dictentry *ep;
dictobject *d = di->di_dict;