Consolidate the occurrences of the prime used as the multiplier when hashing.

author Gregory P. Smith <greg@krypto.org>

Sat, 14 Jan 2012 23:45:13 +0000 (15:45 -0800)

committer Gregory P. Smith <greg@krypto.org>

Sat, 14 Jan 2012 23:45:13 +0000 (15:45 -0800)
author Gregory P. Smith <greg@krypto.org>
Sat, 14 Jan 2012 23:45:13 +0000 (15:45 -0800)
committer Gregory P. Smith <greg@krypto.org>
Sat, 14 Jan 2012 23:45:13 +0000 (15:45 -0800)
diff --cc Include/pyport.h

index 5309de6a8092e3a3aa21be15dabfc9565c436b8c,269ba68e68f1918130904f51dd1f3f250976e637..1fd4dcce03b079fa3695212bcf0855ced22abdbb
--- 1/Include/pyport.h
--- 2/Include/pyport.h
+++ b/Include/pyport.h
@@@ -131,8 -131,11 +131,11 @@@ Used in:  PY_LONG_LON
   #endif
   #endif
   
+ /* Prime multiplier used in string and various other hashes. */
+ #define _PyHASH_MULTIPLIER 1000003  /* 0xf4243 */
+ 
   /* Parameters used for the numeric hash implementation.  See notes for
- -   _PyHash_Double in Objects/object.c.  Numeric hashes are based on
+ +   _Py_HashDouble in Objects/object.c.  Numeric hashes are based on
      reduction modulo the prime 2**_PyHASH_BITS - 1. */
   
   #if SIZEOF_VOID_P >= 8
diff --cc Objects/object.c

index 9060c827d53eb55e32d3c354ff31048d74b0388b,694e7e719e94b2bac449e16c8abc387ff0fea1e7..441068d1e35f78156148c9ca1dac8b0168b35dab
--- 1/Objects/object.c
--- 2/Objects/object.c
+++ b/Objects/object.c
@@@ -753,21 -746,6 +753,21 @@@ _Py_HashPointer(void *p
       return x;
   }
   
-         x = (1000003U * x) ^ (Py_uhash_t) *p++;
+ +Py_hash_t
+ +_Py_HashBytes(unsigned char *p, Py_ssize_t len)
+ +{
+ +    Py_uhash_t x;
+ +    Py_ssize_t i;
+ +
+ +    x = (Py_uhash_t) *p << 7;
+ +    for (i = 0; i < len; i++)
++        x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
+ +    x ^= (Py_uhash_t) len;
+ +    if (x == -1)
+ +        x = -2;
+ +    return x;
+ +}
+ +
   Py_hash_t
   PyObject_HashNotImplemented(PyObject *v)
   {
diff --cc Objects/tupleobject.c

index 54a580d4a33ce507d256be60317631fa2d719029,f6dbc315d90a7e0b74a6d4cff177454a2c4d0bc7..d58839e0917411a133d6c8c9a182c96be239eb99
--- 1/Objects/tupleobject.c
--- 2/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@@ -312,12 -312,11 +312,12 @@@ error
   static Py_hash_t
   tuplehash(PyTupleObject *v)
   {
- -    register Py_hash_t x, y;
+ +    register Py_uhash_t x;
+ +    register Py_hash_t y;
       register Py_ssize_t len = Py_SIZE(v);
       register PyObject **p;
-     Py_uhash_t mult = 1000003;
- -    Py_hash_t mult = _PyHASH_MULTIPLIER;
- -    x = 0x345678L;
++    Py_uhash_t mult = _PyHASH_MULTIPLIER;
+ +    x = 0x345678;
       p = v->ob_item;
       while (--len >= 0) {
           y = PyObject_Hash(*p++);
diff --cc Objects/unicodeobject.c

index e97ce1f6e6c5727fca9969e5c9b13245ffb64250,20528b931d02afc802fbabfb2a3c7c48df692372..59fc123790666144ae82683a74fa20a7c566a0c6
--- 1/Objects/unicodeobject.c
--- 2/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@@ -11193,53 -7652,27 +11193,53 @@@ unicode_getitem(PyObject *self, Py_ssiz
   }
   
   /* Believe it or not, this produces the same value for ASCII strings
- -   as string_hash(). */
+ +   as bytes_hash(). */
   static Py_hash_t
- -unicode_hash(PyUnicodeObject *self)
+ +unicode_hash(PyObject *self)
   {
       Py_ssize_t len;
- -    Py_UNICODE *p;
- -    Py_hash_t x;
- -
- -    if (self->hash != -1)
- -        return self->hash;
- -    len = Py_SIZE(self);
- -    p = self->str;
- -    x = *p << 7;
- -    while (--len >= 0)
- -        x = (_PyHASH_MULTIPLIER*x) ^ *p++;
- -    x ^= Py_SIZE(self);
+ +    Py_uhash_t x;
+ +
+ +    if (_PyUnicode_HASH(self) != -1)
+ +        return _PyUnicode_HASH(self);
+ +    if (PyUnicode_READY(self) == -1)
+ +        return -1;
+ +    len = PyUnicode_GET_LENGTH(self);
+ +
+ +    /* The hash function as a macro, gets expanded three times below. */
+ +#define HASH(P) \
+ +    x = (Py_uhash_t)*P << 7; \
+ +    while (--len >= 0) \
-         x = (1000003*x) ^ (Py_uhash_t)*P++;
++        x = (_PyHASH_MULTIPLIER*x) ^ (Py_uhash_t)*P++;
+ +
+ +    switch (PyUnicode_KIND(self)) {
+ +    case PyUnicode_1BYTE_KIND: {
+ +        const unsigned char *c = PyUnicode_1BYTE_DATA(self);
+ +        HASH(c);
+ +        break;
+ +    }
+ +    case PyUnicode_2BYTE_KIND: {
+ +        const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self);
+ +        HASH(s);
+ +        break;
+ +    }
+ +    default: {
+ +        Py_UCS4 *l;
+ +        assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND &&
+ +               "Impossible switch case in unicode_hash");
+ +        l = PyUnicode_4BYTE_DATA(self);
+ +        HASH(l);
+ +        break;
+ +    }
+ +    }
+ +    x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self);
+ +
       if (x == -1)
           x = -2;
- -    self->hash = x;
+ +    _PyUnicode_HASH(self) = x;
       return x;
   }
+ +#undef HASH
   
   PyDoc_STRVAR(index__doc__,
                "S.index(sub[, start[, end]]) -> int\n\
author	Gregory P. Smith <greg@krypto.org>
	Sat, 14 Jan 2012 23:45:13 +0000 (15:45 -0800)
committer	Gregory P. Smith <greg@krypto.org>
	Sat, 14 Jan 2012 23:45:13 +0000 (15:45 -0800)
		1	2
Include/pyport.h	patch \|	diff1 \|	diff2 \|	blob \| history
Objects/object.c	patch \|	diff1 \|	diff2 \|	blob \| history
Objects/tupleobject.c	patch \|	diff1 \|	diff2 \|	blob \| history
Objects/unicodeobject.c	patch \|	diff1 \|	diff2 \|	blob \| history