From: Gregory P. Smith Date: Sat, 14 Jan 2012 23:45:13 +0000 (-0800) Subject: Consolidate the occurrences of the prime used as the multiplier when hashing. X-Git-Tag: v3.3.0a1~399 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=f5b62a9b314e71e6bd5014be92ad1d10ff8e0d63;p=thirdparty%2FPython%2Fcpython.git Consolidate the occurrences of the prime used as the multiplier when hashing. --- f5b62a9b314e71e6bd5014be92ad1d10ff8e0d63 diff --cc Include/pyport.h index 5309de6a8092,269ba68e68f1..1fd4dcce03b0 --- a/Include/pyport.h +++ b/Include/pyport.h @@@ -131,8 -131,11 +131,11 @@@ Used in: PY_LONG_LON #endif #endif + /* Prime multiplier used in string and various other hashes. */ + #define _PyHASH_MULTIPLIER 1000003 /* 0xf4243 */ + /* Parameters used for the numeric hash implementation. See notes for - _PyHash_Double in Objects/object.c. Numeric hashes are based on + _Py_HashDouble in Objects/object.c. Numeric hashes are based on reduction modulo the prime 2**_PyHASH_BITS - 1. */ #if SIZEOF_VOID_P >= 8 diff --cc Objects/object.c index 9060c827d53e,694e7e719e94..441068d1e35f --- a/Objects/object.c +++ b/Objects/object.c @@@ -753,21 -746,6 +753,21 @@@ _Py_HashPointer(void *p return x; } +Py_hash_t +_Py_HashBytes(unsigned char *p, Py_ssize_t len) +{ + Py_uhash_t x; + Py_ssize_t i; + + x = (Py_uhash_t) *p << 7; + for (i = 0; i < len; i++) - x = (1000003U * x) ^ (Py_uhash_t) *p++; ++ x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++; + x ^= (Py_uhash_t) len; + if (x == -1) + x = -2; + return x; +} + Py_hash_t PyObject_HashNotImplemented(PyObject *v) { diff --cc Objects/tupleobject.c index 54a580d4a33c,f6dbc315d90a..d58839e09174 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@@ -312,12 -312,11 +312,12 @@@ error static Py_hash_t tuplehash(PyTupleObject *v) { - register Py_hash_t x, y; + register Py_uhash_t x; + register Py_hash_t y; register Py_ssize_t len = Py_SIZE(v); register PyObject **p; - Py_uhash_t mult = 1000003; - Py_hash_t mult = 
_PyHASH_MULTIPLIER; - x = 0x345678L; ++ Py_uhash_t mult = _PyHASH_MULTIPLIER; + x = 0x345678; p = v->ob_item; while (--len >= 0) { y = PyObject_Hash(*p++); diff --cc Objects/unicodeobject.c index e97ce1f6e6c5,20528b931d02..59fc12379066 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@@ -11193,53 -7652,27 +11193,53 @@@ unicode_getitem(PyObject *self, Py_ssiz } /* Believe it or not, this produces the same value for ASCII strings - as string_hash(). */ + as bytes_hash(). */ static Py_hash_t -unicode_hash(PyUnicodeObject *self) +unicode_hash(PyObject *self) { Py_ssize_t len; - Py_UNICODE *p; - Py_hash_t x; - - if (self->hash != -1) - return self->hash; - len = Py_SIZE(self); - p = self->str; - x = *p << 7; - while (--len >= 0) - x = (_PyHASH_MULTIPLIER*x) ^ *p++; - x ^= Py_SIZE(self); + Py_uhash_t x; + + if (_PyUnicode_HASH(self) != -1) + return _PyUnicode_HASH(self); + if (PyUnicode_READY(self) == -1) + return -1; + len = PyUnicode_GET_LENGTH(self); + + /* The hash function as a macro, gets expanded three times below. */ +#define HASH(P) \ + x = (Py_uhash_t)*P << 7; \ + while (--len >= 0) \ - x = (1000003*x) ^ (Py_uhash_t)*P++; ++ x = (_PyHASH_MULTIPLIER*x) ^ (Py_uhash_t)*P++; + + switch (PyUnicode_KIND(self)) { + case PyUnicode_1BYTE_KIND: { + const unsigned char *c = PyUnicode_1BYTE_DATA(self); + HASH(c); + break; + } + case PyUnicode_2BYTE_KIND: { + const Py_UCS2 *s = PyUnicode_2BYTE_DATA(self); + HASH(s); + break; + } + default: { + Py_UCS4 *l; + assert(PyUnicode_KIND(self) == PyUnicode_4BYTE_KIND && + "Impossible switch case in unicode_hash"); + l = PyUnicode_4BYTE_DATA(self); + HASH(l); + break; + } + } + x ^= (Py_uhash_t)PyUnicode_GET_LENGTH(self); + if (x == -1) x = -2; - self->hash = x; + _PyUnicode_HASH(self) = x; return x; } +#undef HASH PyDoc_STRVAR(index__doc__, "S.index(sub[, start[, end]]) -> int\n\