git.ipfire.org Git - thirdparty/Python/cpython.git/commitdiff
* Simplify hash function and add test to show effectiveness of the hash
author Raymond Hettinger <python@rcn.com>
Sun, 23 Nov 2003 02:49:05 +0000 (02:49 +0000)
committer Raymond Hettinger <python@rcn.com>
Sun, 23 Nov 2003 02:49:05 +0000 (02:49 +0000)
  function.

* Add a better test for deepcopying.

* Add tests to show the __init__() function works like it does for list
  and tuple.  Add related test.

* Have shallow copies of frozensets return self.  Add related test.

* Have frozenset(f) return f if f is already a frozenset. Add related test.

* Beefed-up some existing tests.

Lib/test/test_set.py
Objects/setobject.c

index 85f87f7d1222918946fe6b6dff9575e272e77937..939e49097e415f078d30c4cbdb1aedc5fbeaa4c2 100644 (file)
@@ -38,15 +38,11 @@ class TestJointOps(unittest.TestCase):
         s = self.thetype([frozenset(self.letters)])
         self.assert_(self.thetype(self.letters) in s)
 
-    def test_copy(self):
-        dup = self.s.copy()
-        self.assertEqual(self.s, dup)
-        self.assertNotEqual(id(self.s), id(dup))
-
     def test_union(self):
         u = self.s.union(self.otherword)
         for c in self.letters:
             self.assertEqual(c in u, c in self.d or c in self.otherword)
+        self.assertEqual(self.s, self.thetype(self.word))
         self.assertEqual(type(u), self.thetype)
         self.assertRaises(PassThru, self.s.union, check_pass_thru())
         self.assertRaises(TypeError, self.s.union, [[]])
@@ -66,6 +62,7 @@ class TestJointOps(unittest.TestCase):
         i = self.s.intersection(self.otherword)
         for c in self.letters:
             self.assertEqual(c in i, c in self.d and c in self.otherword)
+        self.assertEqual(self.s, self.thetype(self.word))
         self.assertEqual(type(i), self.thetype)
         self.assertRaises(PassThru, self.s.intersection, check_pass_thru())
 
@@ -84,6 +81,7 @@ class TestJointOps(unittest.TestCase):
         i = self.s.difference(self.otherword)
         for c in self.letters:
             self.assertEqual(c in i, c in self.d and c not in self.otherword)
+        self.assertEqual(self.s, self.thetype(self.word))
         self.assertEqual(type(i), self.thetype)
         self.assertRaises(PassThru, self.s.difference, check_pass_thru())
         self.assertRaises(TypeError, self.s.difference, [[]])
@@ -103,6 +101,7 @@ class TestJointOps(unittest.TestCase):
         i = self.s.symmetric_difference(self.otherword)
         for c in self.letters:
             self.assertEqual(c in i, (c in self.d) ^ (c in self.otherword))
+        self.assertEqual(self.s, self.thetype(self.word))
         self.assertEqual(type(i), self.thetype)
         self.assertRaises(PassThru, self.s.symmetric_difference, check_pass_thru())
         self.assertRaises(TypeError, self.s.symmetric_difference, [[]])
@@ -155,16 +154,38 @@ class TestJointOps(unittest.TestCase):
         dup = pickle.loads(p)
         self.assertEqual(self.s, dup, "%s != %s" % (self.s, dup))
 
+    def test_deepcopy(self):
+        class Tracer:
+            def __init__(self, value):
+                self.value = value
+            def __hash__(self):
+                    return self.value
+            def __deepcopy__(self, memo=None):
+                return Tracer(self.value + 1)
+        t = Tracer(10)
+        s = self.thetype([t])
+        dup = copy.deepcopy(s)
+        self.assertNotEqual(id(s), id(dup))
+        for elem in dup:
+            newt = elem
+        self.assertNotEqual(id(t), id(newt))
+        self.assertEqual(t.value + 1, newt.value)
+
 class TestSet(TestJointOps):
     thetype = set
 
     def test_init(self):
-        s = set()
+        s = self.thetype()
         s.__init__(self.word)
         self.assertEqual(s, set(self.word))
         s.__init__(self.otherword)
         self.assertEqual(s, set(self.otherword))
 
+    def test_constructor_identity(self):
+        s = self.thetype(range(3))
+        t = self.thetype(s)
+        self.assertNotEqual(id(s), id(t))
+
     def test_hash(self):
         self.assertRaises(TypeError, hash, self.s)
 
@@ -172,6 +193,11 @@ class TestSet(TestJointOps):
         self.s.clear()
         self.assertEqual(self.s, set([]))
 
+    def test_copy(self):
+        dup = self.s.copy()
+        self.assertEqual(self.s, dup)
+        self.assertNotEqual(id(self.s), id(dup))
+
     def test_add(self):
         self.s.add('Q')
         self.assert_('Q' in self.s)
@@ -285,17 +311,27 @@ class TestFrozenSet(TestJointOps):
     thetype = frozenset
 
     def test_init(self):
-        s = frozenset()
-        s.__init__(self.word)
-        self.assertEqual(s, frozenset())
+        s = self.thetype(self.word)
+        s.__init__(self.otherword)
+        self.assertEqual(s, set(self.word))
+
+    def test_constructor_identity(self):
+        s = self.thetype(range(3))
+        t = self.thetype(s)
+        self.assertEqual(id(s), id(t))
 
     def test_hash(self):
-        self.assertEqual(hash(frozenset('abcdeb')), hash(frozenset('ebecda')))
+        self.assertEqual(hash(self.thetype('abcdeb')),
+                         hash(self.thetype('ebecda')))
+
+    def test_copy(self):
+        dup = self.s.copy()
+        self.assertEqual(id(self.s), id(dup))
 
     def test_frozen_as_dictkey(self):
         seq = range(10) + list('abcdefg') + ['apple']
-        key1 = frozenset(seq)
-        key2 = frozenset(reversed(seq))
+        key1 = self.thetype(seq)
+        key2 = self.thetype(reversed(seq))
         self.assertEqual(key1, key2)
         self.assertNotEqual(id(key1), id(key2))
         d = {}
@@ -303,15 +339,38 @@ class TestFrozenSet(TestJointOps):
         self.assertEqual(d[key2], 42)
 
     def test_hash_caching(self):
-        f = frozenset('abcdcda')
+        f = self.thetype('abcdcda')
         self.assertEqual(hash(f), hash(f))
 
+    def test_hash_effectiveness(self):
+        n = 13
+        rng = range(n)
+        hashvalues = set()
+        for i in xrange(2**n):
+            combination = [j for j in rng if (1<<j)&i]
+            hashvalues.add(hash(self.thetype(combination)))
+        self.assert_(len(hashvalues) >= 2**(n-2))
+
 class FrozenSetSubclass(frozenset):
     pass
 
 class TestFrozenSetSubclass(TestFrozenSet):
     thetype = FrozenSetSubclass
 
+    def test_constructor_identity(self):
+        s = self.thetype(range(3))
+        t = self.thetype(s)
+        self.assertNotEqual(id(s), id(t))
+
+    def test_copy(self):
+        dup = self.s.copy()
+        self.assertNotEqual(id(self.s), id(dup))
+
+    def test_nested_empty_constructor(self):
+        s = self.thetype()
+        t = self.thetype(s)
+        self.assertEqual(s, t)
+
 # Tests taken from test_sets.py =============================================
 
 empty_set = set()
index be73954b45466a1b4050d3a1b8a40e4ae20f7f42..fab07fbe76aceb49751f14b512c8cd364ef0cfc8 100644 (file)
@@ -64,6 +64,10 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 
        if (!PyArg_UnpackTuple(args, type->tp_name, 0, 1, &iterable))
                return NULL;
+       if (iterable != NULL && iterable->ob_type == &PyFrozenSet_Type) {
+               Py_INCREF(iterable);
+               return iterable;
+       }
        return make_new_set(type, iterable);
 }
 
@@ -154,6 +158,16 @@ set_copy(PySetObject *so)
        return (PyObject *)newso;
 }
 
+static PyObject *
+frozenset_copy(PySetObject *so)
+{
+       if (so->ob_type == &PyFrozenSet_Type) {
+               Py_INCREF(so);
+               return (PyObject *)so;
+       }
+       return set_copy(so);
+}
+
 PyDoc_STRVAR(copy_doc, "Return a shallow copy of a set.");
 
 static PyObject *
@@ -686,7 +700,7 @@ frozenset_hash(PyObject *self)
 {
        PyObject *it, *item;
        PySetObject *so = (PySetObject *)self;
-       long hash = 0, x;
+       long hash = 0;
 
        if (so->hash != -1)
                return so->hash;
@@ -696,14 +710,12 @@ frozenset_hash(PyObject *self)
                return -1;
 
        while ((item = PyIter_Next(it)) != NULL) {
-               x = PyObject_Hash(item);
-               /* Applying  x*(x+1) breaks-up linear relationships so that
-                  h(1) ^ h(2) will be less likely to coincide with hash(3).
-                  Multiplying by a large prime increases the dispersion 
-                  between consecutive hashes.  Adding one bit from the 
-                  original restores the one bit lost during the multiply 
-                  (all the products are even numbers).  */
-               hash ^= (x * (x+1) * 3644798167) | (x&1);
+               /* Multiplying by a large prime increases the bit dispersion for
+                  closely spaced hash values.  This is important because some
+                  use cases have many combinations of a small number of 
+                  elements with nearby hashes so that many distinct combinations
+                  collapse to only a handful of distinct hash values. */
+               hash ^= PyObject_Hash(item) * 3644798167;
                Py_DECREF(item);
        }
        Py_DECREF(it);
@@ -1096,17 +1108,17 @@ PyTypeObject PySet_Type = {
 
 
 static PyMethodDef frozenset_methods[] = {
-       {"copy",        (PyCFunction)set_copy,          METH_NOARGS,
+       {"copy",        (PyCFunction)frozenset_copy,    METH_NOARGS,
         copy_doc},
-       {"__copy__",    (PyCFunction)set_copy,          METH_NOARGS,
+       {"__copy__",    (PyCFunction)frozenset_copy,    METH_NOARGS,
         copy_doc},
-       {"difference",(PyCFunction)set_difference,      METH_O,
+       {"difference",  (PyCFunction)set_difference,    METH_O,
         difference_doc},
        {"intersection",(PyCFunction)set_intersection,  METH_O,
         intersection_doc},
-       {"issubset",(PyCFunction)set_issubset,          METH_O,
+       {"issubset",    (PyCFunction)set_issubset,      METH_O,
         issubset_doc},
-       {"issuperset",(PyCFunction)set_issuperset,      METH_O,
+       {"issuperset",  (PyCFunction)set_issuperset,    METH_O,
         issuperset_doc},
        {"__reduce__",  (PyCFunction)set_reduce,        METH_NOARGS,
         reduce_doc},