From: Mike Bayer
Date: Sun, 23 Dec 2018 00:16:50 +0000 (-0500)
Subject: Check that a collection has fewer than two items remaining before firing scalar backref remove
X-Git-Tag: rel_1_3_0b2~51^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=847d1359421ebb3b4ba653ca1a9d238e62e8e8a8;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git

Check that a collection has fewer than two items remaining before firing scalar backref remove

Fixed long-standing issue where duplicate collection members would cause a
backref to delete the association between the member and its parent object
when one of the duplicates was removed, as occurs as a side effect of
swapping two objects in one statement.

Fixes: #1103
Change-Id: Ic12877f7bd5a4eb688091725a78410748e7fdf16
---

diff --git a/doc/build/changelog/migration_13.rst b/doc/build/changelog/migration_13.rst
index 3932202fce..fcc6776f96 100644
--- a/doc/build/changelog/migration_13.rst
+++ b/doc/build/changelog/migration_13.rst
@@ -539,6 +539,80 @@ Note that this change may be revised if it leads to problems.
 
 :ticket:`4268`
 
+.. _change_1103:
+
+Many-to-one backref checks for collection duplicates during remove operation
+-----------------------------------------------------------------------------
+
+When an ORM-mapped collection that existed as a Python sequence, typically a
+Python ``list`` as is the default for :func:`.relationship`, contained
+duplicates, and the object was removed from one of its positions but not the
+other(s), a many-to-one backref would set its attribute to ``None`` even
+though the one-to-many side still represented the object as present. Even
+though one-to-many collections cannot have duplicates in the relational model,
+an ORM-mapped :func:`.relationship` that uses a sequence collection can have
+duplicates inside of it in memory, with the restriction that this duplicate
+state can neither be persisted nor retrieved from the database. In particular,
+having a duplicate temporarily present in the list is intrinsic to a Python
+"swap" operation. Given a standard one-to-many/many-to-one setup::
+
+    class A(Base):
+        __tablename__ = 'a'
+
+        id = Column(Integer, primary_key=True)
+        bs = relationship("B", backref="a")
+
+
+    class B(Base):
+        __tablename__ = 'b'
+        id = Column(Integer, primary_key=True)
+        a_id = Column(ForeignKey("a.id"))
+
+If we have an ``A`` object with two ``B`` members, and perform a swap::
+
+    a1 = A(bs=[B(), B()])
+
+    a1.bs[0], a1.bs[1] = a1.bs[1], a1.bs[0]
+
+During the above operation, interception of the standard Python ``__setitem__``
+and ``__delitem__`` methods delivers an interim state where the second ``B()``
+object is present twice in the collection. When the ``B()`` object is removed
+from one of the positions, the ``B.a`` backref would set the reference to
+``None``, causing the link between the ``A`` and ``B`` object to be removed
+during the flush. The same issue can be demonstrated using plain duplicates::
+
+    >>> a1 = A()
+    >>> b1 = B()
+    >>> a1.bs.append(b1)
+    >>> a1.bs.append(b1)  # append the same b1 object twice
+    >>> del a1.bs[1]
+    >>> a1.bs  # collection is unaffected so far...
+    [<__main__.B object at 0x7f047af5fb70>]
+    >>> b1.a  # however b1.a is None
+    >>>
+    >>> session.add(a1)
+    >>> session.commit()  # so upon flush + expire....
+    >>> a1.bs  # the value is gone
+    []
+
+The fix ensures that when the backref fires off, which is before the
+collection is mutated, the collection is checked for exactly one or zero
+instances of the target item before unsetting the many-to-one side, using a
+linear scan of the collection that compares members by identity.
+
+Originally it was thought that an event-based reference counting scheme would
+need to be used within the collection internals so that all duplicate instances
+could be tracked throughout the lifecycle of the collection, which would have
+added a performance/memory/complexity impact to all collection operations,
+including the very frequent operations of loading and appending. The approach
+that is taken instead limits the additional expense to the less common
+operations of collection removal and bulk replacement, and the observed
+overhead of the linear scan is negligible; linear scans of relationship-bound
+collections are already used within the unit of work as well as when a
+collection is bulk replaced.
+
+
+:ticket:`1103`
 
 Key Behavioral Changes - ORM
 =============================
diff --git a/doc/build/changelog/unreleased_13/1103.rst b/doc/build/changelog/unreleased_13/1103.rst
new file mode 100644
index 0000000000..7b3577dab4
--- /dev/null
+++ b/doc/build/changelog/unreleased_13/1103.rst
@@ -0,0 +1,12 @@
+.. change::
+    :tags: bug, orm
+    :tickets: 1103
+
+    Fixed long-standing issue where duplicate collection members would cause a
+    backref to delete the association between the member and its parent object
+    when one of the duplicates was removed, as occurs as a side effect of
+    swapping two objects in one statement.
+
+    .. seealso::
+
+        :ref:`change_1103`
diff --git a/lib/sqlalchemy/orm/attributes.py b/lib/sqlalchemy/orm/attributes.py
index 5ba8be439a..b08c467413 100644
--- a/lib/sqlalchemy/orm/attributes.py
+++ b/lib/sqlalchemy/orm/attributes.py
@@ -1273,19 +1273,27 @@ def backref_listeners(attribute, key, uselist):
         if not child_impl.collection and not child_impl.dynamic:
             check_remove_token = child_impl._remove_token
             check_replace_token = child_impl._replace_token
+            check_for_dupes_on_remove = uselist and not parent_impl.dynamic
         else:
             check_remove_token = child_impl._remove_token
             check_replace_token = child_impl._bulk_replace_token \
                 if child_impl.collection else None
+            check_for_dupes_on_remove = False
 
         if initiator is not check_remove_token and \
                 initiator is not check_replace_token:
-            child_impl.pop(
-                child_state,
-                child_dict,
-                state.obj(),
-                initiator,
-                passive=PASSIVE_NO_FETCH)
+
+            if not check_for_dupes_on_remove or \
+                not util.has_dupes(
+                    # when this event is called, the item is usually
+                    # present in the list, except for a pop() operation.
+                    state.dict[parent_impl.key], child):
+                child_impl.pop(
+                    child_state,
+                    child_dict,
+                    state.obj(),
+                    initiator,
+                    passive=PASSIVE_NO_FETCH)
 
     if uselist:
         event.listen(attribute, "append",
diff --git a/lib/sqlalchemy/util/__init__.py b/lib/sqlalchemy/util/__init__.py
index 9229d07971..d8c28d6afc 100644
--- a/lib/sqlalchemy/util/__init__.py
+++ b/lib/sqlalchemy/util/__init__.py
@@ -21,7 +21,8 @@ from ._collections import KeyedTuple, ImmutableContainer, immutabledict, \
     UniqueAppender, PopulateDict, EMPTY_SET, to_list, to_set, \
     to_column_set, update_copy, flatten_iterator, has_intersection, \
     LRUCache, ScopedRegistry, ThreadLocalRegistry, WeakSequence, \
-    coerce_generator_arg, lightweight_named_tuple, collections_abc
+    coerce_generator_arg, lightweight_named_tuple, collections_abc, \
+    has_dupes
 
 from .langhelpers import iterate_attributes, class_hierarchy, \
     portable_instancemethod, unbound_method_to_callable, \
diff --git a/lib/sqlalchemy/util/_collections.py b/lib/sqlalchemy/util/_collections.py
index 3152458bf8..43440134ab 100644
--- a/lib/sqlalchemy/util/_collections.py
+++ b/lib/sqlalchemy/util/_collections.py
@@ -1057,3 +1057,33 @@ def _iter_id(iterable):
 
     for item in iterable:
         yield id(item), item
+
+
+def has_dupes(sequence, target):
+    """Given a sequence and a search object, return True if there is more
+    than one occurrence of the object in the sequence, False if there are
+    zero or one.
+
+
+    """
+    # compare to the .index version below; this version introduces less
+    # function overhead and is usually the same speed.  At 15000 items (way
+    # bigger than a relationship-bound collection in memory usually is) it
+    # begins to fall behind the other version only by microseconds.
+    c = 0
+    for item in sequence:
+        if item is target:
+            c += 1
+            if c > 1:
+                return True
+    return False
+
+# .index version.  the two __contains__ calls as well
+# as .index() and isinstance() slow this down.
+# def has_dupes(sequence, target):
+#     if target not in sequence:
+#         return False
+#     elif not isinstance(sequence, collections_abc.Sequence):
+#         return False
+#
+#     idx = sequence.index(target)
+#     return target in sequence[idx + 1:]
diff --git a/test/orm/test_backref_mutations.py b/test/orm/test_backref_mutations.py
index f32365fd61..e50d3ba424 100644
--- a/test/orm/test_backref_mutations.py
+++ b/test/orm/test_backref_mutations.py
@@ -292,6 +292,91 @@ class O2MCollectionTest(_fixtures.FixtureTest):
 
         assert a1 not in u1.addresses
 
+    def test_tuple_assignment_w_reverse(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        u1 = User()
+
+        a1 = Address(email_address="1")
+        a2 = Address(email_address="2")
+        a3 = Address(email_address="3")
+        u1.addresses.append(a1)
+        u1.addresses.append(a2)
+        u1.addresses.append(a3)
+
+        u1.addresses[1], u1.addresses[2] = u1.addresses[2], u1.addresses[1]
+
+        assert a3.user is u1
+
+        eq_(
+            u1.addresses,
+            [a1, a3, a2]
+        )
+
+    def test_straight_remove(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        u1 = User()
+
+        a1 = Address(email_address="1")
+        a2 = Address(email_address="2")
+        a3 = Address(email_address="3")
+        u1.addresses.append(a1)
+        u1.addresses.append(a2)
+        u1.addresses.append(a3)
+
+        del u1.addresses[2]
+
+        assert a3.user is None
+        eq_(
+            u1.addresses,
+            [a1, a2]
+        )
+
+    def test_append_del(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        u1 = User()
+
+        a1 = Address(email_address="1")
+        a2 = Address(email_address="2")
+        a3 = Address(email_address="3")
+        u1.addresses.append(a1)
+        u1.addresses.append(a2)
+        u1.addresses.append(a3)
+
+        u1.addresses.append(a2)
+        del u1.addresses[1]
+
+        assert a2.user is u1
+        eq_(
+            u1.addresses,
+            [a1, a3, a2]
+        )
+
+    def test_bulk_replace(self):
+        User, Address = self.classes.User, self.classes.Address
+
+        u1 = User()
+
+        a1 = Address(email_address="1")
+        a2 = Address(email_address="2")
+        a3 = Address(email_address="3")
+        u1.addresses.append(a1)
+        u1.addresses.append(a2)
+        u1.addresses.append(a3)
+        u1.addresses.append(a3)
+
+        assert a3.user is u1
+
+        u1.addresses = [a1, a2, a1]
+
+        assert a3.user is None
+        eq_(
+            u1.addresses,
+            [a1, a2, a1]
+        )
+
 
 class O2OScalarBackrefMoveTest(_fixtures.FixtureTest):
     run_inserts = None
@@ -810,3 +895,4 @@ class M2MStaleBackrefTest(_fixtures.FixtureTest):
         i1.keywords = []
         k2.items.remove(i1)
         assert len(k2.items) == 0
+
diff --git a/test/profiles.txt b/test/profiles.txt
index 4aeadffe31..43d8dfb730 100644
--- a/test/profiles.txt
+++ b/test/profiles.txt
@@ -292,34 +292,34 @@ test.aaa_profiling.test_orm.AttributeOverheadTest.test_attribute_set 3.7_sqlite_
 
 # TEST: test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove
 
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_mssql_pyodbc_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_mssql_pyodbc_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_mysql_mysqldb_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_mysql_mysqldb_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_oracle_cx_oracle_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_oracle_cx_oracle_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_postgresql_psycopg2_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_postgresql_psycopg2_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_sqlite_pysqlite_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_sqlite_pysqlite_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_mssql_pyodbc_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_mssql_pyodbc_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_mysql_mysqldb_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_mysql_mysqldb_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_oracle_cx_oracle_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_oracle_cx_oracle_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_postgresql_psycopg2_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_postgresql_psycopg2_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_sqlite_pysqlite_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_sqlite_pysqlite_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_mysql_mysqldb_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_mysql_mysqldb_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_oracle_cx_oracle_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_oracle_cx_oracle_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_postgresql_psycopg2_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_postgresql_psycopg2_dbapiunicode_nocextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_sqlite_pysqlite_dbapiunicode_cextensions 5822
-test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_sqlite_pysqlite_dbapiunicode_nocextensions 5822
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_mssql_pyodbc_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_mssql_pyodbc_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_mysql_mysqldb_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_mysql_mysqldb_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_oracle_cx_oracle_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_oracle_cx_oracle_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_postgresql_psycopg2_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_postgresql_psycopg2_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_sqlite_pysqlite_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 2.7_sqlite_pysqlite_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_mssql_pyodbc_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_mssql_pyodbc_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_mysql_mysqldb_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_mysql_mysqldb_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_oracle_cx_oracle_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_oracle_cx_oracle_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_postgresql_psycopg2_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_postgresql_psycopg2_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_sqlite_pysqlite_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.6_sqlite_pysqlite_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_mysql_mysqldb_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_mysql_mysqldb_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_oracle_cx_oracle_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_oracle_cx_oracle_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_postgresql_psycopg2_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_postgresql_psycopg2_dbapiunicode_nocextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_sqlite_pysqlite_dbapiunicode_cextensions 6222
+test.aaa_profiling.test_orm.AttributeOverheadTest.test_collection_append_remove 3.7_sqlite_pysqlite_dbapiunicode_nocextensions 6222
 
 # TEST: test.aaa_profiling.test_orm.BranchedOptionTest.test_generate_cache_key_bound_branching
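
A note on the interim duplicate state described in the migration notes above: it is produced by plain
Python tuple-assignment semantics, not by anything ORM-specific. The sketch below (the ``LoggingList``
subclass is purely illustrative and not part of the patch) logs each ``__setitem__`` call during a swap,
showing the moment where one element is present twice::

    class LoggingList(list):
        """A list that reports each __setitem__ call, to show the interim
        state a swap passes through."""

        def __setitem__(self, index, value):
            super(LoggingList, self).__setitem__(index, value)
            print("after __setitem__(%r, %r): %r" % (index, value, list(self)))


    items = LoggingList(["a", "b"])

    # the right-hand tuple ("b", "a") is built first, then the two
    # assignments run one at a time
    items[0], items[1] = items[1], items[0]

    # output:
    # after __setitem__(0, 'b'): ['b', 'b']   <- duplicate interim state
    # after __setitem__(1, 'a'): ['b', 'a']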
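To see the corrected behavior end to end, the following self-contained sketch reproduces the ``A``/``B``
example from the migration notes; the ``declarative_base`` setup, the in-memory SQLite engine and the
``Session`` are assumptions added only to make the script runnable and are not part of the patch itself.
With the duplicate check in place, removing one of two references to the same ``B`` object leaves the
``B.a`` backref intact::

    from sqlalchemy import Column, ForeignKey, Integer, create_engine
    from sqlalchemy.ext.declarative import declarative_base
    from sqlalchemy.orm import Session, relationship

    Base = declarative_base()


    class A(Base):
        __tablename__ = 'a'

        id = Column(Integer, primary_key=True)
        bs = relationship("B", backref="a")


    class B(Base):
        __tablename__ = 'b'
        id = Column(Integer, primary_key=True)
        a_id = Column(ForeignKey("a.id"))


    engine = create_engine("sqlite://")
    Base.metadata.create_all(engine)

    a1 = A()
    b1 = B()
    a1.bs.append(b1)
    a1.bs.append(b1)   # the same b1 object appears twice
    del a1.bs[1]       # remove one of the duplicate positions

    # b1 is still present in a1.bs, so the backref is left alone
    assert b1.a is a1

    session = Session(engine)
    session.add(a1)
    session.commit()

    # the association survives the flush / expire cycle
    assert a1.bs == [b1]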
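Finally, the duplicate check itself is the linear identity scan added to
``lib/sqlalchemy/util/_collections.py`` above, reproduced here in standalone form with a small usage
demonstration (the ``Thing`` class is only a stand-in for any mapped object)::

    def has_dupes(sequence, target):
        """Return True if ``target`` occurs more than once in ``sequence``,
        comparing members by identity; False for zero or one occurrence."""
        count = 0
        for item in sequence:
            if item is target:
                count += 1
                if count > 1:
                    return True
        return False


    class Thing(object):
        pass


    t1, t2 = Thing(), Thing()

    assert not has_dupes([t1, t2], t1)       # a single reference: no duplicate
    assert has_dupes([t1, t2, t1], t1)       # t1 is present twice
    assert not has_dupes([t1, t2], Thing())  # a different object entirely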