]> git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
- Added new option to :func:`.relationship` ``distinct_target_key``.
authorMike Bayer <mike_mp@zzzcomputing.com>
Sun, 13 Oct 2013 20:54:21 +0000 (16:54 -0400)
committerMike Bayer <mike_mp@zzzcomputing.com>
Sun, 13 Oct 2013 20:54:21 +0000 (16:54 -0400)
This enables the subquery eager loader strategy to apply a DISTINCT
to the innermost SELECT subquery, to assist in the case where
duplicate rows are generated by the innermost query which corresponds
to this relationship (there's not yet a general solution to the issue
of dupe rows within subquery eager loading, however, when joins outside
of the innermost subquery produce dupes).  When the flag
is set to ``True``, the DISTINCT is rendered unconditionally, and when
it is set to ``None``, DISTINCT is rendered if the innermost relationship
targets columns that do not comprise a full primary key.
The option defaults to False in 0.8 (e.g. off by default in all cases),
None in 0.9 (e.g. automatic by default).   Thanks to Alexander Koval
for help with this. [ticket:2836]

Conflicts:
lib/sqlalchemy/orm/relationships.py

doc/build/changelog/changelog_08.rst
lib/sqlalchemy/orm/__init__.py
lib/sqlalchemy/orm/properties.py
lib/sqlalchemy/orm/strategies.py
test/orm/test_subquery_relations.py

index a126bcbeb2c777ad9ace2fad36db864e8a6023bd..f147fcfeda12f088e0617b1ccf68bb6926acf27a 100644 (file)
 .. changelog::
     :version: 0.8.3
 
+    .. change::
+        :tags: feature, orm
+        :tickets: 2836
+        :versions: 0.9.0
+
+        Added new option to :func:`.relationship` ``distinct_target_key``.
+        This enables the subquery eager loader strategy to apply a DISTINCT
+        to the innermost SELECT subquery, to assist in the case where
+        duplicate rows are generated by the innermost query which corresponds
+        to this relationship (there's not yet a general solution to the issue
+        of dupe rows within subquery eager loading, however, when joins outside
+        of the innermost subquery produce dupes).  When the flag
+        is set to ``True``, the DISTINCT is rendered unconditionally, and when
+        it is set to ``None``, DISTINCT is rendered if the innermost relationship
+        targets columns that do not comprise a full primary key.
+        The option defaults to False in 0.8 (e.g. off by default in all cases),
+        None in 0.9 (e.g. automatic by default).   Thanks to Alexander Koval
+        for help with this.
+
     .. change::
         :tags: bug, mysql
         :tickets: 2515
index 0984b3b37498f1478cef220518fb2e2d66e5d024..5183f4b7ab2e1ea32d1e6a93ffccaeae21eac46f 100644 (file)
@@ -431,6 +431,23 @@ def relationship(argument, secondary=None, **kwargs):
       or when the reference is one-to-one or a collection that is guaranteed
       to have one or at least one entry.
 
+    :param distinct_target_key=False:
+      Indicate if a "subquery" eager load should apply the DISTINCT
+      keyword to the innermost SELECT statement.  When set to ``None``,
+      the DISTINCT keyword will be applied in those cases when the target
+      columns do not comprise the full primary key of the target table.
+      When set to ``True``, the DISTINCT keyword is applied to the innermost
+      SELECT unconditionally.
+
+      This flag defaults as False in 0.8 but will default to None in 0.9.
+      It may be desirable to set this flag to False when the DISTINCT is
+      reducing performance of the innermost subquery beyond that of what
+      duplicate innermost rows may be causing.
+
+      .. versionadded:: 0.8.3 - distinct_target_key allows the
+         subquery eager loader to apply a DISTINCT modifier to the
+         innermost SELECT.
+
     :param join_depth:
       when non-``None``, an integer value indicating how many levels
       deep "eager" loaders should join on a self-referring or cyclical
index 9f8721de90dcef87ba80b5c9d57252b33ec558ef..f832091c0e4bda4822e7435f35aba605a072e96c 100644 (file)
@@ -256,6 +256,7 @@ class RelationshipProperty(StrategizedProperty):
         enable_typechecks=True, join_depth=None,
         comparator_factory=None,
         single_parent=False, innerjoin=False,
+        distinct_target_key=False,
         doc=None,
         active_history=False,
         cascade_backrefs=True,
@@ -283,6 +284,7 @@ class RelationshipProperty(StrategizedProperty):
         self.enable_typechecks = enable_typechecks
         self.query_class = query_class
         self.innerjoin = innerjoin
+        self.distinct_target_key = distinct_target_key
         self.doc = doc
         self.active_history = active_history
         self.join_depth = join_depth
index 7535a8c8810c4e7dc1d3fd54de9b04440c01fb73..b8ab55da4f45d79c5a5913030ea4cb8f103511e6 100644 (file)
@@ -709,7 +709,7 @@ class SubqueryLoader(AbstractRelationshipLoader):
             elif subq_path.contains_mapper(self.mapper):
                 return
 
-        subq_mapper, leftmost_mapper, leftmost_attr = \
+        subq_mapper, leftmost_mapper, leftmost_attr, leftmost_relationship = \
                 self._get_leftmost(subq_path)
 
         orig_query = context.attributes.get(
@@ -720,7 +720,8 @@ class SubqueryLoader(AbstractRelationshipLoader):
         # produce a subquery from it.
         left_alias = self._generate_from_original_query(
                             orig_query, leftmost_mapper,
-                            leftmost_attr, entity.mapper
+                            leftmost_attr, leftmost_relationship,
+                            entity.mapper
         )
 
         # generate another Query that will join the
@@ -769,11 +770,12 @@ class SubqueryLoader(AbstractRelationshipLoader):
             leftmost_mapper._columntoproperty[c].class_attribute
             for c in leftmost_cols
         ]
-        return subq_mapper, leftmost_mapper, leftmost_attr
+        return subq_mapper, leftmost_mapper, leftmost_attr, leftmost_prop
 
     def _generate_from_original_query(self,
             orig_query, leftmost_mapper,
-            leftmost_attr, entity_mapper
+            leftmost_attr, leftmost_relationship,
+            entity_mapper
     ):
         # reformat the original query
         # to look only for significant columns
@@ -784,8 +786,22 @@ class SubqueryLoader(AbstractRelationshipLoader):
         if not q._from_obj and entity_mapper.isa(leftmost_mapper):
             q._set_select_from([entity_mapper], False)
 
+        target_cols = q._adapt_col_list(leftmost_attr)
+
         # select from the identity columns of the outer
-        q._set_entities(q._adapt_col_list(leftmost_attr))
+        q._set_entities(target_cols)
+
+        distinct_target_key = leftmost_relationship.distinct_target_key
+
+        if distinct_target_key is True:
+            q._distinct = True
+        elif distinct_target_key is None:
+            # if target_cols refer to a non-primary key or only
+            # part of a composite primary key, set the q as distinct
+            for t in set(c.table for c in target_cols):
+                if not set(target_cols).issuperset(t.primary_key):
+                    q._distinct = True
+                    break
 
         if q._order_by is False:
             q._order_by = leftmost_mapper.order_by
index 3ee94cae9bc1df827841fb81856f9a56f5679ec1..fca7b3a80fa5b2f3d3132d7ecb78775ed48731b9 100644 (file)
@@ -1564,3 +1564,203 @@ class CyclicalInheritingEagerTestTwo(fixtures.DeclarativeMappedTest,
         d = session.query(Director).options(subqueryload('*')).first()
         assert len(list(session)) == 3
 
+
+class SubqueryloadDistinctTest(fixtures.DeclarativeMappedTest,
+                               testing.AssertsCompiledSQL):
+    __dialect__ = 'default'
+
+    run_inserts = 'once'
+    run_deletes = None
+
+    @classmethod
+    def setup_classes(cls):
+        Base = cls.DeclarativeBasic
+
+        class Director(Base):
+            __tablename__ = 'director'
+            id = Column(Integer, primary_key=True,
+                                    test_needs_autoincrement=True)
+            name = Column(String(50))
+
+        class DirectorPhoto(Base):
+            __tablename__ = 'director_photo'
+            id = Column(Integer, primary_key=True,
+                                    test_needs_autoincrement=True)
+            path = Column(String(255))
+            director_id = Column(Integer, ForeignKey('director.id'))
+            director = relationship(Director, backref="photos")
+
+        class Movie(Base):
+            __tablename__ = 'movie'
+            id = Column(Integer, primary_key=True, test_needs_autoincrement=True)
+            director_id = Column(Integer, ForeignKey('director.id'))
+            director = relationship(Director, backref="movies")
+            title = Column(String(50))
+            credits = relationship("Credit", backref="movie")
+
+        class Credit(Base):
+            __tablename__ = 'credit'
+            id = Column(Integer, primary_key=True, test_needs_autoincrement=True)
+            movie_id = Column(Integer, ForeignKey('movie.id'))
+
+    @classmethod
+    def insert_data(cls):
+        Movie = cls.classes.Movie
+        Director = cls.classes.Director
+        DirectorPhoto = cls.classes.DirectorPhoto
+        Credit = cls.classes.Credit
+
+        d = Director(name='Woody Allen')
+        d.photos = [DirectorPhoto(path='/1.jpg'),
+                    DirectorPhoto(path='/2.jpg')]
+        d.movies = [Movie(title='Manhattan', credits=[Credit(), Credit()]),
+                    Movie(title='Sweet and Lowdown', credits=[Credit()])]
+        sess = create_session()
+        sess.add_all([d])
+        sess.flush()
+
+    def test_distinct_strategy_opt_m2o(self):
+        self._run_test_m2o(True, None)
+        self._run_test_m2o(False, None)
+
+    def test_distinct_unrelated_opt_m2o(self):
+        self._run_test_m2o(None, True)
+        self._run_test_m2o(None, False)
+
+    def _run_test_m2o(self,
+            director_strategy_level,
+            photo_strategy_level):
+
+        # test where the innermost is m2o, e.g.
+        # Movie->director
+
+        Movie = self.classes.Movie
+        Director = self.classes.Director
+
+        Movie.director.property.distinct_target_key = director_strategy_level
+        Director.photos.property.distinct_target_key = photo_strategy_level
+
+        # the DISTINCT is controlled by
+        # only the Movie->director relationship, *not* the
+        # Director.photos
+        expect_distinct = director_strategy_level in (True, None)
+
+        s = create_session()
+
+        q = (
+            s.query(Movie)
+            .options(
+                subqueryload(Movie.director),
+                subqueryload(Movie.director, Director.photos)
+            )
+        )
+        ctx = q._compile_context()
+
+        q2 = ctx.attributes[
+            ('subquery', (inspect(Movie), inspect(Movie).attrs.director))
+        ]
+        self.assert_compile(
+            q2,
+            'SELECT director.id AS director_id, '
+            'director.name AS director_name, '
+            'anon_1.movie_director_id AS anon_1_movie_director_id '
+            'FROM (SELECT%s movie.director_id AS movie_director_id '
+            'FROM movie) AS anon_1 '
+            'JOIN director ON director.id = anon_1.movie_director_id '
+            'ORDER BY anon_1.movie_director_id' % (
+                    " DISTINCT" if expect_distinct else "")
+        )
+
+        ctx2 = q2._compile_context()
+        result = s.execute(q2)
+        rows = result.fetchall()
+
+        if expect_distinct:
+            eq_(rows, [
+                (1, 'Woody Allen', 1),
+            ])
+        else:
+            eq_(rows, [
+                (1, 'Woody Allen', 1), (1, 'Woody Allen', 1),
+            ])
+
+        q3 = ctx2.attributes[
+            ('subquery', (inspect(Director), inspect(Director).attrs.photos))
+        ]
+
+        self.assert_compile(
+            q3,
+            'SELECT director_photo.id AS director_photo_id, '
+            'director_photo.path AS director_photo_path, '
+            'director_photo.director_id AS director_photo_director_id, '
+            'director_1.id AS director_1_id '
+            'FROM (SELECT%s movie.director_id AS movie_director_id '
+            'FROM movie) AS anon_1 '
+            'JOIN director AS director_1 ON director_1.id = anon_1.movie_director_id '
+            'JOIN director_photo ON director_1.id = director_photo.director_id '
+            'ORDER BY director_1.id' % (
+                    " DISTINCT" if expect_distinct else "")
+        )
+        result = s.execute(q3)
+        rows = result.fetchall()
+        if expect_distinct:
+            eq_(rows, [
+                (1, u'/1.jpg', 1, 1),
+                (2, u'/2.jpg', 1, 1),
+            ])
+        else:
+            eq_(rows, [
+                (1, u'/1.jpg', 1, 1),
+                (2, u'/2.jpg', 1, 1),
+                (1, u'/1.jpg', 1, 1),
+                (2, u'/2.jpg', 1, 1),
+            ])
+
+
+        movies = q.all()
+
+        # check number of persistent objects in session
+        eq_(len(list(s)), 5)
+
+    def test_cant_do_distinct_in_joins(self):
+        """the DISTINCT feature here works when the m2o is in the innermost
+        mapper, but when we are just joining along relationships outside
+        of that, we can still have dupes, and there's no solution to that.
+
+        """
+        Movie = self.classes.Movie
+        Credit = self.classes.Credit
+
+        Credit.movie.property.distinct_target_key = False
+        Movie.director.property.distinct_target_key = False
+
+        s = create_session()
+
+        q = (
+            s.query(Credit)
+            .options(
+                subqueryload(Credit.movie),
+                subqueryload(Credit.movie, Movie.director)
+            )
+        )
+
+        ctx = q._compile_context()
+
+        q2 = ctx.attributes[
+            ('subquery', (inspect(Credit), Credit.movie.property))
+        ]
+        ctx2 = q2._compile_context()
+        q3 = ctx2.attributes[
+            ('subquery', (inspect(Movie), Movie.director.property))
+        ]
+
+        # three rows due to dupe at Credit.movie level
+        # as well as Movie.director level
+        result = s.execute(q3)
+        eq_(
+            result.fetchall(),
+            [
+                (1, 'Woody Allen', 1), (1, 'Woody Allen', 1),
+                    (1, 'Woody Allen', 1)
+            ]
+        )