create second level deduping when use_labels is turned on

author Mike Bayer <mike_mp@zzzcomputing.com>

Sat, 5 Oct 2019 22:27:44 +0000 (18:27 -0400)

committer Mike Bayer <mike_mp@zzzcomputing.com>

Mon, 7 Oct 2019 15:26:10 +0000 (11:26 -0400)
author Mike Bayer <mike_mp@zzzcomputing.com>
Sat, 5 Oct 2019 22:27:44 +0000 (18:27 -0400)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Mon, 7 Oct 2019 15:26:10 +0000 (11:26 -0400)
diff --git a/lib/sqlalchemy/sql/elements.py b/lib/sqlalchemy/sql/elements.py

index 8ee157b6f8baafb7c4aee38f3aff471bc86f3686..b6462b33472b923b630e976d6901fdb4e0f49197 100644 (file)
--- a/lib/sqlalchemy/sql/elements.py
+++ b/lib/sqlalchemy/sql/elements.py
@@ -878,7 +878,13 @@ class ColumnElement(
          while self._is_clone_of is not None:
              self = self._is_clone_of
  
-        return _anonymous_label("%%(%d %s)s" % (id(self), seed or "anon"))
+        # as of 1.4 anonymous label for ColumnElement uses hash(), not id(),
+        # as the identifier, because a column and its annotated version are
+        # the same thing in a SQL statement
+        if isinstance(seed, _anonymous_label):
+            return _anonymous_label("%s%%(%d %s)s" % (seed, hash(self), ""))
+
+        return _anonymous_label("%%(%d %s)s" % (hash(self), seed or "anon"))
  
      @util.memoized_property
      def anon_label(self):
@@ -900,6 +906,10 @@ class ColumnElement(
      def _label_anon_label(self):
          return self._anon_label(getattr(self, "_label", None))
  
+    @util.memoized_property
+    def _dedupe_label_anon_label(self):
+        return self._anon_label(getattr(self, "_label", "anon") + "_")
+
  
  class WrapsColumnExpression(object):
      """Mixin that defines a :class:`.ColumnElement` as a wrapper with special
diff --git a/lib/sqlalchemy/sql/selectable.py b/lib/sqlalchemy/sql/selectable.py

index 33ba95717c9db10a2a312b48b6198d9af36876bb..ddbcdf91d6363f1026ff716b567762c063312b82 100644 (file)
--- a/lib/sqlalchemy/sql/selectable.py
+++ b/lib/sqlalchemy/sql/selectable.py
@@ -4174,33 +4174,78 @@ class Select(
              (name_for_col(c), c) for c in cols
          ).as_immutable()
  
-    @_memoized_property
-    def _columns_plus_names(self):
+    def _generate_columns_plus_names(self, anon_for_dupe_key):
          cols = _select_iterables(self._raw_columns)
  
+        # when use_labels is on:
+        # in all cases == if we see the same label name, use _label_anon_label
+        # for subsequent occurences of that label
+        #
+        # anon_for_dupe_key == if we see the same column object multiple
+        # times under a particular name, whether it's the _label name or the
+        # anon label, apply _dedupe_label_anon_label to the subsequent
+        # occurrences of it.
+
          if self.use_labels:
-            names = set()
+            names = {}
  
              def name_for_col(c):
                  if c._label is None or not c._render_label_in_columns_clause:
                      return (None, c)
  
                  name = c._label
+
                  if name in names:
-                    name = c._label_anon_label
+                    # when looking to see if names[name] is the same column as
+                    # c, use hash(), so that an annotated version of the column
+                    # is seen as the same as the non-annotated
+                    if hash(names[name]) != hash(c):
+
+                        # different column under the same name.  apply
+                        # disambiguating label
+                        name = c._label_anon_label
+
+                        if anon_for_dupe_key and name in names:
+                            # here, c._label_anon_label is definitely unique to
+                            # that column identity (or annotated version), so
+                            # this should always be true.
+                            # this is also an infrequent codepath because
+                            # you need two levels of duplication to be here
+                            assert hash(names[name]) == hash(c)
+
+                            # the column under the disambiguating label is
+                            # already present.  apply the "dedupe" label to
+                            # subsequent occurrences of the column so that the
+                            # original stays non-ambiguous
+                            name = c._dedupe_label_anon_label
+                        else:
+                            names[name] = c
+                    elif anon_for_dupe_key:
+                        # same column under the same name. apply the "dedupe"
+                        # label so that the original stays non-ambiguous
+                        name = c._dedupe_label_anon_label
                  else:
-                    names.add(name)
+                    names[name] = c
                  return name, c
  
              return [name_for_col(c) for c in cols]
          else:
              return [(None, c) for c in cols]
  
+    @_memoized_property
+    def _columns_plus_names(self):
+        """generate label names plus columns to render in a SELECT."""
+
+        return self._generate_columns_plus_names(True)
+
      def _generate_fromclause_column_proxies(self, subquery):
+        """generate column proxies to place in the exported .c collection
+        of a subquery."""
+
          keys_seen = set()
          prox = []
  
-        for name, c in self._columns_plus_names:
+        for name, c in self._generate_columns_plus_names(False):
              if not hasattr(c, "_make_proxy"):
                  continue
              if name is None:
diff --git a/test/sql/test_compiler.py b/test/sql/test_compiler.py

index 36e9cd33bb3b46244bf21b78b647318963ecd755..ca73f6c18c1ada7d07d6715bb153f8eaa942f5c9 100644 (file)
--- a/test/sql/test_compiler.py
+++ b/test/sql/test_compiler.py
@@ -571,16 +571,185 @@ class SelectTest(fixtures.TestBase, AssertsCompiledSQL):
          s = s.compile(dialect=default.DefaultDialect(paramstyle="qmark"))
          eq_(s.positiontup, ["a", "b", "c"])
  
+    def test_overlapping_labels_use_labels(self):
+        foo = table("foo", column("id"), column("bar_id"))
+        foo_bar = table("foo_bar", column("id"))
+
+        stmt = select([foo, foo_bar]).apply_labels()
+        self.assert_compile(
+            stmt,
+            "SELECT foo.id AS foo_id, foo.bar_id AS foo_bar_id, "
+            "foo_bar.id AS foo_bar_id_1 "
+            "FROM foo, foo_bar",
+        )
+
+    def test_overlapping_labels_plus_dupes_use_labels(self):
+        foo = table("foo", column("id"), column("bar_id"))
+        foo_bar = table("foo_bar", column("id"))
+
+        # current approach is:
+        # 1. positional nature of columns is always maintained in all cases
+        # 2. two different columns that have the same label, second one
+        #    is disambiguated
+        # 3. if the same column is repeated, it gets deduped using a special
+        #    'dedupe' label that will show two underscores
+        # 4. The disambiguating label generated in #2 also has to be deduped.
+        # 5. The derived columns, e.g. subquery().c etc. do not export the
+        #    "dedupe" columns, at all.  they are unreachable (because they
+        #    are unreachable anyway in SQL unless you use "SELECT *")
+        #
+        # this is all new logic necessitated by #4753 since we allow columns
+        # to be repeated.   We would still like the targeting of this column,
+        # both in a result set as well as in a derived selectable, to be
+        # unambiguous (DBs like postgresql won't let us reference an ambiguous
+        # label in a derived selectable even if its the same column repeated).
+        #
+        # this kind of thing happens of course because the ORM is in some
+        # more exotic cases writing in joins where columns may be duped.
+        # it might be nice to fix it on that side also, however SQLAlchemy
+        # has deduped columns in SELECT statements for 13 years so having a
+        # robust behavior when dupes are present is still very useful.
+
+        stmt = select(
+            [
+                foo.c.id,
+                foo.c.bar_id,
+                foo_bar.c.id,
+                foo.c.bar_id,
+                foo.c.id,
+                foo.c.bar_id,
+                foo_bar.c.id,
+                foo_bar.c.id,
+            ]
+        ).apply_labels()
+        self.assert_compile(
+            stmt,
+            "SELECT foo.id AS foo_id, "
+            "foo.bar_id AS foo_bar_id, "  # 1. 1st foo.bar_id, as is
+            "foo_bar.id AS foo_bar_id_1, "  # 2. 1st foo_bar.id, disamb from 1
+            "foo.bar_id AS foo_bar_id__1, "  # 3. 2nd foo.bar_id, dedupe from 1
+            "foo.id AS foo_id__1, "
+            "foo.bar_id AS foo_bar_id__1, "  # 4. 3rd foo.bar_id, same as 3
+            "foo_bar.id AS foo_bar_id__2, "  # 5. 2nd foo_bar.id
+            "foo_bar.id AS foo_bar_id__2 "  # 6. 3rd foo_bar.id, same as 5
+            "FROM foo, foo_bar",
+        )
+
+        # for the subquery, the labels created for repeated occurrences
+        # of the same column are not used.  only the label applied to the
+        # first occurrence of each column is used
+        self.assert_compile(
+            select([stmt.subquery()]),
+            "SELECT "
+            "anon_1.foo_id, "  # from 1st foo.id in derived (line 1)
+            "anon_1.foo_bar_id, "  # from 1st foo.bar_id in derived (line 2)
+            "anon_1.foo_bar_id_1, "  # from 1st foo_bar.id in derived (line 3)
+            "anon_1.foo_bar_id, "  # from 1st foo.bar_id in derived (line 2)
+            "anon_1.foo_id, "  # from 1st foo.id in derived (line 1)
+            "anon_1.foo_bar_id, "  # from 1st foo.bar_id in derived (line 2)
+            "anon_1.foo_bar_id_1, "  # from 1st foo_bar.id in derived (line 3)
+            "anon_1.foo_bar_id_1 "  # from 1st foo_bar.id in derived (line 3)
+            "FROM ("
+            "SELECT foo.id AS foo_id, "
+            "foo.bar_id AS foo_bar_id, "  # 1. 1st foo.bar_id, as is
+            "foo_bar.id AS foo_bar_id_1, "  # 2. 1st foo_bar.id, disamb from 1
+            "foo.bar_id AS foo_bar_id__1, "  # 3. 2nd foo.bar_id, dedupe from 1
+            "foo.id AS foo_id__1, "
+            "foo.bar_id AS foo_bar_id__1, "  # 4. 3rd foo.bar_id, same as 3
+            "foo_bar.id AS foo_bar_id__2, "  # 5. 2nd foo_bar.id
+            "foo_bar.id AS foo_bar_id__2 "  # 6. 3rd foo_bar.id, same as 5
+            "FROM foo, foo_bar"
+            ") AS anon_1",
+        )
+
      def test_dupe_columns_use_labels(self):
-        """as of 1.4, there's no deduping.
+        t = table("t", column("a"), column("b"))
+        self.assert_compile(
+            select([t.c.a, t.c.a, t.c.b, t.c.a]).apply_labels(),
+            "SELECT t.a AS t_a, t.a AS t_a__1, t.b AS t_b, "
+            "t.a AS t_a__1 FROM t",
+        )
  
-        however the labels will still uniqify themselves...
-        """
+    def test_dupe_columns_use_labels_derived_selectable(self):
+        t = table("t", column("a"), column("b"))
+        stmt = select([t.c.a, t.c.a, t.c.b, t.c.a]).apply_labels().subquery()
+
+        self.assert_compile(
+            select([stmt]),
+            "SELECT anon_1.t_a, anon_1.t_a, anon_1.t_b, anon_1.t_a FROM "
+            "(SELECT t.a AS t_a, t.a AS t_a__1, t.b AS t_b, t.a AS t_a__1 "
+            "FROM t) AS anon_1",
+        )
  
+    def test_dupe_columns_use_labels_mix_annotations(self):
          t = table("t", column("a"), column("b"))
+        a, b, a_a = t.c.a, t.c.b, t.c.a._annotate({"some_orm_thing": True})
+
+        self.assert_compile(
+            select([a, a_a, b, a_a]).apply_labels(),
+            "SELECT t.a AS t_a, t.a AS t_a__1, t.b AS t_b, "
+            "t.a AS t_a__1 FROM t",
+        )
+
+        self.assert_compile(
+            select([a_a, a, b, a_a]).apply_labels(),
+            "SELECT t.a AS t_a, t.a AS t_a__1, t.b AS t_b, "
+            "t.a AS t_a__1 FROM t",
+        )
+
+        self.assert_compile(
+            select([a_a, a_a, b, a]).apply_labels(),
+            "SELECT t.a AS t_a, t.a AS t_a__1, t.b AS t_b, "
+            "t.a AS t_a__1 FROM t",
+        )
+
+    def test_dupe_columns_use_labels_derived_selectable_mix_annotations(self):
+        t = table("t", column("a"), column("b"))
+        a, b, a_a = t.c.a, t.c.b, t.c.a._annotate({"some_orm_thing": True})
+        stmt = select([a, a_a, b, a_a]).apply_labels().subquery()
+
+        self.assert_compile(
+            select([stmt]),
+            "SELECT anon_1.t_a, anon_1.t_a, anon_1.t_b, anon_1.t_a FROM "
+            "(SELECT t.a AS t_a, t.a AS t_a__1, t.b AS t_b, t.a AS t_a__1 "
+            "FROM t) AS anon_1",
+        )
+
+    def test_overlapping_labels_plus_dupes_use_labels_mix_annotations(self):
+        foo = table("foo", column("id"), column("bar_id"))
+        foo_bar = table("foo_bar", column("id"))
+
+        foo_bar__id = foo_bar.c.id._annotate({"some_orm_thing": True})
+
+        stmt = select(
+            [
+                foo.c.bar_id,
+                foo_bar.c.id,
+                foo_bar.c.id,
+                foo_bar__id,
+                foo_bar__id,
+            ]
+        ).apply_labels()
+
+        self.assert_compile(
+            stmt,
+            "SELECT foo.bar_id AS foo_bar_id, foo_bar.id AS foo_bar_id_1, "
+            "foo_bar.id AS foo_bar_id__1, foo_bar.id AS foo_bar_id__1, "
+            "foo_bar.id AS foo_bar_id__1 FROM foo, foo_bar",
+        )
+
+    def test_dupe_columns_use_labels_from_anon(self):
+
+        t = table("t", column("a"), column("b"))
+        a = t.alias()
+
+        # second and third occurrences of a.c.a are labeled, but are
+        # dupes of each other.
          self.assert_compile(
-            select([t.c.a, t.c.a, t.c.b]).apply_labels(),
-            "SELECT t.a AS t_a, t.a AS t_a_1, t.b AS t_b FROM t",
+            select([a.c.a, a.c.a, a.c.b, a.c.a]).apply_labels(),
+            "SELECT t_1.a AS t_1_a, t_1.a AS t_1_a__1, t_1.b AS t_1_b, "
+            "t_1.a AS t_1_a__1 "
+            "FROM t AS t_1",
          )
  
      def test_nested_label_targeting(self):
diff --git a/test/sql/test_join_rewriting.py b/test/sql/test_join_rewriting.py

index 573455e7db148eae134083d81d150d9a51ac34f1..e713f5d73b6f65e145fed3e93985785732964d44 100644 (file)
--- a/test/sql/test_join_rewriting.py
+++ b/test/sql/test_join_rewriting.py
@@ -362,7 +362,7 @@ class JoinRewriteTest(_JoinRewriteTestBase, fixtures.TestBase):
      _a_bc_wdupes = (
          "SELECT a.id AS a_id, anon_1.b_id AS b_id, anon_1.b_a_id AS b_a_id, "
          "anon_1.c_id AS c_id, anon_1.c_b_id AS c_b_id, "
-        "anon_1.b_a_id AS b_a_id_1, anon_1.c_b_id AS c_b_id_1 "
+        "anon_1.b_a_id AS b_a_id__1, anon_1.c_b_id AS c_b_id__1 "
          "FROM a JOIN "
          "(SELECT b.id AS b_id, b.a_id AS b_a_id, c.id AS c_id, "
          "c.b_id AS c_b_id "
@@ -373,19 +373,10 @@ class JoinRewriteTest(_JoinRewriteTestBase, fixtures.TestBase):
  
      _a_bc_wdupes_anon_map = (
          "SELECT 1 FROM (SELECT a.id AS a_id, b.id AS b_id, b.a_id AS b_a_id, "
-        "c.id AS c_id, c.b_id AS c_b_id, b.a_id AS b_a_id_1, "
-        "c.b_id AS c_b_id_1 "
-        "FROM a JOIN (b JOIN c ON b.id = c.b_id) ON a.id = b.a_id "
-        "WHERE b.id = :id_1 AND c.id = :id_2) AS anon_1 "
-        "WHERE anon_1.b_a_id_1 = anon_1.c_b_id_1"
-    )
-
-    _a_bc_wdupes_anon_map = (
-        "SELECT 1 FROM (SELECT a.id AS a_id, b.id AS b_id, b.a_id AS b_a_id, "
-        "c.id AS c_id, c.b_id AS c_b_id, b.a_id AS b_a_id_1, "
-        "c.b_id AS c_b_id_1 FROM a JOIN (b JOIN c ON b.id = c.b_id) "
+        "c.id AS c_id, c.b_id AS c_b_id, b.a_id AS b_a_id__1, "
+        "c.b_id AS c_b_id__1 FROM a JOIN (b JOIN c ON b.id = c.b_id) "
          "ON a.id = b.a_id WHERE b.id = :id_1 AND c.id = :id_2) AS anon_1 "
-        "WHERE anon_1.b_a_id_1 = anon_1.c_b_id_1"
+        "WHERE anon_1.b_a_id = anon_1.c_b_id"
      )
  
      _a_bc_comma_a1_selbc = (
@@ -477,7 +468,7 @@ class JoinRewriteTest(_JoinRewriteTestBase, fixtures.TestBase):
      _b_a_id_double_overlap_annotated = (
          "SELECT anon_1.b_id AS anon_1_b_id, anon_1.b_a_id AS anon_1_b_a_id, "
          "anon_1.b_a_id_1 AS anon_1_b_a_id_1 "
-        "FROM (SELECT b.id AS b_id, b.a_id AS b_a_id, b_a.id AS b_a_id_2 "
+        "FROM (SELECT b.id AS b_id, b.a_id AS b_a_id, b_a.id AS b_a_id_1 "
          "FROM b JOIN b_a ON b.id = b_a.id) AS anon_1"
      )
  
@@ -534,7 +525,7 @@ class JoinPlainTest(_JoinRewriteTestBase, fixtures.TestBase):
  
      _a_bc_wdupes = (
          "SELECT a.id AS a_id, b.id AS b_id, b.a_id AS b_a_id, c.id AS c_id, "
-        "c.b_id AS c_b_id, b.a_id AS b_a_id_1, c.b_id AS c_b_id_1 "
+        "c.b_id AS c_b_id, b.a_id AS b_a_id__1, c.b_id AS c_b_id__1 "
          "FROM a JOIN "
          "(b JOIN c ON b.id = c.b_id) "
          "ON a.id = b.a_id "
@@ -544,11 +535,11 @@ class JoinPlainTest(_JoinRewriteTestBase, fixtures.TestBase):
  
      _a_bc_wdupes_anon_map = (
          "SELECT 1 FROM (SELECT a.id AS a_id, b.id AS b_id, b.a_id AS b_a_id, "
-        "c.id AS c_id, c.b_id AS c_b_id, b.a_id AS b_a_id_1, "
-        "c.b_id AS c_b_id_1 "
+        "c.id AS c_id, c.b_id AS c_b_id, b.a_id AS b_a_id__1, "
+        "c.b_id AS c_b_id__1 "
          "FROM a JOIN (b JOIN c ON b.id = c.b_id) ON a.id = b.a_id "
          "WHERE b.id = :id_1 AND c.id = :id_2) AS anon_1 "
-        "WHERE anon_1.b_a_id_1 = anon_1.c_b_id_1"
+        "WHERE anon_1.b_a_id = anon_1.c_b_id"
      )
  
      _a_bc_comma_a1_selbc = (
@@ -680,11 +671,11 @@ class JoinNoUseLabelsTest(_JoinRewriteTestBase, fixtures.TestBase):
  
      _a_bc_wdupes_anon_map = (
          "SELECT 1 FROM (SELECT a.id AS a_id, b.id AS b_id, b.a_id AS b_a_id, "
-        "c.id AS c_id, c.b_id AS c_b_id, b.a_id AS b_a_id_1, "
-        "c.b_id AS c_b_id_1 "
+        "c.id AS c_id, c.b_id AS c_b_id, b.a_id AS b_a_id__1, "
+        "c.b_id AS c_b_id__1 "
          "FROM a JOIN (b JOIN c ON b.id = c.b_id) ON a.id = b.a_id "
          "WHERE b.id = :id_1 AND c.id = :id_2) AS anon_1 "
-        "WHERE anon_1.b_a_id_1 = anon_1.c_b_id_1"
+        "WHERE anon_1.b_a_id = anon_1.c_b_id"
      )
  
      _a_bc_comma_a1_selbc = (
diff --git a/test/sql/test_labels.py b/test/sql/test_labels.py

index 638d4ee7f11177da5ea71ecb6b0a8e72b9590c12..e953e71a1a760ca417bf750814f6495cb1772181 100644 (file)
--- a/test/sql/test_labels.py
+++ b/test/sql/test_labels.py
@@ -924,9 +924,9 @@ class ColExprLabelTest(fixtures.TestBase, AssertsCompiledSQL):
                  ]
              ).apply_labels(),
              "SELECT some_table.name AS some_table_name, "
-            "some_table.name AS some_table_name_1, "
-            "SOME_COL_THING(some_table.name) AS some_table_name_2, "
-            "SOME_COL_THING(some_table.name) AS some_table_name_3 "
+            "some_table.name AS some_table_name__1, "
+            "SOME_COL_THING(some_table.name) AS some_table_name_1, "
+            "SOME_COL_THING(some_table.name) AS some_table_name_2 "
              "FROM some_table",
          )
  
diff --git a/test/sql/test_selectable.py b/test/sql/test_selectable.py

index 76cece5e4026512767fc2b681f710ba5d9935bd4..00b9b68c7ea490f4343196332e572c5bb9dd9531 100644 (file)
--- a/test/sql/test_selectable.py
+++ b/test/sql/test_selectable.py
@@ -2735,6 +2735,7 @@ class WithLabelsTest(fixtures.TestBase):
          eq_(
              list(sel.subquery().c.keys()),
              ["t_x_id", t2.c.id._label_anon_label],
+            # ["t_x_id", "t_x_id"]  # if we turn off deduping entirely,
          )
          self._assert_result_keys(sel, ["t_x_id", "t_x_id_1"])
          self._assert_subq_result_keys(sel, ["t_x_id", "t_x_id_1"])
@@ -2803,7 +2804,13 @@ class WithLabelsTest(fixtures.TestBase):
              list(sel.selected_columns.keys()),
              ["t_x_a", t2.c.a._label_anon_label],
          )
+
+        # deduping for different cols but same label
          eq_(list(sel.subquery().c.keys()), ["t_x_a", t2.c.a._label_anon_label])
+
+        # if we turn off deduping entirely
+        # eq_(list(sel.subquery().c.keys()), ["t_x_a", "t_x_a"])
+
          self._assert_result_keys(sel, ["t_x_id", "t_x_id_1"])
          self._assert_subq_result_keys(sel, ["t_x_id", "t_x_id_1"])
author	Mike Bayer <mike_mp@zzzcomputing.com>
	Sat, 5 Oct 2019 22:27:44 +0000 (18:27 -0400)
committer	Mike Bayer <mike_mp@zzzcomputing.com>
	Mon, 7 Oct 2019 15:26:10 +0000 (11:26 -0400)
lib/sqlalchemy/sql/elements.py		patch \| blob \| blame \| history
lib/sqlalchemy/sql/selectable.py		patch \| blob \| blame \| history
test/sql/test_compiler.py		patch \| blob \| blame \| history
test/sql/test_join_rewriting.py		patch \| blob \| blame \| history
test/sql/test_labels.py		patch \| blob \| blame \| history
test/sql/test_selectable.py		patch \| blob \| blame \| history