author    Mike Bayer <mike_mp@zzzcomputing.com>
          Sat, 7 Mar 2015 17:48:13 +0000 (12:48 -0500)
committer Mike Bayer <mike_mp@zzzcomputing.com>
          Sat, 7 Mar 2015 22:18:31 +0000 (17:18 -0500)

- The SQL compiler now generates the mapping of expected columns
such that they are matched to the received result set positionally,
rather than by name.  Originally, this was seen as a way to handle
cases where we had columns returned with difficult-to-predict names,
though in modern use that issue has been overcome by anonymous
labeling.  In this version, the approach basically reduces function
call count per-result by a few dozen calls, or more for larger
sets of result columns.  The approach still degrades into a modern
version of the old approach if textual elements modify the result
map, or if any discrepancy in size exists between
the compiled set of columns versus what was received, so there's no
issue for partially or fully textual compilation scenarios where these
lists might not line up.  fixes #918
- call counts still need to be adjusted down for this, so zoomark
tests won't pass at the moment

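To illustrate the idea in the message above: previously the compiler emitted a name-keyed result_map that every column in cursor.description had to be looked up in; it now emits an ordered _result_columns list that can be matched to the description purely by position when the lengths agree. A minimal standalone sketch of the concept follows, using simplified data, not the actual SQLAlchemy structures:

    # old shape: dict keyed by (possibly case-normalized) column name;
    # every described column costs a name normalization plus a dict lookup
    result_map = {
        "user_id": ("user_id", (), None),
        "user_name": ("user_name", (), None),
    }

    # new shape: ordered list of (keyname, name, objects, type_) tuples
    _result_columns = [
        ("user_id", "user_id", (), None),
        ("user_name", "user_name", (), None),
    ]

    # as returned by the DBAPI in cursor.description
    description = [("user_id", None), ("user_name", None)]

    if len(_result_columns) == len(description):
        # positional match: zip directly, no per-column name handling
        matched = list(zip(description, _result_columns))
    else:
        # size mismatch (e.g. a partially textual statement): fall back
        # to the by-name lookup used through 0.9
        matched = [(rec, result_map.get(rec[0])) for rec in description]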
doc/build/changelog/changelog_10.rst
lib/sqlalchemy/dialects/oracle/base.py
lib/sqlalchemy/engine/default.py
lib/sqlalchemy/engine/result.py
lib/sqlalchemy/sql/compiler.py
test/sql/test_defaults.py
test/sql/test_query.py

index 694c780fbe1bb74a8213b0f215ba75e2e48c2597..fb75d4a8196ec9e5bc1e036f9ed21095511fb4b6 100644 (file)
     series as well.  For changes that are specific to 1.0 with an emphasis
     on compatibility concerns, see :doc:`/changelog/migration_10`.
 
+    .. change::
+        :tags: feature, sql
+        :tickets: 918
+
+        The SQL compiler now generates the mapping of expected columns
+        such that they are matched to the received result set positionally,
+        rather than by name.  Originally, this was seen as a way to handle
+        cases where we had columns returned with difficult-to-predict names,
+        though in modern use that issue has been overcome by anonymous
+        labeling.   In this version, the approach basically reduces function
+        call count per-result by a few dozen calls, or more for larger
+        sets of result columns.  The approach still degrades into a modern
+        version of the old approach if any discrepancy in size exists between
+        the compiled set of columns versus what was received, so there's no
+        issue for partially or fully textual compilation scenarios where these
+        lists might not line up.
+
     .. change::
         :tags: feature, postgresql
         :pullreq: github:132
index b482c9069d4f4f67cb935f115ffdd10447b340df..a5e071148a906174711eb5e479f0743dac4c6ef8 100644 (file)
@@ -692,8 +692,9 @@ class OracleCompiler(compiler.SQLCompiler):
                 self.bindparam_string(self._truncate_bindparam(outparam)))
             columns.append(
                 self.process(col_expr, within_columns_clause=False))
-            self.result_map[outparam.key] = (
-                outparam.key,
+
+            self._add_to_result_map(
+                outparam.key, outparam.key,
                 (column, getattr(column, 'name', None),
                  getattr(column, 'key', None)),
                 column.type
index 17d2e2531c67b404cdcc214d297b704451fe0350..9d2bbfb157a860e8d47dad3d8bae590c6b374e16 100644 (file)
@@ -461,9 +461,9 @@ class DefaultExecutionContext(interfaces.ExecutionContext):
     is_crud = False
     isddl = False
     executemany = False
-    result_map = None
     compiled = None
     statement = None
+    _result_columns = None
     _is_implicit_returning = False
     _is_explicit_returning = False
 
@@ -525,7 +525,7 @@ class DefaultExecutionContext(interfaces.ExecutionContext):
         # compiled clauseelement.  process bind params, process table defaults,
         # track collections used by ResultProxy to target and process results
 
-        self.result_map = compiled.result_map
+        self._result_columns = compiled._result_columns
 
         self.unicode_statement = util.text_type(compiled)
         if not dialect.supports_unicode_statements:
@@ -663,6 +663,13 @@ class DefaultExecutionContext(interfaces.ExecutionContext):
         self.cursor = self.create_cursor()
         return self
 
+    @property
+    def result_map(self):
+        if self._result_columns:
+            return self.compiled.result_map
+        else:
+            return None
+
     @util.memoized_property
     def engine(self):
         return self.root_connection.engine
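The execution context no longer stores the dictionary itself; result_map survives as a read-only view derived from the compiled statement, so code that inspects context.result_map keeps working, and plain string statements still report None. Roughly, as a sketch with stand-in classes rather than the real ones (the real compiler property, shown further below, also merges duplicate key names, which this omits):

    class FakeCompiled(object):
        """Stand-in for a compiled statement (hypothetical)."""
        def __init__(self, result_columns):
            self._result_columns = result_columns

        @property
        def result_map(self):
            # rebuilt on demand from the ordered list
            return dict(
                (key, (name, objects, type_))
                for key, name, objects, type_ in self._result_columns
            )

    class FakeContext(object):
        def __init__(self, compiled):
            self.compiled = compiled
            self._result_columns = compiled._result_columns

        @property
        def result_map(self):
            if self._result_columns:
                return self.compiled.result_map
            return None  # plain string statement: no map, as before

    ctx = FakeContext(FakeCompiled([("id", "id", (), None)]))
    assert ctx.result_map == {"id": ("id", (), None)}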
index 3995942efe04606b1481b6dd06fc41f08e893733..6eca54a34dd42f2a43a997289b21e8451be92b4a 100644 (file)
@@ -187,86 +187,142 @@ class ResultMetaData(object):
     context."""
 
     def __init__(self, parent, metadata):
-        self._processors = processors = []
-
-        # We do not strictly need to store the processor in the key mapping,
-        # though it is faster in the Python version (probably because of the
-        # saved attribute lookup self._processors)
-        self._keymap = keymap = {}
-        self.keys = []
         context = parent.context
         dialect = context.dialect
         typemap = dialect.dbapi_type_map
         translate_colname = context._translate_colname
-        self.case_sensitive = dialect.case_sensitive
-
-        # high precedence key values.
-        primary_keymap = {}
-
-        for i, rec in enumerate(metadata):
-            colname = rec[0]
-            coltype = rec[1]
-
-            if dialect.description_encoding:
-                colname = dialect._description_decoder(colname)
+        self.case_sensitive = case_sensitive = dialect.case_sensitive
 
+        if context._result_columns:
+            num_ctx_cols = len(context._result_columns)
+        else:
+            num_ctx_cols = None
+
+        if num_ctx_cols and \
+                context.compiled._ordered_columns and \
+                num_ctx_cols == len(metadata):
+            # case 1 - SQL expression statement, number of columns
+            # in result matches number of cols in compiled.  This is the
+            # vast majority case for SQL expression constructs.  In this
+            # case we don't bother trying to parse or match up to
+            # the colnames in the result description.
+            raw = [
+                (
+                    idx,
+                    key,
+                    name.lower() if not case_sensitive else name,
+                    context.get_result_processor(
+                        type_, key, metadata[idx][1]
+                    ),
+                    obj,
+                    None
+                ) for idx, (key, name, obj, type_)
+                in enumerate(context._result_columns)
+            ]
+            self.keys = [
+                elem[1] for elem in context._result_columns
+            ]
+        else:
+            # case 2 - raw string, or number of columns in result does
+            # not match number of cols in compiled.  The raw string case
+            # is very common.   The latter can happen
+            # when text() is used with only a partial typemap, or
+            # in the extremely unlikely cases where the compiled construct
+            # has a single element with multiple col expressions in it
+            # (e.g. has commas embedded) or there's some kind of statement
+            # that is adding extra columns.
+            # In all these cases we fall back to the "named" approach
+            # that SQLAlchemy has used up through 0.9.
+
+            raw = []
+            self.keys = []
+            untranslated = None
+            for idx, rec in enumerate(metadata):
+                colname = rec[0]
+                coltype = rec[1]
+
+                if dialect.description_encoding:
+                    colname = dialect._description_decoder(colname)
+
+                if translate_colname:
+                    colname, untranslated = translate_colname(colname)
+
+                if dialect.requires_name_normalize:
+                    colname = dialect.normalize_name(colname)
+
+                self.keys.append(colname)
+                if not case_sensitive:
+                    colname = colname.lower()
+
+                if num_ctx_cols:
+                    try:
+                        ctx_rec = context.result_map[colname]
+                    except KeyError:
+                        mapped_type = typemap.get(coltype, sqltypes.NULLTYPE)
+                        obj = None
+                    else:
+                        obj = ctx_rec[1]
+                        mapped_type = ctx_rec[2]
+                else:
+                    mapped_type = typemap.get(coltype, sqltypes.NULLTYPE)
+                    obj = None
+                processor = context.get_result_processor(
+                    mapped_type, colname, coltype)
+
+                raw.append(
+                    (idx, colname, colname, processor, obj, untranslated)
+                )
+
+        # keymap indexes by integer index...
+        self._keymap = dict([
+            (elem[0], (elem[3], elem[4], elem[0]))
+            for elem in raw
+        ])
+
+        # processors in key order for certain per-row
+        # views like __iter__ and slices
+        self._processors = [elem[3] for elem in raw]
+
+        if num_ctx_cols:
+            # keymap by primary string...
+            by_key = dict([
+                (elem[2], (elem[3], elem[4], elem[0]))
+                for elem in raw
+            ])
+
+            # if by-primary-string dictionary smaller (or bigger?!) than
+            # number of columns, assume we have dupes, rewrite
+            # dupe records with "None" for index which results in
+            # ambiguous column exception when accessed.
+            if len(by_key) != num_ctx_cols:
+                seen = set()
+                for idx in range(num_ctx_cols):
+                    key = raw[idx][1]
+                    if key in seen:
+                        by_key[key] = (None, by_key[key][1], None)
+                    seen.add(key)
+
+            # update keymap with secondary "object"-based keys
+            self._keymap.update([
+                (obj_elem, by_key[elem[2]])
+                for elem in raw if elem[4]
+                for obj_elem in elem[4]
+            ])
+
+            # update keymap with primary string names taking
+            # precedence
+            self._keymap.update(by_key)
+        else:
+            self._keymap.update([
+                (elem[2], (elem[3], elem[4], elem[0]))
+                for elem in raw
+            ])
+            # update keymap with "translated" names (sqlite-only thing)
             if translate_colname:
-                colname, untranslated = translate_colname(colname)
-
-            if dialect.requires_name_normalize:
-                colname = dialect.normalize_name(colname)
-
-            if context.result_map:
-                try:
-                    name, obj, type_ = context.result_map[
-                        colname if self.case_sensitive else colname.lower()]
-                except KeyError:
-                    name, obj, type_ = \
-                        colname, None, typemap.get(coltype, sqltypes.NULLTYPE)
-            else:
-                name, obj, type_ = \
-                    colname, None, typemap.get(coltype, sqltypes.NULLTYPE)
-
-            processor = context.get_result_processor(type_, colname, coltype)
-
-            processors.append(processor)
-            rec = (processor, obj, i)
-
-            # indexes as keys. This is only needed for the Python version of
-            # RowProxy (the C version uses a faster path for integer indexes).
-            primary_keymap[i] = rec
-
-            # populate primary keymap, looking for conflicts.
-            if primary_keymap.setdefault(
-                    name if self.case_sensitive
-                    else name.lower(),
-                    rec) is not rec:
-                # place a record that doesn't have the "index" - this
-                # is interpreted later as an AmbiguousColumnError,
-                # but only when actually accessed.   Columns
-                # colliding by name is not a problem if those names
-                # aren't used; integer access is always
-                # unambiguous.
-                primary_keymap[name
-                               if self.case_sensitive
-                               else name.lower()] = rec = (None, obj, None)
-
-            self.keys.append(colname)
-            if obj:
-                for o in obj:
-                    keymap[o] = rec
-                    # technically we should be doing this but we
-                    # are saving on callcounts by not doing so.
-                    # if keymap.setdefault(o, rec) is not rec:
-                    #    keymap[o] = (None, obj, None)
-
-            if translate_colname and \
-                    untranslated:
-                keymap[untranslated] = rec
-
-        # overwrite keymap values with those of the
-        # high precedence keymap.
-        keymap.update(primary_keymap)
+                self._keymap.update([
+                    (elem[5], self._keymap[elem[2]])
+                    for elem in raw if elem[5]
+                ])
 
     @util.pending_deprecation("0.8", "sqlite dialect uses "
                               "_translate_colname() now")
index f8f4d1dda9172a79e80073c9286e23e21b5c3616..61b6d22d0f7e5bb0ff8da021366e2dea168725f7 100644 (file)
@@ -361,7 +361,12 @@ class SQLCompiler(Compiled):
         # column/label name, ColumnElement object (if any) and
         # TypeEngine. ResultProxy uses this for type processing and
         # column targeting
-        self.result_map = {}
+        self._result_columns = []
+
+        # if False, means we can't be sure the list of entries
+        # in _result_columns is actually the rendered order.   This
+        # gets flipped when we use TextAsFrom, for example.
+        self._ordered_columns = True
 
         # true if the paramstyle is positional
         self.positional = dialect.positional
@@ -388,6 +393,22 @@ class SQLCompiler(Compiled):
         if self.positional and dialect.paramstyle == 'numeric':
             self._apply_numbered_params()
 
+    @property
+    def result_map(self):
+        d = {}
+        for elem in self._result_columns:
+            key, rec = elem[0], elem[1:]
+            if key in d:
+                # conflicting keyname, just double up the list
+                # of objects.  this will cause an "ambiguous name"
+                # error if an attempt is made by the result set to
+                # access.
+                e_name, e_obj, e_type = d[key]
+                d[key] = e_name, e_obj + rec[1], e_type
+            else:
+                d[key] = rec
+        return d
+
     @util.memoized_instancemethod
     def _init_cte_state(self):
         """Initialize collections related to CTEs only if
@@ -678,6 +699,7 @@ class SQLCompiler(Compiled):
         )
 
         if populate_result_map:
+            self._ordered_columns = False
             for c in taf.column_args:
                 self.process(c, within_columns_clause=True,
                              add_to_result_map=self._add_to_result_map)
@@ -1241,15 +1263,7 @@ class SQLCompiler(Compiled):
         if not self.dialect.case_sensitive:
             keyname = keyname.lower()
 
-        if keyname in self.result_map:
-            # conflicting keyname, just double up the list
-            # of objects.  this will cause an "ambiguous name"
-            # error if an attempt is made by the result set to
-            # access.
-            e_name, e_obj, e_type = self.result_map[keyname]
-            self.result_map[keyname] = e_name, e_obj + objects, e_type
-        else:
-            self.result_map[keyname] = name, objects, type_
+        self._result_columns.append((keyname, name, objects, type_))
 
     def _label_select_column(self, select, column,
                              populate_result_map,
@@ -1439,9 +1453,11 @@ class SQLCompiler(Compiled):
             (inner_col[c._key_label], c)
             for c in select.inner_columns
         )
-        for key, (name, objs, typ) in list(self.result_map.items()):
-            objs = tuple([d.get(col, col) for col in objs])
-            self.result_map[key] = (name, objs, typ)
+
+        self._result_columns = [
+            (key, name, tuple([d.get(col, col) for col in objs]), typ)
+            for key, name, objs, typ in self._result_columns
+        ]
 
     _default_stack_entry = util.immutabledict([
         ('iswrapper', False),
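With the compiler now appending to _result_columns unconditionally, the derived result_map property handles key conflicts at derivation time: when two entries share a key, the object tuples are concatenated rather than one overwriting the other, which is what later produces the ambiguous-name error on access. For example, with plain illustrative data:

    _result_columns = [
        ("name", "name", ("col_a",), None),
        ("name", "name", ("col_b",), None),   # same key twice
    ]

    d = {}
    for key, name, objects, type_ in _result_columns:
        if key in d:
            # conflicting key: double up the list of objects
            e_name, e_obj, e_type = d[key]
            d[key] = (e_name, e_obj + objects, e_type)
        else:
            d[key] = (name, objects, type_)

    assert d["name"] == ("name", ("col_a", "col_b"), None)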
index ecf3dcc4d24b1c95fffe940f41463169c696b844..c154daa22e9952ed828ed0cec6295a9c0fc5fa97 100644 (file)
@@ -133,7 +133,7 @@ class DefaultTest(fixtures.TestBase):
                     [
                         func.trunc(
                             func.sysdate(), sa.literal_column("'DAY'"),
-                            type_=sa.Date).label('today')]))
+                            type_=sa.Date)]))
             assert isinstance(ts, datetime.date) and not isinstance(
                 ts, datetime.datetime)
             f = sa.select([func.length('abcdef')], bind=db).scalar()
index 2f13486eb8294411cff875946febef8cf24e3491..eeec487bee1371daae37746ae09a0b3d9ead45cf 100644 (file)
@@ -1175,6 +1175,48 @@ class QueryTest(fixtures.TestBase):
             row[1], 1
         )
 
+    def test_fetch_partial_result_map(self):
+        users.insert().execute(user_id=7, user_name='ed')
+
+        t = text("select * from query_users").columns(
+            user_name=String()
+        )
+        eq_(
+            testing.db.execute(t).fetchall(), [(7, 'ed')]
+        )
+
+    def test_fetch_unordered_result_map(self):
+        users.insert().execute(user_id=7, user_name='ed')
+
+        class Goofy1(TypeDecorator):
+            impl = String
+
+            def process_result_value(self, value, dialect):
+                return value + "a"
+
+        class Goofy2(TypeDecorator):
+            impl = String
+
+            def process_result_value(self, value, dialect):
+                return value + "b"
+
+        class Goofy3(TypeDecorator):
+            impl = String
+
+            def process_result_value(self, value, dialect):
+                return value + "c"
+
+        t = text(
+            "select user_name as a, user_name as b, "
+            "user_name as c from query_users").columns(
+            a=Goofy1(), b=Goofy2(), c=Goofy3()
+        )
+        eq_(
+            testing.db.execute(t).fetchall(), [
+                ('eda', 'edb', 'edc')
+            ]
+        )
+
     @testing.requires.subqueries
     def test_column_label_targeting(self):
         users.insert().execute(user_id=7, user_name='ed')