From: Paul Becotte Date: Fri, 24 Jan 2020 00:36:33 +0000 (-0500) Subject: Update the type comparison code used for schema autogeneration. Compare X-Git-Tag: rel_1_4_0~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3ddf82e1e2b4fff47edbd45dd493e6bbd5880496;p=thirdparty%2Fsqlalchemy%2Falembic.git Update the type comparison code used for schema autogeneration. Compare A major rework of the "type comparison" logic is in place which changes the entire approach by which column datatypes are compared. Types are now compared based on the DDL string generated by the metadata type vs. the datatype reflected from the database. This means we compare types based on what would actually render and additionally if elements of the types change like string length, those changes are detected as well. False positives like those generated between SQLAlchemy Boolean and MySQL TINYINT should also be resolved. Thanks very much to Paul Becotte for lots of hard work and patience on this one. Fixes: #605 Closes: #619 Pull-request: https://github.com/sqlalchemy/alembic/pull/619 Pull-request-sha: 1a6a3860881081c3b98fdbfc203f8d3af5f98926 Change-Id: I1ab30f2a30fc567cde56b5b8be5f521ff235b67b --- diff --git a/.gitignore b/.gitignore index 39b9696c..47a52f7a 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ coverage.xml /scratch /scratch_test_* /test_schema.db +.idea/ diff --git a/alembic/__init__.py b/alembic/__init__.py index 4d4fbaa2..6b40a270 100644 --- a/alembic/__init__.py +++ b/alembic/__init__.py @@ -5,7 +5,7 @@ from . import op # noqa from .runtime import environment from .runtime import migration -__version__ = '1.3.4' +__version__ = "1.4.0" sys.modules["alembic.migration"] = migration sys.modules["alembic.environment"] = environment diff --git a/alembic/ddl/impl.py b/alembic/ddl/impl.py index 17209c98..72fc2947 100644 --- a/alembic/ddl/impl.py +++ b/alembic/ddl/impl.py @@ -1,6 +1,8 @@ +from collections import namedtuple +import re + from sqlalchemy import schema from sqlalchemy import text -from sqlalchemy import types as sqltypes from . import base from .. import util @@ -21,6 +23,9 @@ class ImplMeta(type): _impls = {} +Params = namedtuple("Params", ["type", "args", "kwargs"]) + + class DefaultImpl(with_metaclass(ImplMeta)): """Provide the entrypoint for major migration operations, @@ -39,6 +44,7 @@ class DefaultImpl(with_metaclass(ImplMeta)): transactional_ddl = False command_terminator = ";" + type_synonyms = ({"NUMERIC", "DECIMAL"},) def __init__( self, @@ -322,32 +328,69 @@ class DefaultImpl(with_metaclass(ImplMeta)): for row in rows: self._exec(table.insert(inline=True).values(**row)) - def compare_type(self, inspector_column, metadata_column): - - conn_type = inspector_column.type - metadata_type = metadata_column.type - - metadata_impl = metadata_type.dialect_impl(self.dialect) - if isinstance(metadata_impl, sqltypes.Variant): - metadata_impl = metadata_impl.impl.dialect_impl(self.dialect) - - # work around SQLAlchemy bug "stale value for type affinity" - # fixed in 0.7.4 - metadata_impl.__dict__.pop("_type_affinity", None) + def _tokenize_column_type(self, column): + definition = self.dialect.type_compiler.process(column.type).lower() + # py27 - py36 - col_type, *param_terms = re.findall... 
+        matches = re.findall("[^(),]+", definition)
+        col_type, param_terms = matches[0], matches[1:]
+        params = Params(col_type, [], {})
+        for term in param_terms:
+            if "=" in term:
+                key, val = term.split("=")
+                params.kwargs[key.strip()] = val.strip()
+            else:
+                params.args.append(term.strip())
+        return params

-        if hasattr(metadata_impl, "compare_against_backend"):
-            comparison = metadata_impl.compare_against_backend(
-                self.dialect, conn_type
-            )
-            if comparison is not None:
-                return not comparison
+    def _column_types_match(self, inspector_type, metadata_type):
+        if inspector_type == metadata_type:
+            return True
+        synonyms = [{t.lower() for t in batch} for batch in self.type_synonyms]
+        for batch in synonyms:
+            if {inspector_type, metadata_type}.issubset(batch):
+                return True
+        return False

-        if conn_type._compare_type_affinity(metadata_impl):
-            comparator = _type_comparators.get(conn_type._type_affinity, None)
+    def _column_args_match(self, inspected_params, meta_params):
+        """We want to compare column parameters. However, we only want
+        to compare parameters that are set. If they both have `collation`,
+        we want to make sure they are the same; however, if only one
+        specifies it, don't flag it as less specific.
+        """
+        # py2.7 compat: dict.keys() is a list there, so build explicit sets
+        for kw in set(inspected_params.kwargs.keys()) & set(
+            meta_params.kwargs.keys()
+        ):
+            if inspected_params.kwargs[kw] != meta_params.kwargs[kw]:
+                return False
+
+        # compare the positional arguments to the extent that they are
+        # present in the SQL form of the metadata type compared to
+        # the inspected type. Example: Oracle NUMBER(17) and NUMBER(17, 0)
+        # are equivalent. As it's not clear if some database types might
+        # ignore additional parameters vs. they aren't actually there and
+        # need to be added, for now we are still favoring avoiding false
+        # positives
+        for meta, inspect in zip(meta_params.args, inspected_params.args):
+            if meta != inspect:
+                return False
+
+        return True

-            return comparator and comparator(metadata_impl, conn_type)
-        else:
+    def compare_type(self, inspector_column, metadata_column):
+        """Returns True if there ARE differences between the types of the two
+        columns.
Takes impl.type_synonyms into account between retrospected + and metadata types + """ + inspector_params = self._tokenize_column_type(inspector_column) + metadata_params = self._tokenize_column_type(metadata_column) + if not self._column_types_match( + inspector_params.type, metadata_params.type + ): + return True + if not self._column_args_match(inspector_params, metadata_params): return True + return False def compare_server_default( self, @@ -425,45 +468,3 @@ class DefaultImpl(with_metaclass(ImplMeta)): def render_type(self, type_obj, autogen_context): return False - - -def _string_compare(t1, t2): - return t1.length is not None and t1.length != t2.length - - -def _numeric_compare(t1, t2): - return (t1.precision is not None and t1.precision != t2.precision) or ( - t1.precision is not None - and t1.scale is not None - and t1.scale != t2.scale - ) - - -def _integer_compare(t1, t2): - t1_small_or_big = ( - "S" - if isinstance(t1, sqltypes.SmallInteger) - else "B" - if isinstance(t1, sqltypes.BigInteger) - else "I" - ) - t2_small_or_big = ( - "S" - if isinstance(t2, sqltypes.SmallInteger) - else "B" - if isinstance(t2, sqltypes.BigInteger) - else "I" - ) - return t1_small_or_big != t2_small_or_big - - -def _datetime_compare(t1, t2): - return t1.timezone != t2.timezone - - -_type_comparators = { - sqltypes.String: _string_compare, - sqltypes.Numeric: _numeric_compare, - sqltypes.Integer: _integer_compare, - sqltypes.DateTime: _datetime_compare, -} diff --git a/alembic/ddl/mysql.py b/alembic/ddl/mysql.py index ceac853b..a66bb84f 100644 --- a/alembic/ddl/mysql.py +++ b/alembic/ddl/mysql.py @@ -24,6 +24,7 @@ class MySQLImpl(DefaultImpl): __dialect__ = "mysql" transactional_ddl = False + type_synonyms = DefaultImpl.type_synonyms + ({"BOOL", "TINYINT"},) def alter_column( self, diff --git a/alembic/ddl/oracle.py b/alembic/ddl/oracle.py index 76cf7c5d..50e183af 100644 --- a/alembic/ddl/oracle.py +++ b/alembic/ddl/oracle.py @@ -19,6 +19,10 @@ class OracleImpl(DefaultImpl): transactional_ddl = False batch_separator = "/" command_terminator = "" + type_synonyms = DefaultImpl.type_synonyms + ( + {"VARCHAR", "VARCHAR2"}, + {"BIGINT", "INTEGER", "SMALLINT", "DECIMAL", "NUMERIC", "NUMBER"}, + ) def __init__(self, *arg, **kw): super(OracleImpl, self).__init__(*arg, **kw) diff --git a/alembic/ddl/postgresql.py b/alembic/ddl/postgresql.py index 9cb3c11e..4316a96b 100644 --- a/alembic/ddl/postgresql.py +++ b/alembic/ddl/postgresql.py @@ -38,6 +38,9 @@ log = logging.getLogger(__name__) class PostgresqlImpl(DefaultImpl): __dialect__ = "postgresql" transactional_ddl = True + type_synonyms = DefaultImpl.type_synonyms + ( + {"FLOAT", "DOUBLE PRECISION"}, + ) def prep_table_for_batch(self, table): for constraint in table.constraints: diff --git a/docs/build/autogenerate.rst b/docs/build/autogenerate.rst index 79c1f876..1a59371d 100644 --- a/docs/build/autogenerate.rst +++ b/docs/build/autogenerate.rst @@ -102,6 +102,8 @@ command. We should also go into our migration file and alter it as needed, inc adjustments to the directives as well as the addition of other directives which these may be dependent on - specifically data changes in between creates/alters/drops. +.. _autogenerate_detects: + What does Autogenerate Detect (and what does it *not* detect?) -------------------------------------------------------------- @@ -130,12 +132,29 @@ Autogenerate **will detect**: Autogenerate can **optionally detect**: * Change of column type. 
This will occur if you set
-  the :paramref:`.EnvironmentContext.configure.compare_type` parameter
-  to ``True``, or to a custom callable function. The default implementation
-  **only detects major type changes**, such as between ``Numeric`` and
-  ``String``, and does not detect changes in arguments such as lengths, precisions,
-  or enumeration members. The type comparison logic is extensible to work
-  around these limitations, see :ref:`compare_types` for details.
+  the :paramref:`.EnvironmentContext.configure.compare_type` parameter to
+  ``True``. The default implementation will reliably detect major changes,
+  such as between :class:`.Numeric` and :class:`.String`, as well as
+  accommodate the types generated by SQLAlchemy's "generic" types such as
+  :class:`.Boolean`. Arguments that are shared between both types, such as
+  length and precision values, will also be compared. If either the metadata
+  type or database type has **additional** arguments beyond those of the other
+  type, these are **not** compared; for example, if one numeric type featured a
+  "scale" and the other type did not, this would be seen as the backing database
+  not supporting the value, or as reporting a default that the metadata did not
+  specify.
+
+  The type comparison logic is fully extensible as well; see
+  :ref:`compare_types` for details.
+
+  .. versionchanged:: 1.4 type comparison code has been reworked such that
+     column types are compared based on their rendered DDL, which should allow
+     the functionality enabled by
+     :paramref:`.EnvironmentContext.configure.compare_type`
+     to be much more accurate, correctly accounting for the behavior of
+     SQLAlchemy "generic" types as well as major arguments specified within
+     types.
+
 * Change of server default. This will occur if you set
   the :paramref:`.EnvironmentContext.configure.compare_server_default`
   parameter to ``True``, or to a custom callable function.
@@ -405,18 +424,26 @@ is set to True::

 .. note::

     The default type comparison logic (which is end-user extensible) currently
-    works for **major changes in type only**, such as between ``Numeric`` and
-    ``String``. The logic will **not** detect changes such as:
+    (as of Alembic version 1.4.0) works by comparing the generated SQL for a
+    column. It does this in two steps:

-    * changes between types that have the same "type affinity", such as
-      between ``VARCHAR`` and ``TEXT``, or ``FLOAT`` and ``NUMERIC``
+    * First, it compares the outer type of each column, such as ``VARCHAR``
+      or ``TEXT``. Dialect implementations can have synonyms that are considered
+      equivalent; this is because some databases support a type by converting it
+      to another type. For example, NUMERIC and DECIMAL are considered equivalent
+      on all backends, while on the Oracle backend the additional synonyms
+      BIGINT, INTEGER, NUMBER, SMALLINT are added to this list of equivalents.

-    * changes between the arguments within the type, such as the lengths of
+    * Next, the arguments within the type, such as the lengths of
       strings, precision values for numerics, the elements inside of an
-      enumeration.
+      enumeration are compared. If BOTH columns have arguments AND they are
+      different, a change will be detected. If one column is just set to the
+      default and the other has arguments, Alembic will pass on attempting to
+      compare these. The rationale is that it is difficult to detect what a
+      database backend sets as a default value without generating false
+      positives.

-    Detection of these kinds of parameters is a long term project on the
-    SQLAlchemy side.
+.. versionchanged:: 1.4.0 Added the text and keyword comparison for column types

 Alternatively, the :paramref:`.EnvironmentContext.configure.compare_type`
 parameter accepts a callable function which may be used to implement custom type
@@ -488,6 +515,9 @@ then a basic check for type equivalence is run.

 .. versionadded:: 0.7.6 - added support for the ``compare_against_backend()``
    method.

+.. versionadded:: 1.4.0 - added column keyword comparisons and the
+   ``type_synonyms`` property.
+
 .. _post_write_hooks:

diff --git a/docs/build/unreleased/605.rst b/docs/build/unreleased/605.rst
new file mode 100644
index 00000000..3c206a7e
--- /dev/null
+++ b/docs/build/unreleased/605.rst
@@ -0,0 +1,17 @@
+.. change::
+    :tags: bug, autogenerate
+    :tickets: 605
+
+    A major rework of the "type comparison" logic is in place which changes the
+    entire approach by which column datatypes are compared. Types are now
+    compared based on the DDL string generated by the metadata type vs. the
+    datatype reflected from the database. This means we compare types based on
+    what would actually render and additionally if elements of the types change
+    like string length, those changes are detected as well. False positives
+    like those generated between SQLAlchemy Boolean and MySQL TINYINT should
+    also be resolved. Thanks very much to Paul Becotte for lots of hard work
+    and patience on this one.
+
+    .. seealso::
+
+        :ref:`autogenerate_detects` - updated comments on type comparison
\ No newline at end of file
diff --git a/tests/requirements.py b/tests/requirements.py
index 7eb528e6..07ca376e 100644
--- a/tests/requirements.py
+++ b/tests/requirements.py
@@ -4,6 +4,10 @@ from alembic.util import sqla_compat

 class DefaultRequirements(SuiteRequirements):
+    @property
+    def unicode_string(self):
+        return exclusions.skip_if(["oracle"])
+
     @property
     def alter_column(self):
         return exclusions.skip_if(["sqlite"], "no ALTER COLUMN support")
@@ -86,6 +90,12 @@ class DefaultRequirements(SuiteRequirements):
             "postgresql", "oracle", "mssql", "sybase", "sqlite"
         )

+    @property
+    def datetime_timezone(self):
+        """target dialect supports timezone with datetime types."""
+
+        return exclusions.only_on(["postgresql"])
+
     @property
     def postgresql_uuid_ossp(self):
         def check_uuid_ossp(config):
diff --git a/tests/test_autogen_diffs.py b/tests/test_autogen_diffs.py
index 470aaaf6..7a7702f7 100644
--- a/tests/test_autogen_diffs.py
+++ b/tests/test_autogen_diffs.py
@@ -2,17 +2,22 @@ import sys

 from sqlalchemy import BIGINT
 from sqlalchemy import BigInteger
+from sqlalchemy import Boolean
 from sqlalchemy import CHAR
 from sqlalchemy import CheckConstraint
 from sqlalchemy import Column
+from sqlalchemy import DATE
 from sqlalchemy import DateTime
 from sqlalchemy import DECIMAL
+from sqlalchemy import Enum
+from sqlalchemy import FLOAT
 from sqlalchemy import ForeignKey
 from sqlalchemy import ForeignKeyConstraint
 from sqlalchemy import Index
 from sqlalchemy import inspect
 from sqlalchemy import INTEGER
 from sqlalchemy import Integer
+from sqlalchemy import LargeBinary
 from sqlalchemy import MetaData
 from sqlalchemy import Numeric
 from sqlalchemy import PrimaryKeyConstraint
@@ -21,7 +26,9 @@ from sqlalchemy import String
 from sqlalchemy import Table
 from sqlalchemy import Text
 from sqlalchemy import text
+from sqlalchemy import TIMESTAMP
 from sqlalchemy import TypeDecorator
+from sqlalchemy import Unicode
 from sqlalchemy import UniqueConstraint
 from sqlalchemy import VARCHAR
 from sqlalchemy.dialects import sqlite
@@ -30,6 +37,7 @@ from sqlalchemy.types import
VARBINARY from alembic import autogenerate from alembic import testing +from alembic.autogenerate import api from alembic.migration import MigrationContext from alembic.operations import ops from alembic.testing import assert_raises_message @@ -39,7 +47,10 @@ from alembic.testing import is_ from alembic.testing import is_not_ from alembic.testing import mock from alembic.testing import TestBase +from alembic.testing.env import clear_staging_env +from alembic.testing.env import staging_env from alembic.util import CommandError +from ._autogen_fixtures import _default_object_filters from ._autogen_fixtures import AutogenFixtureTest from ._autogen_fixtures import AutogenTest @@ -665,6 +676,21 @@ class CompareTypeSpecificityTest(TestBase): (VARCHAR(30), String(30), False), (VARCHAR(30), String(40), True), (VARCHAR(30), Integer(), True), + (Text(), String(255), True), + # insp + metadata types same number of + # args but are different; they're different + (DECIMAL(10, 5), DECIMAL(10, 6), True), + # insp + metadata types, inspected type + # has an additional arg; assume this is additional + # default precision on the part of the DB, assume they are + # equivalent + (DECIMAL(10, 5), DECIMAL(10), False), + # insp + metadata types, metadata type + # has an additional arg; this can go either way, either the + # metadata has extra precision, or the DB doesn't support the + # element, go with consider them equivalent for now + (DECIMAL(10), DECIMAL(10, 5), False), + (DECIMAL(10, 2), Numeric(10), False), (DECIMAL(10, 5), Numeric(10, 5), False), (DECIMAL(10, 5), Numeric(12, 5), True), (DECIMAL(10, 5), DateTime(), True), @@ -675,24 +701,152 @@ class CompareTypeSpecificityTest(TestBase): (BIGINT(), SmallInteger(), True), (INTEGER(), SmallInteger(), True), (Integer(), String(), True), - (DateTime(), DateTime(timezone=False), False), - (DateTime(), DateTime(timezone=True), True), - (DateTime(timezone=False), DateTime(timezone=True), True), id_="ssa", - argnames="compare_from,compare_to,expected", + argnames="inspected_type,metadata_type,expected", ) def test_compare_type( - self, impl_fixture, compare_from, compare_to, expected + self, impl_fixture, inspected_type, metadata_type, expected ): is_( impl_fixture.compare_type( - Column("x", compare_from), Column("x", compare_to) + Column("x", inspected_type), Column("x", metadata_type) ), expected, ) +class CompareMetadataToInspectorTest(TestBase): + __backend__ = True + + @classmethod + def _get_bind(cls): + return config.db + + configure_opts = {} + + def setUp(self): + staging_env() + self.bind = self._get_bind() + self.m1 = MetaData() + + def tearDown(self): + self.m1.drop_all(self.bind) + clear_staging_env() + + def _compare_columns(self, cola, colb): + Table("sometable", self.m1, Column("col", cola)) + self.m1.create_all(self.bind) + m2 = MetaData() + Table("sometable", m2, Column("col", colb)) + + ctx_opts = { + "compare_type": True, + "compare_server_default": True, + "target_metadata": m2, + "upgrade_token": "upgrades", + "downgrade_token": "downgrades", + "alembic_module_prefix": "op.", + "sqlalchemy_module_prefix": "sa.", + "include_object": _default_object_filters, + } + if self.configure_opts: + ctx_opts.update(self.configure_opts) + with self.bind.connect() as conn: + context = MigrationContext.configure( + connection=conn, opts=ctx_opts + ) + autogen_context = api.AutogenContext(context, m2) + uo = ops.UpgradeOps(ops=[]) + autogenerate._produce_net_changes(autogen_context, uo) + return bool(uo.as_diffs()) + + @testing.combinations( + 
(INTEGER(),), + (CHAR(),), + (VARCHAR(32),), + (Text(),), + (FLOAT(),), + (Numeric(),), + (DECIMAL(),), + (TIMESTAMP(),), + (DateTime(),), + (Boolean(),), + (BigInteger(),), + (SmallInteger(),), + (DATE(),), + (String(32),), + (LargeBinary(),), + (Unicode(32),), + (Enum("one", "two", "three", name="the_enum"),), + ) + def test_introspected_columns_match_metadata_columns(self, cola): + # this is ensuring false positives aren't generated for types + # that have not changed. + is_(self._compare_columns(cola, cola), False) + + @testing.combinations( + (String(32), VARCHAR(32), False), + (VARCHAR(6), String(6), False), + (CHAR(), String(1), True), + (Text(), VARCHAR(255), True), + (Unicode(32), String(32), False, config.requirements.unicode_string), + (Unicode(32), VARCHAR(32), False, config.requirements.unicode_string), + (VARCHAR(6), VARCHAR(12), True), + (VARCHAR(6), String(12), True), + ) + def test_string_comparisons(self, cola, colb, expect_changes): + is_(self._compare_columns(cola, colb), expect_changes) + + @testing.combinations( + ( + DateTime(), + DateTime(timezone=False), + False, + config.requirements.datetime_timezone, + ), + ( + DateTime(), + DateTime(timezone=True), + True, + config.requirements.datetime_timezone, + ), + ( + DateTime(timezone=True), + DateTime(timezone=False), + True, + config.requirements.datetime_timezone, + ), + ) + def test_datetime_comparisons(self, cola, colb, expect_changes): + is_(self._compare_columns(cola, colb), expect_changes) + + @testing.combinations( + (Integer(), Integer(), False), + ( + Integer(), + Numeric(8, 0), + True, + config.requirements.integer_subtype_comparisons, + ), + (Numeric(8, 0), Numeric(8, 2), True), + ( + BigInteger(), + Integer(), + True, + config.requirements.integer_subtype_comparisons, + ), + ( + SmallInteger(), + Integer(), + True, + config.requirements.integer_subtype_comparisons, + ), + ) + def test_numeric_comparisons(self, cola, colb, expect_changes): + is_(self._compare_columns(cola, colb), expect_changes) + + class AutogenSystemColTest(AutogenTest, TestBase): __only_on__ = "postgresql" @@ -1013,8 +1167,8 @@ class AutogenerateDiffOrderTest(AutogenTest, TestBase): def test_diffs_order(self): """ - Added in order to test that child tables(tables with FKs) are generated - before their parent tables + Added in order to test that child tables(tables with FKs) are + generated before their parent tables """ ctx = self.autogen_context
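
As a usage sketch to accompany the change above (this is not part of the patch): the reworked comparison can also be exercised end-to-end through Alembic's public autogenerate API. The in-memory SQLite URL and the ``account`` table below are placeholder assumptions; ``compare_metadata()`` and ``MigrationContext.configure()`` are the existing public entry points, and ``compare_type=True`` is the same option discussed in the documentation hunks above::

    # A minimal sketch, assuming an in-memory SQLite database and a
    # throwaway "account" table.  compare_metadata() runs the same
    # comparison that the autogenerate command uses.
    from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine

    from alembic.autogenerate import compare_metadata
    from alembic.migration import MigrationContext

    engine = create_engine("sqlite://")

    # what currently exists in the database: "name" is VARCHAR(30)
    db_metadata = MetaData()
    Table(
        "account",
        db_metadata,
        Column("id", Integer, primary_key=True),
        Column("name", String(30)),
    )
    db_metadata.create_all(engine)

    # what the application model now declares: "name" is String(40)
    app_metadata = MetaData()
    Table(
        "account",
        app_metadata,
        Column("id", Integer, primary_key=True),
        Column("name", String(40)),
    )

    with engine.connect() as conn:
        # compare_type=True enables the DDL-string based comparison added
        # by this change; the length change 30 -> 40 is reported, where the
        # pre-1.4 default comparator would not have flagged it.
        mc = MigrationContext.configure(conn, opts={"compare_type": True})
        for diff in compare_metadata(mc, app_metadata):
            print(diff)  # expect a "modify_type" entry for account.name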