From: Paul Becotte Date: Fri, 24 Jan 2020 00:36:33 +0000 (-0500) Subject: Update the type comparison code used for schema autogeneration. Compare X-Git-Tag: rel_1_4_0~3 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3ddf82e1e2b4fff47edbd45dd493e6bbd5880496;p=thirdparty%2Fsqlalchemy%2Falembic.git Update the type comparison code used for schema autogeneration. Compare A major rework of the "type comparison" logic is in place which changes the entire approach by which column datatypes are compared. Types are now compared based on the DDL string generated by the metadata type vs. the datatype reflected from the database. This means we compare types based on what would actually render and additionally if elements of the types change like string length, those changes are detected as well. False positives like those generated between SQLAlchemy Boolean and MySQL TINYINT should also be resolved. Thanks very much to Paul Becotte for lots of hard work and patience on this one. Fixes: #605 Closes: #619 Pull-request: https://github.com/sqlalchemy/alembic/pull/619 Pull-request-sha: 1a6a3860881081c3b98fdbfc203f8d3af5f98926 Change-Id: I1ab30f2a30fc567cde56b5b8be5f521ff235b67b --- diff --git a/.gitignore b/.gitignore index 39b9696c..47a52f7a 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ coverage.xml /scratch /scratch_test_* /test_schema.db +.idea/ diff --git a/alembic/__init__.py b/alembic/__init__.py index 4d4fbaa2..6b40a270 100644 --- a/alembic/__init__.py +++ b/alembic/__init__.py @@ -5,7 +5,7 @@ from . import op # noqa from .runtime import environment from .runtime import migration -__version__ = '1.3.4' +__version__ = "1.4.0" sys.modules["alembic.migration"] = migration sys.modules["alembic.environment"] = environment diff --git a/alembic/ddl/impl.py b/alembic/ddl/impl.py index 17209c98..72fc2947 100644 --- a/alembic/ddl/impl.py +++ b/alembic/ddl/impl.py @@ -1,6 +1,8 @@ +from collections import namedtuple +import re + from sqlalchemy import schema from sqlalchemy import text -from sqlalchemy import types as sqltypes from . import base from .. import util @@ -21,6 +23,9 @@ class ImplMeta(type): _impls = {} +Params = namedtuple("Params", ["type", "args", "kwargs"]) + + class DefaultImpl(with_metaclass(ImplMeta)): """Provide the entrypoint for major migration operations, @@ -39,6 +44,7 @@ class DefaultImpl(with_metaclass(ImplMeta)): transactional_ddl = False command_terminator = ";" + type_synonyms = ({"NUMERIC", "DECIMAL"},) def __init__( self, @@ -322,32 +328,69 @@ class DefaultImpl(with_metaclass(ImplMeta)): for row in rows: self._exec(table.insert(inline=True).values(**row)) - def compare_type(self, inspector_column, metadata_column): - - conn_type = inspector_column.type - metadata_type = metadata_column.type - - metadata_impl = metadata_type.dialect_impl(self.dialect) - if isinstance(metadata_impl, sqltypes.Variant): - metadata_impl = metadata_impl.impl.dialect_impl(self.dialect) - - # work around SQLAlchemy bug "stale value for type affinity" - # fixed in 0.7.4 - metadata_impl.__dict__.pop("_type_affinity", None) + def _tokenize_column_type(self, column): + definition = self.dialect.type_compiler.process(column.type).lower() + # py27 - py36 - col_type, *param_terms = re.findall... 
+        matches = re.findall("[^(),]+", definition)
+        col_type, param_terms = matches[0], matches[1:]
+        params = Params(col_type, [], {})
+        for term in param_terms:
+            if "=" in term:
+                key, val = term.split("=")
+                params.kwargs[key.strip()] = val.strip()
+            else:
+                params.args.append(term.strip())
+        return params

-        if hasattr(metadata_impl, "compare_against_backend"):
-            comparison = metadata_impl.compare_against_backend(
-                self.dialect, conn_type
-            )
-            if comparison is not None:
-                return not comparison
+    def _column_types_match(self, inspector_type, metadata_type):
+        if inspector_type == metadata_type:
+            return True
+        synonyms = [{t.lower() for t in batch} for batch in self.type_synonyms]
+        for batch in synonyms:
+            if {inspector_type, metadata_type}.issubset(batch):
+                return True
+        return False

-        if conn_type._compare_type_affinity(metadata_impl):
-            comparator = _type_comparators.get(conn_type._type_affinity, None)
+    def _column_args_match(self, inspected_params, meta_params):
+        """We want to compare column parameters. However, we only want
+        to compare parameters that are set. If they both have `collation`,
+        we want to make sure they are the same; however, if only one
+        specifies it, don't flag it as less specific.
+        """
+        # py2.7 compat: dict.keys() is a list there, so build explicit sets
+        for kw in set(inspected_params.kwargs.keys()) & set(
+            meta_params.kwargs.keys()
+        ):
+            if inspected_params.kwargs[kw] != meta_params.kwargs[kw]:
+                return False
+
+        # compare the positional arguments to the extent that they are
+        # present in the SQL form of the metadata type compared to
+        # the inspected type. Example: Oracle NUMBER(17) and NUMBER(17, 0)
+        # are equivalent. As it's not clear if some database types might
+        # ignore additional parameters vs. they aren't actually there and
+        # need to be added, for now we are still favoring avoiding false
+        # positives
+        for meta, inspect in zip(meta_params.args, inspected_params.args):
+            if meta != inspect:
+                return False
+
+        return True

-            return comparator and comparator(metadata_impl, conn_type)
-        else:
+    def compare_type(self, inspector_column, metadata_column):
+        """Returns True if there ARE differences between the types of the two
+        columns.
Takes impl.type_synonyms into account between retrospected + and metadata types + """ + inspector_params = self._tokenize_column_type(inspector_column) + metadata_params = self._tokenize_column_type(metadata_column) + if not self._column_types_match( + inspector_params.type, metadata_params.type + ): + return True + if not self._column_args_match(inspector_params, metadata_params): return True + return False def compare_server_default( self, @@ -425,45 +468,3 @@ class DefaultImpl(with_metaclass(ImplMeta)): def render_type(self, type_obj, autogen_context): return False - - -def _string_compare(t1, t2): - return t1.length is not None and t1.length != t2.length - - -def _numeric_compare(t1, t2): - return (t1.precision is not None and t1.precision != t2.precision) or ( - t1.precision is not None - and t1.scale is not None - and t1.scale != t2.scale - ) - - -def _integer_compare(t1, t2): - t1_small_or_big = ( - "S" - if isinstance(t1, sqltypes.SmallInteger) - else "B" - if isinstance(t1, sqltypes.BigInteger) - else "I" - ) - t2_small_or_big = ( - "S" - if isinstance(t2, sqltypes.SmallInteger) - else "B" - if isinstance(t2, sqltypes.BigInteger) - else "I" - ) - return t1_small_or_big != t2_small_or_big - - -def _datetime_compare(t1, t2): - return t1.timezone != t2.timezone - - -_type_comparators = { - sqltypes.String: _string_compare, - sqltypes.Numeric: _numeric_compare, - sqltypes.Integer: _integer_compare, - sqltypes.DateTime: _datetime_compare, -} diff --git a/alembic/ddl/mysql.py b/alembic/ddl/mysql.py index ceac853b..a66bb84f 100644 --- a/alembic/ddl/mysql.py +++ b/alembic/ddl/mysql.py @@ -24,6 +24,7 @@ class MySQLImpl(DefaultImpl): __dialect__ = "mysql" transactional_ddl = False + type_synonyms = DefaultImpl.type_synonyms + ({"BOOL", "TINYINT"},) def alter_column( self, diff --git a/alembic/ddl/oracle.py b/alembic/ddl/oracle.py index 76cf7c5d..50e183af 100644 --- a/alembic/ddl/oracle.py +++ b/alembic/ddl/oracle.py @@ -19,6 +19,10 @@ class OracleImpl(DefaultImpl): transactional_ddl = False batch_separator = "/" command_terminator = "" + type_synonyms = DefaultImpl.type_synonyms + ( + {"VARCHAR", "VARCHAR2"}, + {"BIGINT", "INTEGER", "SMALLINT", "DECIMAL", "NUMERIC", "NUMBER"}, + ) def __init__(self, *arg, **kw): super(OracleImpl, self).__init__(*arg, **kw) diff --git a/alembic/ddl/postgresql.py b/alembic/ddl/postgresql.py index 9cb3c11e..4316a96b 100644 --- a/alembic/ddl/postgresql.py +++ b/alembic/ddl/postgresql.py @@ -38,6 +38,9 @@ log = logging.getLogger(__name__) class PostgresqlImpl(DefaultImpl): __dialect__ = "postgresql" transactional_ddl = True + type_synonyms = DefaultImpl.type_synonyms + ( + {"FLOAT", "DOUBLE PRECISION"}, + ) def prep_table_for_batch(self, table): for constraint in table.constraints: diff --git a/docs/build/autogenerate.rst b/docs/build/autogenerate.rst index 79c1f876..1a59371d 100644 --- a/docs/build/autogenerate.rst +++ b/docs/build/autogenerate.rst @@ -102,6 +102,8 @@ command. We should also go into our migration file and alter it as needed, inc adjustments to the directives as well as the addition of other directives which these may be dependent on - specifically data changes in between creates/alters/drops. +.. _autogenerate_detects: + What does Autogenerate Detect (and what does it *not* detect?) -------------------------------------------------------------- @@ -130,12 +132,29 @@ Autogenerate **will detect**: Autogenerate can **optionally detect**: * Change of column type. 
This will occur if you set
-  the :paramref:`.EnvironmentContext.configure.compare_type` parameter
-  to ``True``, or to a custom callable function. The default implementation
-  **only detects major type changes**, such as between ``Numeric`` and
-  ``String``, and does not detect changes in arguments such as lengths, precisions,
-  or enumeration members. The type comparison logic is extensible to work
-  around these limitations, see :ref:`compare_types` for details.
+  the :paramref:`.EnvironmentContext.configure.compare_type` parameter to
+  ``True``. The default implementation will reliably detect major changes,
+  such as between :class:`.Numeric` and :class:`.String`, as well as
+  accommodate the types generated by SQLAlchemy's "generic" types such as
+  :class:`.Boolean`. Arguments that are shared between both types, such as
+  length and precision values, will also be compared. If either the metadata
+  type or database type has **additional** arguments beyond those of the other
+  type, these are **not** compared; for example, if one numeric type featured a
+  "scale" and the other type did not, this would be seen as the backing database
+  not supporting the value, or as reporting a default that the metadata did not
+  specify.
+
+  The type comparison logic is fully extensible as well; see
+  :ref:`compare_types` for details.
+
+  .. versionchanged:: 1.4 type comparison code has been reworked such that
+     column types are compared based on their rendered DDL, which should allow
+     the functionality enabled by
+     :paramref:`.EnvironmentContext.configure.compare_type`
+     to be much more accurate, correctly accounting for the behavior of
+     SQLAlchemy "generic" types as well as major arguments specified within
+     types.
+
 * Change of server default. This will occur if you set
   the :paramref:`.EnvironmentContext.configure.compare_server_default`
   parameter to ``True``, or to a custom callable function.
@@ -405,18 +424,26 @@ is set to True::

 .. note::

     The default type comparison logic (which is end-user extensible) currently
-    works for **major changes in type only**, such as between ``Numeric`` and
-    ``String``. The logic will **not** detect changes such as:
+    (as of Alembic version 1.4.0) works by comparing the generated SQL for a
+    column. It does this in two steps:

-    * changes between types that have the same "type affinity", such as
-      between ``VARCHAR`` and ``TEXT``, or ``FLOAT`` and ``NUMERIC``
+    * First, it compares the outer type of each column, such as ``VARCHAR``
+      or ``TEXT``. Dialect implementations can have synonyms that are considered
+      equivalent; this is because some databases support a type by converting it
+      to another type. For example, NUMERIC and DECIMAL are considered equivalent
+      on all backends, while on the Oracle backend the additional synonyms
+      BIGINT, INTEGER, NUMBER, SMALLINT are added to this list of equivalents.

-    * changes between the arguments within the type, such as the lengths of
+    * Next, the arguments within the type, such as the lengths of
       strings, precision values for numerics, the elements inside of an
-      enumeration.
+      enumeration are compared. If BOTH columns have arguments AND they are
+      different, a change will be detected. If one column is just set to the
+      default and the other has arguments, Alembic will pass on attempting to
+      compare these. The rationale is that it is difficult to detect what a
+      database backend sets as a default value without generating false
+      positives.

-    Detection of these kinds of parameters is a long term project on the
-    SQLAlchemy side.
+.. versionchanged:: 1.4.0 Added the text and keyword comparison for column types

 Alternatively, the :paramref:`.EnvironmentContext.configure.compare_type`
 parameter accepts a callable function which may be used to implement custom type
@@ -488,6 +515,9 @@ then a basic check for type equivalence is run.

 .. versionadded:: 0.7.6 - added support for the ``compare_against_backend()``
    method.

+.. versionadded:: 1.4.0 - added column keyword comparisons and the
+   ``type_synonyms`` property.
+
 .. _post_write_hooks:

diff --git a/docs/build/unreleased/605.rst b/docs/build/unreleased/605.rst
new file mode 100644
index 00000000..3c206a7e
--- /dev/null
+++ b/docs/build/unreleased/605.rst
@@ -0,0 +1,17 @@
+.. change::
+    :tags: bug, autogenerate
+    :tickets: 605
+
+    A major rework of the "type comparison" logic is in place which changes the
+    entire approach by which column datatypes are compared. Types are now
+    compared based on the DDL string generated by the metadata type vs. the
+    datatype reflected from the database. This means we compare types based on
+    what would actually render and additionally if elements of the types change
+    like string length, those changes are detected as well. False positives
+    like those generated between SQLAlchemy Boolean and MySQL TINYINT should
+    also be resolved. Thanks very much to Paul Becotte for lots of hard work
+    and patience on this one.
+
+    .. seealso::
+
+        :ref:`autogenerate_detects` - updated comments on type comparison
\ No newline at end of file
diff --git a/tests/requirements.py b/tests/requirements.py
index 7eb528e6..07ca376e 100644
--- a/tests/requirements.py
+++ b/tests/requirements.py
@@ -4,6 +4,10 @@ from alembic.util import sqla_compat

 class DefaultRequirements(SuiteRequirements):
+    @property
+    def unicode_string(self):
+        return exclusions.skip_if(["oracle"])
+
     @property
     def alter_column(self):
         return exclusions.skip_if(["sqlite"], "no ALTER COLUMN support")
@@ -86,6 +90,12 @@ class DefaultRequirements(SuiteRequirements):
             "postgresql", "oracle", "mssql", "sybase", "sqlite"
         )

+    @property
+    def datetime_timezone(self):
+        """target dialect supports timezone with datetime types."""
+
+        return exclusions.only_on(["postgresql"])
+
     @property
     def postgresql_uuid_ossp(self):
         def check_uuid_ossp(config):
diff --git a/tests/test_autogen_diffs.py b/tests/test_autogen_diffs.py
index 470aaaf6..7a7702f7 100644
--- a/tests/test_autogen_diffs.py
+++ b/tests/test_autogen_diffs.py
@@ -2,17 +2,22 @@ import sys

 from sqlalchemy import BIGINT
 from sqlalchemy import BigInteger
+from sqlalchemy import Boolean
 from sqlalchemy import CHAR
 from sqlalchemy import CheckConstraint
 from sqlalchemy import Column
+from sqlalchemy import DATE
 from sqlalchemy import DateTime
 from sqlalchemy import DECIMAL
+from sqlalchemy import Enum
+from sqlalchemy import FLOAT
 from sqlalchemy import ForeignKey
 from sqlalchemy import ForeignKeyConstraint
 from sqlalchemy import Index
 from sqlalchemy import inspect
 from sqlalchemy import INTEGER
 from sqlalchemy import Integer
+from sqlalchemy import LargeBinary
 from sqlalchemy import MetaData
 from sqlalchemy import Numeric
 from sqlalchemy import PrimaryKeyConstraint
@@ -21,7 +26,9 @@ from sqlalchemy import String
 from sqlalchemy import Table
 from sqlalchemy import Text
 from sqlalchemy import text
+from sqlalchemy import TIMESTAMP
 from sqlalchemy import TypeDecorator
+from sqlalchemy import Unicode
 from sqlalchemy import UniqueConstraint
 from sqlalchemy import VARCHAR
 from sqlalchemy.dialects import sqlite
@@ -30,6 +37,7 @@ from sqlalchemy.types import
VARBINARY from alembic import autogenerate from alembic import testing +from alembic.autogenerate import api from alembic.migration import MigrationContext from alembic.operations import ops from alembic.testing import assert_raises_message @@ -39,7 +47,10 @@ from alembic.testing import is_ from alembic.testing import is_not_ from alembic.testing import mock from alembic.testing import TestBase +from alembic.testing.env import clear_staging_env +from alembic.testing.env import staging_env from alembic.util import CommandError +from ._autogen_fixtures import _default_object_filters from ._autogen_fixtures import AutogenFixtureTest from ._autogen_fixtures import AutogenTest @@ -665,6 +676,21 @@ class CompareTypeSpecificityTest(TestBase): (VARCHAR(30), String(30), False), (VARCHAR(30), String(40), True), (VARCHAR(30), Integer(), True), + (Text(), String(255), True), + # insp + metadata types same number of + # args but are different; they're different + (DECIMAL(10, 5), DECIMAL(10, 6), True), + # insp + metadata types, inspected type + # has an additional arg; assume this is additional + # default precision on the part of the DB, assume they are + # equivalent + (DECIMAL(10, 5), DECIMAL(10), False), + # insp + metadata types, metadata type + # has an additional arg; this can go either way, either the + # metadata has extra precision, or the DB doesn't support the + # element, go with consider them equivalent for now + (DECIMAL(10), DECIMAL(10, 5), False), + (DECIMAL(10, 2), Numeric(10), False), (DECIMAL(10, 5), Numeric(10, 5), False), (DECIMAL(10, 5), Numeric(12, 5), True), (DECIMAL(10, 5), DateTime(), True), @@ -675,24 +701,152 @@ class CompareTypeSpecificityTest(TestBase): (BIGINT(), SmallInteger(), True), (INTEGER(), SmallInteger(), True), (Integer(), String(), True), - (DateTime(), DateTime(timezone=False), False), - (DateTime(), DateTime(timezone=True), True), - (DateTime(timezone=False), DateTime(timezone=True), True), id_="ssa", - argnames="compare_from,compare_to,expected", + argnames="inspected_type,metadata_type,expected", ) def test_compare_type( - self, impl_fixture, compare_from, compare_to, expected + self, impl_fixture, inspected_type, metadata_type, expected ): is_( impl_fixture.compare_type( - Column("x", compare_from), Column("x", compare_to) + Column("x", inspected_type), Column("x", metadata_type) ), expected, ) +class CompareMetadataToInspectorTest(TestBase): + __backend__ = True + + @classmethod + def _get_bind(cls): + return config.db + + configure_opts = {} + + def setUp(self): + staging_env() + self.bind = self._get_bind() + self.m1 = MetaData() + + def tearDown(self): + self.m1.drop_all(self.bind) + clear_staging_env() + + def _compare_columns(self, cola, colb): + Table("sometable", self.m1, Column("col", cola)) + self.m1.create_all(self.bind) + m2 = MetaData() + Table("sometable", m2, Column("col", colb)) + + ctx_opts = { + "compare_type": True, + "compare_server_default": True, + "target_metadata": m2, + "upgrade_token": "upgrades", + "downgrade_token": "downgrades", + "alembic_module_prefix": "op.", + "sqlalchemy_module_prefix": "sa.", + "include_object": _default_object_filters, + } + if self.configure_opts: + ctx_opts.update(self.configure_opts) + with self.bind.connect() as conn: + context = MigrationContext.configure( + connection=conn, opts=ctx_opts + ) + autogen_context = api.AutogenContext(context, m2) + uo = ops.UpgradeOps(ops=[]) + autogenerate._produce_net_changes(autogen_context, uo) + return bool(uo.as_diffs()) + + @testing.combinations( + 
(INTEGER(),), + (CHAR(),), + (VARCHAR(32),), + (Text(),), + (FLOAT(),), + (Numeric(),), + (DECIMAL(),), + (TIMESTAMP(),), + (DateTime(),), + (Boolean(),), + (BigInteger(),), + (SmallInteger(),), + (DATE(),), + (String(32),), + (LargeBinary(),), + (Unicode(32),), + (Enum("one", "two", "three", name="the_enum"),), + ) + def test_introspected_columns_match_metadata_columns(self, cola): + # this is ensuring false positives aren't generated for types + # that have not changed. + is_(self._compare_columns(cola, cola), False) + + @testing.combinations( + (String(32), VARCHAR(32), False), + (VARCHAR(6), String(6), False), + (CHAR(), String(1), True), + (Text(), VARCHAR(255), True), + (Unicode(32), String(32), False, config.requirements.unicode_string), + (Unicode(32), VARCHAR(32), False, config.requirements.unicode_string), + (VARCHAR(6), VARCHAR(12), True), + (VARCHAR(6), String(12), True), + ) + def test_string_comparisons(self, cola, colb, expect_changes): + is_(self._compare_columns(cola, colb), expect_changes) + + @testing.combinations( + ( + DateTime(), + DateTime(timezone=False), + False, + config.requirements.datetime_timezone, + ), + ( + DateTime(), + DateTime(timezone=True), + True, + config.requirements.datetime_timezone, + ), + ( + DateTime(timezone=True), + DateTime(timezone=False), + True, + config.requirements.datetime_timezone, + ), + ) + def test_datetime_comparisons(self, cola, colb, expect_changes): + is_(self._compare_columns(cola, colb), expect_changes) + + @testing.combinations( + (Integer(), Integer(), False), + ( + Integer(), + Numeric(8, 0), + True, + config.requirements.integer_subtype_comparisons, + ), + (Numeric(8, 0), Numeric(8, 2), True), + ( + BigInteger(), + Integer(), + True, + config.requirements.integer_subtype_comparisons, + ), + ( + SmallInteger(), + Integer(), + True, + config.requirements.integer_subtype_comparisons, + ), + ) + def test_numeric_comparisons(self, cola, colb, expect_changes): + is_(self._compare_columns(cola, colb), expect_changes) + + class AutogenSystemColTest(AutogenTest, TestBase): __only_on__ = "postgresql" @@ -1013,8 +1167,8 @@ class AutogenerateDiffOrderTest(AutogenTest, TestBase): def test_diffs_order(self): """ - Added in order to test that child tables(tables with FKs) are generated - before their parent tables + Added in order to test that child tables(tables with FKs) are + generated before their parent tables """ ctx = self.autogen_context
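
As a usage sketch to accompany the change above (this is not part of the patch): the reworked comparison can also be exercised end-to-end through Alembic's public autogenerate API. The in-memory SQLite URL and the ``account`` table below are placeholder assumptions; ``compare_metadata()`` and ``MigrationContext.configure()`` are the existing public entry points, and ``compare_type=True`` is the same option discussed in the documentation hunks above::

    # A minimal sketch, assuming an in-memory SQLite database and a
    # throwaway "account" table.  compare_metadata() runs the same
    # comparison that the autogenerate command uses.
    from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine

    from alembic.autogenerate import compare_metadata
    from alembic.migration import MigrationContext

    engine = create_engine("sqlite://")

    # what currently exists in the database: "name" is VARCHAR(30)
    db_metadata = MetaData()
    Table(
        "account",
        db_metadata,
        Column("id", Integer, primary_key=True),
        Column("name", String(30)),
    )
    db_metadata.create_all(engine)

    # what the application model now declares: "name" is String(40)
    app_metadata = MetaData()
    Table(
        "account",
        app_metadata,
        Column("id", Integer, primary_key=True),
        Column("name", String(40)),
    )

    with engine.connect() as conn:
        # compare_type=True enables the DDL-string based comparison added
        # by this change; the length change 30 -> 40 is reported, where the
        # pre-1.4 default comparator would not have flagged it.
        mc = MigrationContext.configure(conn, opts={"compare_type": True})
        for diff in compare_metadata(mc, app_metadata):
            print(diff)  # expect a "modify_type" entry for account.name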