Use looser tokenization for type comparison

author Mike Bayer <mike_mp@zzzcomputing.com>

Thu, 19 Mar 2020 16:32:47 +0000 (12:32 -0400)

committer Mike Bayer <mike_mp@zzzcomputing.com>

Thu, 19 Mar 2020 19:27:39 +0000 (15:27 -0400)
author Mike Bayer <mike_mp@zzzcomputing.com>
Thu, 19 Mar 2020 16:32:47 +0000 (12:32 -0400)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Thu, 19 Mar 2020 19:27:39 +0000 (15:27 -0400)
diff --git a/alembic/ddl/impl.py b/alembic/ddl/impl.py

index 655d8c949a23b8a9b8f51578bd6a92e5dde1d465..9df2a72dfb720b7ff1ed1b25d252e48fd27a76dd 100644 (file)
--- a/alembic/ddl/impl.py
+++ b/alembic/ddl/impl.py
@@ -22,8 +22,7 @@ class ImplMeta(type):
  
  _impls = {}
  
-
-Params = namedtuple("Params", ["type", "args", "kwargs"])
+Params = namedtuple("Params", ["token0", "tokens", "args", "kwargs"])
  
  
  class DefaultImpl(with_metaclass(ImplMeta)):
@@ -45,6 +44,7 @@ class DefaultImpl(with_metaclass(ImplMeta)):
      transactional_ddl = False
      command_terminator = ";"
      type_synonyms = ({"NUMERIC", "DECIMAL"},)
+    type_arg_extract = ()
  
      def __init__(
          self,
@@ -339,33 +339,50 @@ class DefaultImpl(with_metaclass(ImplMeta)):
          # TIMESTAMP WITH TIMEZONE
          # INTEGER UNSIGNED
          # INTEGER (10) UNSIGNED
+        # INTEGER(10) UNSIGNED
+        # varchar character set utf8
          #
-        # "ext" are the words after the parenthesis, if any, but if there
-        # are no parenthesis, then these are part of "col".  so they are
-        # moved together for normalization purposes
-        matches = re.search(
-            r"^(?P<col>[^()]*)(?:\((?P<params>.*?)\))?(?P<ext>[^()]*)?$",
-            definition,
-        ).groupdict(default="")
-        col_type = matches["col"]
-        if matches["ext"]:
-            col_type = col_type.strip() + " " + matches["ext"].strip()
-
-        params = Params(col_type, [], {})
-        for term in re.findall("[^(),]+", matches["params"] or ""):
-            if "=" in term:
-                key, val = term.split("=")
-                params.kwargs[key.strip()] = val.strip()
+
+        tokens = re.findall(r"[\w\-_]+|\(.+?\)", definition)
+
+        term_tokens = []
+        paren_term = None
+
+        for token in tokens:
+            if re.match(r"^\(.*\)$", token):
+                paren_term = token
              else:
-                params.args.append(term.strip())
+                term_tokens.append(token)
+
+        params = Params(term_tokens[0], term_tokens[1:], [], {})
+
+        if paren_term:
+            for term in re.findall("[^(),]+", paren_term):
+                if "=" in term:
+                    key, val = term.split("=")
+                    params.kwargs[key.strip()] = val.strip()
+                else:
+                    params.args.append(term.strip())
+
          return params
  
-    def _column_types_match(self, inspector_type, metadata_type):
-        if inspector_type == metadata_type:
+    def _column_types_match(self, inspector_params, metadata_params):
+        if inspector_params.token0 == metadata_params.token0:
              return True
+
          synonyms = [{t.lower() for t in batch} for batch in self.type_synonyms]
+        inspector_all_terms = " ".join(
+            [inspector_params.token0] + inspector_params.tokens
+        )
+        metadata_all_terms = " ".join(
+            [metadata_params.token0] + metadata_params.tokens
+        )
+
          for batch in synonyms:
-            if {inspector_type, metadata_type}.issubset(batch):
+            if {inspector_all_terms, metadata_all_terms}.issubset(batch) or {
+                inspector_params.token0,
+                metadata_params.token0,
+            }.issubset(batch):
                  return True
          return False
  
@@ -375,22 +392,27 @@ class DefaultImpl(with_metaclass(ImplMeta)):
          we want to make sure they are the same. However, if only one
          specifies it, dont flag it for being less specific
          """
-        # py27- .keys is a set in py36
-        for kw in set(inspected_params.kwargs.keys()) & set(
-            meta_params.kwargs.keys()
+
+        if (
+            len(meta_params.tokens) == len(inspected_params.tokens)
+            and meta_params.tokens != inspected_params.tokens
          ):
-            if inspected_params.kwargs[kw] != meta_params.kwargs[kw]:
-                return False
+            return False
+
+        if (
+            len(meta_params.args) == len(inspected_params.args)
+            and meta_params.args != inspected_params.args
+        ):
+            return False
+
+        insp = " ".join(inspected_params.tokens).lower()
+        meta = " ".join(meta_params.tokens).lower()
+
+        for reg in self.type_arg_extract:
+            mi = re.search(reg, insp)
+            mm = re.search(reg, meta)
  
-        # compare the positional arguments to the extent that they are
-        # present in the SQL form of the metadata type compared to
-        # the inspected type.   Example:  Oracle NUMBER(17) and NUMBER(17, 0)
-        # are equivalent.  As it's not clear if some database types might
-        # ignore additional parameters vs. they aren't actually there and
-        # need to be added, for now we are still favoring avoiding false
-        # positives
-        for meta, inspect in zip(meta_params.args, inspected_params.args):
-            if meta != inspect:
+            if mi and mm and mi.group(1) != mm.group(1):
                  return False
  
          return True
@@ -402,9 +424,8 @@ class DefaultImpl(with_metaclass(ImplMeta)):
          """
          inspector_params = self._tokenize_column_type(inspector_column)
          metadata_params = self._tokenize_column_type(metadata_column)
-        if not self._column_types_match(
-            inspector_params.type, metadata_params.type
-        ):
+
+        if not self._column_types_match(inspector_params, metadata_params,):
              return True
          if not self._column_args_match(inspector_params, metadata_params):
              return True
diff --git a/alembic/ddl/mssql.py b/alembic/ddl/mssql.py

index 3ccbbff06579862477376f58df2d4a6d0e46646d..78a7eb65e1f05154b06070eaafa4c95fa3b55abe 100644 (file)
--- a/alembic/ddl/mssql.py
+++ b/alembic/ddl/mssql.py
@@ -26,6 +26,8 @@ class MSSQLImpl(DefaultImpl):
      transactional_ddl = True
      batch_separator = "GO"
  
+    type_synonyms = DefaultImpl.type_synonyms + ({"VARCHAR", "NVARCHAR"},)
+
      def __init__(self, *arg, **kw):
          super(MSSQLImpl, self).__init__(*arg, **kw)
          self.batch_separator = self.context_opts.get(
diff --git a/alembic/ddl/mysql.py b/alembic/ddl/mysql.py

index a66bb84f6c847f1933f600815a979fc9a071d3ec..02ce253684a53b9c3d5744ea8ce2a06adf3ba91e 100644 (file)
--- a/alembic/ddl/mysql.py
+++ b/alembic/ddl/mysql.py
@@ -25,6 +25,7 @@ class MySQLImpl(DefaultImpl):
  
      transactional_ddl = False
      type_synonyms = DefaultImpl.type_synonyms + ({"BOOL", "TINYINT"},)
+    type_arg_extract = [r"character set ([\w\-_]+)", r"collate ([\w\-_]+)"]
  
      def alter_column(
          self,
diff --git a/docs/build/unreleased/671.rst b/docs/build/unreleased/671.rst

new file mode 100644 (file)

index 0000000..635faae
--- /dev/null
+++ b/docs/build/unreleased/671.rst
@@ -0,0 +1,7 @@
+.. change::
+    :tags: bug, autogenerate, mysql
+    :tickets: 617
+
+    Fixed more false-positive failures produced by the new "compare type" logic
+    first added in :ticket:`605`, particularly impacting MySQL string types
+    regarding flags such as "charset" and "collation".
diff --git a/tests/requirements.py b/tests/requirements.py

index bd258e08f222461bac1345b94b3f6c1d0ed3ea13..d6794a54514852d0c3b8f8b0830a31494cfe7515 100644 (file)
--- a/tests/requirements.py
+++ b/tests/requirements.py
@@ -98,6 +98,22 @@ class DefaultRequirements(SuiteRequirements):
  
          return exclusions.only_on(["postgresql"])
  
+    @property
+    def postgresql(self):
+        return exclusions.only_on(["postgresql"])
+
+    @property
+    def mysql(self):
+        return exclusions.only_on(["mysql"])
+
+    @property
+    def oracle(self):
+        return exclusions.only_on(["oracle"])
+
+    @property
+    def mssql(self):
+        return exclusions.only_on(["mssql"])
+
      @property
      def postgresql_uuid_ossp(self):
          def check_uuid_ossp(config):
diff --git a/tests/test_autogen_diffs.py b/tests/test_autogen_diffs.py

index e1e5c8d402f5df482850d366226e58296f5f81f5..ad387655e9eb227953051ffb0517a46fd6e0ca43 100644 (file)
--- a/tests/test_autogen_diffs.py
+++ b/tests/test_autogen_diffs.py
@@ -786,6 +786,8 @@ class CompareMetadataToInspectorTest(TestBase):
          # that have not changed.
          is_(self._compare_columns(cola, cola), False)
  
+    # TODO: ideally the backend-specific types would be tested
+    # within the test suites for those backends.
      @testing.combinations(
          (String(32), VARCHAR(32), False),
          (VARCHAR(6), String(6), False),
@@ -795,6 +797,40 @@ class CompareMetadataToInspectorTest(TestBase):
          (Unicode(32), VARCHAR(32), False, config.requirements.unicode_string),
          (VARCHAR(6), VARCHAR(12), True),
          (VARCHAR(6), String(12), True),
+        (Integer(), String(10), True),
+        (String(10), Integer(), True),
+        (
+            Unicode(30, collation="en_US"),
+            Unicode(30, collation="en_US"),
+            False,  # unfortunately dialects don't seem to consistently
+            # reflect collations right now so we can't test for
+            # positives here
+            config.requirements.postgresql,
+        ),
+        (
+            mysql.VARCHAR(200, charset="utf8"),
+            Unicode(200),
+            False,
+            config.requirements.mysql,
+        ),
+        (
+            mysql.VARCHAR(200, charset="latin1"),
+            mysql.VARCHAR(200, charset="utf-8"),
+            True,
+            config.requirements.mysql + config.requirements.sqlalchemy_13,
+        ),
+        (
+            String(255, collation="utf8_bin"),
+            String(255),
+            False,
+            config.requirements.mysql,
+        ),
+        (
+            String(255, collation="utf8_bin"),
+            String(255, collation="latin1_bin"),
+            True,
+            config.requirements.mysql + config.requirements.sqlalchemy_13,
+        ),
      )
      def test_string_comparisons(self, cola, colb, expect_changes):
          is_(self._compare_columns(cola, colb), expect_changes)
author	Mike Bayer <mike_mp@zzzcomputing.com>
	Thu, 19 Mar 2020 16:32:47 +0000 (12:32 -0400)
committer	Mike Bayer <mike_mp@zzzcomputing.com>
	Thu, 19 Mar 2020 19:27:39 +0000 (15:27 -0400)
alembic/ddl/impl.py		patch \| blob \| blame \| history
alembic/ddl/mssql.py		patch \| blob \| blame \| history
alembic/ddl/mysql.py		patch \| blob \| blame \| history
docs/build/unreleased/671.rst	[new file with mode: 0644]	patch \| blob
tests/requirements.py		patch \| blob \| blame \| history
tests/test_autogen_diffs.py		patch \| blob \| blame \| history