From: Mike Bayer Date: Thu, 19 Mar 2020 16:32:47 +0000 (-0400) Subject: Use looser tokenization for type comparison X-Git-Tag: rel_1_4_2~1^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=52ef05c936170109f2f457751f4940e4269595c3;p=thirdparty%2Fsqlalchemy%2Falembic.git Use looser tokenization for type comparison Fixed more false-positive failures produced by the new "compare type" logic first added in :ticket:`605`, particularly impacting MySQL string types regarding flags such as "charset" and "collation". Change-Id: If3dcfed7b0aa2d7367e1508289a0b5ee6f4c15a7 Fixes: #617 --- diff --git a/alembic/ddl/impl.py b/alembic/ddl/impl.py index 655d8c94..9df2a72d 100644 --- a/alembic/ddl/impl.py +++ b/alembic/ddl/impl.py @@ -22,8 +22,7 @@ class ImplMeta(type): _impls = {} - -Params = namedtuple("Params", ["type", "args", "kwargs"]) +Params = namedtuple("Params", ["token0", "tokens", "args", "kwargs"]) class DefaultImpl(with_metaclass(ImplMeta)): @@ -45,6 +44,7 @@ class DefaultImpl(with_metaclass(ImplMeta)): transactional_ddl = False command_terminator = ";" type_synonyms = ({"NUMERIC", "DECIMAL"},) + type_arg_extract = () def __init__( self, @@ -339,33 +339,50 @@ class DefaultImpl(with_metaclass(ImplMeta)): # TIMESTAMP WITH TIMEZONE # INTEGER UNSIGNED # INTEGER (10) UNSIGNED + # INTEGER(10) UNSIGNED + # varchar character set utf8 # - # "ext" are the words after the parenthesis, if any, but if there - # are no parenthesis, then these are part of "col". so they are - # moved together for normalization purposes - matches = re.search( - r"^(?P[^()]*)(?:\((?P.*?)\))?(?P[^()]*)?$", - definition, - ).groupdict(default="") - col_type = matches["col"] - if matches["ext"]: - col_type = col_type.strip() + " " + matches["ext"].strip() - - params = Params(col_type, [], {}) - for term in re.findall("[^(),]+", matches["params"] or ""): - if "=" in term: - key, val = term.split("=") - params.kwargs[key.strip()] = val.strip() + + tokens = re.findall(r"[\w\-_]+|\(.+?\)", definition) + + term_tokens = [] + paren_term = None + + for token in tokens: + if re.match(r"^\(.*\)$", token): + paren_term = token else: - params.args.append(term.strip()) + term_tokens.append(token) + + params = Params(term_tokens[0], term_tokens[1:], [], {}) + + if paren_term: + for term in re.findall("[^(),]+", paren_term): + if "=" in term: + key, val = term.split("=") + params.kwargs[key.strip()] = val.strip() + else: + params.args.append(term.strip()) + return params - def _column_types_match(self, inspector_type, metadata_type): - if inspector_type == metadata_type: + def _column_types_match(self, inspector_params, metadata_params): + if inspector_params.token0 == metadata_params.token0: return True + synonyms = [{t.lower() for t in batch} for batch in self.type_synonyms] + inspector_all_terms = " ".join( + [inspector_params.token0] + inspector_params.tokens + ) + metadata_all_terms = " ".join( + [metadata_params.token0] + metadata_params.tokens + ) + for batch in synonyms: - if {inspector_type, metadata_type}.issubset(batch): + if {inspector_all_terms, metadata_all_terms}.issubset(batch) or { + inspector_params.token0, + metadata_params.token0, + }.issubset(batch): return True return False @@ -375,22 +392,27 @@ class DefaultImpl(with_metaclass(ImplMeta)): we want to make sure they are the same. However, if only one specifies it, dont flag it for being less specific """ - # py27- .keys is a set in py36 - for kw in set(inspected_params.kwargs.keys()) & set( - meta_params.kwargs.keys() + + if ( + len(meta_params.tokens) == len(inspected_params.tokens) + and meta_params.tokens != inspected_params.tokens ): - if inspected_params.kwargs[kw] != meta_params.kwargs[kw]: - return False + return False + + if ( + len(meta_params.args) == len(inspected_params.args) + and meta_params.args != inspected_params.args + ): + return False + + insp = " ".join(inspected_params.tokens).lower() + meta = " ".join(meta_params.tokens).lower() + + for reg in self.type_arg_extract: + mi = re.search(reg, insp) + mm = re.search(reg, meta) - # compare the positional arguments to the extent that they are - # present in the SQL form of the metadata type compared to - # the inspected type. Example: Oracle NUMBER(17) and NUMBER(17, 0) - # are equivalent. As it's not clear if some database types might - # ignore additional parameters vs. they aren't actually there and - # need to be added, for now we are still favoring avoiding false - # positives - for meta, inspect in zip(meta_params.args, inspected_params.args): - if meta != inspect: + if mi and mm and mi.group(1) != mm.group(1): return False return True @@ -402,9 +424,8 @@ class DefaultImpl(with_metaclass(ImplMeta)): """ inspector_params = self._tokenize_column_type(inspector_column) metadata_params = self._tokenize_column_type(metadata_column) - if not self._column_types_match( - inspector_params.type, metadata_params.type - ): + + if not self._column_types_match(inspector_params, metadata_params,): return True if not self._column_args_match(inspector_params, metadata_params): return True diff --git a/alembic/ddl/mssql.py b/alembic/ddl/mssql.py index 3ccbbff0..78a7eb65 100644 --- a/alembic/ddl/mssql.py +++ b/alembic/ddl/mssql.py @@ -26,6 +26,8 @@ class MSSQLImpl(DefaultImpl): transactional_ddl = True batch_separator = "GO" + type_synonyms = DefaultImpl.type_synonyms + ({"VARCHAR", "NVARCHAR"},) + def __init__(self, *arg, **kw): super(MSSQLImpl, self).__init__(*arg, **kw) self.batch_separator = self.context_opts.get( diff --git a/alembic/ddl/mysql.py b/alembic/ddl/mysql.py index a66bb84f..02ce2536 100644 --- a/alembic/ddl/mysql.py +++ b/alembic/ddl/mysql.py @@ -25,6 +25,7 @@ class MySQLImpl(DefaultImpl): transactional_ddl = False type_synonyms = DefaultImpl.type_synonyms + ({"BOOL", "TINYINT"},) + type_arg_extract = [r"character set ([\w\-_]+)", r"collate ([\w\-_]+)"] def alter_column( self, diff --git a/docs/build/unreleased/671.rst b/docs/build/unreleased/671.rst new file mode 100644 index 00000000..635faaee --- /dev/null +++ b/docs/build/unreleased/671.rst @@ -0,0 +1,7 @@ +.. change:: + :tags: bug, autogenerate, mysql + :tickets: 617 + + Fixed more false-positive failures produced by the new "compare type" logic + first added in :ticket:`605`, particularly impacting MySQL string types + regarding flags such as "charset" and "collation". diff --git a/tests/requirements.py b/tests/requirements.py index bd258e08..d6794a54 100644 --- a/tests/requirements.py +++ b/tests/requirements.py @@ -98,6 +98,22 @@ class DefaultRequirements(SuiteRequirements): return exclusions.only_on(["postgresql"]) + @property + def postgresql(self): + return exclusions.only_on(["postgresql"]) + + @property + def mysql(self): + return exclusions.only_on(["mysql"]) + + @property + def oracle(self): + return exclusions.only_on(["oracle"]) + + @property + def mssql(self): + return exclusions.only_on(["mssql"]) + @property def postgresql_uuid_ossp(self): def check_uuid_ossp(config): diff --git a/tests/test_autogen_diffs.py b/tests/test_autogen_diffs.py index e1e5c8d4..ad387655 100644 --- a/tests/test_autogen_diffs.py +++ b/tests/test_autogen_diffs.py @@ -786,6 +786,8 @@ class CompareMetadataToInspectorTest(TestBase): # that have not changed. is_(self._compare_columns(cola, cola), False) + # TODO: ideally the backend-specific types would be tested + # within the test suites for those backends. @testing.combinations( (String(32), VARCHAR(32), False), (VARCHAR(6), String(6), False), @@ -795,6 +797,40 @@ class CompareMetadataToInspectorTest(TestBase): (Unicode(32), VARCHAR(32), False, config.requirements.unicode_string), (VARCHAR(6), VARCHAR(12), True), (VARCHAR(6), String(12), True), + (Integer(), String(10), True), + (String(10), Integer(), True), + ( + Unicode(30, collation="en_US"), + Unicode(30, collation="en_US"), + False, # unfortunately dialects don't seem to consistently + # reflect collations right now so we can't test for + # positives here + config.requirements.postgresql, + ), + ( + mysql.VARCHAR(200, charset="utf8"), + Unicode(200), + False, + config.requirements.mysql, + ), + ( + mysql.VARCHAR(200, charset="latin1"), + mysql.VARCHAR(200, charset="utf-8"), + True, + config.requirements.mysql + config.requirements.sqlalchemy_13, + ), + ( + String(255, collation="utf8_bin"), + String(255), + False, + config.requirements.mysql, + ), + ( + String(255, collation="utf8_bin"), + String(255, collation="latin1_bin"), + True, + config.requirements.mysql + config.requirements.sqlalchemy_13, + ), ) def test_string_comparisons(self, cola, colb, expect_changes): is_(self._compare_columns(cola, colb), expect_changes)