From: Mike Bayer Date: Sat, 2 Oct 2021 15:53:55 +0000 (-0400) Subject: support utf8mb3 char encoding fully for mysqlclient, others X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=68cb964f540c5fc2deeddb231c6ed3b8eeda7924;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git support utf8mb3 char encoding fully for mysqlclient, others Fixes to accommodate for the MariaDB 10.6 series, including backwards incompatible changes in both the mariadb-connector Python driver as well as the native 10.6 client libraries that are used automatically by the mysqlclient DBAPI. The "utf8mb3" encoding symbol is now reported by these client libraries when the encoding is stated as "utf8", leading to lookup and encoding errors within the MySQL dialect that does not expect this symbol. Updates to both the MySQL base library to accommodate for this utf8mb3 symbol being reported as well as to the test suite. Thanks to Georg Richter for support. Fixes: #7136 Fixes: #7115 Change-Id: I655d9d9868aef76037023d0c602b8a7c881780b0 (cherry picked from commit f43e65f9b557b2d110141137a0216c661ba038b4) --- diff --git a/doc/build/changelog/unreleased_13/7115.rst b/doc/build/changelog/unreleased_13/7115.rst new file mode 100644 index 0000000000..1f2c7fcf86 --- /dev/null +++ b/doc/build/changelog/unreleased_13/7115.rst @@ -0,0 +1,16 @@ +.. change:: + :tags: bug, mysql, mariadb + :tickets: 7115, 7136 + :versions: 1.4.26 + + Fixes to accommodate for the MariaDB 10.6 series, including backwards + incompatible changes in both the mariadb-connector Python driver (supported + on SQLAlchemy 1.4 only) as well as the native 10.6 client libraries that + are used automatically by the mysqlclient DBAPI (applies to both 1.3 and + 1.4). The "utf8mb3" encoding symbol is now reported by these client + libraries when the encoding is stated as "utf8", leading to lookup and + encoding errors within the MySQL dialect that does not expect this symbol. 
+ Updates were made to both the MySQL base library, to accommodate this utf8mb3 + symbol being reported, and to the test suite. Thanks to Georg Richter + for support. + diff --git a/lib/sqlalchemy/dialects/mysql/base.py b/lib/sqlalchemy/dialects/mysql/base.py index 47e4dff944..69135912d8 100644 --- a/lib/sqlalchemy/dialects/mysql/base.py +++ b/lib/sqlalchemy/dialects/mysql/base.py @@ -3214,6 +3214,7 @@ class _DecodingRowProxy(object): "koi8u": "koi8_u", "utf16": "utf-16-be", # MySQL's uft16 is always bigendian "utf8mb4": "utf8", # real utf8 + "utf8mb3": "utf8", # real utf8 - occurs for MariaDB client libs 10.6 "eucjpms": "ujis", } diff --git a/test/dialect/mysql/test_dialect.py b/test/dialect/mysql/test_dialect.py index 53ceefa696..200f02f268 100644 --- a/test/dialect/mysql/test_dialect.py +++ b/test/dialect/mysql/test_dialect.py @@ -16,6 +16,7 @@ from sqlalchemy.testing import engines from sqlalchemy.testing import eq_ from sqlalchemy.testing import expect_warnings from sqlalchemy.testing import fixtures +from sqlalchemy.testing import in_ from sqlalchemy.testing import mock from ...engine import test_execute @@ -164,17 +165,30 @@ class DialectTest(fixtures.TestBase): )[1] eq_(kw["foo"], "true") - @testing.only_on("mysql") - @testing.skip_if("mysql+mysqlconnector", "totally broken for the moment") - @testing.fails_on("mysql+oursql", "unsupported") - def test_special_encodings(self): + @testing.only_on( + [ + "mysql+mysqldb", + "mysql+pymysql", + "mariadb+mysqldb", + "mariadb+pymysql", + ] + ) + @testing.combinations( + ("utf8mb4",), + ("utf8",), + ) + def test_special_encodings(self, enc): - for enc in ["utf8mb4", "utf8"]: - eng = engines.testing_engine( - options={"connect_args": {"charset": enc, "use_unicode": 0}} - ) - conn = eng.connect() - eq_(conn.dialect._connection_charset, enc) + eng = engines.testing_engine( + options={"connect_args": {"charset": enc, "use_unicode": 0}} + ) + conn = eng.connect() + + detected = conn.dialect._connection_charset + if enc == 
"utf8mb4": + eq_(detected, enc) + else: + in_(detected, ["utf8", "utf8mb3"]) def test_no_show_variables(self): from sqlalchemy.testing import mock diff --git a/test/dialect/mysql/test_reflection.py b/test/dialect/mysql/test_reflection.py index f32341ebe6..af8258387c 100644 --- a/test/dialect/mysql/test_reflection.py +++ b/test/dialect/mysql/test_reflection.py @@ -294,7 +294,7 @@ class ReflectionTest(fixtures.TestBase, AssertsCompiledSQL): Column("c1", Integer()), mysql_engine="MEMORY", comment=comment, - mysql_default_charset="utf8", + mysql_default_charset="utf8mb4", mysql_auto_increment="5", mysql_avg_row_length="3", mysql_password="secret", @@ -309,7 +309,7 @@ class ReflectionTest(fixtures.TestBase, AssertsCompiledSQL): assert def_table.kwargs["mysql_engine"] == "MEMORY" assert def_table.comment == comment - assert def_table.kwargs["mysql_default_charset"] == "utf8" + assert def_table.kwargs["mysql_default_charset"] == "utf8mb4" assert def_table.kwargs["mysql_auto_increment"] == "5" assert def_table.kwargs["mysql_avg_row_length"] == "3" assert def_table.kwargs["mysql_password"] == "secret" @@ -319,7 +319,7 @@ class ReflectionTest(fixtures.TestBase, AssertsCompiledSQL): assert reflected.comment == comment assert reflected.kwargs["mysql_comment"] == comment - assert reflected.kwargs["mysql_default charset"] == "utf8" + assert reflected.kwargs["mysql_default charset"] == "utf8mb4" assert reflected.kwargs["mysql_avg_row_length"] == "3" assert reflected.kwargs["mysql_connection"] == "fish" diff --git a/test/dialect/mysql/test_types.py b/test/dialect/mysql/test_types.py index f34a8d3243..6d902e75a5 100644 --- a/test/dialect/mysql/test_types.py +++ b/test/dialect/mysql/test_types.py @@ -511,14 +511,14 @@ class TypeRoundTripTest(fixtures.TestBase, AssertsExecutionResults): self.metadata, Column("id", Integer), Column("data", UnicodeText), - mysql_default_charset="utf8", - mysql_collate="utf8_bin", + mysql_default_charset="utf8mb4", + mysql_collate="utf8mb4_bin", ) 
t.create() m2 = MetaData(testing.db) t2 = Table("foo", m2, autoload=True) - eq_(t2.kwargs["mysql_collate"], "utf8_bin") - eq_(t2.kwargs["mysql_default charset"], "utf8") + eq_(t2.kwargs["mysql_collate"], "utf8mb4_bin") + eq_(t2.kwargs["mysql_default charset"], "utf8mb4") # test [ticket:2906] # in order to test the condition here, need to use