git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
author Mike Bayer <mike_mp@zzzcomputing.com>
Mon, 13 Jan 2014 19:05:05 +0000 (14:05 -0500)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Mon, 13 Jan 2014 19:05:05 +0000 (14:05 -0500)

- The MySQL CAST compilation now takes into account aspects of a string
type such as "charset" and "collation".  While MySQL wants all character-
based CAST calls to use the CHAR type, we now create a real CHAR
object at CAST time and copy over all the parameters it has, so that
an expression like ``cast(x, mysql.TEXT(charset='utf8'))`` will
render ``CAST(t.col AS CHAR CHARACTER SET utf8)``.

- Added new "unicode returns" detection to the MySQL dialect and
to the default dialect system overall, such that any dialect
can add extra "tests" to the on-first-connect "does this DBAPI
return unicode directly?" detection. In this case, we are
adding a check specifically against the "utf8" encoding with
an explicit "utf8_bin" collation type (after checking that
this collation is available) to test for some buggy unicode
behavior observed with MySQLdb version 1.2.3.  While MySQLdb
has resolved this issue as of 1.2.4, the check here should
guard against regressions.  The change also allows the "unicode"
checks to log in the engine logs, which was not previously
the case. [ticket:2906]

doc/build/changelog/changelog_09.rst
lib/sqlalchemy/connectors/mysqldb.py
lib/sqlalchemy/dialects/mysql/base.py
lib/sqlalchemy/dialects/mysql/mysqldb.py
lib/sqlalchemy/engine/default.py
test/dialect/mysql/test_compiler.py
test/dialect/mysql/test_types.py
test/engine/test_execute.py

index 74f92da06aea9dd7e42ac7e9a0e13c91e2b35201..96f763e31297934458d15fb53025133067b14a60 100644 (file)
 .. changelog::
     :version: 0.9.2
 
+    .. change::
+        :tags: bug, mysql
+
+        The MySQL CAST compilation now takes into account aspects of a string
+        type such as "charset" and "collation".  While MySQL wants all character-
+        based CAST calls to use the CHAR type, we now create a real CHAR
+        object at CAST time and copy over all the parameters it has, so that
+        an expression like ``cast(x, mysql.TEXT(charset='utf8'))`` will
+        render ``CAST(t.col AS CHAR CHARACTER SET utf8)``.
+
+    .. change::
+        :tags: bug, mysql
+        :tickets: 2906
+
+        Added new "unicode returns" detection to the MySQL dialect and
+        to the default dialect system overall, such that any dialect
+        can add extra "tests" to the on-first-connect "does this DBAPI
+        return unicode directly?" detection. In this case, we are
+        adding a check specifically against the "utf8" encoding with
+        an explicit "utf8_bin" collation type (after checking that
+        this collation is available) to test for some buggy unicode
+        behavior observed with MySQLdb version 1.2.3.  While MySQLdb
+        has resolved this issue as of 1.2.4, the check here should
+        guard against regressions.  The change also allows the "unicode"
+        checks to log in the engine logs, which was not previously
+        the case.
+
     .. change::
         :tags: bug, mysql, pool, engine
         :tickets: 2907
index 0f250dfdbdcbaf6c47aa1ce5718f9a0e8f2376fb..33e59218bd1c868add1197d4c1e3bc464adee4ad 100644 (file)
@@ -62,6 +62,7 @@ class MySQLDBConnector(Connector):
         # is overridden when pymysql is used
         return __import__('MySQLdb')
 
+
     def do_executemany(self, cursor, statement, parameters, context=None):
         rowcount = cursor.executemany(statement, parameters)
         if context is not None:
index a3942e89c26ec88718e1cf0b876dfbdf359cff37..22675e59240b88acc8bbcd5bcb62d41952600d27 100644 (file)
@@ -976,6 +976,25 @@ class CHAR(_StringType, sqltypes.CHAR):
         """
         super(CHAR, self).__init__(length=length, **kwargs)
 
+    @classmethod
+    def _adapt_string_for_cast(self, type_):
+        # copy the given string type into a CHAR
+        # for the purposes of rendering a CAST expression
+        type_ = sqltypes.to_instance(type_)
+        if isinstance(type_, sqltypes.CHAR):
+            return type_
+        elif isinstance(type_, _StringType):
+            return CHAR(
+                length=type_.length,
+                charset=type_.charset,
+                collation=type_.collation,
+                ascii=type_.ascii,
+                binary=type_.binary,
+                unicode=type_.unicode,
+                national=False # not supported in CAST
+            )
+        else:
+            return CHAR(length=type_.length)
 
 class NVARCHAR(_StringType, sqltypes.NVARCHAR):
     """MySQL NVARCHAR type.
@@ -1397,14 +1416,9 @@ class MySQLCompiler(compiler.SQLCompiler):
         elif isinstance(type_, (sqltypes.DECIMAL, sqltypes.DateTime,
                                             sqltypes.Date, sqltypes.Time)):
             return self.dialect.type_compiler.process(type_)
-        elif isinstance(type_, sqltypes.Text):
-            return 'CHAR'
-        elif (isinstance(type_, sqltypes.String) and not
-              isinstance(type_, (ENUM, SET))):
-            if getattr(type_, 'length'):
-                return 'CHAR(%s)' % type_.length
-            else:
-                return 'CHAR'
+        elif isinstance(type_, sqltypes.String) and not isinstance(type_, (ENUM, SET)):
+            adapted = CHAR._adapt_string_for_cast(type_)
+            return self.dialect.type_compiler.process(adapted)
         elif isinstance(type_, sqltypes._Binary):
             return 'BINARY'
         elif isinstance(type_, sqltypes.NUMERIC):
@@ -2165,7 +2179,6 @@ class MySQLDialect(default.DefaultDialect):
                 rs.close()
 
     def initialize(self, connection):
-        default.DefaultDialect.initialize(self, connection)
         self._connection_charset = self._detect_charset(connection)
         self._detect_ansiquotes(connection)
         if self._server_ansiquotes:
@@ -2174,6 +2187,8 @@ class MySQLDialect(default.DefaultDialect):
             self.identifier_preparer = self.preparer(self,
                                 server_ansiquotes=self._server_ansiquotes)
 
+        default.DefaultDialect.initialize(self, connection)
+
     @property
     def _supports_cast(self):
         return self.server_version_info is None or \
@@ -2443,6 +2458,7 @@ class MySQLDialect(default.DefaultDialect):
         # as of MySQL 5.0.1
         self._backslash_escapes = 'NO_BACKSLASH_ESCAPES' not in mode
 
+
     def _show_create_table(self, connection, table, charset=None,
                            full_name=None):
         """Run SHOW CREATE TABLE for a ``Table``."""
index c6942ae2de69c5a3fdcf94a5560b0178e3051719..84e8299d54968b2ee1843f8e8424d8bb882ac222 100644 (file)
@@ -56,7 +56,8 @@ from ...connectors.mysqldb import (
                         MySQLDBIdentifierPreparer,
                         MySQLDBConnector
                     )
-
+from .base import TEXT
+from ... import sql
 
 class MySQLExecutionContext_mysqldb(MySQLDBExecutionContext, MySQLExecutionContext):
     pass
@@ -75,4 +76,27 @@ class MySQLDialect_mysqldb(MySQLDBConnector, MySQLDialect):
     statement_compiler = MySQLCompiler_mysqldb
     preparer = MySQLIdentifierPreparer_mysqldb
 
+    def _check_unicode_returns(self, connection):
+        # work around issue fixed in
+        # https://github.com/farcepest/MySQLdb1/commit/cd44524fef63bd3fcb71947392326e9742d520e8
+        # specific issue w/ the utf8_bin collation and unicode returns
+
+        has_utf8_bin = connection.scalar(
+                                "show collation where %s = 'utf8' and %s = 'utf8_bin'"
+                                    % (
+                                    self.identifier_preparer.quote("Charset"),
+                                    self.identifier_preparer.quote("Collation")
+                                ))
+        if has_utf8_bin:
+            additional_tests = [
+                sql.collate(sql.cast(
+                        sql.literal_column(
+                            "'test collated returns'"),
+                            TEXT(charset='utf8')), "utf8_bin")
+            ]
+        else:
+            additional_tests = []
+        return super(MySQLDBConnector, self)._check_unicode_returns(
+                            connection, additional_tests)
+
 dialect = MySQLDialect_mysqldb
index 509d772aabe432de1577e8aa522eae0e3bce988a..bcb9960b12ffe3aaeebad2f69d0728a52f47e73d 100644 (file)
@@ -228,46 +228,55 @@ class DefaultDialect(interfaces.Dialect):
         """
         return None
 
-    def _check_unicode_returns(self, connection):
+    def _check_unicode_returns(self, connection, additional_tests=None):
         if util.py2k and not self.supports_unicode_statements:
             cast_to = util.binary_type
         else:
             cast_to = util.text_type
 
-        def check_unicode(formatstr, type_):
+        if self.positional:
+            parameters = self.execute_sequence_format()
+        else:
+            parameters = {}
+
+        def check_unicode(test):
             cursor = connection.connection.cursor()
             try:
                 try:
-                    cursor.execute(
-                        cast_to(
-                            expression.select(
-                                [expression.cast(
-                                    expression.literal_column(
-                                        "'test %s returns'" % formatstr),
-                                        type_)
-                            ]).compile(dialect=self)
-                        )
-                    )
+                    statement = cast_to(expression.select([test]).compile(dialect=self))
+                    connection._cursor_execute(cursor, statement, parameters)
                     row = cursor.fetchone()
 
                     return isinstance(row[0], util.text_type)
-                except self.dbapi.Error as de:
+                except exc.DBAPIError as de:
                     util.warn("Exception attempting to "
                             "detect unicode returns: %r" % de)
                     return False
             finally:
                 cursor.close()
 
-        # detect plain VARCHAR
-        unicode_for_varchar = check_unicode("plain", sqltypes.VARCHAR(60))
-
-        # detect if there's an NVARCHAR type with different behavior available
-        unicode_for_unicode = check_unicode("unicode", sqltypes.Unicode(60))
-
-        if unicode_for_unicode and not unicode_for_varchar:
+        tests = [
+            # detect plain VARCHAR
+            expression.cast(
+                expression.literal_column("'test plain returns'"),
+                sqltypes.VARCHAR(60)
+            ),
+            # detect if there's an NVARCHAR type with different behavior available
+            expression.cast(
+                expression.literal_column("'test unicode returns'"),
+                sqltypes.Unicode(60)
+            ),
+        ]
+
+        if additional_tests:
+            tests += additional_tests
+
+        results = set([check_unicode(test) for test in tests])
+
+        if results.issuperset([True, False]):
             return "conditional"
         else:
-            return unicode_for_varchar
+            return results == set([True])
 
     def _check_unicode_description(self, connection):
         # all DBAPIs on Py2K return cursor.description as encoded,
index 46e8bfb828a95fddb6c03998cbcc81f93712d206..45f8405c8bf81d2f1b81e43800d45dc166330559 100644 (file)
@@ -341,8 +341,10 @@ class SQLTest(fixtures.TestBase, AssertsCompiledSQL):
             (VARCHAR, "CAST(t.col AS CHAR)"),
             (NCHAR, "CAST(t.col AS CHAR)"),
             (CHAR, "CAST(t.col AS CHAR)"),
+            (m.CHAR(charset='utf8'), "CAST(t.col AS CHAR CHARACTER SET utf8)"),
             (CLOB, "CAST(t.col AS CHAR)"),
             (TEXT, "CAST(t.col AS CHAR)"),
+            (m.TEXT(charset='utf8'), "CAST(t.col AS CHAR CHARACTER SET utf8)"),
             (String(32), "CAST(t.col AS CHAR(32))"),
             (Unicode(32), "CAST(t.col AS CHAR(32))"),
             (CHAR(32), "CAST(t.col AS CHAR(32))"),
index e3d5d6185736baca1e8dfb2a5f2fd26ff5d27f4b..acf9c1e2fff60b8ea4100f80e2adf98ac2d6b425 100644 (file)
@@ -264,15 +264,23 @@ class TypesTest(fixtures.TestBase, AssertsExecutionResults, AssertsCompiledSQL):
     def test_charset_collate_table(self):
         t = Table('foo', self.metadata,
             Column('id', Integer),
+            Column('data', UnicodeText),
             mysql_default_charset='utf8',
-            mysql_collate='utf8_unicode_ci'
+            mysql_collate='utf8_bin'
         )
         t.create()
         m2 = MetaData(testing.db)
         t2 = Table('foo', m2, autoload=True)
-        eq_(t2.kwargs['mysql_collate'], 'utf8_unicode_ci')
+        eq_(t2.kwargs['mysql_collate'], 'utf8_bin')
         eq_(t2.kwargs['mysql_default charset'], 'utf8')
 
+        # test [ticket:2906]
+        # in order to test the condition here, need to use
+        # MySQLdb 1.2.3 and also need to pass either use_unicode=1
+        # or charset=utf8 to the URL.
+        t.insert().execute(id=1, data=u('some text'))
+        assert isinstance(testing.db.scalar(select([t.c.data])), util.text_type)
+
     def test_bit_50(self):
         """Exercise BIT types on 5.0+ (not valid for all engine types)"""
 
index c2479eff7ab65ee6d6d14709d1cebdff305dfc1e..d3bd3c2cdaeda03c4800128b99a366d294c72218 100644 (file)
@@ -1050,7 +1050,7 @@ class ResultProxyTest(fixtures.TestBase):
 
 class ExecutionOptionsTest(fixtures.TestBase):
     def test_dialect_conn_options(self):
-        engine = testing_engine("sqlite://")
+        engine = testing_engine("sqlite://", options=dict(_initialize=False))
         engine.dialect = Mock()
         conn = engine.connect()
         c2 = conn.execution_options(foo="bar")