From c0e6ebd70b04c7941b7750c77cd4329b043679f8 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Sun, 13 Jan 2019 18:15:52 -0500 Subject: [PATCH] Render N'' for SQL Server unicode literals The ``literal_processor`` for the :class:`.Unicode` and :class:`.UnicodeText` datatypes now render an ``N`` character in front of the literal string expression as required by SQL Server for Unicode string values rendered in SQL expressions. Note that this adds full unicode characters to the standard test suite, which means we also need to bump MySQL provisioning up to utf8mb4. Modern installs do not seem to be reproducing the 1271 issue locally, if it reproduces in CI it would be better for us to skip those ORM-centric tests for MySQL. Also remove unused _StringType from SQL Server dialect Fixes: #4442 Change-Id: Id55817b3e8a2d81ddc8b7b27f85e3f1dcc1cea7e --- doc/build/changelog/unreleased_13/4442.rst | 8 +++ lib/sqlalchemy/dialects/mssql/base.py | 24 +++++-- lib/sqlalchemy/testing/provision.py | 9 +-- lib/sqlalchemy/testing/requirements.py | 7 ++ lib/sqlalchemy/testing/suite/test_types.py | 79 ++++++++++++++++------ setup.cfg | 4 +- test/dialect/mssql/test_types.py | 48 +++++++++++++ test/requirements.py | 14 +++- 8 files changed, 158 insertions(+), 35 deletions(-) create mode 100644 doc/build/changelog/unreleased_13/4442.rst diff --git a/doc/build/changelog/unreleased_13/4442.rst b/doc/build/changelog/unreleased_13/4442.rst new file mode 100644 index 0000000000..ea8c3b3e17 --- /dev/null +++ b/doc/build/changelog/unreleased_13/4442.rst @@ -0,0 +1,8 @@ +.. change:: + :tags: bug, mssql + :tickets: 4222 + + The ``literal_processor`` for the :class:`.Unicode` and + :class:`.UnicodeText` datatypes now render an ``N`` character in front of + the literal string expression as required by SQL Server for Unicode string + values rendered in SQL expressions. diff --git a/lib/sqlalchemy/dialects/mssql/base.py b/lib/sqlalchemy/dialects/mssql/base.py index d98915c0fc..bdbc6254b4 100644 --- a/lib/sqlalchemy/dialects/mssql/base.py +++ b/lib/sqlalchemy/dialects/mssql/base.py @@ -1003,12 +1003,26 @@ class DATETIMEOFFSET(sqltypes.TypeEngine): self.precision = precision -class _StringType(object): +class _UnicodeLiteral(object): + def literal_processor(self, dialect): + def process(value): + + value = value.replace("'", "''") + + if dialect.identifier_preparer._double_percents: + value = value.replace("%", "%%") + + return "N'%s'" % value + + return process + - """Base for MSSQL string types.""" +class _MSUnicode(_UnicodeLiteral, sqltypes.Unicode): + pass - def __init__(self, collation=None): - super(_StringType, self).__init__(collation=collation) + +class _MSUnicodeText(_UnicodeLiteral, sqltypes.UnicodeText): + pass class TIMESTAMP(sqltypes._Binary): @@ -2124,6 +2138,8 @@ class MSDialect(default.DefaultDialect): sqltypes.DateTime: _MSDateTime, sqltypes.Date: _MSDate, sqltypes.Time: TIME, + sqltypes.Unicode: _MSUnicode, + sqltypes.UnicodeText: _MSUnicodeText, } engine_config_types = default.DefaultDialect.engine_config_types.union( diff --git a/lib/sqlalchemy/testing/provision.py b/lib/sqlalchemy/testing/provision.py index 88dc28528a..70ace05110 100644 --- a/lib/sqlalchemy/testing/provision.py +++ b/lib/sqlalchemy/testing/provision.py @@ -207,15 +207,12 @@ def _mysql_create_db(cfg, eng, ident): except Exception: pass - # using utf8mb4 we are getting collation errors on UNIONS: - # test/orm/inheritance/test_polymorphic_rel.py" - # 1271, u"Illegal mix of collations for operation 'UNION'" - conn.execute("CREATE DATABASE %s CHARACTER SET utf8mb3" % ident) + conn.execute("CREATE DATABASE %s CHARACTER SET utf8mb4" % ident) conn.execute( - "CREATE DATABASE %s_test_schema CHARACTER SET utf8mb3" % ident + "CREATE DATABASE %s_test_schema CHARACTER SET utf8mb4" % ident ) conn.execute( - "CREATE DATABASE %s_test_schema_2 CHARACTER SET utf8mb3" % ident + "CREATE DATABASE %s_test_schema_2 CHARACTER SET utf8mb4" % ident ) diff --git a/lib/sqlalchemy/testing/requirements.py b/lib/sqlalchemy/testing/requirements.py index a17d26edb8..3a21617406 100644 --- a/lib/sqlalchemy/testing/requirements.py +++ b/lib/sqlalchemy/testing/requirements.py @@ -732,6 +732,13 @@ class SuiteRequirements(Requirements): return exclusions.open() + @property + def expressions_against_unbounded_text(self): + """target database supports use of an unbounded textual field in a + WHERE clause.""" + + return exclusions.open() + @property def selectone(self): """target driver must support the literal statement 'select 1'""" diff --git a/lib/sqlalchemy/testing/suite/test_types.py b/lib/sqlalchemy/testing/suite/test_types.py index ff8db5897c..4791671f31 100644 --- a/lib/sqlalchemy/testing/suite/test_types.py +++ b/lib/sqlalchemy/testing/suite/test_types.py @@ -38,6 +38,8 @@ from ...util import u class _LiteralRoundTripFixture(object): + supports_whereclause = True + @testing.provide_metadata def _literal_round_trip(self, type_, input_, output, filter_=None): """test literal rendering """ @@ -49,33 +51,47 @@ class _LiteralRoundTripFixture(object): t = Table("t", self.metadata, Column("x", type_)) t.create() - for value in input_: - ins = ( - t.insert() - .values(x=literal(value)) - .compile( - dialect=testing.db.dialect, - compile_kwargs=dict(literal_binds=True), + with testing.db.connect() as conn: + for value in input_: + ins = ( + t.insert() + .values(x=literal(value)) + .compile( + dialect=testing.db.dialect, + compile_kwargs=dict(literal_binds=True), + ) ) - ) - testing.db.execute(ins) + conn.execute(ins) + + if self.supports_whereclause: + stmt = t.select().where(t.c.x == literal(value)) + else: + stmt = t.select() - for row in t.select().execute(): - value = row[0] - if filter_ is not None: - value = filter_(value) - assert value in output + stmt = stmt.compile( + dialect=testing.db.dialect, + compile_kwargs=dict(literal_binds=True), + ) + for row in conn.execute(stmt): + value = row[0] + if filter_ is not None: + value = filter_(value) + assert value in output class _UnicodeFixture(_LiteralRoundTripFixture): __requires__ = ("unicode_data",) data = u( - "Alors vous imaginez ma surprise, au lever du jour, " - "quand une drôle de petite voix m’a réveillé. Elle " - "disait: « S’il vous plaît… dessine-moi un mouton! »" + "Alors vous imaginez ma 🐍 surprise, au lever du jour, " + "quand une drôle de petite 🐍 voix m’a réveillé. Elle " + "disait: « S’il vous plaît… dessine-moi 🐍 un mouton! »" ) + @property + def supports_whereclause(self): + return config.requirements.expressions_against_unbounded_text.enabled + @classmethod def define_tables(cls, metadata): Table( @@ -122,6 +138,11 @@ class _UnicodeFixture(_LiteralRoundTripFixture): def test_literal(self): self._literal_round_trip(self.datatype, [self.data], [self.data]) + def test_literal_non_ascii(self): + self._literal_round_trip( + self.datatype, [util.u("réve🐍 illé")], [util.u("réve🐍 illé")] + ) + class UnicodeVarcharTest(_UnicodeFixture, fixtures.TablesTest): __requires__ = ("unicode_data",) @@ -149,6 +170,10 @@ class TextTest(_LiteralRoundTripFixture, fixtures.TablesTest): __requires__ = ("text_type",) __backend__ = True + @property + def supports_whereclause(self): + return config.requirements.expressions_against_unbounded_text.enabled + @classmethod def define_tables(cls, metadata): Table( @@ -177,6 +202,11 @@ class TextTest(_LiteralRoundTripFixture, fixtures.TablesTest): def test_literal(self): self._literal_round_trip(Text, ["some text"], ["some text"]) + def test_literal_non_ascii(self): + self._literal_round_trip( + Text, [util.u("réve🐍 illé")], [util.u("réve🐍 illé")] + ) + def test_literal_quoting(self): data = """some 'text' hey "hi there" that's text""" self._literal_round_trip(Text, [data], [data]) @@ -202,8 +232,15 @@ class StringTest(_LiteralRoundTripFixture, fixtures.TestBase): foo.drop(config.db) def test_literal(self): + # note that in Python 3, this invokes the Unicode + # datatype for the literal part because all strings are unicode self._literal_round_trip(String(40), ["some text"], ["some text"]) + def test_literal_non_ascii(self): + self._literal_round_trip( + String(40), [util.u("réve🐍 illé")], [util.u("réve🐍 illé")] + ) + def test_literal_quoting(self): data = """some 'text' hey "hi there" that's text""" self._literal_round_trip(String(40), [data], [data]) @@ -864,8 +901,8 @@ class JSONTest(_LiteralRoundTripFixture, fixtures.TablesTest): { "name": "r1", "data": { - util.u("réveillé"): util.u("réveillé"), - "data": {"k1": util.u("drôle")}, + util.u("réve🐍 illé"): util.u("réve🐍 illé"), + "data": {"k1": util.u("drôl🐍e")}, }, }, ) @@ -873,8 +910,8 @@ class JSONTest(_LiteralRoundTripFixture, fixtures.TablesTest): eq_( conn.scalar(select([self.tables.data_table.c.data])), { - util.u("réveillé"): util.u("réveillé"), - "data": {"k1": util.u("drôle")}, + util.u("réve🐍 illé"): util.u("réve🐍 illé"), + "data": {"k1": util.u("drôl🐍e")}, }, ) diff --git a/setup.cfg b/setup.cfg index ec6da6057f..f0c0d4d5af 100644 --- a/setup.cfg +++ b/setup.cfg @@ -66,8 +66,8 @@ postgresql=postgresql://scott:tiger@127.0.0.1:5432/test pg8000=postgresql+pg8000://scott:tiger@127.0.0.1:5432/test postgresql_psycopg2cffi=postgresql+psycopg2cffi://scott:tiger@127.0.0.1:5432/test -mysql=mysql://scott:tiger@127.0.0.1:3306/test?charset=utf8 -pymysql=mysql+pymysql://scott:tiger@127.0.0.1:3306/test?charset=utf8 +mysql=mysql://scott:tiger@127.0.0.1:3306/test?charset=utf8mb4 +pymysql=mysql+pymysql://scott:tiger@127.0.0.1:3306/test?charset=utf8mb4 mssql=mssql+pyodbc://scott:tiger@ms_2008 mssql_pymssql=mssql+pymssql://scott:tiger@ms_2008 diff --git a/test/dialect/mssql/test_types.py b/test/dialect/mssql/test_types.py index 356057af16..54dd6876a4 100644 --- a/test/dialect/mssql/test_types.py +++ b/test/dialect/mssql/test_types.py @@ -7,6 +7,7 @@ import os import sqlalchemy as sa from sqlalchemy import Boolean from sqlalchemy import Column +from sqlalchemy import column from sqlalchemy import Date from sqlalchemy import DateTime from sqlalchemy import DefaultClause @@ -14,6 +15,7 @@ from sqlalchemy import Float from sqlalchemy import inspect from sqlalchemy import Integer from sqlalchemy import LargeBinary +from sqlalchemy import literal from sqlalchemy import MetaData from sqlalchemy import Numeric from sqlalchemy import PickleType @@ -27,7 +29,9 @@ from sqlalchemy import Text from sqlalchemy import text from sqlalchemy import Time from sqlalchemy import types +from sqlalchemy import Unicode from sqlalchemy import UnicodeText +from sqlalchemy import util from sqlalchemy.databases import mssql from sqlalchemy.dialects.mssql import ROWVERSION from sqlalchemy.dialects.mssql import TIMESTAMP @@ -37,6 +41,7 @@ from sqlalchemy.dialects.mssql.base import MS_2008_VERSION from sqlalchemy.dialects.mssql.base import TIME from sqlalchemy.sql import sqltypes from sqlalchemy.testing import assert_raises_message +from sqlalchemy.testing import AssertsCompiledSQL from sqlalchemy.testing import AssertsExecutionResults from sqlalchemy.testing import ComparesTables from sqlalchemy.testing import emits_warning_on @@ -922,6 +927,49 @@ class TypeRoundTripTest( engine.execute(tbl.delete()) +class StringTest(fixtures.TestBase, AssertsCompiledSQL): + __dialect__ = mssql.dialect() + + def test_unicode_literal_binds(self): + self.assert_compile( + column("x", Unicode()) == "foo", "x = N'foo'", literal_binds=True + ) + + def test_unicode_text_literal_binds(self): + self.assert_compile( + column("x", UnicodeText()) == "foo", + "x = N'foo'", + literal_binds=True, + ) + + def test_string_text_literal_binds(self): + self.assert_compile( + column("x", String()) == "foo", "x = 'foo'", literal_binds=True + ) + + def test_string_text_literal_binds_explicit_unicode_right(self): + self.assert_compile( + column("x", String()) == util.u("foo"), + "x = 'foo'", + literal_binds=True, + ) + + def test_string_text_explicit_literal_binds(self): + # the literal experssion here coerces the right side to + # Unicode on Python 3 for plain string, test with unicode + # string just to confirm literal is doing this + self.assert_compile( + column("x", String()) == literal(util.u("foo")), + "x = N'foo'", + literal_binds=True, + ) + + def test_text_text_literal_binds(self): + self.assert_compile( + column("x", Text()) == "foo", "x = 'foo'", literal_binds=True + ) + + class BinaryTest(fixtures.TestBase): __only_on__ = "mssql" __requires__ = ("non_broken_binary",) diff --git a/test/requirements.py b/test/requirements.py index c70169acfa..c265bb3c9d 100644 --- a/test/requirements.py +++ b/test/requirements.py @@ -662,6 +662,16 @@ class DefaultRequirements(SuiteRequirements): return exclusions.open() + @property + def expressions_against_unbounded_text(self): + """target database supports use of an unbounded textual field in a + WHERE clause.""" + + return fails_if( + ["oracle"], + "ORA-00932: inconsistent datatypes: expected - got CLOB", + ) + @property def unicode_data(self): """target drive must support unicode data stored in columns.""" @@ -1173,9 +1183,9 @@ class DefaultRequirements(SuiteRequirements): lookup = { # will raise without quoting "postgresql": "POSIX", - # note MySQL databases need to be created w/ utf8mb3 charset + # note MySQL databases need to be created w/ utf8mb4 charset # for the test suite - "mysql": "utf8mb3_bin", + "mysql": "utf8mb4_bin", "sqlite": "NOCASE", # will raise *with* quoting "mssql": "Latin1_General_CI_AS", -- 2.47.2