From: Mike Bayer Date: Mon, 7 Feb 2022 17:08:51 +0000 (-0500) Subject: apply literal value resolution to String X-Git-Tag: rel_2_0_0b1~496^2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=24de22338522779c5d9e720c4b97dc8609136c29;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git apply literal value resolution to String Python string values for which a SQL type is determined from the type of the value, mainly when using :func:`_sql.literal`, will now apply the :class:`_types.String` type, rather than the :class:`_types.Unicode` datatype, for Python string values that test as "ascii only" using Python ``str.isascii()``. If the string is not ``isascii()``, the :class:`_types.Unicode` datatype will be bound instead, which was used in all string detection previously. This behavior **only applies to in-place detection of datatypes when using ``literal()`` or other contexts that have no existing datatype**, which is not usually the case under normal :class:`_schema.Column` comparison operations, where the type of the :class:`_schema.Column` being compared always takes precedence. Use of the :class:`_types.Unicode` datatype can determine literal string formatting on backends such as SQL Server, where a literal value (i.e. using ``literal_binds``) will be rendered as ``N'<value>'`` instead of ``'value'``. For normal bound value handling, the :class:`_types.Unicode` datatype also may have implications for passing values to the DBAPI, again in the case of SQL Server, the pyodbc driver supports the use of :ref:`setinputsizes mode <mssql_pyodbc_setinputsizes>` which will handle :class:`_types.String` versus :class:`_types.Unicode` differently. Fixes: #7551 Change-Id: I4f8de63e36532ae8ce4c630ee59211349ce95361 --- diff --git a/doc/build/changelog/unreleased_20/7551.rst b/doc/build/changelog/unreleased_20/7551.rst new file mode 100644 index 0000000000..0c0b5863d7 --- /dev/null +++ b/doc/build/changelog/unreleased_20/7551.rst @@ -0,0 +1,25 @@ +.. 
change:: + :tags: bug, types + :tickets: 7551 + + Python string values for which a SQL type is determined from the type of + the value, mainly when using :func:`_sql.literal`, will now apply the + :class:`_types.String` type, rather than the :class:`_types.Unicode` + datatype, for Python string values that test as "ascii only" using Python + ``str.isascii()``. If the string is not ``isascii()``, the + :class:`_types.Unicode` datatype will be bound instead, which was used in + all string detection previously. This behavior **only applies to in-place + detection of datatypes when using ``literal()`` or other contexts that have + no existing datatype**, which is not usually the case under normal + :class:`_schema.Column` comparison operations, where the type of the + :class:`_schema.Column` being compared always takes precedence. + + Use of the :class:`_types.Unicode` datatype can determine literal string + formatting on backends such as SQL Server, where a literal value (i.e. + using ``literal_binds``) will be rendered as ``N'<value>'`` instead of + ``'value'``. For normal bound value handling, the :class:`_types.Unicode` + datatype also may have implications for passing values to the DBAPI, again + in the case of SQL Server, the pyodbc driver supports the use of + :ref:`setinputsizes mode <mssql_pyodbc_setinputsizes>` which will handle + :class:`_types.String` versus :class:`_types.Unicode` differently. + diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py index 42fad5e044..575f402c3f 100644 --- a/lib/sqlalchemy/sql/sqltypes.py +++ b/lib/sqlalchemy/sql/sqltypes.py @@ -184,6 +184,15 @@ class String(Concatenable, TypeEngine[typing_Text]): self.length = length self.collation = collation + def _resolve_for_literal(self, value): + # I was SO PROUD of my regex trick, but we dont need it. 
+ # re.search(r"[^\u0000-\u007F]", value) + + if value.isascii(): + return _STRING + else: + return _UNICODE + def literal_processor(self, dialect): def process(value): value = value.replace("'", "''") @@ -3019,6 +3028,10 @@ MATCHTYPE = MatchType() TABLEVALUE = TableValueType() DATETIME_TIMEZONE = DateTime(timezone=True) TIME_TIMEZONE = Time(timezone=True) +_DATETIME = DateTime() +_TIME = Time() +_STRING = String() +_UNICODE = Unicode() _type_map = { int: Integer(), @@ -3026,12 +3039,12 @@ _type_map = { bool: BOOLEANTYPE, decimal.Decimal: Numeric(), dt.date: Date(), - dt.datetime: DateTime(), - dt.time: Time(), + dt.datetime: _DATETIME, + dt.time: _TIME, dt.timedelta: Interval(), util.NoneType: NULLTYPE, bytes: LargeBinary(), - str: Unicode(), + str: _STRING, } diff --git a/test/dialect/mssql/test_types.py b/test/dialect/mssql/test_types.py index 2ef8b76dae..90933e05a6 100644 --- a/test/dialect/mssql/test_types.py +++ b/test/dialect/mssql/test_types.py @@ -1199,15 +1199,34 @@ class StringTest(fixtures.TestBase, AssertsCompiledSQL): literal_binds=True, ) - def test_string_text_explicit_literal_binds(self): - # the literal expression here coerces the right side to - # Unicode on Python 3 for plain string, test with unicode - # string just to confirm literal is doing this - self.assert_compile( - column("x", String()) == literal("foo"), - "x = N'foo'", - literal_binds=True, - ) + @testing.combinations(None, String(), Unicode(), argnames="coltype") + @testing.combinations(None, String(), Unicode(), argnames="literaltype") + @testing.combinations("réve🐍 illé", "hello", "réveillé", argnames="value") + def test_string_text_explicit_literal_binds( + self, coltype, literaltype, value + ): + """test #7551, dynamic coercion for string literals""" + + lhs = column("x", coltype) + rhs = literal(value, type_=literaltype) + + rhs_force_unicode = isinstance(literaltype, Unicode) + rhs_tests_as_unicode = literaltype is None and value != "hello" + + should_it_be_n = 
rhs_force_unicode or rhs_tests_as_unicode + + if should_it_be_n: + self.assert_compile( + lhs == rhs, + f"x = N'{value}'", + literal_binds=True, + ) + else: + self.assert_compile( + lhs == rhs, + f"x = '{value}'", + literal_binds=True, + ) def test_text_text_literal_binds(self): self.assert_compile( diff --git a/test/sql/test_types.py b/test/sql/test_types.py index 79b77581d0..f441c3b463 100644 --- a/test/sql/test_types.py +++ b/test/sql/test_types.py @@ -4016,3 +4016,30 @@ class LiteralTest(fixtures.TestBase): dialect=testing.db.dialect, compile_kwargs={"literal_binds": True}, ) + + +class ResolveForLiteralTest(fixtures.TestBase): + """test suite for literal resolution, includes tests for + #7537 and #7551 + + """ + + @testing.combinations( + ( + datetime.datetime( + 2012, 10, 15, 12, 57, 18, tzinfo=datetime.timezone.utc + ), + sqltypes.DATETIME_TIMEZONE, + ), + (datetime.datetime(2012, 10, 15, 12, 57, 18, 396), sqltypes._DATETIME), + ( + datetime.time(12, 57, 18, tzinfo=datetime.timezone.utc), + sqltypes.TIME_TIMEZONE, + ), + (datetime.time(12, 57, 18), sqltypes._TIME), + ("réve🐍 illé", sqltypes._UNICODE), + ("hello", sqltypes._STRING), + ("réveillé", sqltypes._UNICODE), + ) + def test_resolve(self, value, expected): + is_(literal(value).type, expected)