From: Mike Bayer <mike_mp@zzzcomputing.com>
Date: Mon, 7 Feb 2022 17:08:51 +0000 (-0500)
Subject: apply literal value resolution to String
X-Git-Tag: rel_2_0_0b1~496^2
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=24de22338522779c5d9e720c4b97dc8609136c29;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git

apply literal value resolution to String

Python string values for which a SQL type is determined from the type of
the value, mainly when using :func:`_sql.literal`, will now apply the
:class:`_types.String` type, rather than the :class:`_types.Unicode`
datatype, for Python string values that test as "ascii only" using Python
``str.isascii()``. If the string is not ``isascii()``, the
:class:`_types.Unicode` datatype will be bound instead, which was used in
all string detection previously. This behavior **only applies to in-place
detection of datatypes when using ``literal()`` or other contexts that have
no existing datatype**, which is not usually the case under normal
:class:`_schema.Column` comparison operations, where the type of the
:class:`_schema.Column` being compared always takes precedence.

Use of the :class:`_types.Unicode` datatype can determine literal string
formatting on backends such as SQL Server, where a literal value (i.e.
using ``literal_binds``) will be rendered as ``N'<value>'`` instead of
``'<value>'``. For normal bound value handling, the :class:`_types.Unicode`
datatype may also have implications for passing values to the DBAPI; again
in the case of SQL Server, the pyodbc driver supports the use of
:ref:`setinputsizes mode <mssql_pyodbc_setinputsizes>`, which will handle
:class:`_types.String` versus :class:`_types.Unicode` differently.

Fixes: #7551
Change-Id: I4f8de63e36532ae8ce4c630ee59211349ce95361
---

diff --git a/doc/build/changelog/unreleased_20/7551.rst b/doc/build/changelog/unreleased_20/7551.rst
new file mode 100644
index 0000000000..0c0b5863d7
--- /dev/null
+++ b/doc/build/changelog/unreleased_20/7551.rst
@@ -0,0 +1,25 @@
+.. change::
+    :tags: bug, types
+    :tickets: 7551
+
+    Python string values for which a SQL type is determined from the type of
+    the value, mainly when using :func:`_sql.literal`, will now apply the
+    :class:`_types.String` type, rather than the :class:`_types.Unicode`
+    datatype, for Python string values that test as "ascii only" using Python
+    ``str.isascii()``. If the string is not ``isascii()``, the
+    :class:`_types.Unicode` datatype will be bound instead, which was used in
+    all string detection previously. This behavior **only applies to in-place
+    detection of datatypes when using ``literal()`` or other contexts that have
+    no existing datatype**, which is not usually the case under normal
+    :class:`_schema.Column` comparison operations, where the type of the
+    :class:`_schema.Column` being compared always takes precedence.
+
+    Use of the :class:`_types.Unicode` datatype can determine literal string
+    formatting on backends such as SQL Server, where a literal value (i.e.
+    using ``literal_binds``) will be rendered as ``N'<value>'`` instead of
+    ``'<value>'``. For normal bound value handling, the :class:`_types.Unicode`
+    datatype may also have implications for passing values to the DBAPI; again
+    in the case of SQL Server, the pyodbc driver supports the use of
+    :ref:`setinputsizes mode <mssql_pyodbc_setinputsizes>`, which will handle
+    :class:`_types.String` versus :class:`_types.Unicode` differently.
+
diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py
index 42fad5e044..575f402c3f 100644
--- a/lib/sqlalchemy/sql/sqltypes.py
+++ b/lib/sqlalchemy/sql/sqltypes.py
@@ -184,6 +184,15 @@ class String(Concatenable, TypeEngine[typing_Text]):
         self.length = length
         self.collation = collation
 
+    def _resolve_for_literal(self, value):
+        # I was SO PROUD of my regex trick, but we don't need it.
+        # re.search(r"[^\u0000-\u007F]", value)
+
+        if value.isascii():
+            return _STRING
+        else:
+            return _UNICODE
+
     def literal_processor(self, dialect):
         def process(value):
             value = value.replace("'", "''")
@@ -3019,6 +3028,10 @@ MATCHTYPE = MatchType()
 TABLEVALUE = TableValueType()
 DATETIME_TIMEZONE = DateTime(timezone=True)
 TIME_TIMEZONE = Time(timezone=True)
+_DATETIME = DateTime()
+_TIME = Time()
+_STRING = String()
+_UNICODE = Unicode()
 
 _type_map = {
     int: Integer(),
@@ -3026,12 +3039,12 @@ _type_map = {
     bool: BOOLEANTYPE,
     decimal.Decimal: Numeric(),
     dt.date: Date(),
-    dt.datetime: DateTime(),
-    dt.time: Time(),
+    dt.datetime: _DATETIME,
+    dt.time: _TIME,
     dt.timedelta: Interval(),
     util.NoneType: NULLTYPE,
     bytes: LargeBinary(),
-    str: Unicode(),
+    str: _STRING,
 }
 
 
diff --git a/test/dialect/mssql/test_types.py b/test/dialect/mssql/test_types.py
index 2ef8b76dae..90933e05a6 100644
--- a/test/dialect/mssql/test_types.py
+++ b/test/dialect/mssql/test_types.py
@@ -1199,15 +1199,34 @@ class StringTest(fixtures.TestBase, AssertsCompiledSQL):
             literal_binds=True,
         )
 
-    def test_string_text_explicit_literal_binds(self):
-        # the literal expression here coerces the right side to
-        # Unicode on Python 3 for plain string, test with unicode
-        # string just to confirm literal is doing this
-        self.assert_compile(
-            column("x", String()) == literal("foo"),
-            "x = N'foo'",
-            literal_binds=True,
-        )
+    @testing.combinations(None, String(), Unicode(), argnames="coltype")
+    @testing.combinations(None, String(), Unicode(), argnames="literaltype")
+    @testing.combinations("réve🐍 illé", "hello", "réveillé", argnames="value")
+    def test_string_text_explicit_literal_binds(
+        self, coltype, literaltype, value
+    ):
+        """test #7551, dynamic coercion for string literals"""
+
+        lhs = column("x", coltype)
+        rhs = literal(value, type_=literaltype)
+
+        rhs_force_unicode = isinstance(literaltype, Unicode)
+        rhs_tests_as_unicode = literaltype is None and value != "hello"
+
+        should_it_be_n = rhs_force_unicode or rhs_tests_as_unicode
+
+        if should_it_be_n:
+            self.assert_compile(
+                lhs == rhs,
+                f"x = N'{value}'",
+                literal_binds=True,
+            )
+        else:
+            self.assert_compile(
+                lhs == rhs,
+                f"x = '{value}'",
+                literal_binds=True,
+            )
 
     def test_text_text_literal_binds(self):
         self.assert_compile(
diff --git a/test/sql/test_types.py b/test/sql/test_types.py
index 79b77581d0..f441c3b463 100644
--- a/test/sql/test_types.py
+++ b/test/sql/test_types.py
@@ -4016,3 +4016,30 @@ class LiteralTest(fixtures.TestBase):
             dialect=testing.db.dialect,
             compile_kwargs={"literal_binds": True},
         )
+
+
+class ResolveForLiteralTest(fixtures.TestBase):
+    """test suite for literal resolution, includes tests for
+    #7537 and #7551
+
+    """
+
+    @testing.combinations(
+        (
+            datetime.datetime(
+                2012, 10, 15, 12, 57, 18, tzinfo=datetime.timezone.utc
+            ),
+            sqltypes.DATETIME_TIMEZONE,
+        ),
+        (datetime.datetime(2012, 10, 15, 12, 57, 18, 396), sqltypes._DATETIME),
+        (
+            datetime.time(12, 57, 18, tzinfo=datetime.timezone.utc),
+            sqltypes.TIME_TIMEZONE,
+        ),
+        (datetime.time(12, 57, 18), sqltypes._TIME),
+        ("réve🐍 illé", sqltypes._UNICODE),
+        ("hello", sqltypes._STRING),
+        ("réveillé", sqltypes._UNICODE),
+    )
+    def test_resolve(self, value, expected):
+        is_(literal(value).type, expected)