apply literal value resolution to String

author Mike Bayer <mike_mp@zzzcomputing.com>

Mon, 7 Feb 2022 17:08:51 +0000 (12:08 -0500)

committer Mike Bayer <mike_mp@zzzcomputing.com>

Mon, 7 Feb 2022 17:13:40 +0000 (12:13 -0500)
author Mike Bayer <mike_mp@zzzcomputing.com>
Mon, 7 Feb 2022 17:08:51 +0000 (12:08 -0500)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Mon, 7 Feb 2022 17:13:40 +0000 (12:13 -0500)
diff --git a/doc/build/changelog/unreleased_20/7551.rst b/doc/build/changelog/unreleased_20/7551.rst

new file mode 100644 (file)

index 0000000..0c0b586
--- /dev/null
+++ b/doc/build/changelog/unreleased_20/7551.rst
@@ -0,0 +1,25 @@
+.. change::
+    :tags: bug, types
+    :tickets: 7551
+
+    Python string values for which a SQL type is determined from the type of
+    the value, mainly when using :func:`_sql.literal`, will now apply the
+    :class:`_types.String` type, rather than the :class:`_types.Unicode`
+    datatype, for Python string values that test as "ascii only" using Python
+    ``str.isascii()``. If the string is not ``isascii()``, the
+    :class:`_types.Unicode` datatype will be bound instead, which was used in
+    all string detection previously. This behavior **only applies to in-place
+    detection of datatypes when using ``literal()`` or other contexts that have
+    no existing datatype**, which is not usually the case under normal
+    :class:`_schema.Column` comparison operations, where the type of the
+    :class:`_schema.Column` being compared always takes precedence.
+
+    Use of the :class:`_types.Unicode` datatype can determine literal string
+    formatting on backends such as SQL Server, where a literal value (i.e.
+    using ``literal_binds``) will be rendered as ``N'<value>'`` instead of
+    ``'<value>'``. For normal bound value handling, the :class:`_types.Unicode`
+    datatype may also have implications for passing values to the DBAPI; again
+    in the case of SQL Server, the pyodbc driver supports the use of
+    :ref:`setinputsizes mode <mssql_pyodbc_setinputsizes>`, which will handle
+    :class:`_types.String` versus :class:`_types.Unicode` differently.
+
diff --git a/lib/sqlalchemy/sql/sqltypes.py b/lib/sqlalchemy/sql/sqltypes.py

index 42fad5e0443a4c2b3be2d69b0022586b2f40d9f3..575f402c3fd2d5acf4940046ca584aebc60cc728 100644 (file)
--- a/lib/sqlalchemy/sql/sqltypes.py
+++ b/lib/sqlalchemy/sql/sqltypes.py
@@ -184,6 +184,15 @@ class String(Concatenable, TypeEngine[typing_Text]):
          self.length = length
          self.collation = collation
  
+    def _resolve_for_literal(self, value):
+        # I was SO PROUD of my regex trick, but we don't need it.
+        # re.search(r"[^\u0000-\u007F]", value)
+
+        if value.isascii():
+            return _STRING
+        else:
+            return _UNICODE
+
      def literal_processor(self, dialect):
          def process(value):
              value = value.replace("'", "''")
@@ -3019,6 +3028,10 @@ MATCHTYPE = MatchType()
  TABLEVALUE = TableValueType()
  DATETIME_TIMEZONE = DateTime(timezone=True)
  TIME_TIMEZONE = Time(timezone=True)
+_DATETIME = DateTime()
+_TIME = Time()
+_STRING = String()
+_UNICODE = Unicode()
  
  _type_map = {
      int: Integer(),
@@ -3026,12 +3039,12 @@ _type_map = {
      bool: BOOLEANTYPE,
      decimal.Decimal: Numeric(),
      dt.date: Date(),
-    dt.datetime: DateTime(),
-    dt.time: Time(),
+    dt.datetime: _DATETIME,
+    dt.time: _TIME,
      dt.timedelta: Interval(),
      util.NoneType: NULLTYPE,
      bytes: LargeBinary(),
-    str: Unicode(),
+    str: _STRING,
  }
  
  
diff --git a/test/dialect/mssql/test_types.py b/test/dialect/mssql/test_types.py

index 2ef8b76daeeb3990815132e7f254e28570abdcec..90933e05a6e906b255ed8f28c95b96e42695a19b 100644 (file)
--- a/test/dialect/mssql/test_types.py
+++ b/test/dialect/mssql/test_types.py
@@ -1199,15 +1199,34 @@ class StringTest(fixtures.TestBase, AssertsCompiledSQL):
              literal_binds=True,
          )
  
-    def test_string_text_explicit_literal_binds(self):
-        # the literal expression here coerces the right side to
-        # Unicode on Python 3 for plain string, test with unicode
-        # string just to confirm literal is doing this
-        self.assert_compile(
-            column("x", String()) == literal("foo"),
-            "x = N'foo'",
-            literal_binds=True,
-        )
+    @testing.combinations(None, String(), Unicode(), argnames="coltype")
+    @testing.combinations(None, String(), Unicode(), argnames="literaltype")
+    @testing.combinations("réve🐍 illé", "hello", "réveillé", argnames="value")
+    def test_string_text_explicit_literal_binds(
+        self, coltype, literaltype, value
+    ):
+        """test #7551, dynamic coercion for string literals"""
+
+        lhs = column("x", coltype)
+        rhs = literal(value, type_=literaltype)
+
+        rhs_force_unicode = isinstance(literaltype, Unicode)
+        rhs_tests_as_unicode = literaltype is None and value != "hello"
+
+        should_it_be_n = rhs_force_unicode or rhs_tests_as_unicode
+
+        if should_it_be_n:
+            self.assert_compile(
+                lhs == rhs,
+                f"x = N'{value}'",
+                literal_binds=True,
+            )
+        else:
+            self.assert_compile(
+                lhs == rhs,
+                f"x = '{value}'",
+                literal_binds=True,
+            )
  
      def test_text_text_literal_binds(self):
          self.assert_compile(
diff --git a/test/sql/test_types.py b/test/sql/test_types.py

index 79b77581d0bfa2f256c4ce8b588bf5fcb2692301..f441c3b4639f1fc4e56f24b6cd3ece4d74cae1f6 100644 (file)
--- a/test/sql/test_types.py
+++ b/test/sql/test_types.py
@@ -4016,3 +4016,30 @@ class LiteralTest(fixtures.TestBase):
              dialect=testing.db.dialect,
              compile_kwargs={"literal_binds": True},
          )
+
+
+class ResolveForLiteralTest(fixtures.TestBase):
+    """test suite for literal resolution, includes tests for
+    #7537 and #7551
+
+    """
+
+    @testing.combinations(
+        (
+            datetime.datetime(
+                2012, 10, 15, 12, 57, 18, tzinfo=datetime.timezone.utc
+            ),
+            sqltypes.DATETIME_TIMEZONE,
+        ),
+        (datetime.datetime(2012, 10, 15, 12, 57, 18, 396), sqltypes._DATETIME),
+        (
+            datetime.time(12, 57, 18, tzinfo=datetime.timezone.utc),
+            sqltypes.TIME_TIMEZONE,
+        ),
+        (datetime.time(12, 57, 18), sqltypes._TIME),
+        ("réve🐍 illé", sqltypes._UNICODE),
+        ("hello", sqltypes._STRING),
+        ("réveillé", sqltypes._UNICODE),
+    )
+    def test_resolve(self, value, expected):
+        is_(literal(value).type, expected)
author	Mike Bayer <mike_mp@zzzcomputing.com>
	Mon, 7 Feb 2022 17:08:51 +0000 (12:08 -0500)
committer	Mike Bayer <mike_mp@zzzcomputing.com>
	Mon, 7 Feb 2022 17:13:40 +0000 (12:13 -0500)
doc/build/changelog/unreleased_20/7551.rst	[new file with mode: 0644]	patch \| blob
lib/sqlalchemy/sql/sqltypes.py		patch \| blob \| blame \| history
test/dialect/mssql/test_types.py		patch \| blob \| blame \| history
test/sql/test_types.py		patch \| blob \| blame \| history