From: Mike Bayer Date: Thu, 10 Jan 2019 17:03:40 +0000 (-0500) Subject: Leave bytestring exception messages as bytestrings X-Git-Tag: rel_1_3_0b2~41 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2db54ee92ebd0970f52b271e152a6df9b563693f;p=thirdparty%2Fsqlalchemy%2Fsqlalchemy.git Leave bytestring exception messages as bytestrings Fixed a regression introduced in version 1.2 where a refactor of the :class:`.SQLAlchemyError` base exception class introduced an inappropriate coercion of a plain string message into Unicode under python 2k, which is not handled by the Python interpreter for characters outside of the platform's encoding (typically ascii). The :class:`.SQLAlchemyError` class now passes a bytestring through under Py2K for ``__str__()`` as is the behavior of exception objects in general under Py2K, and does a safe coercion to unicode utf-8 with backslash fallback for ``__unicode__()``. For Py3K the message is typically unicode already, but if not is again safe-coerced with utf-8 with backslash fallback for the ``__str__()`` method. Fixes: #4429 Change-Id: I2289da3f2c45c7d0041fa43d838958f7614defc3 --- diff --git a/doc/build/changelog/unreleased_12/4429.rst b/doc/build/changelog/unreleased_12/4429.rst new file mode 100644 index 0000000000..12c3e7e10f --- /dev/null +++ b/doc/build/changelog/unreleased_12/4429.rst @@ -0,0 +1,15 @@ +.. change:: + :tags: bug, engine + :tickets: 4429 + + Fixed a regression introduced in version 1.2 where a refactor + of the :class:`.SQLAlchemyError` base exception class introduced an + inappropriate coercion of a plain string message into Unicode under + python 2k, which is not handled by the Python interpreter for characters + outside of the platform's encoding (typically ascii). 
The + :class:`.SQLAlchemyError` class now passes a bytestring through under + Py2K for ``__str__()`` as is the behavior of exception objects in general + under Py2K, does a safe coercion to unicode utf-8 with + backslash fallback for ``__unicode__()``. For Py3K the message is + typically unicode already, but if not is again safe-coerced with utf-8 + with backslash fallback for the ``__str__()`` method. \ No newline at end of file diff --git a/lib/sqlalchemy/exc.py b/lib/sqlalchemy/exc.py index e47df85af3..cf16f9772c 100644 --- a/lib/sqlalchemy/exc.py +++ b/lib/sqlalchemy/exc.py @@ -36,24 +36,47 @@ class SQLAlchemyError(Exception): "http://sqlalche.me/e/%s)" % (self.code,) ) - def _message(self): - # get string representation just like Exception.__str__(self), - # but also support if the string has non-ascii chars + def _message(self, as_unicode=compat.py3k): + # rules: + # + # 1. under py2k, for __str__ return single string arg as it was + # given without converting to unicode. for __unicode__ + # do a conversion but check that it's not unicode already just in + # case + # + # 2. under py3k, single arg string will usually be a unicode + # object, but since __str__() must return unicode, check for + # bytestring just in case + # + # 3. for multiple self.args, this is not a case in current + # SQLAlchemy though this is happening in at least one known external + # library, call str() which does a repr(). 
+ # if len(self.args) == 1: - return compat.text_type(self.args[0]) + text = self.args[0] + if as_unicode and isinstance(text, compat.binary_types): + return compat.decode_backslashreplace(text, "utf-8") + else: + return self.args[0] else: - return compat.text_type(self.args) + # this is not a normal case within SQLAlchemy but is here for + # compatibility with Exception.args - the str() comes out as + # a repr() of the tuple + return str(self.args) - def __str__(self): - message = self._message() + def _sql_message(self, as_unicode): + message = self._message(as_unicode) if self.code: message = "%s %s" % (message, self._code_str()) return message + def __str__(self): + return self._sql_message(compat.py3k) + def __unicode__(self): - return self.__str__() + return self._sql_message(True) class ArgumentError(SQLAlchemyError): @@ -321,10 +344,10 @@ class StatementError(SQLAlchemyError): (self.args[0], self.statement, self.params, self.orig), ) - def __str__(self): + def _sql_message(self, as_unicode): from sqlalchemy.sql import util - details = [self._message()] + details = [self._message(as_unicode=as_unicode)] if self.statement: details.append("[SQL: %r]" % self.statement) if self.params: diff --git a/lib/sqlalchemy/testing/suite/test_dialect.py b/lib/sqlalchemy/testing/suite/test_dialect.py index 245ccc6f07..1b3307042e 100644 --- a/lib/sqlalchemy/testing/suite/test_dialect.py +++ b/lib/sqlalchemy/testing/suite/test_dialect.py @@ -1,3 +1,5 @@ +#! coding: utf-8 + from .. import assert_raises from .. import config from .. import eq_ @@ -11,6 +13,7 @@ from ... import Integer from ... import literal_column from ... import select from ... 
import String +from ...util import compat class ExceptionTest(fixtures.TablesTest): @@ -53,6 +56,28 @@ class ExceptionTest(fixtures.TablesTest): trans.rollback() + def test_exception_with_non_ascii(self): + with config.db.connect() as conn: + try: + # try to create an error message that likely has non-ascii + # characters in the DBAPI's message string. unfortunately + # there's no way to make this happen with some drivers like + # mysqlclient, pymysql. this at least does produce a non- + # ascii error message for cx_oracle, psycopg2 + conn.execute(select([literal_column(u"méil")])) + assert False + except exc.DBAPIError as err: + err_str = str(err) + + assert str(err.orig) in str(err) + + # test that we are actually getting string on Py2k, unicode + # on Py3k. + if compat.py2k: + assert isinstance(err_str, str) + else: + assert isinstance(err_str, str) + class AutocommitTest(fixtures.TablesTest): diff --git a/lib/sqlalchemy/util/__init__.py b/lib/sqlalchemy/util/__init__.py index 13bcc37e71..4909c7c60d 100644 --- a/lib/sqlalchemy/util/__init__.py +++ b/lib/sqlalchemy/util/__init__.py @@ -51,6 +51,7 @@ from .compat import byte_buffer # noqa from .compat import callable # noqa from .compat import cmp # noqa from .compat import cpython # noqa +from .compat import decode_backslashreplace # noqa from .compat import dottedgetter # noqa from .compat import inspect_getargspec # noqa from .compat import int_types # noqa diff --git a/lib/sqlalchemy/util/compat.py b/lib/sqlalchemy/util/compat.py index 7963eebb61..6c24f75e12 100644 --- a/lib/sqlalchemy/util/compat.py +++ b/lib/sqlalchemy/util/compat.py @@ -92,6 +92,9 @@ if py3k: def b64encode(x): return base64.b64encode(x).decode("ascii") + def decode_backslashreplace(text, encoding): + return text.decode(encoding, errors="backslashreplace") + def cmp(a, b): return (a > b) - (a < b) @@ -195,6 +198,15 @@ else: def ue(s): return unicode(s, "unicode_escape") # noqa + def decode_backslashreplace(text, encoding): + try: + return 
text.decode(encoding) + except UnicodeDecodeError: + # regular "backslashreplace" for an incompatible encoding raises: + # "TypeError: don't know how to handle UnicodeDecodeError in + # error callback" + return repr(text)[1:-1].decode() + # not as nice as that of Py3K, but at least preserves # the code line where the issue occurred exec( diff --git a/test/base/test_utils.py b/test/base/test_utils.py index 20b41101e2..69af6e0329 100644 --- a/test/base/test_utils.py +++ b/test/base/test_utils.py @@ -1,3 +1,5 @@ +#! coding: utf-8 + import copy import inspect import sys @@ -2552,3 +2554,47 @@ class QuotedTokenParserTest(fixtures.TestBase): def test_quoted_single_w_dot_middle(self): self._test('"na.me"', ["na.me"]) + + +class BackslashReplaceTest(fixtures.TestBase): + def test_ascii_to_utf8(self): + eq_( + compat.decode_backslashreplace(util.b("hello world"), "utf-8"), + util.u("hello world"), + ) + + def test_utf8_to_utf8(self): + eq_( + compat.decode_backslashreplace( + util.u("some message méil").encode("utf-8"), "utf-8" + ), + util.u("some message méil"), + ) + + def test_latin1_to_utf8(self): + eq_( + compat.decode_backslashreplace( + util.u("some message méil").encode("latin-1"), "utf-8" + ), + util.u("some message m\\xe9il"), + ) + + eq_( + compat.decode_backslashreplace( + util.u("some message méil").encode("latin-1"), "latin-1" + ), + util.u("some message méil"), + ) + + def test_cp1251_to_utf8(self): + message = util.u("some message П").encode("cp1251") + eq_(message, b"some message \xcf") + eq_( + compat.decode_backslashreplace(message, "utf-8"), + util.u("some message \\xcf"), + ) + + eq_( + compat.decode_backslashreplace(message, "cp1251"), + util.u("some message П"), + ) diff --git a/test/engine/test_execute.py b/test/engine/test_execute.py index d9d10a9a6c..8613be5bc6 100644 --- a/test/engine/test_execute.py +++ b/test/engine/test_execute.py @@ -406,7 +406,7 @@ class ExecuteTest(fixtures.TestBase): obj, ) - def test_stmt_exception_non_ascii(self): + def 
test_stmt_exception_bytestring_raised(self): name = util.u("méil") with testing.db.connect() as conn: assert_raises_message( @@ -427,6 +427,63 @@ class ExecuteTest(fixtures.TestBase): {"uname_incorrect": "foo"}, ) + def test_stmt_exception_bytestring_utf8(self): + # uncommon case for Py3K, bytestring object passed + # as the error message + message = util.u("some message méil").encode("utf-8") + + err = tsa.exc.SQLAlchemyError(message) + if util.py2k: + # string passes it through + eq_(str(err), message) + + # unicode accessor decodes to utf-8 + eq_(unicode(err), util.u("some message méil")) # noqa + else: + eq_(str(err), util.u("some message méil")) + + def test_stmt_exception_bytestring_latin1(self): + # uncommon case for Py3K, bytestring object passed + # as the error message + message = util.u("some message méil").encode("latin-1") + + err = tsa.exc.SQLAlchemyError(message) + if util.py2k: + # string passes it through + eq_(str(err), message) + + # unicode accessor decodes to utf-8 + eq_(unicode(err), util.u("some message m\\xe9il")) # noqa + else: + eq_(str(err), util.u("some message m\\xe9il")) + + def test_stmt_exception_unicode_hook_unicode(self): + # uncommon case for Py2K, Unicode object passed + # as the error message + message = util.u("some message méil") + + err = tsa.exc.SQLAlchemyError(message) + if util.py2k: + eq_(unicode(err), util.u("some message méil")) # noqa + else: + eq_(str(err), util.u("some message méil")) + + def test_stmt_exception_str_multi_args(self): + err = tsa.exc.SQLAlchemyError("some message", 206) + eq_(str(err), "('some message', 206)") + + def test_stmt_exception_str_multi_args_bytestring(self): + message = util.u("some message méil").encode("utf-8") + + err = tsa.exc.SQLAlchemyError(message, 206) + eq_(str(err), str((message, 206))) + + def test_stmt_exception_str_multi_args_unicode(self): + message = util.u("some message méil") + + err = tsa.exc.SQLAlchemyError(message, 206) + eq_(str(err), str((message, 206))) + def 
test_stmt_exception_pickleable_no_dbapi(self): self._test_stmt_exception_pickleable(Exception("hello world"))