git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
Implement encoding_errors for cx_oracle
author Mike Bayer <mike_mp@zzzcomputing.com>
Mon, 12 Aug 2019 19:09:37 +0000 (15:09 -0400)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Mon, 21 Oct 2019 18:20:24 +0000 (14:20 -0400)
Added dialect-level flag ``encoding_errors`` to the cx_Oracle dialect,
which can be specified as part of :func:`.create_engine`.   This is passed
to SQLAlchemy's unicode decoding converter under Python 2, and to
cx_Oracle's ``cursor.var()`` object as the ``encodingErrors`` parameter
under Python 3, for the very unusual case that broken encodings are present
in the target database which cannot be fetched unless error handling is
relaxed.  The value is ultimately one of the Python "encoding errors"
parameters passed to ``decode()``.

Closes: #4801
Fixes: #4799
Change-Id: I1d542ba367bcd187347c54db1fee815f7890e71c

doc/build/changelog/unreleased_13/4799.rst [new file with mode: 0644]
lib/sqlalchemy/dialects/oracle/cx_oracle.py
test/dialect/oracle/test_dialect.py

diff --git a/doc/build/changelog/unreleased_13/4799.rst b/doc/build/changelog/unreleased_13/4799.rst
new file mode 100644 (file)
index 0000000..8cc12a6
--- /dev/null
@@ -0,0 +1,12 @@
+.. change::
+    :tags: usecase, oracle
+    :tickets: 4799
+
+    Added dialect-level flag ``encoding_errors`` to the cx_Oracle dialect,
+    which can be specified as part of :func:`.create_engine`.   This is passed
+    to SQLAlchemy's unicode decoding converter under Python 2, and to
+    cx_Oracle's ``cursor.var()`` object as the ``encodingErrors`` parameter
+    under Python 3, for the very unusual case that broken encodings are present
+    in the target database which cannot be fetched unless error handling is
+    relaxed.  The value is ultimately one of the Python "encoding errors"
+    parameters passed to ``decode()``.
index 2572a79b377d5087628c04c4cae473b31dc12385..d7cd6dcfc6dfafad8bb0399d44aae8c430c1668f 100644 (file)
@@ -78,6 +78,8 @@ The parameters accepted by the cx_oracle dialect are as follows:
 
 * ``coerce_to_decimal`` - see :ref:`cx_oracle_numeric` for detail.
 
+* ``encoding_errors`` - see :ref:`cx_oracle_unicode_encoding_errors` for detail.
+
 .. _cx_oracle_unicode:
 
 Unicode
@@ -124,6 +126,23 @@ VARCHAR2, CHAR, and CLOB, the flag ``coerce_to_unicode=False`` can be passed to
    delivered as VARCHAR2/CHAR/CLOB data.
 
 
+.. _cx_oracle_unicode_encoding_errors:
+
+Encoding Errors
+^^^^^^^^^^^^^^^
+
+For the unusual case that data in the Oracle database is present with a broken
+encoding, the dialect accepts a parameter ``encoding_errors`` which will be
+passed to Unicode decoding functions in order to affect how decoding errors are
+handled.  The value is ultimately consumed by the Python `decode
+<https://docs.python.org/3/library/stdtypes.html#bytes.decode>`_ function, and
+is passed both to cx_Oracle's ``encodingErrors`` parameter consumed by
+``Cursor.var()`` and to SQLAlchemy's own decoding function, as the
+cx_Oracle dialect makes use of both under different circumstances.
+
+.. versionadded:: 1.3.11
+
+
 .. _cx_oracle_setinputsizes:
 
 Fine grained control over cx_Oracle data binding performance with setinputsizes
@@ -760,12 +779,14 @@ class OracleDialect_cx_oracle(OracleDialect):
         coerce_to_unicode=True,
         coerce_to_decimal=True,
         arraysize=50,
+        encoding_errors=None,
         threaded=None,
         **kwargs
     ):
 
         OracleDialect.__init__(self, **kwargs)
         self.arraysize = arraysize
+        self.encoding_errors = encoding_errors
         if threaded is not None:
             self._cx_oracle_threaded = threaded
         self.auto_convert_lobs = auto_convert_lobs
@@ -823,6 +844,19 @@ class OracleDialect_cx_oracle(OracleDialect):
 
         self._is_cx_oracle_6 = self.cx_oracle_ver >= (6,)
 
+    @property
+    def _cursor_var_unicode_kwargs(self):
+        if self.encoding_errors:
+            if self.cx_oracle_ver >= (6, 4):
+                return {"encodingErrors": self.encoding_errors}
+            else:
+                util.warn(
+                    "cx_oracle version %r does not support encodingErrors"
+                    % (self.cx_oracle_ver,)
+                )
+
+        return {}
+
     def _parse_cx_oracle_ver(self, version):
         m = re.match(r"(\d+)\.(\d+)(?:\.(\d+))?", version)
         if m:
@@ -920,7 +954,7 @@ class OracleDialect_cx_oracle(OracleDialect):
             ):
                 if compat.py2k:
                     outconverter = processors.to_unicode_processor_factory(
-                        dialect.encoding, None
+                        dialect.encoding, errors=dialect.encoding_errors
                     )
                     return cursor.var(
                         cx_Oracle.STRING,
@@ -929,7 +963,12 @@ class OracleDialect_cx_oracle(OracleDialect):
                         outconverter=outconverter,
                     )
                 else:
-                    return cursor.var(util.text_type, size, cursor.arraysize)
+                    return cursor.var(
+                        util.text_type,
+                        size,
+                        cursor.arraysize,
+                        **dialect._cursor_var_unicode_kwargs
+                    )
 
             elif dialect.auto_convert_lobs and default_type in (
                 cx_Oracle.CLOB,
@@ -937,7 +976,7 @@ class OracleDialect_cx_oracle(OracleDialect):
             ):
                 if compat.py2k:
                     outconverter = processors.to_unicode_processor_factory(
-                        dialect.encoding, None
+                        dialect.encoding, errors=dialect.encoding_errors
                     )
                     return cursor.var(
                         default_type,
@@ -951,6 +990,7 @@ class OracleDialect_cx_oracle(OracleDialect):
                         size,
                         cursor.arraysize,
                         outconverter=lambda value: value.read(),
+                        **dialect._cursor_var_unicode_kwargs
                     )
 
             elif dialect.auto_convert_lobs and default_type in (
index c29a0b76d799d48b001f25f09ebf9b8ab56836b0..62926700e7e1853b0a1aad06d413f047b7adc11d 100644 (file)
@@ -30,6 +30,7 @@ from sqlalchemy.testing.mock import Mock
 from sqlalchemy.testing.schema import Column
 from sqlalchemy.testing.schema import Table
 from sqlalchemy.util import u
+from sqlalchemy.util import ue
 
 
 class DialectTest(fixtures.TestBase):
@@ -63,6 +64,200 @@ class DialectTest(fixtures.TestBase):
             cx_oracle.OracleDialect_cx_oracle(dbapi=Mock())
 
 
+class EncodingErrorsTest(fixtures.TestBase):
+    """mock test for encoding_errors.
+
+    While we tried to write a round trip test, I could only reproduce the
+    problem on Python 3 and only for STRING/CHAR.  I couldn't get a CLOB to
+    come back with broken encoding and also under py2k cx_Oracle would always
+    return a bytestring with the correct encoding.  Since the test barely
+    worked, it is not included here to avoid future problems.  It's not clear
+    what other levels of encode/decode are going on such that explicitly
+    selecting for AL16UTF16 is still returning a utf-8 bytestring under py2k or
+    for CLOBs, nor is it really clear that this flag is useful; however, at
+    least for the Py3K case, cx_Oracle supports the flag and we did have one
+    user reporting that they had a (non-reproducible) database which
+    illustrated the problem so we will pass it in.
+
+    """
+
+    # NOTE: these numbers are arbitrary, they are not the actual
+    # cx_Oracle constants
+    cx_Oracle_NUMBER = 0
+    cx_Oracle_STRING = 1
+    cx_Oracle_FIXED_CHAR = 2
+    cx_Oracle_CLOB = 3
+    cx_Oracle_NCLOB = 4
+
+    @testing.fixture
+    def cx_Oracle(self):
+        return mock.Mock(
+            NUMBER=self.cx_Oracle_NUMBER,
+            STRING=self.cx_Oracle_STRING,
+            FIXED_CHAR=self.cx_Oracle_FIXED_CHAR,
+            CLOB=self.cx_Oracle_CLOB,
+            NCLOB=self.cx_Oracle_NCLOB,
+            version="7.0.1",
+            __future__=mock.Mock(),
+        )
+
+    _oracle_char_combinations = testing.combinations(
+        ("STRING", cx_Oracle_STRING, False),
+        ("FIXED_CHAR", cx_Oracle_FIXED_CHAR, False),
+        ("CLOB", cx_Oracle_CLOB, True),
+        ("NCLOB", cx_Oracle_NCLOB, True),
+        argnames="cx_oracle_type,use_read",
+        id_="iaa",
+    )
+
+    def _assert_errorhandler(self, outconverter, use_read, has_errorhandler):
+        data = ue("\uee2c\u9a66")  # this is u"\uee2c\u9a66"
+
+        utf8_w_errors = data.encode("utf-16")
+
+        if use_read:
+            utf8_w_errors = mock.Mock(
+                read=mock.Mock(return_value=utf8_w_errors)
+            )
+
+        if has_errorhandler:
+
+            eq_(
+                outconverter(utf8_w_errors),
+                data.encode("utf-16").decode("utf-8", "ignore"),
+            )
+        else:
+            assert_raises(UnicodeDecodeError, outconverter, utf8_w_errors)
+
+    @_oracle_char_combinations
+    @testing.requires.python3
+    def test_older_cx_oracle_warning(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        cx_Oracle.version = "6.3"
+
+        ignore_dialect = cx_oracle.dialect(
+            dbapi=cx_Oracle, encoding_errors="ignore"
+        )
+        ignore_outputhandler = (
+            ignore_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+
+        with testing.expect_warnings(
+            r"cx_oracle version \(6, 3\) does not support encodingErrors"
+        ):
+            ignore_outputhandler(
+                cursor, "foo", cx_oracle_type, None, None, None
+            )
+
+    @_oracle_char_combinations
+    @testing.requires.python2
+    def test_encoding_errors_sqla_py2k(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        ignore_dialect = cx_oracle.dialect(
+            dbapi=cx_Oracle, encoding_errors="ignore"
+        )
+
+        ignore_outputhandler = (
+            ignore_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+        ignore_outputhandler(cursor, "foo", cx_oracle_type, None, None, None)
+        outconverter = cursor.mock_calls[0][2]["outconverter"]
+        self._assert_errorhandler(outconverter, use_read, True)
+
+    @_oracle_char_combinations
+    @testing.requires.python2
+    def test_no_encoding_errors_sqla_py2k(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        plain_dialect = cx_oracle.dialect(dbapi=cx_Oracle)
+
+        plain_outputhandler = (
+            plain_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+        plain_outputhandler(cursor, "foo", cx_oracle_type, None, None, None)
+        outconverter = cursor.mock_calls[0][2]["outconverter"]
+        self._assert_errorhandler(outconverter, use_read, False)
+
+    @_oracle_char_combinations
+    @testing.requires.python3
+    def test_encoding_errors_cx_oracle_py3k(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        ignore_dialect = cx_oracle.dialect(
+            dbapi=cx_Oracle, encoding_errors="ignore"
+        )
+
+        ignore_outputhandler = (
+            ignore_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+        ignore_outputhandler(cursor, "foo", cx_oracle_type, None, None, None)
+
+        if use_read:
+            eq_(
+                cursor.mock_calls,
+                [
+                    mock.call.var(
+                        mock.ANY,
+                        None,
+                        cursor.arraysize,
+                        encodingErrors="ignore",
+                        outconverter=mock.ANY,
+                    )
+                ],
+            )
+        else:
+            eq_(
+                cursor.mock_calls,
+                [
+                    mock.call.var(
+                        mock.ANY,
+                        None,
+                        cursor.arraysize,
+                        encodingErrors="ignore",
+                    )
+                ],
+            )
+
+    @_oracle_char_combinations
+    @testing.requires.python3
+    def test_no_encoding_errors_cx_oracle_py3k(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        plain_dialect = cx_oracle.dialect(dbapi=cx_Oracle)
+
+        plain_outputhandler = (
+            plain_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+        plain_outputhandler(cursor, "foo", cx_oracle_type, None, None, None)
+
+        if use_read:
+            eq_(
+                cursor.mock_calls,
+                [
+                    mock.call.var(
+                        mock.ANY, None, cursor.arraysize, outconverter=mock.ANY
+                    )
+                ],
+            )
+        else:
+            eq_(
+                cursor.mock_calls,
+                [mock.call.var(mock.ANY, None, cursor.arraysize)],
+            )
+
+
 class OutParamTest(fixtures.TestBase, AssertsExecutionResults):
     __only_on__ = "oracle+cx_oracle"
     __backend__ = True