Implement encoding_errors for cx_oracle

author Mike Bayer <mike_mp@zzzcomputing.com>

Mon, 12 Aug 2019 19:09:37 +0000 (15:09 -0400)

committer Mike Bayer <mike_mp@zzzcomputing.com>

Mon, 21 Oct 2019 18:20:24 +0000 (14:20 -0400)
author Mike Bayer <mike_mp@zzzcomputing.com>
Mon, 12 Aug 2019 19:09:37 +0000 (15:09 -0400)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Mon, 21 Oct 2019 18:20:24 +0000 (14:20 -0400)
diff --git a/doc/build/changelog/unreleased_13/4799.rst b/doc/build/changelog/unreleased_13/4799.rst

new file mode 100644 (file)

index 0000000..8cc12a6
--- /dev/null
+++ b/doc/build/changelog/unreleased_13/4799.rst
@@ -0,0 +1,12 @@
+.. change::
+    :tags: usecase, oracle
+    :tickets: 4799
+
+    Added dialect-level flag ``encoding_errors`` to the cx_Oracle dialect,
+    which can be specified as part of :func:`.create_engine`.  This is passed
+    to SQLAlchemy's unicode decoding converter under Python 2, and to
+    cx_Oracle's ``cursor.var()`` object as the ``encodingErrors`` parameter
+    under Python 3, for the very unusual case that broken encodings are present
+    in the target database which cannot be fetched unless error handling is
+    relaxed.  The value is ultimately one of the Python "encoding errors"
+    parameters passed to ``decode()``.
diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py

index 2572a79b377d5087628c04c4cae473b31dc12385..d7cd6dcfc6dfafad8bb0399d44aae8c430c1668f 100644 (file)
--- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py
+++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
@@ -78,6 +78,8 @@ The parameters accepted by the cx_oracle dialect are as follows:
  
  * ``coerce_to_decimal`` - see :ref:`cx_oracle_numeric` for detail.
  
+* ``encoding_errors`` - see :ref:`cx_oracle_unicode_encoding_errors` for detail.
+
  .. _cx_oracle_unicode:
  
  Unicode
@@ -124,6 +126,23 @@ VARCHAR2, CHAR, and CLOB, the flag ``coerce_to_unicode=False`` can be passed to
     delivered as VARCHAR2/CHAR/CLOB data.
  
  
+.. _cx_oracle_unicode_encoding_errors:
+
+Encoding Errors
+^^^^^^^^^^^^^^^
+
+For the unusual case that data in the Oracle database is present with a broken
+encoding, the dialect accepts a parameter ``encoding_errors`` which will be
+passed to Unicode decoding functions in order to affect how decoding errors are
+handled.  The value is ultimately consumed by the Python `decode
+<https://docs.python.org/3/library/stdtypes.html#bytes.decode>`_ function, and
+is passed both via cx_Oracle's ``encodingErrors`` parameter consumed by
+``Cursor.var()``, as well as SQLAlchemy's own decoding function, as the
+cx_Oracle dialect makes use of both under different circumstances.
+
+.. versionadded:: 1.3.11
+
+
  .. _cx_oracle_setinputsizes:
  
  Fine grained control over cx_Oracle data binding performance with setinputsizes
@@ -760,12 +779,14 @@ class OracleDialect_cx_oracle(OracleDialect):
          coerce_to_unicode=True,
          coerce_to_decimal=True,
          arraysize=50,
+        encoding_errors=None,
          threaded=None,
          **kwargs
      ):
  
          OracleDialect.__init__(self, **kwargs)
          self.arraysize = arraysize
+        self.encoding_errors = encoding_errors
          if threaded is not None:
              self._cx_oracle_threaded = threaded
          self.auto_convert_lobs = auto_convert_lobs
@@ -823,6 +844,19 @@ class OracleDialect_cx_oracle(OracleDialect):
  
          self._is_cx_oracle_6 = self.cx_oracle_ver >= (6,)
  
+    @property
+    def _cursor_var_unicode_kwargs(self):
+        if self.encoding_errors:
+            if self.cx_oracle_ver >= (6, 4):
+                return {"encodingErrors": self.encoding_errors}
+            else:
+                util.warn(
+                    "cx_oracle version %r does not support encodingErrors"
+                    % (self.cx_oracle_ver,)
+                )
+
+        return {}
+
      def _parse_cx_oracle_ver(self, version):
          m = re.match(r"(\d+)\.(\d+)(?:\.(\d+))?", version)
          if m:
@@ -920,7 +954,7 @@ class OracleDialect_cx_oracle(OracleDialect):
              ):
                  if compat.py2k:
                      outconverter = processors.to_unicode_processor_factory(
-                        dialect.encoding, None
+                        dialect.encoding, errors=dialect.encoding_errors
                      )
                      return cursor.var(
                          cx_Oracle.STRING,
@@ -929,7 +963,12 @@ class OracleDialect_cx_oracle(OracleDialect):
                          outconverter=outconverter,
                      )
                  else:
-                    return cursor.var(util.text_type, size, cursor.arraysize)
+                    return cursor.var(
+                        util.text_type,
+                        size,
+                        cursor.arraysize,
+                        **dialect._cursor_var_unicode_kwargs
+                    )
  
              elif dialect.auto_convert_lobs and default_type in (
                  cx_Oracle.CLOB,
@@ -937,7 +976,7 @@ class OracleDialect_cx_oracle(OracleDialect):
              ):
                  if compat.py2k:
                      outconverter = processors.to_unicode_processor_factory(
-                        dialect.encoding, None
+                        dialect.encoding, errors=dialect.encoding_errors
                      )
                      return cursor.var(
                          default_type,
@@ -951,6 +990,7 @@ class OracleDialect_cx_oracle(OracleDialect):
                          size,
                          cursor.arraysize,
                          outconverter=lambda value: value.read(),
+                        **dialect._cursor_var_unicode_kwargs
                      )
  
              elif dialect.auto_convert_lobs and default_type in (
diff --git a/test/dialect/oracle/test_dialect.py b/test/dialect/oracle/test_dialect.py

index c29a0b76d799d48b001f25f09ebf9b8ab56836b0..62926700e7e1853b0a1aad06d413f047b7adc11d 100644 (file)
--- a/test/dialect/oracle/test_dialect.py
+++ b/test/dialect/oracle/test_dialect.py
@@ -30,6 +30,7 @@ from sqlalchemy.testing.mock import Mock
  from sqlalchemy.testing.schema import Column
  from sqlalchemy.testing.schema import Table
  from sqlalchemy.util import u
+from sqlalchemy.util import ue
  
  
  class DialectTest(fixtures.TestBase):
@@ -63,6 +64,200 @@ class DialectTest(fixtures.TestBase):
              cx_oracle.OracleDialect_cx_oracle(dbapi=Mock())
  
  
+class EncodingErrorsTest(fixtures.TestBase):
+    """mock test for encoding_errors.
+
+    While we tried to write a round trip test, I could only reproduce the
+    problem on Python 3 and only for STRING/CHAR.  I couldn't get a CLOB to
+    come back with broken encoding and also under py2k cx_Oracle would always
+    return a bytestring with the correct encoding.  Since the test barely
+    worked, it is not included here to avoid future problems.  It's not clear
+    what other levels of encode/decode are going on such that explicitly
+    selecting for AL16UTF16 is still returning a utf-8 bytestring under py2k or
+    for CLOBs, nor is it really clear that this flag is useful; however, at
+    least for the Py3K case, cx_Oracle supports the flag and we did have one
+    user reporting that they had a (non-reproducible) database which
+    illustrated the problem so we will pass it in.
+
+    """
+
+    # NOTE: these numbers are arbitrary, they are not the actual
+    # cx_Oracle constants
+    cx_Oracle_NUMBER = 0
+    cx_Oracle_STRING = 1
+    cx_Oracle_FIXED_CHAR = 2
+    cx_Oracle_CLOB = 3
+    cx_Oracle_NCLOB = 4
+
+    @testing.fixture
+    def cx_Oracle(self):
+        return mock.Mock(
+            NUMBER=self.cx_Oracle_NUMBER,
+            STRING=self.cx_Oracle_STRING,
+            FIXED_CHAR=self.cx_Oracle_FIXED_CHAR,
+            CLOB=self.cx_Oracle_CLOB,
+            NCLOB=self.cx_Oracle_NCLOB,
+            version="7.0.1",
+            __future__=mock.Mock(),
+        )
+
+    _oracle_char_combinations = testing.combinations(
+        ("STRING", cx_Oracle_STRING, False),
+        ("FIXED_CHAR", cx_Oracle_FIXED_CHAR, False),
+        ("CLOB", cx_Oracle_CLOB, True),
+        ("NCLOB", cx_Oracle_NCLOB, True),
+        argnames="cx_oracle_type,use_read",
+        id_="iaa",
+    )
+
+    def _assert_errorhandler(self, outconverter, use_read, has_errorhandler):
+        data = ue("\uee2c\u9a66")  # this is u"\uee2c\u9a66"
+
+        utf8_w_errors = data.encode("utf-16")
+
+        if use_read:
+            utf8_w_errors = mock.Mock(
+                read=mock.Mock(return_value=utf8_w_errors)
+            )
+
+        if has_errorhandler:
+
+            eq_(
+                outconverter(utf8_w_errors),
+                data.encode("utf-16").decode("utf-8", "ignore"),
+            )
+        else:
+            assert_raises(UnicodeDecodeError, outconverter, utf8_w_errors)
+
+    @_oracle_char_combinations
+    @testing.requires.python3
+    def test_older_cx_oracle_warning(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        cx_Oracle.version = "6.3"
+
+        ignore_dialect = cx_oracle.dialect(
+            dbapi=cx_Oracle, encoding_errors="ignore"
+        )
+        ignore_outputhandler = (
+            ignore_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+
+        with testing.expect_warnings(
+            r"cx_oracle version \(6, 3\) does not support encodingErrors"
+        ):
+            ignore_outputhandler(
+                cursor, "foo", cx_oracle_type, None, None, None
+            )
+
+    @_oracle_char_combinations
+    @testing.requires.python2
+    def test_encoding_errors_sqla_py2k(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        ignore_dialect = cx_oracle.dialect(
+            dbapi=cx_Oracle, encoding_errors="ignore"
+        )
+
+        ignore_outputhandler = (
+            ignore_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+        ignore_outputhandler(cursor, "foo", cx_oracle_type, None, None, None)
+        outconverter = cursor.mock_calls[0][2]["outconverter"]
+        self._assert_errorhandler(outconverter, use_read, True)
+
+    @_oracle_char_combinations
+    @testing.requires.python2
+    def test_no_encoding_errors_sqla_py2k(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        plain_dialect = cx_oracle.dialect(dbapi=cx_Oracle)
+
+        plain_outputhandler = (
+            plain_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+        plain_outputhandler(cursor, "foo", cx_oracle_type, None, None, None)
+        outconverter = cursor.mock_calls[0][2]["outconverter"]
+        self._assert_errorhandler(outconverter, use_read, False)
+
+    @_oracle_char_combinations
+    @testing.requires.python3
+    def test_encoding_errors_cx_oracle_py3k(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        ignore_dialect = cx_oracle.dialect(
+            dbapi=cx_Oracle, encoding_errors="ignore"
+        )
+
+        ignore_outputhandler = (
+            ignore_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+        ignore_outputhandler(cursor, "foo", cx_oracle_type, None, None, None)
+
+        if use_read:
+            eq_(
+                cursor.mock_calls,
+                [
+                    mock.call.var(
+                        mock.ANY,
+                        None,
+                        cursor.arraysize,
+                        encodingErrors="ignore",
+                        outconverter=mock.ANY,
+                    )
+                ],
+            )
+        else:
+            eq_(
+                cursor.mock_calls,
+                [
+                    mock.call.var(
+                        mock.ANY,
+                        None,
+                        cursor.arraysize,
+                        encodingErrors="ignore",
+                    )
+                ],
+            )
+
+    @_oracle_char_combinations
+    @testing.requires.python3
+    def test_no_encoding_errors_cx_oracle_py3k(
+        self, cx_Oracle, cx_oracle_type, use_read
+    ):
+        plain_dialect = cx_oracle.dialect(dbapi=cx_Oracle)
+
+        plain_outputhandler = (
+            plain_dialect._generate_connection_outputtype_handler()
+        )
+
+        cursor = mock.Mock()
+        plain_outputhandler(cursor, "foo", cx_oracle_type, None, None, None)
+
+        if use_read:
+            eq_(
+                cursor.mock_calls,
+                [
+                    mock.call.var(
+                        mock.ANY, None, cursor.arraysize, outconverter=mock.ANY
+                    )
+                ],
+            )
+        else:
+            eq_(
+                cursor.mock_calls,
+                [mock.call.var(mock.ANY, None, cursor.arraysize)],
+            )
+
+
  class OutParamTest(fixtures.TestBase, AssertsExecutionResults):
      __only_on__ = "oracle+cx_oracle"
      __backend__ = True
author	Mike Bayer <mike_mp@zzzcomputing.com>
	Mon, 12 Aug 2019 19:09:37 +0000 (15:09 -0400)
committer	Mike Bayer <mike_mp@zzzcomputing.com>
	Mon, 21 Oct 2019 18:20:24 +0000 (14:20 -0400)
doc/build/changelog/unreleased_13/4799.rst	[new file with mode: 0644]	patch \| blob
lib/sqlalchemy/dialects/oracle/cx_oracle.py		patch \| blob \| blame \| history
test/dialect/oracle/test_dialect.py		patch \| blob \| blame \| history