git.ipfire.org Git - thirdparty/sqlalchemy/sqlalchemy.git/commitdiff
update cx_Oracle / oracledb LOB handling
author Mike Bayer <mike_mp@zzzcomputing.com>
Tue, 7 Jun 2022 20:09:35 +0000 (16:09 -0400)
committer Mike Bayer <mike_mp@zzzcomputing.com>
Fri, 10 Jun 2022 14:48:17 +0000 (10:48 -0400)
Adjustments made to the BLOB / CLOB / NCLOB datatypes in the cx_Oracle and
oracledb dialects, to improve performance based on recommendations from
Oracle developers.

References: https://github.com/oracle/python-cx_Oracle/issues/596
Fixes: #7494
Change-Id: I0d8cc3579140aa65cacf5b7d3373f7e1929a8f85

doc/build/changelog/unreleased_20/7494.rst [new file with mode: 0644]
doc/build/core/connections.rst
lib/sqlalchemy/dialects/oracle/cx_oracle.py
lib/sqlalchemy/engine/interfaces.py
setup.cfg
test/dialect/oracle/test_dialect.py
test/dialect/oracle/test_types.py
tox.ini

diff --git a/doc/build/changelog/unreleased_20/7494.rst b/doc/build/changelog/unreleased_20/7494.rst
new file mode 100644 (file)
index 0000000..3d4af47
--- /dev/null
+++ b/doc/build/changelog/unreleased_20/7494.rst
@@ -0,0 +1,7 @@
+.. change::
+    :tags: bug, oracle
+    :tickets: 7494
+
+    Adjustments made to the BLOB / CLOB / NCLOB datatypes in the cx_Oracle and
+    oracledb dialects, to improve performance based on recommendations from
+    Oracle developers.
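diff --git a/doc/build/core/connections.rst b/doc/build/core/connections.rst
index 123c9b6a1e014cbed88d8c78ade3212bbda7d943..df2969cdfa327d42f3339888e82cc23cd087bbb3 100644 (file)
--- a/doc/build/core/connections.rst
+++ b/doc/build/core/connections.rst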
@@ -1782,6 +1782,8 @@ method may be used::
 
 .. versionadded:: 1.4  Added the :meth:`_engine.Connection.exec_driver_sql` method.
 
+.. _dbapi_connections_cursor:
+
 Working with the DBAPI cursor directly
 --------------------------------------
 
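diff --git a/lib/sqlalchemy/dialects/oracle/cx_oracle.py b/lib/sqlalchemy/dialects/oracle/cx_oracle.py
index 60592253d7c078a414537804110f1df280fcb513..25e93632cba4956bdda20a244c616c94d97bf595 100644 (file)
--- a/lib/sqlalchemy/dialects/oracle/cx_oracle.py
+++ b/lib/sqlalchemy/dialects/oracle/cx_oracle.py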
@@ -358,28 +358,18 @@ The dialect supports RETURNING fully.
 
 .. _cx_oracle_lob:
 
-LOB Objects
------------
+LOB Datatypes
+--------------
 
-cx_oracle returns oracle LOBs using the cx_oracle.LOB object.  SQLAlchemy
-converts these to strings so that the interface of the Binary type is
-consistent with that of other backends, which takes place within a cx_Oracle
-outputtypehandler.
+LOB datatypes refer to the "large object" datatypes such as CLOB, NCLOB and
+BLOB. Modern versions of cx_Oracle and oracledb are optimized for these
+datatypes to be delivered as a single buffer. As such, SQLAlchemy makes use of
+these newer type handlers by default.
 
-cx_Oracle prior to version 6 would require that LOB objects be read before
-a new batch of rows would be read, as determined by the ``cursor.arraysize``.
-As of the 6 series, this limitation has been lifted.  Nevertheless, because
-SQLAlchemy pre-reads these LOBs up front, this issue is avoided in any case.
-
-To disable the auto "read()" feature of the dialect, the flag
-``auto_convert_lobs=False`` may be passed to :func:`_sa.create_engine`.  Under
-the cx_Oracle 5 series, having this flag turned off means there is the chance
-of reading from a stale LOB object if not read as it is fetched.   With
-cx_Oracle 6, this issue is resolved.
-
-.. versionchanged:: 1.2  the LOB handling system has been greatly simplified
-   internally to make use of outputtypehandlers, and no longer makes use
-   of alternate "buffered" result set objects.
+To disable the use of newer type handlers and deliver LOB objects as classic
+buffered objects with a ``read()`` method, the parameter
+``auto_convert_lobs=False`` may be passed to :func:`_sa.create_engine`;
+this setting can only be applied engine-wide.
 
 Two Phase Transactions Not Supported
 -------------------------------------
@@ -449,6 +439,10 @@ from ...engine import processors
 from ...sql import sqltypes
 from ...sql._typing import is_sql_compiler
 
+# source:
+# https://github.com/oracle/python-cx_Oracle/issues/596#issuecomment-999243649
+_CX_ORACLE_MAGIC_LOB_SIZE = 131072
+
 
 class _OracleInteger(sqltypes.Integer):
     def get_dbapi_type(self, dbapi):
@@ -584,6 +578,10 @@ class _CXOracleTIMESTAMP(oracle._OracleDateLiteralRender, sqltypes.TIMESTAMP):
         return self._literal_processor_datetime(dialect)
 
 
+class _LOBDataType:
+    pass
+
+
 # TODO: the names used across CHAR / VARCHAR / NCHAR / NVARCHAR
 # here are inconsistent and not very good
 class _OracleChar(sqltypes.CHAR):
@@ -606,22 +604,31 @@ class _OracleUnicodeStringCHAR(sqltypes.Unicode):
         return dbapi.LONG_STRING
 
 
-class _OracleUnicodeTextNCLOB(oracle.NCLOB):
+class _OracleUnicodeTextNCLOB(_LOBDataType, oracle.NCLOB):
     def get_dbapi_type(self, dbapi):
-        return dbapi.NCLOB
+        # previously, this was dbapi.NCLOB.
+        # DB_TYPE_NVARCHAR will instead be passed to setinputsizes()
+        # when this datatype is used.
+        return dbapi.DB_TYPE_NVARCHAR
 
 
-class _OracleUnicodeTextCLOB(sqltypes.UnicodeText):
+class _OracleUnicodeTextCLOB(_LOBDataType, sqltypes.UnicodeText):
     def get_dbapi_type(self, dbapi):
-        return dbapi.CLOB
+        # previously, this was dbapi.CLOB.
+        # DB_TYPE_NVARCHAR will instead be passed to setinputsizes()
+        # when this datatype is used.
+        return dbapi.DB_TYPE_NVARCHAR
 
 
-class _OracleText(sqltypes.Text):
+class _OracleText(_LOBDataType, sqltypes.Text):
     def get_dbapi_type(self, dbapi):
-        return dbapi.CLOB
+        # previously, this was dbapi.CLOB.
+        # DB_TYPE_NVARCHAR will instead be passed to setinputsizes()
+        # when this datatype is used.
+        return dbapi.DB_TYPE_NVARCHAR
 
 
-class _OracleLong(oracle.LONG):
+class _OracleLong(_LOBDataType, oracle.LONG):
     def get_dbapi_type(self, dbapi):
         return dbapi.LONG_STRING
 
@@ -641,9 +648,12 @@ class _OracleEnum(sqltypes.Enum):
         return process
 
 
-class _OracleBinary(sqltypes.LargeBinary):
+class _OracleBinary(_LOBDataType, sqltypes.LargeBinary):
     def get_dbapi_type(self, dbapi):
-        return dbapi.BLOB
+        # previously, this was dbapi.BLOB.
+        # DB_TYPE_RAW will instead be passed to setinputsizes()
+        # when this datatype is used.
+        return dbapi.DB_TYPE_RAW
 
     def bind_processor(self, dialect):
         return None
@@ -703,6 +713,10 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext):
         # check for has_out_parameters or RETURNING, create cx_Oracle.var
         # objects if so
         if self.compiled.has_out_parameters or self.compiled._oracle_returning:
+
+            out_parameters = self.out_parameters
+            assert out_parameters is not None
+
             quoted_bind_names = self.compiled.escaped_bind_names
             for bindparam in self.compiled.binds.values():
                 if bindparam.isoutparam:
@@ -710,7 +724,7 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext):
                     type_impl = bindparam.type.dialect_impl(self.dialect)
 
                     if hasattr(type_impl, "_cx_oracle_var"):
-                        self.out_parameters[name] = type_impl._cx_oracle_var(
+                        out_parameters[name] = type_impl._cx_oracle_var(
                             self.dialect, self.cursor, arraysize=1
                         )
                     else:
@@ -718,6 +732,8 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext):
 
                         cx_Oracle = self.dialect.dbapi
 
+                        assert cx_Oracle is not None
+
                         if dbtype is None:
                             raise exc.InvalidRequestError(
                                 "Cannot create out parameter for "
@@ -726,23 +742,37 @@ class OracleExecutionContext_cx_oracle(OracleExecutionContext):
                                 " cx_oracle" % (bindparam.key, bindparam.type)
                             )
 
-                        if dbtype in (
-                            cx_Oracle.BLOB,
-                            cx_Oracle.CLOB,
-                            cx_Oracle.NCLOB,
-                        ):
-                            self.out_parameters[name] = self.cursor.var(
+                        # note this is an OUT parameter.   Using
+                        # non-LOB datavalues with large unicode-holding
+                        # values causes the failure (both cx_Oracle and
+                        # oracledb):
+                        # ORA-22835: Buffer too small for CLOB to CHAR or
+                        # BLOB to RAW conversion (actual: 16507,
+                        # maximum: 4000)
+                        # [SQL: INSERT INTO long_text (x, y, z) VALUES
+                        # (:x, :y, :z) RETURNING long_text.x, long_text.y,
+                        # long_text.z INTO :ret_0, :ret_1, :ret_2]
+                        # so even for DB_TYPE_NVARCHAR we convert to a LOB
+
+                        if isinstance(type_impl, _LOBDataType):
+                            if dbtype == cx_Oracle.DB_TYPE_NVARCHAR:
+                                dbtype = cx_Oracle.NCLOB
+                            elif dbtype == cx_Oracle.DB_TYPE_RAW:
+                                dbtype = cx_Oracle.BLOB
+                            # other LOB types go in directly
+
+                            out_parameters[name] = self.cursor.var(
                                 dbtype,
                                 outconverter=lambda value: value.read(),
                                 arraysize=1,
                             )
                         else:
-                            self.out_parameters[name] = self.cursor.var(
+                            out_parameters[name] = self.cursor.var(
                                 dbtype, arraysize=1
                             )
                     self.parameters[0][
                         quoted_bind_names.get(name, name)
-                    ] = self.out_parameters[name]
+                    ] = out_parameters[name]
 
     def _generate_cursor_outputtype_handler(self):
         output_handlers = {}
@@ -931,14 +961,21 @@ class OracleDialect_cx_oracle(OracleDialect):
         self._load_version(dbapi_module)
 
         if dbapi_module is not None:
+            # these constants will first be seen in SQLAlchemy datatypes
+            # coming from the get_dbapi_type() method.   We then
+            # will place the following types into setinputsizes() calls
+            # on each statement.  Oracle constants that are not in this
+            # list will not be put into setinputsizes().
             self.include_set_input_sizes = {
                 dbapi_module.DATETIME,
-                dbapi_module.NCLOB,
-                dbapi_module.CLOB,
-                dbapi_module.LOB,
+                dbapi_module.DB_TYPE_NVARCHAR,  # used for CLOB, NCLOB
+                dbapi_module.DB_TYPE_RAW,  # used for BLOB
+                dbapi_module.NCLOB,  # not currently used except for OUT param
+                dbapi_module.CLOB,  # not currently used except for OUT param
+                dbapi_module.LOB,  # not currently used
+                dbapi_module.BLOB,  # not currently used except for OUT param
                 dbapi_module.NCHAR,
                 dbapi_module.FIXED_NCHAR,
-                dbapi_module.BLOB,
                 dbapi_module.FIXED_CHAR,
                 dbapi_module.TIMESTAMP,
                 int,  # _OracleInteger,
@@ -1131,8 +1168,8 @@ class OracleDialect_cx_oracle(OracleDialect):
                 cx_Oracle.NCLOB,
             ):
                 return cursor.var(
-                    cx_Oracle.LONG_STRING,
-                    size,
+                    cx_Oracle.DB_TYPE_NVARCHAR,
+                    _CX_ORACLE_MAGIC_LOB_SIZE,
                     cursor.arraysize,
                     **dialect._cursor_var_unicode_kwargs,
                 )
@@ -1141,8 +1178,8 @@ class OracleDialect_cx_oracle(OracleDialect):
                 cx_Oracle.BLOB,
             ):
                 return cursor.var(
-                    cx_Oracle.LONG_BINARY,
-                    size,
+                    cx_Oracle.DB_TYPE_RAW,
+                    _CX_ORACLE_MAGIC_LOB_SIZE,
                     cursor.arraysize,
                 )
 
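diff --git a/lib/sqlalchemy/engine/interfaces.py b/lib/sqlalchemy/engine/interfaces.py
index cd6efb904dd33838fabde51df664bee8cabad85a..b8e85b64682f72d926ac2f3ef72d6bad27aa2d2c 100644 (file)
--- a/lib/sqlalchemy/engine/interfaces.py
+++ b/lib/sqlalchemy/engine/interfaces.py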
@@ -191,6 +191,9 @@ class DBAPICursor(Protocol):
     def nextset(self) -> Optional[bool]:
         ...
 
+    def __getattr__(self, key: str) -> Any:
+        ...
+
 
 _CoreSingleExecuteParams = Mapping[str, Any]
 _MutableCoreSingleExecuteParams = MutableMapping[str, Any]
diff --git a/setup.cfg b/setup.cfg
index a9e5181b1fa319b71893a5c0824f3fd624a988be..80406827523e5b440166cc22cf3cb2b595977b8c 100644 (file)
--- a/setup.cfg
+++ b/setup.cfg
@@ -57,7 +57,7 @@ mariadb_connector =
 oracle =
     cx_oracle>=7
 oracle_oracledb =
-    oracledb>=1
+    oracledb>=1.0.1
 postgresql = psycopg2>=2.7
 postgresql_pg8000 = pg8000>=1.16.6,!=1.29.0
 postgresql_asyncpg =
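diff --git a/test/dialect/oracle/test_dialect.py b/test/dialect/oracle/test_dialect.py
index ad716d8c83369bb696e2b3cc38aa09af394ba1dd..98eb76c1841cfda71c76e23eeba2fed7b6236c46 100644 (file)
--- a/test/dialect/oracle/test_dialect.py
+++ b/test/dialect/oracle/test_dialect.py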
@@ -380,7 +380,7 @@ class EncodingErrorsTest(fixtures.TestBase):
             [
                 mock.call.var(
                     mock.ANY,
-                    None,
+                    mock.ANY,
                     cursor.arraysize,
                     encodingErrors="ignore",
                 )
@@ -411,7 +411,7 @@ class EncodingErrorsTest(fixtures.TestBase):
         else:
             eq_(
                 cursor.mock_calls,
-                [mock.call.var(mock.ANY, None, cursor.arraysize)],
+                [mock.call.var(mock.ANY, mock.ANY, cursor.arraysize)],
             )
 
 
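diff --git a/test/dialect/oracle/test_types.py b/test/dialect/oracle/test_types.py
index 131b6fa34c063b14dcb6947368ee2e87a2cb0556..76fc7d542c59cf09cda8e977fff8220e127030fd 100644 (file)
--- a/test/dialect/oracle/test_types.py
+++ b/test/dialect/oracle/test_types.py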
@@ -1188,9 +1188,9 @@ class SetInputSizesTest(fixtures.TestBase):
         (oracle.BINARY_FLOAT, 25.34534, "NATIVE_FLOAT", False),
         (oracle.DOUBLE_PRECISION, 25.34534, None, False),
         (Unicode(30), "test", "NCHAR", True),
-        (UnicodeText(), "test", "NCLOB", True),
+        (UnicodeText(), "test", "DB_TYPE_NVARCHAR", True),
         (Unicode(30), "test", None, False),
-        (UnicodeText(), "test", "CLOB", False),
+        (UnicodeText(), "test", "DB_TYPE_NVARCHAR", False),
         (String(30), "test", None, False),
         (CHAR(30), "test", "FIXED_CHAR", False),
         (NCHAR(30), "test", "FIXED_NCHAR", False),
diff --git a/tox.ini b/tox.ini
index db78deead8316c51e7048ab9e5b8914a40f75c08..d966d9807d049a81161f4b0d8b5978e445d6b43b 100644 (file)
--- a/tox.ini
+++ b/tox.ini
@@ -32,8 +32,8 @@ deps=
      mysql: .[mariadb_connector]
 
      oracle: .[oracle]
-     oracle: .[oracle_oracledb]
-
+     oracle: .[oracle_oracledb]
+     oracle: git+https://github.com/oracle/python-oracledb
      mssql: .[mssql]
 
      dbapimain-sqlite: git+https://github.com/omnilib/aiosqlite.git#egg=aiosqlite