--- /dev/null
+.. change::
+ :tags: usecase, oracle
+ :tickets: 12317, 12341
+
+ Added new datatype :class:`_oracle.VECTOR` and accompanying DDL and DQL
+ support to fully support this type for Oracle Database. This change
+ includes the base :class:`_oracle.VECTOR` type that adds new type-specific
+ methods ``l2_distance``, ``cosine_distance``, ``inner_product`` as well as
+ new parameters ``oracle_vector`` for the :class:`.Index` construct,
+ allowing vector indexes to be configured, and ``oracle_fetch_approximate``
+ for the :meth:`.Select.fetch` clause. Pull request courtesy Suraj Shaw.
+
+ .. seealso::
+
+ :ref:`oracle_vector_datatype`
+
TIMESTAMP,
VARCHAR,
VARCHAR2,
+ VECTOR,
)
Types which are specific to Oracle Database, or have Oracle-specific
.. autoclass:: TIMESTAMP
:members: __init__
+.. autoclass:: VECTOR
+ :members: __init__
+
+.. autoclass:: VectorIndexType
+ :members:
+
+.. autoclass:: VectorIndexConfig
+ :members:
+ :undoc-members:
+
+.. autoclass:: VectorStorageFormat
+ :members:
+
+.. autoclass:: VectorDistanceType
+ :members:
+
+
.. _oracledb:
python-oracledb
from .base import TIMESTAMP
from .base import VARCHAR
from .base import VARCHAR2
+from .base import VECTOR
+from .base import VectorIndexConfig
+from .base import VectorIndexType
+from .vector import VectorDistanceType
+from .vector import VectorStorageFormat
# Alias oracledb also as oracledb_async
oracledb_async = type(
"NVARCHAR2",
"ROWID",
"REAL",
+ "VECTOR",
+ "VectorDistanceType",
+ "VectorIndexType",
+ "VectorIndexConfig",
+ "VectorStorageFormat",
)
number of prefix columns to compress, or ``True`` to use the default (all
columns for non-unique indexes, all but the last column for unique indexes).
+.. _oracle_vector_datatype:
+
+VECTOR Datatype
+---------------
+
+Oracle Database 23ai introduced a new VECTOR datatype for artificial intelligence
+and machine learning search operations. The VECTOR datatype is a homogeneous array
+of 8-bit signed integers, 8-bit unsigned integers (binary), 32-bit floating-point numbers,
+or 64-bit floating-point numbers.
+
+.. seealso::
+
+ `Using VECTOR Data
+ <https://python-oracledb.readthedocs.io/en/latest/user_guide/vector_data_type.html>`_ - in the documentation
+ for the :ref:`oracledb` driver.
+
+.. versionadded:: 2.0.41
+
+CREATE TABLE support for VECTOR
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+With the :class:`.VECTOR` datatype, you can specify the dimension for the data
+and the storage format. Valid values for storage format are enum values from
+:class:`.VectorStorageFormat`. To create a table that includes a
+:class:`.VECTOR` column::
+
+ from sqlalchemy.dialects.oracle import VECTOR, VectorStorageFormat
+
+ t = Table(
+ "t1",
+ metadata,
+ Column("id", Integer, primary_key=True),
+ Column(
+ "embedding",
+ VECTOR(dim=3, storage_format=VectorStorageFormat.FLOAT32),
+ ),
+ Column(...),
+ ...,
+ )
+
+Vectors can also be defined with an arbitrary number of dimensions and formats.
+This allows you to specify vectors of different dimensions with the various
+storage formats mentioned above.
+
+**Examples**
+
+* In this case, the storage format is flexible, allowing any vector type data to be inserted,
+ such as INT8 or BINARY etc::
+
+ vector_col: Mapped[array.array] = mapped_column(VECTOR(dim=3))
+
+* The dimension is flexible in this case, meaning that any dimension vector can be used::
+
+ vector_col: Mapped[array.array] = mapped_column(
+ VECTOR(storage_format=VectorStorageType.INT8)
+ )
+
+* Both the dimensions and the storage format are flexible::
+
+ vector_col: Mapped[array.array] = mapped_column(VECTOR)
+
+Python Datatypes for VECTOR
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+VECTOR data can be inserted using Python list or Python ``array.array()`` objects.
+Python arrays of type FLOAT (32-bit), DOUBLE (64-bit), or INT (8-bit signed integer)
+are used as bind values when inserting VECTOR columns::
+
+ from sqlalchemy import insert, select
+
+ with engine.begin() as conn:
+ conn.execute(
+ insert(t1),
+ {"id": 1, "embedding": [1, 2, 3]},
+ )
+
+VECTOR Indexes
+~~~~~~~~~~~~~~
+
+The VECTOR feature supports an Oracle-specific parameter ``oracle_vector``
+on the :class:`.Index` construct, which allows the construction of VECTOR
+indexes.
+
+To utilize VECTOR indexing, set the ``oracle_vector`` parameter to True to use
+the default values provided by Oracle. HNSW is the default indexing method::
+
+ from sqlalchemy import Index
+
+ Index(
+ "vector_index",
+ t1.c.embedding,
+ oracle_vector=True,
+ )
+
+The full range of parameters for vector indexes are available by using the
+:class:`.VectorIndexConfig` dataclass in place of a boolean; this dataclass
+allows full configuration of the index::
+
+ Index(
+ "hnsw_vector_index",
+ t1.c.embedding,
+ oracle_vector=VectorIndexConfig(
+ index_type=VectorIndexType.HNSW,
+ distance=VectorDistanceType.COSINE,
+ accuracy=90,
+ hnsw_neighbors=5,
+ hnsw_efconstruction=20,
+ parallel=10,
+ ),
+ )
+
+ Index(
+ "ivf_vector_index",
+ t1.c.embedding,
+ oracle_vector=VectorIndexConfig(
+ index_type=VectorIndexType.IVF,
+ distance=VectorDistanceType.DOT,
+ accuracy=90,
+ ivf_neighbor_partitions=5,
+ ),
+ )
+
+For complete explanation of these parameters, see the Oracle documentation linked
+below.
+
+.. seealso::
+
+ `CREATE VECTOR INDEX <https://www.oracle.com/pls/topic/lookup?ctx=dblatest&id=GUID-B396C369-54BB-4098-A0DD-7C54B3A0D66F>`_ - in the Oracle documentation
+
+
+
+Similarity Searching
+~~~~~~~~~~~~~~~~~~~~
+
+When using the :class:`_oracle.VECTOR` datatype with a :class:`.Column` or similar
+ORM mapped construct, additional comparison functions are available, including:
+
+* ``l2_distance``
+* ``cosine_distance``
+* ``inner_product``
+
+Example Usage::
+
+ result_vector = connection.scalars(
+ select(t1).order_by(t1.embedding.l2_distance([2, 3, 4])).limit(3)
+ )
+
+ for user in vector:
+ print(user.id, user.embedding)
+
+FETCH APPROXIMATE support
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Approximate vector search can only be performed when all syntax and semantic
+rules are satisfied, the corresponding vector index is available, and the
+query optimizer determines to perform it. If any of these conditions are
+unmet, then an approximate search is not performed. In this case the query
+returns exact results.
+
+To enable approximate searching during similarity searches on VECTORS, the
+``oracle_fetch_approximate`` parameter may be used with the :meth:`.Select.fetch`
+clause to add ``FETCH APPROX`` to the SELECT statement::
+
+ select(users_table).fetch(5, oracle_fetch_approximate=True)
+
""" # noqa
from __future__ import annotations
from collections import defaultdict
+from dataclasses import fields
from functools import lru_cache
from functools import wraps
import re
from .types import ROWID # noqa
from .types import TIMESTAMP
from .types import VARCHAR2 # noqa
+from .vector import VECTOR
+from .vector import VectorIndexConfig
+from .vector import VectorIndexType
from ... import Computed
from ... import exc
from ... import schema as sa_schema
from ...sql import null
from ...sql import or_
from ...sql import select
+from ...sql import selectable as sa_selectable
from ...sql import sqltypes
from ...sql import util as sql_util
from ...sql import visitors
"BINARY_DOUBLE": BINARY_DOUBLE,
"BINARY_FLOAT": BINARY_FLOAT,
"ROWID": ROWID,
+ "VECTOR": VECTOR,
}
def visit_ROWID(self, type_, **kw):
return "ROWID"
+ def visit_VECTOR(self, type_, **kw):
+ if type_.dim is None and type_.storage_format is None:
+ return "VECTOR(*,*)"
+ elif type_.storage_format is None:
+ return f"VECTOR({type_.dim},*)"
+ elif type_.dim is None:
+ return f"VECTOR(*,{type_.storage_format.value})"
+ else:
+ return f"VECTOR({type_.dim},{type_.storage_format.value})"
+
class OracleCompiler(compiler.SQLCompiler):
"""Oracle compiler modifies the lexical structure of Select
else:
return select._fetch_clause
+ def fetch_clause(
+ self,
+ select,
+ fetch_clause=None,
+ require_offset=False,
+ use_literal_execute_for_simple_int=False,
+ **kw,
+ ):
+ text = super().fetch_clause(
+ select,
+ fetch_clause=fetch_clause,
+ require_offset=require_offset,
+ use_literal_execute_for_simple_int=(
+ use_literal_execute_for_simple_int
+ ),
+ **kw,
+ )
+
+ if select.dialect_options["oracle"]["fetch_approximate"]:
+ text = re.sub("FETCH FIRST", "FETCH APPROX FIRST", text)
+
+ return text
+
def translate_select_structure(self, select_stmt, **kwargs):
select = select_stmt
class OracleDDLCompiler(compiler.DDLCompiler):
+
+ def _build_vector_index_config(
+ self, vector_index_config: VectorIndexConfig
+ ) -> str:
+ parts = []
+ sql_param_name = {
+ "hnsw_neighbors": "neighbors",
+ "hnsw_efconstruction": "efconstruction",
+ "ivf_neighbor_partitions": "neighbor partitions",
+ "ivf_sample_per_partition": "sample_per_partition",
+ "ivf_min_vectors_per_partition": "min_vectors_per_partition",
+ }
+ if vector_index_config.index_type == VectorIndexType.HNSW:
+ parts.append("ORGANIZATION INMEMORY NEIGHBOR GRAPH")
+ elif vector_index_config.index_type == VectorIndexType.IVF:
+ parts.append("ORGANIZATION NEIGHBOR PARTITIONS")
+ if vector_index_config.distance is not None:
+ parts.append(f"DISTANCE {vector_index_config.distance.value}")
+
+ if vector_index_config.accuracy is not None:
+ parts.append(
+ f"WITH TARGET ACCURACY {vector_index_config.accuracy}"
+ )
+
+ parameters_str = [f"type {vector_index_config.index_type.name}"]
+ prefix = vector_index_config.index_type.name.lower() + "_"
+
+ for field in fields(vector_index_config):
+ if field.name.startswith(prefix):
+ key = sql_param_name.get(field.name)
+ value = getattr(vector_index_config, field.name)
+ if value is not None:
+ parameters_str.append(f"{key} {value}")
+
+ parameters_str = ", ".join(parameters_str)
+ parts.append(f"PARAMETERS ({parameters_str})")
+
+ if vector_index_config.parallel is not None:
+ parts.append(f"PARALLEL {vector_index_config.parallel}")
+
+ return " ".join(parts)
+
def define_constraint_cascades(self, constraint):
text = ""
if constraint.ondelete is not None:
text += "UNIQUE "
if index.dialect_options["oracle"]["bitmap"]:
text += "BITMAP "
+ vector_options = index.dialect_options["oracle"]["vector"]
+ if vector_options:
+ text += "VECTOR "
text += "INDEX %s ON %s (%s)" % (
self._prepared_index_name(index, include_schema=True),
preparer.format_table(index.table, use_schema=True),
text += " COMPRESS %d" % (
index.dialect_options["oracle"]["compress"]
)
+ if vector_options:
+ if vector_options is True:
+ vector_options = VectorIndexConfig()
+
+ text += " " + self._build_vector_index_config(vector_options)
return text
def post_create_table(self, table):
"tablespace": None,
},
),
- (sa_schema.Index, {"bitmap": False, "compress": False}),
+ (
+ sa_schema.Index,
+ {
+ "bitmap": False,
+ "compress": False,
+ "vector": False,
+ },
+ ),
(sa_schema.Sequence, {"order": None}),
(sa_schema.Identity, {"order": None, "on_null": None}),
+ (sa_selectable.Select, {"fetch_approximate": False}),
+ (sa_selectable.CompoundSelect, {"fetch_approximate": False}),
]
@util.deprecated_params(
--- /dev/null
+# dialects/oracle/vector.py
+# Copyright (C) 2005-2025 the SQLAlchemy authors and contributors
+# <see AUTHORS file>
+#
+# This module is part of SQLAlchemy and is released under
+# the MIT License: https://www.opensource.org/licenses/mit-license.php
+# mypy: ignore-errors
+
+
+from __future__ import annotations
+
+import array
+from dataclasses import dataclass
+from enum import Enum
+from typing import Optional
+
+import sqlalchemy.types as types
+from sqlalchemy.types import Float
+
+
+class VectorIndexType(Enum):
+ """Enum representing different types of VECTOR index structures.
+
+ See :ref:`oracle_vector_datatype` for background.
+
+ .. versionadded:: 2.0.41
+
+ """
+
+ HNSW = "HNSW"
+ """
+ The HNSW (Hierarchical Navigable Small World) index type.
+ """
+ IVF = "IVF"
+ """
+ The IVF (Inverted File Index) index type
+ """
+
+
+class VectorDistanceType(Enum):
+ """Enum representing different types of vector distance metrics.
+
+ See :ref:`oracle_vector_datatype` for background.
+
+ .. versionadded:: 2.0.41
+
+ """
+
+ EUCLIDEAN = "EUCLIDEAN"
+ """Euclidean distance (L2 norm).
+
+ Measures the straight-line distance between two vectors in space.
+ """
+ DOT = "DOT"
+ """Dot product similarity.
+
+ Measures the algebraic similarity between two vectors.
+ """
+ COSINE = "COSINE"
+ """Cosine similarity.
+
+ Measures the cosine of the angle between two vectors.
+ """
+ MANHATTAN = "MANHATTAN"
+ """Manhattan distance (L1 norm).
+
+ Calculates the sum of absolute differences across dimensions.
+ """
+
+
+class VectorStorageFormat(Enum):
+ """Enum representing the data format used to store vector components.
+
+ See :ref:`oracle_vector_datatype` for background.
+
+ .. versionadded:: 2.0.41
+
+ """
+
+ INT8 = "INT8"
+ """
+ 8-bit integer format.
+ """
+ BINARY = "BINARY"
+ """
+ Binary format.
+ """
+ FLOAT32 = "FLOAT32"
+ """
+ 32-bit floating-point format.
+ """
+ FLOAT64 = "FLOAT64"
+ """
+ 64-bit floating-point format.
+ """
+
+
+@dataclass
+class VectorIndexConfig:
+ """Define the configuration for Oracle VECTOR Index.
+
+ See :ref:`oracle_vector_datatype` for background.
+
+ .. versionadded:: 2.0.41
+
+ :param index_type: Enum value from :class:`.VectorIndexType`
+ Specifies the indexing method. For HNSW, this must be
+ :attr:`.VectorIndexType.HNSW`.
+
+ :param distance: Enum value from :class:`.VectorDistanceType`
+ specifies the metric for calculating distance between VECTORS.
+
+ :param accuracy: interger. Should be in the range 0 to 100
+ Specifies the accuracy of the nearest neighbor search during
+ query execution.
+
+ :param parallel: integer. Specifies degree of parallelism.
+
+ :param hnsw_neighbors: interger. Should be in the range 0 to
+ 2048. Specifies the number of nearest neighbors considered
+ during the search. The attribute :attr:`.VectorIndexConfig.hnsw_neighbors`
+ is HNSW index specific.
+
+ :param hnsw_efconstruction: integer. Should be in the range 0
+ to 65535. Controls the trade-off between indexing speed and
+ recall quality during index construction. The attribute
+ :attr:`.VectorIndexConfig.hnsw_efconstruction` is HNSW index
+ specific.
+
+ :param ivf_neighbor_partitions: integer. Should be in the range
+ 0 to 10,000,000. Specifies the number of partitions used to
+ divide the dataset. The attribute
+ :attr:`.VectorIndexConfig.ivf_neighbor_partitions` is IVF index
+ specific.
+
+ :param ivf_sample_per_partition: integer. Should be between 1
+ and ``num_vectors / neighbor partitions``. Specifies the
+ number of samples used per partition. The attribute
+ :attr:`.VectorIndexConfig.ivf_sample_per_partition` is IVF index
+ specific.
+
+ :param ivf_min_vectors_per_partition: integer. From 0 (no trimming)
+ to the total number of vectors (results in 1 partition). Specifies
+ the minimum number of vectors per partition. The attribute
+ :attr:`.VectorIndexConfig.ivf_min_vectors_per_partition`
+ is IVF index specific.
+
+ """
+
+ index_type: VectorIndexType = VectorIndexType.HNSW
+ distance: Optional[VectorDistanceType] = None
+ accuracy: Optional[int] = None
+ hnsw_neighbors: Optional[int] = None
+ hnsw_efconstruction: Optional[int] = None
+ ivf_neighbor_partitions: Optional[int] = None
+ ivf_sample_per_partition: Optional[int] = None
+ ivf_min_vectors_per_partition: Optional[int] = None
+ parallel: Optional[int] = None
+
+ def __post_init__(self):
+ self.index_type = VectorIndexType(self.index_type)
+ for field in [
+ "hnsw_neighbors",
+ "hnsw_efconstruction",
+ "ivf_neighbor_partitions",
+ "ivf_sample_per_partition",
+ "ivf_min_vectors_per_partition",
+ "parallel",
+ "accuracy",
+ ]:
+ value = getattr(self, field)
+ if value is not None and not isinstance(value, int):
+ raise TypeError(
+ f"{field} must be an integer if"
+ f"provided, got {type(value).__name__}"
+ )
+
+
+class VECTOR(types.TypeEngine):
+ """Oracle VECTOR datatype.
+
+ For complete background on using this type, see
+ :ref:`oracle_vector_datatype`.
+
+ .. versionadded:: 2.0.41
+
+ """
+
+ cache_ok = True
+ __visit_name__ = "VECTOR"
+
+ _typecode_map = {
+ VectorStorageFormat.INT8: "b", # Signed int
+ VectorStorageFormat.BINARY: "B", # Unsigned int
+ VectorStorageFormat.FLOAT32: "f", # Float
+ VectorStorageFormat.FLOAT64: "d", # Double
+ }
+
+ def __init__(self, dim=None, storage_format=None):
+ """Construct a VECTOR.
+
+ :param dim: integer. The dimension of the VECTOR datatype. This
+ should be an integer value.
+
+ :param storage_format: VectorStorageFormat. The VECTOR storage
+ type format. This may be Enum values form
+ :class:`.VectorStorageFormat` INT8, BINARY, FLOAT32, or FLOAT64.
+
+ """
+ if dim is not None and not isinstance(dim, int):
+ raise TypeError("dim must be an interger")
+ if storage_format is not None and not isinstance(
+ storage_format, VectorStorageFormat
+ ):
+ raise TypeError(
+ "storage_format must be an enum of type VectorStorageFormat"
+ )
+ self.dim = dim
+ self.storage_format = storage_format
+
+ def _cached_bind_processor(self, dialect):
+ """
+ Convert a list to a array.array before binding it to the database.
+ """
+
+ def process(value):
+ if value is None or isinstance(value, array.array):
+ return value
+
+ # Convert list to a array.array
+ elif isinstance(value, list):
+ typecode = self._array_typecode(self.storage_format)
+ value = array.array(typecode, value)
+ return value
+
+ else:
+ raise TypeError("VECTOR accepts list or array.array()")
+
+ return process
+
+ def _cached_result_processor(self, dialect, coltype):
+ """
+ Convert a array.array to list before binding it to the database.
+ """
+
+ def process(value):
+ if isinstance(value, array.array):
+ return list(value)
+
+ return process
+
+ def _array_typecode(self, typecode):
+ """
+ Map storage format to array typecode.
+ """
+ return self._typecode_map.get(typecode, "d")
+
+ class comparator_factory(types.TypeEngine.Comparator):
+ def l2_distance(self, other):
+ return self.op("<->", return_type=Float)(other)
+
+ def inner_product(self, other):
+ return self.op("<#>", return_type=Float)(other)
+
+ def cosine_distance(self, other):
+ return self.op("<=>", return_type=Float)(other)
from .base import ColumnSet
from .base import CompileState
from .base import DedupeColumnCollection
+from .base import DialectKWArgs
from .base import Executable
from .base import Generative
from .base import HasCompileState
raise NotImplementedError
-class GenerativeSelect(SelectBase, Generative):
+class GenerativeSelect(DialectKWArgs, SelectBase, Generative):
"""Base class for SELECT statements where additional elements can be
added.
count: _LimitOffsetType,
with_ties: bool = False,
percent: bool = False,
+ **dialect_kw: Any,
) -> Self:
- """Return a new selectable with the given FETCH FIRST criterion
+ r"""Return a new selectable with the given FETCH FIRST criterion
applied.
This is a numeric value which usually renders as ``FETCH {FIRST | NEXT}
:param percent: When ``True``, ``count`` represents the percentage
of the total number of selected rows to return. Defaults to ``False``
+ :param \**dialect_kw: Additional dialect-specific keyword arguments
+ may be accepted by dialects.
+
+ .. versionadded:: 2.0.41
+
.. seealso::
:meth:`_sql.GenerativeSelect.limit`
:meth:`_sql.GenerativeSelect.offset`
"""
-
+ self._validate_dialect_kwargs(dialect_kw)
self._limit_clause = None
if count is None:
self._fetch_clause = self._fetch_clause_options = None
]
+ SupportsCloneAnnotations._clone_annotations_traverse_internals
+ HasCTE._has_ctes_traverse_internals
+ + DialectKWArgs._dialect_kwargs_traverse_internals
)
selects: List[SelectBase]
+ HasHints._has_hints_traverse_internals
+ SupportsCloneAnnotations._clone_annotations_traverse_internals
+ Executable._executable_traverse_internals
+ + DialectKWArgs._dialect_kwargs_traverse_internals
)
_cache_key_traversal: _CacheKeyTraversalType = _traverse_internals + [
stmt.__dict__.update(kw)
return stmt
- def __init__(self, *entities: _ColumnsClauseArgument[Any]):
+ def __init__(
+ self, *entities: _ColumnsClauseArgument[Any], **dialect_kw: Any
+ ):
r"""Construct a new :class:`_expression.Select`.
The public constructor for :class:`_expression.Select` is the
)
for ent in entities
]
-
GenerativeSelect.__init__(self)
def _apply_syntax_extension_to_self(
checkparams={"param_1": 20, "param_2": 10},
)
+ @testing.only_on("oracle>=23.4")
+ def test_fetch_type(self):
+ t = table("sometable", column("col1"), column("col2"))
+ s = select(t).fetch(2, oracle_fetch_approximate=True)
+ self.assert_compile(
+ s,
+ "SELECT sometable.col1, sometable.col2 FROM sometable "
+ "FETCH APPROX FIRST __[POSTCOMPILE_param_1] ROWS ONLY",
+ checkparams={"param_1": 2},
+ )
+
def test_limit_two(self):
t = table("sometable", column("col1"), column("col2"))
s = select(t).limit(10).offset(20).subquery()
from sqlalchemy import Unicode
from sqlalchemy import UniqueConstraint
from sqlalchemy.dialects import oracle
+from sqlalchemy.dialects.oracle import VECTOR
+from sqlalchemy.dialects.oracle import VectorDistanceType
+from sqlalchemy.dialects.oracle import VectorIndexConfig
+from sqlalchemy.dialects.oracle import VectorIndexType
+from sqlalchemy.dialects.oracle import VectorStorageFormat
from sqlalchemy.dialects.oracle.base import BINARY_DOUBLE
from sqlalchemy.dialects.oracle.base import BINARY_FLOAT
from sqlalchemy.dialects.oracle.base import DOUBLE_PRECISION
tbl = Table("test_tablespace", m2, autoload_with=connection)
assert tbl.dialect_options["oracle"]["tablespace"] == "TEMP"
+ @testing.only_on("oracle>=23.4")
+ def test_reflection_w_vector_column(self, connection, metadata):
+ tb1 = Table(
+ "test_vector",
+ metadata,
+ Column("id", Integer, primary_key=True),
+ Column("name", String(30)),
+ Column(
+ "embedding",
+ VECTOR(dim=3, storage_format=VectorStorageFormat.FLOAT32),
+ ),
+ )
+ metadata.create_all(connection)
+
+ m2 = MetaData()
+
+ tb1 = Table("test_vector", m2, autoload_with=connection)
+ assert tb1.columns.keys() == ["id", "name", "embedding"]
+
class ViewReflectionTest(fixtures.TestBase):
__only_on__ = "oracle"
eq_(len(reflectedtable.constraints), 1)
eq_(len(reflectedtable.indexes), 5)
+ @testing.only_on("oracle>=23.4")
+ def test_vector_index(self, metadata, connection):
+ tb1 = Table(
+ "test_vector",
+ metadata,
+ Column("id", Integer, primary_key=True),
+ Column("name", String(30)),
+ Column(
+ "embedding",
+ VECTOR(dim=3, storage_format=VectorStorageFormat.FLOAT32),
+ ),
+ )
+ tb1.create(connection)
+
+ ivf_index = Index(
+ "ivf_vector_index",
+ tb1.c.embedding,
+ oracle_vector=VectorIndexConfig(
+ index_type=VectorIndexType.IVF,
+ distance=VectorDistanceType.DOT,
+ accuracy=90,
+ ivf_neighbor_partitions=5,
+ ),
+ )
+ ivf_index.create(connection)
+
+ expected = [
+ {
+ "name": "ivf_vector_index",
+ "column_names": ["embedding"],
+ "dialect_options": {},
+ "unique": False,
+ },
+ ]
+ eq_(inspect(connection).get_indexes("test_vector"), expected)
+
class DBLinkReflectionTest(fixtures.TestBase):
__requires__ = ("oracle_test_dblink",)
+import array
import datetime
import decimal
import os
from sqlalchemy import exc
from sqlalchemy import FLOAT
from sqlalchemy import Float
+from sqlalchemy import Index
from sqlalchemy import Integer
from sqlalchemy import LargeBinary
from sqlalchemy import literal
from sqlalchemy.dialects.oracle import base as oracle
from sqlalchemy.dialects.oracle import cx_oracle
from sqlalchemy.dialects.oracle import oracledb
+from sqlalchemy.dialects.oracle import VECTOR
+from sqlalchemy.dialects.oracle import VectorDistanceType
+from sqlalchemy.dialects.oracle import VectorIndexConfig
+from sqlalchemy.dialects.oracle import VectorIndexType
+from sqlalchemy.dialects.oracle import VectorStorageFormat
from sqlalchemy.sql import column
from sqlalchemy.sql.sqltypes import NullType
from sqlalchemy.testing import AssertsCompiledSQL
finally:
exec_sql(connection, "DROP TABLE Z_TEST")
+ @testing.only_on("oracle>=23.4")
+ def test_vector_dim(self, metadata, connection):
+ t1 = Table(
+ "t1",
+ metadata,
+ Column(
+ "c1", VECTOR(dim=3, storage_format=VectorStorageFormat.FLOAT32)
+ ),
+ )
+
+ t1.create(connection)
+ eq_(t1.c.c1.type.dim, 3)
+
+ @testing.only_on("oracle>=23.4")
+ def test_vector_insert(self, metadata, connection):
+ t1 = Table(
+ "t1",
+ metadata,
+ Column("id", Integer, primary_key=True),
+ Column("c1", VECTOR(storage_format=VectorStorageFormat.INT8)),
+ )
+
+ t1.create(connection)
+ connection.execute(
+ t1.insert(),
+ dict(id=1, c1=[6, 7, 8, 5]),
+ )
+ eq_(
+ connection.execute(t1.select()).first(),
+ (1, [6, 7, 8, 5]),
+ )
+ connection.execute(t1.delete().where(t1.c.id == 1))
+ connection.execute(t1.insert(), dict(id=1, c1=[6, 7]))
+ eq_(
+ connection.execute(t1.select()).first(),
+ (1, [6, 7]),
+ )
+
+ @testing.only_on("oracle>=23.4")
+ def test_vector_insert_array(self, metadata, connection):
+ t1 = Table(
+ "t1",
+ metadata,
+ Column("id", Integer, primary_key=True),
+ Column("c1", VECTOR),
+ )
+
+ t1.create(connection)
+ connection.execute(
+ t1.insert(),
+ dict(id=1, c1=array.array("b", [6, 7, 8, 5])),
+ )
+ eq_(
+ connection.execute(t1.select()).first(),
+ (1, [6, 7, 8, 5]),
+ )
+
+ connection.execute(t1.delete().where(t1.c.id == 1))
+
+ connection.execute(
+ t1.insert(), dict(id=1, c1=array.array("b", [6, 7]))
+ )
+ eq_(
+ connection.execute(t1.select()).first(),
+ (1, [6, 7]),
+ )
+
+ @testing.only_on("oracle>=23.4")
+ def test_vector_multiformat_insert(self, metadata, connection):
+ t1 = Table(
+ "t1",
+ metadata,
+ Column("id", Integer, primary_key=True),
+ Column("c1", VECTOR),
+ )
+
+ t1.create(connection)
+ connection.execute(
+ t1.insert(),
+ dict(id=1, c1=[6.12, 7.54, 8.33]),
+ )
+ eq_(
+ connection.execute(t1.select()).first(),
+ (1, [6.12, 7.54, 8.33]),
+ )
+ connection.execute(t1.delete().where(t1.c.id == 1))
+ connection.execute(t1.insert(), dict(id=1, c1=[6, 7]))
+ eq_(
+ connection.execute(t1.select()).first(),
+ (1, [6, 7]),
+ )
+
+ @testing.only_on("oracle>=23.4")
+ def test_vector_format(self, metadata, connection):
+ t1 = Table(
+ "t1",
+ metadata,
+ Column(
+ "c1", VECTOR(dim=3, storage_format=VectorStorageFormat.FLOAT32)
+ ),
+ )
+
+ t1.create(connection)
+ eq_(t1.c.c1.type.storage_format, VectorStorageFormat.FLOAT32)
+
+ @testing.only_on("oracle>=23.4")
+ def test_vector_hnsw_index(self, metadata, connection):
+ t1 = Table(
+ "t1",
+ metadata,
+ Column("id", Integer),
+ Column(
+ "embedding",
+ VECTOR(dim=3, storage_format=VectorStorageFormat.FLOAT32),
+ ),
+ )
+
+ t1.create(connection)
+
+ hnsw_index = Index(
+ "hnsw_vector_index", t1.c.embedding, oracle_vector=True
+ )
+ hnsw_index.create(connection)
+
+ connection.execute(t1.insert(), dict(id=1, embedding=[6, 7, 8]))
+ eq_(
+ connection.execute(t1.select()).first(),
+ (1, [6.0, 7.0, 8.0]),
+ )
+
+ @testing.only_on("oracle>=23.4")
+ def test_vector_ivf_index(self, metadata, connection):
+ t1 = Table(
+ "t1",
+ metadata,
+ Column("id", Integer),
+ Column(
+ "embedding",
+ VECTOR(dim=3, storage_format=VectorStorageFormat.FLOAT32),
+ ),
+ )
+
+ t1.create(connection)
+ ivf_index = Index(
+ "ivf_vector_index",
+ t1.c.embedding,
+ oracle_vector=VectorIndexConfig(
+ index_type=VectorIndexType.IVF,
+ distance=VectorDistanceType.DOT,
+ accuracy=90,
+ ivf_neighbor_partitions=5,
+ ),
+ )
+ ivf_index.create(connection)
+
+ connection.execute(t1.insert(), dict(id=1, embedding=[6, 7, 8]))
+ eq_(
+ connection.execute(t1.select()).first(),
+ (1, [6.0, 7.0, 8.0]),
+ )
+
+ @testing.only_on("oracle>=23.4")
+ def test_vector_l2_distance(self, metadata, connection):
+ t1 = Table(
+ "t1",
+ metadata,
+ Column("id", Integer),
+ Column(
+ "embedding",
+ VECTOR(dim=3, storage_format=VectorStorageFormat.INT8),
+ ),
+ )
+
+ t1.create(connection)
+
+ connection.execute(t1.insert(), dict(id=1, embedding=[8, 9, 10]))
+ connection.execute(t1.insert(), dict(id=2, embedding=[1, 2, 3]))
+ connection.execute(
+ t1.insert(),
+ dict(id=3, embedding=[15, 16, 17]),
+ )
+
+ query_vector = [2, 3, 4]
+ res = connection.execute(
+ t1.select().order_by((t1.c.embedding.l2_distance(query_vector)))
+ ).first()
+ eq_(res.embedding, [1, 2, 3])
+
class LOBFetchTest(fixtures.TablesTest):
__only_on__ = "oracle"
from sqlalchemy.sql import type_coerce
from sqlalchemy.sql import visitors
from sqlalchemy.sql.annotation import Annotated
+from sqlalchemy.sql.base import DialectKWArgs
from sqlalchemy.sql.base import HasCacheKey
from sqlalchemy.sql.base import SingletonConstant
from sqlalchemy.sql.base import SyntaxExtension
select(table_a.c.a).fetch(2, percent=True),
select(table_a.c.a).fetch(2, with_ties=True),
select(table_a.c.a).fetch(2, with_ties=True, percent=True),
+ select(table_a.c.a).fetch(2, oracle_fetch_approximate=True),
select(table_a.c.a).fetch(2).offset(3),
select(table_a.c.a).fetch(2).offset(5),
select(table_a.c.a).limit(2).offset(5),
NoInit,
SingletonConstant,
SyntaxExtension,
+ DialectKWArgs,
]
)
)