From 719197dd9399b4436aeaba3e2f44761292036ad6 Mon Sep 17 00:00:00 2001 From: Mike Bayer Date: Wed, 18 May 2022 16:06:29 -0400 Subject: [PATCH] use plainto_tsquery for PG match The :meth:`.Operators.match` operator now uses ``plainto_tsquery()`` for PostgreSQL full text search, rather than ``to_tsquery()``. The rationale for this change is to provide better cross-compatibility with match on other database backends. Full support for all PostgreSQL full text functions remains available through the use of :data:`.func` in conjunction with :meth:`.Operators.bool_op` (an improved version of :meth:`.Operators.op` for boolean operators). Additional doc updates here apply to 1.4 so will backport these out to a separate commit. Fixes: #7086 Change-Id: I1946075daf5d9c558e85f73f1bf852604b3b1b8c --- doc/build/changelog/migration_20.rst | 25 ++++ doc/build/changelog/unreleased_20/7086.rst | 15 +++ lib/sqlalchemy/dialects/postgresql/base.py | 123 ++++++++++++-------- lib/sqlalchemy/sql/operators.py | 8 +- test/dialect/postgresql/test_compiler.py | 41 ++++++- test/dialect/postgresql/test_query.py | 25 +++- test/ext/mypy/plain_files/sql_operations.py | 7 ++ test/sql/test_operators.py | 15 ++- 8 files changed, 194 insertions(+), 65 deletions(-) create mode 100644 doc/build/changelog/unreleased_20/7086.rst diff --git a/doc/build/changelog/migration_20.rst b/doc/build/changelog/migration_20.rst index 2fcfafbbe8..6e9aae04a5 100644 --- a/doc/build/changelog/migration_20.rst +++ b/doc/build/changelog/migration_20.rst @@ -419,6 +419,31 @@ the :meth:`_types.TypeEngine.with_variant` method as follows:: ) +.. _change_7086: + +``match()`` operator on PostgreSQL uses ``plainto_tsquery()`` rather than ``to_tsquery()`` +------------------------------------------------------------------------------------------ + +The :meth:`.Operators.match` function now renders +``col @@ plainto_tsquery(expr)`` on the PostgreSQL backend, rather than +``col @@ to_tsquery()``. ``plainto_tsquery()`` accepts plain text whereas +``to_tsquery()`` accepts specialized query symbols, and is therefore less +cross-compatible with other backends. + +All PostgreSQL search functions and operators are available through use of +:data:`.func` to generate PostgreSQL-specific functions and +:meth:`.Operators.bool_op` (a boolean-typed version of :meth:`.Operators.op`) +to generate arbitrary operators, in the same manner as they are available +in previous versions. See the examples at :ref:`postgresql_match`. + +Existing SQLAlchemy projects that make use of PG-specific directives within +:meth:`.Operators.match` should make use of ``func.to_tsquery()`` directly. +To render SQL in exactly the same form as would be present +in 1.4, see the version note at :ref:`postgresql_simple_match`. + + + +:ticket:`7086` .. _migration_20_overview: diff --git a/doc/build/changelog/unreleased_20/7086.rst b/doc/build/changelog/unreleased_20/7086.rst new file mode 100644 index 0000000000..ebc180d5b7 --- /dev/null +++ b/doc/build/changelog/unreleased_20/7086.rst @@ -0,0 +1,15 @@ +.. change:: + :tags: bug, postgresql + :tickets: 7086 + + The :meth:`.Operators.match` operator now uses ``plainto_tsquery()`` for + PostgreSQL full text search, rather than ``to_tsquery()``. The rationale + for this change is to provide better cross-compatibility with match on + other database backends. Full support for all PostgreSQL full text + functions remains available through the use of :data:`.func` in + conjunction with :meth:`.Operators.bool_op` (an improved version of + :meth:`.Operators.op` for boolean operators). + + .. seealso:: + + :ref:`change_7086` diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py index 987e36ed94..6a49e296ca 100644 --- a/lib/sqlalchemy/dialects/postgresql/base.py +++ b/lib/sqlalchemy/dialects/postgresql/base.py @@ -699,28 +699,68 @@ constraint violation which occurs: Full Text Search ---------------- -SQLAlchemy makes available the PostgreSQL ``@@`` operator via the -:meth:`_expression.ColumnElement.match` method on any textual column expression. +PostgreSQL's full text search system is available through the use of the +:data:`.func` namespace, combined with the use of custom operators +via the :meth:`.Operators.bool_op` method. For simple cases with some +degree of cross-backend compatibility, the :meth:`.Operators.match` operator +may also be used. + +.. _postgresql_simple_match: + +Simple plain text matching with ``match()`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`.Operators.match` operator provides for cross-compatible simple +text matching. For the PostgreSQL backend, it's hardcoded to generate +an expression using the ``@@`` operator in conjunction with the +``plainto_tsquery()`` PostgreSQL function. On the PostgreSQL dialect, an expression like the following:: select(sometable.c.text.match("search string")) -will emit to the database:: +would emit to the database:: - SELECT text @@ to_tsquery('search string') FROM table + SELECT text @@ plainto_tsquery('search string') FROM table -Various other PostgreSQL text search functions such as ``to_tsquery()``, -``to_tsvector()``, and ``plainto_tsquery()`` are available by explicitly using -the standard SQLAlchemy :data:`.func` construct. +Above, passing a plain string to :meth:`.Operators.match` will automatically +make use of ``plainto_tsquery()`` to specify the type of tsquery. This +establishes basic database cross-compatibility for :meth:`.Operators.match` +with other backends. -For example:: +.. versionchanged:: 2.0 The default tsquery generation function used by the + PostgreSQL dialect with :meth:`.Operators.match` is ``plainto_tsquery()``. - select(func.to_tsvector('fat cats ate rats').match('cat & rat')) + To render exactly what was rendered in 1.4, use the following form:: + + from sqlalchemy import func + + select( + sometable.c.text.bool_op("@@")(func.to_tsquery("search string")) + ) -Emits the equivalent of:: + Which would emit:: + + SELECT text @@ to_tsquery('search string') FROM table + +Using PostgreSQL full text functions and operators directly +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Text search operations beyond the simple use of :meth:`.Operators.match` +may make use of the :data:`.func` namespace to generate PostgreSQL full-text +functions, in combination with :meth:`.Operators.bool_op` to generate +any boolean operator. + +For example, the query:: + + select( + func.to_tsquery('cat').bool_op("@>")(func.to_tsquery('cat & rat')) + ) + +would generate: + + SELECT to_tsquery('cat') @> to_tsquery('cat & rat') - SELECT to_tsvector('fat cats ate rats') @@ to_tsquery('cat & rat') The :class:`_postgresql.TSVECTOR` type can provide for explicit CAST:: @@ -732,54 +772,32 @@ produces a statement equivalent to:: SELECT CAST('some text' AS TSVECTOR) AS anon_1 -.. tip:: - - It's important to remember that text searching in PostgreSQL is powerful but complicated, - and SQLAlchemy users are advised to reference the PostgreSQL documentation - regarding - `Full Text Search `_. - - There are important differences between ``to_tsquery`` and - ``plainto_tsquery``, the most significant of which is that ``to_tsquery`` - expects specially formatted "querytext" that is written to PostgreSQL's own - specification, while ``plainto_tsquery`` expects unformatted text that is - transformed into ``to_tsquery`` compatible querytext. This means the input to - ``.match()`` under PostgreSQL may be incompatible with the input to - ``.match()`` under another database backend. SQLAlchemy users who support - multiple backends are advised to carefully implement their usage of - ``.match()`` to work around these constraints. - -Full Text Searches in PostgreSQL are influenced by a combination of: the -PostgreSQL setting of ``default_text_search_config``, the ``regconfig`` used -to build the GIN/GiST indexes, and the ``regconfig`` optionally passed in -during a query. - -When performing a Full Text Search against a column that has a GIN or -GiST index that is already pre-computed (which is common on full text -searches) one may need to explicitly pass in a particular PostgreSQL -``regconfig`` value to ensure the query-planner utilizes the index and does -not re-compute the column on demand. - -In order to provide for this explicit query planning, or to use different -search strategies, the ``match`` method accepts a ``postgresql_regconfig`` -keyword argument:: +Specifying the "regconfig" with ``match()`` or custom operators +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +PostgreSQL's ``plainto_tsquery()`` function accepts an optional +"regconfig" argument that is used to instruct PostgreSQL to use a +particular pre-computed GIN or GiST index in order to perform the search. +When using :meth:`.Operators.match`, this additional parameter may be +specified using the ``postgresql_regconfig`` parameter, such as:: select(mytable.c.id).where( mytable.c.title.match('somestring', postgresql_regconfig='english') ) -Emits the equivalent of:: +Which would emit:: SELECT mytable.id FROM mytable - WHERE mytable.title @@ to_tsquery('english', 'somestring') + WHERE mytable.title @@ plainto_tsquery('english', 'somestring') -One can also specifically pass in a `'regconfig'` value to the -``to_tsvector()`` command as the initial argument:: +When using other PostgreSQL search functions with :data:`.func`, the +"regconfig" parameter may be passed directly as the initial argument:: select(mytable.c.id).where( - func.to_tsvector('english', mytable.c.title )\ - .match('somestring', postgresql_regconfig='english') + func.to_tsvector("english", mytable.c.title).bool_op("@@")( + func.to_tsquery("english", "somestring") ) + ) produces a statement equivalent to:: @@ -791,6 +809,11 @@ It is recommended that you use the ``EXPLAIN ANALYZE...`` tool from PostgreSQL to ensure that you are generating queries with SQLAlchemy that take full advantage of any indexes you may have created for full text search. +.. seealso:: + + `Full Text Search `_ - in the PostgreSQL documentation + + FROM ONLY ... ------------- @@ -2238,12 +2261,12 @@ class PGCompiler(compiler.SQLCompiler): binary.modifiers["postgresql_regconfig"], sqltypes.STRINGTYPE ) if regconfig: - return "%s @@ to_tsquery(%s, %s)" % ( + return "%s @@ plainto_tsquery(%s, %s)" % ( self.process(binary.left, **kw), regconfig, self.process(binary.right, **kw), ) - return "%s @@ to_tsquery(%s)" % ( + return "%s @@ plainto_tsquery(%s)" % ( self.process(binary.left, **kw), self.process(binary.right, **kw), ) diff --git a/lib/sqlalchemy/sql/operators.py b/lib/sqlalchemy/sql/operators.py index 0a85277611..5934544321 100644 --- a/lib/sqlalchemy/sql/operators.py +++ b/lib/sqlalchemy/sql/operators.py @@ -1129,7 +1129,13 @@ class ColumnOperators(Operators): a MATCH-like function or operator provided by the backend. Examples include: - * PostgreSQL - renders ``x @@ to_tsquery(y)`` + * PostgreSQL - renders ``x @@ plainto_tsquery(y)`` + + .. versionchanged:: 2.0 ``plainto_tsquery()`` is used instead + of ``to_tsquery()`` for PostgreSQL now; for compatibility with + other forms, see :ref:`postgresql_match`. + + * MySQL - renders ``MATCH (x) AGAINST (y IN BOOLEAN MODE)`` .. seealso:: diff --git a/test/dialect/postgresql/test_compiler.py b/test/dialect/postgresql/test_compiler.py index a4c6cc0873..25550afe14 100644 --- a/test/dialect/postgresql/test_compiler.py +++ b/test/dialect/postgresql/test_compiler.py @@ -3038,6 +3038,36 @@ class FullTextSearchTest(fixtures.TestBase, AssertsCompiledSQL): c = q.compile(dialect=postgresql.dialect()) raise ValueError(c) + def test_match_custom(self): + s = select(self.table_alt.c.id).where( + func.to_tsquery("fat").bool_op("<->")(func.to_tsquery("rat")) + ) + self.assert_compile( + s, + "SELECT mytable.id FROM mytable WHERE " + "to_tsquery(%(to_tsquery_1)s) <-> to_tsquery(%(to_tsquery_2)s)", + {"to_tsquery_1": "fat", "to_tsquery_2": "rat"}, + ) + + def test_match_custom_regconfig(self): + s = select(self.table_alt.c.id).where( + func.to_tsquery("english", "fat").bool_op("<->")( + func.to_tsquery("english", "rat") + ) + ) + self.assert_compile( + s, + "SELECT mytable.id FROM mytable WHERE " + "to_tsquery(%(to_tsquery_1)s, %(to_tsquery_2)s) <-> " + "to_tsquery(%(to_tsquery_3)s, %(to_tsquery_4)s)", + { + "to_tsquery_1": "english", + "to_tsquery_2": "fat", + "to_tsquery_3": "english", + "to_tsquery_4": "rat", + }, + ) + def test_match_basic(self): s = select(self.table_alt.c.id).where( self.table_alt.c.title.match("somestring") @@ -3046,7 +3076,7 @@ class FullTextSearchTest(fixtures.TestBase, AssertsCompiledSQL): s, "SELECT mytable.id " "FROM mytable " - "WHERE mytable.title @@ to_tsquery(%(title_1)s)", + "WHERE mytable.title @@ plainto_tsquery(%(title_1)s)", ) def test_match_regconfig(self): @@ -3059,7 +3089,8 @@ class FullTextSearchTest(fixtures.TestBase, AssertsCompiledSQL): s, "SELECT mytable.id " "FROM mytable " - """WHERE mytable.title @@ to_tsquery('english', %(title_1)s)""", + "WHERE mytable.title @@ " + "plainto_tsquery('english', %(title_1)s)", ) def test_match_tsvector(self): @@ -3071,7 +3102,7 @@ class FullTextSearchTest(fixtures.TestBase, AssertsCompiledSQL): "SELECT mytable.id " "FROM mytable " "WHERE to_tsvector(mytable.title) " - "@@ to_tsquery(%(to_tsvector_1)s)", + "@@ plainto_tsquery(%(to_tsvector_1)s)", ) def test_match_tsvectorconfig(self): @@ -3085,7 +3116,7 @@ class FullTextSearchTest(fixtures.TestBase, AssertsCompiledSQL): "SELECT mytable.id " "FROM mytable " "WHERE to_tsvector(%(to_tsvector_1)s, mytable.title) @@ " - "to_tsquery(%(to_tsvector_2)s)", + "plainto_tsquery(%(to_tsvector_2)s)", ) def test_match_tsvectorconfig_regconfig(self): @@ -3099,7 +3130,7 @@ class FullTextSearchTest(fixtures.TestBase, AssertsCompiledSQL): "SELECT mytable.id " "FROM mytable " "WHERE to_tsvector(%(to_tsvector_1)s, mytable.title) @@ " - """to_tsquery('english', %(to_tsvector_2)s)""", + """plainto_tsquery('english', %(to_tsvector_2)s)""", ) diff --git a/test/dialect/postgresql/test_query.py b/test/dialect/postgresql/test_query.py index 4e6ca46c1b..6afc2f7c1b 100644 --- a/test/dialect/postgresql/test_query.py +++ b/test/dialect/postgresql/test_query.py @@ -983,12 +983,13 @@ class MatchTest(fixtures.TablesTest, AssertsCompiledSQL): if self._strs_render_bind_casts(connection): self.assert_compile( matchtable.c.title.match("somstr"), - "matchtable.title @@ to_tsquery(%(title_1)s::VARCHAR(200))", + "matchtable.title @@ " + "plainto_tsquery(%(title_1)s::VARCHAR(200))", ) else: self.assert_compile( matchtable.c.title.match("somstr"), - "matchtable.title @@ to_tsquery(%(title_1)s)", + "matchtable.title @@ plainto_tsquery(%(title_1)s)", ) @testing.requires.format_paramstyle @@ -998,12 +999,12 @@ class MatchTest(fixtures.TablesTest, AssertsCompiledSQL): if self._strs_render_bind_casts(connection): self.assert_compile( matchtable.c.title.match("somstr"), - "matchtable.title @@ to_tsquery(%s::VARCHAR(200))", + "matchtable.title @@ plainto_tsquery(%s::VARCHAR(200))", ) else: self.assert_compile( matchtable.c.title.match("somstr"), - "matchtable.title @@ to_tsquery(%s)", + "matchtable.title @@ plainto_tsquery(%s)", ) def test_simple_match(self, connection): @@ -1051,9 +1052,16 @@ class MatchTest(fixtures.TablesTest, AssertsCompiledSQL): .order_by(matchtable.c.id) ).fetchall() eq_([3, 5], [r.id for r in results1]) + + def test_or_tsquery(self, connection): + matchtable = self.tables.matchtable results2 = connection.execute( matchtable.select() - .where(matchtable.c.title.match("nutshells | rubies")) + .where( + matchtable.c.title.bool_op("@@")( + func.to_tsquery("nutshells | rubies") + ) + ) .order_by(matchtable.c.id) ).fetchall() eq_([3, 5], [r.id for r in results2]) @@ -1069,9 +1077,14 @@ class MatchTest(fixtures.TablesTest, AssertsCompiledSQL): ) ).fetchall() eq_([5], [r.id for r in results1]) + + def test_and_tsquery(self, connection): + matchtable = self.tables.matchtable results2 = connection.execute( matchtable.select().where( - matchtable.c.title.match("python & nutshells") + matchtable.c.title.op("@@")( + func.to_tsquery("python & nutshells") + ) ) ).fetchall() eq_([5], [r.id for r in results2]) diff --git a/test/ext/mypy/plain_files/sql_operations.py b/test/ext/mypy/plain_files/sql_operations.py index 6b06535bf1..b4d0bd0060 100644 --- a/test/ext/mypy/plain_files/sql_operations.py +++ b/test/ext/mypy/plain_files/sql_operations.py @@ -2,6 +2,7 @@ import typing from sqlalchemy import Boolean from sqlalchemy import column +from sqlalchemy import func from sqlalchemy import Integer from sqlalchemy import select from sqlalchemy import String @@ -35,6 +36,9 @@ expr8 = c2 + 10 stmt = select(column("q")).where(lambda: column("g") > 5).where(c2 == 5) +expr9 = c1.bool_op("@@")(func.to_tsquery("some & query")) + + if typing.TYPE_CHECKING: # as far as if this is ColumnElement, BinaryElement, SQLCoreOperations, @@ -69,3 +73,6 @@ if typing.TYPE_CHECKING: # EXPECTED_RE_TYPE: sqlalchemy..*ColumnElement\[builtins.int.?\] reveal_type(expr8) + + # EXPECTED_TYPE: BinaryExpression[bool] + reveal_type(expr9) diff --git a/test/sql/test_operators.py b/test/sql/test_operators.py index 6f068159e0..2411fb0a33 100644 --- a/test/sql/test_operators.py +++ b/test/sql/test_operators.py @@ -2862,7 +2862,7 @@ class MatchTest(fixtures.TestBase, testing.AssertsCompiledSQL): def test_match_4(self): self.assert_compile( self.table1.c.myid.match("somstr"), - "mytable.myid @@ to_tsquery(%(myid_1)s)", + "mytable.myid @@ plainto_tsquery(%(myid_1)s)", dialect=postgresql.dialect(), ) @@ -2881,7 +2881,7 @@ class MatchTest(fixtures.TestBase, testing.AssertsCompiledSQL): def test_boolean_inversion_postgresql(self): self.assert_compile( ~self.table1.c.myid.match("somstr"), - "NOT mytable.myid @@ to_tsquery(%(myid_1)s)", + "NOT mytable.myid @@ plainto_tsquery(%(myid_1)s)", dialect=postgresql.dialect(), ) @@ -3429,7 +3429,7 @@ class ComposedLikeOperatorsTest(fixtures.TestBase, testing.AssertsCompiledSQL): class CustomOpTest(fixtures.TestBase): - def test_is_comparison(self): + def test_is_comparison_legacy(self): c = column("x") c2 = column("y") op1 = c.op("$", is_comparison=True)(c2).operator @@ -3438,6 +3438,15 @@ class CustomOpTest(fixtures.TestBase): assert operators.is_comparison(op1) assert not operators.is_comparison(op2) + def test_is_comparison_bool_op(self): + c = column("x") + c2 = column("y") + op1 = c.bool_op("$")(c2).operator + op2 = c.op("$")(c2).operator + + assert operators.is_comparison(op1) + assert not operators.is_comparison(op2) + @testing.combinations( (sqltypes.NULLTYPE,), (Integer(),), -- 2.47.2