From 6bb75283f16e5045a070c4abc744d5a953f7ab67 Mon Sep 17 00:00:00 2001 From: jonathan vanasco Date: Tue, 10 Jun 2014 18:56:27 -0400 Subject: [PATCH] - add postgresql_regconfig argument to PG dialect for match() operator, implements PG's to_tsquery('regconfig', 'arg') pattern. fixes #3078 --- doc/build/changelog/changelog_09.rst | 11 +++ lib/sqlalchemy/dialects/postgresql/base.py | 62 +++++++++++++++-- lib/sqlalchemy/sql/default_comparator.py | 2 +- lib/sqlalchemy/sql/operators.py | 4 +- test/dialect/postgresql/test_compiler.py | 81 ++++++++++++++++++++++ 5 files changed, 152 insertions(+), 8 deletions(-) diff --git a/doc/build/changelog/changelog_09.rst b/doc/build/changelog/changelog_09.rst index ac76a87501..687281c51d 100644 --- a/doc/build/changelog/changelog_09.rst +++ b/doc/build/changelog/changelog_09.rst @@ -14,6 +14,17 @@ :version: 0.9.7 :released: + .. change:: + :tags: feature, postgresql + :versions: 1.0.0 + :pullreq: bitbucket:22 + :tickets: 3078 + + Added kw argument ``postgresql_regconfig`` to the + :meth:`.Operators.match` operator, allows the "reg config" argument + to be specified to the ``to_tsquery()`` function emitted. + Pull request courtesy Jonathan Vanasco. + .. change:: :tags: feature, postgresql :versions: 1.0.0 diff --git a/lib/sqlalchemy/dialects/postgresql/base.py b/lib/sqlalchemy/dialects/postgresql/base.py index 02c1c86db3..21df75bb8e 100644 --- a/lib/sqlalchemy/dialects/postgresql/base.py +++ b/lib/sqlalchemy/dialects/postgresql/base.py @@ -271,6 +271,47 @@ produces a statement equivalent to:: SELECT CAST('some text' AS TSVECTOR) AS anon_1 +Full Text Searches in Postgresql are influenced by a combination of: the +PostgresSQL setting of ``default_text_search_config``, the ``regconfig`` used to +build the GIN/GiST indexes, and the ``regconfig`` optionally passed in during a +query. + +When performing a Full Text Search against a column that has a GIN or +GiST index that is already pre-computed (which is common on full text searches) +one may need to explicitly pass in a particular PostgresSQL ``regconfig`` value +to ensure the query-planner utilizes the index and does not re-compute the +column on demand. + +In order to provide for this explicit query planning, or to use different search +strategies, the ``match`` method accepts a ``postgresql_regconfig`` keyword +argument. + + select([mytable.c.id]).where( + mytable.c.title.match('somestring', postgresql_regconfig='english') + ) + +Emits the equivalent of:: + + SELECT mytable.id FROM mytable + WHERE mytable.title @@ to_tsquery('english', 'somestring') + +One can also specifically pass in a `'regconfig'` value to the ``to_tsvector()`` +command as the initial argument. + + select([mytable.c.id]).where( + func.to_tsvector('english', mytable.c.title )\ + .match('somestring', postgresql_regconfig='english') + ) + +produces a statement equivalent to:: + + SELECT mytable.id FROM mytable + WHERE to_tsvector('english', mytable.title) @@ + to_tsquery('english', 'somestring') + +It is recommended that you use the ``EXPLAIN ANALYZE...`` tool from PostgresSQL +to ensure that you are generating queries with SQLAlchemy that take full +advantage of any indexes you may have created for full text search. FROM ONLY ... ------------------------ @@ -1134,14 +1175,25 @@ class PGCompiler(compiler.SQLCompiler): def visit_getitem_binary(self, binary, operator, **kw): return "%s[%s]" % ( - self.process(binary.left, **kw), - self.process(binary.right, **kw) - ) + self.process(binary.left, **kw), + self.process(binary.right, **kw) + ) def visit_match_op_binary(self, binary, operator, **kw): + if "postgresql_regconfig" in binary.modifiers: + regconfig = self.render_literal_value(\ + binary.modifiers['postgresql_regconfig'], + sqltypes.STRINGTYPE) + if regconfig: + return "%s @@ to_tsquery(%s, %s)" % ( + self.process(binary.left, **kw), + regconfig, + self.process(binary.right, **kw) + ) return "%s @@ to_tsquery(%s)" % ( - self.process(binary.left, **kw), - self.process(binary.right, **kw)) + self.process(binary.left, **kw), + self.process(binary.right, **kw) + ) def visit_ilike_op_binary(self, binary, operator, **kw): escape = binary.modifiers.get("escape", None) diff --git a/lib/sqlalchemy/sql/default_comparator.py b/lib/sqlalchemy/sql/default_comparator.py index 20f13f70fa..4386eb52c2 100644 --- a/lib/sqlalchemy/sql/default_comparator.py +++ b/lib/sqlalchemy/sql/default_comparator.py @@ -213,7 +213,7 @@ class _DefaultColumnComparator(operators.ColumnOperators): """See :meth:`.ColumnOperators.match`.""" return self._boolean_compare(expr, operators.match_op, self._check_literal(expr, operators.match_op, - other)) + other), **kw) def _distinct_impl(self, expr, op, **kw): """See :meth:`.ColumnOperators.distinct`.""" diff --git a/lib/sqlalchemy/sql/operators.py b/lib/sqlalchemy/sql/operators.py index 9ad30e49e2..55700a3822 100644 --- a/lib/sqlalchemy/sql/operators.py +++ b/lib/sqlalchemy/sql/operators.py @@ -758,8 +758,8 @@ def notcontains_op(a, b, escape=None): return ~a.contains(b, escape=escape) -def match_op(a, b): - return a.match(b) +def match_op(a, b, **kw): + return a.match(b, **kw) def comma_op(a, b): diff --git a/test/dialect/postgresql/test_compiler.py b/test/dialect/postgresql/test_compiler.py index c813893852..76166b6dd2 100644 --- a/test/dialect/postgresql/test_compiler.py +++ b/test/dialect/postgresql/test_compiler.py @@ -695,3 +695,84 @@ class DistinctOnTest(fixtures.TestBase, AssertsCompiledSQL): "FROM t, (SELECT t.id AS id, t.a AS a, " "t.b AS b FROM t) AS sq WHERE t.id = sq.id" ) + +class FullTextSearchTest(fixtures.TestBase, AssertsCompiledSQL): + """Tests for full text searching + """ + __dialect__ = postgresql.dialect() + + def setup(self): + self.table = Table('t', MetaData(), + Column('id', Integer, primary_key=True), + Column('title', String), + Column('body', String), + ) + self.table_alt = table('mytable', + column('id', Integer), + column('title', String(128)), + column('body', String(128))) + + def _raise_query(self, q): + """ + useful for debugging. just do... + self._raise_query(q) + """ + c = q.compile(dialect=postgresql.dialect()) + raise ValueError(c) + + def test_match_basic(self): + s = select([self.table_alt.c.id])\ + .where(self.table_alt.c.title.match('somestring')) + self.assert_compile(s, + 'SELECT mytable.id ' + 'FROM mytable ' + 'WHERE mytable.title @@ to_tsquery(%(title_1)s)') + + def test_match_regconfig(self): + s = select([self.table_alt.c.id])\ + .where( + self.table_alt.c.title.match('somestring', + postgresql_regconfig='english') + ) + self.assert_compile(s, + 'SELECT mytable.id ' + 'FROM mytable ' + """WHERE mytable.title @@ to_tsquery('english', %(title_1)s)""") + + def test_match_tsvector(self): + s = select([self.table_alt.c.id])\ + .where( + func.to_tsvector( self.table_alt.c.title )\ + .match('somestring') + ) + self.assert_compile(s, + 'SELECT mytable.id ' + 'FROM mytable ' + 'WHERE to_tsvector(mytable.title) @@ to_tsquery(%(to_tsvector_1)s)') + + def test_match_tsvectorconfig(self): + s = select([self.table_alt.c.id])\ + .where( + func.to_tsvector( 'english', self.table_alt.c.title )\ + .match('somestring') + ) + self.assert_compile(s, + 'SELECT mytable.id ' + 'FROM mytable ' + 'WHERE to_tsvector(%(to_tsvector_1)s, mytable.title) @@ ' + 'to_tsquery(%(to_tsvector_2)s)' + ) + + def test_match_tsvectorconfig_regconfig(self): + s = select([self.table_alt.c.id])\ + .where(\ + func.to_tsvector( 'english', self.table_alt.c.title )\ + .match('somestring', postgresql_regconfig='english') + ) + self.assert_compile(s, + 'SELECT mytable.id ' + 'FROM mytable ' + 'WHERE to_tsvector(%(to_tsvector_1)s, mytable.title) @@ ' + """to_tsquery('english', %(to_tsvector_2)s)""" + ) + -- 2.47.3