git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Fix parsing of ignored operators in websearch_to_tsquery().
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 14 Jun 2024 00:34:43 +0000 (20:34 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 14 Jun 2024 00:34:43 +0000 (20:34 -0400)
The manual says clearly that punctuation in the input of
websearch_to_tsquery() is ignored, except for the special cases
of dashes and quotes.  However, this failed for cases like
"(foo bar) or something", or in general an ISOPERATOR character
in front of the "or".  We'd switch back to WAITOPERAND state,
then ignore the operator character while remaining in that state,
and then reach the "or" in WAITOPERAND state which (intentionally)
makes us treat it as data.

The fix is simple enough: if we see an ISOPERATOR character while in
WAITOPERATOR state, we have to skip it while staying in that state.
(We don't need to worry about other punctuation characters: those will
be consumed as though they were words, but then rejected by lexizing.)

In v14 and up (since commit eb086056f) we can simplify the code a bit
more too, because there is no longer a reason for the WAITOPERAND
state to distinguish between quoted and unquoted operands.

Per bug #18479 from Manos Emmanouilidis.  Back-patch to all supported
branches.

Discussion: https://postgr.es/m/18479-d9b46e2fc242c33e@postgresql.org

src/backend/utils/adt/tsquery.c
src/test/regress/expected/tsearch.out
src/test/regress/sql/tsearch.sql

index ded919b39b06b28b19ad9f46ea1a538950b48192..1a146372f7a938fc177f53036f8e885061029f5b 100644 (file)
@@ -420,7 +420,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
                                }
                                else if (ISOPERATOR(state->buf))
                                {
-                                       /* or else gettoken_tsvector() will raise an error */
+                                       /* ignore, else gettoken_tsvector() will raise an error */
                                        state->buf++;
                                        state->state = WAITOPERAND;
                                        continue;
@@ -452,15 +452,9 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
                                break;
 
                        case WAITOPERATOR:
-                               if (t_iseq(state->buf, '"'))
+                               if (*state->buf == '\0')
                                {
-                                       /*
-                                        * put implicit AND after an operand and handle this quote
-                                        * in WAITOPERAND
-                                        */
-                                       state->state = WAITOPERAND;
-                                       *operator = OP_AND;
-                                       return PT_OPR;
+                                       return PT_END;
                                }
                                else if (parse_or_operator(state))
                                {
@@ -468,15 +462,17 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
                                        *operator = OP_OR;
                                        return PT_OPR;
                                }
-                               else if (*state->buf == '\0')
+                               else if (ISOPERATOR(state->buf))
                                {
-                                       return PT_END;
+                                       /* ignore other operators in this state too */
+                                       state->buf++;
+                                       continue;
                                }
                                else if (!t_isspace(state->buf))
                                {
-                                       /* put implicit AND after an operand */
-                                       *operator = OP_AND;
+                                       /* insert implicit AND between operands */
                                        state->state = WAITOPERAND;
+                                       *operator = OP_AND;
                                        return PT_OPR;
                                }
                                break;
index cfe38aa306224b7f3b4723fa04d6d2916f6d1fc4..629147c5b119455630c2eb7641c116630f6031cb 100644 (file)
@@ -2539,12 +2539,19 @@ select websearch_to_tsquery('simple', 'abc <-> def');
  'abc' & 'def'
 (1 row)
 
+-- parens are ignored, too
 select websearch_to_tsquery('simple', 'abc (pg or class)');
   websearch_to_tsquery  
 ------------------------
  'abc' & 'pg' | 'class'
 (1 row)
 
+select websearch_to_tsquery('simple', '(foo bar) or (ding dong)');
+      websearch_to_tsquery       
+---------------------------------
+ 'foo' & 'bar' | 'ding' & 'dong'
+(1 row)
+
 -- NOT is ignored in quotes
 select websearch_to_tsquery('english', 'My brand new smartphone');
      websearch_to_tsquery      
index b5a3a68a6474a0ab82c1954e8a465ec70edc6728..0a90c1b539d9e5cd91ee2a167eecb2c983ff2942 100644 (file)
@@ -727,7 +727,10 @@ select websearch_to_tsquery('simple', ':');
 select websearch_to_tsquery('simple', 'abc & def');
 select websearch_to_tsquery('simple', 'abc | def');
 select websearch_to_tsquery('simple', 'abc <-> def');
+
+-- parens are ignored, too
 select websearch_to_tsquery('simple', 'abc (pg or class)');
+select websearch_to_tsquery('simple', '(foo bar) or (ding dong)');
 
 -- NOT is ignored in quotes
 select websearch_to_tsquery('english', 'My brand new smartphone');