git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Modify lexing of multi-char operators per pghackers discussion around
author: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 18 Mar 2000 18:03:12 +0000 (18:03 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sat, 18 Mar 2000 18:03:12 +0000 (18:03 +0000)
16-Mar-00: trailing + or - is not part of the operator unless the operator
also contains characters not present in SQL92-defined operators.  This
solves the 'X=-Y' problem without unduly constraining users' choice of
operator names --- in particular, no existing Postgres operator names
become invalid.

Also, remove processing of // comments, as agreed in the same thread.

doc/src/sgml/ref/create_operator.sgml
doc/src/sgml/syntax.sgml
src/backend/parser/scan.l
src/bin/psql/mainloop.c
src/interfaces/ecpg/preproc/pgc.l

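diff --git a/doc/src/sgml/ref/create_operator.sgml b/doc/src/sgml/ref/create_operator.sgml
index 7f06c4cece36ca76b59fddd64b875db29ee36638..36d791d2a7987bd6bb73dbe5bf1b1bfb08f2c47f 100644
--- a/doc/src/sgml/ref/create_operator.sgml
+++ b/doc/src/sgml/ref/create_operator.sgml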
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_operator.sgml,v 1.11 1999/07/22 15:09:08 thomas Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/create_operator.sgml,v 1.12 2000/03/18 18:03:12 tgl Exp $
 Postgres documentation
 -->
 
@@ -60,8 +60,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
       <term><replaceable class="parameter">type1</replaceable></term>
       <listitem>
        <para>
-       The type for the left-hand side of the operator, if any. This option would be
-       omitted for a right-unary operator.
+       The type of the left-hand argument of the operator, if any.
+       This option would be omitted for a left-unary operator.
        </para>
       </listitem>
      </varlistentry>
@@ -69,8 +69,8 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
       <term><replaceable class="parameter">type2</replaceable></term>
       <listitem>
        <para>
-       The type for the right-hand side of the operator, if any. This option would be
-       omitted for a left-unary operator.
+       The type of the right-hand argument of the operator, if any.
+       This option would be omitted for a right-unary operator.
        </para>
       </listitem>
      </varlistentry>
@@ -78,7 +78,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
       <term><replaceable class="parameter">com_op</replaceable></term>
       <listitem>
        <para>
-       The commutator for this operator.
+       The commutator of this operator.
        </para>
       </listitem>
      </varlistentry>
@@ -110,7 +110,7 @@ CREATE OPERATOR <replaceable>name</replaceable> ( PROCEDURE = <replaceable class
       <term>HASHES</term>
       <listitem>
        <para>
-Indicates this operator can support a hash-join algorithm.
+       Indicates this operator can support a hash join.
        </para>
       </listitem>
      </varlistentry>
@@ -118,7 +118,8 @@ Indicates this operator can support a hash-join algorithm.
       <term><replaceable class="parameter">left_sort_op</replaceable></term>
       <listitem>
        <para>
-       Operator that sorts the left-hand data type of this operator.
+       If this operator can support a merge join, the
+       operator that sorts the left-hand data type of this operator.
        </para>
       </listitem>
      </varlistentry>
@@ -126,7 +127,8 @@ Indicates this operator can support a hash-join algorithm.
       <term><replaceable class="parameter">right_sort_op</replaceable></term>
       <listitem>
        <para>
-       Operator that sorts the right-hand data type of this operator.
+       If this operator can support a merge join, the
+       operator that sorts the right-hand data type of this operator.
        </para>
       </listitem>
      </varlistentry>
@@ -172,22 +174,56 @@ CREATE
   </para>
   <para>
    The operator <replaceable class="parameter">name</replaceable>
-   is a sequence of up to thirty two (32) characters in any combination
-   from the following:
+   is a sequence of up to NAMEDATALEN-1 (31 by default) characters
+   from the following list:
    <literallayout>
-+ - * / &lt; &gt; = ~ ! @ # % ^ & | ` ? $ : 
++ - * / &lt; &gt; = ~ ! @ # % ^ &amp; | ` ? $ : 
    </literallayout>
+
+   There are a few restrictions on your choice of name:
+   <itemizedlist>
+    <listitem>
+     <para>
+     "$" and ":" cannot be defined as single-character operators,
+     although they can be part of a multi-character operator name.
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+     "--" and "/*" cannot appear anywhere in an operator name,
+     since they will be taken as the start of a comment.
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+     A multi-character operator name cannot end in "+" or "-",
+     unless the name also contains at least one of these characters:
+     <literallayout>
+~ ! @ # % ^ &amp; | ` ? $ : 
+     </literallayout>
+     For example, <literal>@-</literal> is an allowed operator name,
+     but <literal>*-</literal> is not.
+     This restriction allows <productname>Postgres</productname> to
+     parse SQL-compliant queries without requiring spaces between tokens.
+     </para>
+    </listitem>
+   </itemizedlist>
+
    <note>
     <para>
-     No  alphabetic characters are allowed in an operator name.
-     This enables <productname>Postgres</productname> to parse SQL input
-     into tokens without requiring spaces between each token.
+     When working with non-SQL-standard operator names, you will usually
+     need to separate adjacent operators with spaces to avoid ambiguity.
+     For example, if you have defined a left-unary operator named "@",
+     you cannot write <literal>X*@Y</literal>; you must write
+     <literal>X* @Y</literal> to ensure that
+     <productname>Postgres</productname> reads it as two operator names
+     not one.
      </para>
    </note>   
   </para>
   <para>
-   The operator "!=" is mapped to "&lt;&gt;" on input, so they are
-   therefore equivalent.
+   The operator "!=" is mapped to "&lt;&gt;" on input, so these two names
+   are always equivalent.
   </para>
   <para>
    At least one of LEFTARG and RIGHTARG must be defined.  For
@@ -196,11 +232,11 @@ CREATE
    unary operators only RIGHTARG should be defined.
   </para>
   <para>
-   Also, the
+   The
    <replaceable class="parameter">func_name</replaceable> procedure must have
    been previously defined using <command>CREATE FUNCTION</command> and  must
    be defined to accept the correct number of arguments
-   (either  one or two).
+   (either  one or two) of the indicated types.
   </para>
   <para>
    The commutator operator should be identified if one exists,
@@ -247,8 +283,6 @@ MYBOXES.description !== "0,0,1,1"::box
    does not yet have a commutator itself, then the commutator's
    entry is updated to have the newly created operator as its
    commutator.  This applies to the negator, as well.
-  </para>
-  <para>
    This  is to allow the definition of two operators that are
    the commutators or the negators of each other.  The  first
    operator should be defined without a commutator or negator
@@ -258,7 +292,7 @@ MYBOXES.description !== "0,0,1,1"::box
    it also works to just have both operators refer to each other.)
   </para>
   <para>
-   The next three specifications are  present  to  support  the
+   The HASHES, SORT1, and SORT2 options are  present  to  support  the
    query  optimizer in performing joins.  
    <productname>Postgres</productname> can always
    evaluate a join (i.e., processing a clause with two  tuple
@@ -294,9 +328,8 @@ MYBOXES.description !== "0,0,1,1"::box
    be worth the complexity involved.
   </para>
   <para>
-   The  last  two  pieces of the specification are present so
-   the query optimizer  can  estimate  result  sizes.   If  a
-   clause of the form:
+   The RESTRICT and JOIN options assist the query optimizer in estimating
+   result sizes.  If a clause of the form:
    <programlisting>
 MYBOXES.description &lt;&lt;&lt; "0,0,1,1"::box
    </programlisting>
@@ -310,7 +343,7 @@ MYBOXES.description &lt;&lt;&lt; "0,0,1,1"::box
    data types and returns a floating point  number.   The
    query  optimizer  simply  calls this function, passing the
    parameter "0,0,1,1" and multiplies the result by the relation
-   size to get the desired expected number of instances.
+   size to get the expected number of instances.
   </para>
   <para>
    Similarly, when the operands of the operator both  contain
@@ -318,7 +351,7 @@ MYBOXES.description &lt;&lt;&lt; "0,0,1,1"::box
    size of the resulting join.  The function  join_proc  will
    return  another floating point number which will be multiplied
    by the cardinalities of the two classes involved  to
-   compute the desired expected result size.
+   compute the expected result size.
   </para>
   <para>
    The difference between the function
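diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index 332464429cd79b3ba7d3eaf986deae491bb56b59..918d91a05cff7e39369792dec4ed3cca6865f90c 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml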
@@ -315,12 +315,11 @@ UNCOMMITTED UNNAMED
 
    <para>
     A <firstterm>comment</firstterm>
-    is an arbitrary sequence of characters following double dashes up to the end
-    of the line.  We also support double-slashes as comments, e.g.:
+    is an arbitrary sequence of characters beginning with double dashes
+    and extending to the end of the line, e.g.:
 
     <programlisting>
 -- This is a standard SQL comment
-// And this is another supported comment style, like C++
     </programlisting>
 
 We also support C-style block comments, e.g.:
@@ -331,6 +330,9 @@ We also support C-style block comments, e.g.:
    comment
  */
     </programlisting>
+
+A comment beginning with "/*" extends to the first occurrence of "*/".
+
    </para>
   </sect1>
 
@@ -340,17 +342,22 @@ We also support C-style block comments, e.g.:
    <para>
     Names in SQL are sequences of less than NAMEDATALEN alphanumeric characters,
     starting with an alphabetic character.  By default, NAMEDATALEN is set
-    to 32but at the time the system is built, NAMEDATALEN can be changed
+    to 32  (but at the time the system is built, NAMEDATALEN can be changed
     by changing the <literal>#define</literal> in
-    src/backend/include/postgres.h.
+    src/backend/include/postgres.h).
     Underscore ("_") is considered an alphabetic character.
    </para>
 
    <para>
-    In some contexts, names may contain other characters if surrounded 
-    by double quotes. For example, table or column names may contain otherwise
-    disallowed characters such as spaces, ampersands, etc. using this
-    technique.
+    Names containing other characters may be formed by surrounding them
+    with double quotes.  For example, table or column names may contain
+    otherwise disallowed characters such as spaces, ampersands, etc. if
+    quoted.  Quoting a name also makes it case-sensitive,
+    whereas unquoted names are always folded to lower case.  For example,
+    the names <literal>FOO</literal>, <literal>foo</literal>
+    and <literal>"foo"</literal> are
+    considered the same by <productname>Postgres</productname>, but
+    <literal>"Foo"</literal> is a different name.
    </para>
   </sect1>
 
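diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index f972d6ead174f5602bcc5aafd5c7e3a513625947..64a389b7680d4748fd7a1acfcf3cd05a86a96001 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l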
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.67 2000/03/13 01:52:06 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.68 2000/03/18 18:03:09 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -87,10 +87,10 @@ static void addlit(char *ytext, int yleng);
  * and to eliminate parsing troubles for numeric strings.
  * Exclusive states:
  *  <xb> binary numeric string - thomas 1997-11-16
- *  <xc> extended C-style comments - tgl 1997-07-12
- *  <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
+ *  <xc> extended C-style comments - thomas 1997-07-12
+ *  <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
  *  <xh> hexadecimal numeric string - thomas 1997-11-16
- *  <xq> quoted strings - tgl 1997-07-30
+ *  <xq> quoted strings - thomas 1997-07-30
  */
 
 %x xb
@@ -144,7 +144,7 @@ xdinside            [^"]+
  * have something like plus-slash-star, lex will think this is a 3-character
  * operator whereas we want to see it as a + operator and a comment start.
  * The solution is two-fold:
- * 1. append {op_and_self}* to xcstart so that it matches as much text as
+ * 1. append {op_chars}* to xcstart so that it matches as much text as
  *    {operator} would. Then the tie-breaker (first matching rule of same
  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
  *    in case it contains a star-slash that should terminate the comment.
@@ -154,7 +154,7 @@ xdinside            [^"]+
  * SQL92-style comments, which start with dash-dash, have similar interactions
  * with the operator rule.
  */
-xcstart                        \/\*{op_and_self}*
+xcstart                        \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               ([^*]+)|(\*+[^/])
 
@@ -166,10 +166,19 @@ identifier                {letter}{letter_or_digit}*
 
 typecast               "::"
 
-/* NB: if you change "self", fix the copy in the operator rule too! */
+/*
+ * "self" is the set of chars that should be returned as single-character
+ * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
+ * which can be one or more characters long (but if a single-char token
+ * appears in the "self" set, it is not to be returned as an Op).  Note
+ * that the sets overlap, but each has some chars that are not in the other.
+ *
+ * If you change either set, adjust the character lists appearing in the
+ * rule for "operator"!
+ */
 self                   [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
-op_and_self            [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
-operator               {op_and_self}+
+op_chars               [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
+operator               {op_chars}+
 
 /* we no longer allow unary minus in numbers. 
  * instead we pass it separately to parser. there it gets
@@ -202,7 +211,7 @@ horiz_space         [ \t\f]
 newline                        [\n\r]
 non_newline            [^\n\r]
 
-comment                        (("--"|"//"){non_newline}*)
+comment                        ("--"{non_newline}*)
 
 whitespace             ({space}|{comment})
 
@@ -220,7 +229,7 @@ other                       .
 
 /* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
  * AT&T lex does not properly handle C-style comments in this second lex block.
- * So, put comments here. tgl - 1997-09-08
+ * So, put comments here. thomas - 1997-09-08
  *
  * Quoted strings must allow some special characters such as single-quote
  *  and newline.
@@ -329,23 +338,57 @@ other                     .
 {self}                 { return yytext[0]; }
 
 {operator}             {
-                                       /* Check for embedded slash-star or dash-dash */
-                                       char *slashstar = strstr((char*)yytext, "/*");
-                                       char *dashdash = strstr((char*)yytext, "--");
+                                       /*
+                                        * Check for embedded slash-star or dash-dash; those
+                                        * are comment starts, so operator must stop there.
+                                        * Note that slash-star or dash-dash at the first
+                                        * character will match a prior rule, not this one.
+                                        */
+                                       int             nchars = yyleng;
+                                       char   *slashstar = strstr((char*)yytext, "/*");
+                                       char   *dashdash = strstr((char*)yytext, "--");
 
                                        if (slashstar && dashdash)
                                        {
+                                               /* if both appear, take the first one */
                                                if (slashstar > dashdash)
                                                        slashstar = dashdash;
                                        }
                                        else if (!slashstar)
                                                slashstar = dashdash;
-
                                        if (slashstar)
+                                               nchars = slashstar - ((char*)yytext);
+
+                                       /*
+                                        * For SQL92 compatibility, '+' and '-' cannot be the
+                                        * last char of a multi-char operator unless the operator
+                                        * contains chars that are not in SQL92 operators.
+                                        * The idea is to lex '=-' as two operators, but not
+                                        * to forbid operator names like '?-' that could not be
+                                        * sequences of SQL92 operators.
+                                        */
+                                       while (nchars > 1 &&
+                                                  (yytext[nchars-1] == '+' ||
+                                                       yytext[nchars-1] == '-'))
+                                       {
+                                               int             ic;
+
+                                               for (ic = nchars-2; ic >= 0; ic--)
+                                               {
+                                                       if (strchr("~!@#&`?$:%^|", yytext[ic]))
+                                                               break;
+                                               }
+                                               if (ic >= 0)
+                                                       break; /* found a char that makes it OK */
+                                               nchars--; /* else remove the +/-, and check again */
+                                       }
+
+                                       if (nchars < yyleng)
                                        {
-                                               int nchars = slashstar - ((char*)yytext);
+                                               /* Strip the unwanted chars from the token */
                                                yyless(nchars);
-                                               /* If what we have left is only one char, and it's
+                                               /*
+                                                * If what we have left is only one char, and it's
                                                 * one of the characters matching "self", then
                                                 * return it as a character token the same way
                                                 * that the "self" rule would have.
@@ -355,8 +398,9 @@ other                       .
                                                        return yytext[0];
                                        }
 
+                                       /* Convert "!=" operator to "<>" for compatibility */
                                        if (strcmp((char*)yytext, "!=") == 0)
-                                               yylval.str = pstrdup("<>"); /* compatibility */
+                                               yylval.str = pstrdup("<>");
                                        else
                                                yylval.str = pstrdup((char*)yytext);
                                        return Op;
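diff --git a/src/bin/psql/mainloop.c b/src/bin/psql/mainloop.c
index 4f71f3e410526fccb728ce0a5d8ce4b52a1a0b1f..eadd50e94af520bdde96313ce3b4604b07294088 100644
--- a/src/bin/psql/mainloop.c
+++ b/src/bin/psql/mainloop.c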
@@ -3,7 +3,7 @@
  *
  * Copyright 2000 by PostgreSQL Global Development Group
  *
- * $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.25 2000/03/13 13:46:32 petere Exp $
+ * $Header: /cvsroot/pgsql/src/bin/psql/mainloop.c,v 1.26 2000/03/18 18:03:11 tgl Exp $
  */
 #include "postgres.h"
 #include "mainloop.h"
@@ -318,8 +318,7 @@ MainLoop(FILE *source)
                        }
 
                        /* single-line comment? truncate line */
-                       else if ((line[i] == '-' && line[i + thislen] == '-') ||
-                                        (line[i] == '/' && line[i + thislen] == '/'))
+                       else if (line[i] == '-' && line[i + thislen] == '-')
                        {
                                line[i] = '\0'; /* remove comment */
                                break;
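diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index bcc8e6430e201c6c1829dc4893f6df9214ae375b..992b293085b3de09267af30a446fffe0f12355ba 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l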
@@ -12,7 +12,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.55 2000/03/18 05:44:21 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/pgc.l,v 1.56 2000/03/18 18:03:10 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -86,10 +86,10 @@ static struct _if_value {
  * and to eliminate parsing troubles for numeric strings.
  * Exclusive states:
  *  <xb> binary numeric string - thomas 1997-11-16
- *  <xc> extended C-style comments - tgl 1997-07-12
- *  <xd> delimited identifiers (double-quoted identifiers) - tgl 1997-10-27
+ *  <xc> extended C-style comments - thomas 1997-07-12
+ *  <xd> delimited identifiers (double-quoted identifiers) - thomas 1997-10-27
  *  <xh> hexadecimal numeric string - thomas 1997-11-16
- *  <xq> quoted strings - tgl 1997-07-30
+ *  <xq> quoted strings - thomas 1997-07-30
  */
 
 %x xb
@@ -146,14 +146,16 @@ xdcqdq                    \\\"
 xdcother               [^"]
 xdcinside              ({xdcqq}|{xdcqdq}|{xdcother})
 
-/* C-Style Comments
+/* C-style comments
+ *
  * The "extended comment" syntax closely resembles allowable operator syntax.
  * The tricky part here is to get lex to recognize a string starting with
  * slash-star as a comment, when interpreting it as an operator would produce
- * a longer match --- remember lex will prefer a longer match!  Also, if we 
- * have tor whereas we want to see it as a + operator and a comment start.
+ * a longer match --- remember lex will prefer a longer match!  Also, if we
+ * have something like plus-slash-star, lex will think this is a 3-character
+ * operator whereas we want to see it as a + operator and a comment start.
  * The solution is two-fold:
- * 1. append {op_and_self}* to xcstart so that it matches as much text as
+ * 1. append {op_chars}* to xcstart so that it matches as much text as
  *    {operator} would. Then the tie-breaker (first matching rule of same
  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
  *    in case it contains a star-slash that should terminate the comment.
@@ -163,22 +165,31 @@ xdcinside         ({xdcqq}|{xdcqdq}|{xdcother})
  * SQL92-style comments, which start with dash-dash, have similar interactions
  * with the operator rule.
  */
-xcstart                 \/\*{op_and_self}*
+xcstart                        \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               ([^*]+)|(\*+[^/])
 
 digit                  [0-9]
 letter                 [\200-\377_A-Za-z]
-letter_or_digit                [\200-\377_A-Za-z0-9]
+letter_or_digit        [\200-\377_A-Za-z0-9]
 
 identifier             {letter}{letter_or_digit}*
 
 typecast               "::"
 
-/* NB: if you change "self", fix the copy in the operator rule too! */
+/*
+ * "self" is the set of chars that should be returned as single-character
+ * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
+ * which can be one or more characters long (but if a single-char token
+ * appears in the "self" set, it is not to be returned as an Op).  Note
+ * that the sets overlap, but each has some chars that are not in the other.
+ *
+ * If you change either set, adjust the character lists appearing in the
+ * rule for "operator"!
+ */
 self                   [,()\[\].;$\:\+\-\*\/\%\^\<\>\=\|]
-op_and_self            [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
-operator               {op_and_self}+
+op_chars               [\~\!\@\#\^\&\|\`\?\$\:\+\-\*\/\%\<\>\=]
+operator               {op_chars}+
 
 /* we no longer allow unary minus in numbers. 
  * instead we pass it separately to parser. there it gets
@@ -215,7 +226,7 @@ horiz_space         [ \t\f]
 newline                 [\n\r]
 non_newline            [^\n\r]
 
-comment                 (("--"|"//"){non_newline}*)
+comment         ("--"{non_newline}*)
 
 whitespace             ({space}|{comment})
 
@@ -250,7 +261,7 @@ cppline                     {space}*#(.*\\{line_end})*.*
 
 /* DO NOT PUT ANY COMMENTS IN THE FOLLOWING SECTION.
  * AT&T lex does not properly handle C-style comments in this second lex block.
- * So, put comments here. tgl - 1997-09-08
+ * So, put comments here. thomas - 1997-09-08
  *
  * Quoted strings must allow some special characters such as single-quote
  *  and newline.
@@ -294,15 +305,16 @@ cppline                   {space}*#(.*\\{line_end})*.*
                                                mmerror(ET_ERROR, "Bad binary integer input!");
                                        return ICONST;
                                }
-<xb><<EOF>>            { mmerror(ET_ERROR, "Unterminated binary integer"); }
  
 <xh>{xhinside} |
 <xb>{xbinside} {
                                        addlit(yytext, yyleng);
                                }
 <xh>{xhcat}            |
-<xb>{xbcat}            {               /* ignore */
+<xb>{xbcat}            {
+                                       /* ignore */
                                }
+<xb><<EOF>>            { mmerror(ET_ERROR, "Unterminated binary integer"); }
 
 <SQL>{xhstart}         {
                                        BEGIN(xh);
@@ -367,23 +379,57 @@ cppline                   {space}*#(.*\\{line_end})*.*
                                  return yytext[0];
                                }
 <SQL>{operator}                        {
-                                       /* Check for embedded slash-star or dash-dash */
-                                        char *slashstar = strstr((char*)yytext, "/*");
-                                        char *dashdash = strstr((char*)yytext, "--");
+                                       /*
+                                        * Check for embedded slash-star or dash-dash; those
+                                        * are comment starts, so operator must stop there.
+                                        * Note that slash-star or dash-dash at the first
+                                        * character will match a prior rule, not this one.
+                                        */
+                                       int             nchars = yyleng;
+                                       char   *slashstar = strstr((char*)yytext, "/*");
+                                       char   *dashdash = strstr((char*)yytext, "--");
 
                                        if (slashstar && dashdash)
                                        {
+                                               /* if both appear, take the first one */
                                                if (slashstar > dashdash)
                                                        slashstar = dashdash;
                                        }
                                        else if (!slashstar)
                                                slashstar = dashdash;
-
                                        if (slashstar)
+                                               nchars = slashstar - ((char*)yytext);
+
+                                       /*
+                                        * For SQL92 compatibility, '+' and '-' cannot be the
+                                        * last char of a multi-char operator unless the operator
+                                        * contains chars that are not in SQL92 operators.
+                                        * The idea is to lex '=-' as two operators, but not
+                                        * to forbid operator names like '?-' that could not be
+                                        * sequences of SQL92 operators.
+                                        */
+                                       while (nchars > 1 &&
+                                                  (yytext[nchars-1] == '+' ||
+                                                       yytext[nchars-1] == '-'))
+                                       {
+                                               int             ic;
+
+                                               for (ic = nchars-2; ic >= 0; ic--)
+                                               {
+                                                       if (strchr("~!@#&`?$:%^|", yytext[ic]))
+                                                               break;
+                                               }
+                                               if (ic >= 0)
+                                                       break; /* found a char that makes it OK */
+                                               nchars--; /* else remove the +/-, and check again */
+                                       }
+
+                                       if (nchars < yyleng)
                                        {
-                                               int nchars = slashstar - ((char*)yytext);
+                                               /* Strip the unwanted chars from the token */
                                                yyless(nchars);
-                                               /* If what we have left is only one char, and it's
+                                               /*
+                                                * If what we have left is only one char, and it's
                                                 * one of the characters matching "self", then
                                                 * return it as a character token the same way
                                                 * that the "self" rule would have.
@@ -393,8 +439,9 @@ cppline                     {space}*#(.*\\{line_end})*.*
                                                        return yytext[0];
                                        }
 
+                                       /* Convert "!=" operator to "<>" for compatibility */
                                        if (strcmp((char*)yytext, "!=") == 0)
-                                               yylval.str = mm_strdup("<>"); /* compatability */
+                                               yylval.str = mm_strdup("<>");
                                        else
                                                yylval.str = mm_strdup((char*)yytext);
                                        return Op;