git.ipfire.org Git - thirdparty/postgresql.git/commitdiff
Teach plpgsql's lexer about dollar-quoted literals.
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 25 Feb 2004 18:10:51 +0000 (18:10 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 25 Feb 2004 18:10:51 +0000 (18:10 +0000)
Andrew Dunstan, some help from Tom Lane.

src/pl/plpgsql/src/gram.y
src/pl/plpgsql/src/pl_exec.c
src/pl/plpgsql/src/plpgsql.h
src/pl/plpgsql/src/scan.l

index 45c50d088b1a3ef8d89046a7df2910fefdf38d02..b6526ce47bcaa72cfc858ca12410a8d24f63c5f3 100644 (file)
@@ -4,7 +4,7 @@
  *                                               procedural language
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.50 2003/12/23 00:01:57 tgl Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.51 2004/02/25 18:10:51 tgl Exp $
  *
  *       This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -1235,7 +1235,7 @@ stmt_raise                : K_RAISE lno raise_level raise_msg raise_params ';'
 
 raise_msg              : T_STRING
                                        {
-                                               $$ = strdup(yytext);
+                                               $$ = plpgsql_get_string_value();
                                        }
                                ;
 
index 8c104f63637fbf3982e59581005880542c118bd3..0c409c0e64e74e3b0c818b6c76453fda9b4b4520 100644 (file)
@@ -3,7 +3,7 @@
  *                       procedural language
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.96 2004/02/24 01:44:33 tgl Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_exec.c,v 1.97 2004/02/25 18:10:51 tgl Exp $
  *
  *       This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -1805,7 +1805,7 @@ exec_stmt_raise(PLpgSQL_execstate * estate, PLpgSQL_stmt_raise * stmt)
        for (cp = stmt->message; *cp; cp++)
        {
                /*
-                * Occurences of a single % are replaced by the next argument's
+                * Occurrences of a single % are replaced by the next argument's
                 * external representation. Double %'s are converted to one %.
                 */
                if ((c[0] = *cp) == '%')
@@ -1834,21 +1834,6 @@ exec_stmt_raise(PLpgSQL_execstate * estate, PLpgSQL_stmt_raise * stmt)
                        continue;
                }
 
-               /*
-                * Occurrences of single ' are removed. double ' are reduced to
-                * single ones.  We must do this because the parameter stored by
-                * the grammar is the raw T_STRING input literal, rather than the
-                * de-lexed string as you might expect ...
-                */
-               if (*cp == '\'')
-               {
-                       cp++;
-                       if (*cp == '\'')
-                               plpgsql_dstring_append(&ds, c);
-                       else
-                               cp--;
-                       continue;
-               }
                plpgsql_dstring_append(&ds, c);
        }
 
index 5c48018c42af8b4eeaddff04cc0faef1426f0340..911e331adf2175c9538f061ae277a04b752613f3 100644 (file)
@@ -3,7 +3,7 @@
  *                       procedural language
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.43 2003/11/29 19:52:12 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.44 2004/02/25 18:10:51 tgl Exp $
  *
  *       This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -694,5 +694,6 @@ extern void plpgsql_push_back_token(int token);
 extern int     plpgsql_scanner_lineno(void);
 extern void plpgsql_scanner_init(const char *str, int functype);
 extern void plpgsql_scanner_finish(void);
+extern char *plpgsql_get_string_value(void);
 
 #endif   /* PLPGSQL_H */
index b891e2b9e185f319533b8d80553f5acbd12375ee..de447e09f1b711f89a0b7ae26f525a69784ad4cd 100644 (file)
@@ -4,7 +4,7 @@
  *                       procedural language
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.31 2004/02/24 22:06:32 tgl Exp $
+ *    $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.32 2004/02/25 18:10:51 tgl Exp $
  *
  *    This software is copyrighted by Jan Wieck - Hamburg.
  *
@@ -57,6 +57,8 @@ static int    lookahead_token;
 static bool have_lookahead_token;
 static const char *cur_line_start;
 static int     cur_line_num;
+static char    *dolqstart;      /* current $foo$ quote start string */
+static int     dolqlen;                        /* signal to plpgsql_get_string_value */
 
 int    plpgsql_SpaceScanned = 0;
 %}
@@ -70,7 +72,9 @@ int   plpgsql_SpaceScanned = 0;
 %option case-insensitive
 
 
-%x     IN_STRING IN_COMMENT
+%x     IN_STRING
+%x     IN_COMMENT
+%x     IN_DOLLARQUOTE
 
 digit                  [0-9]
 ident_start            [A-Za-z\200-\377_]
@@ -84,6 +88,14 @@ param                        \${digit}+
 
 space                  [ \t\n\r\f]
 
+/* $foo$ style quotes ("dollar quoting")
+ * copied straight from the backend SQL parser
+ */
+dolq_start             [A-Za-z\200-\377_]
+dolq_cont              [A-Za-z\200-\377_0-9]
+dolqdelim              \$({dolq_start}{dolq_cont}*)?\$
+dolqinside             [^$]+
+
 %%
     /* ----------
      * Local variables in scanner to remember where
@@ -97,7 +109,7 @@ space                        [ \t\n\r\f]
      * Reset the state when entering the scanner
      * ----------
      */
-    BEGIN INITIAL;
+    BEGIN(INITIAL);
     plpgsql_SpaceScanned = 0;
 
     /* ----------
@@ -247,9 +259,9 @@ dump                        { return O_DUMP;                        }
 --[^\r\n]*             ;
 
 \/\*                   { start_lineno = plpgsql_scanner_lineno();
-                         BEGIN IN_COMMENT;
+                         BEGIN(IN_COMMENT);
                        }
-<IN_COMMENT>\*\/       { BEGIN INITIAL; plpgsql_SpaceScanned = 1; }
+<IN_COMMENT>\*\/       { BEGIN(INITIAL); plpgsql_SpaceScanned = 1; }
 <IN_COMMENT>\n         ;
 <IN_COMMENT>.          ;
 <IN_COMMENT><<EOF>>    {
@@ -260,7 +272,7 @@ dump                        { return O_DUMP;                        }
                        }
 
     /* ----------
-     * Collect anything inside of ''s and return one STRING
+     * Collect anything inside of ''s and return one STRING token
         *
         * Hacking yytext/yyleng here lets us avoid using yymore(), which is
         * a win for performance.  It's safe because we know the underlying
@@ -270,15 +282,18 @@ dump                      { return O_DUMP;                        }
 '                      {
                          start_lineno = plpgsql_scanner_lineno();
                          start_charpos = yytext;
-                         BEGIN IN_STRING;
+                         BEGIN(IN_STRING);
                        }
 <IN_STRING>\\.         { }
 <IN_STRING>\\          { /* can only happen with \ at EOF */ }
 <IN_STRING>''          { }
 <IN_STRING>'           {
-                         yyleng -= (yytext - start_charpos);
+                         /* tell plpgsql_get_string_value it's not a dollar quote */
+                         dolqlen = 0;
+                         /* adjust yytext/yyleng to describe whole string token */
+                         yyleng += (yytext - start_charpos);
                          yytext = start_charpos;
-                         BEGIN INITIAL;
+                         BEGIN(INITIAL);
                          return T_STRING;
                        }
 <IN_STRING>[^'\\]+     { }
@@ -289,6 +304,43 @@ dump                       { return O_DUMP;                        }
                                                 errmsg("unterminated string")));
                        }
 
+{dolqdelim}            {
+                         start_lineno = plpgsql_scanner_lineno();
+                         start_charpos = yytext;
+                         dolqstart = pstrdup(yytext);
+                         BEGIN(IN_DOLLARQUOTE);
+                       }
+<IN_DOLLARQUOTE>{dolqdelim} {
+                         if (strcmp(yytext, dolqstart) == 0)
+                         {
+                                       pfree(dolqstart);
+                                       /* tell plpgsql_get_string_value it is a dollar quote */
+                                       dolqlen = yyleng;
+                                       /* adjust yytext/yyleng to describe whole string token */
+                                       yyleng += (yytext - start_charpos);
+                                       yytext = start_charpos;
+                                       BEGIN(INITIAL);
+                                       return T_STRING;
+                         }
+                         else
+                         {
+                                       /*
+                                        * When we fail to match $...$ to dolqstart, transfer
+                                        * the $... part to the output, but put back the final
+                                        * $ for rescanning.  Consider $delim$...$junk$delim$
+                                        */
+                                       yyless(yyleng-1);
+                         }
+                       }
+<IN_DOLLARQUOTE>{dolqinside} { }
+<IN_DOLLARQUOTE>.      { /* needed for $ inside the quoted text */ }
+<IN_DOLLARQUOTE><<EOF>>        { 
+                               plpgsql_error_lineno = start_lineno;
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_DATATYPE_MISMATCH),
+                                                errmsg("unterminated dollar-quoted string")));
+                       }
+
     /* ----------
      * Any unmatched character is returned as is
      * ----------
@@ -429,7 +481,6 @@ plpgsql_scanner_init(const char *str, int functype)
        BEGIN(INITIAL);
 }
 
-
 /*
  * Called after parsing is done to clean up after plpgsql_scanner_init()
  */
@@ -439,3 +490,65 @@ plpgsql_scanner_finish(void)
        yy_delete_buffer(scanbufhandle);
        pfree(scanbuf);
 }
+
+/*
+ * Called after a T_STRING token is read to get the string literal's value
+ * as a freshly malloc'd string.  (We make this a separate call because in
+ * many scenarios there's no need to get the decoded value.)  Raises ERROR
+ * if the result buffer cannot be allocated.
+ *
+ * Note: we expect the literal to be the most recently lexed token.  This
+ * would not work well if we supported multiple-token pushback or if
+ * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
+ */
+char *
+plpgsql_get_string_value(void)
+{
+       char       *result;
+       const char *cp;
+       int                     len;
+
+       /*
+        * Decoding only ever drops characters (delimiters, doubled quotes,
+        * backslashes), so yyleng + 1 bytes is always enough room.
+        */
+       result = (char *) malloc(yyleng + 1);
+       if (result == NULL)
+               ereport(ERROR,
+                               (errcode(ERRCODE_OUT_OF_MEMORY),
+                                errmsg("out of memory")));
+
+       if (dolqlen > 0)
+       {
+               /* Token is a $foo$...$foo$ string: strip both delimiters */
+               len = yyleng - 2 * dolqlen;
+               Assert(len >= 0);
+               memcpy(result, yytext + dolqlen, len);
+               result[len] = '\0';
+       }
+       else
+       {
+               /* Token is a '...' string */
+               len = 0;
+               for (cp = yytext; *cp; cp++)
+               {
+                       if (*cp == '\'')
+                       {
+                               /* a doubled quote stands for one literal quote */
+                               if (cp[1] == '\'')
+                                       result[len++] = *cp++;
+                               /* else it must be string start or end quote */
+                       }
+                       else if (*cp == '\\')
+                       {
+                               /* drop the backslash, keep the character after it */
+                               if (cp[1] != '\0')      /* just a paranoid check */
+                                       result[len++] = *(++cp);
+                       }
+                       else
+                               result[len++] = *cp;
+               }
+               result[len] = '\0';
+       }
+       return result;
+}