From 19a64f101ded20b692ed919aafe0fb01027eef89 Mon Sep 17 00:00:00 2001
From: Bruno Haible <bruno@clisp.org>
Date: Sun, 15 Jun 2003 11:42:18 +0000
Subject: [PATCH] Clean up and fix a few small bugs.

---
 gettext-tools/src/ChangeLog |  31 +++
 gettext-tools/src/x-perl.c  | 471 ++++++++++++++++++------------------
 2 files changed, 273 insertions(+), 229 deletions(-)

diff --git a/gettext-tools/src/ChangeLog b/gettext-tools/src/ChangeLog
index f237a9ab8..66c3fcd0d 100644
--- a/gettext-tools/src/ChangeLog
+++ b/gettext-tools/src/ChangeLog
@@ -1,3 +1,34 @@
+2003-06-14  Bruno Haible  <bruno@clisp.org>
+
+	* x-perl.c (enum token_type_ty): Reorder, so as to simplify the
+	function free_token().
+	(struct token_ty): Fix comments.
+	(token2string): Add const.
+	(token_stack): Make static.
+	(*_debug, stack_dump): Remove all fflushs.
+	(linebuf, linesize, linepos, linebuf_size, last_token, here_eaten,
+	end_of_file): Make static.
+	(last_string, last_string_finished): Remove unused variables.
+	(phase1_getc, get_here_document, skip_pod): Test getline() result
+	for being < 0, not == EOF.
+	(phase1_ungetc): Signal internal error through abort(), not exit().
+	(get_here_document): Tweak. Increase bufmax proportionally.
+	(phase2_getc): Increase bufmax proportionally.
+	(prefer_division_over_regexp): Fix comment.
+	(extract_hex, extract_oct): Add const. Simplify.
+	(extract_quotelike): Add a safety check.
+	(extract_quotelike_pass1): Increase bufmax proportionally. Fix
+	insufficient memory allocation at "bufpos + len >= bufmax".
+	(extract_quotelike_pass3): Increase bufmax proportionally. Make crs
+	const. After \x{ABC} position crs after the closing brace. Simplify \c
+	code. Reduce memory allocation when \N{ABC} is seen. Fix bug in \l and
+	\u. Fix error message when $ or @ is seen.
+	(extract_variable): Increase bufmax proportionally.
+	(interpolate_keywords): Likewise. Convert c's value to unsigned char.
+	(x_perl_prelex): Increase bufmax proportionally. Compute 4th argument
+	to extract_triple_quotelike completely.
+	(extract_perl): Initialize token_stack correctly.
+
 2003-06-13  Bruno Haible  <bruno@clisp.org>
 
 	* msgexec.c (process_string): Pass null_stderr=false.
diff --git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c
index 781cfafe9..75fe7e8a0 100644
--- a/gettext-tools/src/x-perl.c
+++ b/gettext-tools/src/x-perl.c
@@ -41,7 +41,7 @@
 #define _(s) gettext(s)
 
 /* The Perl syntax is defined in perlsyn.pod.  Try the command
-   "perldoc perlsyn".  */
+   "man perlsyn" or "perldoc perlsyn".  */
 
 #define DEBUG_PERL 0
 #define DEBUG_MEMORY 0
@@ -65,8 +65,8 @@ enum token_type_ty
   token_type_named_op,          /* if, unless, while, ... */
   token_type_variable,          /* $... */
   token_type_symbol,		/* symbol, number */
-  token_type_regex_op,		/* s, tr, y, m.  */
   token_type_keyword_symbol,    /* keyword symbol (used by parser) */
+  token_type_regex_op,		/* s, tr, y, m.  */
   token_type_dot,               /* . */
   token_type_other		/* regexp, misc. operator */
 };
@@ -88,14 +88,16 @@ typedef struct token_ty token_ty;
 struct token_ty
 {
   token_type_ty type;
-  string_type_ty string_type;
-  char *string;		/* for token_type_{symbol,string} */
+  string_type_ty string_type;	/* for token_type_string */
+  char *string;			/* for token_type_named_op, token_type_string,
+				   token_type_symbol, token_type_keyword_symbol,
+				   token_type_variable */
   int line_number;
 };
 
 #if DEBUG_PERL
 static const char *
-token2string (token_ty *token)
+token2string (const token_ty *token)
 {
   switch (token->type)
     {
@@ -129,10 +131,10 @@ token2string (token_ty *token)
       return "token_type_variable";
     case token_type_symbol:
       return "token_type_symbol";
-    case token_type_regex_op:
-      return "token_type_regex_op";
     case token_type_keyword_symbol:
       return "token_type_keyword_symbol";
+    case token_type_regex_op:
+      return "token_type_regex_op";
     case token_type_dot:
       return "token_type_dot";
     case token_type_other:
@@ -157,10 +159,9 @@ struct stack
   struct stack_entry *last;
 };
 
-struct stack *token_stack;
+static struct stack *token_stack;
 
-/* Prototypes for local functions.  Needed to ensure compiler checking of
-   function argument counts despite of K&R C function definition syntax.  */
+/* Forward declaration of local functions.  */
 static void interpolate_keywords (message_list_ty *mlp, const char *string);
 static char *extract_quotelike_pass1 (int delim);
 static token_ty *x_perl_lex (message_list_ty *mlp);
@@ -174,9 +175,9 @@ remember_a_message_debug (message_list_ty *mlp, char *string, lex_pos_ty *pos)
 {
   void *retval;
 
-  fprintf (stderr, "*** remember_a_message (%p): ", string); fflush (stderr);
+  fprintf (stderr, "*** remember_a_message (%p): ", string);
   retval = remember_a_message (mlp, string, pos);
-  fprintf (stderr, "%p\n", retval); fflush (stderr);
+  fprintf (stderr, "%p\n", retval);
   return retval;
 }
 
@@ -184,9 +185,8 @@ static void
 remember_a_message_plural_debug (message_ty *mp, char *string, lex_pos_ty *pos)
 {
   fprintf (stderr, "*** remember_a_message_plural (%p, %p): ", mp, string);
-  fflush (stderr);
   remember_a_message_plural (mp, string, pos);
-  fprintf (stderr, "done\n"); fflush (stderr);
+  fprintf (stderr, "done\n");
 }
 
 static void *
@@ -194,9 +194,9 @@ xmalloc_debug (size_t bytes)
 {
   void *retval;
 
-  fprintf (stderr, "*** xmalloc (%u): ", bytes); fflush (stderr);
+  fprintf (stderr, "*** xmalloc (%u): ", bytes);
   retval = xmalloc (bytes);
-  fprintf (stderr, "%p\n", retval); fflush (stderr);
+  fprintf (stderr, "%p\n", retval);
   return retval;
 }
 
@@ -205,9 +205,9 @@ xrealloc_debug (void *buf, size_t bytes)
 {
   void *retval;
 
-  fprintf (stderr, "*** xrealloc (%p, %u): ", buf, bytes); fflush (stderr);
+  fprintf (stderr, "*** xrealloc (%p, %u): ", buf, bytes);
   retval = xrealloc (buf, bytes);
-  fprintf (stderr, "%p\n", retval); fflush (stderr);
+  fprintf (stderr, "%p\n", retval);
   return retval;
 }
 
@@ -217,9 +217,8 @@ xrealloc_static_debug (void *buf, size_t bytes)
   void *retval;
 
   fprintf (stderr, "*** xrealloc_static (%p, %u): ", buf, bytes);
-  fflush (stderr);
   retval = xrealloc (buf, bytes);
-  fprintf (stderr, "%p\n", retval); fflush (stderr);
+  fprintf (stderr, "%p\n", retval);
   return retval;
 }
 
@@ -229,18 +228,17 @@ xstrdup_debug (const char *string)
   char *retval;
 
   fprintf (stderr, "*** xstrdup (%p, %d): ", string, strlen (string));
-  fflush (stderr);
   retval = xstrdup (string);
-  fprintf (stderr, "%p\n", retval); fflush (stderr);
+  fprintf (stderr, "%p\n", retval);
   return retval;
 }
 
 static void
 free_debug (void *buf)
 {
-  fprintf (stderr, "*** free (%p): ", buf); fflush (stderr);
+  fprintf (stderr, "*** free (%p): ", buf);
   free (buf);
-  fprintf (stderr, "done\n"); fflush (stderr);
+  fprintf (stderr, "done\n");
 }
 
 # define xmalloc(b) xmalloc_debug (b)
@@ -274,7 +272,6 @@ stack_dump (struct stack *stack)
 	{
 	  token_ty *token = (token_ty *) last->data;
 	  fprintf (stderr, "  [%s]\n", token2string (token));
-	  fflush (stderr);
 	  switch (token->type)
 	    {
 	    case token_type_named_op:
@@ -283,7 +280,6 @@ stack_dump (struct stack *stack)
 	    case token_type_keyword_symbol:
 	    case token_type_variable:
 	      fprintf (stderr, "    string: %s\n", token->string);
-	      fflush (stderr);
 	      break;
 	    }
 	}
@@ -295,9 +291,8 @@ stack_dump (struct stack *stack)
 #endif
 
 /* Unshifts the pointer DATA onto the stack STACK.  The argument DESTROY
- * is a pointer to a function that frees the resources associated with
- * DATA or NULL (no destructor).
- */
+   is a pointer to a function that frees the resources associated with
+   DATA or NULL (no destructor).  */
 static void
 stack_unshift (struct stack *stack, void *data, void (*destroy) (token_ty *data))
 {
@@ -317,8 +312,7 @@ stack_unshift (struct stack *stack, void *data, void (*destroy) (token_ty *data)
 }
 
 /* Shifts the first element from the stack STACK and returns its contents or
- * NULL if the stack is empty.
- */
+   NULL if the stack is empty.  */
 static void *
 stack_shift (struct stack *stack)
 {
@@ -341,8 +335,7 @@ stack_shift (struct stack *stack)
 }
 
 /* Return the bottom of the stack without removing it from the stack or
- * NULL if the stack is empty.
- */
+   NULL if the stack is empty.  */
 static void *
 stack_head (struct stack *stack)
 {
@@ -458,34 +451,30 @@ static int last_comment_line;
 static int last_non_comment_line;
 
 /* The current line buffer.  */
-char *linebuf;
+static char *linebuf;
 
 /* The size of the current line.  */
-int linesize;
+static int linesize;
 
 /* The position in the current line.  */
-int linepos;
+static int linepos;
 
 /* The size of the input buffer.  */
-size_t linebuf_size;
+static size_t linebuf_size;
 
 /* The last token seen in the token stream.  This is important for the
    interpretation of '?' and '/'.  */
-token_type_ty last_token;
-
-/* The last string token waiting for a dot operator or finishing.  */
-token_ty last_string;
-
-/* True if LAST_STRING is finished.  */
-bool last_string_finished;
+static token_type_ty last_token;
 
 /* Number of lines eaten for here documents.  */
-int here_eaten;
+static int here_eaten;
 
 /* Paranoia: EOF marker for __END__ or __DATA__.  */
-bool end_of_file;
+static bool end_of_file;
+
 
 /* 1. line_number handling.  */
+
 /* Returns the next character from the input stream or EOF.  */
 static int
 phase1_getc ()
@@ -500,7 +489,7 @@ phase1_getc ()
     {
       linesize = getline (&linebuf, &linebuf_size, fp);
 
-      if (linesize == EOF)
+      if (linesize < 0)
 	{
 	  if (ferror (fp))
 	    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
@@ -535,25 +524,30 @@ phase1_ungetc (int c)
   if (c != EOF)
     {
       if (linepos == 0)
-	error (EXIT_FAILURE, 0, _("\
-%s:%d: internal error: attempt to ungetc across line boundary"),
-	       real_file_name, line_number);
+	/* Attempt to ungetc across line boundary.  Shouldn't happen.
+	   No two phase1_ungetc calls are permitted in a row.  */
+	abort ();
 
       --linepos;
     }
 }
 
+/* Read a here document and return its contents.  */
+
 static char *
 get_here_document (const char *delimiter)
 {
+  /* Accumulator for the entire here document, including a NUL byte
+     at the end.  */
   static char *buffer;
   static size_t bufmax = 0;
   size_t bufpos = 0;
+  /* Current line being appended.  */
   static char *my_linebuf = NULL;
   static size_t my_linebuf_size = 0;
-  bool chomp = false;
 
-  if (bufpos >= bufmax)
+  /* Allocate the initial buffer.  Later on, bufmax > 0.  */
+  if (bufmax == 0)
     {
       buffer = xrealloc_static (NULL, 1);
       buffer[0] = '\0';
@@ -563,8 +557,9 @@ get_here_document (const char *delimiter)
   for (;;)
     {
       int read_bytes = getline (&my_linebuf, &my_linebuf_size, fp);
+      bool chomp;
 
-      if (read_bytes == EOF)
+      if (read_bytes < 0)
 	{
 	  if (ferror (fp))
 	    {
@@ -575,12 +570,11 @@ get_here_document (const char *delimiter)
 	    {
 	      error_with_progname = false;
 	      error (EXIT_SUCCESS, 0, _("\
-%s:%d: can\'t find string terminator \"%s\" anywhere before EOF"),
+%s:%d: can't find string terminator \"%s\" anywhere before EOF"),
 		     real_file_name, line_number, delimiter);
 	      error_with_progname = true;
-	      fflush (stderr);
 
-	      return xstrdup (buffer);
+	      break;
 	    }
 	}
 
@@ -599,28 +593,37 @@ get_here_document (const char *delimiter)
 	  --read_bytes;
 	}
 
-      if (read_bytes && my_linebuf[read_bytes - 1] == '\n')
+      /* Temporarily remove the trailing newline from my_linebuf.  */
+      chomp = false;
+      if (read_bytes >= 1 && my_linebuf[read_bytes - 1] == '\n')
 	{
 	  chomp = true;
 	  my_linebuf[read_bytes - 1] = '\0';
 	}
+
+      /* See whether this line terminates the here document.  */
       if (strcmp (my_linebuf, delimiter) == 0)
-	{
-	  return xstrdup (buffer);
-	}
+	break;
+
+      /* Add back the trailing newline to my_linebuf.  */
       if (chomp)
-	{
-	  my_linebuf[read_bytes - 1] = '\n';
-	}
+	my_linebuf[read_bytes - 1] = '\n';
 
-      if (bufpos + read_bytes + 1 >= bufmax)
+      /* Ensure room for read_bytes + 1 bytes.  */
+      if (bufpos + read_bytes >= bufmax)
 	{
-	  bufmax += read_bytes + 1;
+	  do
+	    bufmax = 2 * bufmax + 10;
+	  while (bufpos + read_bytes >= bufmax);
 	  buffer = xrealloc_static (buffer, bufmax);
 	}
+      /* Append this line to the accumulator.  */
       strcpy (buffer + bufpos, my_linebuf);
       bufpos += read_bytes;
     }
+
+  /* Done accumulating the here document.  */
+  return xstrdup (buffer);
 }
 
 /* Skips pod sections.  */
@@ -635,7 +638,7 @@ skip_pod ()
     {
       linesize = getline (&linebuf, &linebuf_size, fp);
 
-      if (linesize == EOF)
+      if (linesize < 0)
 	{
 	  if (ferror (fp))
 	    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
@@ -654,6 +657,7 @@ skip_pod ()
     }
 }
 
+
 /* 2. Replace each comment that is not inside a string literal or regular
    expression with a newline character.  We need to remember the comment
    for later, because it may be attached to a keyword string.  */
@@ -691,14 +695,14 @@ phase2_getc ()
 	    break;
 	  if (buflen >= bufmax)
 	    {
-	      bufmax += 100;
+	      bufmax = 2 * bufmax + 10;
 	      buffer = xrealloc_static (buffer, bufmax);
 	    }
 	  buffer[buflen++] = c;
 	}
       if (buflen >= bufmax)
 	{
-	  bufmax += 100;
+	  bufmax = 2 * bufmax + 10;
 	  buffer = xrealloc_static (buffer, bufmax);
 	}
       buffer[buflen] = '\0';
@@ -717,13 +721,10 @@ phase2_ungetc (int c)
 
 /* There is an ambiguity about '/': It can start a division operator ('/' or
    '/=') or it can start a regular expression.  The distinction is important
-   because inside regular expressions, '#' and '"' lose its special meanings.
-   If you look at the awk grammar, you see that the operator is only allowed
-   right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
-   can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
-   So we prefer the division operator interpretation only right after
-   symbol, string, number, ')', ']', with whitespace but no newline allowed
-   in between.  */
+   because inside regular expressions, '#' loses its special meaning.
+   The distinction is possible depending on the parsing state: After a
+   variable or simple expression, it's a division operator; at the beginning
+   of an expression, it's a regexp.  */
 static bool prefer_division_over_regexp;
 
 /* Free the memory pointed to by a 'struct token_ty'.  */
@@ -746,10 +747,10 @@ free_token (token_ty *tp)
 }
 
 /* Extract an unsigned hexadecimal number from STRING, considering at
-   most LEN bytes and place the result in RESULT.  Returns a pointer
+   most LEN bytes and place the result in *RESULT.  Returns a pointer
    to the first character past the hexadecimal number.  */
-static char *
-extract_hex (char *string, size_t len, unsigned int *result)
+static const char *
+extract_hex (const char *string, size_t len, unsigned int *result)
 {
   size_t i;
 
@@ -757,14 +758,15 @@ extract_hex (char *string, size_t len, unsigned int *result)
 
   for (i = 0; i < len; i++)
     {
+      char c = string[i];
       int number;
 
-      if (string[i] >= 'A' && string[i] <= 'F')
-	number = 10 + string[i] - 'A';
-      else if (string[i] >= 'a' && string[i] <= 'f')
-	number = 10 + string[i] - 'a';
-      else if (string[i] >= '0' && string[i] <= '9')
-	number = string[i] - '0';
+      if (c >= 'A' && c <= 'F')
+	number = c - 'A' + 10;
+      else if (c >= 'a' && c <= 'f')
+	number = c - 'a' + 10;
+      else if (c >= '0' && c <= '9')
+	number = c - '0';
       else
 	break;
 
@@ -776,10 +778,10 @@ extract_hex (char *string, size_t len, unsigned int *result)
 }
 
 /* Extract an unsigned octal number from STRING, considering at
-   most LEN bytes and place the result in RESULT.  Returns a pointer
-   to the first character past the hexadecimal number.  */
-static char *
-extract_oct (char *string, size_t len, unsigned int *result)
+   most LEN bytes and place the result in *RESULT.  Returns a pointer
+   to the first character past the octal number.  */
+static const char *
+extract_oct (const char *string, size_t len, unsigned int *result)
 {
   size_t i;
 
@@ -787,10 +789,11 @@ extract_oct (char *string, size_t len, unsigned int *result)
 
   for (i = 0; i < len; i++)
     {
+      char c = string[i];
       int number;
 
-      if (string[i] >= '0' && string[i] <= '7')
-	number = string[i] - '0';
+      if (c >= '0' && c <= '7')
+	number = c - '0';
       else
 	break;
 
@@ -802,40 +805,47 @@ extract_oct (char *string, size_t len, unsigned int *result)
 }
 
 /* Extract the various quotelike constructs except for <<EOF.  See the
-   section "Gory details of parsing quoted constructs" in perlop.pod.  */
+   section "Gory details of parsing quoted constructs" in perlop.pod.
+   Return the resulting token in *tp; tp->type == token_type_string.  */
 static void
 extract_quotelike (token_ty *tp, int delim)
 {
   char *string = extract_quotelike_pass1 (delim);
-  tp->type = token_type_string;
+  size_t len = strlen (string);
 
-  string[strlen (string) - 1] = '\0';
+  tp->type = token_type_string;
+  /* Take the string without the delimiters at the start and at the end.  */
+  if (!(len >= 2))
+    abort ();
+  string[len - 1] = '\0';
   tp->string = xstrdup (string + 1);
   free (string);
-  return;
 }
 
 /* Extract the quotelike constructs with double delimiters, like
    s/[SEARCH]/[REPLACE]/.  This function does not eat up trailing
-   modifiers (left to the caller).  */
+   modifiers (left to the caller).
+   Return the resulting token in *tp; tp->type == token_type_regex_op.  */
 static void
 extract_triple_quotelike (message_list_ty *mlp, token_ty *tp, int delim,
 			  bool interpolate)
 {
-  char *string = extract_quotelike_pass1 (delim);
+  char *string;
 
   tp->type = token_type_regex_op;
-  if (interpolate && !extract_all && delim != '\'')
-    interpolate_keywords (mlp, string);
 
+  string = extract_quotelike_pass1 (delim);
+  if (interpolate)
+    interpolate_keywords (mlp, string);
   free (string);
 
   if (delim == '(' || delim == '<' || delim == '{' || delim == '[')
     {
-      /* Things can change.  */
+      /* The delimiter for the second string can be different, e.g.
+	 s{SEARCH}{REPLACE} or s{SEARCH}/REPLACE/.  See "man perlrequick".  */
       delim = phase1_getc ();
-      while (delim == ' ' || delim == '\t' || delim == '\r'
-	     || delim == '\n' || delim  == '\f')
+      while (delim == ' ' || delim == '\t' || delim == '\r' || delim == '\n'
+	     || delim  == '\f')
 	{
 	  /* The hash-sign is not a valid delimiter after whitespace, ergo
 	     use phase2_getc() and not phase1_getc() now.  */
@@ -843,22 +853,23 @@ extract_triple_quotelike (message_list_ty *mlp, token_ty *tp, int delim,
 	}
     }
   string = extract_quotelike_pass1 (delim);
-  if (interpolate && !extract_all && delim != '\'')
+  if (interpolate)
     interpolate_keywords (mlp, string);
   free (string);
-
-  return;
 }
 
 /* Pass 1 of extracting quotes: Find the end of the string, regardless
-   of the semantics of the construct.  */
+   of the semantics of the construct.  Return the complete string,
+   including the starting and the trailing delimiter, with backslashes
+   removed where appropriate.  */
 static char *
 extract_quotelike_pass1 (int delim)
 {
   /* This function is called recursively.  No way to allocate stuff
-     statically.  Consider using alloca() instead.  */
-  char *buffer = (char *) xmalloc (100);
-  int bufmax = 100;
+     statically.  Also alloca() is inappropriate due to limited stack
+     size on some platforms.  So we use malloc().  */
+  int bufmax = 10;
+  char *buffer = (char *) xmalloc (bufmax);
   int bufpos = 0;
   bool nested = true;
   int counter_delim;
@@ -880,7 +891,7 @@ extract_quotelike_pass1 (int delim)
     case '<':
       counter_delim = '>';
       break;
-    default:
+    default: /* "..." or '...' or |...| etc. */
       nested = false;
       counter_delim = delim;
       break;
@@ -890,9 +901,10 @@ extract_quotelike_pass1 (int delim)
     {
       int c = phase1_getc ();
 
-      if (bufpos >= bufmax - 1)
+      /* This round can produce 1 or 2 bytes.  Ensure room for 2 bytes.  */
+      if (bufpos + 2 > bufmax)
 	{
-	  bufmax += 100;
+	  bufmax = 2 * bufmax + 10;
 	  buffer = xrealloc (buffer, bufmax);
 	}
 
@@ -913,18 +925,19 @@ extract_quotelike_pass1 (int delim)
 	  char *inner = extract_quotelike_pass1 (delim);
 	  size_t len = strlen (inner);
 
+	  /* Ensure room for len + 1 bytes.  */
 	  if (bufpos + len >= bufmax)
 	    {
-	      bufmax += len;
+	      do
+		bufmax = 2 * bufmax + 10;
+	      while (bufpos + len >= bufmax);
 	      buffer = xrealloc (buffer, bufmax);
 	    }
 	  strcpy (buffer + bufpos, inner);
 	  free (inner);
 	  bufpos += len;
-	  continue;
 	}
-
-      if (c == '\\')
+      else if (c == '\\')
 	{
 	  c = phase1_getc ();
 	  if (c == '\\')
@@ -950,7 +963,9 @@ extract_quotelike_pass1 (int delim)
     }
 }
 
-/* Perform pass 3 of quotelike extraction (interpolation).  */
+/* Perform pass 3 of quotelike extraction (interpolation).
+   *tp is a token of type token_type_string.
+   This function replaces tp->string.  */
 /* FIXME: Currently may writes null-bytes into the string.  */
 static void
 extract_quotelike_pass3 (token_ty *tp, int error_level)
@@ -958,12 +973,10 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
   static char *buffer;
   static int bufmax = 0;
   int bufpos = 0;
-  char *string = tp->string;
-  unsigned char *crs = string;
-
-  bool uppercase = false;
-  bool lowercase = false;
-  bool quotemeta = false;
+  const char *crs;
+  bool uppercase;
+  bool lowercase;
+  bool quotemeta;
 
 #if DEBUG_PERL
   switch (tp->string_type)
@@ -989,11 +1002,17 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
   if (tp->string_type == string_type_verbatim)
     return;
 
+  /* Loop over tp->string, accumulating the expansion in buffer.  */
+  crs = tp->string;
+  uppercase = false;
+  lowercase = false;
+  quotemeta = false;
   while (*crs)
     {
-      if (bufpos >= bufmax - 6)
+      /* Ensure room for 6 bytes.  */
+      if (bufpos + 6 > bufmax)
 	{
-	  bufmax += 100;
+	  bufmax = 2 * bufmax + 10;
 	  buffer = xrealloc_static (buffer, bufmax);
 	}
 
@@ -1006,7 +1025,7 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 		{
 		  ++crs;
 		  buffer[bufpos++] = '\\';
-		  continue;
+		  break;
 		}
 	      /* FALLTHROUGH */
 	    default:
@@ -1057,7 +1076,9 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 		int length;
 
 		crs = extract_oct (crs + 1, 3, &oct_number);
-		length = u8_uctomb (buffer + bufpos, oct_number, 3);
+		/* Yes, octal escape sequences in the range 0x100..0x1ff are
+		   valid.  */
+		length = u8_uctomb (buffer + bufpos, oct_number, 2);
 		if (length > 0)
 		  bufpos += length;
 	      }
@@ -1068,10 +1089,9 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 		int length;
 
 		++crs;
-
 		if (*crs == '{')
 		  {
-		    char *end = strchr (crs, '}');
+		    const char *end = strchr (crs, '}');
 		    if (end == NULL)
 		      {
 			error_with_progname = false;
@@ -1084,7 +1104,8 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 		    else
 		      {
 			++crs;
-			(void) extract_hex (crs, 4, &hex_number);
+			(void) extract_hex (crs, end - crs, &hex_number);
+			crs = end + 1;
 		      }
 		  }
 		else
@@ -1102,35 +1123,38 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 	      crs += 2;
 	      if (*crs)
 		{
-		  int the_char = *crs;
+		  int the_char = (unsigned char) *crs;
 		  if (the_char >= 'a' || the_char <= 'z')
-		    the_char -= 0x20;
-		  buffer[bufpos++] = the_char + (the_char & 0x40 ? -64 : 64);
+		    the_char = the_char - 'a' + 'A';
+		  buffer[bufpos++] = the_char ^ 0x40;
 		}
 	      continue;
 	    case 'N':
 	      crs += 2;
 	      if (*crs == '{')
 		{
-		  char *name = xstrdup (crs + 1);
-		  char *end = strchr (name, '}');
+		  const char *end = strchr (crs + 1, '}');
 		  if (end != NULL)
 		    {
+		      char *name;
 		      unsigned int unicode;
-		      int length;
 
-		      *end = '\0';
+		      name = (char *) xmalloc (end - (crs + 1) + 1);
+		      memcpy (name, crs + 1, end - (crs + 1));
+		      name[end - (crs + 1)] = '\0';
 
-		      crs += 2 + strlen (name);
 		      unicode = unicode_name_character (name);
 		      if (unicode != UNINAME_INVALID)
 			{
-			  length = u8_uctomb (buffer + bufpos, unicode, 6);
+			  int length = u8_uctomb (buffer + bufpos, unicode, 6);
 			  if (length > 0)
 			    bufpos += length;
 			}
+
+		      free (name);
+
+		      crs = end + 1;
 		    }
-		  free (name);
 		}
 	      continue;
 	    }
@@ -1143,63 +1167,63 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 	  switch (*crs)
 	    {
 	    case 'E':
-	      quotemeta = uppercase = lowercase = false;
+	      uppercase = false;
+	      lowercase = false;
+	      quotemeta = false;
 	      ++crs;
 	      continue;
 	    case 'L':
-	      quotemeta = uppercase = false;
+	      uppercase = false;
 	      lowercase = true;
+	      quotemeta = false;
 	      ++crs;
 	      continue;
 	    case 'U':
-	      quotemeta = lowercase = false;
 	      uppercase = true;
+	      lowercase = false;
+	      quotemeta = false;
 	      ++crs;
 	      continue;
 	    case 'Q':
-	      uppercase = lowercase = false;
+	      uppercase = false;
+	      lowercase = false;
 	      quotemeta = true;
 	      ++crs;
 	      continue;
 	    case 'l':
 	      ++crs;
-	      if (crs[1] >= 'A' && crs[1] <= 'Z')
+	      if (*crs >= 'A' && *crs <= 'Z')
 		{
-		  buffer[bufpos++] = crs[1] + 0x20;
-		  ++crs;
+		  buffer[bufpos++] = *crs - 'A' + 'a';
 		}
-	      else if (crs[1] >= 0x80)
+	      else if ((unsigned char) *crs >= 0x80)
 		{
 		  error_with_progname = false;
 		  error (error_level, 0, _("\
 %s:%d: invalid interpolation (\"\\l\") of 8bit character \"%c\""),
 			 real_file_name, line_number, *crs);
 		  error_with_progname = true;
-		  ++crs;
 		}
-	      else
-		++crs;
+	      ++crs;
 	      continue;
 	    case 'u':
 	      ++crs;
-	      if (crs[1] >= 'a' && crs[1] <= 'z')
+	      if (*crs >= 'a' && *crs <= 'z')
 		{
-		  buffer[bufpos++] = crs[1] - 0x20;
-		  ++crs;
+		  buffer[bufpos++] = *crs - 'a' + 'A';
 		}
-	      else if (crs[1] >= 0x80)
+	      else if ((unsigned char) *crs >= 0x80)
 		{
 		  error_with_progname = false;
 		  error (error_level, 0, _("\
 %s:%d: invalid interpolation (\"\\u\") of 8bit character \"%c\""),
 			 real_file_name, line_number, *crs);
 		  error_with_progname = true;
-		  ++crs;
 		}
-	      else
-		++crs;
+	      ++crs;
 	      continue;
 	    case '\\':
+	      /* FIXME: This looks buggy.  */
 	      if (crs[1])
 		buffer[bufpos++] = crs[1];
 	      crs++;
@@ -1207,47 +1231,48 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 	    }
 	}
 
-
       if (*crs == '$' || *crs == '@')
 	{
-	  ++crs;
 	  error_with_progname = false;
 	  error (error_level, 0, _("\
 %s:%d: invalid variable interpolation at \"%c\""),
 		 real_file_name, line_number, *crs);
 	  error_with_progname = true;
+	  ++crs;
 	}
       else if (lowercase)
 	{
 	  if (*crs >= 'A' && *crs <= 'Z')
-	    buffer[bufpos++] = 0x20 + *crs++;
-	  else if (*crs >= 0x80)
+	    buffer[bufpos++] = *crs - 'A' + 'a';
+	  else if ((unsigned char) *crs >= 0x80)
 	    {
 	      error_with_progname = false;
 	      error (error_level, 0, _("\
 %s:%d: invalid interpolation (\"\\L\") of 8bit character \"%c\""),
 		     real_file_name, line_number, *crs);
 	      error_with_progname = true;
-	      buffer[bufpos++] = *crs++;
+	      buffer[bufpos++] = *crs;
 	    }
 	  else
-	    buffer[bufpos++] = *crs++;
+	    buffer[bufpos++] = *crs;
+	  ++crs;
 	}
       else if (uppercase)
 	{
 	  if (*crs >= 'a' && *crs <= 'z')
-	    buffer[bufpos++] = *crs++ - 0x20;
-	  else if (*crs >= 0x80)
+	    buffer[bufpos++] = *crs - 'a' + 'A';
+	  else if ((unsigned char) *crs >= 0x80)
 	    {
 	      error_with_progname = false;
 	      error (error_level, 0, _("\
 %s:%d: invalid interpolation (\"\\U\") of 8bit character \"%c\""),
 		     real_file_name, line_number, *crs);
 	      error_with_progname = true;
-	      buffer[bufpos++] = *crs++;
+	      buffer[bufpos++] = *crs;
 	    }
 	  else
-	    buffer[bufpos++] = *crs++;
+	    buffer[bufpos++] = *crs;
+	  ++crs;
 	}
       else if (quotemeta)
 	{
@@ -1259,11 +1284,12 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
 	}
     }
 
-  if (bufpos >= bufmax - 1)
-  {
-      bufmax += 100;
+  /* Ensure room for 1 more byte.  */
+  if (bufpos >= bufmax)
+    {
+      bufmax = 2 * bufmax + 10;
       buffer = xrealloc_static (buffer, bufmax);
-  }
+    }
 
   buffer[bufpos++] = '\0';
 
@@ -1271,15 +1297,15 @@ extract_quotelike_pass3 (token_ty *tp, int error_level)
   fprintf (stderr, "---> %s\n", buffer);
 #endif
 
+  /* Replace tp->string.  */
   free (tp->string);
   tp->string = xstrdup (buffer);
 }
 
 /* Parse a variable.  This is done in several steps:
- *
- * 1) Consume all leading occurcencies of '$', '@', '%', and '*'.
- * 2) Determine the name of the variable from the following input
- * 3) Parse possible following hash keys or array indexes.
+     1) Consume all leading occurencies of '$', '@', '%', and '*'.
+     2) Determine the name of the variable from the following input.
+     3) Parse possible following hash keys or array indexes.
  */
 static void
 extract_variable (message_list_ty *mlp, token_ty *tp, int first)
@@ -1308,7 +1334,7 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first)
     {
       if (bufpos >= bufmax)
 	{
-	  bufmax += 100;
+	  bufmax = 2 * bufmax + 10;
 	  buffer = xrealloc_static (buffer, bufmax);
 	}
       buffer[bufpos++] = c;
@@ -1329,16 +1355,15 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first)
 	maybe_hash_value = true;
       else if (bufpos == 2 && buffer[1] == '$')
 	{
-	  if (c != '{' && c != '_' && (!((c >= '0' && c <= '9')
-					 || (c >= 'A' && c <= 'Z')
-					 || (c >= 'a' && c <= 'z')
-					 || c == ':' || c == '\''
-					 || c >= 0x80)))
+	  if (!(c == '{'
+		|| (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+		|| (c >= '0' && c <= '9')
+		|| c == '_' || c == ':' || c == '\'' || c >= 0x80))
 	    {
 	      /* Special variable $$ for pid.  */
 	      if (bufpos >= bufmax)
 		{
-		  bufmax += 100;
+		  bufmax = 2 * bufmax + 10;
 		  buffer = xrealloc_static (buffer, bufmax);
 		}
 	      buffer[bufpos++] = '\0';
@@ -1365,7 +1390,7 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first)
    */
   if (bufpos >= bufmax)
     {
-      bufmax += 100;
+      bufmax = 2 * bufmax + 10;
       buffer = xrealloc_static (buffer, bufmax);
     }
   if (c == '{')
@@ -1385,15 +1410,14 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first)
     }
   else
     {
-      while ((c >= 'A' && c <= 'Z') ||
-	     (c >= 'a' && c <= 'z') ||
-	     (c >= '0' && c <= '9') ||
-	     c == '_' || c == ':' || c == '\'' || c >= 0x80)
+      while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+	     || (c >= '0' && c <= '9')
+	     || c == '_' || c == ':' || c == '\'' || c >= 0x80)
 	{
 	  ++varbody_length;
 	  if (bufpos >= bufmax)
 	    {
-	      bufmax += 100;
+	      bufmax = 2 * bufmax + 10;
 	      buffer = xrealloc_static (buffer, bufmax);
 	    }
 	  buffer[bufpos++] = c;
@@ -1402,9 +1426,9 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first)
       phase1_ungetc (c);
     }
 
-  if (bufpos >= bufmax - 1)
+  if (bufpos + 1 >= bufmax)
     {
-      bufmax += 100;
+      bufmax = 2 * bufmax + 10;
       buffer = xrealloc_static (buffer, bufmax);
     }
   buffer[bufpos++] = '\0';
@@ -1448,7 +1472,7 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first)
       int c = phase2_getc ();
 
       while (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f')
-	  c = phase2_getc ();
+	c = phase2_getc ();
 
       if (c == '-')
 	{
@@ -1460,7 +1484,7 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first)
 	      c = phase2_getc ();
 	      while (c == ' ' || c == '\t' || c == '\r'
 		     || c == '\n' || c == '\f')
-		  c = phase2_getc ();
+		c = phase2_getc ();
 	    }
 	  else if (c2 != '\n')
 	    {
@@ -1468,7 +1492,7 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first)
 		 special character recognized after a minus is greater-than
 		 for dereference.  However, the sequence "-\n>" that we
 	         treat incorrectly here, is a syntax error.  */
-		phase1_ungetc (c2);
+	      phase1_ungetc (c2);
 	    }
 	}
 
@@ -1600,16 +1624,15 @@ extract_variable (message_list_ty *mlp, token_ty *tp, int first)
 }
 
 /* Actually a simplified version of extract_variable().  It searches for
- * variables inside a double-quoted string that may interpolate to
- * some keyword hash (reference).
- */
+   variables inside a double-quoted string that may interpolate to
+   some keyword hash (reference).  */
 static void
 interpolate_keywords (message_list_ty *mlp, const char *string)
 {
   static char *buffer;
   static int bufmax = 0;
   int bufpos = 0;
-  int c = string[0];
+  int c = (unsigned char) string[0];
   bool maybe_hash_deref = false;
   enum parser_state
     {
@@ -1653,16 +1676,16 @@ interpolate_keywords (message_list_ty *mlp, const char *string)
   pos.file_name = logical_file_name;
   pos.line_number = line_number;
 
-  while ((c = *string++) != '\0')
+  while ((c = (unsigned char) *string++) != '\0')
     {
       void *keyword_value;
 
       if (state == initial)
 	bufpos = 0;
 
-      if (bufpos >= bufmax - 1)
+      if (bufpos + 1 >= bufmax)
 	{
-	  bufmax += 100;
+	  bufmax = 2 * bufmax + 10;
 	  buffer = xrealloc_static (buffer, bufmax);
 	}
 
@@ -1672,8 +1695,9 @@ interpolate_keywords (message_list_ty *mlp, const char *string)
 	  switch (c)
 	    {
 	    case '\\':
-	      c = *string++;
-	      if (!c) return;
+	      c = (unsigned char) *string++;
+	      if (c == '\0')
+		return;
 	      break;
 	    case '$':
 	      buffer[bufpos++] = '$';
@@ -1729,9 +1753,7 @@ interpolate_keywords (message_list_ty *mlp, const char *string)
 	    {
 	    case '-':
 	      if (find_entry (&keywords, buffer, bufpos, &keyword_value) == 0)
-		{
-		  state = minus;
-		}
+		state = minus;
 	      else
 		state = initial;
 	      break;
@@ -1741,9 +1763,7 @@ interpolate_keywords (message_list_ty *mlp, const char *string)
 		  buffer[0] = '%';
 		}
 	      if (find_entry (&keywords, buffer, bufpos, &keyword_value) == 0)
-		{
-		  state = wait_quote;
-		}
+		state = wait_quote;
 	      else
 		state = initial;
 	      break;
@@ -1756,9 +1776,7 @@ interpolate_keywords (message_list_ty *mlp, const char *string)
 		  buffer[bufpos++] = c;
 		}
 	      else
-		{
-		  state = initial;
-		}
+		state = initial;
 	      break;
 	    }
 	  break;
@@ -1839,9 +1857,7 @@ interpolate_keywords (message_list_ty *mlp, const char *string)
 		  buffer[bufpos++] = string++[0];
 		}
 	      else
-		{
-		  state = initial;
-		}
+		state = initial;
 	      break;
 	    default:
 	      buffer[bufpos++] = c;
@@ -1865,9 +1881,7 @@ interpolate_keywords (message_list_ty *mlp, const char *string)
 		  buffer[bufpos++] = string++[0];
 		}
 	      else
-		{
-		  state = initial;
-		}
+		state = initial;
 	      break;
 	    default:
 	      buffer[bufpos++] = c;
@@ -2006,7 +2020,7 @@ x_perl_prelex (message_list_ty *mlp, token_ty *tp)
 	    {
 	      if (bufpos >= bufmax)
 		{
-		  bufmax += 100;
+		  bufmax = 2 * bufmax + 10;
 		  buffer = xrealloc_static (buffer, bufmax);
 		}
 	      buffer[bufpos++] = c;
@@ -2036,7 +2050,7 @@ x_perl_prelex (message_list_ty *mlp, token_ty *tp)
 	    }
 	  if (bufpos >= bufmax)
 	    {
-	      bufmax += 100;
+	      bufmax = 2 * bufmax + 10;
 	      buffer = xrealloc_static (buffer, bufmax);
 	    }
 	  buffer[bufpos] = '\0';
@@ -2094,7 +2108,9 @@ x_perl_prelex (message_list_ty *mlp, token_ty *tp)
 		  prefer_division_over_regexp = true;
 		  return;
 		}
-	      extract_triple_quotelike (mlp, tp, delim, buffer[0] == 's');
+	      extract_triple_quotelike (mlp, tp, delim,
+					buffer[0] == 's' && !extract_all
+					&& delim != '\'');
 
 	      /* Eat the following modifiers.  */
 	      c = phase1_getc ();
@@ -2186,8 +2202,8 @@ x_perl_prelex (message_list_ty *mlp, token_ty *tp)
 		{
 		case 'q':
 		case 'x':
-		  tp->string_type = string_type_qq;
 		  tp->type = token_type_string;
+		  tp->string_type = string_type_qq;
 		  if (!extract_all)
 		    interpolate_keywords (mlp, tp->string);
 		  break;
@@ -2239,9 +2255,8 @@ x_perl_prelex (message_list_ty *mlp, token_ty *tp)
 	case '(':
 	  c = phase2_getc ();
 	  if (c == ')')
-	    {
-	      continue;  /* Ignore empty list.  */
-	    }
+	    /* Ignore empty list.  */
+	    continue;
 	  else
 	    phase2_ungetc (c);
 	  tp->type = token_type_lparen;
@@ -2356,7 +2371,7 @@ x_perl_prelex (message_list_ty *mlp, token_ty *tp)
 		    {
 		      if (bufpos >= bufmax)
 			{
-			  bufmax += 100;
+			  bufmax = 2 * bufmax + 10;
 			  buffer = xrealloc_static (buffer, bufmax);
 			}
 		      buffer[bufpos++] = c;
@@ -2473,7 +2488,6 @@ x_perl_lex (message_list_ty *mlp)
 #if DEBUG_PERL
 	  fprintf (stderr, "%s:%d: pre-fetching next token\n",
 		   real_file_name, line_number);
-	  fflush (stderr);
 #endif
 	  next = x_perl_lex (mlp);
 	  x_perl_unlex (next);
@@ -2939,11 +2953,11 @@ extract_perl (FILE *f, const char *real_filename, const char *logical_filename,
   last_token = token_type_semicolon;  /* Safe assumption.  */
   prefer_division_over_regexp = false;
 
-  last_string_finished = false;
-
   init_keywords ();
 
   token_stack = (struct stack *) xmalloc (sizeof (struct stack));
+  token_stack->first = NULL;
+  token_stack->last = NULL;
   here_eaten = 0;
   end_of_file = false;
 
@@ -2958,7 +2972,6 @@ extract_perl (FILE *f, const char *real_filename, const char *logical_filename,
   logical_file_name = NULL;
   line_number = 0;
   last_token = token_type_semicolon;
-  last_string_finished = false;
   stack_free (token_stack);
   free (token_stack);
   token_stack = NULL;
-- 
2.47.3