xgettext: PHP: Recognize strings with embedded expressions.

author Bruno Haible <bruno@clisp.org>

Wed, 18 Sep 2024 21:37:32 +0000 (23:37 +0200)

committer Bruno Haible <bruno@clisp.org>

Wed, 18 Sep 2024 21:37:32 +0000 (23:37 +0200)
author Bruno Haible <bruno@clisp.org>
Wed, 18 Sep 2024 21:37:32 +0000 (23:37 +0200)
committer Bruno Haible <bruno@clisp.org>
Wed, 18 Sep 2024 21:37:32 +0000 (23:37 +0200)
diff --git a/NEWS b/NEWS

index 687f8ae3dce243105367645b521fe7dfcebdf66a..56b66c34ac7727ad74e3554ff600f4163820cb01 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -26,6 +26,7 @@ Version 0.23 - September 2024
    - Tcl: With the forthcoming Tcl 9.0, characters outside the Unicode BMP
      in Tcl message catalogs (.msg files) will work regardless of the locale's
      encoding.
+  - PHP: Strings with embedded expressions are now recognized.
  
  * Runtime behaviour:
    - In the C.UTF-8 locale, like in the C locale, the *gettext() functions
diff --git a/gettext-tools/src/x-php.c b/gettext-tools/src/x-php.c

index dd8a9f07b8b325bff0c4e47543a141f3939742cc..2ea185d49f92636da7dca58f393f2945421d350a 100644 (file)
--- a/gettext-tools/src/x-php.c
+++ b/gettext-tools/src/x-php.c
@@ -135,55 +135,162 @@ init_flag_table_php ()
  }
  
  
-/* ======================== Reading of characters.  ======================== */
+/* =================== Variables used by the extractor.  =================== */
  
-/* The input file stream.  */
-static FILE *fp;
+/* Type definitions needed for the variables.  */
  
+enum token_type_ty
+{
+  token_type_eof,
+  token_type_lparen,            /* ( */
+  token_type_rparen,            /* ) */
+  token_type_comma,             /* , */
+  token_type_lbracket,          /* [ */
+  token_type_rbracket,          /* ] */
+  token_type_dot,               /* . */
+  token_type_operator1,         /* * / % ++ -- */
+  token_type_operator2,         /* + - ! ~ @ */
+  token_type_string_literal,    /* "abc" */
+  token_type_symbol,            /* symbol, number */
+  token_type_other              /* misc. operator */
+};
+typedef enum token_type_ty token_type_ty;
  
-/* 1. line_number handling.  */
+typedef struct token_ty token_ty;
+struct token_ty
+{
+  token_type_ty type;
+  char *string;         /* for token_type_string_literal, token_type_symbol */
+  refcounted_string_list_ty *comment;   /* for token_type_string_literal */
+  int line_number;
+};
+
+/* These variables are combined in a struct, so that we can invoke the
+   extractor in a reentrant way.  */
+
+struct php_extractor
+{
+  /* Accumulator for the output.  */
+  message_list_ty *mlp;
+
+  /* The input file stream, when reading from a file.  */
+  FILE *fp;
+  /* The input area, when reading from a string.  */
+  const char *input;
+  const char *input_end;
+
+  int line_number;
+
+  unsigned char phase1_pushback[2];
+  int phase1_pushback_length;
+
+#if 0
+  unsigned char phase2_pushback[1];
+  int phase2_pushback_length;
+#endif
+
+  /* For accumulating comments.  */
+  char *buffer;
+  size_t bufmax;
+  size_t buflen;
+
+  /* These are for tracking whether comments count as immediately before
+     keyword.  */
+  int last_comment_line;
+  int last_non_comment_line;
+
+  unsigned char phase3_pushback[1];
+  int phase3_pushback_length;
+
+  token_ty phase4_pushback[3];
+  int phase4_pushback_length;
  
-static unsigned char phase1_pushback[2];
-static int phase1_pushback_length;
+  token_type_ty phase5_last;
+
+  /* Maximum supported nesting depth.  */
+  #define MAX_NESTING_DEPTH 1000
+
+  /* Current nesting depths.  */
+  int paren_nesting_depth;
+  int bracket_nesting_depth;
+};
+
+static inline void
+php_extractor_init_rest (struct php_extractor *xp)
+{
+  xp->phase1_pushback_length = 0;
+#if 0
+  xp->phase2_pushback_length = 0;
+#endif
+
+  xp->buffer = NULL;
+  xp->bufmax = 0;
+  xp->buflen = 0;
+
+  xp->last_comment_line = -1;
+  xp->last_non_comment_line = -1;
+
+  xp->phase3_pushback_length = 0;
+  xp->phase4_pushback_length = 0;
+
+  xp->phase5_last = token_type_eof;
+
+  xp->paren_nesting_depth = 0;
+  xp->bracket_nesting_depth = 0;
+}
+
+/* Forward declarations.  */
+static void extract_php_input (struct php_extractor *xp);
+
+
+/* ======================== Reading of characters.  ======================== */
+
+/* 1. line_number handling.  */
  
  static int
-phase1_getc ()
+phase1_getc (struct php_extractor *xp)
  {
    int c;
  
-  if (phase1_pushback_length)
-    c = phase1_pushback[--phase1_pushback_length];
-  else
+  if (xp->phase1_pushback_length)
+    c = xp->phase1_pushback[--(xp->phase1_pushback_length)];
+  else if (xp->fp != NULL)
      {
-      c = getc (fp);
+      c = getc (xp->fp);
  
        if (c == EOF)
          {
-          if (ferror (fp))
+          if (ferror (xp->fp))
              error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
                     real_file_name);
            return EOF;
          }
      }
+  else
+    {
+      if (xp->input == xp->input_end)
+        return EOF;
+      c = *(xp->input++);
+    }
  
-  if (c == '\n')
-    line_number++;
+  if (xp->fp != NULL && c == '\n')
+    xp->line_number++;
  
    return c;
  }
  
  /* Supports 2 characters of pushback.  */
  static void
-phase1_ungetc (int c)
+phase1_ungetc (struct php_extractor *xp, int c)
  {
    if (c != EOF)
      {
        if (c == '\n')
-        --line_number;
+        --(xp->line_number);
  
-      if (phase1_pushback_length == SIZEOF (phase1_pushback))
+      if (xp->phase1_pushback_length == SIZEOF (xp->phase1_pushback))
          abort ();
-      phase1_pushback[phase1_pushback_length++] = c;
+      xp->phase1_pushback[xp->phase1_pushback_length++] = c;
      }
  }
  
@@ -192,18 +299,18 @@ phase1_ungetc (int c)
     therefore don't contain translatable strings.  */
  
  static void
-skip_html ()
+skip_html (struct php_extractor *xp)
  {
    for (;;)
      {
-      int c = phase1_getc ();
+      int c = phase1_getc (xp);
  
        if (c == EOF)
          return;
  
        if (c == '<')
          {
-          int c2 = phase1_getc ();
+          int c2 = phase1_getc (xp);
  
            if (c2 == EOF)
              break;
@@ -212,10 +319,10 @@ skip_html ()
              {
                /* <?php is the normal way to enter PHP mode. <? and <?= are
                   recognized by PHP depending on a configuration setting.  */
-              int c3 = phase1_getc ();
+              int c3 = phase1_getc (xp);
  
                if (c3 != '=')
-                phase1_ungetc (c3);
+                phase1_ungetc (xp, c3);
  
                return;
              }
@@ -224,17 +331,17 @@ skip_html ()
              {
                /* <% and <%= are recognized by PHP depending on a configuration
                   setting.  */
-              int c3 = phase1_getc ();
+              int c3 = phase1_getc (xp);
  
                if (c3 != '=')
-                phase1_ungetc (c3);
+                phase1_ungetc (xp, c3);
  
                return;
              }
  
            if (c2 == '<')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
  
@@ -243,160 +350,160 @@ skip_html ()
               < script language = 'php' >
               are always recognized.  */
            while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-            c2 = phase1_getc ();
+            c2 = phase1_getc (xp);
            if (c2 != 's' && c2 != 'S')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'c' && c2 != 'C')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'r' && c2 != 'R')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'i' && c2 != 'I')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'p' && c2 != 'P')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 't' && c2 != 'T')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (!(c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r'))
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
            do
-            c2 = phase1_getc ();
+            c2 = phase1_getc (xp);
            while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
            if (c2 != 'l' && c2 != 'L')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'a' && c2 != 'A')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'n' && c2 != 'N')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'g' && c2 != 'G')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'u' && c2 != 'U')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'a' && c2 != 'A')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'g' && c2 != 'G')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            if (c2 != 'e' && c2 != 'E')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-            c2 = phase1_getc ();
+            c2 = phase1_getc (xp);
            if (c2 != '=')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-            c2 = phase1_getc ();
+            c2 = phase1_getc (xp);
            if (c2 == '"')
              {
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != 'p')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != 'h')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != 'p')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != '"')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
              }
            else if (c2 == '\'')
              {
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != 'p')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != 'h')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != 'p')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != '\'')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
              }
@@ -404,28 +511,28 @@ skip_html ()
              {
                if (c2 != 'p')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != 'h')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
                if (c2 != 'p')
                  {
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                    continue;
                  }
              }
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
            while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-            c2 = phase1_getc ();
+            c2 = phase1_getc (xp);
            if (c2 != '>')
              {
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
                continue;
              }
            return;
@@ -435,70 +542,67 @@ skip_html ()
  
  #if 0
  
-static unsigned char phase2_pushback[1];
-static int phase2_pushback_length;
-
  static int
-phase2_getc ()
+phase2_getc (struct php_extractor *xp)
  {
    int c;
  
-  if (phase2_pushback_length)
-    return phase2_pushback[--phase2_pushback_length];
+  if (xp->phase2_pushback_length)
+    return xp->phase2_pushback[--(xp->phase2_pushback_length)];
  
-  c = phase1_getc ();
+  c = phase1_getc (xp);
    switch (c)
      {
      case '?':
      case '%':
        {
-        int c2 = phase1_getc ();
+        int c2 = phase1_getc (xp);
          if (c2 == '>')
            {
              /* ?> and %> terminate PHP mode and switch back to HTML mode.  */
              skip_html ();
              return ' ';
            }
-        phase1_ungetc (c2);
+        phase1_ungetc (xp, c2);
        }
        break;
  
      case '<':
        {
-        int c2 = phase1_getc ();
+        int c2 = phase1_getc (xp);
  
          /* < / script > terminates PHP mode and switches back to HTML mode.  */
          while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-          c2 = phase1_getc ();
+          c2 = phase1_getc (xp);
          if (c2 == '/')
            {
              do
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
              while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
              if (c2 == 's' || c2 == 'S')
                {
-                c2 = phase1_getc ();
+                c2 = phase1_getc (xp);
                  if (c2 == 'c' || c2 == 'C')
                    {
-                    c2 = phase1_getc ();
+                    c2 = phase1_getc (xp);
                      if (c2 == 'r' || c2 == 'R')
                        {
-                        c2 = phase1_getc ();
+                        c2 = phase1_getc (xp);
                          if (c2 == 'i' || c2 == 'I')
                            {
-                            c2 = phase1_getc ();
+                            c2 = phase1_getc (xp);
                              if (c2 == 'p' || c2 == 'P')
                                {
-                                c2 = phase1_getc ();
+                                c2 = phase1_getc (xp);
                                  if (c2 == 't' || c2 == 'T')
                                    {
                                      do
-                                      c2 = phase1_getc ();
+                                      c2 = phase1_getc (xp);
                                      while (c2 == ' ' || c2 == '\t'
                                             || c2 == '\n' || c2 == '\r');
                                      if (c2 == '>')
                                        {
-                                        skip_html ();
+                                        skip_html (xp);
                                          return ' ';
                                        }
                                    }
@@ -508,7 +612,7 @@ phase2_getc ()
                    }
                }
            }
-        phase1_ungetc (c2);
+        phase1_ungetc (xp, c2);
        }
        break;
      }
@@ -517,13 +621,13 @@ phase2_getc ()
  }
  
  static void
-phase2_ungetc (int c)
+phase2_ungetc (struct php_extractor *xp, int c)
  {
    if (c != EOF)
      {
-      if (phase2_pushback_length == SIZEOF (phase2_pushback))
+      if (xp->phase2_pushback_length == SIZEOF (xp->phase2_pushback))
          abort ();
-      phase2_pushback[phase2_pushback_length++] = c;
+      xp->phase2_pushback[xp->phase2_pushback_length++] = c;
      }
  }
  
@@ -532,41 +636,38 @@ phase2_ungetc (int c)
  
  /* Accumulating comments.  */
  
-static char *buffer;
-static size_t bufmax;
-static size_t buflen;
-
  static inline void
-comment_start ()
+comment_start (struct php_extractor *xp)
  {
-  buflen = 0;
+  xp->buflen = 0;
  }
  
  static inline void
-comment_add (int c)
+comment_add (struct php_extractor *xp, int c)
  {
-  if (buflen >= bufmax)
+  if (xp->buflen >= xp->bufmax)
      {
-      bufmax = 2 * bufmax + 10;
-      buffer = xrealloc (buffer, bufmax);
+      xp->bufmax = 2 * xp->bufmax + 10;
+      xp->buffer = xrealloc (xp->buffer, xp->bufmax);
      }
-  buffer[buflen++] = c;
+  xp->buffer[xp->buflen++] = c;
  }
  
  static inline void
-comment_line_end (size_t chars_to_remove)
+comment_line_end (struct php_extractor *xp, size_t chars_to_remove)
  {
-  buflen -= chars_to_remove;
-  while (buflen >= 1
-         && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
-    --buflen;
-  if (chars_to_remove == 0 && buflen >= bufmax)
+  xp->buflen -= chars_to_remove;
+  while (xp->buflen >= 1
+         && (xp->buffer[xp->buflen - 1] == ' '
+             || xp->buffer[xp->buflen - 1] == '\t'))
+    --(xp->buflen);
+  if (chars_to_remove == 0 && xp->buflen >= xp->bufmax)
      {
-      bufmax = 2 * bufmax + 10;
-      buffer = xrealloc (buffer, bufmax);
+      xp->bufmax = 2 * xp->bufmax + 10;
+      xp->buffer = xrealloc (xp->buffer, xp->bufmax);
      }
-  buffer[buflen] = '\0';
-  savable_comment_add (buffer);
+  xp->buffer[xp->buflen] = '\0';
+  savable_comment_add (xp->buffer);
  }
  
  
@@ -574,62 +675,54 @@ comment_line_end (size_t chars_to_remove)
     space character.  We need to remember the comment for later, because
     it may be attached to a keyword string.  */
  
-/* These are for tracking whether comments count as immediately before
-   keyword.  */
-static int last_comment_line;
-static int last_non_comment_line;
-
-static unsigned char phase3_pushback[1];
-static int phase3_pushback_length;
-
  static int
-phase3_getc ()
+phase3_getc (struct php_extractor *xp)
  {
    int lineno;
    int c;
  
-  if (phase3_pushback_length)
-    return phase3_pushback[--phase3_pushback_length];
+  if (xp->phase3_pushback_length)
+    return xp->phase3_pushback[--(xp->phase3_pushback_length)];
  
-  c = phase1_getc ();
+  c = phase1_getc (xp);
  
    if (c == '#')
      {
        /* sh comment.  */
        bool last_was_qmark = false;
  
-      comment_start ();
-      lineno = line_number;
+      comment_start (xp);
+      lineno = xp->line_number;
        for (;;)
          {
-          c = phase1_getc ();
+          c = phase1_getc (xp);
            if (c == '\n' || c == EOF)
              {
-              comment_line_end (0);
+              comment_line_end (xp, 0);
                break;
              }
            if (last_was_qmark && c == '>')
              {
-              comment_line_end (1);
-              skip_html ();
+              comment_line_end (xp, 1);
+              skip_html (xp);
                break;
              }
            /* We skip all leading white space, but not EOLs.  */
-          if (!(buflen == 0 && (c == ' ' || c == '\t')))
-            comment_add (c);
+          if (!(xp->buflen == 0 && (c == ' ' || c == '\t')))
+            comment_add (xp, c);
            last_was_qmark = (c == '?' || c == '%');
          }
-      last_comment_line = lineno;
+      xp->last_comment_line = lineno;
        return '\n';
      }
    else if (c == '/')
      {
-      c = phase1_getc ();
+      c = phase1_getc (xp);
  
        switch (c)
          {
          default:
-          phase1_ungetc (c);
+          phase1_ungetc (xp, c);
            return '/';
  
          case '*':
@@ -637,24 +730,24 @@ phase3_getc ()
              /* C comment.  */
              bool last_was_star;
  
-            comment_start ();
-            lineno = line_number;
+            comment_start (xp);
+            lineno = xp->line_number;
              last_was_star = false;
              for (;;)
                {
-                c = phase1_getc ();
+                c = phase1_getc (xp);
                  if (c == EOF)
                    break;
                  /* We skip all leading white space, but not EOLs.  */
-                if (buflen == 0 && (c == ' ' || c == '\t'))
+                if (xp->buflen == 0 && (c == ' ' || c == '\t'))
                    continue;
-                comment_add (c);
+                comment_add (xp, c);
                  switch (c)
                    {
                    case '\n':
-                    comment_line_end (1);
-                    comment_start ();
-                    lineno = line_number;
+                    comment_line_end (xp, 1);
+                    comment_start (xp);
+                    lineno = xp->line_number;
                      last_was_star = false;
                      continue;
  
@@ -665,7 +758,7 @@ phase3_getc ()
                    case '/':
                      if (last_was_star)
                        {
-                        comment_line_end (2);
+                        comment_line_end (xp, 2);
                          break;
                        }
                      FALLTHROUGH;
@@ -676,7 +769,7 @@ phase3_getc ()
                    }
                  break;
                }
-            last_comment_line = lineno;
+            xp->last_comment_line = lineno;
              return ' ';
            }
  
@@ -685,28 +778,28 @@ phase3_getc ()
              /* C++ comment.  */
              bool last_was_qmark = false;
  
-            comment_start ();
-            lineno = line_number;
+            comment_start (xp);
+            lineno = xp->line_number;
              for (;;)
                {
-                c = phase1_getc ();
+                c = phase1_getc (xp);
                  if (c == '\n' || c == EOF)
                    {
-                    comment_line_end (0);
+                    comment_line_end (xp, 0);
                      break;
                    }
                  if (last_was_qmark && c == '>')
                    {
-                    comment_line_end (1);
-                    skip_html ();
+                    comment_line_end (xp, 1);
+                    skip_html (xp);
                      break;
                    }
                  /* We skip all leading white space, but not EOLs.  */
-                if (!(buflen == 0 && (c == ' ' || c == '\t')))
-                  comment_add (c);
+                if (!(xp->buflen == 0 && (c == ' ' || c == '\t')))
+                  comment_add (xp, c);
                  last_was_qmark = (c == '?' || c == '%');
                }
-            last_comment_line = lineno;
+            xp->last_comment_line = lineno;
              return '\n';
            }
          }
@@ -717,13 +810,13 @@ phase3_getc ()
  
  #ifdef unused
  static void
-phase3_ungetc (int c)
+phase3_ungetc (struct php_extractor *xp, int c)
  {
    if (c != EOF)
      {
-      if (phase3_pushback_length == SIZEOF (phase3_pushback))
+      if (xp->phase3_pushback_length == SIZEOF (xp->phase3_pushback))
          abort ();
-      phase3_pushback[phase3_pushback_length++] = c;
+      xp->phase3_pushback[xp->phase3_pushback_length++] = c;
      }
  }
  #endif
@@ -732,32 +825,7 @@ phase3_ungetc (int c)
  /* ========================== Reading of tokens.  ========================== */
  
  
-enum token_type_ty
-{
-  token_type_eof,
-  token_type_lparen,            /* ( */
-  token_type_rparen,            /* ) */
-  token_type_comma,             /* , */
-  token_type_lbracket,          /* [ */
-  token_type_rbracket,          /* ] */
-  token_type_dot,               /* . */
-  token_type_operator1,         /* * / % ++ -- */
-  token_type_operator2,         /* + - ! ~ @ */
-  token_type_string_literal,    /* "abc" */
-  token_type_symbol,            /* symbol, number */
-  token_type_other              /* misc. operator */
-};
-typedef enum token_type_ty token_type_ty;
-
-typedef struct token_ty token_ty;
-struct token_ty
-{
-  token_type_ty type;
-  char *string;         /* for token_type_string_literal, token_type_symbol */
-  refcounted_string_list_ty *comment;   /* for token_type_string_literal */
-  int line_number;
-};
-
+/* 'struct token_ty' is defined above.  */
  
  /* Free the memory pointed to by a 'struct token_ty'.  */
  static inline void
@@ -772,28 +840,25 @@ free_token (token_ty *tp)
  
  /* 4. Combine characters into tokens.  Discard whitespace.  */
  
-static token_ty phase4_pushback[3];
-static int phase4_pushback_length;
-
  static void
-phase4_get (token_ty *tp)
+phase4_get (struct php_extractor *xp, token_ty *tp)
  {
    static char *buffer;
    static int bufmax;
    int bufpos;
    int c;
  
-  if (phase4_pushback_length)
+  if (xp->phase4_pushback_length)
      {
-      *tp = phase4_pushback[--phase4_pushback_length];
+      *tp = xp->phase4_pushback[--(xp->phase4_pushback_length)];
        return;
      }
    tp->string = NULL;
  
    for (;;)
      {
-      tp->line_number = line_number;
-      c = phase3_getc ();
+      tp->line_number = xp->line_number;
+      c = phase3_getc (xp);
        switch (c)
          {
          case EOF:
@@ -801,7 +866,7 @@ phase4_get (token_ty *tp)
            return;
  
          case '\n':
-          if (last_non_comment_line > last_comment_line)
+          if (xp->last_non_comment_line > xp->last_comment_line)
              savable_comment_reset ();
            FALLTHROUGH;
          case ' ':
@@ -811,7 +876,7 @@ phase4_get (token_ty *tp)
            continue;
          }
  
-      last_non_comment_line = tp->line_number;
+      xp->last_non_comment_line = tp->line_number;
  
        switch (c)
          {
@@ -852,7 +917,7 @@ phase4_get (token_ty *tp)
                    buffer = xrealloc (buffer, bufmax);
                  }
                buffer[bufpos++] = c;
-              c = phase1_getc ();
+              c = phase1_getc (xp);
                switch (c)
                  {
                  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
@@ -893,7 +958,7 @@ phase4_get (token_ty *tp)
                    continue;
  
                  default:
-                  phase1_ungetc (c);
+                  phase1_ungetc (xp, c);
                    break;
                  }
                break;
@@ -913,15 +978,15 @@ phase4_get (token_ty *tp)
            bufpos = 0;
            for (;;)
              {
-              c = phase1_getc ();
+              c = phase1_getc (xp);
                if (c == EOF || c == '\'')
                  break;
                if (c == '\\')
                  {
-                  c = phase1_getc ();
+                  c = phase1_getc (xp);
                    if (c != '\\' && c != '\'')
                      {
-                      phase1_ungetc (c);
+                      phase1_ungetc (xp, c);
                        c = '\\';
                      }
                  }
@@ -946,42 +1011,43 @@ phase4_get (token_ty *tp)
          case '"':
            /* Double-quoted string literal.  */
            tp->type = token_type_string_literal;
+        string_literal_continued:
            bufpos = 0;
            for (;;)
              {
-              c = phase1_getc ();
+              c = phase1_getc (xp);
                if (c == EOF || c == '"')
                  break;
                if (c == '$')
                  {
-                  c = phase1_getc ();
+                  c = phase1_getc (xp);
                    if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
-                      || c == '_' || c == '{' || c >= 0x7f)
+                      || c == '_' || c >= 0x7f)
                      {
                        /* String with variables.  */
                        tp->type = token_type_other;
                        continue;
                      }
-                  phase1_ungetc (c);
+                  if (c == '{')
+                    /* String with embedded expressions.  */
+                    goto string_with_embedded_expressions;
+                  phase1_ungetc (xp, c);
                    c = '$';
                  }
                if (c == '{')
                  {
-                  c = phase1_getc ();
+                  c = phase1_getc (xp);
                    if (c == '$')
-                    {
-                      /* String with expressions.  */
-                      tp->type = token_type_other;
-                      continue;
-                    }
-                  phase1_ungetc (c);
+                    /* String with embedded expressions.  */
+                    goto string_with_embedded_expressions;
+                  phase1_ungetc (xp, c);
                    c = '{';
                  }
                if (c == '\\')
                  {
                    int n, j;
  
-                  c = phase1_getc ();
+                  c = phase1_getc (xp);
                    switch (c)
                      {
                      case '"':
@@ -995,7 +1061,7 @@ phase4_get (token_ty *tp)
                        for (j = 0; j < 3; ++j)
                          {
                            n = n * 8 + c - '0';
-                          c = phase1_getc ();
+                          c = phase1_getc (xp);
                            switch (c)
                              {
                              default:
@@ -1007,7 +1073,7 @@ phase4_get (token_ty *tp)
                              }
                            break;
                          }
-                      phase1_ungetc (c);
+                      phase1_ungetc (xp, c);
                        c = n;
                        break;
  
@@ -1015,7 +1081,7 @@ phase4_get (token_ty *tp)
                        n = 0;
                        for (j = 0; j < 2; ++j)
                          {
-                          c = phase1_getc ();
+                          c = phase1_getc (xp);
                            switch (c)
                              {
                              case '0': case '1': case '2': case '3': case '4':
@@ -1031,7 +1097,7 @@ phase4_get (token_ty *tp)
                                n = n * 16 + 10 + c - 'a';
                                break;
                              default:
-                              phase1_ungetc (c);
+                              phase1_ungetc (xp, c);
                                c = 0;
                                break;
                              }
@@ -1040,7 +1106,7 @@ phase4_get (token_ty *tp)
                          }
                        if (j == 0)
                          {
-                          phase1_ungetc ('x');
+                          phase1_ungetc (xp, 'x');
                            c = '\\';
                          }
                        else
@@ -1058,7 +1124,7 @@ phase4_get (token_ty *tp)
                        break;
  
                      default:
-                      phase1_ungetc (c);
+                      phase1_ungetc (xp, c);
                        c = '\\';
                        break;
                      }
@@ -1083,20 +1149,98 @@ phase4_get (token_ty *tp)
              }
            return;
  
+        string_with_embedded_expressions:
+          tp->type = token_type_other;
+          {
+            size_t nesting_stack_alloc = 10;
+            char *nesting_stack = malloc (nesting_stack_alloc);
+            size_t nesting_stack_depth = 0;
+            /* We just read a '{', so expect a matching '}'.  */
+            nesting_stack[nesting_stack_depth++] = '}';
+
+            /* Find the extent of the expression.  */
+            bufpos = 0;
+            for (;;)
+              {
+                c = phase1_getc (xp);
+                if (c == EOF)
+                  break;
+                if (c == '"')
+                  {
+                    if (nesting_stack_depth > 0)
+                      if_error (IF_SEVERITY_WARNING,
+                                logical_file_name, xp->line_number, (size_t)(-1), false,
+                                _("unterminated expression in string literal, expected a '%c'"),
+                                nesting_stack[nesting_stack_depth - 1]);
+                    break;
+                  }
+                if (c == '{' || c == '[' || c == '(')
+                  {
+                    if (nesting_stack_depth >= nesting_stack_alloc)
+                      {
+                        nesting_stack_alloc = 2 * nesting_stack_alloc;
+                        nesting_stack =
+                          xrealloc (nesting_stack, nesting_stack_alloc);
+                      }
+                    nesting_stack[nesting_stack_depth++] =
+                      (c == '{' ? '}' : c == '[' ? ']' : ')');
+                  }
+                else if (c == '}' || c == ']' || c == ')')
+                  {
+                    if (nesting_stack_depth > 0
+                        && c == nesting_stack[nesting_stack_depth - 1])
+                      {
+                        if (--nesting_stack_depth == 0)
+                          break;
+                      }
+                    else
+                      if_error (IF_SEVERITY_WARNING,
+                                logical_file_name, xp->line_number, (size_t)(-1), false,
+                                _("unterminated expression in string literal contains unbalanced '%c'"),
+                                c);
+                  }
+                if (bufpos >= bufmax)
+                  {
+                    bufmax = 2 * bufmax + 10;
+                    buffer = xrealloc (buffer, bufmax);
+                  }
+                buffer[bufpos++] = c;
+              }
+
+            /* Recursively extract messages from the expression.  */
+            char *substring = xmalloc (bufpos);
+            memcpy (substring, buffer, bufpos);
+
+            struct php_extractor *rxp = XMALLOC (struct php_extractor);
+            rxp->mlp = xp->mlp;
+            rxp->fp = NULL;
+            rxp->input = substring;
+            rxp->input_end = substring + bufpos;
+            rxp->line_number = xp->line_number;
+            php_extractor_init_rest (rxp);
+
+            extract_php_input (rxp);
+
+            free (rxp);
+            free (substring);
+            free (nesting_stack);
+          }
+          goto string_literal_continued;
+
          case '?':
          case '%':
            {
-            int c2 = phase1_getc ();
+            int c2 = phase1_getc (xp);
              if (c2 == '>')
                {
                  /* ?> and %> terminate PHP mode and switch back to HTML
                     mode.  */
-                skip_html ();
+                skip_html (xp);
                  tp->type = token_type_other;
                }
              else
                {
-                phase1_ungetc (c2);
+                phase1_ungetc (xp, c2);
                  tp->type = (c == '%' ? token_type_operator1 : token_type_other);
                }
              return;
@@ -1134,14 +1278,14 @@ phase4_get (token_ty *tp)
          case '+':
          case '-':
            {
-            int c2 = phase1_getc ();
+            int c2 = phase1_getc (xp);
              if (c2 == c)
                /* ++ or -- */
                tp->type = token_type_operator1;
              else
                /* + or - */
                {
-                phase1_ungetc (c2);
+                phase1_ungetc (xp, c2);
                  tp->type = token_type_operator2;
                }
              return;
@@ -1155,10 +1299,10 @@ phase4_get (token_ty *tp)
  
          case '<':
            {
-            int c2 = phase1_getc ();
+            int c2 = phase1_getc (xp);
              if (c2 == '<')
                {
-                int c3 = phase1_getc ();
+                int c3 = phase1_getc (xp);
                  if (c3 == '<')
                    {
                      int label_start = 0;
@@ -1166,7 +1310,7 @@ phase4_get (token_ty *tp)
                      /* Start of here and now document.
                         Parse whitespace, then label, then newline.  */
                      do
-                      c = phase3_getc ();
+                      c = phase3_getc (xp);
                      while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
  
                      bufpos = 0;
@@ -1178,7 +1322,7 @@ phase4_get (token_ty *tp)
                              buffer = xrealloc (buffer, bufmax);
                            }
                          buffer[bufpos++] = c;
-                        c = phase3_getc ();
+                        c = phase3_getc (xp);
                        }
                      while (c != EOF && c != '\n' && c != '\r');
                      /* buffer[0..bufpos-1] now contains the label
@@ -1193,7 +1337,7 @@ phase4_get (token_ty *tp)
                      /* Now skip the here document.  */
                      for (;;)
                        {
-                        c = phase1_getc ();
+                        c = phase1_getc (xp);
                          if (c == EOF)
                            break;
                          if (c == '\n' || c == '\r')
@@ -1202,22 +1346,22 @@ phase4_get (token_ty *tp)
  
                              while (bufidx < bufpos)
                                {
-                                c = phase1_getc ();
+                                c = phase1_getc (xp);
                                  if (c == EOF)
                                    break;
                                  if (c != buffer[bufidx])
                                    {
-                                    phase1_ungetc (c);
+                                    phase1_ungetc (xp, c);
                                      break;
                                    }
                                  bufidx++;
                                }
                              if (bufidx == bufpos)
                                {
-                                c = phase1_getc ();
+                                c = phase1_getc (xp);
                                  if (c != ';')
-                                  phase1_ungetc (c);
-                                c = phase1_getc ();
+                                  phase1_ungetc (xp, c);
+                                c = phase1_getc (xp);
                                  if (c == '\n' || c == '\r')
                                    break;
                                }
@@ -1231,66 +1375,66 @@ phase4_get (token_ty *tp)
                      tp->type = token_type_other;
                      return;
                    }
-                phase1_ungetc (c3);
+                phase1_ungetc (xp, c3);
                }
  
              /* < / script > terminates PHP mode and switches back to HTML
                 mode.  */
              while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
-              c2 = phase1_getc ();
+              c2 = phase1_getc (xp);
              if (c2 == '/')
                {
                  do
-                  c2 = phase1_getc ();
+                  c2 = phase1_getc (xp);
                  while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
                  if (c2 == 's' || c2 == 'S')
                    {
-                    c2 = phase1_getc ();
+                    c2 = phase1_getc (xp);
                      if (c2 == 'c' || c2 == 'C')
                        {
-                        c2 = phase1_getc ();
+                        c2 = phase1_getc (xp);
                          if (c2 == 'r' || c2 == 'R')
                            {
-                            c2 = phase1_getc ();
+                            c2 = phase1_getc (xp);
                              if (c2 == 'i' || c2 == 'I')
                                {
-                                c2 = phase1_getc ();
+                                c2 = phase1_getc (xp);
                                  if (c2 == 'p' || c2 == 'P')
                                    {
-                                    c2 = phase1_getc ();
+                                    c2 = phase1_getc (xp);
                                      if (c2 == 't' || c2 == 'T')
                                        {
                                          do
-                                          c2 = phase1_getc ();
+                                          c2 = phase1_getc (xp);
                                          while (c2 == ' ' || c2 == '\t'
                                                 || c2 == '\n' || c2 == '\r');
                                          if (c2 == '>')
                                            {
-                                            skip_html ();
+                                            skip_html (xp);
                                            }
                                          else
-                                          phase1_ungetc (c2);
+                                          phase1_ungetc (xp, c2);
                                        }
                                      else
-                                      phase1_ungetc (c2);
+                                      phase1_ungetc (xp, c2);
                                    }
                                  else
-                                  phase1_ungetc (c2);
+                                  phase1_ungetc (xp, c2);
                                }
                              else
-                              phase1_ungetc (c2);
+                              phase1_ungetc (xp, c2);
                            }
                          else
-                          phase1_ungetc (c2);
+                          phase1_ungetc (xp, c2);
                        }
                      else
-                      phase1_ungetc (c2);
+                      phase1_ungetc (xp, c2);
                    }
                  else
-                  phase1_ungetc (c2);
+                  phase1_ungetc (xp, c2);
                }
              else
-              phase1_ungetc (c2);
+              phase1_ungetc (xp, c2);
  
              tp->type = token_type_other;
              return;
@@ -1310,13 +1454,13 @@ phase4_get (token_ty *tp)
  
  /* Supports 3 tokens of pushback.  */
  static void
-phase4_unget (token_ty *tp)
+phase4_unget (struct php_extractor *xp, token_ty *tp)
  {
    if (tp->type != token_type_eof)
      {
-      if (phase4_pushback_length == SIZEOF (phase4_pushback))
+      if (xp->phase4_pushback_length == SIZEOF (xp->phase4_pushback))
          abort ();
-      phase4_pushback[phase4_pushback_length++] = *tp;
+      xp->phase4_pushback[xp->phase4_pushback_length++] = *tp;
      }
  }
  
@@ -1334,17 +1478,15 @@ phase4_unget (token_ty *tp)
         higher precedence as '.', such as a multiplicative or postincrement
         expression).  */
  
-static token_type_ty phase5_last;
-
  static void
-x_php_lex (token_ty *tp)
+x_php_lex (struct php_extractor *xp, token_ty *tp)
  {
-  phase4_get (tp);
+  phase4_get (xp, tp);
    if (tp->type == token_type_string_literal
-      && !(phase5_last == token_type_dot
-           || phase5_last == token_type_operator1
-           || phase5_last == token_type_operator2
-           || phase5_last == token_type_rparen))
+      && !(xp->phase5_last == token_type_dot
+           || xp->phase5_last == token_type_operator1
+           || xp->phase5_last == token_type_operator2
+           || xp->phase5_last == token_type_rparen))
      {
        char *sum = tp->string;
        size_t sum_len = strlen (sum);
@@ -1353,17 +1495,17 @@ x_php_lex (token_ty *tp)
          {
            token_ty token2;
  
-          phase4_get (&token2);
+          phase4_get (xp, &token2);
            if (token2.type == token_type_dot)
              {
                token_ty token3;
  
-              phase4_get (&token3);
+              phase4_get (xp, &token3);
                if (token3.type == token_type_string_literal)
                  {
                    token_ty token_after;
  
-                  phase4_get (&token_after);
+                  phase4_get (xp, &token_after);
                    if (token_after.type != token_type_operator1)
                      {
                        char *addend = token3.string;
@@ -1373,21 +1515,21 @@ x_php_lex (token_ty *tp)
                        memcpy (sum + sum_len, addend, addend_len + 1);
                        sum_len += addend_len;
  
-                      phase4_unget (&token_after);
+                      phase4_unget (xp, &token_after);
                        free_token (&token3);
                        free_token (&token2);
                        continue;
                      }
-                  phase4_unget (&token_after);
+                  phase4_unget (xp, &token_after);
                  }
-              phase4_unget (&token3);
+              phase4_unget (xp, &token3);
              }
-          phase4_unget (&token2);
+          phase4_unget (xp, &token2);
            break;
          }
        tp->string = sum;
      }
-  phase5_last = tp->type;
+  xp->phase5_last = tp->type;
  }
  
  
@@ -1398,14 +1540,6 @@ x_php_lex (token_ty *tp)
  static flag_context_list_table_ty *flag_context_list_table;
  
  
-/* Maximum supported nesting depth.  */
-#define MAX_NESTING_DEPTH 1000
-
-/* Current nesting depths.  */
-static int paren_nesting_depth;
-static int bracket_nesting_depth;
-
-
  /* The file is broken into tokens.  Scan the token stream, looking for
     a keyword, followed by a left paren, followed by a string.  When we
     see this sequence, we have something to remember.  We assume we are
@@ -1422,12 +1556,12 @@ static int bracket_nesting_depth;
  
  
  /* Extract messages until the next balanced closing parenthesis or bracket.
-   Extracted messages are added to MLP.
+   Extracted messages are added to XP->MLP.
     DELIM can be either token_type_rparen or token_type_rbracket, or
     token_type_eof to accept both.
     Return true upon eof, false upon closing parenthesis or bracket.  */
  static bool
-extract_balanced (message_list_ty *mlp,
+extract_balanced (struct php_extractor *xp,
                    token_type_ty delim,
                    flag_region_ty *outer_region,
                    flag_context_list_iterator_ty context_iter,
@@ -1454,7 +1588,7 @@ extract_balanced (message_list_ty *mlp,
      {
        token_ty token;
  
-      x_php_lex (&token);
+      x_php_lex (xp, &token);
        switch (token.type)
          {
          case token_type_symbol:
@@ -1480,20 +1614,20 @@ extract_balanced (message_list_ty *mlp,
            continue;
  
          case token_type_lparen:
-          if (++paren_nesting_depth > MAX_NESTING_DEPTH)
+          if (++(xp->paren_nesting_depth) > MAX_NESTING_DEPTH)
              if_error (IF_SEVERITY_FATAL_ERROR,
-                      logical_file_name, line_number, (size_t)(-1), false,
+                      logical_file_name, xp->line_number, (size_t)(-1), false,
                        _("too many open parentheses"));
-          if (extract_balanced (mlp, token_type_rparen,
+          if (extract_balanced (xp, token_type_rparen,
                                  inner_region, next_context_iter,
-                                arglist_parser_alloc (mlp,
+                                arglist_parser_alloc (xp->mlp,
                                                        state ? next_shapes : NULL)))
              {
                arglist_parser_done (argparser, arg);
                unref_region (inner_region);
                return true;
              }
-          paren_nesting_depth--;
+          xp->paren_nesting_depth--;
            next_context_iter = null_context_list_iterator;
            state = 0;
            continue;
@@ -1521,20 +1655,20 @@ extract_balanced (message_list_ty *mlp,
            continue;
  
          case token_type_lbracket:
-          if (++bracket_nesting_depth > MAX_NESTING_DEPTH)
+          if (++(xp->bracket_nesting_depth) > MAX_NESTING_DEPTH)
              if_error (IF_SEVERITY_FATAL_ERROR,
-                      logical_file_name, line_number, (size_t)(-1), false,
+                      logical_file_name, xp->line_number, (size_t)(-1), false,
                        _("too many open brackets"));
-          if (extract_balanced (mlp, token_type_rbracket,
+          if (extract_balanced (xp, token_type_rbracket,
                                  null_context_region (),
                                  null_context_list_iterator,
-                                arglist_parser_alloc (mlp, NULL)))
+                                arglist_parser_alloc (xp->mlp, NULL)))
              {
                arglist_parser_done (argparser, arg);
                unref_region (inner_region);
                return true;
              }
-          bracket_nesting_depth--;
+          xp->bracket_nesting_depth--;
            next_context_iter = null_context_list_iterator;
            state = 0;
            continue;
@@ -1557,7 +1691,7 @@ extract_balanced (message_list_ty *mlp,
              pos.line_number = token.line_number;
  
              if (extract_all)
-              remember_a_message (mlp, NULL, token.string, false, false,
+              remember_a_message (xp->mlp, NULL, token.string, false, false,
                                    inner_region, &pos,
                                    NULL, token.comment, false);
              else
@@ -1596,51 +1730,46 @@ extract_balanced (message_list_ty *mlp,
  }
  
  
+static void
+extract_php_input (struct php_extractor *xp)
+{
+  /* Eat tokens until eof is seen.  When extract_balanced returns
+     due to an unbalanced closing parenthesis, just restart it.  */
+  while (!extract_balanced (xp, token_type_eof,
+                            null_context_region (), null_context_list_iterator,
+                            arglist_parser_alloc (xp->mlp, NULL)))
+    ;
+}
+
+
  void
  extract_php (FILE *f,
               const char *real_filename, const char *logical_filename,
               flag_context_list_table_ty *flag_table,
               msgdomain_list_ty *mdlp)
  {
-  message_list_ty *mlp = mdlp->item[0]->messages;
-
-  fp = f;
-  real_file_name = real_filename;
-  logical_file_name = xstrdup (logical_filename);
-  line_number = 1;
-
-  phase1_pushback_length = 0;
-#if 0
-  phase2_pushback_length = 0;
-#endif
-
-  last_comment_line = -1;
-  last_non_comment_line = -1;
-
-  phase3_pushback_length = 0;
-  phase4_pushback_length = 0;
-
-  phase5_last = token_type_eof;
-
    flag_context_list_table = flag_table;
-  paren_nesting_depth = 0;
-  bracket_nesting_depth = 0;
  
    init_keywords ();
  
+  struct php_extractor *xp = XMALLOC (struct php_extractor);
+
+  xp->mlp = mdlp->item[0]->messages;
+  xp->fp = f;
+  xp->input = NULL;
+  xp->input_end = NULL;
+  real_file_name = real_filename;
+  logical_file_name = xstrdup (logical_filename);
+  xp->line_number = 1;
+  php_extractor_init_rest (xp);
+
    /* Initial mode is HTML mode, not PHP mode.  */
-  skip_html ();
+  skip_html (xp);
  
-  /* Eat tokens until eof is seen.  When extract_balanced returns
-     due to an unbalanced closing parenthesis, just restart it.  */
-  while (!extract_balanced (mlp, token_type_eof,
-                            null_context_region (), null_context_list_iterator,
-                            arglist_parser_alloc (mlp, NULL)))
-    ;
+  extract_php_input (xp);
  
    /* Close scanner.  */
-  fp = NULL;
+  free (xp);
    real_file_name = NULL;
    logical_file_name = NULL;
-  line_number = 0;
  }
diff --git a/gettext-tools/tests/xgettext-php-1 b/gettext-tools/tests/xgettext-php-1

index 9c9891c4de00315924951ffc080666185e93f7f5..ebf31a988b9774a195bc31bbfd0d8ffe7bbbdad0 100755 (executable)
--- a/gettext-tools/tests/xgettext-php-1
+++ b/gettext-tools/tests/xgettext-php-1
@@ -1,7 +1,7 @@
  #!/bin/sh
  . "${srcdir=.}/init.sh"; path_prepend_ . ../src
  
-# Test PHP support: --add-comments option.
+# Test PHP support: --add-comments option, strings with embedded expressions.
  
  cat <<\EOF > xg-ph-1.php
  <?
@@ -19,6 +19,12 @@ echo _("Hey Jude");
  echo _("The Fabulous Four");
  // This will not be extracted.
  echo _(table["key"]);
+// Strings with embedded expressions, a.k.a. variable parsing.
+echo _("embedded_1_$foo bar");
+echo _("embedded_2_${foo}bar");
+echo _("embedded_3_{$foo}bar");
+echo _("embedded_4_{$array[func(_('embedded_4_sub1'))]}_bar_{$array[func(_('embedded_4_sub2'))]}_baz");
+echo _("embedded_5");
  ?>
  EOF
  
@@ -47,6 +53,15 @@ msgstr ""
  #.
  msgid "The Fabulous Four"
  msgstr ""
+
+msgid "embedded_4_sub1"
+msgstr ""
+
+msgid "embedded_4_sub2"
+msgstr ""
+
+msgid "embedded_5"
+msgstr ""
  EOF
  
  : ${DIFF=diff}
author	Bruno Haible <bruno@clisp.org>
	Wed, 18 Sep 2024 21:37:32 +0000 (23:37 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Wed, 18 Sep 2024 21:37:32 +0000 (23:37 +0200)
NEWS		patch \| blob \| blame \| history
gettext-tools/src/x-php.c		patch \| blob \| blame \| history
gettext-tools/tests/xgettext-php-1		patch \| blob \| blame \| history