}
-/* ======================== Reading of characters. ======================== */
+/* =================== Variables used by the extractor. =================== */
-/* The input file stream. */
-static FILE *fp;
+/* Type definitions needed for the variables. */
+enum token_type_ty
+{
+ token_type_eof,
+ token_type_lparen, /* ( */
+ token_type_rparen, /* ) */
+ token_type_comma, /* , */
+ token_type_lbracket, /* [ */
+ token_type_rbracket, /* ] */
+ token_type_dot, /* . */
+ token_type_operator1, /* * / % ++ -- */
+ token_type_operator2, /* + - ! ~ @ */
+ token_type_string_literal, /* "abc" */
+ token_type_symbol, /* symbol, number */
+ token_type_other /* misc. operator */
+};
+typedef enum token_type_ty token_type_ty;
-/* 1. line_number handling. */
+typedef struct token_ty token_ty;
+struct token_ty
+{
+ token_type_ty type;
+ char *string; /* for token_type_string_literal, token_type_symbol */
+ refcounted_string_list_ty *comment; /* for token_type_string_literal */
+ int line_number;
+};
+
+/* These variables are combined in a struct, so that we can invoke the
+ extractor in a reentrant way. */
+
+struct php_extractor
+{
+ /* Accumulator for the output. */
+ message_list_ty *mlp;
+
+ /* The input file stream, when reading from a file. */
+ FILE *fp;
+ /* The input area, when reading from a string. */
+ const char *input;
+ const char *input_end;
+
+ int line_number;
+
+ unsigned char phase1_pushback[2];
+ int phase1_pushback_length;
+
+#if 0
+ unsigned char phase2_pushback[1];
+ int phase2_pushback_length;
+#endif
+
+ /* For accumulating comments. */
+ char *buffer;
+ size_t bufmax;
+ size_t buflen;
+
+ /* These are for tracking whether comments count as immediately before
+ keyword. */
+ int last_comment_line;
+ int last_non_comment_line;
+
+ unsigned char phase3_pushback[1];
+ int phase3_pushback_length;
+
+ token_ty phase4_pushback[3];
+ int phase4_pushback_length;
-static unsigned char phase1_pushback[2];
-static int phase1_pushback_length;
+ token_type_ty phase5_last;
+
+ /* Maximum supported nesting depth. */
+ #define MAX_NESTING_DEPTH 1000
+
+ /* Current nesting depths. */
+ int paren_nesting_depth;
+ int bracket_nesting_depth;
+};
+
+static inline void
+php_extractor_init_rest (struct php_extractor *xp)
+{
+ xp->phase1_pushback_length = 0;
+#if 0
+ xp->phase2_pushback_length = 0;
+#endif
+
+ xp->buffer = NULL;
+ xp->bufmax = 0;
+ xp->buflen = 0;
+
+ xp->last_comment_line = -1;
+ xp->last_non_comment_line = -1;
+
+ xp->phase3_pushback_length = 0;
+ xp->phase4_pushback_length = 0;
+
+ xp->phase5_last = token_type_eof;
+
+ xp->paren_nesting_depth = 0;
+ xp->bracket_nesting_depth = 0;
+}
+
+/* Forward declarations. */
+static void extract_php_input (struct php_extractor *xp);
+
+
+/* ======================== Reading of characters. ======================== */
+
+/* 1. line_number handling. */
static int
-phase1_getc ()
+phase1_getc (struct php_extractor *xp)
{
int c;
- if (phase1_pushback_length)
- c = phase1_pushback[--phase1_pushback_length];
- else
+ if (xp->phase1_pushback_length)
+ c = xp->phase1_pushback[--(xp->phase1_pushback_length)];
+ else if (xp->fp != NULL)
{
- c = getc (fp);
+ c = getc (xp->fp);
if (c == EOF)
{
- if (ferror (fp))
+ if (ferror (xp->fp))
error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
real_file_name);
return EOF;
}
}
+ else
+ {
+ if (xp->input == xp->input_end)
+ return EOF;
+ c = *(xp->input++);
+ }
- if (c == '\n')
- line_number++;
+ if (xp->fp != NULL && c == '\n')
+ xp->line_number++;
return c;
}
/* Supports 2 characters of pushback. */
static void
-phase1_ungetc (int c)
+phase1_ungetc (struct php_extractor *xp, int c)
{
if (c != EOF)
{
if (c == '\n')
- --line_number;
+ --(xp->line_number);
- if (phase1_pushback_length == SIZEOF (phase1_pushback))
+ if (xp->phase1_pushback_length == SIZEOF (xp->phase1_pushback))
abort ();
- phase1_pushback[phase1_pushback_length++] = c;
+ xp->phase1_pushback[xp->phase1_pushback_length++] = c;
}
}
therefore don't contain translatable strings. */
static void
-skip_html ()
+skip_html (struct php_extractor *xp)
{
for (;;)
{
- int c = phase1_getc ();
+ int c = phase1_getc (xp);
if (c == EOF)
return;
if (c == '<')
{
- int c2 = phase1_getc ();
+ int c2 = phase1_getc (xp);
if (c2 == EOF)
break;
{
/* <?php is the normal way to enter PHP mode. <? and <?= are
recognized by PHP depending on a configuration setting. */
- int c3 = phase1_getc ();
+ int c3 = phase1_getc (xp);
if (c3 != '=')
- phase1_ungetc (c3);
+ phase1_ungetc (xp, c3);
return;
}
{
/* <% and <%= are recognized by PHP depending on a configuration
setting. */
- int c3 = phase1_getc ();
+ int c3 = phase1_getc (xp);
if (c3 != '=')
- phase1_ungetc (c3);
+ phase1_ungetc (xp, c3);
return;
}
if (c2 == '<')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
< script language = 'php' >
are always recognized. */
while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 's' && c2 != 'S')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'c' && c2 != 'C')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'r' && c2 != 'R')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'i' && c2 != 'I')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'p' && c2 != 'P')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 't' && c2 != 'T')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (!(c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r'))
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
do
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
if (c2 != 'l' && c2 != 'L')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'a' && c2 != 'A')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'n' && c2 != 'N')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'g' && c2 != 'G')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'u' && c2 != 'U')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'a' && c2 != 'A')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'g' && c2 != 'G')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'e' && c2 != 'E')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != '=')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == '"')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'p')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'h')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'p')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != '"')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
}
else if (c2 == '\'')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'p')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'h')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'p')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != '\'')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
}
{
if (c2 != 'p')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'h')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != 'p')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
}
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 != '>')
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
continue;
}
return;
#if 0
-static unsigned char phase2_pushback[1];
-static int phase2_pushback_length;
-
static int
-phase2_getc ()
+phase2_getc (struct php_extractor *xp)
{
int c;
- if (phase2_pushback_length)
- return phase2_pushback[--phase2_pushback_length];
+ if (xp->phase2_pushback_length)
+ return xp->phase2_pushback[--(xp->phase2_pushback_length)];
- c = phase1_getc ();
+ c = phase1_getc (xp);
switch (c)
{
case '?':
case '%':
{
- int c2 = phase1_getc ();
+ int c2 = phase1_getc (xp);
if (c2 == '>')
{
/* ?> and %> terminate PHP mode and switch back to HTML mode. */
skip_html ();
return ' ';
}
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
break;
case '<':
{
- int c2 = phase1_getc ();
+ int c2 = phase1_getc (xp);
/* < / script > terminates PHP mode and switches back to HTML mode. */
while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == '/')
{
do
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
if (c2 == 's' || c2 == 'S')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 'c' || c2 == 'C')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 'r' || c2 == 'R')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 'i' || c2 == 'I')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 'p' || c2 == 'P')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 't' || c2 == 'T')
{
do
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
while (c2 == ' ' || c2 == '\t'
|| c2 == '\n' || c2 == '\r');
if (c2 == '>')
{
- skip_html ();
+ skip_html (xp);
return ' ';
}
}
}
}
}
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
break;
}
}
static void
-phase2_ungetc (int c)
+phase2_ungetc (struct php_extractor *xp, int c)
{
if (c != EOF)
{
- if (phase2_pushback_length == SIZEOF (phase2_pushback))
+ if (xp->phase2_pushback_length == SIZEOF (xp->phase2_pushback))
abort ();
- phase2_pushback[phase2_pushback_length++] = c;
+ xp->phase2_pushback[xp->phase2_pushback_length++] = c;
}
}
/* Accumulating comments. */
-static char *buffer;
-static size_t bufmax;
-static size_t buflen;
-
static inline void
-comment_start ()
+comment_start (struct php_extractor *xp)
{
- buflen = 0;
+ xp->buflen = 0;
}
static inline void
-comment_add (int c)
+comment_add (struct php_extractor *xp, int c)
{
- if (buflen >= bufmax)
+ if (xp->buflen >= xp->bufmax)
{
- bufmax = 2 * bufmax + 10;
- buffer = xrealloc (buffer, bufmax);
+ xp->bufmax = 2 * xp->bufmax + 10;
+ xp->buffer = xrealloc (xp->buffer, xp->bufmax);
}
- buffer[buflen++] = c;
+ xp->buffer[xp->buflen++] = c;
}
static inline void
-comment_line_end (size_t chars_to_remove)
+comment_line_end (struct php_extractor *xp, size_t chars_to_remove)
{
- buflen -= chars_to_remove;
- while (buflen >= 1
- && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
- --buflen;
- if (chars_to_remove == 0 && buflen >= bufmax)
+ xp->buflen -= chars_to_remove;
+ while (xp->buflen >= 1
+ && (xp->buffer[xp->buflen - 1] == ' '
+ || xp->buffer[xp->buflen - 1] == '\t'))
+ --(xp->buflen);
+ if (chars_to_remove == 0 && xp->buflen >= xp->bufmax)
{
- bufmax = 2 * bufmax + 10;
- buffer = xrealloc (buffer, bufmax);
+ xp->bufmax = 2 * xp->bufmax + 10;
+ xp->buffer = xrealloc (xp->buffer, xp->bufmax);
}
- buffer[buflen] = '\0';
- savable_comment_add (buffer);
+ xp->buffer[xp->buflen] = '\0';
+ savable_comment_add (xp->buffer);
}
space character. We need to remember the comment for later, because
it may be attached to a keyword string. */
-/* These are for tracking whether comments count as immediately before
- keyword. */
-static int last_comment_line;
-static int last_non_comment_line;
-
-static unsigned char phase3_pushback[1];
-static int phase3_pushback_length;
-
static int
-phase3_getc ()
+phase3_getc (struct php_extractor *xp)
{
int lineno;
int c;
- if (phase3_pushback_length)
- return phase3_pushback[--phase3_pushback_length];
+ if (xp->phase3_pushback_length)
+ return xp->phase3_pushback[--(xp->phase3_pushback_length)];
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '#')
{
/* sh comment. */
bool last_was_qmark = false;
- comment_start ();
- lineno = line_number;
+ comment_start (xp);
+ lineno = xp->line_number;
for (;;)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '\n' || c == EOF)
{
- comment_line_end (0);
+ comment_line_end (xp, 0);
break;
}
if (last_was_qmark && c == '>')
{
- comment_line_end (1);
- skip_html ();
+ comment_line_end (xp, 1);
+ skip_html (xp);
break;
}
/* We skip all leading white space, but not EOLs. */
- if (!(buflen == 0 && (c == ' ' || c == '\t')))
- comment_add (c);
+ if (!(xp->buflen == 0 && (c == ' ' || c == '\t')))
+ comment_add (xp, c);
last_was_qmark = (c == '?' || c == '%');
}
- last_comment_line = lineno;
+ xp->last_comment_line = lineno;
return '\n';
}
else if (c == '/')
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
switch (c)
{
default:
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
return '/';
case '*':
/* C comment. */
bool last_was_star;
- comment_start ();
- lineno = line_number;
+ comment_start (xp);
+ lineno = xp->line_number;
last_was_star = false;
for (;;)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == EOF)
break;
/* We skip all leading white space, but not EOLs. */
- if (buflen == 0 && (c == ' ' || c == '\t'))
+ if (xp->buflen == 0 && (c == ' ' || c == '\t'))
continue;
- comment_add (c);
+ comment_add (xp, c);
switch (c)
{
case '\n':
- comment_line_end (1);
- comment_start ();
- lineno = line_number;
+ comment_line_end (xp, 1);
+ comment_start (xp);
+ lineno = xp->line_number;
last_was_star = false;
continue;
case '/':
if (last_was_star)
{
- comment_line_end (2);
+ comment_line_end (xp, 2);
break;
}
FALLTHROUGH;
}
break;
}
- last_comment_line = lineno;
+ xp->last_comment_line = lineno;
return ' ';
}
/* C++ comment. */
bool last_was_qmark = false;
- comment_start ();
- lineno = line_number;
+ comment_start (xp);
+ lineno = xp->line_number;
for (;;)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '\n' || c == EOF)
{
- comment_line_end (0);
+ comment_line_end (xp, 0);
break;
}
if (last_was_qmark && c == '>')
{
- comment_line_end (1);
- skip_html ();
+ comment_line_end (xp, 1);
+ skip_html (xp);
break;
}
/* We skip all leading white space, but not EOLs. */
- if (!(buflen == 0 && (c == ' ' || c == '\t')))
- comment_add (c);
+ if (!(xp->buflen == 0 && (c == ' ' || c == '\t')))
+ comment_add (xp, c);
last_was_qmark = (c == '?' || c == '%');
}
- last_comment_line = lineno;
+ xp->last_comment_line = lineno;
return '\n';
}
}
#ifdef unused
static void
-phase3_ungetc (int c)
+phase3_ungetc (struct php_extractor *xp, int c)
{
if (c != EOF)
{
- if (phase3_pushback_length == SIZEOF (phase3_pushback))
+ if (xp->phase3_pushback_length == SIZEOF (xp->phase3_pushback))
abort ();
- phase3_pushback[phase3_pushback_length++] = c;
+ xp->phase3_pushback[xp->phase3_pushback_length++] = c;
}
}
#endif
/* ========================== Reading of tokens. ========================== */
-enum token_type_ty
-{
- token_type_eof,
- token_type_lparen, /* ( */
- token_type_rparen, /* ) */
- token_type_comma, /* , */
- token_type_lbracket, /* [ */
- token_type_rbracket, /* ] */
- token_type_dot, /* . */
- token_type_operator1, /* * / % ++ -- */
- token_type_operator2, /* + - ! ~ @ */
- token_type_string_literal, /* "abc" */
- token_type_symbol, /* symbol, number */
- token_type_other /* misc. operator */
-};
-typedef enum token_type_ty token_type_ty;
-
-typedef struct token_ty token_ty;
-struct token_ty
-{
- token_type_ty type;
- char *string; /* for token_type_string_literal, token_type_symbol */
- refcounted_string_list_ty *comment; /* for token_type_string_literal */
- int line_number;
-};
-
+/* 'struct token_ty' is defined above. */
/* Free the memory pointed to by a 'struct token_ty'. */
static inline void
/* 4. Combine characters into tokens. Discard whitespace. */
-static token_ty phase4_pushback[3];
-static int phase4_pushback_length;
-
static void
-phase4_get (token_ty *tp)
+phase4_get (struct php_extractor *xp, token_ty *tp)
{
static char *buffer;
static int bufmax;
int bufpos;
int c;
- if (phase4_pushback_length)
+ if (xp->phase4_pushback_length)
{
- *tp = phase4_pushback[--phase4_pushback_length];
+ *tp = xp->phase4_pushback[--(xp->phase4_pushback_length)];
return;
}
tp->string = NULL;
for (;;)
{
- tp->line_number = line_number;
- c = phase3_getc ();
+ tp->line_number = xp->line_number;
+ c = phase3_getc (xp);
switch (c)
{
case EOF:
return;
case '\n':
- if (last_non_comment_line > last_comment_line)
+ if (xp->last_non_comment_line > xp->last_comment_line)
savable_comment_reset ();
FALLTHROUGH;
case ' ':
continue;
}
- last_non_comment_line = tp->line_number;
+ xp->last_non_comment_line = tp->line_number;
switch (c)
{
buffer = xrealloc (buffer, bufmax);
}
buffer[bufpos++] = c;
- c = phase1_getc ();
+ c = phase1_getc (xp);
switch (c)
{
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
continue;
default:
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
break;
}
break;
bufpos = 0;
for (;;)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == EOF || c == '\'')
break;
if (c == '\\')
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c != '\\' && c != '\'')
{
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
c = '\\';
}
}
case '"':
/* Double-quoted string literal. */
tp->type = token_type_string_literal;
+ string_literal_continued:
bufpos = 0;
for (;;)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == EOF || c == '"')
break;
if (c == '$')
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
- || c == '_' || c == '{' || c >= 0x7f)
+ || c == '_' || c >= 0x7f)
{
/* String with variables. */
tp->type = token_type_other;
continue;
}
- phase1_ungetc (c);
+ if (c == '{')
+ /* String with embedded expressions. */
+ goto string_with_embedded_expressions;
+ phase1_ungetc (xp, c);
c = '$';
}
if (c == '{')
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '$')
- {
- /* String with expressions. */
- tp->type = token_type_other;
- continue;
- }
- phase1_ungetc (c);
+ /* String with embedded expressions. */
+ goto string_with_embedded_expressions;
+ phase1_ungetc (xp, c);
c = '{';
}
if (c == '\\')
{
int n, j;
- c = phase1_getc ();
+ c = phase1_getc (xp);
switch (c)
{
case '"':
for (j = 0; j < 3; ++j)
{
n = n * 8 + c - '0';
- c = phase1_getc ();
+ c = phase1_getc (xp);
switch (c)
{
default:
}
break;
}
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
c = n;
break;
n = 0;
for (j = 0; j < 2; ++j)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
switch (c)
{
case '0': case '1': case '2': case '3': case '4':
n = n * 16 + 10 + c - 'a';
break;
default:
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
c = 0;
break;
}
}
if (j == 0)
{
- phase1_ungetc ('x');
+ phase1_ungetc (xp, 'x');
c = '\\';
}
else
break;
default:
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
c = '\\';
break;
}
}
return;
+ string_with_embedded_expressions:
+ tp->type = token_type_other;
+ {
+ size_t nesting_stack_alloc = 10;
+ char *nesting_stack = malloc (nesting_stack_alloc);
+ size_t nesting_stack_depth = 0;
+ /* We just read a '{', so expect a matching '}'. */
+ nesting_stack[nesting_stack_depth++] = '}';
+
+ /* Find the extent of the expression. */
+ bufpos = 0;
+ for (;;)
+ {
+ c = phase1_getc (xp);
+ if (c == EOF)
+ break;
+ if (c == '"')
+ {
+ if (nesting_stack_depth > 0)
+ if_error (IF_SEVERITY_WARNING,
+ logical_file_name, xp->line_number, (size_t)(-1), false,
+ _("unterminated expression in string literal, expected a '%c'"),
+ nesting_stack[nesting_stack_depth - 1]);
+ break;
+ }
+ if (c == '{' || c == '[' || c == '(')
+ {
+ if (nesting_stack_depth >= nesting_stack_alloc)
+ {
+ nesting_stack_alloc = 2 * nesting_stack_alloc;
+ nesting_stack =
+ xrealloc (nesting_stack, nesting_stack_alloc);
+ }
+ nesting_stack[nesting_stack_depth++] =
+ (c == '{' ? '}' : c == '[' ? ']' : ')');
+ }
+ else if (c == '}' || c == ']' || c == ')')
+ {
+ if (nesting_stack_depth > 0
+ && c == nesting_stack[nesting_stack_depth - 1])
+ {
+ if (--nesting_stack_depth == 0)
+ break;
+ }
+ else
+ if_error (IF_SEVERITY_WARNING,
+ logical_file_name, xp->line_number, (size_t)(-1), false,
+ _("unterminated expression in string literal contains unbalanced '%c'"),
+ c);
+ }
+ if (bufpos >= bufmax)
+ {
+ bufmax = 2 * bufmax + 10;
+ buffer = xrealloc (buffer, bufmax);
+ }
+ buffer[bufpos++] = c;
+ }
+
+ /* Recursively extract messages from the expression. */
+ char *substring = xmalloc (bufpos);
+ memcpy (substring, buffer, bufpos);
+
+ struct php_extractor *rxp = XMALLOC (struct php_extractor);
+ rxp->mlp = xp->mlp;
+ rxp->fp = NULL;
+ rxp->input = substring;
+ rxp->input_end = substring + bufpos;
+ rxp->line_number = xp->line_number;
+ php_extractor_init_rest (rxp);
+
+ extract_php_input (rxp);
+
+ free (rxp);
+ free (substring);
+ free (nesting_stack);
+ }
+ goto string_literal_continued;
+
case '?':
case '%':
{
- int c2 = phase1_getc ();
+ int c2 = phase1_getc (xp);
if (c2 == '>')
{
/* ?> and %> terminate PHP mode and switch back to HTML
mode. */
- skip_html ();
+ skip_html (xp);
tp->type = token_type_other;
}
else
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
tp->type = (c == '%' ? token_type_operator1 : token_type_other);
}
return;
case '+':
case '-':
{
- int c2 = phase1_getc ();
+ int c2 = phase1_getc (xp);
if (c2 == c)
/* ++ or -- */
tp->type = token_type_operator1;
else
/* + or - */
{
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
tp->type = token_type_operator2;
}
return;
case '<':
{
- int c2 = phase1_getc ();
+ int c2 = phase1_getc (xp);
if (c2 == '<')
{
- int c3 = phase1_getc ();
+ int c3 = phase1_getc (xp);
if (c3 == '<')
{
int label_start = 0;
/* Start of here and now document.
Parse whitespace, then label, then newline. */
do
- c = phase3_getc ();
+ c = phase3_getc (xp);
while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
bufpos = 0;
buffer = xrealloc (buffer, bufmax);
}
buffer[bufpos++] = c;
- c = phase3_getc ();
+ c = phase3_getc (xp);
}
while (c != EOF && c != '\n' && c != '\r');
/* buffer[0..bufpos-1] now contains the label
/* Now skip the here document. */
for (;;)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == EOF)
break;
if (c == '\n' || c == '\r')
while (bufidx < bufpos)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == EOF)
break;
if (c != buffer[bufidx])
{
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
break;
}
bufidx++;
}
if (bufidx == bufpos)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c != ';')
- phase1_ungetc (c);
- c = phase1_getc ();
+ phase1_ungetc (xp, c);
+ c = phase1_getc (xp);
if (c == '\n' || c == '\r')
break;
}
tp->type = token_type_other;
return;
}
- phase1_ungetc (c3);
+ phase1_ungetc (xp, c3);
}
/* < / script > terminates PHP mode and switches back to HTML
mode. */
while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r')
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == '/')
{
do
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
while (c2 == ' ' || c2 == '\t' || c2 == '\n' || c2 == '\r');
if (c2 == 's' || c2 == 'S')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 'c' || c2 == 'C')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 'r' || c2 == 'R')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 'i' || c2 == 'I')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 'p' || c2 == 'P')
{
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == 't' || c2 == 'T')
{
do
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
while (c2 == ' ' || c2 == '\t'
|| c2 == '\n' || c2 == '\r');
if (c2 == '>')
{
- skip_html ();
+ skip_html (xp);
}
else
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
else
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
else
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
else
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
else
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
else
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
else
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
else
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
tp->type = token_type_other;
return;
/* Supports 3 tokens of pushback. */
static void
-phase4_unget (token_ty *tp)
+phase4_unget (struct php_extractor *xp, token_ty *tp)
{
if (tp->type != token_type_eof)
{
- if (phase4_pushback_length == SIZEOF (phase4_pushback))
+ if (xp->phase4_pushback_length == SIZEOF (xp->phase4_pushback))
abort ();
- phase4_pushback[phase4_pushback_length++] = *tp;
+ xp->phase4_pushback[xp->phase4_pushback_length++] = *tp;
}
}
higher precedence as '.', such as a multiplicative or postincrement
expression). */
-static token_type_ty phase5_last;
-
static void
-x_php_lex (token_ty *tp)
+x_php_lex (struct php_extractor *xp, token_ty *tp)
{
- phase4_get (tp);
+ phase4_get (xp, tp);
if (tp->type == token_type_string_literal
- && !(phase5_last == token_type_dot
- || phase5_last == token_type_operator1
- || phase5_last == token_type_operator2
- || phase5_last == token_type_rparen))
+ && !(xp->phase5_last == token_type_dot
+ || xp->phase5_last == token_type_operator1
+ || xp->phase5_last == token_type_operator2
+ || xp->phase5_last == token_type_rparen))
{
char *sum = tp->string;
size_t sum_len = strlen (sum);
{
token_ty token2;
- phase4_get (&token2);
+ phase4_get (xp, &token2);
if (token2.type == token_type_dot)
{
token_ty token3;
- phase4_get (&token3);
+ phase4_get (xp, &token3);
if (token3.type == token_type_string_literal)
{
token_ty token_after;
- phase4_get (&token_after);
+ phase4_get (xp, &token_after);
if (token_after.type != token_type_operator1)
{
char *addend = token3.string;
memcpy (sum + sum_len, addend, addend_len + 1);
sum_len += addend_len;
- phase4_unget (&token_after);
+ phase4_unget (xp, &token_after);
free_token (&token3);
free_token (&token2);
continue;
}
- phase4_unget (&token_after);
+ phase4_unget (xp, &token_after);
}
- phase4_unget (&token3);
+ phase4_unget (xp, &token3);
}
- phase4_unget (&token2);
+ phase4_unget (xp, &token2);
break;
}
tp->string = sum;
}
- phase5_last = tp->type;
+ xp->phase5_last = tp->type;
}
static flag_context_list_table_ty *flag_context_list_table;
-/* Maximum supported nesting depth. */
-#define MAX_NESTING_DEPTH 1000
-
-/* Current nesting depths. */
-static int paren_nesting_depth;
-static int bracket_nesting_depth;
-
-
/* The file is broken into tokens. Scan the token stream, looking for
a keyword, followed by a left paren, followed by a string. When we
see this sequence, we have something to remember. We assume we are
/* Extract messages until the next balanced closing parenthesis or bracket.
- Extracted messages are added to MLP.
+ Extracted messages are added to XP->MLP.
DELIM can be either token_type_rparen or token_type_rbracket, or
token_type_eof to accept both.
Return true upon eof, false upon closing parenthesis or bracket. */
static bool
-extract_balanced (message_list_ty *mlp,
+extract_balanced (struct php_extractor *xp,
token_type_ty delim,
flag_region_ty *outer_region,
flag_context_list_iterator_ty context_iter,
{
token_ty token;
- x_php_lex (&token);
+ x_php_lex (xp, &token);
switch (token.type)
{
case token_type_symbol:
continue;
case token_type_lparen:
- if (++paren_nesting_depth > MAX_NESTING_DEPTH)
+ if (++(xp->paren_nesting_depth) > MAX_NESTING_DEPTH)
if_error (IF_SEVERITY_FATAL_ERROR,
- logical_file_name, line_number, (size_t)(-1), false,
+ logical_file_name, xp->line_number, (size_t)(-1), false,
_("too many open parentheses"));
- if (extract_balanced (mlp, token_type_rparen,
+ if (extract_balanced (xp, token_type_rparen,
inner_region, next_context_iter,
- arglist_parser_alloc (mlp,
+ arglist_parser_alloc (xp->mlp,
state ? next_shapes : NULL)))
{
arglist_parser_done (argparser, arg);
unref_region (inner_region);
return true;
}
- paren_nesting_depth--;
+ xp->paren_nesting_depth--;
next_context_iter = null_context_list_iterator;
state = 0;
continue;
continue;
case token_type_lbracket:
- if (++bracket_nesting_depth > MAX_NESTING_DEPTH)
+ if (++(xp->bracket_nesting_depth) > MAX_NESTING_DEPTH)
if_error (IF_SEVERITY_FATAL_ERROR,
- logical_file_name, line_number, (size_t)(-1), false,
+ logical_file_name, xp->line_number, (size_t)(-1), false,
_("too many open brackets"));
- if (extract_balanced (mlp, token_type_rbracket,
+ if (extract_balanced (xp, token_type_rbracket,
null_context_region (),
null_context_list_iterator,
- arglist_parser_alloc (mlp, NULL)))
+ arglist_parser_alloc (xp->mlp, NULL)))
{
arglist_parser_done (argparser, arg);
unref_region (inner_region);
return true;
}
- bracket_nesting_depth--;
+ xp->bracket_nesting_depth--;
next_context_iter = null_context_list_iterator;
state = 0;
continue;
pos.line_number = token.line_number;
if (extract_all)
- remember_a_message (mlp, NULL, token.string, false, false,
+ remember_a_message (xp->mlp, NULL, token.string, false, false,
inner_region, &pos,
NULL, token.comment, false);
else
}
+static void
+extract_php_input (struct php_extractor *xp)
+{
+ /* Eat tokens until eof is seen. When extract_balanced returns
+ due to an unbalanced closing parenthesis, just restart it. */
+ while (!extract_balanced (xp, token_type_eof,
+ null_context_region (), null_context_list_iterator,
+ arglist_parser_alloc (xp->mlp, NULL)))
+ ;
+}
+
+
void
extract_php (FILE *f,
const char *real_filename, const char *logical_filename,
flag_context_list_table_ty *flag_table,
msgdomain_list_ty *mdlp)
{
- message_list_ty *mlp = mdlp->item[0]->messages;
-
- fp = f;
- real_file_name = real_filename;
- logical_file_name = xstrdup (logical_filename);
- line_number = 1;
-
- phase1_pushback_length = 0;
-#if 0
- phase2_pushback_length = 0;
-#endif
-
- last_comment_line = -1;
- last_non_comment_line = -1;
-
- phase3_pushback_length = 0;
- phase4_pushback_length = 0;
-
- phase5_last = token_type_eof;
-
flag_context_list_table = flag_table;
- paren_nesting_depth = 0;
- bracket_nesting_depth = 0;
init_keywords ();
+ struct php_extractor *xp = XMALLOC (struct php_extractor);
+
+ xp->mlp = mdlp->item[0]->messages;
+ xp->fp = f;
+ xp->input = NULL;
+ xp->input_end = NULL;
+ real_file_name = real_filename;
+ logical_file_name = xstrdup (logical_filename);
+ xp->line_number = 1;
+ php_extractor_init_rest (xp);
+
/* Initial mode is HTML mode, not PHP mode. */
- skip_html ();
+ skip_html (xp);
- /* Eat tokens until eof is seen. When extract_balanced returns
- due to an unbalanced closing parenthesis, just restart it. */
- while (!extract_balanced (mlp, token_type_eof,
- null_context_region (), null_context_list_iterator,
- arglist_parser_alloc (mlp, NULL)))
- ;
+ extract_php_input (xp);
/* Close scanner. */
- fp = NULL;
+ free (xp);
real_file_name = NULL;
logical_file_name = NULL;
- line_number = 0;
}