}
-/* ======================== Reading of characters. ======================== */
+/* =================== Variables used by the extractor. =================== */
-/* The input file stream. */
-static FILE *fp;
+/* Type definitions needed for the variables. */
-/* The current line buffer. */
-static char *linebuf;
-/* The size of the input buffer. */
-static size_t linebuf_size;
+enum token_type_ty
+{
+ token_type_eof,
+ token_type_lparen, /* ( */
+ token_type_rparen, /* ) */
+ token_type_comma, /* , */
+ token_type_fat_comma, /* => */
+ token_type_dereference, /* -> */
+ token_type_semicolon, /* ; */
+ token_type_lbrace, /* { */
+ token_type_rbrace, /* } */
+ token_type_lbracket, /* [ */
+ token_type_rbracket, /* ] */
+ token_type_string, /* quote-like */
+ token_type_string_interpol, /* quote-like with embedded expressions */
+ token_type_number, /* starting with a digit or dot */
+ token_type_named_op, /* if, unless, while, ... */
+ token_type_variable, /* $... */
+ token_type_object, /* A dereferenced variable, maybe a blessed
+ object. */
+ token_type_symbol, /* symbol, number */
+ token_type_regex_op, /* s, tr, y, m. */
+ token_type_dot, /* . */
+ token_type_other, /* regexp, misc. operator */
+ /* The following are not really token types, but variants used by
+ the parser. */
+ token_type_keyword_symbol, /* keyword symbol */
+ token_type_r_any /* rparen rbrace rbracket */
+};
+typedef enum token_type_ty token_type_ty;
-/* The size of the current line. */
-static int linesize;
+typedef struct token_ty token_ty;
+
+typedef struct token_stack_ty token_stack_ty;
+struct token_stack_ty
+{
+ token_ty **items;
+ size_t nitems;
+ size_t nitems_max;
+};
+
+
+/* These variables are combined in a struct, so that we can invoke the
+ extractor in a reentrant way. */
+
+struct perl_extractor
+{
+ /* Accumulator for the output. */
+ message_list_ty *mlp;
+
+ /* The input file stream, when reading from a file. */
+ FILE *fp;
+ /* The input area, when reading from a string. */
+ const char *input;
+ const char *input_end;
+
+ int line_number;
+
+ /* The current line buffer. */
+ char *linebuf;
+ /* The size of the input buffer. */
+ size_t linebuf_size;
+
+ /* The size of the current line. */
+ int linesize;
+
+ /* The position in the current line. */
+ int linepos;
+
+ /* Number of lines eaten for here documents. */
+ int eaten_here;
+
+ /* Paranoia: EOF marker for __END__ or __DATA__. */
+ bool end_of_file;
+
+ /* These are for tracking whether comments count as immediately before
+ keyword. */
+ int last_comment_line;
+ int last_non_comment_line;
+
+ /* Maximum supported nesting depth. */
+ #define MAX_NESTING_DEPTH 1000
-/* The position in the current line. */
-static int linepos;
+ /* Current nesting depth. */
+ int nesting_depth;
+
+ /* Last token type seen in the stream. Important for the interpretation
+ of slash and question mark. */
+ token_type_ty last_token_type;
+
+ /* A token stack used as a lookahead buffer. */
+ struct token_stack_ty token_stack;
+};
+
+static inline void
+perl_extractor_init_rest (struct perl_extractor *xp)
+{
+ xp->line_number = 0;
+ xp->linebuf = NULL;
+ xp->linebuf_size = 0;
+ xp->linesize = 0;
+ xp->linepos = 0;
+ xp->eaten_here = 0;
+ xp->end_of_file = false;
+ xp->last_comment_line = -1;
+ xp->last_non_comment_line = -1;
+ xp->nesting_depth = 0;
+ /* Safe assumption. */
+ xp->last_token_type = token_type_semicolon;
+ xp->token_stack.items = NULL;
+ xp->token_stack.nitems = 0;
+ xp->token_stack.nitems_max = 0;
+};
-/* Number of lines eaten for here documents. */
-static int eaten_here;
-/* Paranoia: EOF marker for __END__ or __DATA__. */
-static bool end_of_file;
+/* ======================== Reading of characters. ======================== */
/* 1. line_number handling. */
/* Returns the next character from the input stream or EOF. */
static int
-phase1_getc ()
+phase1_getc (struct perl_extractor *xp)
{
- line_number += eaten_here;
- eaten_here = 0;
+ xp->line_number += xp->eaten_here;
+ xp->eaten_here = 0;
- if (end_of_file)
+ if (xp->end_of_file)
return EOF;
- if (linepos >= linesize)
+ if (xp->fp != NULL)
{
- linesize = getline (&linebuf, &linebuf_size, fp);
-
- if (linesize < 0)
+ if (xp->linepos >= xp->linesize)
{
- if (ferror (fp))
- error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
- real_file_name);
- end_of_file = true;
- return EOF;
- }
+ xp->linesize = getline (&xp->linebuf, &xp->linebuf_size, xp->fp);
+
+ if (xp->linesize < 0)
+ {
+ if (ferror (xp->fp))
+ error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+ real_file_name);
+ xp->end_of_file = true;
+ return EOF;
+ }
- linepos = 0;
- ++line_number;
+ xp->linepos = 0;
+ ++(xp->line_number);
- /* Undosify. This is important for catching the end of <<EOF and
- <<'EOF'. We could rely on stdio doing this for us but
- it is not uncommon to to come across Perl scripts with CRLF
- newline conventions on systems that do not follow this
- convention. */
- if (linesize >= 2 && linebuf[linesize - 1] == '\n'
- && linebuf[linesize - 2] == '\r')
+ /* Undosify. This is important for catching the end of <<EOF and
+ <<'EOF'. We could rely on stdio doing this for us but
+ it is not uncommon to to come across Perl scripts with CRLF
+ newline conventions on systems that do not follow this
+ convention. */
+ if (xp->linesize >= 2 && xp->linebuf[xp->linesize - 1] == '\n'
+ && xp->linebuf[xp->linesize - 2] == '\r')
+ {
+ xp->linebuf[xp->linesize - 2] = '\n';
+ xp->linebuf[xp->linesize - 1] = '\0';
+ --(xp->linesize);
+ }
+ }
+ }
+ else
+ {
+ if (xp->linebuf == NULL)
{
- linebuf[linesize - 2] = '\n';
- linebuf[linesize - 1] = '\0';
- --linesize;
+ xp->linebuf = xp->input;
+ xp->linesize = xp->input_end - xp->input;
+ xp->linepos = 0;
+ }
+ if (xp->linepos >= xp->linesize)
+ {
+ xp->end_of_file = true;
+ return EOF;
}
}
- return linebuf[linepos++];
+ return xp->linebuf[xp->linepos++];
}
/* Supports only one pushback character. */
static void
-phase1_ungetc (int c)
+phase1_ungetc (struct perl_extractor *xp, int c)
{
if (c != EOF)
{
- if (linepos == 0)
+ if (xp->linepos == 0)
/* Attempt to ungetc across line boundary. Shouldn't happen.
No two phase1_ungetc calls are permitted in a row. */
abort ();
- --linepos;
+ --(xp->linepos);
}
}
encoded as well. */
static char *
-get_here_document (const char *delimiter)
+get_here_document (struct perl_extractor *xp, const char *delimiter)
{
/* Accumulator for the entire here document, including a NUL byte
at the end. */
for (;;)
{
- int read_bytes = getline (&my_linebuf, &my_linebuf_size, fp);
+ int read_bytes = getline (&my_linebuf, &my_linebuf_size, xp->fp);
char *my_line_utf8;
bool chomp;
if (read_bytes < 0)
{
- if (ferror (fp))
+ if (ferror (xp->fp))
{
error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
real_file_name);
else
{
if_error (IF_SEVERITY_WARNING,
- real_file_name, line_number, (size_t)(-1), false,
+ real_file_name, xp->line_number, (size_t)(-1), false,
_("can't find string terminator \"%s\" anywhere before EOF"),
delimiter);
break;
}
}
- ++eaten_here;
+ ++(xp->eaten_here);
/* Convert to UTF-8. */
my_line_utf8 =
from_current_source_encoding (my_linebuf, lc_string, logical_file_name,
- line_number + eaten_here);
+ xp->line_number + xp->eaten_here);
if (my_line_utf8 != my_linebuf)
{
if (strlen (my_line_utf8) >= my_linebuf_size)
/* Skips pod sections. */
static void
-skip_pod ()
+skip_pod (struct perl_extractor *xp)
{
- line_number += eaten_here;
- eaten_here = 0;
- linepos = 0;
+ xp->line_number += xp->eaten_here;
+ xp->eaten_here = 0;
+ xp->linepos = 0;
for (;;)
{
- linesize = getline (&linebuf, &linebuf_size, fp);
+ xp->linesize = getline (&xp->linebuf, &xp->linebuf_size, xp->fp);
- if (linesize < 0)
+ if (xp->linesize < 0)
{
- if (ferror (fp))
+ if (ferror (xp->fp))
error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
real_file_name);
return;
}
- ++line_number;
+ ++(xp->line_number);
- if (strncmp ("=cut", linebuf, 4) == 0)
+ if (strncmp ("=cut", xp->linebuf, 4) == 0)
{
/* Force reading of a new line on next call to phase1_getc(). */
- linepos = linesize;
+ xp->linepos = xp->linesize;
return;
}
}
}
-/* These are for tracking whether comments count as immediately before
- keyword. */
-static int last_comment_line;
-static int last_non_comment_line;
-
-
/* 2. Replace each comment that is not inside a string literal or regular
expression with a newline character. We need to remember the comment
for later, because it may be attached to a keyword string. */
static int
-phase2_getc ()
+phase2_getc (struct perl_extractor *xp)
{
static char *buffer;
static size_t bufmax;
int c;
char *utf8_string;
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '#')
{
buflen = 0;
- lineno = line_number;
+ lineno = xp->line_number;
/* Skip leading whitespace. */
for (;;)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == EOF)
break;
if (c != ' ' && c != '\t' && c != '\r' && c != '\f')
{
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
break;
}
}
/* Accumulate the comment. */
for (;;)
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '\n' || c == EOF)
break;
if (buflen >= bufmax)
lineno);
/* Save it until we encounter the corresponding string. */
savable_comment_add (utf8_string);
- last_comment_line = lineno;
+ xp->last_comment_line = lineno;
}
return c;
}
/* Supports only one pushback character. */
static void
-phase2_ungetc (int c)
+phase2_ungetc (struct perl_extractor *xp, int c)
{
if (c != EOF)
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
}
/* Whitespace recognition. */
/* ========================== Reading of tokens. ========================== */
-enum token_type_ty
-{
- token_type_eof,
- token_type_lparen, /* ( */
- token_type_rparen, /* ) */
- token_type_comma, /* , */
- token_type_fat_comma, /* => */
- token_type_dereference, /* -> */
- token_type_semicolon, /* ; */
- token_type_lbrace, /* { */
- token_type_rbrace, /* } */
- token_type_lbracket, /* [ */
- token_type_rbracket, /* ] */
- token_type_string, /* quote-like */
- token_type_string_interpol, /* quote-like with embedded expressions */
- token_type_number, /* starting with a digit or dot */
- token_type_named_op, /* if, unless, while, ... */
- token_type_variable, /* $... */
- token_type_object, /* A dereferenced variable, maybe a blessed
- object. */
- token_type_symbol, /* symbol, number */
- token_type_regex_op, /* s, tr, y, m. */
- token_type_dot, /* . */
- token_type_other, /* regexp, misc. operator */
- /* The following are not really token types, but variants used by
- the parser. */
- token_type_keyword_symbol, /* keyword symbol */
- token_type_r_any /* rparen rbrace rbracket */
-};
-typedef enum token_type_ty token_type_ty;
+/* 'enum token_type_ty' is defined above. */
/* Subtypes for strings, important for interpolation. */
enum string_type_ty
symbol_type_function /* Function name after 'sub'. */
};
-typedef struct token_ty token_ty;
struct token_ty
{
token_type_ty type;
including the starting and the trailing delimiter, with backslashes
removed where appropriate. */
static string_desc_t
-extract_quotelike_pass1 (int delim)
+extract_quotelike_pass1 (struct perl_extractor *xp, int delim)
{
/* This function is called recursively. No way to allocate stuff
statically. Also alloca() is inappropriate due to limited stack
for (;;)
{
- int c = phase1_getc ();
+ int c = phase1_getc (xp);
/* This round can produce 1 or 2 bytes. Ensure room for 2 bytes. */
if (bufpos + 2 > bufmax)
if (nested && c == delim)
{
- string_desc_t inner = extract_quotelike_pass1 (delim);
+ string_desc_t inner = extract_quotelike_pass1 (xp, delim);
size_t len = string_desc_length (inner);
/* Ensure room for len + 1 bytes. */
}
else if (c == '\\')
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '\\')
{
buffer[bufpos++] = '\\';
else
{
buffer[bufpos++] = '\\';
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
}
}
else
/* Like extract_quotelike_pass1, but return the complete string in UTF-8
encoding. */
static string_desc_t
-extract_quotelike_pass1_utf8 (int delim)
+extract_quotelike_pass1_utf8 (struct perl_extractor *xp, int delim)
{
- string_desc_t string = extract_quotelike_pass1 (delim);
+ string_desc_t string = extract_quotelike_pass1 (xp, delim);
string_desc_t utf8_string =
string_desc_from_current_source_encoding (string, lc_string,
- logical_file_name, line_number);
+ logical_file_name,
+ xp->line_number);
if (string_desc_data (utf8_string) != string_desc_data (string))
string_desc_free (string);
return utf8_string;
static flag_context_list_table_ty *flag_context_list_table;
-/* Maximum supported nesting depth. */
-#define MAX_NESTING_DEPTH 1000
-
-/* Current nesting depth. */
-static int nesting_depth;
-
-
/* Forward declaration of local functions. */
-static void interpolate_keywords (message_list_ty *mlp, string_desc_t string,
- int lineno);
-static token_ty *x_perl_lex (message_list_ty *mlp);
-static void x_perl_unlex (token_ty *tp);
-static bool extract_balanced (message_list_ty *mlp,
+static void interpolate_keywords (struct perl_extractor *xp,
+ string_desc_t string, int lineno);
+static token_ty *x_perl_lex (struct perl_extractor *xp);
+static void x_perl_unlex (struct perl_extractor *xp, token_ty *tp);
+static bool extract_balanced (struct perl_extractor *xp,
token_type_ty delim, bool eat_delim,
bool semicolon_delim, bool eat_semicolon_delim,
bool comma_delim,
flag_region_ty *outer_region,
flag_context_list_iterator_ty context_iter,
int arg, struct arglist_parser *argparser);
+static void extract_perl_input (struct perl_extractor *xp);
/* Extract an unsigned hexadecimal number from STRING, considering at
section "Gory details of parsing quoted constructs" in perlop.pod.
Return the resulting token in *tp; tp->type == token_type_string. */
static void
-extract_quotelike (token_ty *tp, int delim)
+extract_quotelike (struct perl_extractor *xp, token_ty *tp, int delim)
{
- string_desc_t string = extract_quotelike_pass1_utf8 (delim);
+ string_desc_t string = extract_quotelike_pass1_utf8 (xp, delim);
size_t len = string_desc_length (string);
tp->type = token_type_string;
modifiers (left to the caller).
Return the resulting token in *tp; tp->type == token_type_regex_op. */
static void
-extract_triple_quotelike (message_list_ty *mlp, token_ty *tp, int delim,
+extract_triple_quotelike (struct perl_extractor *xp, token_ty *tp, int delim,
bool interpolate)
{
string_desc_t string;
tp->type = token_type_regex_op;
- string = extract_quotelike_pass1_utf8 (delim);
+ string = extract_quotelike_pass1_utf8 (xp, delim);
if (interpolate)
- interpolate_keywords (mlp, string, line_number);
+ interpolate_keywords (xp, string, xp->line_number);
string_desc_free (string);
if (delim == '(' || delim == '<' || delim == '{' || delim == '[')
{
/* The delimiter for the second string can be different, e.g.
s{SEARCH}{REPLACE} or s{SEARCH}/REPLACE/. See "man perlrequick". */
- delim = phase1_getc ();
+ delim = phase1_getc (xp);
while (is_whitespace (delim))
{
/* The hash-sign is not a valid delimiter after whitespace, ergo
use phase2_getc() and not phase1_getc() now. */
- delim = phase2_getc ();
+ delim = phase2_getc (xp);
}
}
- string = extract_quotelike_pass1_utf8 (delim);
+ string = extract_quotelike_pass1_utf8 (xp, delim);
if (interpolate)
- interpolate_keywords (mlp, string, line_number);
+ interpolate_keywords (xp, string, xp->line_number);
string_desc_free (string);
}
This function does not access tp->comment. */
/* FIXME: Currently may writes null-bytes into the string. */
static void
-extract_quotelike_pass3 (token_ty *tp)
+extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp)
{
static char *buffer;
static int bufmax = 0;
if (end == NULL)
{
if_error (IF_SEVERITY_WARNING,
- real_file_name, line_number, (size_t)(-1), false,
+ real_file_name, xp->line_number, (size_t)(-1), false,
_("missing right brace on \\x{HEXNUMBER}"));
++crs;
continue;
else if ((unsigned char) *crs >= 0x80)
{
if_error (IF_SEVERITY_WARNING,
- real_file_name, line_number, (size_t)(-1), false,
+ real_file_name, xp->line_number, (size_t)(-1), false,
_("unsupported interpolation (\"\\l\") of 8bit character \"%c\""),
*crs);
}
else if ((unsigned char) *crs >= 0x80)
{
if_error (IF_SEVERITY_WARNING,
- real_file_name, line_number, (size_t)(-1), false,
+ real_file_name, xp->line_number, (size_t)(-1), false,
_("unsupported interpolation (\"\\u\") of 8bit character \"%c\""),
*crs);
}
if (!backslashed && !extract_all && (*crs == '$' || *crs == '@'))
{
if_error (IF_SEVERITY_WARNING,
- real_file_name, line_number, (size_t)(-1), false,
+ real_file_name, xp->line_number, (size_t)(-1), false,
_("unsupported variable interpolation at \"%c\""), *crs);
tp->type = token_type_string_interpol;
++crs;
else if ((unsigned char) *crs >= 0x80)
{
if_error (IF_SEVERITY_WARNING,
- real_file_name, line_number, (size_t)(-1), false,
+ real_file_name, xp->line_number, (size_t)(-1), false,
_("unsupported interpolation (\"\\L\") of 8bit character \"%c\""),
*crs);
buffer[bufpos++] = *crs;
else if ((unsigned char) *crs >= 0x80)
{
if_error (IF_SEVERITY_WARNING,
- real_file_name, line_number, (size_t)(-1), false,
+ real_file_name, xp->line_number, (size_t)(-1), false,
_("unsupported interpolation (\"\\U\") of 8bit character \"%c\""),
*crs);
buffer[bufpos++] = *crs;
3) Parse possible following hash keys or array indexes.
*/
static void
-extract_variable (message_list_ty *mlp, token_ty *tp, int first)
+extract_variable (struct perl_extractor *xp, token_ty *tp, int first)
{
static char *buffer;
static int bufmax = 0;
#if DEBUG_PERL
fprintf (stderr, "%s:%d: extracting variable type '%c'\n",
- real_file_name, line_number, first);
+ real_file_name, xp->line_number, first);
#endif
/*
buffer = xrealloc (buffer, bufmax);
}
buffer[bufpos++] = c;
- c = phase1_getc ();
+ c = phase1_getc (xp);
}
if (c == EOF)
tp->string = xstrdup (buffer);
#if DEBUG_PERL
fprintf (stderr, "%s:%d: is PID ($$)\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
return;
}
*/
#if DEBUG_PERL
fprintf (stderr, "%s:%d: braced {variable_name}\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
- if (extract_balanced (mlp,
+ if (extract_balanced (xp,
token_type_rbrace, true,
false, false, false,
null_context_region (), null_context_list_iterator,
- 1, arglist_parser_alloc (mlp, NULL)))
+ 1, arglist_parser_alloc (xp->mlp, NULL)))
{
tp->type = token_type_eof;
return;
buffer = xrealloc (buffer, bufmax);
}
buffer[bufpos++] = c;
- c = phase1_getc ();
+ c = phase1_getc (xp);
}
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
}
}
/* Probably some strange Perl variable like $`. */
if (varbody_length == 0)
{
- int c = phase1_getc ();
+ int c = phase1_getc (xp);
if (c == EOF || is_whitespace (c))
- phase1_ungetc (c); /* Loser. */
+ phase1_ungetc (xp, c); /* Loser. */
else
{
if (bufpos >= bufmax)
#if DEBUG_PERL
fprintf (stderr, "%s:%d: complete variable name: %s\n",
- real_file_name, line_number, tp->string);
+ real_file_name, xp->line_number, tp->string);
#endif
/*
int c;
do
- c = phase2_getc ();
+ c = phase2_getc (xp);
while (is_whitespace (c));
if (c == '-')
{
- int c2 = phase1_getc ();
+ int c2 = phase1_getc (xp);
if (c2 == '>')
{
is_dereference = true;
do
- c = phase2_getc ();
+ c = phase2_getc (xp);
while (is_whitespace (c));
}
else if (c2 != '\n')
special character recognized after a minus is greater-than
for dereference. However, the sequence "-\n>" that we
treat incorrectly here, is a syntax error. */
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
}
tp->type = token_type_object;
#if DEBUG_PERL
fprintf (stderr, "%s:%d: first keys preceded by \"->\"\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
}
else if (maybe_hash_value)
#if DEBUG_PERL
fprintf (stderr, "%s:%d: first keys preceded by '{'\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
if (hash_find_entry (&keywords, tp->string, strlen (tp->string),
flag_context_list_table_lookup (
flag_context_list_table,
tp->string, strlen (tp->string)));
- token_ty *t1 = x_perl_lex (mlp);
+ token_ty *t1 = x_perl_lex (xp);
#if DEBUG_PERL
fprintf (stderr, "%s:%d: extracting string key\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
if (t1->type == token_type_symbol
|| t1->type == token_type_named_op)
{
- token_ty *t2 = x_perl_lex (mlp);
+ token_ty *t2 = x_perl_lex (xp);
if (t2->type == token_type_rbrace)
{
flag_region_ty *region;
flag_context_list_iterator_advance (
&context_iter));
- pos.line_number = line_number;
+ pos.line_number = xp->line_number;
pos.file_name = logical_file_name;
- remember_a_message (mlp, NULL, xstrdup (t1->string),
+ remember_a_message (xp->mlp, NULL, xstrdup (t1->string),
true, false, region, &pos,
NULL, savable_comment, true);
free_token (t2);
}
else
{
- x_perl_unlex (t2);
+ x_perl_unlex (xp, t2);
}
}
else
{
- x_perl_unlex (t1);
- if (extract_balanced (mlp,
+ x_perl_unlex (xp, t1);
+ if (extract_balanced (xp,
token_type_rbrace, true,
false, false, false,
null_context_region (), context_iter,
- 1, arglist_parser_alloc (mlp, &shapes)))
+ 1, arglist_parser_alloc (xp->mlp, &shapes)))
return;
}
}
}
else
{
- phase2_ungetc (c);
+ phase2_ungetc (xp, c);
}
}
else
{
- phase2_ungetc (c);
+ phase2_ungetc (xp, c);
}
}
/* Now consume "->", "[...]", and "{...}". */
for (;;)
{
- int c = phase2_getc ();
+ int c = phase2_getc (xp);
int c2;
switch (c)
case '{':
#if DEBUG_PERL
fprintf (stderr, "%s:%d: extracting balanced '{' after varname\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
- extract_balanced (mlp,
+ extract_balanced (xp,
token_type_rbrace, true,
false, false, false,
null_context_region (), null_context_list_iterator,
- 1, arglist_parser_alloc (mlp, NULL));
+ 1, arglist_parser_alloc (xp->mlp, NULL));
break;
case '[':
#if DEBUG_PERL
fprintf (stderr, "%s:%d: extracting balanced '[' after varname\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
- extract_balanced (mlp,
+ extract_balanced (xp,
token_type_rbracket, true,
false, false, false,
null_context_region (), null_context_list_iterator,
- 1, arglist_parser_alloc (mlp, NULL));
+ 1, arglist_parser_alloc (xp->mlp, NULL));
break;
case '-':
- c2 = phase1_getc ();
+ c2 = phase1_getc (xp);
if (c2 == '>')
{
#if DEBUG_PERL
fprintf (stderr, "%s:%d: another \"->\" after varname\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
break;
}
special character recognized after a minus is greater-than
for dereference. However, the sequence "-\n>" that we
treat incorrectly here, is a syntax error. */
- phase1_ungetc (c2);
+ phase1_ungetc (xp, c2);
}
FALLTHROUGH;
default:
#if DEBUG_PERL
fprintf (stderr, "%s:%d: variable finished\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
- phase2_ungetc (c);
+ phase2_ungetc (xp, c);
return;
}
}
variables inside a double-quoted string that may interpolate to
some keyword hash (reference). The string is UTF-8 encoded. */
static void
-interpolate_keywords (message_list_ty *mlp, string_desc_t string, int lineno)
+interpolate_keywords (struct perl_extractor *xp, string_desc_t string,
+ int lineno)
{
static char *buffer;
static int bufmax = 0;
one_dollar,
two_dollars,
identifier,
+ seen_lbracket,
+ lbracket_dquote,
+ lbracket_squote,
minus,
wait_lbrace,
- wait_quote,
- dquote,
- squote,
- barekey,
+ seen_lbrace,
+ lbrace_dquote,
+ lbrace_squote,
+ lbrace_barekey,
wait_rbrace
} state;
token_ty token;
lex_pos_ty pos;
- if (++nesting_depth > MAX_NESTING_DEPTH)
+ if (++(xp->nesting_depth) > MAX_NESTING_DEPTH)
if_error (IF_SEVERITY_FATAL_ERROR,
- logical_file_name, line_number, (size_t)(-1), false,
+ logical_file_name, xp->line_number, (size_t)(-1), false,
_("too deeply nested expressions"));
/* States are:
*
- * initial: initial
- * one_dollar: dollar sign seen in state INITIAL
- * two_dollars: another dollar-sign has been seen in state ONE_DOLLAR
- * identifier: a valid identifier character has been seen in state
- * ONE_DOLLAR or TWO_DOLLARS
- * minus: a minus-sign has been seen in state IDENTIFIER
- * wait_lbrace: a greater-than has been seen in state MINUS
- * wait_quote: a left brace has been seen in state IDENTIFIER or in
- * state WAIT_LBRACE
- * dquote: a double-quote has been seen in state WAIT_QUOTE
- * squote: a single-quote has been seen in state WAIT_QUOTE
- * barekey: an bareword character has been seen in state WAIT_QUOTE
- * wait_rbrace: closing quote has been seen in state DQUOTE or SQUOTE
+ * initial: initial
+ * one_dollar: dollar sign seen in state INITIAL
+ * two_dollars: another dollar-sign has been seen in state ONE_DOLLAR
+ * identifier: a valid identifier character has been seen in state
+ * ONE_DOLLAR or TWO_DOLLARS
+ *
+ * seen_lbracket: a left bracket has been seen in state IDENTIFIER
+ * lbracket_dquote: a double-quote has been seen in state SEEN_LBRACKET
+ * lbracket_squote: a single-quote has been seen in state SEEN_LBRACKET
+ *
+ * minus: a minus-sign has been seen in state IDENTIFIER
+ * wait_lbrace: a greater-than has been seen in state MINUS
+ * seen_lbrace: a left brace has been seen in state IDENTIFIER or in
+ * state WAIT_LBRACE
+ * lbrace_dquote: a double-quote has been seen in state SEEN_LBRACE
+ * lbrace_squote: a single-quote has been seen in state SEEN_LBRACE
+ * lbrace_barekey: a bareword character has been seen in state SEEN_LBRACE
+ * wait_rbrace: closing quote has been seen in state LBRACE_DQUOTE or
+ * LBRACE_SQUOTE
*
- * In the states initial...identifier the context is null_context_region ();
+ * In the states initial...wait_rbracket the context is null_context_region ();
* in the states minus...wait_rbrace the context is the one suitable for the
* first argument of the last seen identifier.
*/
token.type = token_type_string;
token.sub_type = string_type_qq;
- token.line_number = line_number;
+ token.line_number = xp->line_number;
/* No need for token.comment = add_reference (savable_comment); here.
We can let token.comment uninitialized here, and use savable_comment
directly, because this function only parses the given string and does
case '\\':
if (index == length)
{
- nesting_depth--;
+ xp->nesting_depth--;
return;
}
c = string_desc_char_at (string, index++);
else
state = initial;
break;
+ case '[':
+ bufpos = 0;
+ state = seen_lbracket;
+ break;
case '{':
if (!maybe_hash_deref)
buffer[0] = '%';
inheriting_region (null_context_region (),
flag_context_list_iterator_advance (
&context_iter));
- state = wait_quote;
+ state = seen_lbrace;
}
else
state = initial;
break;
}
break;
+ case seen_lbracket:
+ switch (c)
+ {
+ case '\'':
+ buffer[bufpos++] = c;
+ state = lbracket_squote;
+ break;
+ case '"':
+ buffer[bufpos++] = c;
+ state = lbracket_dquote;
+ break;
+ case ']':
+ /* Recursively extract messages from the bracketed expression. */
+ {
+ char *substring = xmalloc (bufpos);
+ memcpy (substring, buffer, bufpos);
+
+ struct perl_extractor *rxp = XMALLOC (struct perl_extractor);
+ rxp->mlp = xp->mlp;
+ rxp->fp = NULL;
+ rxp->input = substring;
+ rxp->input_end = substring + bufpos;
+ rxp->line_number = xp->line_number;
+ perl_extractor_init_rest (rxp);
+
+ extract_perl_input (rxp);
+
+ free (rxp);
+ free (substring);
+ }
+ break;
+ default:
+ buffer[bufpos++] = c;
+ break;
+ }
+ break;
+ case lbracket_dquote:
+ switch (c)
+ {
+ case '"':
+ buffer[bufpos++] = c;
+ state = seen_lbracket;
+ break;
+ case '\\':
+ if (index == length)
+ {
+ region = null_context_region ();
+ state = initial;
+ }
+ else
+ {
+ c = string_desc_char_at (string, index++);
+ if (c == '\"')
+ {
+ buffer[bufpos++] = c;
+ }
+ else
+ {
+ buffer[bufpos++] = '\\';
+ buffer[bufpos++] = c;
+ }
+ }
+ break;
+ default:
+ buffer[bufpos++] = c;
+ break;
+ }
+ break;
+ case lbracket_squote:
+ switch (c)
+ {
+ case '\'':
+ buffer[bufpos++] = c;
+ state = seen_lbracket;
+ break;
+ case '\\':
+ if (index == length)
+ {
+ region = null_context_region ();
+ state = initial;
+ }
+ else
+ {
+ c = string_desc_char_at (string, index++);
+ if (c == '\'')
+ {
+ buffer[bufpos++] = c;
+ }
+ else
+ {
+ buffer[bufpos++] = '\\';
+ buffer[bufpos++] = c;
+ }
+ }
+ break;
+ default:
+ buffer[bufpos++] = c;
+ break;
+ }
+ break;
case minus:
switch (c)
{
switch (c)
{
case '{':
- state = wait_quote;
+ state = seen_lbrace;
break;
default:
region = null_context_region ();
break;
}
break;
- case wait_quote:
+ case seen_lbrace:
switch (c)
{
case_whitespace:
case '\'':
pos.line_number = lineno;
bufpos = 0;
- state = squote;
+ state = lbrace_squote;
break;
case '"':
pos.line_number = lineno;
bufpos = 0;
- state = dquote;
+ state = lbrace_dquote;
break;
default:
if (!c_isascii ((unsigned char) c)
pos.line_number = lineno;
bufpos = 0;
buffer[bufpos++] = c;
- state = barekey;
+ state = lbrace_barekey;
}
else
{
break;
}
break;
- case dquote:
+ case lbrace_dquote:
switch (c)
{
case '"':
/* The resulting string has to be interpolated twice. */
buffer[bufpos] = '\0';
token.string = xstrdup (buffer);
- extract_quotelike_pass3 (&token);
+ extract_quotelike_pass3 (xp, &token);
if (token.type == token_type_string)
{
/* The string can only shrink with interpolation (because
break;
}
break;
- case squote:
+ case lbrace_squote:
switch (c)
{
case '\'':
break;
}
break;
- case barekey:
+ case lbrace_barekey:
if (!c_isascii ((unsigned char) c)
|| c == '_' || (c >= '0' && c <= '9')
|| (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
{
buffer[bufpos] = '\0';
token.string = xstrdup (buffer);
- extract_quotelike_pass3 (&token);
+ extract_quotelike_pass3 (xp, &token);
if (token.type == token_type_string)
{
- remember_a_message (mlp, NULL, token.string, true, false,
- region, &pos, NULL, savable_comment,
- true);
+ remember_a_message (xp->mlp, NULL, token.string, true,
+ false, region, &pos, NULL,
+ savable_comment, true);
}
}
FALLTHROUGH;
}
}
- nesting_depth--;
+ xp->nesting_depth--;
return;
}
return retval;
}
-/* Last token type seen in the stream. Important for the interpretation
- of slash and question mark. */
-static token_type_ty last_token_type;
-
/* Combine characters into tokens. Discard whitespace. */
static void
-x_perl_prelex (message_list_ty *mlp, token_ty *tp)
+x_perl_prelex (struct perl_extractor *xp, token_ty *tp)
{
static char *buffer;
static int bufmax;
for (;;)
{
- c = phase2_getc ();
- tp->line_number = line_number;
- tp->last_type = last_token_type;
+ c = phase2_getc (xp);
+ tp->line_number = xp->line_number;
+ tp->last_type = xp->last_token_type;
switch (c)
{
return;
case '\n':
- if (last_non_comment_line > last_comment_line)
+ if (xp->last_non_comment_line > xp->last_comment_line)
savable_comment_reset ();
FALLTHROUGH;
case '\t':
case '$':
if (!extract_all)
{
- extract_variable (mlp, tp, c);
+ extract_variable (xp, tp, c);
return;
}
break;
}
- last_non_comment_line = tp->line_number;
+ xp->last_non_comment_line = tp->line_number;
switch (c)
{
case '.':
{
- int c2 = phase1_getc ();
- phase1_ungetc (c2);
+ int c2 = phase1_getc (xp);
+ phase1_ungetc (xp, c2);
if (c2 == '.')
{
tp->type = token_type_other;
buffer = xrealloc (buffer, bufmax);
}
buffer[bufpos++] = c;
- c = phase1_getc ();
+ c = phase1_getc (xp);
switch (c)
{
case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
continue;
default:
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
break;
}
break;
if (strcmp (buffer, "__END__") == 0
|| strcmp (buffer, "__DATA__") == 0)
{
- end_of_file = true;
+ xp->end_of_file = true;
tp->type = token_type_eof;
return;
}
|| strcmp (buffer, "y") == 0
|| strcmp (buffer, "tr") == 0)
{
- int delim = phase1_getc ();
+ int delim = phase1_getc (xp);
while (is_whitespace (delim))
- delim = phase2_getc ();
+ delim = phase2_getc (xp);
if (delim == EOF)
{
|| (delim >= 'a' && delim <= 'z'))
{
/* False positive. */
- phase2_ungetc (delim);
+ phase2_ungetc (xp, delim);
tp->type = token_type_symbol;
tp->sub_type = symbol_type_none;
tp->string = xstrdup (buffer);
return;
}
- extract_triple_quotelike (mlp, tp, delim,
+ extract_triple_quotelike (xp, tp, delim,
buffer[0] == 's' && delim != '\'');
/* Eat the following modifiers. */
do
- c = phase1_getc ();
+ c = phase1_getc (xp);
while (c >= 'a' && c <= 'z');
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
return;
}
else if (strcmp (buffer, "m") == 0)
{
- int delim = phase1_getc ();
+ int delim = phase1_getc (xp);
while (is_whitespace (delim))
- delim = phase2_getc ();
+ delim = phase2_getc (xp);
if (delim == EOF)
{
|| (delim >= 'a' && delim <= 'z'))
{
/* False positive. */
- phase2_ungetc (delim);
+ phase2_ungetc (xp, delim);
tp->type = token_type_symbol;
tp->sub_type = symbol_type_none;
tp->string = xstrdup (buffer);
return;
}
- extract_quotelike (tp, delim);
+ extract_quotelike (xp, tp, delim);
if (delim != '\'')
- interpolate_keywords (mlp, string_desc_from_c (tp->string),
- line_number);
+ interpolate_keywords (xp, string_desc_from_c (tp->string),
+ xp->line_number);
free (tp->string);
drop_reference (tp->comment);
tp->type = token_type_regex_op;
/* Eat the following modifiers. */
do
- c = phase1_getc ();
+ c = phase1_getc (xp);
while (c >= 'a' && c <= 'z');
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
return;
}
else if (strcmp (buffer, "qq") == 0
a symbol. Rationale: Saying "qw (foo bar)" is the
same as "my @list = ('foo', 'bar'); @list;". */
- int delim = phase1_getc ();
+ int delim = phase1_getc (xp);
while (is_whitespace (delim))
- delim = phase2_getc ();
+ delim = phase2_getc (xp);
if (delim == EOF)
{
|| (delim >= 'a' && delim <= 'z'))
{
/* False positive. */
- phase2_ungetc (delim);
+ phase2_ungetc (xp, delim);
tp->type = token_type_symbol;
tp->sub_type = symbol_type_none;
tp->string = xstrdup (buffer);
return;
}
- extract_quotelike (tp, delim);
+ extract_quotelike (xp, tp, delim);
switch (buffer[1])
{
case 'x':
tp->type = token_type_string;
tp->sub_type = string_type_qq;
- interpolate_keywords (mlp, string_desc_from_c (tp->string),
- line_number);
+ interpolate_keywords (xp, string_desc_from_c (tp->string),
+ xp->line_number);
break;
case 'r':
drop_reference (tp->comment);
return;
case '"':
- extract_quotelike (tp, c);
+ extract_quotelike (xp, tp, c);
tp->sub_type = string_type_qq;
- interpolate_keywords (mlp, string_desc_from_c (tp->string),
- line_number);
+ interpolate_keywords (xp, string_desc_from_c (tp->string),
+ xp->line_number);
return;
case '`':
- extract_quotelike (tp, c);
+ extract_quotelike (xp, tp, c);
tp->sub_type = string_type_qq;
- interpolate_keywords (mlp, string_desc_from_c (tp->string),
- line_number);
+ interpolate_keywords (xp, string_desc_from_c (tp->string),
+ xp->line_number);
return;
case '\'':
- extract_quotelike (tp, c);
+ extract_quotelike (xp, tp, c);
tp->sub_type = string_type_q;
return;
case '=':
/* Check for fat comma. */
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '>')
{
tp->type = token_type_fat_comma;
return;
}
- else if (linepos == 2
- && (last_token_type == token_type_semicolon
- || last_token_type == token_type_rbrace)
+ else if (xp->linepos == 2
+ && (xp->last_token_type == token_type_semicolon
+ || xp->last_token_type == token_type_rbrace)
&& ((c >= 'A' && c <='Z')
|| (c >= 'a' && c <= 'z')))
{
#if DEBUG_PERL
fprintf (stderr, "%s:%d: start pod section\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
- skip_pod ();
+ skip_pod (xp);
#if DEBUG_PERL
fprintf (stderr, "%s:%d: end pod section\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
continue;
}
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
tp->type = token_type_other;
return;
case '<':
/* Check for <<EOF and friends. */
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '<')
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '\'')
{
char *string;
- extract_quotelike (tp, c);
- string = get_here_document (tp->string);
+ extract_quotelike (xp, tp, c);
+ string = get_here_document (xp, tp->string);
free (tp->string);
tp->string = string;
tp->type = token_type_string;
tp->sub_type = string_type_verbatim;
- tp->line_number = line_number + 1;
+ tp->line_number = xp->line_number + 1;
return;
}
else if (c == '"')
{
char *string;
- extract_quotelike (tp, c);
- string = get_here_document (tp->string);
+ extract_quotelike (xp, tp, c);
+ string = get_here_document (xp, tp->string);
free (tp->string);
tp->string = string;
tp->type = token_type_string;
tp->sub_type = string_type_qq;
- tp->line_number = line_number + 1;
- interpolate_keywords (mlp, string_desc_from_c (tp->string),
+ tp->line_number = xp->line_number + 1;
+ interpolate_keywords (xp, string_desc_from_c (tp->string),
tp->line_number);
return;
}
buffer = xrealloc (buffer, bufmax);
}
buffer[bufpos++] = c;
- c = phase1_getc ();
+ c = phase1_getc (xp);
}
if (c == EOF)
{
else
{
char *string;
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
if (bufpos >= bufmax)
{
bufmax = 2 * bufmax + 10;
buffer = xrealloc (buffer, bufmax);
}
buffer[bufpos++] = '\0';
- string = get_here_document (buffer);
+ string = get_here_document (xp, buffer);
tp->string = string;
tp->type = token_type_string;
tp->sub_type = string_type_qq;
tp->comment = add_reference (savable_comment);
- tp->line_number = line_number + 1;
- interpolate_keywords (mlp, string_desc_from_c (tp->string),
+ tp->line_number = xp->line_number + 1;
+ interpolate_keywords (xp, string_desc_from_c (tp->string),
tp->line_number);
return;
}
}
else
{
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
tp->type = token_type_other;
}
return; /* End of case '>'. */
case '-':
/* Check for dereferencing operator. */
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c == '>')
{
tp->type = token_type_dereference;
tp->type = token_type_other;
return;
}
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
tp->type = token_type_other;
return;
case '?':
if (prefer_regexp_over_division (tp->last_type))
{
- extract_quotelike (tp, c);
- interpolate_keywords (mlp, string_desc_from_c (tp->string),
- line_number);
+ extract_quotelike (xp, tp, c);
+ interpolate_keywords (xp, string_desc_from_c (tp->string),
+ xp->line_number);
free (tp->string);
drop_reference (tp->comment);
tp->type = token_type_regex_op;
/* Eat the following modifiers. */
do
- c = phase1_getc ();
+ c = phase1_getc (xp);
while (c >= 'a' && c <= 'z');
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
return;
}
/* Recognize operator '//'. */
if (c == '/')
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
if (c != '/')
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
}
FALLTHROUGH;
}
-/* A token stack used as a lookahead buffer. */
-
-typedef struct token_stack_ty token_stack_ty;
-struct token_stack_ty
-{
- token_ty **items;
- size_t nitems;
- size_t nitems_max;
-};
-
-static struct token_stack_ty token_stack;
-
#if DEBUG_PERL
/* Dumps all resources allocated by stack STACK. */
static int
static token_ty *
-x_perl_lex (message_list_ty *mlp)
+x_perl_lex (struct perl_extractor *xp)
{
- if (++nesting_depth > MAX_NESTING_DEPTH)
+ if (++(xp->nesting_depth) > MAX_NESTING_DEPTH)
if_error (IF_SEVERITY_FATAL_ERROR,
- logical_file_name, line_number, (size_t)(-1), false,
+ logical_file_name, xp->line_number, (size_t)(-1), false,
_("too deeply nested expressions"));
#if DEBUG_PERL
- int dummy = token_stack_dump (&token_stack);
+ int dummy = token_stack_dump (&xp->token_stack);
#endif
- token_ty *tp = token_stack_pop (&token_stack);
+ token_ty *tp = token_stack_pop (&xp->token_stack);
if (!tp)
{
tp = XMALLOC (token_ty);
- x_perl_prelex (mlp, tp);
- tp->last_type = last_token_type;
- last_token_type = tp->type;
+ x_perl_prelex (xp, tp);
+ tp->last_type = xp->last_token_type;
+ xp->last_token_type = tp->type;
#if DEBUG_PERL
fprintf (stderr, "%s:%d: x_perl_prelex returned %s\n",
- real_file_name, line_number, token2string (tp));
+ real_file_name, xp->line_number, token2string (tp));
#endif
/* The interpretation of a slash or question mark after a function call
{
/* Class method call or chained method call (with at least
two arrow operators). */
- last_token_type = token_type_variable;
+ xp->last_token_type = token_type_variable;
}
else if (tp->last_type == token_type_object)
{
/* Instance method, not chained. */
- last_token_type = token_type_variable;
+ xp->last_token_type = token_type_variable;
}
else if (strcmp (tp->string, "wantarray") == 0
|| strcmp (tp->string, "fork") == 0
|| strcmp (tp->string, "wantarray") == 0)
{
/* A Perl built-in function that does not accept arguments. */
- last_token_type = token_type_variable;
+ xp->last_token_type = token_type_variable;
}
}
}
else
{
fprintf (stderr, "%s:%d: %s recycled from stack\n",
- real_file_name, line_number, token2string (tp));
+ real_file_name, xp->line_number, token2string (tp));
}
#endif
must not be interpreted as the beginning of a variable ')'. */
if (tp->type == token_type_symbol || tp->type == token_type_named_op)
{
- token_ty *next = token_stack_peek (&token_stack);
+ token_ty *next = token_stack_peek (&xp->token_stack);
if (!next)
{
#if DEBUG_PERL
fprintf (stderr, "%s:%d: pre-fetching next token\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
- next = x_perl_lex (mlp);
- x_perl_unlex (next);
+ next = x_perl_lex (xp);
+ x_perl_unlex (xp, next);
#if DEBUG_PERL
fprintf (stderr, "%s:%d: unshifted next token\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
}
#if DEBUG_PERL
fprintf (stderr, "%s:%d: next token is %s\n",
- real_file_name, line_number, token2string (next));
+ real_file_name, xp->line_number, token2string (next));
#endif
if (next->type == token_type_fat_comma)
tp->comment = add_reference (savable_comment);
#if DEBUG_PERL
fprintf (stderr, "%s:%d: token %s mutated to token_type_string\n",
- real_file_name, line_number, token2string (tp));
+ real_file_name, xp->line_number, token2string (tp));
#endif
}
else if (tp->type == token_type_symbol && tp->sub_type == symbol_type_sub
possible prototype information. */
#if DEBUG_PERL
fprintf (stderr, "%s:%d: subroutine declaration/definition '%s'\n",
- real_file_name, line_number, next->string);
+ real_file_name, xp->line_number, next->string);
#endif
next->sub_type = symbol_type_function;
}
#if DEBUG_PERL
fprintf (stderr, "%s:%d: consuming prototype information\n",
- real_file_name, line_number);
+ real_file_name, xp->line_number);
#endif
do
{
- c = phase1_getc ();
+ c = phase1_getc (xp);
#if DEBUG_PERL
fprintf (stderr, " consuming character '%c'\n", c);
#endif
}
while (c != EOF && c != ')');
- phase1_ungetc (c);
+ phase1_ungetc (xp, c);
}
}
- nesting_depth--;
+ xp->nesting_depth--;
return tp;
}
static void
-x_perl_unlex (token_ty *tp)
+x_perl_unlex (struct perl_extractor *xp, token_ty *tp)
{
- token_stack_push (&token_stack, tp);
+ token_stack_push (&xp->token_stack, tp);
}
token_type_string_interpol, it returns NULL instead. */
static char *
-collect_message (message_list_ty *mlp, token_ty *tp)
+collect_message (struct perl_extractor *xp, token_ty *tp)
{
char *string;
size_t len;
- extract_quotelike_pass3 (tp);
+ extract_quotelike_pass3 (xp, tp);
if (tp->type == token_type_string)
{
string = xstrdup (tp->string);
int c;
do
- c = phase2_getc ();
+ c = phase2_getc (xp);
while (is_whitespace (c));
if (c != '.')
{
- phase2_ungetc (c);
+ phase2_ungetc (xp, c);
return string;
}
do
- c = phase2_getc ();
+ c = phase2_getc (xp);
while (is_whitespace (c));
- phase2_ungetc (c);
+ phase2_ungetc (xp, c);
if (c == '"' || c == '\'' || c == '`'
|| ((c == '/' || c == '?')
&& prefer_regexp_over_division (tp->last_type))
|| c == 'q')
{
- token_ty *qstring = x_perl_lex (mlp);
+ token_ty *qstring = x_perl_lex (xp);
if (qstring->type != token_type_string)
{
/* assert (qstring->type == token_type_symbol) */
- x_perl_unlex (qstring);
+ x_perl_unlex (xp, qstring);
return string;
}
- extract_quotelike_pass3 (qstring);
+ extract_quotelike_pass3 (xp, qstring);
if (qstring->type == token_type_string)
{
if (string != NULL)
Returns true for EOF, false otherwise. */
static bool
-extract_balanced (message_list_ty *mlp,
+extract_balanced (struct perl_extractor *xp,
token_type_ty delim, bool eat_delim,
bool semicolon_delim, bool eat_semicolon_delim,
bool comma_delim,
++nesting_level;
#endif
- if (nesting_depth > MAX_NESTING_DEPTH)
+ if (xp->nesting_depth > MAX_NESTING_DEPTH)
if_error (IF_SEVERITY_FATAL_ERROR,
- logical_file_name, line_number, (size_t)(-1), false,
+ logical_file_name, xp->line_number, (size_t)(-1), false,
_("too deeply nested expressions"));
for (;;)
/* The current token. */
token_ty *tp;
- tp = x_perl_lex (mlp);
+ tp = x_perl_lex (xp);
if (first)
{
free_token (tp);
else
/* Preserve the delimiter for the caller. */
- x_perl_unlex (tp);
+ x_perl_unlex (xp, tp);
return false;
}
free_token (tp);
else
/* Preserve the semicolon for the caller. */
- x_perl_unlex (tp);
+ x_perl_unlex (xp, tp);
return false;
}
fprintf (stderr, "%s:%d: extract_balanced finished at comma (%d)\n",
logical_file_name, tp->line_number, --nesting_level);
#endif
- x_perl_unlex (tp);
+ x_perl_unlex (xp, tp);
return false;
}
/* An argument list starts, even though there is no '('. */
bool next_comma_delim;
- x_perl_unlex (tp);
+ x_perl_unlex (xp, tp);
if (next_shapes != NULL)
/* We know something about the function being called. Assume
best results. */
next_comma_delim = true;
- ++nesting_depth;
+ ++(xp->nesting_depth);
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d>> @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d>> @%d\n", xp->nesting_depth, xp->line_number);
#endif
- if (extract_balanced (mlp,
+ if (extract_balanced (xp,
delim, false,
true, false, next_comma_delim,
inner_region, next_context_iter,
return true;
}
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d<< @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d<< @%d\n", xp->nesting_depth, xp->line_number);
#endif
- nesting_depth--;
+ xp->nesting_depth--;
next_is_argument = false;
next_argparser = NULL;
(const struct callshapes *) keyword_value;
next_shapes = shapes;
- next_argparser = arglist_parser_alloc (mlp, shapes);
+ next_argparser = arglist_parser_alloc (xp->mlp, shapes);
}
else
{
next_shapes = NULL;
- next_argparser = arglist_parser_alloc (mlp, NULL);
+ next_argparser = arglist_parser_alloc (xp->mlp, NULL);
}
}
next_is_argument = true;
if (next_is_argument)
{
/* Parse the argument list of a function call. */
- ++nesting_depth;
+ ++(xp->nesting_depth);
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d>> @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d>> @%d\n", xp->nesting_depth, xp->line_number);
#endif
- if (extract_balanced (mlp,
+ if (extract_balanced (xp,
token_type_rparen, true,
false, false, false,
inner_region, next_context_iter,
return true;
}
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d<< @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d<< @%d\n", xp->nesting_depth, xp->line_number);
#endif
- nesting_depth--;
+ xp->nesting_depth--;
next_is_argument = false;
next_argparser = NULL;
}
else
{
/* Parse a parenthesized expression or comma expression. */
- ++nesting_depth;
+ ++(xp->nesting_depth);
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d>> @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d>> @%d\n", xp->nesting_depth, xp->line_number);
#endif
- if (extract_balanced (mlp,
+ if (extract_balanced (xp,
token_type_rparen, true,
false, false, false,
inner_region, next_context_iter,
return true;
}
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d<< @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d<< @%d\n", xp->nesting_depth, xp->line_number);
#endif
- nesting_depth--;
+ xp->nesting_depth--;
next_is_argument = false;
if (next_argparser != NULL)
free (next_argparser);
{
/* We have missed the argument. */
arglist_parser_done (argparser, arg);
- argparser = arglist_parser_alloc (mlp, NULL);
+ argparser = arglist_parser_alloc (xp->mlp, NULL);
arg = 0;
}
arg++;
if (extract_all)
{
- char *string = collect_message (mlp, tp);
+ char *string = collect_message (xp, tp);
if (string != NULL)
{
lex_pos_ty pos;
pos.file_name = logical_file_name;
pos.line_number = tp->line_number;
- remember_a_message (mlp, NULL, string, true, false,
+ remember_a_message (xp->mlp, NULL, string, true, false,
inner_region, &pos, NULL, tp->comment,
true);
}
if (must_collect)
{
- char *string = collect_message (mlp, tp);
+ char *string = collect_message (xp, tp);
if (string != NULL)
{
mixed_string_ty *ms =
if (arglist_parser_decidedp (argparser, arg))
{
arglist_parser_done (argparser, arg);
- argparser = arglist_parser_alloc (mlp, NULL);
+ argparser = arglist_parser_alloc (xp->mlp, NULL);
}
next_is_argument = false;
fprintf (stderr, "%s:%d: type lbrace (%d)\n",
logical_file_name, tp->line_number, nesting_level);
#endif
- ++nesting_depth;
+ ++(xp->nesting_depth);
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d>> @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d>> @%d\n", xp->nesting_depth, xp->line_number);
#endif
- if (extract_balanced (mlp,
+ if (extract_balanced (xp,
token_type_rbrace, true,
false, false, false,
null_context_region (),
null_context_list_iterator,
- 1, arglist_parser_alloc (mlp, NULL)))
+ 1, arglist_parser_alloc (xp->mlp, NULL)))
{
arglist_parser_done (argparser, arg);
unref_region (inner_region);
return true;
}
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d<< @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d<< @%d\n", xp->nesting_depth, xp->line_number);
#endif
- nesting_depth--;
+ xp->nesting_depth--;
next_is_argument = false;
if (next_argparser != NULL)
free (next_argparser);
fprintf (stderr, "%s:%d: type lbracket (%d)\n",
logical_file_name, tp->line_number, nesting_level);
#endif
- ++nesting_depth;
+ ++(xp->nesting_depth);
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d>> @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d>> @%d\n", xp->nesting_depth, xp->line_number);
#endif
- if (extract_balanced (mlp,
+ if (extract_balanced (xp,
token_type_rbracket, true,
false, false, false,
null_context_region (),
null_context_list_iterator,
- 1, arglist_parser_alloc (mlp, NULL)))
+ 1, arglist_parser_alloc (xp->mlp, NULL)))
{
arglist_parser_done (argparser, arg);
unref_region (inner_region);
return true;
}
#if DEBUG_NESTING_DEPTH
- fprintf (stderr, "extract_balanced %d<< @%d\n", nesting_depth, line_number);
+ fprintf (stderr, "extract_balanced %d<< @%d\n", xp->nesting_depth, xp->line_number);
#endif
- nesting_depth--;
+ xp->nesting_depth--;
next_is_argument = false;
if (next_argparser != NULL)
free (next_argparser);
/* The ultimate sign. */
arglist_parser_done (argparser, arg);
- argparser = arglist_parser_alloc (mlp, NULL);
+ argparser = arglist_parser_alloc (xp->mlp, NULL);
/* FIXME: Instead of resetting outer_context here, it may be better
to recurse in the next_is_argument handling above, waiting for
}
}
+static void
+extract_perl_input (struct perl_extractor *xp)
+{
+ /* Eat tokens until eof is seen. When extract_balanced returns due to an
+ unbalanced closing paren / brace / bracket or due to a semicolon, just
+ restart it. */
+ while (!extract_balanced (xp,
+ token_type_r_any, true,
+ true, true, false,
+ null_context_region (), null_context_list_iterator,
+ 1, arglist_parser_alloc (xp->mlp, NULL)))
+ ;
+}
+
void
extract_perl (FILE *f, const char *real_filename, const char *logical_filename,
flag_context_list_table_ty *flag_table,
msgdomain_list_ty *mdlp)
{
- message_list_ty *mlp = mdlp->item[0]->messages;
-
- fp = f;
- real_file_name = real_filename;
- logical_file_name = xstrdup (logical_filename);
- line_number = 0;
-
- linesize = 0;
- linepos = 0;
- eaten_here = 0;
- end_of_file = false;
-
- last_comment_line = -1;
- last_non_comment_line = -1;
-
flag_context_list_table = flag_table;
- nesting_depth = 0;
- /* Safe assumption. */
- last_token_type = token_type_semicolon;
+ init_keywords ();
- token_stack.items = NULL;
- token_stack.nitems = 0;
- token_stack.nitems_max = 0;
+ struct perl_extractor *xp = XMALLOC (struct perl_extractor);
- init_keywords ();
+ xp->mlp = mdlp->item[0]->messages;
+ xp->fp = f;
+ xp->input = NULL;
+ xp->input_end = NULL;
+ real_file_name = real_filename;
+ logical_file_name = xstrdup (logical_filename);
+ perl_extractor_init_rest (xp);
- /* Eat tokens until eof is seen. When extract_balanced returns due to an
- unbalanced closing paren / brace / bracket or due to a semicolon, just
- restart it. */
- while (!extract_balanced (mlp,
- token_type_r_any, true,
- true, true, false,
- null_context_region (), null_context_list_iterator,
- 1, arglist_parser_alloc (mlp, NULL)))
- ;
+ extract_perl_input (xp);
- fp = NULL;
+ token_stack_free (&xp->token_stack);
+ free (xp);
real_file_name = NULL;
free (logical_file_name);
logical_file_name = NULL;
- line_number = 0;
- last_token_type = token_type_semicolon;
- token_stack_free (&token_stack);
- eaten_here = 0;
- end_of_file = true;
}