#define _(s) gettext(s)
/* The Perl syntax is defined in perlsyn.pod. Try the command
- "perldoc perlsyn". */
+ "man perlsyn" or "perldoc perlsyn". */
#define DEBUG_PERL 0
#define DEBUG_MEMORY 0
token_type_named_op, /* if, unless, while, ... */
token_type_variable, /* $... */
token_type_symbol, /* symbol, number */
- token_type_regex_op, /* s, tr, y, m. */
token_type_keyword_symbol, /* keyword symbol (used by parser) */
+ token_type_regex_op, /* s, tr, y, m. */
token_type_dot, /* . */
token_type_other /* regexp, misc. operator */
};
struct token_ty
{
token_type_ty type;
- string_type_ty string_type;
- char *string; /* for token_type_{symbol,string} */
+ string_type_ty string_type; /* for token_type_string */
+ char *string; /* for token_type_named_op, token_type_string,
+ token_type_symbol, token_type_keyword_symbol,
+ token_type_variable */
int line_number;
};
#if DEBUG_PERL
static const char *
-token2string (token_ty *token)
+token2string (const token_ty *token)
{
switch (token->type)
{
return "token_type_variable";
case token_type_symbol:
return "token_type_symbol";
- case token_type_regex_op:
- return "token_type_regex_op";
case token_type_keyword_symbol:
return "token_type_keyword_symbol";
+ case token_type_regex_op:
+ return "token_type_regex_op";
case token_type_dot:
return "token_type_dot";
case token_type_other:
struct stack_entry *last;
};
-struct stack *token_stack;
+static struct stack *token_stack;
-/* Prototypes for local functions. Needed to ensure compiler checking of
- function argument counts despite of K&R C function definition syntax. */
+/* Forward declarations of local functions. */
static void interpolate_keywords (message_list_ty *mlp, const char *string);
static char *extract_quotelike_pass1 (int delim);
static token_ty *x_perl_lex (message_list_ty *mlp);
{
void *retval;
- fprintf (stderr, "*** remember_a_message (%p): ", string); fflush (stderr);
+ fprintf (stderr, "*** remember_a_message (%p): ", string);
retval = remember_a_message (mlp, string, pos);
- fprintf (stderr, "%p\n", retval); fflush (stderr);
+ fprintf (stderr, "%p\n", retval);
return retval;
}
remember_a_message_plural_debug (message_ty *mp, char *string, lex_pos_ty *pos)
{
fprintf (stderr, "*** remember_a_message_plural (%p, %p): ", mp, string);
- fflush (stderr);
remember_a_message_plural (mp, string, pos);
- fprintf (stderr, "done\n"); fflush (stderr);
+ fprintf (stderr, "done\n");
}
static void *
{
void *retval;
- fprintf (stderr, "*** xmalloc (%u): ", bytes); fflush (stderr);
+ fprintf (stderr, "*** xmalloc (%u): ", bytes);
retval = xmalloc (bytes);
- fprintf (stderr, "%p\n", retval); fflush (stderr);
+ fprintf (stderr, "%p\n", retval);
return retval;
}
{
void *retval;
- fprintf (stderr, "*** xrealloc (%p, %u): ", buf, bytes); fflush (stderr);
+ fprintf (stderr, "*** xrealloc (%p, %u): ", buf, bytes);
retval = xrealloc (buf, bytes);
- fprintf (stderr, "%p\n", retval); fflush (stderr);
+ fprintf (stderr, "%p\n", retval);
return retval;
}
void *retval;
fprintf (stderr, "*** xrealloc_static (%p, %u): ", buf, bytes);
- fflush (stderr);
retval = xrealloc (buf, bytes);
- fprintf (stderr, "%p\n", retval); fflush (stderr);
+ fprintf (stderr, "%p\n", retval);
return retval;
}
char *retval;
fprintf (stderr, "*** xstrdup (%p, %d): ", string, strlen (string));
- fflush (stderr);
retval = xstrdup (string);
- fprintf (stderr, "%p\n", retval); fflush (stderr);
+ fprintf (stderr, "%p\n", retval);
return retval;
}
static void
free_debug (void *buf)
{
- fprintf (stderr, "*** free (%p): ", buf); fflush (stderr);
+ fprintf (stderr, "*** free (%p): ", buf);
free (buf);
- fprintf (stderr, "done\n"); fflush (stderr);
+ fprintf (stderr, "done\n");
}
# define xmalloc(b) xmalloc_debug (b)
{
token_ty *token = (token_ty *) last->data;
fprintf (stderr, " [%s]\n", token2string (token));
- fflush (stderr);
switch (token->type)
{
case token_type_named_op:
case token_type_keyword_symbol:
case token_type_variable:
fprintf (stderr, " string: %s\n", token->string);
- fflush (stderr);
break;
}
}
#endif
/* Unshifts the pointer DATA onto the stack STACK. The argument DESTROY
- * is a pointer to a function that frees the resources associated with
- * DATA or NULL (no destructor).
- */
+ is a pointer to a function that frees the resources associated with
+ DATA or NULL (no destructor). */
static void
stack_unshift (struct stack *stack, void *data, void (*destroy) (token_ty *data))
{
}
/* Shifts the first element from the stack STACK and returns its contents or
- * NULL if the stack is empty.
- */
+ NULL if the stack is empty. */
static void *
stack_shift (struct stack *stack)
{
}
/* Return the bottom of the stack without removing it from the stack or
- * NULL if the stack is empty.
- */
+ NULL if the stack is empty. */
static void *
stack_head (struct stack *stack)
{
static int last_non_comment_line;
/* The current line buffer. */
-char *linebuf;
+static char *linebuf;
/* The size of the current line. */
-int linesize;
+static int linesize;
/* The position in the current line. */
-int linepos;
+static int linepos;
/* The size of the input buffer. */
-size_t linebuf_size;
+static size_t linebuf_size;
/* The last token seen in the token stream. This is important for the
interpretation of '?' and '/'. */
-token_type_ty last_token;
-
-/* The last string token waiting for a dot operator or finishing. */
-token_ty last_string;
-
-/* True if LAST_STRING is finished. */
-bool last_string_finished;
+static token_type_ty last_token;
/* Number of lines eaten for here documents. */
-int here_eaten;
+static int here_eaten;
/* Paranoia: EOF marker for __END__ or __DATA__. */
-bool end_of_file;
+static bool end_of_file;
+
/* 1. line_number handling. */
+
/* Returns the next character from the input stream or EOF. */
static int
phase1_getc ()
{
linesize = getline (&linebuf, &linebuf_size, fp);
- if (linesize == EOF)
+ if (linesize < 0)
{
if (ferror (fp))
error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
if (c != EOF)
{
if (linepos == 0)
- error (EXIT_FAILURE, 0, _("\
-%s:%d: internal error: attempt to ungetc across line boundary"),
- real_file_name, line_number);
+ /* Attempt to ungetc across line boundary. Shouldn't happen.
+ No two phase1_ungetc calls are permitted in a row. */
+ abort ();
--linepos;
}
}
+/* Read a here document and return its contents. */
+
static char *
get_here_document (const char *delimiter)
{
+ /* Accumulator for the entire here document, including a NUL byte
+ at the end. */
static char *buffer;
static size_t bufmax = 0;
size_t bufpos = 0;
+ /* Current line being appended. */
static char *my_linebuf = NULL;
static size_t my_linebuf_size = 0;
- bool chomp = false;
- if (bufpos >= bufmax)
+ /* Allocate the initial buffer. Later on, bufmax > 0. */
+ if (bufmax == 0)
{
buffer = xrealloc_static (NULL, 1);
buffer[0] = '\0';
for (;;)
{
int read_bytes = getline (&my_linebuf, &my_linebuf_size, fp);
+ bool chomp;
- if (read_bytes == EOF)
+ if (read_bytes < 0)
{
if (ferror (fp))
{
{
error_with_progname = false;
error (EXIT_SUCCESS, 0, _("\
-%s:%d: can\'t find string terminator \"%s\" anywhere before EOF"),
+%s:%d: can't find string terminator \"%s\" anywhere before EOF"),
real_file_name, line_number, delimiter);
error_with_progname = true;
- fflush (stderr);
- return xstrdup (buffer);
+ break;
}
}
--read_bytes;
}
- if (read_bytes && my_linebuf[read_bytes - 1] == '\n')
+ /* Temporarily remove the trailing newline from my_linebuf. */
+ chomp = false;
+ if (read_bytes >= 1 && my_linebuf[read_bytes - 1] == '\n')
{
chomp = true;
my_linebuf[read_bytes - 1] = '\0';
}
+
+ /* See whether this line terminates the here document. */
if (strcmp (my_linebuf, delimiter) == 0)
- {
- return xstrdup (buffer);
- }
+ break;
+
+ /* Add back the trailing newline to my_linebuf. */
if (chomp)
- {
- my_linebuf[read_bytes - 1] = '\n';
- }
+ my_linebuf[read_bytes - 1] = '\n';
- if (bufpos + read_bytes + 1 >= bufmax)
+ /* Ensure room for read_bytes + 1 bytes. */
+ if (bufpos + read_bytes >= bufmax)
{
- bufmax += read_bytes + 1;
+ do
+ bufmax = 2 * bufmax + 10;
+ while (bufpos + read_bytes >= bufmax);
buffer = xrealloc_static (buffer, bufmax);
}
+ /* Append this line to the accumulator. */
strcpy (buffer + bufpos, my_linebuf);
bufpos += read_bytes;
}
+
+ /* Done accumulating the here document. */
+ return xstrdup (buffer);
}
/* Skips pod sections. */
{
linesize = getline (&linebuf, &linebuf_size, fp);
- if (linesize == EOF)
+ if (linesize < 0)
{
if (ferror (fp))
error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
}
}
+
/* 2. Replace each comment that is not inside a string literal or regular
expression with a newline character. We need to remember the comment
for later, because it may be attached to a keyword string. */
break;
if (buflen >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
buffer[buflen++] = c;
}
if (buflen >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
buffer[buflen] = '\0';
/* There is an ambiguity about '/': It can start a division operator ('/' or
'/=') or it can start a regular expression. The distinction is important
- because inside regular expressions, '#' and '"' lose its special meanings.
- If you look at the awk grammar, you see that the operator is only allowed
- right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
- can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
- So we prefer the division operator interpretation only right after
- symbol, string, number, ')', ']', with whitespace but no newline allowed
- in between. */
+ because inside regular expressions, '#' loses its special meaning.
+ The distinction is possible depending on the parsing state: After a
+ variable or simple expression, it's a division operator; at the beginning
+ of an expression, it's a regexp. */
static bool prefer_division_over_regexp;
/* Free the memory pointed to by a 'struct token_ty'. */
}
/* Extract an unsigned hexadecimal number from STRING, considering at
- most LEN bytes and place the result in RESULT. Returns a pointer
+ most LEN bytes and place the result in *RESULT. Returns a pointer
to the first character past the hexadecimal number. */
-static char *
-extract_hex (char *string, size_t len, unsigned int *result)
+static const char *
+extract_hex (const char *string, size_t len, unsigned int *result)
{
size_t i;
for (i = 0; i < len; i++)
{
+ char c = string[i];
int number;
- if (string[i] >= 'A' && string[i] <= 'F')
- number = 10 + string[i] - 'A';
- else if (string[i] >= 'a' && string[i] <= 'f')
- number = 10 + string[i] - 'a';
- else if (string[i] >= '0' && string[i] <= '9')
- number = string[i] - '0';
+ if (c >= 'A' && c <= 'F')
+ number = c - 'A' + 10;
+ else if (c >= 'a' && c <= 'f')
+ number = c - 'a' + 10;
+ else if (c >= '0' && c <= '9')
+ number = c - '0';
else
break;
}
/* Extract an unsigned octal number from STRING, considering at
- most LEN bytes and place the result in RESULT. Returns a pointer
- to the first character past the hexadecimal number. */
-static char *
-extract_oct (char *string, size_t len, unsigned int *result)
+ most LEN bytes and place the result in *RESULT. Returns a pointer
+ to the first character past the octal number. */
+static const char *
+extract_oct (const char *string, size_t len, unsigned int *result)
{
size_t i;
for (i = 0; i < len; i++)
{
+ char c = string[i];
int number;
- if (string[i] >= '0' && string[i] <= '7')
- number = string[i] - '0';
+ if (c >= '0' && c <= '7')
+ number = c - '0';
else
break;
}
/* Extract the various quotelike constructs except for <<EOF. See the
- section "Gory details of parsing quoted constructs" in perlop.pod. */
+ section "Gory details of parsing quoted constructs" in perlop.pod.
+ Return the resulting token in *tp; tp->type == token_type_string. */
static void
extract_quotelike (token_ty *tp, int delim)
{
char *string = extract_quotelike_pass1 (delim);
- tp->type = token_type_string;
+ size_t len = strlen (string);
- string[strlen (string) - 1] = '\0';
+ tp->type = token_type_string;
+ /* Take the string without the delimiters at the start and at the end. */
+ if (!(len >= 2))
+ abort ();
+ string[len - 1] = '\0';
tp->string = xstrdup (string + 1);
free (string);
- return;
}
/* Extract the quotelike constructs with double delimiters, like
s/[SEARCH]/[REPLACE]/. This function does not eat up trailing
- modifiers (left to the caller). */
+ modifiers (left to the caller).
+ Return the resulting token in *tp; tp->type == token_type_regex_op. */
static void
extract_triple_quotelike (message_list_ty *mlp, token_ty *tp, int delim,
bool interpolate)
{
- char *string = extract_quotelike_pass1 (delim);
+ char *string;
tp->type = token_type_regex_op;
- if (interpolate && !extract_all && delim != '\'')
- interpolate_keywords (mlp, string);
+ string = extract_quotelike_pass1 (delim);
+ if (interpolate)
+ interpolate_keywords (mlp, string);
free (string);
if (delim == '(' || delim == '<' || delim == '{' || delim == '[')
{
- /* Things can change. */
+ /* The delimiter for the second string can be different, e.g.
+ s{SEARCH}{REPLACE} or s{SEARCH}/REPLACE/. See "man perlrequick". */
delim = phase1_getc ();
- while (delim == ' ' || delim == '\t' || delim == '\r'
- || delim == '\n' || delim == '\f')
+ while (delim == ' ' || delim == '\t' || delim == '\r' || delim == '\n'
+ || delim == '\f')
{
/* The hash-sign is not a valid delimiter after whitespace, ergo
use phase2_getc() and not phase1_getc() now. */
}
}
string = extract_quotelike_pass1 (delim);
- if (interpolate && !extract_all && delim != '\'')
+ if (interpolate)
interpolate_keywords (mlp, string);
free (string);
-
- return;
}
/* Pass 1 of extracting quotes: Find the end of the string, regardless
- of the semantics of the construct. */
+ of the semantics of the construct. Return the complete string,
+ including the starting and the trailing delimiter, with backslashes
+ removed where appropriate. */
static char *
extract_quotelike_pass1 (int delim)
{
/* This function is called recursively. No way to allocate stuff
- statically. Consider using alloca() instead. */
- char *buffer = (char *) xmalloc (100);
- int bufmax = 100;
+ statically. Also alloca() is inappropriate due to limited stack
+ size on some platforms. So we use malloc(). */
+ int bufmax = 10;
+ char *buffer = (char *) xmalloc (bufmax);
int bufpos = 0;
bool nested = true;
int counter_delim;
case '<':
counter_delim = '>';
break;
- default:
+ default: /* "..." or '...' or |...| etc. */
nested = false;
counter_delim = delim;
break;
{
int c = phase1_getc ();
- if (bufpos >= bufmax - 1)
+ /* This round can produce 1 or 2 bytes. Ensure room for 2 bytes. */
+ if (bufpos + 2 > bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc (buffer, bufmax);
}
char *inner = extract_quotelike_pass1 (delim);
size_t len = strlen (inner);
+ /* Ensure room for len + 1 bytes. */
if (bufpos + len >= bufmax)
{
- bufmax += len;
+ do
+ bufmax = 2 * bufmax + 10;
+ while (bufpos + len >= bufmax);
buffer = xrealloc (buffer, bufmax);
}
strcpy (buffer + bufpos, inner);
free (inner);
bufpos += len;
- continue;
}
-
- if (c == '\\')
+ else if (c == '\\')
{
c = phase1_getc ();
if (c == '\\')
}
}
-/* Perform pass 3 of quotelike extraction (interpolation). */
+/* Perform pass 3 of quotelike extraction (interpolation).
+ *tp is a token of type token_type_string.
+ This function replaces tp->string. */
/* FIXME: Currently may write NUL bytes into the string. */
static void
extract_quotelike_pass3 (token_ty *tp, int error_level)
static char *buffer;
static int bufmax = 0;
int bufpos = 0;
- char *string = tp->string;
- unsigned char *crs = string;
-
- bool uppercase = false;
- bool lowercase = false;
- bool quotemeta = false;
+ const char *crs;
+ bool uppercase;
+ bool lowercase;
+ bool quotemeta;
#if DEBUG_PERL
switch (tp->string_type)
if (tp->string_type == string_type_verbatim)
return;
+ /* Loop over tp->string, accumulating the expansion in buffer. */
+ crs = tp->string;
+ uppercase = false;
+ lowercase = false;
+ quotemeta = false;
while (*crs)
{
- if (bufpos >= bufmax - 6)
+ /* Ensure room for 6 bytes. */
+ if (bufpos + 6 > bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
{
++crs;
buffer[bufpos++] = '\\';
- continue;
+ break;
}
/* FALLTHROUGH */
default:
int length;
crs = extract_oct (crs + 1, 3, &oct_number);
- length = u8_uctomb (buffer + bufpos, oct_number, 3);
+ /* Yes, octal escape sequences in the range 0x100..0x1ff are
+ valid. */
+ length = u8_uctomb (buffer + bufpos, oct_number, 2);
if (length > 0)
bufpos += length;
}
int length;
++crs;
-
if (*crs == '{')
{
- char *end = strchr (crs, '}');
+ const char *end = strchr (crs, '}');
if (end == NULL)
{
error_with_progname = false;
else
{
++crs;
- (void) extract_hex (crs, 4, &hex_number);
+ (void) extract_hex (crs, end - crs, &hex_number);
+ crs = end + 1;
}
}
else
crs += 2;
if (*crs)
{
- int the_char = *crs;
+ int the_char = (unsigned char) *crs;
if (the_char >= 'a' || the_char <= 'z')
- the_char -= 0x20;
- buffer[bufpos++] = the_char + (the_char & 0x40 ? -64 : 64);
+ the_char = the_char - 'a' + 'A';
+ buffer[bufpos++] = the_char ^ 0x40;
}
continue;
case 'N':
crs += 2;
if (*crs == '{')
{
- char *name = xstrdup (crs + 1);
- char *end = strchr (name, '}');
+ const char *end = strchr (crs + 1, '}');
if (end != NULL)
{
+ char *name;
unsigned int unicode;
- int length;
- *end = '\0';
+ name = (char *) xmalloc (end - (crs + 1) + 1);
+ memcpy (name, crs + 1, end - (crs + 1));
+ name[end - (crs + 1)] = '\0';
- crs += 2 + strlen (name);
unicode = unicode_name_character (name);
if (unicode != UNINAME_INVALID)
{
- length = u8_uctomb (buffer + bufpos, unicode, 6);
+ int length = u8_uctomb (buffer + bufpos, unicode, 6);
if (length > 0)
bufpos += length;
}
+
+ free (name);
+
+ crs = end + 1;
}
- free (name);
}
continue;
}
switch (*crs)
{
case 'E':
- quotemeta = uppercase = lowercase = false;
+ uppercase = false;
+ lowercase = false;
+ quotemeta = false;
++crs;
continue;
case 'L':
- quotemeta = uppercase = false;
+ uppercase = false;
lowercase = true;
+ quotemeta = false;
++crs;
continue;
case 'U':
- quotemeta = lowercase = false;
uppercase = true;
+ lowercase = false;
+ quotemeta = false;
++crs;
continue;
case 'Q':
- uppercase = lowercase = false;
+ uppercase = false;
+ lowercase = false;
quotemeta = true;
++crs;
continue;
case 'l':
++crs;
- if (crs[1] >= 'A' && crs[1] <= 'Z')
+ if (*crs >= 'A' && *crs <= 'Z')
{
- buffer[bufpos++] = crs[1] + 0x20;
- ++crs;
+ buffer[bufpos++] = *crs - 'A' + 'a';
}
- else if (crs[1] >= 0x80)
+ else if ((unsigned char) *crs >= 0x80)
{
error_with_progname = false;
error (error_level, 0, _("\
%s:%d: invalid interpolation (\"\\l\") of 8bit character \"%c\""),
real_file_name, line_number, *crs);
error_with_progname = true;
- ++crs;
}
- else
- ++crs;
+ ++crs;
continue;
case 'u':
++crs;
- if (crs[1] >= 'a' && crs[1] <= 'z')
+ if (*crs >= 'a' && *crs <= 'z')
{
- buffer[bufpos++] = crs[1] - 0x20;
- ++crs;
+ buffer[bufpos++] = *crs - 'a' + 'A';
}
- else if (crs[1] >= 0x80)
+ else if ((unsigned char) *crs >= 0x80)
{
error_with_progname = false;
error (error_level, 0, _("\
%s:%d: invalid interpolation (\"\\u\") of 8bit character \"%c\""),
real_file_name, line_number, *crs);
error_with_progname = true;
- ++crs;
}
- else
- ++crs;
+ ++crs;
continue;
case '\\':
+ /* FIXME: This looks buggy. */
if (crs[1])
buffer[bufpos++] = crs[1];
crs++;
}
}
-
if (*crs == '$' || *crs == '@')
{
- ++crs;
error_with_progname = false;
error (error_level, 0, _("\
%s:%d: invalid variable interpolation at \"%c\""),
real_file_name, line_number, *crs);
error_with_progname = true;
+ ++crs;
}
else if (lowercase)
{
if (*crs >= 'A' && *crs <= 'Z')
- buffer[bufpos++] = 0x20 + *crs++;
- else if (*crs >= 0x80)
+ buffer[bufpos++] = *crs - 'A' + 'a';
+ else if ((unsigned char) *crs >= 0x80)
{
error_with_progname = false;
error (error_level, 0, _("\
%s:%d: invalid interpolation (\"\\L\") of 8bit character \"%c\""),
real_file_name, line_number, *crs);
error_with_progname = true;
- buffer[bufpos++] = *crs++;
+ buffer[bufpos++] = *crs;
}
else
- buffer[bufpos++] = *crs++;
+ buffer[bufpos++] = *crs;
+ ++crs;
}
else if (uppercase)
{
if (*crs >= 'a' && *crs <= 'z')
- buffer[bufpos++] = *crs++ - 0x20;
- else if (*crs >= 0x80)
+ buffer[bufpos++] = *crs - 'a' + 'A';
+ else if ((unsigned char) *crs >= 0x80)
{
error_with_progname = false;
error (error_level, 0, _("\
%s:%d: invalid interpolation (\"\\U\") of 8bit character \"%c\""),
real_file_name, line_number, *crs);
error_with_progname = true;
- buffer[bufpos++] = *crs++;
+ buffer[bufpos++] = *crs;
}
else
- buffer[bufpos++] = *crs++;
+ buffer[bufpos++] = *crs;
+ ++crs;
}
else if (quotemeta)
{
}
}
- if (bufpos >= bufmax - 1)
- {
- bufmax += 100;
+ /* Ensure room for 1 more byte. */
+ if (bufpos >= bufmax)
+ {
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
- }
+ }
buffer[bufpos++] = '\0';
fprintf (stderr, "---> %s\n", buffer);
#endif
+ /* Replace tp->string. */
free (tp->string);
tp->string = xstrdup (buffer);
}
/* Parse a variable. This is done in several steps:
- *
- * 1) Consume all leading occurcencies of '$', '@', '%', and '*'.
- * 2) Determine the name of the variable from the following input
- * 3) Parse possible following hash keys or array indexes.
+ 1) Consume all leading occurrences of '$', '@', '%', and '*'.
+ 2) Determine the name of the variable from the following input.
+ 3) Parse possible following hash keys or array indexes.
*/
static void
extract_variable (message_list_ty *mlp, token_ty *tp, int first)
{
if (bufpos >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
buffer[bufpos++] = c;
maybe_hash_value = true;
else if (bufpos == 2 && buffer[1] == '$')
{
- if (c != '{' && c != '_' && (!((c >= '0' && c <= '9')
- || (c >= 'A' && c <= 'Z')
- || (c >= 'a' && c <= 'z')
- || c == ':' || c == '\''
- || c >= 0x80)))
+ if (!(c == '{'
+ || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+ || (c >= '0' && c <= '9')
+ || c == '_' || c == ':' || c == '\'' || c >= 0x80))
{
/* Special variable $$ for pid. */
if (bufpos >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
buffer[bufpos++] = '\0';
*/
if (bufpos >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
if (c == '{')
}
else
{
- while ((c >= 'A' && c <= 'Z') ||
- (c >= 'a' && c <= 'z') ||
- (c >= '0' && c <= '9') ||
- c == '_' || c == ':' || c == '\'' || c >= 0x80)
+ while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+ || (c >= '0' && c <= '9')
+ || c == '_' || c == ':' || c == '\'' || c >= 0x80)
{
++varbody_length;
if (bufpos >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
buffer[bufpos++] = c;
phase1_ungetc (c);
}
- if (bufpos >= bufmax - 1)
+ if (bufpos + 1 >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
buffer[bufpos++] = '\0';
int c = phase2_getc ();
while (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f')
- c = phase2_getc ();
+ c = phase2_getc ();
if (c == '-')
{
c = phase2_getc ();
while (c == ' ' || c == '\t' || c == '\r'
|| c == '\n' || c == '\f')
- c = phase2_getc ();
+ c = phase2_getc ();
}
else if (c2 != '\n')
{
special character recognized after a minus is greater-than
for dereference. However, the sequence "-\n>" that we
treat incorrectly here, is a syntax error. */
- phase1_ungetc (c2);
+ phase1_ungetc (c2);
}
}
}
/* Actually a simplified version of extract_variable(). It searches for
- * variables inside a double-quoted string that may interpolate to
- * some keyword hash (reference).
- */
+ variables inside a double-quoted string that may interpolate to
+ some keyword hash (reference). */
static void
interpolate_keywords (message_list_ty *mlp, const char *string)
{
static char *buffer;
static int bufmax = 0;
int bufpos = 0;
- int c = string[0];
+ int c = (unsigned char) string[0];
bool maybe_hash_deref = false;
enum parser_state
{
pos.file_name = logical_file_name;
pos.line_number = line_number;
- while ((c = *string++) != '\0')
+ while ((c = (unsigned char) *string++) != '\0')
{
void *keyword_value;
if (state == initial)
bufpos = 0;
- if (bufpos >= bufmax - 1)
+ if (bufpos + 1 >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
switch (c)
{
case '\\':
- c = *string++;
- if (!c) return;
+ c = (unsigned char) *string++;
+ if (c == '\0')
+ return;
break;
case '$':
buffer[bufpos++] = '$';
{
case '-':
if (find_entry (&keywords, buffer, bufpos, &keyword_value) == 0)
- {
- state = minus;
- }
+ state = minus;
else
state = initial;
break;
buffer[0] = '%';
}
if (find_entry (&keywords, buffer, bufpos, &keyword_value) == 0)
- {
- state = wait_quote;
- }
+ state = wait_quote;
else
state = initial;
break;
buffer[bufpos++] = c;
}
else
- {
- state = initial;
- }
+ state = initial;
break;
}
break;
buffer[bufpos++] = string++[0];
}
else
- {
- state = initial;
- }
+ state = initial;
break;
default:
buffer[bufpos++] = c;
buffer[bufpos++] = string++[0];
}
else
- {
- state = initial;
- }
+ state = initial;
break;
default:
buffer[bufpos++] = c;
{
if (bufpos >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
buffer[bufpos++] = c;
}
if (bufpos >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
buffer[bufpos] = '\0';
prefer_division_over_regexp = true;
return;
}
- extract_triple_quotelike (mlp, tp, delim, buffer[0] == 's');
+ extract_triple_quotelike (mlp, tp, delim,
+ buffer[0] == 's' && !extract_all
+ && delim != '\'');
/* Eat the following modifiers. */
c = phase1_getc ();
{
case 'q':
case 'x':
- tp->string_type = string_type_qq;
tp->type = token_type_string;
+ tp->string_type = string_type_qq;
if (!extract_all)
interpolate_keywords (mlp, tp->string);
break;
case '(':
c = phase2_getc ();
if (c == ')')
- {
- continue; /* Ignore empty list. */
- }
+ /* Ignore empty list. */
+ continue;
else
phase2_ungetc (c);
tp->type = token_type_lparen;
{
if (bufpos >= bufmax)
{
- bufmax += 100;
+ bufmax = 2 * bufmax + 10;
buffer = xrealloc_static (buffer, bufmax);
}
buffer[bufpos++] = c;
#if DEBUG_PERL
fprintf (stderr, "%s:%d: pre-fetching next token\n",
real_file_name, line_number);
- fflush (stderr);
#endif
next = x_perl_lex (mlp);
x_perl_unlex (next);
last_token = token_type_semicolon; /* Safe assumption. */
prefer_division_over_regexp = false;
- last_string_finished = false;
-
init_keywords ();
token_stack = (struct stack *) xmalloc (sizeof (struct stack));
+ token_stack->first = NULL;
+ token_stack->last = NULL;
here_eaten = 0;
end_of_file = false;
logical_file_name = NULL;
line_number = 0;
last_token = token_type_semicolon;
- last_string_finished = false;
stack_free (token_stack);
free (token_stack);
token_stack = NULL;