From: Bruno Haible Date: Thu, 26 Sep 2024 00:30:57 +0000 (+0200) Subject: Modernize: Use gnulib's string-buffer module for accumulating strings. X-Git-Tag: v0.23~110 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=63709a20897369ca8d2cebf8a7d1f9f51ae9e359;p=thirdparty%2Fgettext.git Modernize: Use gnulib's string-buffer module for accumulating strings. * autogen.sh (GNULIB_MODULES_RUNTIME_FOR_SRC, GNULIB_MODULES_TOOLS_FOR_SRC): Add xstring-buffer. * gettext-runtime/src/envsubst.c: Include string-buffer.h. (subst_from_stdin): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/cldr-plural.y: Include string-buffer.h. (yylex): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/cldr-plurals.c: Include string-buffer.h instead of xalloc.h. (extract_rules): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/its.c: Include string-buffer.h. (_its_collect_text_content): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/read-desktop.c: Include string-buffer.h. (desktop_lex): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/read-properties.c: Include string-buffer.h. (properties_parse): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-awk.c: Include string-buffer.h. (phase2_getc, phase3_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-c.c: Include string-buffer.h. (phase5_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-java.c: Include string-buffer.h. 
(phase5_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-javascript.c: Include string-buffer.h. (phase5_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-lua.c: Include string-buffer.h. (string_buf_length, string_buf_alloc, string_buf): Remove variables. (string_start, string_add, string_end): Remove functions. (phase3_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-perl.c: Include string-buffer.h. (get_here_document, phase2_getc, extract_quotelike_pass1, extract_quotelike_pass3, extract_variable, interpolate_keywords, x_perl_prelex): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-php.c: Include string-buffer.h. (process_heredoc, phase4_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-python.c: Include string-buffer.h. (phase5_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-rst.c: Include string-buffer.h. (buffer, bufmax): Remove variables. (buffer): New variable. (extract_rst, parse_integer): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. (extract_rsj): Update. * gettext-tools/src/x-smalltalk.c: Include string-buffer.h. (phase2_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-vala.c: Include string-buffer.h. (phase3_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. * gettext-tools/src/x-ycp.c: Include string-buffer.h. 
(phase2_getc, phase5_get): Use a stack-allocated string_buffer instead of 3 local variables and micro-optimized ad-hoc code. --- diff --git a/autogen.sh b/autogen.sh index 53dffdbe5..db360cd27 100755 --- a/autogen.sh +++ b/autogen.sh @@ -117,6 +117,7 @@ if ! $skip_gnulib; then unistd unlocked-io xalloc + xstring-buffer ' GNULIB_MODULES_RUNTIME_OTHER=' gettext-runtime-misc @@ -278,6 +279,7 @@ if ! $skip_gnulib; then xstrerror xstriconv xstriconveh + xstring-buffer xvasprintf ' # Common dependencies of GNULIB_MODULES_TOOLS_FOR_SRC and GNULIB_MODULES_TOOLS_FOR_LIBGREP. diff --git a/gettext-runtime/src/envsubst.c b/gettext-runtime/src/envsubst.c index f9a20e6f8..2303607c9 100644 --- a/gettext-runtime/src/envsubst.c +++ b/gettext-runtime/src/envsubst.c @@ -36,6 +36,7 @@ #include "relocatable.h" #include "basename-lgpl.h" #include "xalloc.h" +#include "string-buffer.h" #include "propername.h" #include "binary-io.h" #include "gettext.h" @@ -466,9 +467,6 @@ do_ungetc (int c) static void subst_from_stdin () { - static char *buffer; - static size_t bufmax; - static size_t buflen; int c; for (;;) @@ -490,19 +488,14 @@ subst_from_stdin () } if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_') { + struct string_buffer buffer; bool valid; /* Accumulate the VARIABLE in buffer. */ - buflen = 0; + sb_init (&buffer); do { - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen++] = c; - + sb_xappend1 (&buffer, c); c = do_getc (); } while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') @@ -530,23 +523,19 @@ subst_from_stdin () if (valid) { /* Terminate the variable in the buffer. */ - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen] = '\0'; + const char *variable = sb_xcontents_c (&buffer); /* Test whether the variable shall be substituted. 
*/ if (!all_variables - && !sorted_string_list_member (&variables_set, buffer)) + && !sorted_string_list_member (&variables_set, variable)) valid = false; } if (valid) { /* Substitute the variable's value from the environment. */ - const char *env_value = getenv (buffer); + const char *variable = sb_xcontents_c (&buffer); + const char *env_value = getenv (variable); if (env_value != NULL) fputs (env_value, stdout); @@ -559,10 +548,12 @@ subst_from_stdin () putchar ('$'); if (opening_brace) putchar ('{'); - fwrite (buffer, buflen, 1, stdout); + string_desc_fwrite (stdout, sb_contents (&buffer)); if (closing_brace) putchar ('}'); } + + sb_free (&buffer); } else { diff --git a/gettext-tools/src/cldr-plural.y b/gettext-tools/src/cldr-plural.y index 51fc960ae..05c1b56ec 100644 --- a/gettext-tools/src/cldr-plural.y +++ b/gettext-tools/src/cldr-plural.y @@ -1,5 +1,5 @@ /* Unicode CLDR plural rule parser and converter - Copyright (C) 2015, 2020 Free Software Foundation, Inc. + Copyright (C) 2015-2024 Free Software Foundation, Inc. This file was written by Daiki Ueno , 2015. 
@@ -27,6 +27,7 @@ #include #include "unistr.h" #include "xalloc.h" +#include "string-buffer.h" #include "cldr-plural-exp.h" #include "cldr-plural.h" @@ -289,9 +290,6 @@ yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg) ucs4_t uc; int length; int result; - static char *buffer; - static size_t bufmax; - size_t bufpos; while (1) { @@ -384,67 +382,64 @@ yylex (YYSTYPE *lval, struct cldr_plural_parse_args *arg) case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - bufpos = 0; - for (;;) - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = result; - result = *exp; - switch (result) - { - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'l': case 'm': case 'n': case 'o': - case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': - case 'z': - ++exp; - continue; - default: - break; - } - break; - } + { + struct string_buffer buffer; - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; + sb_init (&buffer); + for (;;) + { + sb_xappend1 (&buffer, result); + result = *exp; + switch (result) + { + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + ++exp; + continue; + default: + break; + } + break; + } - /* Operands. 
*/ - if (bufpos == 1) - { - switch (buffer[0]) - { - case 'n': case 'i': case 'f': case 't': case 'v': case 'w': - arg->cp = exp; - lval->ival = buffer[0]; - return OPERAND; - default: - break; - } - } + const char *ident = sb_xcontents_c (&buffer); - /* Keywords. */ - if (strcmp (buffer, "and") == 0) - { - arg->cp = exp; - return AND; - } - else if (strcmp (buffer, "or") == 0) - { - arg->cp = exp; - return OR; - } + /* Operands. */ + if (strlen (ident) == 1) + { + switch (ident[0]) + { + case 'n': case 'i': case 'f': case 't': case 'v': case 'w': + arg->cp = exp; + lval->ival = ident[0]; + sb_free (&buffer); + return OPERAND; + default: + break; + } + } + + /* Keywords. */ + if (strcmp (ident, "and") == 0) + { + arg->cp = exp; + sb_free (&buffer); + return AND; + } + else if (strcmp (ident, "or") == 0) + { + arg->cp = exp; + sb_free (&buffer); + return OR; + } - lval->sval = xstrdup (buffer); - result = KEYWORD; + lval->sval = sb_xdupfree_c (&buffer); + result = KEYWORD; + } break; case '!': if (exp[0] == '=') diff --git a/gettext-tools/src/cldr-plurals.c b/gettext-tools/src/cldr-plurals.c index 794cac7db..dccdc358a 100644 --- a/gettext-tools/src/cldr-plurals.c +++ b/gettext-tools/src/cldr-plurals.c @@ -1,5 +1,5 @@ /* Unicode CLDR plural rule parser and converter - Copyright (C) 2015, 2018-2023 Free Software Foundation, Inc. + Copyright (C) 2015-2024 Free Software Foundation, Inc. This file was written by Daiki Ueno , 2015. 
@@ -36,7 +36,7 @@ #include "relocatable.h" #include #include -#include "xalloc.h" +#include "string-buffer.h" #define _(s) gettext(s) @@ -49,9 +49,9 @@ extract_rules (FILE *fp, xmlDocPtr doc; xmlNodePtr node, n; size_t locale_length; - char *buffer = NULL, *p; - size_t bufmax = 0; - size_t buflen = 0; + struct string_buffer buffer; + + sb_init (&buffer); doc = xmlReadFd (fileno (fp), logical_filename, NULL, XML_PARSE_NONET @@ -128,10 +128,6 @@ extract_rules (FILE *fp, for (n2 = n->children; n2; n2 = n2->next) { - xmlChar *count; - xmlChar *content; - size_t length; - if (n2->type != XML_ELEMENT_NODE || !xmlStrEqual (n2->name, BAD_CAST "pluralRule")) continue; @@ -146,38 +142,24 @@ extract_rules (FILE *fp, break; } - count = xmlGetProp (n2, BAD_CAST "count"); - content = xmlNodeGetContent (n2); - length = xmlStrlen (count) + strlen (": ") - + xmlStrlen (content) + strlen ("; "); - - if (buflen + length + 1 > bufmax) - { - bufmax *= 2; - if (bufmax < buflen + length + 1) - bufmax = buflen + length + 1; - buffer = (char *) xrealloc (buffer, bufmax); - } - - sprintf (buffer + buflen, "%s: %s; ", count, content); + xmlChar *count = xmlGetProp (n2, BAD_CAST "count"); + xmlChar *content = xmlNodeGetContent (n2); + sb_xappendf (&buffer, "%s: %s; ", count, content); xmlFree (count); xmlFree (content); - - buflen += length; } } - if (buffer) - { - /* Scrub the last semicolon, if any. */ - p = strrchr (buffer, ';'); - if (p) - *p = '\0'; - } + { + /* Scrub the last semicolon, if any. */ + char *p = strrchr (sb_xcontents_c (&buffer), ';'); + if (p) + *p = '\0'; + } out: xmlFreeDoc (doc); - return buffer; + return sb_xdupfree_c (&buffer); } /* Display usage information and exit. 
*/ diff --git a/gettext-tools/src/its.c b/gettext-tools/src/its.c index db37b08d2..f82a47889 100644 --- a/gettext-tools/src/its.c +++ b/gettext-tools/src/its.c @@ -39,6 +39,7 @@ #include "trim.h" #include "xalloc.h" #include "xvasprintf.h" +#include "string-buffer.h" #include "gettext.h" #define _(str) gettext (str) @@ -577,11 +578,11 @@ _its_collect_text_content (xmlNode *node, enum its_whitespace_type_ty whitespace, bool no_escape) { - char *buffer = NULL; - size_t bufmax = 0; - size_t bufpos = 0; + struct string_buffer buffer; xmlNode *n; + sb_init (&buffer); + for (n = node->children; n; n = n->next) { char *content = NULL; @@ -673,23 +674,11 @@ _its_collect_text_content (xmlNode *node, } if (content != NULL) - { - size_t length = strlen (content); - - if (bufpos + length + 1 >= bufmax) - { - bufmax = 2 * bufmax + length + 1; - buffer = xrealloc (buffer, bufmax); - } - strcpy (&buffer[bufpos], content); - bufpos += length; - } + sb_xappend_c (&buffer, content); free (content); } - if (buffer == NULL) - buffer = xstrdup (""); - return buffer; + return sb_xdupfree_c (&buffer); } static void diff --git a/gettext-tools/src/read-desktop.c b/gettext-tools/src/read-desktop.c index 490a51d97..cd3554217 100644 --- a/gettext-tools/src/read-desktop.c +++ b/gettext-tools/src/read-desktop.c @@ -22,8 +22,6 @@ /* Specification. 
*/ #include "read-desktop.h" -#include "xalloc.h" - #include #include #include @@ -34,6 +32,7 @@ #include #include "xalloc.h" #include "xvasprintf.h" +#include "string-buffer.h" #include "c-ctype.h" #include "po-xerror.h" #include "gettext.h" @@ -210,24 +209,12 @@ free_token (token_ty *tp) static void desktop_lex (token_ty *tp) { - static char *buffer; - static size_t bufmax; - size_t bufpos; + struct string_buffer buffer; #undef APPEND -#define APPEND(c) \ - do \ - { \ - if (bufpos >= bufmax) \ - { \ - bufmax += 100; \ - buffer = xrealloc (buffer, bufmax); \ - } \ - buffer[bufpos++] = c; \ - } \ - while (0) - - bufpos = 0; +#define APPEND(c) sb_xappend1 (&buffer, (c)) + + sb_init (&buffer); for (;;) { int c; @@ -237,6 +224,7 @@ desktop_lex (token_ty *tp) switch (c) { case EOF: + sb_free (&buffer); tp->type = token_type_eof; return; @@ -277,7 +265,7 @@ desktop_lex (token_ty *tp) _("invalid non-blank character")); APPEND (0); tp->type = token_type_group; - tp->string = xstrdup (buffer); + tp->string = sb_xdupfree_c (&buffer); return; } @@ -293,7 +281,7 @@ desktop_lex (token_ty *tp) } APPEND (0); tp->type = token_type_comment; - tp->string = xstrdup (buffer); + tp->string = sb_xdupfree_c (&buffer); return; } @@ -340,7 +328,7 @@ desktop_lex (token_ty *tp) /* Finish the key part and start the locale part. 
*/ APPEND (0); found_locale = true; - locale_start = bufpos; + locale_start = string_desc_length (sb_contents (&buffer)); for (;;) { @@ -381,7 +369,9 @@ desktop_lex (token_ty *tp) { po_xerror (PO_SEVERITY_WARNING, NULL, real_file_name, pos.line_number, 0, false, - xasprintf (_("missing '=' after \"%s\""), buffer)); + xasprintf (_("missing '=' after \"%s\""), + sb_xcontents_c (&buffer))); + sb_free (&buffer); for (;;) { c = phase2_getc (); @@ -409,7 +399,7 @@ desktop_lex (token_ty *tp) break; } - value_start = bufpos; + value_start = string_desc_length (sb_contents (&buffer)); for (;;) { c = phase2_getc (); @@ -418,10 +408,13 @@ desktop_lex (token_ty *tp) APPEND (c); } APPEND (0); + char *buffer_contents = sb_xdupfree_c (&buffer); tp->type = token_type_pair; - tp->string = xmemdup (buffer, bufpos); - tp->locale = found_locale ? &buffer[locale_start] : NULL; - tp->value = &buffer[value_start]; + tp->string = buffer_contents; + /* tp->locale and tp->value are live only as long as tp->string is + live. */ + tp->locale = found_locale ? 
&buffer_contents[locale_start] : NULL; + tp->value = &buffer_contents[value_start]; return; } default: @@ -445,12 +438,13 @@ desktop_lex (token_ty *tp) po_xerror (PO_SEVERITY_WARNING, NULL, real_file_name, pos.line_number, 0, false, _("invalid non-blank line")); + sb_free (&buffer); tp->type = token_type_other; return; } APPEND (0); tp->type = token_type_blank; - tp->string = xstrdup (buffer); + tp->string = sb_xdupfree_c (&buffer); return; } } diff --git a/gettext-tools/src/read-properties.c b/gettext-tools/src/read-properties.c index 8273e96fb..116bd4481 100644 --- a/gettext-tools/src/read-properties.c +++ b/gettext-tools/src/read-properties.c @@ -34,6 +34,7 @@ #include "read-catalog-abstract.h" #include "xalloc.h" #include "xvasprintf.h" +#include "string-buffer.h" #include "xstrerror.h" #include "xerror-handler.h" #include "msgl-ascii.h" @@ -641,32 +642,23 @@ properties_parse (abstract_catalog_reader_ty *catr, FILE *file, if (comment) { /* A comment line. */ - static char *buffer; - static size_t bufmax; - static size_t buflen; + struct string_buffer buffer; - buflen = 0; + sb_init (&buffer); for (;;) { c = phase2_getc (); - - if (buflen >= bufmax) - { - bufmax += 100; - buffer = xrealloc (buffer, bufmax); - } - if (c == EOF || c == '\n') break; - buffer[buflen++] = c; + sb_xappend1 (&buffer, c); } - buffer[buflen] = '\0'; + char *contents = sb_xdupfree_c (&buffer); catalog_reader_seen_generic_comment ( catr, conv_from_java ( - assume_utf8 ? buffer : conv_from_iso_8859_1 (buffer))); + assume_utf8 ? 
contents : conv_from_iso_8859_1 (contents))); } else { diff --git a/gettext-tools/src/x-awk.c b/gettext-tools/src/x-awk.c index dd617ee75..58aa34c4b 100644 --- a/gettext-tools/src/x-awk.c +++ b/gettext-tools/src/x-awk.c @@ -41,6 +41,7 @@ #include "xg-message.h" #include "if-error.h" #include "xalloc.h" +#include "string-buffer.h" #include "gettext.h" #define _(s) gettext(s) @@ -170,16 +171,14 @@ phase1_ungetc (int c) static int phase2_getc () { - static char *buffer; - static size_t bufmax; - size_t buflen; int lineno; int c; c = phase1_getc (); if (c == '#') { - buflen = 0; + struct string_buffer buffer; + sb_init (&buffer); lineno = line_number; for (;;) { @@ -187,23 +186,11 @@ phase2_getc () if (c == '\n' || c == EOF) break; /* We skip all leading white space, but not EOLs. */ - if (!(buflen == 0 && (c == ' ' || c == '\t'))) - { - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen++] = c; - } - } - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); + if (!(string_desc_length (sb_contents (&buffer)) == 0 + && (c == ' ' || c == '\t'))) + sb_xappend1 (&buffer, c); } - buffer[buflen] = '\0'; - savable_comment_add (buffer); + savable_comment_add (sb_xdupfree_c (&buffer)); last_comment_line = lineno; } return c; @@ -382,9 +369,6 @@ static int phase3_pushback_length; static void phase3_get (token_ty *tp) { - static char *buffer; - static int bufmax; - int bufpos; int c; if (phase3_pushback_length) @@ -454,97 +438,87 @@ phase3_get (token_ty *tp) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* Symbol, or part of a number. 
*/ - bufpos = 0; - for (;;) - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = phase2_getc (); - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '_': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - continue; - default: - if (bufpos == 1 && buffer[0] == '_' && c == '"') + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + sb_xappend1 (&buffer, c); + c = phase2_getc (); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + continue; + default: { - tp->type = token_type_i18nstring; - goto case_string; + string_desc_t contents = sb_contents (&buffer); + if (string_desc_length (contents) == 1 + && string_desc_char_at (contents, 0) == '_' + && c == '"') + { + sb_free (&buffer); + tp->type = 
token_type_i18nstring; + goto case_string; + } } - phase2_ungetc (c); - break; - } - break; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->string = xstrdup (buffer); - tp->type = token_type_symbol; - /* Most identifiers can be variable names; after them we must - interpret '/' as division operator. But for awk's builtin - keywords we have three cases: - (a) Must interpret '/' as division operator. "length". - (b) Must interpret '/' as start of a regular expression. - "do", "exit", "print", "printf", "return". - (c) '/' after this keyword in invalid anyway. All others. - I used the following script for the distinction. - for k in $awk_keywords; do - echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null - done - */ - if (strcmp (buffer, "do") == 0 - || strcmp (buffer, "exit") == 0 - || strcmp (buffer, "print") == 0 - || strcmp (buffer, "printf") == 0 - || strcmp (buffer, "return") == 0) - prefer_division_over_regexp = false; - else - prefer_division_over_regexp = true; + phase2_ungetc (c); + break; + } + break; + } + tp->string = sb_xdupfree_c (&buffer); + tp->type = token_type_symbol; + /* Most identifiers can be variable names; after them we must + interpret '/' as division operator. But for awk's builtin + keywords we have three cases: + (a) Must interpret '/' as division operator. "length". + (b) Must interpret '/' as start of a regular expression. + "do", "exit", "print", "printf", "return". + (c) '/' after this keyword in invalid anyway. All others. + I used the following script for the distinction. 
+ for k in $awk_keywords; do + echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null + done + */ + if (strcmp (tp->string, "do") == 0 + || strcmp (tp->string, "exit") == 0 + || strcmp (tp->string, "print") == 0 + || strcmp (tp->string, "printf") == 0 + || strcmp (tp->string, "return") == 0) + prefer_division_over_regexp = false; + else + prefer_division_over_regexp = true; + } return; case '"': tp->type = token_type_string; case_string: - bufpos = 0; - for (;;) - { - c = get_string_element (); - if (c == EOF || c == SE_QUOTES) - break; - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->string = xstrdup (buffer); - prefer_division_over_regexp = true; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + c = get_string_element (); + if (c == EOF || c == SE_QUOTES) + break; + sb_xappend1 (&buffer, c); + } + tp->string = sb_xdupfree_c (&buffer); + prefer_division_over_regexp = true; + } return; case '(': diff --git a/gettext-tools/src/x-c.c b/gettext-tools/src/x-c.c index 5b7125d50..34ebbec65 100644 --- a/gettext-tools/src/x-c.c +++ b/gettext-tools/src/x-c.c @@ -45,6 +45,7 @@ #include "if-error.h" #include "xalloc.h" #include "xvasprintf.h" +#include "string-buffer.h" #include "mem-hash-map.h" #include "po-charset.h" #include "gettext.h" @@ -1243,9 +1244,6 @@ static int phase5_pushback_length; static void phase5_get (token_ty *tp) { - static char *buffer; - static int bufmax; - int bufpos; int c; if (phase5_pushback_length) @@ -1298,218 +1296,218 @@ phase5_get (token_ty *tp) case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - bufpos = 0; - for (;;) - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 
10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = phase4_getc (); - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '_': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - continue; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + sb_xappend1 (&buffer, c); + c = phase4_getc (); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + continue; - case '"': - /* Recognize C11 / C++11 string literals. - See (for C) ISO 9899:2011 section 6.4.5 - and (for C++) ISO C++ 11 section 2.14.5 [lex.string]. - Note: The programmer who passes an UTF-8 encoded string to - gettext() or similar API functions will have to have called - bind_textdomain_codeset (DOMAIN, "UTF-8") first. 
*/ - if ((bufpos == 1 - && (buffer[0] == 'u' || buffer[0] == 'U' - || buffer[0] == 'L')) - || (bufpos == 2 && buffer[0] == 'u' && buffer[1] == '8')) - goto string_literal; - /* Recognize C++11 raw string literals. - See ISO C++ 11 section 2.14.5 [lex.string]. - Here it is important to properly parse all cases according to - the standard, otherwise our parser could get confused by - double-quotes inside the raw string. - Note: The programmer who passes an UTF-8 encoded string to - gettext() or similar API functions will have to have called - bind_textdomain_codeset (DOMAIN, "UTF-8") first. */ - if (cxx_extensions - && (bufpos == 1 - || (bufpos == 2 - && (buffer[0] == 'u' || buffer[0] == 'U' - || buffer[0] == 'L')) - || (bufpos == 3 && buffer[0] == 'u' && buffer[1] == '8')) - && buffer[bufpos - 1] == 'R') + case '"': { - /* Only R and u8R raw strings can be used as gettext() - arguments, for type reasons. But the programmer may have - defined - - a c16gettext function that takes a 'const char16_t *' - argument, or - - a c32gettext function that takes a 'const char32_t *' - argument, or - - a wgettext function that takes a 'const wchar_t *' - argument. */ - int starting_line_number = line_number; - bufpos = 0; - /* Start the buffer with a closing parenthesis. This makes the - parsing code below simpler. */ - buffer[bufpos++] = ')'; - /* Parse the initial delimiter. */ - for (;;) + string_desc_t contents = sb_contents (&buffer); + const char *buf = string_desc_data (contents); + size_t buflen = string_desc_length (contents); + + /* Recognize C11 / C++11 string literals. + See (for C) ISO 9899:2011 section 6.4.5 + and (for C++) ISO C++ 11 section 2.14.5 [lex.string]. + Note: The programmer who passes an UTF-8 encoded string to + gettext() or similar API functions will have to have called + bind_textdomain_codeset (DOMAIN, "UTF-8") first. 
*/ + if ((buflen == 1 + && (buf[0] == 'u' || buf[0] == 'U' || buf[0] == 'L')) + || (buflen == 2 && buf[0] == 'u' && buf[1] == '8')) { - bool valid_delimiter_char; - - c = phase3_getc (); - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': case 'G': case 'H': case 'I': case 'J': - case 'K': case 'L': case 'M': case 'N': case 'O': - case 'P': case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': case 'Y': - case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'l': case 'm': case 'n': case 'o': - case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': - case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case '_': case '{': case '}': case '[': case ']': - case '#': case '<': case '>': case '%': case ':': - case ';': case '.': case '?': case '*': case '+': - case '-': case '/': case '^': case '&': case '|': - case '~': case '!': case '=': case ',': case '\'': - valid_delimiter_char = true; - break; - case '"': - /* A double-quote within the delimiter! This is too - weird. We don't support this. */ - if_error (IF_SEVERITY_WARNING, - logical_file_name, starting_line_number, (size_t)(-1), false, - _("a double-quote in the delimiter of a raw string literal is unsupported")); - FALLTHROUGH; - default: - valid_delimiter_char = false; - break; - } - if (!valid_delimiter_char) - break; - - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; + sb_free (&buffer); + goto string_literal; } - if (c == '(') + /* Recognize C++11 raw string literals. + See ISO C++ 11 section 2.14.5 [lex.string]. + Here it is important to properly parse all cases according to + the standard, otherwise our parser could get confused by + double-quotes inside the raw string. 
+ Note: The programmer who passes an UTF-8 encoded string to + gettext() or similar API functions will have to have called + bind_textdomain_codeset (DOMAIN, "UTF-8") first. */ + if (cxx_extensions + && (buflen == 1 + || (buflen == 2 + && (buf[0] == 'u' || buf[0] == 'U' || buf[0] == 'L')) + || (buflen == 3 && buf[0] == 'u' && buf[1] == '8')) + && buf[buflen - 1] == 'R') { - struct mixed_string_buffer msb; - /* The state is either 0 or - N, after a ')' and N-1 bytes of the delimiter have been - encountered. */ - int state; - - /* Start accumulating the string. */ - mixed_string_buffer_init (&msb, lc_string, - logical_file_name, line_number); - state = 0; - + /* Only R and u8R raw strings can be used as gettext() + arguments, for type reasons. But the programmer may have + defined + - a c16gettext function that takes a 'const char16_t *' + argument, or + - a c32gettext function that takes a 'const char32_t *' + argument, or + - a wgettext function that takes a 'const wchar_t *' + argument. */ + int starting_line_number = line_number; + buffer.length = 0; + /* Start the buffer with a closing parenthesis. This makes the + parsing code below simpler. */ + sb_xappend1 (&buffer, ')'); + /* Parse the initial delimiter. */ for (;;) { - c = phase3_getc (); - - /* Keep line_number in sync. */ - msb.line_number = line_number; + bool valid_delimiter_char; - if (c == EOF) - break; - - /* Update the state. */ - if (c == (state < bufpos ? buffer[state] : '"')) + c = phase3_getc (); + switch (c) { - if (state < bufpos) - state++; - else /* state == bufpos && c == '"' */ - { - /* Finished parsing the string. 
*/ - tp->type = token_type_string_literal; - tp->mixed_string = mixed_string_buffer_result (&msb); - tp->comment = add_reference (savable_comment); - return; - } + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '_': case '{': case '}': case '[': case ']': + case '#': case '<': case '>': case '%': case ':': + case ';': case '.': case '?': case '*': case '+': + case '-': case '/': case '^': case '&': case '|': + case '~': case '!': case '=': case ',': case '\'': + valid_delimiter_char = true; + break; + case '"': + /* A double-quote within the delimiter! This is too + weird. We don't support this. */ + if_error (IF_SEVERITY_WARNING, + logical_file_name, starting_line_number, (size_t)(-1), false, + _("a double-quote in the delimiter of a raw string literal is unsupported")); + FALLTHROUGH; + default: + valid_delimiter_char = false; + break; } - else + if (!valid_delimiter_char) + break; + + sb_xappend1 (&buffer, c); + } + if (c == '(') + { + struct mixed_string_buffer msb; + /* The state is either 0 or + N, after a ')' and N-1 bytes of the delimiter have been + encountered. */ + int state; + + /* Start accumulating the string. */ + mixed_string_buffer_init (&msb, lc_string, + logical_file_name, line_number); + state = 0; + + for (;;) { - int i; + c = phase3_getc (); - /* None of the bytes buffer[0]...buffer[state-1] - can be ')'. 
*/ - for (i = 0; i < state; i++) - mixed_string_buffer_append_char (&msb, buffer[i]); + /* Keep line_number in sync. */ + msb.line_number = line_number; - /* But c may be ')'. */ - if (c == ')') - state = 1; + if (c == EOF) + break; + + /* Update the state. */ + string_desc_t contents = sb_contents (&buffer); + const char *buf = string_desc_data (contents); + size_t buflen = string_desc_length (contents); + if (c == (state < buflen ? buf[state] : '"')) + { + if (state < buflen) + state++; + else /* state == buflen && c == '"' */ + { + /* Finished parsing the string. */ + sb_free (&buffer); + tp->type = token_type_string_literal; + tp->mixed_string = mixed_string_buffer_result (&msb); + tp->comment = add_reference (savable_comment); + return; + } + } else { - mixed_string_buffer_append_char (&msb, c); - state = 0; + int i; + + /* None of the bytes buf[0]...buf[state-1] + can be ')'. */ + for (i = 0; i < state; i++) + mixed_string_buffer_append_char (&msb, buf[i]); + + /* But c may be ')'. */ + if (c == ')') + state = 1; + else + { + mixed_string_buffer_append_char (&msb, c); + state = 0; + } } } } - } - if (c == EOF) - { - if_error (IF_SEVERITY_WARNING, - logical_file_name, starting_line_number, (size_t)(-1), false, - _("unterminated raw string literal")); - tp->type = token_type_eof; + if (c == EOF) + { + if_error (IF_SEVERITY_WARNING, + logical_file_name, starting_line_number, (size_t)(-1), false, + _("unterminated raw string literal")); + sb_free (&buffer); + tp->type = token_type_eof; + return; + } + /* The warning message for c == '"' was already emitted above. */ + if (c != '"') + if_error (IF_SEVERITY_WARNING, + logical_file_name, starting_line_number, (size_t)(-1), false, + _("invalid raw string literal syntax")); + sb_free (&buffer); + /* To get into a sane state, read up until the next double-quote, + newline, or EOF. 
*/ + while (!(c == EOF || c == '"' || c == '\n')) + c = phase3_getc (); + tp->type = token_type_symbol; return; } - /* The warning message for c == '"' was already emitted above. */ - if (c != '"') - if_error (IF_SEVERITY_WARNING, - logical_file_name, starting_line_number, (size_t)(-1), false, - _("invalid raw string literal syntax")); - /* To get into a sane state, read up until the next double-quote, - newline, or EOF. */ - while (!(c == EOF || c == '"' || c == '\n')) - c = phase3_getc (); - tp->type = token_type_symbol; - return; } - FALLTHROUGH; + FALLTHROUGH; - default: - phase4_ungetc (c); - break; - } - break; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = 0; - tp->string = xstrdup (buffer); - tp->type = token_type_name; + default: + phase4_ungetc (c); + break; + } + break; + } + tp->string = sb_xdupfree_c (&buffer); + tp->type = token_type_name; + } return; case '.': @@ -1533,152 +1531,135 @@ phase5_get (token_ty *tp) /* The preprocessing number token is more "generous" than the C number tokens. This is mostly due to token pasting (another thing we can ignore here). */ - bufpos = 0; - for (;;) - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = phase4_getc (); - switch (c) - { - case 'p': - case 'P': - /* In C99 and C++17, 'p' and 'P' can be used as an exponent - marker. 
*/ - FALLTHROUGH; - case 'e': - case 'E': - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = phase4_getc (); - if (c != '+' && c != '-') - { - phase4_ungetc (c); - break; - } - continue; - - case 'A': case 'B': case 'C': case 'D': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case '.': - continue; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + sb_xappend1 (&buffer, c); + c = phase4_getc (); + switch (c) + { + case 'p': + case 'P': + /* In C99 and C++17, 'p' and 'P' can be used as an exponent + marker. */ + FALLTHROUGH; + case 'e': + case 'E': + sb_xappend1 (&buffer, c); + c = phase4_getc (); + if (c != '+' && c != '-') + { + phase4_ungetc (c); + break; + } + continue; - case '_': - if (cxx_extensions) - /* In C++, an underscore can be part of a preprocessing number - token. 
*/ + case 'A': case 'B': case 'C': case 'D': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '.': continue; - else - { - phase4_ungetc (c); - break; - } - case '\'': - if (cxx_extensions) - { - /* In C++14, a single-quote followed by a digit, ASCII letter, - or underscore can be part of a preprocessing number token. */ - int c1 = phase4_getc (); - switch (c1) - { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '_': - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = c1; - continue; - default: - /* The two phase4_getc() calls that returned c and c1 did - nothing more than to call phase3_getc(), without any - lookahead. Therefore 2 pushback characters are - supported in this case. 
*/ - phase4_ungetc (c1); - break; - } - } - else - { - /* In C23, a single-quote between two hexadecimal digits - can be part of a number token. It's called a "digit - separator". See ISO C 23 § 6.4.4.1 and § 6.4.4.2. */ - if (bufpos > 0) - { - char prev = buffer[bufpos - 1]; - if ((prev >= '0' && prev <= '9') - || (prev >= 'A' && prev <= 'F') - || (prev >= 'a' && prev <= 'f')) - { - int c1 = phase4_getc (); - if ((c1 >= '0' && c1 <= '9') - || (c1 >= 'A' && c1 <= 'F') - || (c1 >= 'a' && c1 <= 'f')) - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = c1; - continue; - } - /* The two phase4_getc() calls that returned c and c1 - did nothing more than to call phase3_getc(), - without any lookahead. Therefore 2 pushback - characters are supported in this case. */ - phase4_ungetc (c1); - } - } - } - FALLTHROUGH; - default: - phase4_ungetc (c); - break; - } - break; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = 0; - tp->type = token_type_number; - tp->number = atol (buffer); + case '_': + if (cxx_extensions) + /* In C++, an underscore can be part of a preprocessing number + token. */ + continue; + else + { + phase4_ungetc (c); + break; + } + + case '\'': + if (cxx_extensions) + { + /* In C++14, a single-quote followed by a digit, ASCII + letter, or underscore can be part of a preprocessing + number token. 
*/ + int c1 = phase4_getc (); + switch (c1) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case '_': + sb_xappend1 (&buffer, c); + c = c1; + continue; + default: + /* The two phase4_getc() calls that returned c and c1 did + nothing more than to call phase3_getc(), without any + lookahead. Therefore 2 pushback characters are + supported in this case. */ + phase4_ungetc (c1); + break; + } + } + else + { + /* In C23, a single-quote between two hexadecimal digits + can be part of a number token. It's called a "digit + separator". See ISO C 23 § 6.4.4.1 and § 6.4.4.2. */ + string_desc_t contents = sb_contents (&buffer); + if (string_desc_length (contents) > 0) + { + char prev = + string_desc_char_at (contents, + string_desc_length (contents) - 1); + if ((prev >= '0' && prev <= '9') + || (prev >= 'A' && prev <= 'F') + || (prev >= 'a' && prev <= 'f')) + { + int c1 = phase4_getc (); + if ((c1 >= '0' && c1 <= '9') + || (c1 >= 'A' && c1 <= 'F') + || (c1 >= 'a' && c1 <= 'f')) + { + sb_xappend1 (&buffer, c); + c = c1; + continue; + } + /* The two phase4_getc() calls that returned c and c1 + did nothing more than to call phase3_getc(), + without any lookahead. Therefore 2 pushback + characters are supported in this case. 
*/ + phase4_ungetc (c1); + } + } + } + FALLTHROUGH; + default: + phase4_ungetc (c); + break; + } + break; + } + tp->type = token_type_number; + tp->number = atol (sb_xdupfree_c (&buffer)); + } return; case '\'': diff --git a/gettext-tools/src/x-java.c b/gettext-tools/src/x-java.c index cad9f2cdb..2c485325e 100644 --- a/gettext-tools/src/x-java.c +++ b/gettext-tools/src/x-java.c @@ -42,6 +42,7 @@ #include "xg-message.h" #include "if-error.h" #include "xalloc.h" +#include "string-buffer.h" #include "mem-hash-map.h" #include "po-charset.h" #include "unistr.h" @@ -1173,17 +1174,11 @@ phase5_get (token_ty *tp) characters. This avoids conversion hassles w.r.t. the --keyword arguments, and shouldn't be a big problem in practice. */ { - static char *buffer; - static int bufmax; - int bufpos = 0; + struct string_buffer buffer; + sb_init (&buffer); for (;;) { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = RED (c); + sb_xappend1 (&buffer, RED (c)); c = phase4_getc (); if (!((RED (c) >= 'A' && RED (c) <= 'Z') || (RED (c) >= 'a' && RED (c) <= 'z') @@ -1192,13 +1187,7 @@ phase5_get (token_ty *tp) break; } phase4_ungetc (c); - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->string = xstrdup (buffer); + tp->string = sb_xdupfree_c (&buffer); tp->type = token_type_symbol; return; } diff --git a/gettext-tools/src/x-javascript.c b/gettext-tools/src/x-javascript.c index 5f60c77c8..b408e1b8a 100644 --- a/gettext-tools/src/x-javascript.c +++ b/gettext-tools/src/x-javascript.c @@ -49,6 +49,7 @@ #include "xerror.h" #include "xvasprintf.h" #include "xalloc.h" +#include "string-buffer.h" #include "c-strstr.h" #include "c-ctype.h" #include "po-charset.h" @@ -1187,19 +1188,12 @@ phase5_get (token_ty *tp) case '5': case '6': case '7': case '8': case '9': /* Symbol, or part of a number. 
*/ { - static char *buffer; - static int bufmax; - int bufpos; + struct string_buffer buffer; - bufpos = 0; + sb_init (&buffer); for (;;) { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); c = phase3_getc (); switch (c) { @@ -1223,15 +1217,9 @@ phase5_get (token_ty *tp) } break; } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->string = xstrdup (buffer); - if (strcmp (buffer, "return") == 0 - || strcmp (buffer, "else") == 0) + tp->string = sb_xdupfree_c (&buffer); + if (strcmp (tp->string, "return") == 0 + || strcmp (tp->string, "else") == 0) tp->type = last_token_type = token_type_keyword; else tp->type = last_token_type = token_type_symbol; diff --git a/gettext-tools/src/x-lua.c b/gettext-tools/src/x-lua.c index e7b52adfc..dc4b0b944 100644 --- a/gettext-tools/src/x-lua.c +++ b/gettext-tools/src/x-lua.c @@ -41,6 +41,7 @@ #include "xg-message.h" #include "if-error.h" #include "xalloc.h" +#include "string-buffer.h" #include "gettext.h" #include "po-charset.h" @@ -428,41 +429,6 @@ free_token (token_ty *tp) drop_reference (tp->comment); } -/* Our current string. */ -static int string_buf_length; -static int string_buf_alloc; -static char *string_buf; - -static void -string_start () -{ - string_buf_length = 0; -} - -static void -string_add (int c) -{ - if (string_buf_length >= string_buf_alloc) - { - string_buf_alloc = 2 * string_buf_alloc + 10; - string_buf = xrealloc (string_buf, string_buf_alloc); - } - - string_buf[string_buf_length++] = c; -} - -static void -string_end () -{ - if (string_buf_length >= string_buf_alloc) - { - string_buf_alloc = string_buf_alloc + 1; - string_buf = xrealloc (string_buf, string_buf_alloc); - } - - string_buf[string_buf_length] = '\0'; -} - /* We need 3 pushback tokens for string optimization. 
*/ static int phase3_pushback_length; @@ -593,124 +559,126 @@ phase3_get (token_ty *tp) case '"': case '\'': - c_start = c; - string_start (); + { + c_start = c; + struct string_buffer buffer; + sb_init (&buffer); - for (;;) - { - /* We need unprocessed characters from phase 1. */ - c = phase1_getc (); + for (;;) + { + /* We need unprocessed characters from phase 1. */ + c = phase1_getc (); - if (c == EOF || c == c_start || c == '\n') - { - /* End of string. */ - string_end (); - tp->string = xstrdup (string_buf); - tp->comment = add_reference (savable_comment); - tp->type = token_type_string; - return; - } + if (c == EOF || c == c_start || c == '\n') + { + /* End of string. */ + tp->string = sb_xdupfree_c (&buffer); + tp->comment = add_reference (savable_comment); + tp->type = token_type_string; + return; + } - /* We got '\', this is probably an escape sequence. */ - if (c == '\\') - { - c = phase1_getc (); - switch (c) - { - case 'a': - string_add ('\a'); - break; - case 'b': - string_add ('\b'); - break; - case 'f': - string_add ('\f'); - break; - case 'n': - string_add ('\n'); - break; - case 'r': - string_add ('\r'); - break; - case 't': - string_add ('\t'); - break; - case 'v': - string_add ('\v'); - break; - case 'x': + /* We got '\', this is probably an escape sequence. */ + if (c == '\\') + { + c = phase1_getc (); + switch (c) { - int num = 0; - int i = 0; - - for (i = 0; i < 2; i++) - { - c = phase1_getc (); - if (c >= '0' && c <= '9') - num += c - '0'; - else if (c >= 'a' && c <= 'f') - num += c - 'a' + 10; - else if (c >= 'A' && c <= 'F') - num += c - 'A' + 10; - else - { - phase1_ungetc (c); - break; - } - - if (i == 0) - num *= 16; - } - - if (i == 2) - string_add (num); - } - - break; - case 'z': - /* Ignore the following whitespace. */ - do - { - c = phase1_getc (); - } - while (c == ' ' || c == '\n' || c == '\t' || c == '\r' - || c == '\f' || c == '\v'); - - phase1_ungetc (c); - - break; - default: - /* Check if it's a '\ddd' sequence. 
*/ - if (c >= '0' && c <= '9') + case 'a': + sb_xappend1 (&buffer, '\a'); + break; + case 'b': + sb_xappend1 (&buffer, '\b'); + break; + case 'f': + sb_xappend1 (&buffer, '\f'); + break; + case 'n': + sb_xappend1 (&buffer, '\n'); + break; + case 'r': + sb_xappend1 (&buffer, '\r'); + break; + case 't': + sb_xappend1 (&buffer, '\t'); + break; + case 'v': + sb_xappend1 (&buffer, '\v'); + break; + case 'x': { int num = 0; int i = 0; - while (c >= '0' && c <= '9' && i < 3) + for (i = 0; i < 2; i++) { - num *= 10; - num += (c - '0'); c = phase1_getc (); - i++; - } + if (c >= '0' && c <= '9') + num += c - '0'; + else if (c >= 'a' && c <= 'f') + num += c - 'a' + 10; + else if (c >= 'A' && c <= 'F') + num += c - 'A' + 10; + else + { + phase1_ungetc (c); + break; + } - /* The last read character is either a - non-number or another number after our - '\ddd' sequence. We need to ungetc it. */ - phase1_ungetc (c); + if (i == 0) + num *= 16; + } - /* The sequence number is too big, this - causes a lexical error. Ignore it. */ - if (num < 256) - string_add (num); + if (i == 2) + sb_xappend1 (&buffer, num); } - else - string_add (c); - } - } - else - string_add (c); - } + + break; + case 'z': + /* Ignore the following whitespace. */ + do + { + c = phase1_getc (); + } + while (c == ' ' || c == '\n' || c == '\t' || c == '\r' + || c == '\f' || c == '\v'); + + phase1_ungetc (c); + + break; + default: + /* Check if it's a '\ddd' sequence. */ + if (c >= '0' && c <= '9') + { + int num = 0; + int i = 0; + + while (c >= '0' && c <= '9' && i < 3) + { + num *= 10; + num += (c - '0'); + c = phase1_getc (); + i++; + } + + /* The last read character is either a + non-number or another number after our + '\ddd' sequence. We need to ungetc it. */ + phase1_ungetc (c); + + /* The sequence number is too big, this + causes a lexical error. Ignore it. 
*/ + if (num < 256) + sb_xappend1 (&buffer, num); + } + else + sb_xappend1 (&buffer, c); + } + } + else + sb_xappend1 (&buffer, c); + } + } break; case '[': @@ -742,68 +710,69 @@ phase3_get (token_ty *tp) } /* Found an opening long bracket. */ - string_start (); + { + struct string_buffer buffer; + sb_init (&buffer); - /* See if it is immediately followed by a newline. */ - c = phase1_getc (); - if (c != '\n') - phase1_ungetc (c); + /* See if it is immediately followed by a newline. */ + c = phase1_getc (); + if (c != '\n') + phase1_ungetc (c); - for (;;) - { - c = phase1_getc (); + for (;;) + { + c = phase1_getc (); - if (c == EOF) - { - string_end (); - tp->string = xstrdup (string_buf); - tp->comment = add_reference (savable_comment); - tp->type = token_type_string; - return; - } - if (c == ']') - { - c = phase1_getc (); + if (c == EOF) + { + tp->string = sb_xdupfree_c (&buffer); + tp->comment = add_reference (savable_comment); + tp->type = token_type_string; + return; + } + if (c == ']') + { + c = phase1_getc (); - /* Count the number of equal signs. */ - int esigns2 = 0; - while (c == '=') - { - esigns2++; - c = phase1_getc (); - } + /* Count the number of equal signs. */ + int esigns2 = 0; + while (c == '=') + { + esigns2++; + c = phase1_getc (); + } - if (c == ']' && esigns == esigns2) - { - /* We got ']==...==]', where the number of equal - signs matches the number of equal signs in - the opening bracket. */ - string_end (); - tp->string = xstrdup (string_buf); - tp->comment = add_reference (savable_comment); - tp->type = token_type_string; - return; - } - else - { - /* Otherwise we got either ']==' garbage or - ']==...==]' with a different number of equal - signs. - - Add ']' and equal signs to the string, and - ungetc the current character, because the - second ']' might be a part of another closing - long bracket, e.g. '==]===]'. 
*/ - phase1_ungetc (c); - - string_add (']'); - while (esigns2--) - string_add ('='); - } - } - else - string_add (c); - } + if (c == ']' && esigns == esigns2) + { + /* We got ']==...==]', where the number of equal + signs matches the number of equal signs in + the opening bracket. */ + tp->string = sb_xdupfree_c (&buffer); + tp->comment = add_reference (savable_comment); + tp->type = token_type_string; + return; + } + else + { + /* Otherwise we got either ']==' garbage or + ']==...==]' with a different number of equal + signs. + + Add ']' and equal signs to the string, and + ungetc the current character, because the + second ']' might be a part of another closing + long bracket, e.g. '==]===]'. */ + phase1_ungetc (c); + + sb_xappend1 (&buffer, ']'); + while (esigns2--) + sb_xappend1 (&buffer, '='); + } + } + else + sb_xappend1 (&buffer, c); + } + } break; case ']': @@ -839,25 +808,36 @@ phase3_get (token_ty *tp) else if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_') { - string_start (); + struct string_buffer buffer; + sb_init (&buffer); + while ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9')) { - string_add (c); + sb_xappend1 (&buffer, c); c = phase1_getc (); } - string_end (); phase1_ungetc (c); - if (strcmp (string_buf, "not") == 0) - tp->type = token_type_operator1; - else if (strcmp (string_buf, "and") == 0) - tp->type = token_type_operator2; - else if (strcmp (string_buf, "or") == 0) - tp->type = token_type_operator2; + const char *contents = sb_xcontents_c (&buffer); + if (strcmp (contents, "not") == 0) + { + sb_free (&buffer); + tp->type = token_type_operator1; + } + else if (strcmp (contents, "and") == 0) + { + sb_free (&buffer); + tp->type = token_type_operator2; + } + else if (strcmp (contents, "or") == 0) + { + sb_free (&buffer); + tp->type = token_type_operator2; + } else { - tp->string = xstrdup (string_buf); + tp->string = sb_xdupfree_c (&buffer); tp->type = token_type_symbol; } return; diff 
--git a/gettext-tools/src/x-perl.c b/gettext-tools/src/x-perl.c index 178b54b62..4fab5a218 100644 --- a/gettext-tools/src/x-perl.c +++ b/gettext-tools/src/x-perl.c @@ -45,6 +45,7 @@ #include "xg-message.h" #include "if-error.h" #include "xalloc.h" +#include "string-buffer.h" #include "c-ctype.h" #include "po-charset.h" #include "unistr.h" @@ -428,20 +429,12 @@ get_here_document (struct perl_extractor *xp, const char *delimiter) { /* Accumulator for the entire here document, including a NUL byte at the end. */ - static char *buffer; - static size_t bufmax = 0; - size_t bufpos = 0; + struct string_buffer buffer; /* Current line being appended. */ static char *my_linebuf = NULL; static size_t my_linebuf_size = 0; - /* Allocate the initial buffer. Later on, bufmax > 0. */ - if (bufmax == 0) - { - buffer = XNMALLOC (1, char); - buffer[0] = '\0'; - bufmax = 1; - } + sb_init (&buffer); for (;;) { @@ -513,21 +506,12 @@ get_here_document (struct perl_extractor *xp, const char *delimiter) if (chomp) my_linebuf[read_bytes - 1] = '\n'; - /* Ensure room for read_bytes + 1 bytes. */ - if (bufpos + read_bytes >= bufmax) - { - do - bufmax = 2 * bufmax + 10; - while (bufpos + read_bytes >= bufmax); - buffer = xrealloc (buffer, bufmax); - } /* Append this line to the accumulator. */ - strcpy (buffer + bufpos, my_linebuf); - bufpos += read_bytes; + sb_xappend_desc (&buffer, string_desc_new_addr (read_bytes, my_linebuf)); } /* Done accumulating the here document. */ - return xstrdup (buffer); + return sb_xdupfree_c (&buffer); } /* Skips pod sections. */ @@ -569,9 +553,6 @@ skip_pod (struct perl_extractor *xp) static int phase2_getc (struct perl_extractor *xp) { - static char *buffer; - static size_t bufmax; - size_t buflen; int lineno; int c; char *utf8_string; @@ -579,7 +560,8 @@ phase2_getc (struct perl_extractor *xp) c = phase1_getc (xp); if (c == '#') { - buflen = 0; + struct string_buffer buffer; + sb_init (&buffer); lineno = xp->line_number; /* Skip leading whitespace. 
*/ for (;;) @@ -599,23 +581,13 @@ phase2_getc (struct perl_extractor *xp) c = phase1_getc (xp); if (c == '\n' || c == EOF) break; - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen++] = c; + sb_xappend1 (&buffer, c); } - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen] = '\0'; /* Convert it to UTF-8. */ utf8_string = - from_current_source_encoding (buffer, lc_comment, logical_file_name, - lineno); + from_current_source_encoding (sb_xcontents_c (&buffer), lc_comment, + logical_file_name, lineno); + sb_free (&buffer); /* Save it until we encounter the corresponding string. */ savable_comment_add (utf8_string); xp->last_comment_line = lineno; @@ -765,16 +737,12 @@ free_token (token_ty *tp) static string_desc_t extract_quotelike_pass1 (struct perl_extractor *xp, int delim) { - /* This function is called recursively. No way to allocate stuff - statically. Also alloca() is inappropriate due to limited stack - size on some platforms. So we use malloc(). */ - int bufmax = 10; - char *buffer = XNMALLOC (bufmax, char); - int bufpos = 0; + struct string_buffer buffer; bool nested = true; int counter_delim; - buffer[bufpos++] = delim; + sb_init (&buffer); + sb_xappend1 (&buffer, delim); /* Find the closing delimiter. */ switch (delim) @@ -801,61 +769,43 @@ extract_quotelike_pass1 (struct perl_extractor *xp, int delim) { int c = phase1_getc (xp); - /* This round can produce 1 or 2 bytes. Ensure room for 2 bytes. 
*/ - if (bufpos + 2 > bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - if (c == counter_delim || c == EOF) { - buffer[bufpos++] = counter_delim; /* will be stripped off later */ + sb_xappend1 (&buffer, counter_delim); /* will be stripped off later */ #if DEBUG_PERL - fprintf (stderr, "PASS1: %.*s\n", bufpos, buffer); + fprintf (stderr, "PASS1: %.*s\n", (int) buffer.length, buffer.data); #endif - return string_desc_new_addr (bufpos, buffer); + return sb_xdupfree (&buffer); } if (nested && c == delim) { string_desc_t inner = extract_quotelike_pass1 (xp, delim); - size_t len = string_desc_length (inner); - - /* Ensure room for len + 1 bytes. */ - if (bufpos + len >= bufmax) - { - do - bufmax = 2 * bufmax + 10; - while (bufpos + len >= bufmax); - buffer = xrealloc (buffer, bufmax); - } - memcpy (buffer + bufpos, string_desc_data (inner), len); + sb_xappend_desc (&buffer, inner); string_desc_free (inner); - bufpos += len; } else if (c == '\\') { c = phase1_getc (xp); if (c == '\\') { - buffer[bufpos++] = '\\'; - buffer[bufpos++] = '\\'; + sb_xappend1 (&buffer, '\\'); + sb_xappend1 (&buffer, '\\'); } else if (c == delim || c == counter_delim) { /* This is pass2 in Perl. */ - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); } else { - buffer[bufpos++] = '\\'; + sb_xappend1 (&buffer, '\\'); phase1_ungetc (xp, c); } } else { - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); } } } @@ -1018,9 +968,7 @@ extract_triple_quotelike (struct perl_extractor *xp, token_ty *tp, int delim, static void extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) { - static char *buffer; - static int bufmax = 0; - int bufpos = 0; + struct string_buffer buffer; const char *crs; bool uppercase; bool lowercase; @@ -1050,6 +998,8 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) if (tp->sub_type == string_type_verbatim) return; + sb_init (&buffer); + /* Loop over tp->string, accumulating the expansion in buffer. 
*/ crs = tp->string; uppercase = false; @@ -1059,14 +1009,6 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) { bool backslashed; - /* Ensure room for 7 bytes, 6 (multi-)bytes plus a leading backslash - if \Q modifier is present. */ - if (bufpos + 7 > bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - if (tp->sub_type == string_type_q) { switch (*crs) @@ -1075,12 +1017,12 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) if (crs[1] == '\\') { crs += 2; - buffer[bufpos++] = '\\'; + sb_xappend1 (&buffer, '\\'); break; } FALLTHROUGH; default: - buffer[bufpos++] = *crs++; + sb_xappend1 (&buffer, *crs++); break; } continue; @@ -1094,37 +1036,36 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) { case 't': crs += 2; - buffer[bufpos++] = '\t'; + sb_xappend1 (&buffer, '\t'); continue; case 'n': crs += 2; - buffer[bufpos++] = '\n'; + sb_xappend1 (&buffer, '\n'); continue; case 'r': crs += 2; - buffer[bufpos++] = '\r'; + sb_xappend1 (&buffer, '\r'); continue; case 'f': crs += 2; - buffer[bufpos++] = '\f'; + sb_xappend1 (&buffer, '\f'); continue; case 'b': crs += 2; - buffer[bufpos++] = '\b'; + sb_xappend1 (&buffer, '\b'); continue; case 'a': crs += 2; - buffer[bufpos++] = '\a'; + sb_xappend1 (&buffer, '\a'); continue; case 'e': crs += 2; - buffer[bufpos++] = 0x1b; + sb_xappend1 (&buffer, 0x1b); continue; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': { unsigned int oct_number; - int length; crs = extract_oct (crs + 1, 3, &oct_number); @@ -1142,19 +1083,19 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) oct_number = oct_number - 'A' + 'a'; } - /* Yes, octal escape sequences in the range 0x100..0x1ff are valid. 
*/ - length = u8_uctomb ((unsigned char *) (buffer + bufpos), - oct_number, 2); + char tmpbuf[2]; + int length = + u8_uctomb ((unsigned char *) tmpbuf, oct_number, 2); if (length > 0) - bufpos += length; + sb_xappend_desc (&buffer, + string_desc_new_addr (length, tmpbuf)); } continue; case 'x': { unsigned int hex_number = 0; - int length; crs += 2; if (*crs == '{') @@ -1194,11 +1135,12 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) hex_number = hex_number - 'A' + 'a'; } - length = u8_uctomb ((unsigned char *) (buffer + bufpos), - hex_number, 6); - + char tmpbuf[6]; + int length = + u8_uctomb ((unsigned char *) tmpbuf, hex_number, 6); if (length > 0) - bufpos += length; + sb_xappend_desc (&buffer, + string_desc_new_addr (length, tmpbuf)); } continue; case 'c': @@ -1209,7 +1151,7 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) int the_char = (unsigned char) *crs; if (the_char >= 'a' && the_char <= 'z') the_char = the_char - 'a' + 'A'; - buffer[bufpos++] = the_char ^ 0x40; + sb_xappend1 (&buffer, the_char ^ 0x40); } continue; case 'N': @@ -1231,11 +1173,12 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) { /* FIXME: Convert to upper/lowercase if the corresponding flag is set to true. 
*/ + char tmpbuf[6]; int length = - u8_uctomb ((unsigned char *) (buffer + bufpos), - unicode, 6); + u8_uctomb ((unsigned char *) tmpbuf, unicode, 6); if (length > 0) - bufpos += length; + sb_xappend_desc (&buffer, + string_desc_new_addr (length, tmpbuf)); } free (name); @@ -1277,7 +1220,7 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) ++crs; if (*crs >= 'A' && *crs <= 'Z') { - buffer[bufpos++] = *crs - 'A' + 'a'; + sb_xappend1 (&buffer, *crs - 'A' + 'a'); } else if ((unsigned char) *crs >= 0x80) { @@ -1288,7 +1231,7 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) } else { - buffer[bufpos++] = *crs; + sb_xappend1 (&buffer, *crs); } ++crs; continue; @@ -1296,7 +1239,7 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) ++crs; if (*crs >= 'a' && *crs <= 'z') { - buffer[bufpos++] = *crs - 'a' + 'A'; + sb_xappend1 (&buffer, *crs - 'a' + 'A'); } else if ((unsigned char) *crs >= 0x80) { @@ -1307,12 +1250,12 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) } else { - buffer[bufpos++] = *crs; + sb_xappend1 (&buffer, *crs); } ++crs; continue; case '\\': - buffer[bufpos++] = *crs; + sb_xappend1 (&buffer, *crs); ++crs; continue; default: @@ -1327,7 +1270,7 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) && !((*crs >= 'A' && *crs <= 'Z') || (*crs >= 'A' && *crs <= 'z') || (*crs >= '0' && *crs <= '9') || *crs == '_')) { - buffer[bufpos++] = '\\'; + sb_xappend1 (&buffer, '\\'); backslashed = true; } @@ -1342,50 +1285,41 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) else if (lowercase) { if (*crs >= 'A' && *crs <= 'Z') - buffer[bufpos++] = *crs - 'A' + 'a'; + sb_xappend1 (&buffer, *crs - 'A' + 'a'); else if ((unsigned char) *crs >= 0x80) { if_error (IF_SEVERITY_WARNING, real_file_name, xp->line_number, (size_t)(-1), false, _("unsupported interpolation (\"\\L\") of 8bit character \"%c\""), *crs); - buffer[bufpos++] = *crs; + sb_xappend1 (&buffer, *crs); } 
else - buffer[bufpos++] = *crs; + sb_xappend1 (&buffer, *crs); ++crs; } else if (uppercase) { if (*crs >= 'a' && *crs <= 'z') - buffer[bufpos++] = *crs - 'a' + 'A'; + sb_xappend1 (&buffer, *crs - 'a' + 'A'); else if ((unsigned char) *crs >= 0x80) { if_error (IF_SEVERITY_WARNING, real_file_name, xp->line_number, (size_t)(-1), false, _("unsupported interpolation (\"\\U\") of 8bit character \"%c\""), *crs); - buffer[bufpos++] = *crs; + sb_xappend1 (&buffer, *crs); } else - buffer[bufpos++] = *crs; + sb_xappend1 (&buffer, *crs); ++crs; } else { - buffer[bufpos++] = *crs++; + sb_xappend1 (&buffer, *crs++); } } - /* Ensure room for 1 more byte. */ - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - - buffer[bufpos++] = '\0'; - #if DEBUG_PERL fprintf (stderr, "---> %s\n", buffer); #endif @@ -1393,7 +1327,9 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) /* Replace tp->string. */ free (tp->string); if (tp->type == token_type_string) - tp->string = xstrdup (buffer); + tp->string = sb_xdupfree_c (&buffer); + else + sb_free (&buffer); } /* Parse a variable. 
This is done in several steps: @@ -1404,13 +1340,13 @@ extract_quotelike_pass3 (struct perl_extractor *xp, token_ty *tp) static void extract_variable (struct perl_extractor *xp, token_ty *tp, int first) { - static char *buffer; - static int bufmax = 0; - int bufpos = 0; + struct string_buffer buffer; size_t varbody_length = 0; bool maybe_hash_deref = false; bool maybe_hash_value = false; + sb_init (&buffer); + tp->type = token_type_variable; #if DEBUG_PERL @@ -1428,28 +1364,26 @@ extract_variable (struct perl_extractor *xp, token_ty *tp, int first) while (c == '$' || c == '*' || c == '#' || c == '@' || c == '%') { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); c = phase1_getc (xp); } if (c == EOF) { + sb_free (&buffer); tp->type = token_type_eof; return; } /* Hash references are treated in a special way, when looking for our keywords. */ - if (buffer[0] == '$') + string_desc_t contents = sb_contents (&buffer); + if (string_desc_char_at (contents, 0) == '$') { - if (bufpos == 1) + if (string_desc_length (contents) == 1) maybe_hash_value = true; - else if (bufpos == 2 && buffer[1] == '$') + else if (string_desc_length (contents) == 2 + && string_desc_char_at (contents, 1) == '$') { if (!(c == '{' || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') @@ -1457,13 +1391,7 @@ extract_variable (struct perl_extractor *xp, token_ty *tp, int first) || c == '_' || c == ':' || c == '\'' || c >= 0x80)) { /* Special variable $$ for pid. */ - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = '\0'; - tp->string = xstrdup (buffer); + tp->string = sb_xdupfree_c (&buffer); #if DEBUG_PERL fprintf (stderr, "%s:%d: is PID ($$)\n", real_file_name, xp->line_number); @@ -1474,7 +1402,8 @@ extract_variable (struct perl_extractor *xp, token_ty *tp, int first) } maybe_hash_deref = true; - bufpos = 1; + /* Truncate to length 1. 
*/ + buffer.length = 1; } } @@ -1484,11 +1413,6 @@ extract_variable (struct perl_extractor *xp, token_ty *tp, int first) * in the global namespace but that subtle difference is not interesting * for us. */ - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } if (c == '{') { /* Yuck, we cannot accept ${gettext} as a keyword... Except for @@ -1506,17 +1430,13 @@ extract_variable (struct perl_extractor *xp, token_ty *tp, int first) null_context_region (), null_context_list_iterator, 1, arglist_parser_alloc (xp->mlp, NULL))) { + sb_free (&buffer); tp->type = token_type_eof; return; } - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); ++varbody_length; - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = '}'; + sb_xappend1 (&buffer, '}'); } else { @@ -1525,12 +1445,7 @@ extract_variable (struct perl_extractor *xp, token_ty *tp, int first) || c == '_' || c == ':' || c == '\'' || c >= 0x80) { ++varbody_length; - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); c = phase1_getc (xp); } phase1_ungetc (xp, c); @@ -1544,24 +1459,10 @@ extract_variable (struct perl_extractor *xp, token_ty *tp, int first) if (c == EOF || is_whitespace (c)) phase1_ungetc (xp, c); /* Loser. 
*/ else - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - } - } - - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); + sb_xappend1 (&buffer, c); } - buffer[bufpos++] = '\0'; - tp->string = xstrdup (buffer); + tp->string = sb_xdupfree_c (&buffer); #if DEBUG_PERL fprintf (stderr, "%s:%d: complete variable name: %s\n", @@ -1792,9 +1693,7 @@ static void interpolate_keywords (struct perl_extractor *xp, string_desc_t string, int lineno) { - static char *buffer; - static int bufmax = 0; - int bufpos = 0; + struct string_buffer buffer; flag_region_ty *region; size_t length; size_t index; @@ -1821,6 +1720,8 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, lex_pos_ty pos; + sb_init (&buffer); + if (++(xp->nesting_depth) > MAX_NESTING_DEPTH) if_error (IF_SEVERITY_FATAL_ERROR, logical_file_name, xp->line_number, (size_t)(-1), false, @@ -1874,17 +1775,11 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, c = string_desc_char_at (string, index++); if (state == initial) - bufpos = 0; + buffer.length = 0; if (c == '\n') lineno++; - if (bufpos + 1 >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - switch (state) { case initial: @@ -1894,12 +1789,13 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, if (index == length) { xp->nesting_depth--; + sb_free (&buffer); return; } c = string_desc_char_at (string, index++); break; case '$': - buffer[bufpos++] = '$'; + sb_xappend1 (&buffer, '$'); maybe_hash_deref = false; state = one_dollar; break; @@ -1925,7 +1821,7 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); state = identifier; } else @@ -1940,7 +1836,7 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, || (c 
>= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); state = identifier; } else @@ -1950,46 +1846,60 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, switch (c) { case '-': - if (hash_find_entry (&keywords, buffer, bufpos, &keyword_value) - == 0) - { - flag_context_list_iterator_ty context_iter = - flag_context_list_iterator ( - flag_context_list_table_lookup ( - flag_context_list_table, - buffer, bufpos)); - region = - inheriting_region (null_context_region (), - flag_context_list_iterator_advance ( - &context_iter)); - state = minus; - } - else - state = initial; + { + string_desc_t contents = sb_contents (&buffer); + if (hash_find_entry (&keywords, + string_desc_data (contents), + string_desc_length (contents), + &keyword_value) + == 0) + { + flag_context_list_iterator_ty context_iter = + flag_context_list_iterator ( + flag_context_list_table_lookup ( + flag_context_list_table, + string_desc_data (contents), + string_desc_length (contents))); + region = + inheriting_region (null_context_region (), + flag_context_list_iterator_advance ( + &context_iter)); + state = minus; + } + else + state = initial; + } break; case '[': - bufpos = 0; + buffer.length = 0; state = seen_lbracket; break; case '{': - if (!maybe_hash_deref) - buffer[0] = '%'; - if (hash_find_entry (&keywords, buffer, bufpos, &keyword_value) - == 0) - { - flag_context_list_iterator_ty context_iter = - flag_context_list_iterator ( - flag_context_list_table_lookup ( - flag_context_list_table, - buffer, bufpos)); - region = - inheriting_region (null_context_region (), - flag_context_list_iterator_advance ( - &context_iter)); - state = seen_lbrace; - } - else - state = initial; + { + string_desc_t contents = sb_contents (&buffer); + if (!maybe_hash_deref) + string_desc_set_char_at (contents, 0, '%'); + if (hash_find_entry (&keywords, + string_desc_data (contents), + string_desc_length (contents), + &keyword_value) + == 0) + { + 
flag_context_list_iterator_ty context_iter = + flag_context_list_iterator ( + flag_context_list_table_lookup ( + flag_context_list_table, + string_desc_data (contents), + string_desc_length (contents))); + region = + inheriting_region (null_context_region (), + flag_context_list_iterator_advance ( + &context_iter)); + state = seen_lbrace; + } + else + state = initial; + } break; default: if (!c_isascii ((unsigned char) c) @@ -1998,7 +1908,7 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); } else state = initial; @@ -2009,35 +1919,31 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, switch (c) { case '\'': - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); state = lbracket_squote; break; case '"': - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); state = lbracket_dquote; break; case ']': /* Recursively extract messages from the bracketed expression. 
*/ { - char *substring = xmalloc (bufpos); - memcpy (substring, buffer, bufpos); + string_desc_t substring = sb_contents (&buffer); struct perl_extractor *rxp = XMALLOC (struct perl_extractor); rxp->mlp = xp->mlp; - sf_istream_init_from_string_desc ( - &rxp->input, - string_desc_new_addr (bufpos, substring)); + sf_istream_init_from_string_desc (&rxp->input, substring); rxp->line_number = xp->line_number; perl_extractor_init_rest (rxp); extract_perl_input (rxp); free (rxp); - free (substring); } break; default: - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); break; } break; @@ -2045,7 +1951,7 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, switch (c) { case '"': - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); state = seen_lbracket; break; case '\\': @@ -2059,17 +1965,17 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, c = string_desc_char_at (string, index++); if (c == '\"') { - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); } else { - buffer[bufpos++] = '\\'; - buffer[bufpos++] = c; + sb_xappend1 (&buffer, '\\'); + sb_xappend1 (&buffer, c); } } break; default: - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); break; } break; @@ -2077,7 +1983,7 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, switch (c) { case '\'': - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); state = seen_lbracket; break; case '\\': @@ -2091,17 +1997,17 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, c = string_desc_char_at (string, index++); if (c == '\'') { - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); } else { - buffer[bufpos++] = '\\'; - buffer[bufpos++] = c; + sb_xappend1 (&buffer, '\\'); + sb_xappend1 (&buffer, c); } } break; default: - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); break; } break; @@ -2136,12 +2042,12 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, break; case '\'': pos.line_number = lineno; - bufpos = 0; + buffer.length 
= 0; state = lbrace_squote; break; case '"': pos.line_number = lineno; - bufpos = 0; + buffer.length = 0; state = lbrace_dquote; break; default: @@ -2150,8 +2056,8 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { pos.line_number = lineno; - bufpos = 0; - buffer[bufpos++] = c; + buffer.length = 0; + sb_xappend1 (&buffer, c); state = lbrace_barekey; } else @@ -2167,16 +2073,12 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, { case '"': /* The resulting string has to be interpolated twice. */ - buffer[bufpos] = '\0'; - token.string = xstrdup (buffer); + token.string = sb_xdupfree_c (&buffer); + sb_init (&buffer); extract_quotelike_pass3 (xp, &token); if (token.type == token_type_string) { - /* The string can only shrink with interpolation (because - we ignore \Q). */ - if (!(strlen (token.string) <= bufpos)) - abort (); - strcpy (buffer, token.string); + sb_xappend_c (&buffer, token.string); free (token.string); } state = wait_rbrace; @@ -2192,17 +2094,17 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, c = string_desc_char_at (string, index++); if (c == '\"') { - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); } else { - buffer[bufpos++] = '\\'; - buffer[bufpos++] = c; + sb_xappend1 (&buffer, '\\'); + sb_xappend1 (&buffer, c); } } break; default: - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); break; } break; @@ -2223,17 +2125,17 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, c = string_desc_char_at (string, index++); if (c == '\'') { - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); } else { - buffer[bufpos++] = '\\'; - buffer[bufpos++] = c; + sb_xappend1 (&buffer, '\\'); + sb_xappend1 (&buffer, c); } } break; default: - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); break; } break; @@ -2242,7 +2144,7 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, || c == '_' || (c >= '0' 
&& c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); break; } else if (is_whitespace (c)) @@ -2266,8 +2168,7 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, case '}': if (token.type == token_type_string) { - buffer[bufpos] = '\0'; - token.string = xstrdup (buffer); + token.string = sb_xdupfree_c (&buffer); extract_quotelike_pass3 (xp, &token); if (token.type == token_type_string) { @@ -2275,6 +2176,7 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, false, region, &pos, NULL, savable_comment, true); } + sb_init (&buffer); } FALLTHROUGH; default: @@ -2287,6 +2189,7 @@ interpolate_keywords (struct perl_extractor *xp, string_desc_t string, } xp->nesting_depth--; + sb_free (&buffer); return; } @@ -2384,9 +2287,6 @@ prefer_regexp_over_division (token_type_ty type) static void x_perl_prelex (struct perl_extractor *xp, token_ty *tp) { - static char *buffer; - static int bufmax; - int bufpos; int c; for (;;) @@ -2456,218 +2356,220 @@ x_perl_prelex (struct perl_extractor *xp, token_ty *tp) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* Symbol, or part of a number. 
*/ - bufpos = 0; - for (;;) - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = phase1_getc (xp); - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '_': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - continue; - - default: - phase1_ungetc (xp, c); - break; - } - break; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - - if (strcmp (buffer, "__END__") == 0 - || strcmp (buffer, "__DATA__") == 0) - { - xp->end_of_file = true; - tp->type = token_type_eof; - return; - } - else if (strcmp (buffer, "and") == 0 - || strcmp (buffer, "cmp") == 0 - || strcmp (buffer, "eq") == 0 - || strcmp (buffer, "if") == 0 - || strcmp (buffer, "ge") == 0 - || strcmp (buffer, "gt") == 0 - || strcmp (buffer, "le") == 0 - || strcmp (buffer, "lt") == 0 - || strcmp (buffer, "ne") == 0 - || strcmp (buffer, "not") == 0 - || strcmp (buffer, "or") == 0 - || strcmp (buffer, "unless") == 0 - || strcmp (buffer, "while") == 0 - || strcmp (buffer, "xor") == 0) - { - tp->type = token_type_named_op; - tp->string = xstrdup (buffer); - return; - } - else if (strcmp (buffer, "s") == 0 - || strcmp (buffer, "y") == 0 - || strcmp (buffer, "tr") == 0) - { - int delim = phase1_getc (xp); - - while (is_whitespace (delim)) - delim = phase2_getc (xp); + { + struct string_buffer buffer; + sb_init 
(&buffer); + for (;;) + { + sb_xappend1 (&buffer, c); + c = phase1_getc (xp); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + continue; + + default: + phase1_ungetc (xp, c); + break; + } + break; + } + const char *contents = sb_xcontents_c (&buffer); - if (delim == EOF) - { - tp->type = token_type_eof; - return; - } - if ((delim >= '0' && delim <= '9') - || (delim >= 'A' && delim <= 'Z') - || (delim >= 'a' && delim <= 'z')) - { - /* False positive. 
*/ - phase2_ungetc (xp, delim); - tp->type = token_type_symbol; - tp->sub_type = symbol_type_none; - tp->string = xstrdup (buffer); - return; - } - extract_triple_quotelike (xp, tp, delim, - buffer[0] == 's' && delim != '\''); + if (strcmp (contents, "__END__") == 0 + || strcmp (contents, "__DATA__") == 0) + { + sb_free (&buffer); + xp->end_of_file = true; + tp->type = token_type_eof; + return; + } + else if (strcmp (contents, "and") == 0 + || strcmp (contents, "cmp") == 0 + || strcmp (contents, "eq") == 0 + || strcmp (contents, "if") == 0 + || strcmp (contents, "ge") == 0 + || strcmp (contents, "gt") == 0 + || strcmp (contents, "le") == 0 + || strcmp (contents, "lt") == 0 + || strcmp (contents, "ne") == 0 + || strcmp (contents, "not") == 0 + || strcmp (contents, "or") == 0 + || strcmp (contents, "unless") == 0 + || strcmp (contents, "while") == 0 + || strcmp (contents, "xor") == 0) + { + tp->type = token_type_named_op; + tp->string = sb_xdupfree_c (&buffer); + return; + } + else if (strcmp (contents, "s") == 0 + || strcmp (contents, "y") == 0 + || strcmp (contents, "tr") == 0) + { + int delim = phase1_getc (xp); - /* Eat the following modifiers. */ - do - c = phase1_getc (xp); - while (c >= 'a' && c <= 'z'); - phase1_ungetc (xp, c); - return; - } - else if (strcmp (buffer, "m") == 0) - { - int delim = phase1_getc (xp); + while (is_whitespace (delim)) + delim = phase2_getc (xp); - while (is_whitespace (delim)) - delim = phase2_getc (xp); + if (delim == EOF) + { + sb_free (&buffer); + tp->type = token_type_eof; + return; + } + if ((delim >= '0' && delim <= '9') + || (delim >= 'A' && delim <= 'Z') + || (delim >= 'a' && delim <= 'z')) + { + /* False positive. */ + phase2_ungetc (xp, delim); + tp->type = token_type_symbol; + tp->sub_type = symbol_type_none; + tp->string = sb_xdupfree_c (&buffer); + return; + } + extract_triple_quotelike (xp, tp, delim, + contents[0] == 's' && delim != '\''); + sb_free (&buffer); + + /* Eat the following modifiers. 
*/ + do + c = phase1_getc (xp); + while (c >= 'a' && c <= 'z'); + phase1_ungetc (xp, c); + return; + } + else if (strcmp (contents, "m") == 0) + { + int delim = phase1_getc (xp); - if (delim == EOF) - { - tp->type = token_type_eof; - return; - } - if ((delim >= '0' && delim <= '9') - || (delim >= 'A' && delim <= 'Z') - || (delim >= 'a' && delim <= 'z')) - { - /* False positive. */ - phase2_ungetc (xp, delim); - tp->type = token_type_symbol; - tp->sub_type = symbol_type_none; - tp->string = xstrdup (buffer); - return; - } - extract_quotelike (xp, tp, delim); - if (delim != '\'') - interpolate_keywords (xp, string_desc_from_c (tp->string), - xp->line_number); - free (tp->string); - drop_reference (tp->comment); - tp->type = token_type_regex_op; + while (is_whitespace (delim)) + delim = phase2_getc (xp); - /* Eat the following modifiers. */ - do - c = phase1_getc (xp); - while (c >= 'a' && c <= 'z'); - phase1_ungetc (xp, c); - return; - } - else if (strcmp (buffer, "qq") == 0 - || strcmp (buffer, "q") == 0 - || strcmp (buffer, "qx") == 0 - || strcmp (buffer, "qw") == 0 - || strcmp (buffer, "qr") == 0) - { - /* The qw (...) construct is not really a string but we - can treat in the same manner and then pretend it is - a symbol. Rationale: Saying "qw (foo bar)" is the - same as "my @list = ('foo', 'bar'); @list;". */ + if (delim == EOF) + { + sb_free (&buffer); + tp->type = token_type_eof; + return; + } + if ((delim >= '0' && delim <= '9') + || (delim >= 'A' && delim <= 'Z') + || (delim >= 'a' && delim <= 'z')) + { + /* False positive. */ + phase2_ungetc (xp, delim); + tp->type = token_type_symbol; + tp->sub_type = symbol_type_none; + tp->string = sb_xdupfree_c (&buffer); + return; + } + extract_quotelike (xp, tp, delim); + sb_free (&buffer); + if (delim != '\'') + interpolate_keywords (xp, string_desc_from_c (tp->string), + xp->line_number); + free (tp->string); + drop_reference (tp->comment); + tp->type = token_type_regex_op; + + /* Eat the following modifiers. 
*/ + do + c = phase1_getc (xp); + while (c >= 'a' && c <= 'z'); + phase1_ungetc (xp, c); + return; + } + else if (strcmp (contents, "qq") == 0 + || strcmp (contents, "q") == 0 + || strcmp (contents, "qx") == 0 + || strcmp (contents, "qw") == 0 + || strcmp (contents, "qr") == 0) + { + /* The qw (...) construct is not really a string but we + can treat in the same manner and then pretend it is + a symbol. Rationale: Saying "qw (foo bar)" is the + same as "my @list = ('foo', 'bar'); @list;". */ - int delim = phase1_getc (xp); + int delim = phase1_getc (xp); - while (is_whitespace (delim)) - delim = phase2_getc (xp); + while (is_whitespace (delim)) + delim = phase2_getc (xp); - if (delim == EOF) - { - tp->type = token_type_eof; - return; - } + if (delim == EOF) + { + sb_free (&buffer); + tp->type = token_type_eof; + return; + } - if ((delim >= '0' && delim <= '9') - || (delim >= 'A' && delim <= 'Z') - || (delim >= 'a' && delim <= 'z')) - { - /* False positive. */ - phase2_ungetc (xp, delim); - tp->type = token_type_symbol; - tp->sub_type = symbol_type_none; - tp->string = xstrdup (buffer); - return; - } + if ((delim >= '0' && delim <= '9') + || (delim >= 'A' && delim <= 'Z') + || (delim >= 'a' && delim <= 'z')) + { + /* False positive. 
*/ + phase2_ungetc (xp, delim); + tp->type = token_type_symbol; + tp->sub_type = symbol_type_none; + tp->string = sb_xdupfree_c (&buffer); + return; + } - extract_quotelike (xp, tp, delim); + extract_quotelike (xp, tp, delim); - switch (buffer[1]) - { - case 'q': - case 'x': - tp->type = token_type_string; - tp->sub_type = string_type_qq; - interpolate_keywords (xp, string_desc_from_c (tp->string), - xp->line_number); - break; - case 'r': - drop_reference (tp->comment); - tp->type = token_type_regex_op; - break; - case 'w': - drop_reference (tp->comment); - tp->type = token_type_symbol; - tp->sub_type = symbol_type_none; - break; - case '\0': - tp->type = token_type_string; - tp->sub_type = string_type_q; - break; - default: - abort (); - } - return; - } - else if ((buffer[0] >= '0' && buffer[0] <= '9') || buffer[0] == '.') - { - tp->type = token_type_number; - return; - } - tp->type = token_type_symbol; - tp->sub_type = (strcmp (buffer, "sub") == 0 - ? symbol_type_sub - : symbol_type_none); - tp->string = xstrdup (buffer); + switch (contents[1]) + { + case 'q': + case 'x': + tp->type = token_type_string; + tp->sub_type = string_type_qq; + interpolate_keywords (xp, string_desc_from_c (tp->string), + xp->line_number); + break; + case 'r': + drop_reference (tp->comment); + tp->type = token_type_regex_op; + break; + case 'w': + drop_reference (tp->comment); + tp->type = token_type_symbol; + tp->sub_type = symbol_type_none; + break; + case '\0': + tp->type = token_type_string; + tp->sub_type = string_type_q; + break; + default: + abort (); + } + sb_free (&buffer); + return; + } + else if ((contents[0] >= '0' && contents[0] <= '9') + || contents[0] == '.') + { + sb_free (&buffer); + tp->type = token_type_number; + return; + } + tp->type = token_type_symbol; + tp->sub_type = (strcmp (contents, "sub") == 0 + ? 
symbol_type_sub + : symbol_type_none); + tp->string = sb_xdupfree_c (&buffer); + } return; case '"': @@ -2786,22 +2688,19 @@ x_perl_prelex (struct perl_extractor *xp, token_ty *tp) || (c >= 'a' && c <= 'z') || c == '_') { - bufpos = 0; + struct string_buffer buffer; + sb_init (&buffer); while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_' || c >= 0x80) { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); c = phase1_getc (xp); } if (c == EOF) { + sb_free (&buffer); tp->type = token_type_eof; return; } @@ -2809,13 +2708,7 @@ x_perl_prelex (struct perl_extractor *xp, token_ty *tp) { char *string; phase1_ungetc (xp, c); - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = '\0'; - string = get_here_document (xp, buffer); + string = get_here_document (xp, sb_xdupfree_c (&buffer)); tp->string = string; tp->type = token_type_string; tp->sub_type = string_type_qq; diff --git a/gettext-tools/src/x-php.c b/gettext-tools/src/x-php.c index 4060e5149..3aad337bf 100644 --- a/gettext-tools/src/x-php.c +++ b/gettext-tools/src/x-php.c @@ -42,6 +42,7 @@ #include "xg-message.h" #include "if-error.h" #include "xalloc.h" +#include "string-buffer.h" #include "gettext.h" #define _(s) gettext(s) @@ -848,149 +849,144 @@ process_heredoc (struct php_extractor *xp, const char *doc, int doc_line_number) { bool is_constant = true; int lineno = doc_line_number; - int bufmax = strlen (doc) + 1; - char *buffer = xmalloc (bufmax); - int bufpos; heredoc_continued: - bufpos = 0; - for (;;) - { - char c = *doc++; - if (c == '\0') - break; - if (c == '\n') - lineno++; - if (c == '$') - { - c = *doc++; - if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') - || c == '_' || c >= 0x7f) - { - /* String with variables. 
*/ - is_constant = false; - continue; - } - if (c == '{') - /* Heredoc string with embedded expressions. */ - goto heredoc_with_embedded_expressions; - --doc; - c = '$'; - } - if (c == '{') - { - c = *doc++; - if (c == '$') - /* Heredoc string with embedded expressions. */ - goto heredoc_with_embedded_expressions; - --doc; - c = '{'; - } - if (c == '\\') - { - int n, j; - - c = *doc++; - switch (c) - { - case '\\': - case '$': - break; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + char c = *doc++; + if (c == '\0') + break; + if (c == '\n') + lineno++; + if (c == '$') + { + c = *doc++; + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') + || c == '_' || c >= 0x7f) + { + /* String with variables. */ + is_constant = false; + continue; + } + if (c == '{') + { + /* Heredoc string with embedded expressions. */ + sb_free (&buffer); + goto heredoc_with_embedded_expressions; + } + --doc; + c = '$'; + } + if (c == '{') + { + c = *doc++; + if (c == '$') + { + /* Heredoc string with embedded expressions. 
*/ + sb_free (&buffer); + goto heredoc_with_embedded_expressions; + } + --doc; + c = '{'; + } + if (c == '\\') + { + int n, j; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - n = 0; - for (j = 0; j < 3; ++j) - { - n = n * 8 + c - '0'; - c = *doc++; - switch (c) - { - default: - break; + c = *doc++; + switch (c) + { + case '\\': + case '$': + break; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - continue; - } - break; - } - --doc; - c = n; - break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + n = 0; + for (j = 0; j < 3; ++j) + { + n = n * 8 + c - '0'; + c = *doc++; + switch (c) + { + default: + break; - case 'x': - n = 0; - for (j = 0; j < 2; ++j) - { - c = *doc++; - switch (c) - { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - n = n * 16 + c - '0'; - break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - n = n * 16 + 10 + c - 'A'; - break; - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': - n = n * 16 + 10 + c - 'a'; - break; - default: - --doc; - c = 0; - break; - } - if (c == 0) + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + continue; + } break; - } - if (j == 0) - { - --doc; - c = '\\'; - } - else + } + --doc; c = n; - break; + break; - case 'n': - c = '\n'; - break; - case 't': - c = '\t'; - break; - case 'r': - c = '\r'; - break; + case 'x': + n = 0; + for (j = 0; j < 2; ++j) + { + c = *doc++; + switch (c) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + n = n * 16 + c - '0'; + break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + n = n * 16 + 10 + c - 'A'; + break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + n = n * 16 + 10 + c - 'a'; + break; + default: + --doc; + c = 0; + break; + } + if (c == 0) + break; + } + if 
(j == 0) + { + --doc; + c = '\\'; + } + else + c = n; + break; - default: - --doc; - c = '\\'; - break; - } - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - } - if (bufpos >= bufmax) - { - bufmax = bufmax + 1; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - if (is_constant) - return buffer; - else - { - free (buffer); - return NULL; - } + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case 'r': + c = '\r'; + break; + + default: + --doc; + c = '\\'; + break; + } + } + sb_xappend1 (&buffer, c); + } + if (is_constant) + return sb_xdupfree_c (&buffer); + else + { + sb_free (&buffer); + return NULL; + } + } heredoc_with_embedded_expressions: is_constant = false; @@ -1002,7 +998,8 @@ process_heredoc (struct php_extractor *xp, const char *doc, int doc_line_number) nesting_stack[nesting_stack_depth++] = '}'; /* Find the extent of the expression. */ - bufpos = 0; + struct string_buffer buffer; + sb_init (&buffer); for (;;) { char c = *doc; @@ -1043,29 +1040,22 @@ process_heredoc (struct php_extractor *xp, const char *doc, int doc_line_number) _("unterminated expression in heredoc contains unbalanced '%c'"), c); } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); } /* Recursively extract messages from the expression. 
*/ - char *substring = xmalloc (bufpos); - memcpy (substring, buffer, bufpos); + string_desc_t substring = sb_contents (&buffer); struct php_extractor *rxp = XMALLOC (struct php_extractor); rxp->mlp = xp->mlp; - sf_istream_init_from_string_desc (&rxp->input, - string_desc_new_addr (bufpos, substring)); + sf_istream_init_from_string_desc (&rxp->input, substring); rxp->line_number = xp->line_number; php_extractor_init_rest (rxp); extract_php_input (rxp); free (rxp); - free (substring); + sb_free (&buffer); free (nesting_stack); } goto heredoc_continued; @@ -1074,9 +1064,6 @@ process_heredoc (struct php_extractor *xp, const char *doc, int doc_line_number) static void phase4_get (struct php_extractor *xp, token_ty *tp) { - static char *buffer; - static int bufmax; - int bufpos; int c; if (xp->phase4_pushback_length) @@ -1139,245 +1126,229 @@ phase4_get (struct php_extractor *xp, token_ty *tp) case 240: case 241: case 242: case 243: case 244: case 245: case 246: case 247: case 248: case 249: case 250: case 251: case 252: case 253: case 254: case 255: - bufpos = 0; - for (;;) - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = phase1_getc (xp); - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '_': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 128: case 129: case 130: case 131: case 132: case 133: - case 134: case 135: case 
136: case 137: case 138: case 139: - case 140: case 141: case 142: case 143: case 144: case 145: - case 146: case 147: case 148: case 149: case 150: case 151: - case 152: case 153: case 154: case 155: case 156: case 157: - case 158: case 159: case 160: case 161: case 162: case 163: - case 164: case 165: case 166: case 167: case 168: case 169: - case 170: case 171: case 172: case 173: case 174: case 175: - case 176: case 177: case 178: case 179: case 180: case 181: - case 182: case 183: case 184: case 185: case 186: case 187: - case 188: case 189: case 190: case 191: case 192: case 193: - case 194: case 195: case 196: case 197: case 198: case 199: - case 200: case 201: case 202: case 203: case 204: case 205: - case 206: case 207: case 208: case 209: case 210: case 211: - case 212: case 213: case 214: case 215: case 216: case 217: - case 218: case 219: case 220: case 221: case 222: case 223: - case 224: case 225: case 226: case 227: case 228: case 229: - case 230: case 231: case 232: case 233: case 234: case 235: - case 236: case 237: case 238: case 239: case 240: case 241: - case 242: case 243: case 244: case 245: case 246: case 247: - case 248: case 249: case 250: case 251: case 252: case 253: - case 254: case 255: - continue; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + sb_xappend1 (&buffer, c); + c = phase1_getc (xp); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case 
'5': case '6': case '7': case '8': case '9': + case 128: case 129: case 130: case 131: case 132: case 133: + case 134: case 135: case 136: case 137: case 138: case 139: + case 140: case 141: case 142: case 143: case 144: case 145: + case 146: case 147: case 148: case 149: case 150: case 151: + case 152: case 153: case 154: case 155: case 156: case 157: + case 158: case 159: case 160: case 161: case 162: case 163: + case 164: case 165: case 166: case 167: case 168: case 169: + case 170: case 171: case 172: case 173: case 174: case 175: + case 176: case 177: case 178: case 179: case 180: case 181: + case 182: case 183: case 184: case 185: case 186: case 187: + case 188: case 189: case 190: case 191: case 192: case 193: + case 194: case 195: case 196: case 197: case 198: case 199: + case 200: case 201: case 202: case 203: case 204: case 205: + case 206: case 207: case 208: case 209: case 210: case 211: + case 212: case 213: case 214: case 215: case 216: case 217: + case 218: case 219: case 220: case 221: case 222: case 223: + case 224: case 225: case 226: case 227: case 228: case 229: + case 230: case 231: case 232: case 233: case 234: case 235: + case 236: case 237: case 238: case 239: case 240: case 241: + case 242: case 243: case 244: case 245: case 246: case 247: + case 248: case 249: case 250: case 251: case 252: case 253: + case 254: case 255: + continue; - default: - phase1_ungetc (xp, c); - break; - } - break; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->string = xstrdup (buffer); - tp->type = token_type_symbol; + default: + phase1_ungetc (xp, c); + break; + } + break; + } + tp->string = sb_xdupfree_c (&buffer); + tp->type = token_type_symbol; + } return; case '\'': /* Single-quoted string literal. 
*/ - bufpos = 0; - for (;;) - { - c = phase1_getc (xp); - if (c == EOF || c == '\'') - break; - if (c == '\\') - { - c = phase1_getc (xp); - if (c != '\\' && c != '\'') - { - phase1_ungetc (xp, c); - c = '\\'; - } - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->type = token_type_string_literal; - tp->string = xstrdup (buffer); - tp->comment = add_reference (savable_comment); + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + c = phase1_getc (xp); + if (c == EOF || c == '\'') + break; + if (c == '\\') + { + c = phase1_getc (xp); + if (c != '\\' && c != '\'') + { + phase1_ungetc (xp, c); + c = '\\'; + } + } + sb_xappend1 (&buffer, c); + } + tp->type = token_type_string_literal; + tp->string = sb_xdupfree_c (&buffer); + tp->comment = add_reference (savable_comment); + } return; case '"': /* Double-quoted string literal. */ tp->type = token_type_string_literal; string_literal_continued: - bufpos = 0; - for (;;) - { - c = phase1_getc (xp); - if (c == EOF || c == '"') - break; - if (c == '$') - { - c = phase1_getc (xp); - if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') - || c == '_' || c >= 0x7f) - { - /* String with variables. */ - tp->type = token_type_other; - continue; - } - if (c == '{') - /* String with embedded expressions. */ - goto string_with_embedded_expressions; - phase1_ungetc (xp, c); - c = '$'; - } - if (c == '{') - { - c = phase1_getc (xp); - if (c == '$') - /* String with embedded expressions. 
*/ - goto string_with_embedded_expressions; - phase1_ungetc (xp, c); - c = '{'; - } - if (c == '\\') - { - int n, j; - - c = phase1_getc (xp); - switch (c) - { - case '"': - case '\\': - case '$': - break; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + c = phase1_getc (xp); + if (c == EOF || c == '"') + break; + if (c == '$') + { + c = phase1_getc (xp); + if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') + || c == '_' || c >= 0x7f) + { + /* String with variables. */ + tp->type = token_type_other; + continue; + } + if (c == '{') + { + /* String with embedded expressions. */ + sb_free (&buffer); + goto string_with_embedded_expressions; + } + phase1_ungetc (xp, c); + c = '$'; + } + if (c == '{') + { + c = phase1_getc (xp); + if (c == '$') + { + /* String with embedded expressions. */ + sb_free (&buffer); + goto string_with_embedded_expressions; + } + phase1_ungetc (xp, c); + c = '{'; + } + if (c == '\\') + { + int n, j; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - n = 0; - for (j = 0; j < 3; ++j) - { - n = n * 8 + c - '0'; - c = phase1_getc (xp); - switch (c) - { - default: - break; + c = phase1_getc (xp); + switch (c) + { + case '"': + case '\\': + case '$': + break; - case '0': case '1': case '2': case '3': - case '4': case '5': case '6': case '7': - continue; - } - break; - } - phase1_ungetc (xp, c); - c = n; - break; + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + n = 0; + for (j = 0; j < 3; ++j) + { + n = n * 8 + c - '0'; + c = phase1_getc (xp); + switch (c) + { + default: + break; - case 'x': - n = 0; - for (j = 0; j < 2; ++j) - { - c = phase1_getc (xp); - switch (c) - { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - n = n * 16 + c - '0'; - break; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': - n = n * 16 + 10 + c - 'A'; - break; - case 'a': case 'b': case 'c': case 'd': case 'e': 
- case 'f': - n = n * 16 + 10 + c - 'a'; - break; - default: - phase1_ungetc (xp, c); - c = 0; - break; - } - if (c == 0) + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + continue; + } break; - } - if (j == 0) - { - phase1_ungetc (xp, 'x'); - c = '\\'; - } - else + } + phase1_ungetc (xp, c); c = n; - break; + break; - case 'n': - c = '\n'; - break; - case 't': - c = '\t'; - break; - case 'r': - c = '\r'; - break; + case 'x': + n = 0; + for (j = 0; j < 2; ++j) + { + c = phase1_getc (xp); + switch (c) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + n = n * 16 + c - '0'; + break; + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + n = n * 16 + 10 + c - 'A'; + break; + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + n = n * 16 + 10 + c - 'a'; + break; + default: + phase1_ungetc (xp, c); + c = 0; + break; + } + if (c == 0) + break; + } + if (j == 0) + { + phase1_ungetc (xp, 'x'); + c = '\\'; + } + else + c = n; + break; - default: - phase1_ungetc (xp, c); - c = '\\'; - break; - } - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - if (tp->type == token_type_string_literal) - { - tp->string = xstrdup (buffer); - tp->comment = add_reference (savable_comment); - } + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case 'r': + c = '\r'; + break; + + default: + phase1_ungetc (xp, c); + c = '\\'; + break; + } + } + sb_xappend1 (&buffer, c); + } + if (tp->type == token_type_string_literal) + { + tp->string = sb_xdupfree_c (&buffer); + tp->comment = add_reference (savable_comment); + } + else + sb_free (&buffer); + } return; string_with_embedded_expressions: @@ -1390,7 +1361,8 @@ phase4_get (struct php_extractor *xp, token_ty *tp) 
nesting_stack[nesting_stack_depth++] = '}'; /* Find the extent of the expression. */ - bufpos = 0; + struct string_buffer buffer; + sb_init (&buffer); for (;;) { c = phase1_getc (xp); @@ -1430,30 +1402,22 @@ phase4_get (struct php_extractor *xp, token_ty *tp) _("unterminated expression in string literal contains unbalanced '%c'"), c); } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); } /* Recursively extract messages from the expression. */ - char *substring = xmalloc (bufpos); - memcpy (substring, buffer, bufpos); + string_desc_t substring = sb_contents (&buffer); struct php_extractor *rxp = XMALLOC (struct php_extractor); rxp->mlp = xp->mlp; - sf_istream_init_from_string_desc ( - &rxp->input, - string_desc_new_addr (bufpos, substring)); + sf_istream_init_from_string_desc (&rxp->input, substring); rxp->line_number = xp->line_number; php_extractor_init_rest (rxp); extract_php_input (rxp); free (rxp); - free (substring); + sb_free (&buffer); free (nesting_stack); } goto string_literal_continued; @@ -1542,31 +1506,30 @@ phase4_get (struct php_extractor *xp, token_ty *tp) c = phase3_getc (xp); while (c == ' ' || c == '\t' || c == '\n' || c == '\r'); - bufpos = 0; + struct string_buffer buffer; + sb_init (&buffer); do { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); c = phase3_getc (xp); } while (c != EOF && c != '\n' && c != '\r'); - /* buffer[0..bufpos-1] now contains the label + /* buffer now contains the label (including single or double quotes). 
*/ int doc_line_number = xp->line_number; bool heredoc = true; - int label_start = 0; - int label_end = bufpos; - if (bufpos >= 2 - && ((buffer[label_start] == '\'' && buffer[label_end - 1] == '\'') - || (buffer[label_start] == '"' && buffer[label_end - 1] == '"'))) + string_desc_t label = sb_contents (&buffer); + size_t label_start = 0; + size_t label_end = string_desc_length (label); + if (label_end >= 2 + && ((string_desc_char_at (label, label_start) == '\'' + && string_desc_char_at (label, label_end - 1) == '\'') + || (string_desc_char_at (label, label_start) == '"' + && string_desc_char_at (label, label_end - 1) == '"'))) { - heredoc = (buffer[label_start] == '"'); + heredoc = (string_desc_char_at (label, label_start) == '"'); label_start++; label_end--; } @@ -1607,7 +1570,7 @@ phase4_get (struct php_extractor *xp, token_ty *tp) } else if (in_label_pos >= 0 && in_label_pos < label_end - label_start - && c == buffer[label_start + in_label_pos]) + && c == string_desc_char_at (label, label_start + in_label_pos)) { in_label_pos++; } @@ -1680,6 +1643,8 @@ phase4_get (struct php_extractor *xp, token_ty *tp) } } + sb_free (&buffer); + /* The contents is the substring [doc, doc + doc_start_of_line). */ doc_len = doc_start_of_line; diff --git a/gettext-tools/src/x-python.c b/gettext-tools/src/x-python.c index 0c85c3799..995200eb6 100644 --- a/gettext-tools/src/x-python.c +++ b/gettext-tools/src/x-python.c @@ -49,6 +49,7 @@ #include "xerror.h" #include "xvasprintf.h" #include "xalloc.h" +#include "string-buffer.h" #include "c-strstr.h" #include "c-ctype.h" #include "po-charset.h" @@ -1315,19 +1316,12 @@ phase5_get (token_ty *tp) symbol: /* Symbol, or part of a number. 
*/ { - static char *buffer; - static int bufmax; - int bufpos; + struct string_buffer buffer; - bufpos = 0; + sb_init (&buffer); for (;;) { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); c = phase3_getc (); switch (c) { @@ -1351,13 +1345,7 @@ phase5_get (token_ty *tp) } break; } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->string = xstrdup (buffer); + tp->string = sb_xdupfree_c (&buffer); tp->type = token_type_symbol; return; } diff --git a/gettext-tools/src/x-rst.c b/gettext-tools/src/x-rst.c index 3e8507978..b93be2f32 100644 --- a/gettext-tools/src/x-rst.c +++ b/gettext-tools/src/x-rst.c @@ -40,6 +40,7 @@ #include "xg-message.h" #include "if-error.h" #include "xalloc.h" +#include "string-buffer.h" #include "gettext.h" #define _(s) gettext(s) @@ -69,8 +70,6 @@ extract_rst (FILE *f, flag_context_list_table_ty *flag_table, msgdomain_list_ty *mdlp) { - static char *buffer; - static int bufmax; message_list_ty *mlp = mdlp->item[0]->messages; int line_number; @@ -78,7 +77,6 @@ extract_rst (FILE *f, for (;;) { int c; - int bufpos; char *location; char *msgid; lex_pos_ty pos; @@ -107,114 +105,104 @@ extract_rst (FILE *f, } /* Read ModuleName.ConstName. 
*/ - bufpos = 0; - for (;;) - { - if (c == EOF || c == '\n') - if_error (IF_SEVERITY_FATAL_ERROR, - logical_filename, line_number, (size_t)(-1), false, - _("invalid string definition")); - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - if (c == '=') - break; - buffer[bufpos++] = c; - c = getc (f); - if (c == EOF && ferror (f)) - goto bomb; - } - buffer[bufpos] = '\0'; - location = xstrdup (buffer); + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + if (c == EOF || c == '\n') + if_error (IF_SEVERITY_FATAL_ERROR, + logical_filename, line_number, (size_t)(-1), false, + _("invalid string definition")); + if (c == '=') + break; + sb_xappend1 (&buffer, c); + c = getc (f); + if (c == EOF && ferror (f)) + { + sb_free (&buffer); + goto bomb; + } + } + location = sb_xdupfree_c (&buffer); + } /* Read StringExpression. */ - bufpos = 0; - for (;;) - { - c = getc (f); - if (c == EOF) - break; - else if (c == '\n') - { - line_number++; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + c = getc (f); + if (c == EOF) break; - } - else if (c == '\'') - { - for (;;) - { - c = getc (f); - /* Embedded single quotes like 'abc''def' don't occur. - See fpc-1.0.4/compiler/cresstr.pas. */ - if (c == EOF || c == '\n' || c == '\'') - break; - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - } - if (c == EOF) + else if (c == '\n') + { + line_number++; break; - else if (c == '\n') - { - line_number++; + } + else if (c == '\'') + { + for (;;) + { + c = getc (f); + /* Embedded single quotes like 'abc''def' don't occur. + See fpc-1.0.4/compiler/cresstr.pas. 
*/ + if (c == EOF || c == '\n' || c == '\'') + break; + sb_xappend1 (&buffer, c); + } + if (c == EOF) break; - } - } - else if (c == '#') - { - int n; - c = getc (f); - if (c == EOF && ferror (f)) - goto bomb; - if (c == EOF || !c_isdigit (c)) - if_error (IF_SEVERITY_FATAL_ERROR, - logical_filename, line_number, (size_t)(-1), false, - _("missing number after #")); - n = (c - '0'); - for (;;) - { - c = getc (f); - if (c == EOF || !c_isdigit (c)) + else if (c == '\n') + { + line_number++; break; - n = n * 10 + (c - '0'); - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = (unsigned char) n; - if (c == EOF) - break; - ungetc (c, f); - } - else if (c == '+') - { - c = getc (f); - if (c == EOF) - break; - if (c == '\n') - line_number++; - else + } + } + else if (c == '#') + { + int n; + c = getc (f); + if (c == EOF && ferror (f)) + { + sb_free (&buffer); + goto bomb; + } + if (c == EOF || !c_isdigit (c)) + if_error (IF_SEVERITY_FATAL_ERROR, + logical_filename, line_number, (size_t)(-1), false, + _("missing number after #")); + n = (c - '0'); + for (;;) + { + c = getc (f); + if (c == EOF || !c_isdigit (c)) + break; + n = n * 10 + (c - '0'); + } + sb_xappend1 (&buffer, (unsigned char) n); + if (c == EOF) + break; ungetc (c, f); - } - else - if_error (IF_SEVERITY_FATAL_ERROR, - logical_filename, line_number, (size_t)(-1), false, - _("invalid string expression")); - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - msgid = xstrdup (buffer); + } + else if (c == '+') + { + c = getc (f); + if (c == EOF) + break; + if (c == '\n') + line_number++; + else + ungetc (c, f); + } + else + if_error (IF_SEVERITY_FATAL_ERROR, + logical_filename, line_number, (size_t)(-1), false, + _("invalid string expression")); + } + msgid = sb_xdupfree_c (&buffer); + } pos.file_name = location; pos.line_number = (size_t)(-1); @@ -349,34 +337,27 @@ enum 
parse_result pr_syntax /* syntax error inside the token */ }; -static char *buffer; -static int bufmax; +static struct string_buffer buffer; /* Parses an integer. Returns its digits in buffer, which the caller must release with sb_free. Returns pr_parsed, or pr_none if no digit was read. */ static enum parse_result parse_integer () + _GL_ATTRIBUTE_ACQUIRE_CAPABILITY (buffer.data) { int c; - int bufpos; + sb_init (&buffer); c = phase2_getc (); - bufpos = 0; for (;;) { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } if (!(c >= '0' && c <= '9')) break; - buffer[bufpos++] = c; + sb_xappend1 (&buffer, c); c = phase1_getc (); } phase1_ungetc (c); - buffer[bufpos] = '\0'; - return (bufpos == 0 ? pr_none : pr_parsed); + return (string_desc_length (sb_contents (&buffer)) == 0 ? pr_none : pr_parsed); } static struct mixed_string_buffer stringbuf; @@ -507,9 +488,16 @@ extract_rsj (FILE *f, { /* Parse an integer. */ if (parse_integer () != pr_parsed) - goto invalid_rsj; - if (strcmp (buffer, "1") != 0) - goto invalid_rsj_version; + { + sb_free (&buffer); + goto invalid_rsj; + } + if (strcmp (sb_xcontents_c (&buffer), "1") != 0) + { + sb_free (&buffer); + goto invalid_rsj_version; + } + sb_free (&buffer); } else if (strcmp (s1, "strings") == 0) { @@ -555,7 +543,11 @@ extract_rsj (FILE *f, { /* Parse an integer. */ if (parse_integer () != pr_parsed) - goto invalid_rsj; + { + sb_free (&buffer); + goto invalid_rsj; + } + sb_free (&buffer); } else if (strcmp (s2, "name") == 0) { @@ -584,7 +576,11 @@ extract_rsj (FILE *f, { /* Parse an integer. */ if (parse_integer () != pr_parsed) - goto invalid_rsj; + { + sb_free (&buffer); + goto invalid_rsj; + } + sb_free (&buffer); /* Parse a comma.
*/ c = phase2_getc (); diff --git a/gettext-tools/src/x-smalltalk.c b/gettext-tools/src/x-smalltalk.c index 3733d7755..eabd70d66 100644 --- a/gettext-tools/src/x-smalltalk.c +++ b/gettext-tools/src/x-smalltalk.c @@ -34,6 +34,7 @@ #include "xg-pos.h" #include "xg-message.h" #include "xalloc.h" +#include "string-buffer.h" #include "gettext.h" #define _(s) gettext(s) @@ -194,9 +195,6 @@ static int phase2_pushback_length; static void phase2_get (token_ty *tp) { - static char *buffer; - static int bufmax; - int bufpos; int c; if (phase2_pushback_length) @@ -263,37 +261,29 @@ phase2_get (token_ty *tp) { case '\'': /* String literal. */ - bufpos = 0; - for (;;) - { - c = phase1_getc (); - if (c == EOF) - break; - if (c == '\'') - { - c = phase1_getc (); - if (c != '\'') - { - phase1_ungetc (c); - break; - } - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = 0; - tp->type = token_type_string_literal; - tp->string = xstrdup (buffer); - tp->comment = add_reference (savable_comment); + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + c = phase1_getc (); + if (c == EOF) + break; + if (c == '\'') + { + c = phase1_getc (); + if (c != '\'') + { + phase1_ungetc (c); + break; + } + } + sb_xappend1 (&buffer, c); + } + tp->type = token_type_string_literal; + tp->string = sb_xdupfree_c (&buffer); + tp->comment = add_reference (savable_comment); + } return; case '+': @@ -360,71 +350,60 @@ phase2_get (token_ty *tp) case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': /* Recognize id or id":"[id":"]* or id":"[id":"]*id. 
*/ - bufpos = 0; - for (;;) - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = phase1_getc (); - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - continue; - case ':': - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = phase1_getc (); - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - continue; - default: - phase1_ungetc (c); - break; - } - break; - default: - phase1_ungetc (c); - break; - } - break; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->string = xstrdup (buffer); - tp->type = token_type_symbol; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + sb_xappend1 (&buffer, c); + c = phase1_getc (); + 
switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + continue; + case ':': + sb_xappend1 (&buffer, c); + c = phase1_getc (); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + continue; + default: + phase1_ungetc (c); + break; + } + break; + default: + phase1_ungetc (c); + break; + } + break; + } + tp->string = sb_xdupfree_c (&buffer); + tp->type = token_type_symbol; + } return; case '#': diff --git a/gettext-tools/src/x-vala.c b/gettext-tools/src/x-vala.c index bfe2a2bb0..5ff487f67 100644 --- a/gettext-tools/src/x-vala.c +++ b/gettext-tools/src/x-vala.c @@ -45,6 +45,7 @@ #include "if-error.h" #include "xalloc.h" #include "xvasprintf.h" +#include "string-buffer.h" #include "mem-hash-map.h" #include "po-charset.h" #include "gettext.h" @@ -706,22 +707,8 @@ phase3_scan_regex () static void phase3_get (token_ty *tp) { - static char *buffer; - static int bufmax; - int 
bufpos; - #undef APPEND -#define APPEND(c) \ - do \ - { \ - if (bufpos >= bufmax) \ - { \ - bufmax = 2 * bufmax + 10; \ - buffer = xrealloc (buffer, bufmax); \ - } \ - buffer[bufpos++] = c; \ - } \ - while (0) +#define APPEND(c) sb_xappend1 (&buffer, (c)) if (phase3_pushback_length) { @@ -773,42 +760,48 @@ phase3_get (token_ty *tp) case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - bufpos = 0; - for (;;) - { - APPEND (c); - c = phase2_getc (); - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '_': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - continue; - - default: - phase2_ungetc (c); - break; - } - break; - } - APPEND (0); - if (strcmp (buffer, "return") == 0) - tp->type = last_token_type = token_type_return; - else - { - tp->string = xstrdup (buffer); - tp->type = last_token_type = token_type_symbol; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + APPEND (c); + c = phase2_getc (); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + 
case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + continue; + + default: + phase2_ungetc (c); + break; + } + break; } + const char *contents = sb_xcontents_c (&buffer); + if (strcmp (contents, "return") == 0) + { + sb_free (&buffer); + tp->type = last_token_type = token_type_return; + } + else + { + tp->string = sb_xdupfree_c (&buffer); + tp->type = last_token_type = token_type_symbol; + } + } return; case '.': @@ -833,47 +826,50 @@ phase3_get (token_ty *tp) /* The preprocessing number token is more "generous" than the C number tokens. This is mostly due to token pasting (another thing we can ignore here). */ - bufpos = 0; - for (;;) - { - APPEND (c); - c = phase2_getc (); - switch (c) - { - case 'e': - case 'E': - APPEND (c); - c = phase2_getc (); - if (c != '+' && c != '-') - { - phase2_ungetc (c); - break; - } - continue; - - case 'A': case 'B': case 'C': case 'D': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case '.': - continue; - - default: - phase2_ungetc (c); - break; - } - break; - } - APPEND (0); - tp->type = last_token_type = token_type_number; + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + APPEND (c); + c = phase2_getc (); + switch (c) + { + 
case 'e': + case 'E': + APPEND (c); + c = phase2_getc (); + if (c != '+' && c != '-') + { + phase2_ungetc (c); + break; + } + continue; + + case 'A': case 'B': case 'C': case 'D': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '.': + continue; + + default: + phase2_ungetc (c); + break; + } + break; + } + sb_free (&buffer); + tp->type = last_token_type = token_type_number; + } return; case '\'': diff --git a/gettext-tools/src/x-ycp.c b/gettext-tools/src/x-ycp.c index 7fb560e38..37edd49a2 100644 --- a/gettext-tools/src/x-ycp.c +++ b/gettext-tools/src/x-ycp.c @@ -39,6 +39,7 @@ #include "xg-message.h" #include "if-error.h" #include "xalloc.h" +#include "string-buffer.h" #include "gettext.h" #define _(s) gettext(s) @@ -135,9 +136,6 @@ static int phase2_pushback_length; static int phase2_getc () { - static char *buffer; - static size_t bufmax; - size_t buflen; int lineno; int c; bool last_was_star; @@ -155,7 +153,8 @@ phase2_getc () if (c == '#') { /* sh comment. */ - buflen = 0; + struct string_buffer buffer; + sb_init (&buffer); lineno = line_number; for (;;) { @@ -163,23 +162,13 @@ phase2_getc () if (c == '\n' || c == EOF) break; /* We skip all leading white space, but not EOLs.
*/ - if (!(buflen == 0 && (c == ' ' || c == '\t'))) - { - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen++] = c; - } - } - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); + if (!(string_desc_length (sb_contents (&buffer)) == 0 + && (c == ' ' || c == '\t'))) + sb_xappend1 (&buffer, c); } - buffer[buflen] = '\0'; - savable_comment_add (buffer); + /* savable_comment_add is handed a borrowed string; free the buffer. */ + savable_comment_add (sb_xcontents_c (&buffer)); + sb_free (&buffer); last_comment_line = lineno; return '\n'; } @@ -199,94 +188,88 @@ phase2_getc () case '*': /* C comment. */ - buflen = 0; - lineno = line_number; - last_was_star = false; - for (;;) - { - c = phase1_getc (); - if (c == EOF) - break; - /* We skip all leading white space, but not EOLs. */ - if (buflen == 0 && (c == ' ' || c == '\t')) - continue; - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen++] = c; - switch (c) - { - case '\n': - --buflen; - while (buflen >= 1 - && (buffer[buflen - 1] == ' ' - || buffer[buflen - 1] == '\t')) - --buflen; - buffer[buflen] = '\0'; - savable_comment_add (buffer); - buflen = 0; - lineno = line_number; - last_was_star = false; - continue; - - case '*': - last_was_star = true; - continue; - - case '/': - if (last_was_star) - { - buflen -= 2; - while (buflen >= 1 - && (buffer[buflen - 1] == ' ' - || buffer[buflen - 1] == '\t')) - --buflen; - buffer[buflen] = '\0'; - savable_comment_add (buffer); - break; - } - FALLTHROUGH; - - default: - last_was_star = false; + { + struct string_buffer buffer; + sb_init (&buffer); + lineno = line_number; + last_was_star = false; + for (;;) + { + c = phase1_getc (); + if (c == EOF) + { + sb_free (&buffer); + break; + } + /* We skip all leading white space, but not EOLs.
*/ + if (string_desc_length (sb_contents (&buffer)) == 0 + && (c == ' ' || c == '\t')) continue; - } - break; - } - last_comment_line = lineno; - return ' '; + sb_xappend1 (&buffer, c); + switch (c) + { + case '\n': + --buffer.length; + while (buffer.length >= 1 + && (buffer.data[buffer.length - 1] == ' ' + || buffer.data[buffer.length - 1] == '\t')) + --buffer.length; + savable_comment_add (sb_xcontents_c (&buffer)); + sb_free (&buffer); + sb_init (&buffer); + lineno = line_number; + last_was_star = false; + continue; + + case '*': + last_was_star = true; + continue; + + case '/': + if (last_was_star) + { + buffer.length -= 2; + while (buffer.length >= 1 + && (buffer.data[buffer.length - 1] == ' ' + || buffer.data[buffer.length - 1] == '\t')) + --buffer.length; + savable_comment_add (sb_xcontents_c (&buffer)); + sb_free (&buffer); + break; + } + FALLTHROUGH; + + default: + last_was_star = false; + continue; + } + break; + } + last_comment_line = lineno; + return ' '; + } case '/': /* C++ comment. */ - buflen = 0; - lineno = line_number; - for (;;) - { - c = phase1_getc (); - if (c == '\n' || c == EOF) - break; - /* We skip all leading white space, but not EOLs. */ - if (!(buflen == 0 && (c == ' ' || c == '\t'))) - { - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen++] = c; - } - } - if (buflen >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[buflen] = '\0'; - savable_comment_add (buffer); - last_comment_line = lineno; - return '\n'; + { + struct string_buffer buffer; + sb_init (&buffer); + lineno = line_number; + for (;;) + { + c = phase1_getc (); + if (c == '\n' || c == EOF) + break; + /* We skip all leading white space, but not EOLs.
*/ + if (!(string_desc_length (sb_contents (&buffer)) == 0 + && (c == ' ' || c == '\t'))) + sb_xappend1 (&buffer, c); + } + savable_comment_add (sb_xcontents_c (&buffer)); + sb_free (&buffer); + last_comment_line = lineno; + return '\n'; + } } } else @@ -423,9 +406,6 @@ static int phase5_pushback_length; static void phase5_get (token_ty *tp) { - static char *buffer; - static int bufmax; - int bufpos; int c; if (phase5_pushback_length) @@ -473,76 +453,66 @@ phase5_get (token_ty *tp) case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': /* Symbol, or part of a number. */ - bufpos = 0; - for (;;) - { - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - c = phase2_getc (); - switch (c) - { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case '_': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - continue; - default: - if (bufpos == 1 && buffer[0] == '_' && c == '(') + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + sb_xappend1 (&buffer, c); + c = phase2_getc (); + switch (c) + { + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': + case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': + case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': + case 'Y': case 'Z': + case '_': + case 'a': case 'b': case 'c': case 'd': case 'e': case
'f': + case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': + case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': + case 's': case 't': case 'u': case 'v': case 'w': case 'x': + case 'y': case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + continue; + default: { - tp->type = token_type_i18n; - return; + string_desc_t contents = sb_contents (&buffer); + if (string_desc_length (contents) == 1 + && string_desc_char_at (contents, 0) == '_' + && c == '(') + { + sb_free (&buffer); + tp->type = token_type_i18n; + return; + } } - phase2_ungetc (c); - break; - } - break; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->string = xstrdup (buffer); - tp->type = token_type_symbol; + phase2_ungetc (c); + break; + } + break; + } + tp->string = sb_xdupfree_c (&buffer); + tp->type = token_type_symbol; + } return; case '"': - bufpos = 0; - for (;;) - { - c = get_string_element (); - if (c == EOF || c == SE_QUOTES) - break; - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos++] = c; - } - if (bufpos >= bufmax) - { - bufmax = 2 * bufmax + 10; - buffer = xrealloc (buffer, bufmax); - } - buffer[bufpos] = '\0'; - tp->string = xstrdup (buffer); - tp->type = token_type_string_literal; - tp->comment = add_reference (savable_comment); + { + struct string_buffer buffer; + sb_init (&buffer); + for (;;) + { + c = get_string_element (); + if (c == EOF || c == SE_QUOTES) + break; + sb_xappend1 (&buffer, c); + } + tp->string = sb_xdupfree_c (&buffer); + tp->type = token_type_string_literal; + tp->comment = add_reference (savable_comment); + } return; case '(':