From: Bruno Haible Date: Wed, 11 Sep 2024 12:09:34 +0000 (+0200) Subject: xgettext: awk: Recognize string concatenation. X-Git-Tag: v0.23~136 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d0f044719d21af08295ef9d139e5ce29dee83aab;p=thirdparty%2Fgettext.git xgettext: awk: Recognize string concatenation. * gettext-tools/src/x-awk.c (SIZEOF): New macro. (phase3_pushback, phase3_pushback_length): New variables. (phase3_get): Renamed from x_awk_lex. Return pushed-back token if present. (phase3_unget): New function. (string_concat_free1): New function. (phase4_get): New function. (extract_parenthesized): Invoke phase4_get instead of x_awk_lex. (extract_awk): Initialize phase3_pushback_length. * gettext-tools/tests/xgettext-awk-1: Add test cases for string concatenation. * NEWS: Mention the improvement. --- diff --git a/NEWS b/NEWS index 14c5fcb01..0f78e3644 100644 --- a/NEWS +++ b/NEWS @@ -16,6 +16,7 @@ Version 0.23 - September 2024 o xgettext now recognizes comments of the form '#; '. - Java: Improved recognition of format strings when the String.formatted method is used. + - awk: String concatenation by juxtaposition is now recognized. - Smalltalk: The string concatenation operator ',' is now recognized. - Vala: Improved recognition of format strings when the string.printf method is used. diff --git a/gettext-tools/src/x-awk.c b/gettext-tools/src/x-awk.c index 71edaf6d0..dd617ee75 100644 --- a/gettext-tools/src/x-awk.c +++ b/gettext-tools/src/x-awk.c @@ -45,6 +45,8 @@ #define _(s) gettext(s) +#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) + /* The awk syntax is defined in the gawk manual page and documentation. See also gawk/awkgram.y. */ @@ -374,14 +376,23 @@ free_token (token_ty *tp) in between. 
*/ static bool prefer_division_over_regexp; +static token_ty phase3_pushback[1]; +static int phase3_pushback_length; + static void -x_awk_lex (token_ty *tp) +phase3_get (token_ty *tp) { static char *buffer; static int bufmax; int bufpos; int c; + if (phase3_pushback_length) /* A token stored by phase3_unget is returned before any new input is read. */ + { + *tp = phase3_pushback[--phase3_pushback_length]; + return; + } + for (;;) { tp->line_number = line_number; @@ -657,6 +668,56 @@ x_awk_lex (token_ty *tp) } } +/* Stores *TP so that the next phase3_get call returns it. Supports only one pushback token: aborts if one is already pending. A token of type token_type_eof is not stored. */ +static void +phase3_unget (token_ty *tp) +{ + if (tp->type != token_type_eof) + { + if (phase3_pushback_length == SIZEOF (phase3_pushback)) + abort (); + phase3_pushback[phase3_pushback_length++] = *tp; + } +} + + +/* 8. Concatenate adjacent string literals to form single string literals. */ + +/* Concatenates two strings, and frees the first argument. Returns a newly allocated string obtained through XNMALLOC; S2 is only read. */ +static char * +string_concat_free1 (char *s1, const char *s2) +{ + size_t len1 = strlen (s1); + size_t len2 = strlen (s2); + size_t len = len1 + len2 + 1; + char *result = XNMALLOC (len, char); + memcpy (result, s1, len1); + memcpy (result + len1, s2, len2 + 1); /* len2 + 1 also copies the terminating NUL of S2. */ + free (s1); + return result; +} + +static void +phase4_get (token_ty *tp) /* Fetches the next token into *TP; a run of consecutive string tokens is merged into a single string token. */ +{ + phase3_get (tp); + if (tp->type != token_type_string) + return; + for (;;) + { + token_ty tmp; + + phase3_get (&tmp); + if (tmp.type != token_type_string) + { + phase3_unget (&tmp); /* Lookahead was not a string: hand it back for the next phase3_get call. */ + return; + } + tp->string = string_concat_free1 (tp->string, tmp.string); /* The old tp->string is freed by string_concat_free1. */ + free_token (&tmp); + } +} + /* ========================= Extracting strings. 
========================== */ @@ -720,7 +781,7 @@ extract_parenthesized (message_list_ty *mlp, { token_ty token; - x_awk_lex (&token); + phase4_get (&token); if (next_is_argument && token.type != token_type_lparen) { @@ -892,6 +953,7 @@ extract_awk (FILE *f, last_non_comment_line = -1; prefer_division_over_regexp = false; + phase3_pushback_length = 0; flag_context_list_table = flag_table; nesting_depth = 0; diff --git a/gettext-tools/tests/xgettext-awk-1 b/gettext-tools/tests/xgettext-awk-1 index 830395891..9b398a4bf 100755 --- a/gettext-tools/tests/xgettext-awk-1 +++ b/gettext-tools/tests/xgettext-awk-1 @@ -1,7 +1,7 @@ #!/bin/sh . "${srcdir=.}/init.sh"; path_prepend_ . ../src -# Test awk support: --add-comments option. +# Test awk support: --add-comments option, string concatenation. cat <<\EOF > xg-a-1.awk # This comment will not be extracted. @@ -13,6 +13,12 @@ print _"Hey Jude" # TRANSLATORS: # Nickname of the Beatles print _"The Fabulous Four" +# This string is not extracted. +print "not extracted" +# String concatenation in regular contexts. +print dcgettext("Olivia" " " "Newton-John") +# String concatenation *not* happening right after the _ marker. +print _"hello" " world" EOF : ${XGETTEXT=xgettext} @@ -35,6 +41,12 @@ msgstr "" #. Nickname of the Beatles msgid "The Fabulous Four" msgstr "" + +msgid "Olivia Newton-John" +msgstr "" + +msgid "hello" +msgstr "" EOF : ${DIFF=diff}