From: Bruno Haible Date: Wed, 11 Sep 2024 12:09:29 +0000 (+0200) Subject: xgettext: Smalltalk: Recognize string concatenation. X-Git-Tag: v0.23~137 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=29f693f18c570fffe23f8727e0c44479c07f64c6;p=thirdparty%2Fgettext.git xgettext: Smalltalk: Recognize string concatenation. * gettext-tools/src/x-smalltalk.c (struct token_ty): Add field 'comment'. (free_token): New function. (phase2_get): Fill in tp->comment. (phase3_pushback): Increase size. (phase3_get): Fill in tp->comment. Do reference counting. (string_concat_free1): New function. (phase4_pushback, phase4_pushback_length): New variables. (phase4_get, phase4_unget): New functions. (extract_smalltalk): Initialize phase4_pushback_length. Call phase4_get instead of phase3_get, phase4_unget instead of phase3_unget. Use the comment stored in the token. Call free_token. * gettext-tools/tests/xgettext-smalltalk-1: Add a test case of string concatenation. * NEWS: Mention the improvement. --- diff --git a/NEWS b/NEWS index ec2b1e62e..14c5fcb01 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,4 @@ -Version 0.23 - August 2024 +Version 0.23 - September 2024 * Programming languages support: - Python: @@ -16,6 +16,7 @@ Version 0.23 - August 2024 o xgettext now recognizes comments of the form '#; '. - Java: Improved recognition of format strings when the String.formatted method is used. + - Smalltalk: The string concatenation operator ',' is now recognized. - Vala: Improved recognition of format strings when the string.printf method is used. - Glade: xgettext has improved support for GtkBuilder 4. diff --git a/gettext-tools/src/x-smalltalk.c b/gettext-tools/src/x-smalltalk.c index 055e59a89..3733d7755 100644 --- a/gettext-tools/src/x-smalltalk.c +++ b/gettext-tools/src/x-smalltalk.c @@ -171,9 +171,20 @@ struct token_ty { token_type_ty type; char *string; /* for token_type_string_literal, token_type_symbol */ + refcounted_string_list_ty *comment; /* for token_type_string_literal */ int line_number; }; +/* Free the memory pointed to by a 'struct token_ty'. */ +static inline void +free_token (token_ty *tp) +{ + if (tp->type == token_type_string_literal || tp->type == token_type_symbol) + free (tp->string); + if (tp->type == token_type_string_literal) + drop_reference (tp->comment); +} + /* 2. Combine characters into tokens. Discard comments and whitespace. */ @@ -282,6 +293,7 @@ phase2_get (token_ty *tp) buffer[bufpos] = 0; tp->type = token_type_string_literal; tp->string = xstrdup (buffer); + tp->comment = add_reference (savable_comment); return; case '+': @@ -447,7 +459,7 @@ phase2_unget (token_ty *tp) /* 3. Combine "# string_literal" and "# symbol" to a single token. */ -static token_ty phase3_pushback[1]; +static token_ty phase3_pushback[2]; static int phase3_pushback_length; static void @@ -468,15 +480,18 @@ phase3_get (token_ty *tp) if (token2.type == token_type_symbol || token2.type == token_type_string_literal) { + if (token2.type == token_type_string_literal) + drop_reference (token2.comment); tp->type = token_type_string_literal; tp->string = token2.string; + tp->comment = add_reference (savable_comment); } else phase2_unget (&token2); } } -/* Supports only one pushback token. */ +/* Supports 2 pushback tokens. */ static void phase3_unget (token_ty *tp) { @@ -489,6 +504,81 @@ phase3_unget (token_ty *tp) } +/* 4. String literal concatenation: + Combine "string1" , "string2" to "string1string2". */ + +/* Concatenates two strings, and frees the first argument. */ +static char * +string_concat_free1 (char *s1, const char *s2) +{ + size_t len1 = strlen (s1); + size_t len2 = strlen (s2); + size_t len = len1 + len2 + 1; + char *result = XNMALLOC (len, char); + memcpy (result, s1, len1); + memcpy (result + len1, s2, len2 + 1); + free (s1); + return result; +} + +static token_ty phase4_pushback[1]; +static int phase4_pushback_length; + +static void +phase4_get (token_ty *tp) +{ + if (phase4_pushback_length) + { + *tp = phase4_pushback[--phase4_pushback_length]; + return; + } + + phase3_get (tp); + if (tp->type == token_type_string_literal) + { + char *sum = tp->string; + + for (;;) + { + token_ty token2; + + phase3_get (&token2); + if (token2.type == token_type_symbol + && strcmp (token2.string, ",") == 0) + { + token_ty token3; + + phase3_get (&token3); + if (token3.type == token_type_string_literal) + { + sum = string_concat_free1 (sum, token3.string); + + free_token (&token3); + free_token (&token2); + continue; + } + phase3_unget (&token3); + } + phase3_unget (&token2); + break; + } + tp->string = sum; + } +} + +/* Supports only one pushback token. */ +static void +phase4_unget (token_ty *tp) +{ + if (tp->type != token_type_eof) + { + if (phase4_pushback_length == SIZEOF (phase4_pushback)) + abort (); + phase4_pushback[phase4_pushback_length++] = *tp; + } +} + + /* ========================= Extracting strings. ========================== */ /* The file is broken into tokens. Scan the token stream, looking for the @@ -520,6 +610,7 @@ extract_smalltalk (FILE *f, phase2_pushback_length = 0; phase3_pushback_length = 0; + phase4_pushback_length = 0; /* Eat tokens until eof is seen. */ { @@ -541,7 +632,7 @@ extract_smalltalk (FILE *f, { token_ty token; - phase3_get (&token); + phase4_get (&token); switch (token.type) { @@ -551,7 +642,7 @@ extract_smalltalk (FILE *f, strcmp (token.string, "at:") == 0 && state == 1 ? 3 : strcmp (token.string, "plural:") == 0 && state == 4 ? 5 : 0); - free (token.string); + free_token (&token); break; case token_type_string_literal: @@ -562,7 +653,7 @@ extract_smalltalk (FILE *f, pos.line_number = token.line_number; remember_a_message (mlp, NULL, token.string, false, false, null_context_region (), &pos, - NULL, savable_comment, false); + NULL, token.comment, false); state = 0; break; } @@ -574,16 +665,16 @@ extract_smalltalk (FILE *f, pos.file_name = logical_file_name; pos.line_number = token.line_number; - phase3_get (&token2); + phase4_get (&token2); plural_mp = remember_a_message (mlp, NULL, token.string, false, token2.type == token_type_symbol && strcmp (token.string, "plural:") == 0, null_context_region (), &pos, - NULL, savable_comment, false); + NULL, token.comment, false); - phase3_unget (&token2); + phase4_unget (&token2); state = 4; break; @@ -596,12 +687,12 @@ extract_smalltalk (FILE *f, if (plural_mp != NULL) remember_a_message_plural (plural_mp, token.string, false, null_context_region (), &pos, - savable_comment, false); + token.comment, false); state = 0; break; } state = 0; - free (token.string); + free_token (&token); break; case token_type_uniq: diff --git a/gettext-tools/tests/xgettext-smalltalk-1 b/gettext-tools/tests/xgettext-smalltalk-1 index 46dea6816..78e3abd2d 100755 --- a/gettext-tools/tests/xgettext-smalltalk-1 +++ b/gettext-tools/tests/xgettext-smalltalk-1 @@ -1,7 +1,7 @@ #!/bin/sh . "${srcdir=.}/init.sh"; path_prepend_ . ../src -# Test Smalltalk support: --add-comments option. +# Test Smalltalk support: --add-comments option, string concatenation. cat <<\EOF > xg-st-1.st "This comment will not be extracted." @@ -14,6 +14,7 @@ Transcript showCr: (NLS ? 'Hey Jude'). Nickname of the Beatles " Transcript showCr: (NLS ? 'The Fabulous Four'). +Transcript showCr: (NLS ? 'The Fabulous Four',' are ','The Beatles'). EOF : ${XGETTEXT=xgettext} @@ -37,6 +38,9 @@ msgstr "" #. msgid "The Fabulous Four" msgstr "" + +msgid "The Fabulous Four are The Beatles" +msgstr "" EOF : ${DIFF=diff}