From: Bruno Haible <bruno@clisp.org>
Date: Wed, 11 Sep 2024 12:09:29 +0000 (+0200)
Subject: xgettext: Smalltalk: Recognize string concatenation.
X-Git-Tag: v0.23~137
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=29f693f18c570fffe23f8727e0c44479c07f64c6;p=thirdparty%2Fgettext.git

xgettext: Smalltalk: Recognize string concatenation.

* gettext-tools/src/x-smalltalk.c (struct token_ty): Add field 'comment'.
(free_token): New function.
(phase2_get): Fill in tp->comment.
(phase3_pushback): Increase size.
(phase3_get): Fill in tp->comment. Do reference counting.
(string_concat_free1): New function.
(phase4_pushback, phase4_pushback_length): New variables.
(phase4_get, phase4_unget): New functions.
(extract_smalltalk): Initialize phase4_pushback_length. Call phase4_get instead
of phase3_get, phase4_unget instead of phase3_unget. Use the comment stored in
the token. Call free_token.
* gettext-tools/tests/xgettext-smalltalk-1: Add a test case of string
concatenation.
* NEWS: Mention the improvement.
---

diff --git a/NEWS b/NEWS
index ec2b1e62e..14c5fcb01 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
-Version 0.23 - August 2024
+Version 0.23 - September 2024
 
 * Programming languages support:
   - Python:
@@ -16,6 +16,7 @@ Version 0.23 - August 2024
     o xgettext now recognizes comments of the form '#; <expression>'.
   - Java: Improved recognition of format strings when the String.formatted
     method is used.
+  - Smalltalk: The string concatenation operator ',' is now recognized.
   - Vala: Improved recognition of format strings when the string.printf method
     is used.
   - Glade: xgettext has improved support for GtkBuilder 4.
diff --git a/gettext-tools/src/x-smalltalk.c b/gettext-tools/src/x-smalltalk.c
index 055e59a89..3733d7755 100644
--- a/gettext-tools/src/x-smalltalk.c
+++ b/gettext-tools/src/x-smalltalk.c
@@ -171,9 +171,20 @@ struct token_ty
 {
   token_type_ty type;
   char *string;         /* for token_type_string_literal, token_type_symbol */
+  refcounted_string_list_ty *comment;  /* for token_type_string_literal */
   int line_number;
 };
 
+/* Release the resources held by the fields of *TP; TP itself is not freed.  */
+static inline void
+free_token (token_ty *tp)
+{
+  if (tp->type == token_type_string_literal || tp->type == token_type_symbol)
+    free (tp->string);
+  if (tp->type == token_type_string_literal)
+    drop_reference (tp->comment);   /* only string literals carry a comment */
+}
+
 
 /* 2. Combine characters into tokens.  Discard comments and whitespace.  */
 
@@ -282,6 +293,7 @@ phase2_get (token_ty *tp)
           buffer[bufpos] = 0;
           tp->type = token_type_string_literal;
           tp->string = xstrdup (buffer);
+          tp->comment = add_reference (savable_comment);
           return;
 
         case '+':
@@ -447,7 +459,7 @@ phase2_unget (token_ty *tp)
 
 /* 3. Combine "# string_literal" and "# symbol" to a single token.  */
 
-static token_ty phase3_pushback[1];
+static token_ty phase3_pushback[2];
 static int phase3_pushback_length;
 
 static void
@@ -468,15 +480,18 @@ phase3_get (token_ty *tp)
       if (token2.type == token_type_symbol
           || token2.type == token_type_string_literal)
         {
+          if (token2.type == token_type_string_literal)
+            drop_reference (token2.comment);
           tp->type = token_type_string_literal;
           tp->string = token2.string;
+          tp->comment = add_reference (savable_comment);
         }
       else
         phase2_unget (&token2);
     }
 }
 
-/* Supports only one pushback token.  */
+/* Supports 2 pushback tokens.  */
 static void
 phase3_unget (token_ty *tp)
 {
@@ -489,6 +504,81 @@ phase3_unget (token_ty *tp)
 }
 
 
+/* 4. String literal concatenation:
+   Combine 'string1' , 'string2' to 'string1string2'.  */
+
+/* Returns a newly allocated concatenation of S1 and S2, and frees S1.  */
+static char *
+string_concat_free1 (char *s1, const char *s2)
+{
+  size_t len1 = strlen (s1);
+  size_t len2 = strlen (s2);
+  size_t len = len1 + len2 + 1;   /* + 1 for the terminating NUL */
+  char *result = XNMALLOC (len, char);
+  memcpy (result, s1, len1);
+  memcpy (result + len1, s2, len2 + 1);   /* copies S2's NUL as well */
+  free (s1);   /* S2 is left untouched; the caller still owns it */
+  return result;
+}
+
+static token_ty phase4_pushback[1];   /* tokens handed back by phase4_unget */
+static int phase4_pushback_length;   /* number of valid entries in it */
+
+static void
+phase4_get (token_ty *tp)
+{
+  if (phase4_pushback_length)
+    {
+      *tp = phase4_pushback[--phase4_pushback_length];
+      return;
+    }
+
+  phase3_get (tp);
+  if (tp->type == token_type_string_literal)
+    {
+      char *sum = tp->string;   /* concatenation accumulated so far */
+
+      for (;;)   /* one iteration per ", string_literal" pair */
+        {
+          token_ty token2;
+
+          phase3_get (&token2);
+          if (token2.type == token_type_symbol
+              && strcmp (token2.string, ",") == 0)
+            {
+              token_ty token3;
+
+              phase3_get (&token3);
+              if (token3.type == token_type_string_literal)
+                {
+                  sum = string_concat_free1 (sum, token3.string);   /* frees old sum */
+
+                  free_token (&token3);
+                  free_token (&token2);
+                  continue;
+                }
+              /* NOTE(review): assumes phase3_unget is LIFO, so that token2
+                 is read back before token3 -- confirm.  */ phase3_unget (&token3);
+            }
+          phase3_unget (&token2);
+          break;
+        }
+      tp->string = sum;   /* type, comment, line_number remain the first literal's */
+    }
+}
+
+/* Supports only one pushback token.  EOF tokens are not pushed back.  */
+static void
+phase4_unget (token_ty *tp)
+{
+  if (tp->type != token_type_eof)
+    {
+      if (phase4_pushback_length == SIZEOF (phase4_pushback))
+        abort ();   /* pushback buffer is already full */
+      phase4_pushback[phase4_pushback_length++] = *tp;   /* shallow struct copy */
+    }
+}
+
+
 /* ========================= Extracting strings.  ========================== */
 
 /* The file is broken into tokens.  Scan the token stream, looking for the
@@ -520,6 +610,7 @@ extract_smalltalk (FILE *f,
 
   phase2_pushback_length = 0;
   phase3_pushback_length = 0;
+  phase4_pushback_length = 0;
 
   /* Eat tokens until eof is seen.  */
   {
@@ -541,7 +632,7 @@ extract_smalltalk (FILE *f,
       {
         token_ty token;
 
-        phase3_get (&token);
+        phase4_get (&token);
 
         switch (token.type)
           {
@@ -551,7 +642,7 @@ extract_smalltalk (FILE *f,
                      strcmp (token.string, "at:") == 0 && state == 1 ? 3 :
                      strcmp (token.string, "plural:") == 0 && state == 4 ? 5 :
                      0);
-            free (token.string);
+            free_token (&token);
             break;
 
           case token_type_string_literal:
@@ -562,7 +653,7 @@ extract_smalltalk (FILE *f,
                 pos.line_number = token.line_number;
                 remember_a_message (mlp, NULL, token.string, false, false,
                                     null_context_region (), &pos,
-                                    NULL, savable_comment, false);
+                                    NULL, token.comment, false);
                 state = 0;
                 break;
               }
@@ -574,16 +665,16 @@ extract_smalltalk (FILE *f,
                 pos.file_name = logical_file_name;
                 pos.line_number = token.line_number;
 
-                phase3_get (&token2);
+                phase4_get (&token2);
 
                 plural_mp =
                   remember_a_message (mlp, NULL, token.string, false,
                                       token2.type == token_type_symbol
                                       && strcmp (token.string, "plural:") == 0,
                                       null_context_region (), &pos,
-                                      NULL, savable_comment, false);
+                                      NULL, token.comment, false);
 
-                phase3_unget (&token2);
+                phase4_unget (&token2);
 
                 state = 4;
                 break;
@@ -596,12 +687,12 @@ extract_smalltalk (FILE *f,
                 if (plural_mp != NULL)
                   remember_a_message_plural (plural_mp, token.string, false,
                                              null_context_region (), &pos,
-                                             savable_comment, false);
+                                             token.comment, false);
                 state = 0;
                 break;
               }
             state = 0;
-            free (token.string);
+            free_token (&token);
             break;
 
           case token_type_uniq:
diff --git a/gettext-tools/tests/xgettext-smalltalk-1 b/gettext-tools/tests/xgettext-smalltalk-1
index 46dea6816..78e3abd2d 100755
--- a/gettext-tools/tests/xgettext-smalltalk-1
+++ b/gettext-tools/tests/xgettext-smalltalk-1
@@ -1,7 +1,7 @@
 #!/bin/sh
 . "${srcdir=.}/init.sh"; path_prepend_ . ../src
 
-# Test Smalltalk support: --add-comments option.
+# Test Smalltalk support: --add-comments option, string concatenation.
 
 cat <<\EOF > xg-st-1.st
 "This comment will not be extracted."
@@ -14,6 +14,7 @@ Transcript showCr: (NLS ? 'Hey Jude').
      Nickname of the Beatles
 "
 Transcript showCr: (NLS ? 'The Fabulous Four').
+Transcript showCr: (NLS ? 'The Fabulous Four',' are ','The Beatles').
 EOF
 
 : ${XGETTEXT=xgettext}
@@ -37,6 +38,9 @@ msgstr ""
 #.
 msgid "The Fabulous Four"
 msgstr ""
+
+msgid "The Fabulous Four are The Beatles"
+msgstr ""
 EOF
 
 : ${DIFF=diff}