]> git.ipfire.org Git - thirdparty/gettext.git/commitdiff
xgettext: Scheme: Recognize datum-comments.
authorBruno Haible <bruno@clisp.org>
Fri, 16 Aug 2024 20:32:58 +0000 (22:32 +0200)
committerBruno Haible <bruno@clisp.org>
Fri, 16 Aug 2024 20:32:58 +0000 (22:32 +0200)
Reported by Florent Angly <florent.angly@gmail.com>
at <https://savannah.gnu.org/bugs/?61882>.

* gettext-tools/src/x-scheme.c (datum_comment_nesting_depth): New variable.
(read_object): Omit the normal processing of list elements if
datum_comment_nesting_depth is non-zero. Recognize datum comments.
(extract_whole_file): Initialize datum_comment_nesting_depth.
* gettext-tools/tests/xgettext-scheme-1: Add a test case with a datum comment.
* NEWS: Mention the change.

NEWS
gettext-tools/src/x-scheme.c
gettext-tools/tests/xgettext-scheme-1

diff --git a/NEWS b/NEWS
index b77299356dbfa99b6a351bb09af5a0ea44016f78..ec2b1e62ee4dc6e8b03e4ff5a8c073afd3ace110 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -5,13 +5,15 @@ Version 0.23 - August 2024
     o xgettext now assumes source code for Python 3 rather than Python 2.
       This affects the interpretation of escape sequences in string literals.
     o xgettext now recognizes the f-string syntax.
-  - Scheme: xgettext now supports the option '-L Guile' as an alternative to
-    '-L Scheme'.  They are nearly equivalent.  They differ in the interpretation
-    of escape sequences in string literals: While 'xgettext -L Scheme' assumes
-    the R6RS and R7RS syntax of string literals, 'xgettext -L Guile' assumes
-    the syntax of string literals understood by Guile 2.x and 3.0 (without
-    command-line option '--r6rs' or '--r7rs', and before a '#!r6rs' directive
-    is seen).
+  - Scheme:
+    o xgettext now supports the option '-L Guile' as an alternative to
+      '-L Scheme'.  They are nearly equivalent.  They differ in the
+      interpretation of escape sequences in string literals: While
+      'xgettext -L Scheme' assumes the R6RS and R7RS syntax of string literals,
+      'xgettext -L Guile' assumes the syntax of string literals understood by
+      Guile 2.x and 3.0 (without command-line option '--r6rs' or '--r7rs', and
+      before a '#!r6rs' directive is seen).
+    o xgettext now recognizes comments of the form '#; <expression>'.
   - Java: Improved recognition of format strings when the String.formatted
     method is used.
   - Vala: Improved recognition of format strings when the string.printf method
index 6e84df96bab744f6fa0084d1f42c366cd9864d4f..c32af1fa5918efb59634c56449b3ba3bd0878399 100644 (file)
      - R6RS: https://www.r6rs.org/
      - R7RS: https://standards.scheme.org/corrected-r7rs/r7rs.html
 
-   It is implemented in guile-3.0.10/libguile/read.c.
+   It is implemented in guile-3.0.10/module/ice-9/read.scm, with support of
+   #!r6rs.  What you see in guile-3.0.10/libguile/read.c is just the bootstrap
+   reader, without support of #!r6rs and similar directives.
+
    Since we are interested only in strings and in forms similar to
         (gettext msgid ...)
    or   (ngettext msgid msgid_plural ...)
    - The syntax code assigned to each character, and how tokens are built
      up from characters (single escape, multiple escape etc.).
 
-   - Comment syntax: ';' and '#! ... !#' and '#| ... |#' (may be nested).
+   - Comment syntax:
+       ';' up to end of line
+       '#;' <datum> (see R6RS § 4.2.3, R7RS § 2.2)
+       '#! ... !#'
+       '#| ... |#' (may be nested)
 
    - String syntax: "..." with single escapes.
 
@@ -687,6 +694,9 @@ static flag_context_list_table_ty *flag_context_list_table;
 /* Current nesting depth.  */
 static int nesting_depth;
 
+/* Current nesting depth of #;<datum> comments.  */
+static int datum_comment_nesting_depth;
+
 
 /* Read the next object.  */
 static void
@@ -785,49 +795,52 @@ read_object (struct object *op, flag_region_ty *outer_region)
                 if (inner.type == t_eof)
                   break;
 
-                if (arg == 0)
+                if (datum_comment_nesting_depth == 0)
                   {
-                    /* This is the function position.  */
-                    if (inner.type == t_symbol)
+                    if (arg == 0)
                       {
-                        char *symbol_name = string_of_object (&inner);
-                        void *keyword_value;
-
-                        if (hash_find_entry (&keywords,
-                                             symbol_name, strlen (symbol_name),
-                                             &keyword_value)
-                            == 0)
-                          shapes = (const struct callshapes *) keyword_value;
-
-                        argparser = arglist_parser_alloc (mlp, shapes);
-
-                        context_iter =
-                          flag_context_list_iterator (
-                            flag_context_list_table_lookup (
-                              flag_context_list_table,
-                              symbol_name, strlen (symbol_name)));
-
-                        free (symbol_name);
+                        /* This is the function position.  */
+                        if (inner.type == t_symbol)
+                          {
+                            char *symbol_name = string_of_object (&inner);
+                            void *keyword_value;
+
+                            if (hash_find_entry (&keywords,
+                                                 symbol_name, strlen (symbol_name),
+                                                 &keyword_value)
+                                == 0)
+                              shapes = (const struct callshapes *) keyword_value;
+
+                            argparser = arglist_parser_alloc (mlp, shapes);
+
+                            context_iter =
+                              flag_context_list_iterator (
+                                flag_context_list_table_lookup (
+                                  flag_context_list_table,
+                                  symbol_name, strlen (symbol_name)));
+
+                            free (symbol_name);
+                          }
+                        else
+                          context_iter = null_context_list_iterator;
                       }
                     else
-                      context_iter = null_context_list_iterator;
-                  }
-                else
-                  {
-                    /* These are the argument positions.  */
-                    if (argparser != NULL && inner.type == t_string)
                       {
-                        char *s = string_of_object (&inner);
-                        mixed_string_ty *ms =
-                          mixed_string_alloc_simple (s, lc_string,
+                        /* These are the argument positions.  */
+                        if (argparser != NULL && inner.type == t_string)
+                          {
+                            char *s = string_of_object (&inner);
+                            mixed_string_ty *ms =
+                              mixed_string_alloc_simple (s, lc_string,
+                                                         logical_file_name,
+                                                         inner.line_number_at_start);
+                            free (s);
+                            arglist_parser_remember (argparser, arg, ms,
+                                                     inner_region,
                                                      logical_file_name,
-                                                     inner.line_number_at_start);
-                        free (s);
-                        arglist_parser_remember (argparser, arg, ms,
-                                                 inner_region,
-                                                 logical_file_name,
-                                                 inner.line_number_at_start,
-                                                 savable_comment, false);
+                                                     inner.line_number_at_start,
+                                                     savable_comment, false);
+                          }
                       }
                   }
 
@@ -1047,6 +1060,25 @@ read_object (struct object *op, flag_region_ty *outer_region)
                     }
                 }
 
+              case ';':
+                /* Datum comment '#; <datum>'.
+                   See R6RS § 4.2.3, R7RS § 2.2.  */
+                {
+                  struct object inner;
+                  int saved_last_non_comment_line = last_non_comment_line;
+                  ++datum_comment_nesting_depth;
+                  ++nesting_depth;
+                  read_object (&inner, null_context_region ());
+                  nesting_depth--;
+                  datum_comment_nesting_depth--;
+                  last_non_comment_line = saved_last_non_comment_line;
+                  /* Dots and EOF are not allowed here.
+                     But be tolerant.  */
+                  free_object (&inner);
+                  last_comment_line = line_number;
+                  continue;
+                }
+
               case '!':
                 /* Block comment '#! ... !#'.  See
                    <https://www.gnu.org/software/guile/manual/html_node/Block-Comments.html>.  */
@@ -1437,6 +1469,7 @@ extract_whole_file (FILE *f,
 
   flag_context_list_table = flag_table;
   nesting_depth = 0;
+  datum_comment_nesting_depth = 0;
 
   init_keywords ();
 
index 797650d1281828e8bb8480636da1ca696bf66ed2..34f15c1aa341daf238db604eab1c560a6a9aa570 100755 (executable)
@@ -11,6 +11,9 @@ cat <<\EOF > xg-sc-1.scm
 #!  Not extracted either.
 !#
 (display (_ "The Fabulous Four"))
+#; (begin
+     (display (_ "The Onion")))
+(display (_ "The Fabulous Thunderbirds"))
 EOF
 
 : ${XGETTEXT=xgettext}
@@ -28,6 +31,9 @@ msgstr ""
 
 msgid "The Fabulous Four"
 msgstr ""
+
+msgid "The Fabulous Thunderbirds"
+msgstr ""
 EOF
 
 : ${DIFF=diff}