From 2c8f43903505eb1563bd62ab2ef1ff54cb44eddc Mon Sep 17 00:00:00 2001 From: Bruno Haible Date: Fri, 16 Aug 2024 22:32:58 +0200 Subject: [PATCH] xgettext: Scheme: Recognize datum-comments. Reported by Florent Angly at . * gettext-tools/src/x-scheme.c (datum_comment_nesting_depth): New variable. (read_object): Omit the normal processing of list elements if datum_comment_nesting_depth is non-zero. Recognize datum comments. (extract_whole_file): Initialize datum_comment_nesting_depth. * gettext-tools/tests/xgettext-scheme-1: Add a test case with a datum comment. * NEWS: Mention the change. --- NEWS | 16 ++-- gettext-tools/src/x-scheme.c | 111 +++++++++++++++++--------- gettext-tools/tests/xgettext-scheme-1 | 6 ++ 3 files changed, 87 insertions(+), 46 deletions(-) diff --git a/NEWS b/NEWS index b77299356..ec2b1e62e 100644 --- a/NEWS +++ b/NEWS @@ -5,13 +5,15 @@ Version 0.23 - August 2024 o xgettext now assumes source code for Python 3 rather than Python 2. This affects the interpretation of escape sequences in string literals. o xgettext now recognizes the f-string syntax. - - Scheme: xgettext now supports the option '-L Guile' as an alternative to - '-L Scheme'. They are nearly equivalent. They differ in the interpretation - of escape sequences in string literals: While 'xgettext -L Scheme' assumes - the R6RS and R7RS syntax of string literals, 'xgettext -L Guile' assumes - the syntax of string literals understood by Guile 2.x and 3.0 (without - command-line option '--r6rs' or '--r7rs', and before a '#!r6rs' directive - is seen). + - Scheme: + o xgettext now supports the option '-L Guile' as an alternative to + '-L Scheme'. They are nearly equivalent. They differ in the + interpretation of escape sequences in string literals: While + 'xgettext -L Scheme' assumes the R6RS and R7RS syntax of string literals, + 'xgettext -L Guile' assumes the syntax of string literals understood by + Guile 2.x and 3.0 (without command-line option '--r6rs' or '--r7rs', and + before a '#!r6rs' directive is seen). + o xgettext now recognizes comments of the form '#; '. - Java: Improved recognition of format strings when the String.formatted method is used. - Vala: Improved recognition of format strings when the string.printf method diff --git a/gettext-tools/src/x-scheme.c b/gettext-tools/src/x-scheme.c index 6e84df96b..c32af1fa5 100644 --- a/gettext-tools/src/x-scheme.c +++ b/gettext-tools/src/x-scheme.c @@ -52,7 +52,10 @@ - R6RS: https://www.r6rs.org/ - R7RS: https://standards.scheme.org/corrected-r7rs/r7rs.html - It is implemented in guile-3.0.10/libguile/read.c. + It is implemented in guile-3.0.10/module/ice-9/read.scm, with support of + #!r6rs. What you see in guile-3.0.10/libguile/read.c is just the bootstrap + reader, without support of #!r6rs and similar directives. + Since we are interested only in strings and in forms similar to (gettext msgid ...) or (ngettext msgid msgid_plural ...) @@ -72,7 +75,11 @@ - The syntax code assigned to each character, and how tokens are built up from characters (single escape, multiple escape etc.). - - Comment syntax: ';' and '#! ... !#' and '#| ... |#' (may be nested). + - Comment syntax: + ';' up to end of line + '#;' (see R6RS § 4.2.3, R7RS § 2.2) + '#! ... !#' + '#| ... |#' (may be nested) - String syntax: "..." with single escapes. @@ -687,6 +694,9 @@ static flag_context_list_table_ty *flag_context_list_table; /* Current nesting depth. */ static int nesting_depth; +/* Current nesting depth of #; comments. */ +static int datum_comment_nesting_depth; + /* Read the next object. */ static void @@ -785,49 +795,52 @@ read_object (struct object *op, flag_region_ty *outer_region) if (inner.type == t_eof) break; - if (arg == 0) + if (datum_comment_nesting_depth == 0) { - /* This is the function position. */ - if (inner.type == t_symbol) + if (arg == 0) { - char *symbol_name = string_of_object (&inner); - void *keyword_value; - - if (hash_find_entry (&keywords, - symbol_name, strlen (symbol_name), - &keyword_value) - == 0) - shapes = (const struct callshapes *) keyword_value; - - argparser = arglist_parser_alloc (mlp, shapes); - - context_iter = - flag_context_list_iterator ( - flag_context_list_table_lookup ( - flag_context_list_table, - symbol_name, strlen (symbol_name))); - - free (symbol_name); + /* This is the function position. */ + if (inner.type == t_symbol) + { + char *symbol_name = string_of_object (&inner); + void *keyword_value; + + if (hash_find_entry (&keywords, + symbol_name, strlen (symbol_name), + &keyword_value) + == 0) + shapes = (const struct callshapes *) keyword_value; + + argparser = arglist_parser_alloc (mlp, shapes); + + context_iter = + flag_context_list_iterator ( + flag_context_list_table_lookup ( + flag_context_list_table, + symbol_name, strlen (symbol_name))); + + free (symbol_name); + } + else + context_iter = null_context_list_iterator; } else - context_iter = null_context_list_iterator; - } - else - { - /* These are the argument positions. */ - if (argparser != NULL && inner.type == t_string) { - char *s = string_of_object (&inner); - mixed_string_ty *ms = - mixed_string_alloc_simple (s, lc_string, + /* These are the argument positions. */ + if (argparser != NULL && inner.type == t_string) + { + char *s = string_of_object (&inner); + mixed_string_ty *ms = + mixed_string_alloc_simple (s, lc_string, + logical_file_name, + inner.line_number_at_start); + free (s); + arglist_parser_remember (argparser, arg, ms, + inner_region, logical_file_name, - inner.line_number_at_start); - free (s); - arglist_parser_remember (argparser, arg, ms, - inner_region, - logical_file_name, - inner.line_number_at_start, - savable_comment, false); + inner.line_number_at_start, + savable_comment, false); + } } } @@ -1047,6 +1060,25 @@ read_object (struct object *op, flag_region_ty *outer_region) } } + case ';': + /* Datum comment '#; '. + See R6RS § 4.2.3, R7RS § 2.2. */ + { + struct object inner; + int saved_last_non_comment_line = last_non_comment_line; + ++datum_comment_nesting_depth; + ++nesting_depth; + read_object (&inner, null_context_region ()); + nesting_depth--; + datum_comment_nesting_depth--; + last_non_comment_line = saved_last_non_comment_line; + /* Dots and EOF are not allowed here. + But be tolerant. */ + free_object (&inner); + last_comment_line = line_number; + continue; + } + case '!': /* Block comment '#! ... !#'. See . */ @@ -1437,6 +1469,7 @@ extract_whole_file (FILE *f, flag_context_list_table = flag_table; nesting_depth = 0; + datum_comment_nesting_depth = 0; init_keywords (); diff --git a/gettext-tools/tests/xgettext-scheme-1 b/gettext-tools/tests/xgettext-scheme-1 index 797650d12..34f15c1aa 100755 --- a/gettext-tools/tests/xgettext-scheme-1 +++ b/gettext-tools/tests/xgettext-scheme-1 @@ -11,6 +11,9 @@ cat <<\EOF > xg-sc-1.scm #! Not extracted either. !# (display (_ "The Fabulous Four")) +#; (begin + (display (_ "The Onion"))) +(display (_ "The Fabulous Thunderbirds")) EOF : ${XGETTEXT=xgettext} @@ -28,6 +31,9 @@ msgstr "" msgid "The Fabulous Four" msgstr "" + +msgid "The Fabulous Thunderbirds" +msgstr "" EOF : ${DIFF=diff} -- 2.47.3