Add Rust support.

author Bruno Haible <bruno@clisp.org>

Tue, 28 Jan 2025 10:11:56 +0000 (11:11 +0100)

committer Bruno Haible <bruno@clisp.org>

Tue, 28 Jan 2025 21:13:12 +0000 (22:13 +0100)
author Bruno Haible <bruno@clisp.org>
Tue, 28 Jan 2025 10:11:56 +0000 (11:11 +0100)
committer Bruno Haible <bruno@clisp.org>
Tue, 28 Jan 2025 21:13:12 +0000 (22:13 +0100)
diff --git a/.gitignore b/.gitignore

index 0a1bb6588b5e6f54f68120161004f7743bccc6a3..6c3873e734ab8c2b62829480caf59bea8743ebb9 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -17,6 +17,10 @@
  /gettext-runtime/doc/Admin/matrix.xml
  /gettext-tools/misc/archive.dir.tar
  
+# Files brought in by autopull.sh:
+/gettext-tools/tree-sitter-*
+/gettext-tools/tree-sitter.cfg
+
  # Files brought in by gnulib-tool:
  /GNUmakefile
  /INSTALL.generic
@@ -943,6 +947,8 @@ core
  /gettext-tools/libgrep/sys/types.h
  /gettext-tools/po/??@*.insert-header
  /gettext-tools/src/gettext.res
+/gettext-tools/src/libxgettextts1.a
+/gettext-tools/src/libxgettextts2.a
  /gettext-tools/src/textstyle.h
  /gettext-tools/src/textstyle/stdbool.h
  /gettext-tools/src/textstyle/version.h
diff --git a/NEWS b/NEWS

index 18b90593a173e3cff6e235a3f8a5dbf1eb1778b5..8da488fb5d2cec4bfabbf24e7dc682e6f6134b79 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,10 @@ Version 0.24 - January 2025
  # Programming languages support:
    * JavaScript:
      - xgettext now parses recursive JSX expressions correctly.
+  * Rust:
+    - xgettext now supports Rust.
+    - 'msgfmt -c' now verifies the syntax of translations of Rust format
+      strings.
    * C++:
      - A new example 'hello-c++-gnome3' has been added.
    * Ruby:
diff --git a/autogen.sh b/autogen.sh

index cf22fa1d42b8bb5389694df99613a733a953521d..51967dd87b0e7f2a9c75b006644a70f38b4969e8 100755 (executable)
--- a/autogen.sh
+++ b/autogen.sh
@@ -246,6 +246,7 @@ if ! $skip_gnulib; then
      strcspn
      strerror
      string-desc
+    strnlen
      strpbrk
      strtol
      strtoul
@@ -256,6 +257,7 @@ if ! $skip_gnulib; then
      trim
      unicase/u8-casefold
      unictype/ctype-space
+    unictype/property-white-space
      unictype/syntax-java-whitespace
      unilbrk/ulc-width-linebreaks
      uniname/uniname
@@ -395,7 +397,9 @@ if ! $skip_gnulib; then
      strchrnul
      strerror
      string-desc
+    strnlen
      unictype/ctype-space
+    unictype/property-white-space
      unilbrk/ulc-width-linebreaks
      unistr/u8-mbtouc
      unistr/u8-mbtoucr
diff --git a/autopull.sh b/autopull.sh

index df98f23dfbb81da0a9047d9082ff277a0e88dff6..6eecb3d3bd2c15ccc1a356ae41ddd1f0bfdf20b8 100755 (executable)
--- a/autopull.sh
+++ b/autopull.sh
@@ -1,5 +1,5 @@
  #!/bin/sh
-# Copyright (C) 2003-2022 Free Software Foundation, Inc.
+# Copyright (C) 2003-2025 Free Software Foundation, Inc.
  #
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
@@ -49,6 +49,65 @@ if ! test -f gettext-tools/misc/archive.dir.tar; then
    test $retval -eq 0 || exit $retval
  fi
  
+# func_git_clone_shallow SUBDIR URL REVISION
+func_git_clone_shallow ()
+{
+  # Creates in SUBDIR a git checkout of REVISION from URL, downloading as
+  # little as possible. git does not support cloning by commit hash, so
+  # initialize an empty repository and attempt a shallow fetch of REVISION,
+  # which can drastically reduce the download and processing time. If that
+  # fetch fails, a shallow fetch cannot be performed, because the depth of
+  # the commit is not known without fetching all commits; then fall back to
+  # fetching all commits. If the reset, the full fetch, or the checkout
+  # fails, SUBDIR is removed and the script exits with status 1.
+  # REVISION can be a commit id, a tag name, or a branch name.
+  mkdir -p "$1"
+  git -C "$1" init
+  git -C "$1" remote add origin "$2"
+  if git -C "$1" fetch --depth 1 origin "$3"; then
+    # The shallow "git fetch" of REVISION succeeded; check out what it fetched.
+    git -C "$1" reset --hard FETCH_HEAD || { rm -rf "$1"; exit 1; }
+    # "git fetch" does not fetch tags (at least in git version 2.43).
+    # If REVISION is neither the full commit id of HEAD nor the tracked
+    # branch name, treat it as a tag and add the tag explicitly.
+    revision=`git -C "$1" log -1 --pretty=format:%H`
+    branch=`LC_ALL=C git -C "$1" remote show origin \
+            | sed -n -e 's/^    \([^ ]*\) * tracked$/\1/p'`
+    test "$revision" = "$3" || test "$branch" = "$3" || git -C "$1" tag "$3"
+  else
+    # Shallow fetch failed: fetch the entire repository, then check out REVISION.
+    git -C "$1" fetch origin || { rm -rf "$1"; exit 1; }
+    git -C "$1" checkout "$3" || { rm -rf "$1"; exit 1; }
+  fi
+}
+
+# Fetch the compilable (mostly generated) tree-sitter source code.
+TREE_SITTER_VERSION=0.23.2
+TREE_SITTER_RUST_VERSION=0.23.2
+# Cache the relevant source code. Erase the rest of the tree-sitter projects. If patching fails, abort before the cache directory is created, so that unpatched sources are never cached and the next run retries.
+test -d gettext-tools/tree-sitter-$TREE_SITTER_VERSION || {
+  func_git_clone_shallow tree-sitter https://github.com/tree-sitter/tree-sitter.git v$TREE_SITTER_VERSION
+  (cd tree-sitter && patch -p1) < gettext-tools/build-aux/tree-sitter-portability.diff || { rm -rf tree-sitter; exit 1; }
+  mkdir gettext-tools/tree-sitter-$TREE_SITTER_VERSION
+  mv tree-sitter/LICENSE gettext-tools/tree-sitter-$TREE_SITTER_VERSION/LICENSE
+  mv tree-sitter/lib gettext-tools/tree-sitter-$TREE_SITTER_VERSION/lib
+  rm -rf tree-sitter
+}
+test -d gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION || {
+  func_git_clone_shallow tree-sitter-rust https://github.com/tree-sitter/tree-sitter-rust.git v$TREE_SITTER_RUST_VERSION
+  (cd tree-sitter-rust && patch -p1) < gettext-tools/build-aux/tree-sitter-rust-portability.diff || { rm -rf tree-sitter-rust; exit 1; }
+  mkdir gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION
+  mv tree-sitter-rust/LICENSE gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/LICENSE
+  mv tree-sitter-rust/src gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/src
+  mv gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/src/parser.c gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/src/rust-parser.c
+  mv gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/src/scanner.c gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/src/rust-scanner.c
+  rm -rf tree-sitter-rust
+}
+cat > gettext-tools/tree-sitter.cfg <<EOF
+TREE_SITTER_VERSION=$TREE_SITTER_VERSION
+TREE_SITTER_RUST_VERSION=$TREE_SITTER_RUST_VERSION
+EOF
+
  dir0=`pwd`
  
  echo "$0: generating files in libtextstyle..."
diff --git a/check-copyright-headers b/check-copyright-headers

index 99601335393bee3f379f568704ef147d63b1c10b..7dd6db2ed29b70c954811a730c423351788742cd 100755 (executable)
--- a/check-copyright-headers
+++ b/check-copyright-headers
@@ -1,6 +1,6 @@
  #!/bin/sh
  #
-# Copyright (C) 2019-2024 Free Software Foundation, Inc.
+# Copyright (C) 2019-2025 Free Software Foundation, Inc.
  #
  # This program is free software: you can redistribute it and/or modify
  # it under the terms of the GNU General Public License as published by
@@ -129,6 +129,12 @@ func_check_file ()
        # in attaching our own copyright header to each.
        return 0 ;;
  
+    /gettext-tools/tree-sitter*/* )
+      # These files come from the tree-sitter project.
+      # We are not interested in attaching our own copyright header to each.
+      # Their license file is at gettext-tools/tree-sitter*/LICENSE.
+      return 0 ;;
+
      /gettext-tools/examples/hello-*-gnome2/m4/*.m4 | \
      /gettext-tools/projects/GNOME/teams.html )
        # These files come from the GNOME project.
diff --git a/gettext-tools/Makefile.am b/gettext-tools/Makefile.am

index 6cabe30ee2b622ebce28684ef4e0a2bb9653fc68..0b07b2a5151e3f732382b2b4c874d28b80f6507d 100644 (file)
--- a/gettext-tools/Makefile.am
+++ b/gettext-tools/Makefile.am
@@ -1,5 +1,5 @@
  ## Makefile for the gettext-tools directory of GNU gettext
-## Copyright (C) 1995-2024 Free Software Foundation, Inc.
+## Copyright (C) 1995-2025 Free Software Foundation, Inc.
  ##
  ## This program is free software: you can redistribute it and/or modify
  ## it under the terms of the GNU General Public License as published by
@@ -26,6 +26,63 @@ MOSTLYCLEANFILES = core *.stackdump
  DISTCLEANFILES = build-aux/xml-validate-10 build-aux/xml-validate-11
  
  
+# Files imported from tree-sitter and tree-sitter-rust (fetched by autopull.sh).
+EXTRA_DIST += \
+  tree-sitter.cfg \
+  build-aux/tree-sitter-portability.diff \
+  tree-sitter-$(TREE_SITTER_VERSION)/LICENSE \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/include/tree_sitter/api.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/alloc.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/alloc.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/array.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/atomic.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/clock.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/error_costs.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/get_changed_ranges.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/get_changed_ranges.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/host.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/language.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/language.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/length.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/lexer.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/lexer.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/lib.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/node.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/parser.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/parser.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/point.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/query.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/reduce_action.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/reusable_node.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/stack.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/stack.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/subtree.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/subtree.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/tree.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/tree.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/tree_cursor.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/tree_cursor.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/unicode.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/unicode/LICENSE \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/unicode/README.md \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/unicode/ptypes.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/unicode/umachine.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/unicode/urename.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/unicode/utf.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/unicode/utf8.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/unicode/utf16.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/wasm_store.c \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/wasm_store.h \
+  tree-sitter-$(TREE_SITTER_VERSION)/lib/src/wasm/wasm-stdlib.h \
+  build-aux/tree-sitter-rust-portability.diff \
+  tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/LICENSE \
+  tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/rust-parser.c \
+  tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/rust-scanner.c \
+  tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/tree_sitter/alloc.h \
+  tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/tree_sitter/array.h \
+  tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/tree_sitter/parser.h
+
+
  # Files installed for the user.
  
  pkgdata_DATA = misc/disclaim-translations.txt
diff --git a/gettext-tools/build-aux/tree-sitter-portability.diff b/gettext-tools/build-aux/tree-sitter-portability.diff

new file mode 100644 (file)

index 0000000..14c79b3
--- /dev/null
+++ b/gettext-tools/build-aux/tree-sitter-portability.diff
@@ -0,0 +1,596 @@
+diff --git a/lib/src/alloc.h b/lib/src/alloc.h
+index a0eadb7a..8a055f87 100644
+--- a/lib/src/alloc.h
++++ b/lib/src/alloc.h
+@@ -9,7 +9,7 @@ extern "C" {
+ #include <stdio.h>
+ #include <stdlib.h>
+ 
+-#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32)
++#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32) || !(defined __GNUC__ || defined __clang__)
+ #define TS_PUBLIC
+ #else
+ #define TS_PUBLIC __attribute__((visibility("default")))
+diff --git a/lib/src/atomic.h b/lib/src/atomic.h
+index e680b60e..3640806f 100644
+--- a/lib/src/atomic.h
++++ b/lib/src/atomic.h
+@@ -5,7 +5,7 @@
+ #include <stdint.h>
+ #include <stdlib.h>
+ 
+-#ifdef __TINYC__
++#if 1 /* avoid portability pitfalls, cf. gnulib/m4/atomic-cas.m4 */
+ 
+ static inline size_t atomic_load(const volatile size_t *p) {
+   return *p;
+diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c
+index bcf8da94..d9e9ae66 100644
+--- a/lib/src/get_changed_ranges.c
++++ b/lib/src/get_changed_ranges.c
+@@ -160,7 +160,7 @@ static bool iterator_tree_is_visible(const Iterator *self) {
+     Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
+     return ts_language_alias_at(
+       self->language,
+-      parent.ptr->production_id,
++      parent.ptr->u.non_terminal.production_id,
+       entry.structural_child_index
+     ) != 0;
+   }
+@@ -187,7 +187,7 @@ static void iterator_get_visible_state(
+       const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
+       *alias_symbol = ts_language_alias_at(
+         self->language,
+-        parent->ptr->production_id,
++        parent->ptr->u.non_terminal.production_id,
+         entry.structural_child_index
+       );
+     }
+diff --git a/lib/src/node.c b/lib/src/node.c
+index 2768efbb..de07e694 100644
+--- a/lib/src/node.c
++++ b/lib/src/node.c
+@@ -60,7 +60,7 @@ static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
+   }
+   const TSSymbol *alias_sequence = ts_language_alias_sequence(
+     node->tree->language,
+-    subtree.ptr->production_id
++    subtree.ptr->u.non_terminal.production_id
+   );
+   return (NodeChildIterator) {
+     .tree = node->tree,
+@@ -141,9 +141,9 @@ static inline uint32_t ts_node__relevant_child_count(
+   Subtree tree = ts_node__subtree(self);
+   if (ts_subtree_child_count(tree) > 0) {
+     if (include_anonymous) {
+-      return tree.ptr->visible_child_count;
++      return tree.ptr->u.non_terminal.visible_child_count;
+     } else {
+-      return tree.ptr->named_child_count;
++      return tree.ptr->u.non_terminal.named_child_count;
+     }
+   } else {
+     return 0;
+@@ -610,7 +610,7 @@ recur:
+   const TSFieldMapEntry *field_map, *field_map_end;
+   ts_language_field_map(
+     self.tree->language,
+-    ts_node__subtree(self).ptr->production_id,
++    ts_node__subtree(self).ptr->u.non_terminal.production_id,
+     &field_map,
+     &field_map_end
+   );
+@@ -679,7 +679,7 @@ static inline const char *ts_node__field_name_from_language(TSNode self, uint32_
+     const TSFieldMapEntry *field_map, *field_map_end;
+     ts_language_field_map(
+       self.tree->language,
+-      ts_node__subtree(self).ptr->production_id,
++      ts_node__subtree(self).ptr->u.non_terminal.production_id,
+       &field_map,
+       &field_map_end
+     );
+@@ -749,7 +749,7 @@ TSNode ts_node_child_by_field_name(
+ uint32_t ts_node_child_count(TSNode self) {
+   Subtree tree = ts_node__subtree(self);
+   if (ts_subtree_child_count(tree) > 0) {
+-    return tree.ptr->visible_child_count;
++    return tree.ptr->u.non_terminal.visible_child_count;
+   } else {
+     return 0;
+   }
+@@ -758,7 +758,7 @@ uint32_t ts_node_child_count(TSNode self) {
+ uint32_t ts_node_named_child_count(TSNode self) {
+   Subtree tree = ts_node__subtree(self);
+   if (ts_subtree_child_count(tree) > 0) {
+-    return tree.ptr->named_child_count;
++    return tree.ptr->u.non_terminal.named_child_count;
+   } else {
+     return 0;
+   }
+diff --git a/lib/src/parser.c b/lib/src/parser.c
+index 2927d820..80719328 100644
+--- a/lib/src/parser.c
++++ b/lib/src/parser.c
+@@ -417,8 +417,8 @@ static void ts_parser__external_scanner_deserialize(
+   const char *data = NULL;
+   uint32_t length = 0;
+   if (external_token.ptr) {
+-    data = ts_external_scanner_state_data(&external_token.ptr->external_scanner_state);
+-    length = external_token.ptr->external_scanner_state.length;
++    data = ts_external_scanner_state_data(&external_token.ptr->u.external_scanner_state);
++    length = external_token.ptr->u.external_scanner_state.length;
+   }
+ 
+   if (ts_language_is_wasm(self->language)) {
+@@ -678,7 +678,7 @@ static Subtree ts_parser__lex(
+     if (found_external_token) {
+       MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result);
+       ts_external_scanner_state_init(
+-        &mut_result.ptr->external_scanner_state,
++        &mut_result.ptr->u.external_scanner_state,
+         self->lexer.debug_buffer,
+         external_scanner_state_len
+       );
+@@ -1012,7 +1012,7 @@ static StackVersion ts_parser__reduce(
+     } else {
+       parent.ptr->parse_state = state;
+     }
+-    parent.ptr->dynamic_precedence += dynamic_precedence;
++    parent.ptr->u.non_terminal.dynamic_precedence += dynamic_precedence;
+ 
+     // Push the parent node onto the stack, along with any extra tokens that
+     // were previously on top of the stack.
+@@ -1062,7 +1062,7 @@ static void ts_parser__accept(
+         root = ts_subtree_from_mut(ts_subtree_new_node(
+           ts_subtree_symbol(tree),
+           &trees,
+-          tree.ptr->production_id,
++          tree.ptr->u.non_terminal.production_id,
+           self->language
+         ));
+         ts_subtree_release(&self->tree_pool, tree);
+diff --git a/lib/src/stack.c b/lib/src/stack.c
+index 98d8c561..e5c05521 100644
+--- a/lib/src/stack.c
++++ b/lib/src/stack.c
+@@ -14,8 +14,10 @@
+ 
+ #if defined _WIN32 && !defined __GNUC__
+ #define forceinline __forceinline
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define forceinline static inline __attribute__((always_inline))
++#else
++#define forceinline static inline
+ #endif
+ 
+ typedef struct StackNode StackNode;
+@@ -798,7 +800,7 @@ bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f)
+     }
+ 
+     if (head->last_external_token.ptr) {
+-      const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
++      const ExternalScannerState *state = &head->last_external_token.ptr->u.external_scanner_state;
+       const char *data = ts_external_scanner_state_data(state);
+       fprintf(f, "\nexternal_scanner_state:");
+       for (uint32_t j = 0; j < state->length; j++) fprintf(f, " %2X", data[j]);
+diff --git a/lib/src/subtree.c b/lib/src/subtree.c
+index 2ab8f475..9d400536 100644
+--- a/lib/src/subtree.c
++++ b/lib/src/subtree.c
+@@ -26,34 +26,34 @@ typedef struct {
+ 
+ void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
+   self->length = length;
+-  if (length > sizeof(self->short_data)) {
+-    self->long_data = ts_malloc(length);
+-    memcpy(self->long_data, data, length);
++  if (length > sizeof(self->u.short_data)) {
++    self->u.long_data = ts_malloc(length);
++    memcpy(self->u.long_data, data, length);
+   } else {
+-    memcpy(self->short_data, data, length);
++    memcpy(self->u.short_data, data, length);
+   }
+ }
+ 
+ ExternalScannerState ts_external_scanner_state_copy(const ExternalScannerState *self) {
+   ExternalScannerState result = *self;
+-  if (self->length > sizeof(self->short_data)) {
+-    result.long_data = ts_malloc(self->length);
+-    memcpy(result.long_data, self->long_data, self->length);
++  if (self->length > sizeof(self->u.short_data)) {
++    result.u.long_data = ts_malloc(self->length);
++    memcpy(result.u.long_data, self->u.long_data, self->length);
+   }
+   return result;
+ }
+ 
+ void ts_external_scanner_state_delete(ExternalScannerState *self) {
+-  if (self->length > sizeof(self->short_data)) {
+-    ts_free(self->long_data);
++  if (self->length > sizeof(self->u.short_data)) {
++    ts_free(self->u.long_data);
+   }
+ }
+ 
+ const char *ts_external_scanner_state_data(const ExternalScannerState *self) {
+-  if (self->length > sizeof(self->short_data)) {
+-    return self->long_data;
++  if (self->length > sizeof(self->u.short_data)) {
++    return self->u.long_data;
+   } else {
+-    return self->short_data;
++    return self->u.short_data;
+   }
+ }
+ 
+@@ -216,7 +216,11 @@ Subtree ts_subtree_new_leaf(
+       .depends_on_column = depends_on_column,
+       .is_missing = false,
+       .is_keyword = is_keyword,
+-      {{.first_leaf = {.symbol = 0, .parse_state = 0}}}
++      .u = {
++        .non_terminal = {
++          .first_leaf = {.symbol = 0, .parse_state = 0}
++        }
++      }
+     };
+     return (Subtree) {.ptr = data};
+   }
+@@ -251,7 +255,7 @@ Subtree ts_subtree_new_error(
+   SubtreeHeapData *data = (SubtreeHeapData *)result.ptr;
+   data->fragile_left = true;
+   data->fragile_right = true;
+-  data->lookahead_char = lookahead_char;
++  data->u.lookahead_char = lookahead_char;
+   return result;
+ }
+ 
+@@ -267,8 +271,8 @@ MutableSubtree ts_subtree_clone(Subtree self) {
+       ts_subtree_retain(new_children[i]);
+     }
+   } else if (self.ptr->has_external_tokens) {
+-    result->external_scanner_state = ts_external_scanner_state_copy(
+-      &self.ptr->external_scanner_state
++    result->u.external_scanner_state = ts_external_scanner_state_copy(
++      &self.ptr->u.external_scanner_state
+     );
+   }
+   result->ref_count = 1;
+@@ -344,7 +348,7 @@ void ts_subtree_balance(Subtree self, SubtreePool *pool, const TSLanguage *langu
+   while (pool->tree_stack.size > 0) {
+     MutableSubtree tree = array_pop(&pool->tree_stack);
+ 
+-    if (tree.ptr->repeat_depth > 0) {
++    if (tree.ptr->u.non_terminal.repeat_depth > 0) {
+       Subtree child1 = ts_subtree_children(tree)[0];
+       Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1];
+       long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2);
+@@ -373,18 +377,18 @@ void ts_subtree_summarize_children(
+ ) {
+   assert(!self.data.is_inline);
+ 
+-  self.ptr->named_child_count = 0;
+-  self.ptr->visible_child_count = 0;
++  self.ptr->u.non_terminal.named_child_count = 0;
++  self.ptr->u.non_terminal.visible_child_count = 0;
+   self.ptr->error_cost = 0;
+-  self.ptr->repeat_depth = 0;
+-  self.ptr->visible_descendant_count = 0;
++  self.ptr->u.non_terminal.repeat_depth = 0;
++  self.ptr->u.non_terminal.visible_descendant_count = 0;
+   self.ptr->has_external_tokens = false;
+   self.ptr->depends_on_column = false;
+   self.ptr->has_external_scanner_state_change = false;
+-  self.ptr->dynamic_precedence = 0;
++  self.ptr->u.non_terminal.dynamic_precedence = 0;
+ 
+   uint32_t structural_index = 0;
+-  const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
++  const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->u.non_terminal.production_id);
+   uint32_t lookahead_end_byte = 0;
+ 
+   const Subtree *children = ts_subtree_children(self);
+@@ -430,27 +434,27 @@ void ts_subtree_summarize_children(
+         if (ts_subtree_visible(child)) {
+           self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
+         } else if (grandchild_count > 0) {
+-          self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
++          self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->u.non_terminal.visible_child_count;
+         }
+       }
+     }
+ 
+-    self.ptr->dynamic_precedence += ts_subtree_dynamic_precedence(child);
+-    self.ptr->visible_descendant_count += ts_subtree_visible_descendant_count(child);
++    self.ptr->u.non_terminal.dynamic_precedence += ts_subtree_dynamic_precedence(child);
++    self.ptr->u.non_terminal.visible_descendant_count += ts_subtree_visible_descendant_count(child);
+ 
+     if (alias_sequence && alias_sequence[structural_index] != 0 && !ts_subtree_extra(child)) {
+-      self.ptr->visible_descendant_count++;
+-      self.ptr->visible_child_count++;
++      self.ptr->u.non_terminal.visible_descendant_count++;
++      self.ptr->u.non_terminal.visible_child_count++;
+       if (ts_language_symbol_metadata(language, alias_sequence[structural_index]).named) {
+-        self.ptr->named_child_count++;
++        self.ptr->u.non_terminal.named_child_count++;
+       }
+     } else if (ts_subtree_visible(child)) {
+-      self.ptr->visible_descendant_count++;
+-      self.ptr->visible_child_count++;
+-      if (ts_subtree_named(child)) self.ptr->named_child_count++;
++      self.ptr->u.non_terminal.visible_descendant_count++;
++      self.ptr->u.non_terminal.visible_child_count++;
++      if (ts_subtree_named(child)) self.ptr->u.non_terminal.named_child_count++;
+     } else if (grandchild_count > 0) {
+-      self.ptr->visible_child_count += child.ptr->visible_child_count;
+-      self.ptr->named_child_count += child.ptr->named_child_count;
++      self.ptr->u.non_terminal.visible_child_count += child.ptr->u.non_terminal.visible_child_count;
++      self.ptr->u.non_terminal.named_child_count += child.ptr->u.non_terminal.named_child_count;
+     }
+ 
+     if (ts_subtree_has_external_tokens(child)) self.ptr->has_external_tokens = true;
+@@ -479,8 +483,8 @@ void ts_subtree_summarize_children(
+     Subtree first_child = children[0];
+     Subtree last_child = children[self.ptr->child_count - 1];
+ 
+-    self.ptr->first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
+-    self.ptr->first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
++    self.ptr->u.non_terminal.first_leaf.symbol = ts_subtree_leaf_symbol(first_child);
++    self.ptr->u.non_terminal.first_leaf.parse_state = ts_subtree_leaf_parse_state(first_child);
+ 
+     if (ts_subtree_fragile_left(first_child)) self.ptr->fragile_left = true;
+     if (ts_subtree_fragile_right(last_child)) self.ptr->fragile_right = true;
+@@ -492,9 +496,9 @@ void ts_subtree_summarize_children(
+       ts_subtree_symbol(first_child) == self.ptr->symbol
+     ) {
+       if (ts_subtree_repeat_depth(first_child) > ts_subtree_repeat_depth(last_child)) {
+-        self.ptr->repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
++        self.ptr->u.non_terminal.repeat_depth = ts_subtree_repeat_depth(first_child) + 1;
+       } else {
+-        self.ptr->repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
++        self.ptr->u.non_terminal.repeat_depth = ts_subtree_repeat_depth(last_child) + 1;
+       }
+     }
+   }
+@@ -531,11 +535,13 @@ MutableSubtree ts_subtree_new_node(
+     .fragile_left = fragile,
+     .fragile_right = fragile,
+     .is_keyword = false,
+-    {{
+-      .visible_descendant_count = 0,
+-      .production_id = production_id,
+-      .first_leaf = {.symbol = 0, .parse_state = 0},
+-    }}
++    .u = {
++      .non_terminal = {
++        .visible_descendant_count = 0,
++        .production_id = production_id,
++        .first_leaf = {.symbol = 0, .parse_state = 0},
++      }
++    }
+   };
+   MutableSubtree result = {.ptr = data};
+   ts_subtree_summarize_children(result, language);
+@@ -612,7 +618,7 @@ void ts_subtree_release(SubtreePool *pool, Subtree self) {
+       ts_free(children);
+     } else {
+       if (tree.ptr->has_external_tokens) {
+-        ts_external_scanner_state_delete(&tree.ptr->external_scanner_state);
++        ts_external_scanner_state_delete(&tree.ptr->u.external_scanner_state);
+       }
+       ts_subtree_pool_free(pool, tree.ptr);
+     }
+@@ -879,7 +885,7 @@ static size_t ts_subtree__write_to_string(
+ 
+     if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
+       cursor += snprintf(*writer, limit, "(UNEXPECTED ");
+-      cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
++      cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->u.lookahead_char);
+     } else {
+       TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
+       const char *symbol_name = ts_language_symbol_name(language, symbol);
+@@ -907,11 +913,11 @@ static size_t ts_subtree__write_to_string(
+   }
+ 
+   if (ts_subtree_child_count(self)) {
+-    const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
++    const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->u.non_terminal.production_id);
+     const TSFieldMapEntry *field_map, *field_map_end;
+     ts_language_field_map(
+       language,
+-      self.ptr->production_id,
++      self.ptr->u.non_terminal.production_id,
+       &field_map,
+       &field_map_end
+     );
+@@ -1010,8 +1016,8 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
+     ts_subtree_lookahead_bytes(*self)
+   );
+ 
+-  if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) {
+-    fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
++  if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->u.lookahead_char != 0) {
++    fprintf(f, "\ncharacter: '%c'", self->ptr->u.lookahead_char);
+   }
+ 
+   fprintf(f, "\"]\n");
+@@ -1041,14 +1047,14 @@ void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *
+ }
+ 
+ const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self) {
+-  static const ExternalScannerState empty_state = {{.short_data = {0}}, .length = 0};
++  static const ExternalScannerState empty_state = {.u = {.short_data = {0}}, .length = 0};
+   if (
+     self.ptr &&
+     !self.data.is_inline &&
+     self.ptr->has_external_tokens &&
+     self.ptr->child_count == 0
+   ) {
+-    return &self.ptr->external_scanner_state;
++    return &self.ptr->u.external_scanner_state;
+   } else {
+     return &empty_state;
+   }
+diff --git a/lib/src/subtree.h b/lib/src/subtree.h
+index f140ecdb..dde8729c 100644
+--- a/lib/src/subtree.h
++++ b/lib/src/subtree.h
+@@ -32,7 +32,7 @@ typedef struct {
+   union {
+     char *long_data;
+     char short_data[24];
+-  };
++  } u;
+   uint32_t length;
+ } ExternalScannerState;
+ 
+@@ -143,14 +143,14 @@ typedef struct {
+         TSSymbol symbol;
+         TSStateId parse_state;
+       } first_leaf;
+-    };
++    } non_terminal;
+ 
+     // External terminal subtrees (`child_count == 0 && has_external_tokens`)
+     ExternalScannerState external_scanner_state;
+ 
+     // Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
+     int32_t lookahead_char;
+-  };
++  } u;
+ } SubtreeHeapData;
+ 
+ // The fundamental building block of a syntax tree.
+@@ -248,13 +248,13 @@ static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
+ static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
+   if (self.data.is_inline) return self.data.symbol;
+   if (self.ptr->child_count == 0) return self.ptr->symbol;
+-  return self.ptr->first_leaf.symbol;
++  return self.ptr->u.non_terminal.first_leaf.symbol;
+ }
+ 
+ static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
+   if (self.data.is_inline) return self.data.parse_state;
+   if (self.ptr->child_count == 0) return self.ptr->parse_state;
+-  return self.ptr->first_leaf.parse_state;
++  return self.ptr->u.non_terminal.first_leaf.parse_state;
+ }
+ 
+ static inline Length ts_subtree_padding(Subtree self) {
+@@ -288,7 +288,7 @@ static inline uint32_t ts_subtree_child_count(Subtree self) {
+ }
+ 
+ static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
+-  return self.data.is_inline ? 0 : self.ptr->repeat_depth;
++  return self.data.is_inline ? 0 : self.ptr->u.non_terminal.repeat_depth;
+ }
+ 
+ static inline uint32_t ts_subtree_is_repetition(Subtree self) {
+@@ -300,12 +300,12 @@ static inline uint32_t ts_subtree_is_repetition(Subtree self) {
+ static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) {
+   return (self.data.is_inline || self.ptr->child_count == 0)
+     ? 0
+-    : self.ptr->visible_descendant_count;
++    : self.ptr->u.non_terminal.visible_descendant_count;
+ }
+ 
+ static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
+   if (ts_subtree_child_count(self) > 0) {
+-    return self.ptr->visible_child_count;
++    return self.ptr->u.non_terminal.visible_child_count;
+   } else {
+     return 0;
+   }
+@@ -320,12 +320,12 @@ static inline uint32_t ts_subtree_error_cost(Subtree self) {
+ }
+ 
+ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
+-  return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
++  return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->u.non_terminal.dynamic_precedence;
+ }
+ 
+ static inline uint16_t ts_subtree_production_id(Subtree self) {
+   if (ts_subtree_child_count(self) > 0) {
+-    return self.ptr->production_id;
++    return self.ptr->u.non_terminal.production_id;
+   } else {
+     return 0;
+   }
+diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c
+index 24416663..b68770c0 100644
+--- a/lib/src/tree_cursor.c
++++ b/lib/src/tree_cursor.c
+@@ -24,7 +24,7 @@ static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint3
+     TreeCursorEntry *parent_entry = &self->stack.contents[index - 1];
+     return ts_language_alias_at(
+       self->tree->language,
+-      parent_entry->subtree->ptr->production_id,
++      parent_entry->subtree->ptr->u.non_terminal.production_id,
+       entry->structural_child_index
+     );
+   } else {
+@@ -39,7 +39,7 @@ static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCurs
+   }
+   const TSSymbol *alias_sequence = ts_language_alias_sequence(
+     self->tree->language,
+-    last_entry->subtree->ptr->production_id
++    last_entry->subtree->ptr->u.non_terminal.production_id
+   );
+ 
+   uint32_t descendant_index = last_entry->descendant_index;
+@@ -481,7 +481,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
+     TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
+     alias_symbol = ts_language_alias_at(
+       self->tree->language,
+-      parent_entry->subtree->ptr->production_id,
++      parent_entry->subtree->ptr->u.non_terminal.production_id,
+       last_entry->structural_child_index
+     );
+   }
+@@ -520,7 +520,7 @@ void ts_tree_cursor_current_status(
+ 
+     const TSSymbol *alias_sequence = ts_language_alias_sequence(
+       self->tree->language,
+-      parent_entry->subtree->ptr->production_id
++      parent_entry->subtree->ptr->u.non_terminal.production_id
+     );
+ 
+     #define subtree_symbol(subtree, structural_child_index) \
+@@ -570,7 +570,7 @@ void ts_tree_cursor_current_status(
+         } else if (ts_subtree_visible_child_count(sibling) > 0) {
+           *has_later_siblings = true;
+           if (*has_later_named_siblings) break;
+-          if (sibling.ptr->named_child_count > 0) {
++          if (sibling.ptr->u.non_terminal.named_child_count > 0) {
+             *has_later_named_siblings = true;
+             break;
+           }
+@@ -585,7 +585,7 @@ void ts_tree_cursor_current_status(
+       const TSFieldMapEntry *field_map, *field_map_end;
+       ts_language_field_map(
+         self->tree->language,
+-        parent_entry->subtree->ptr->production_id,
++        parent_entry->subtree->ptr->u.non_terminal.production_id,
+         &field_map, &field_map_end
+       );
+ 
+@@ -636,7 +636,7 @@ TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) {
+       TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
+       alias_symbol = ts_language_alias_at(
+         self->tree->language,
+-        parent_entry->subtree->ptr->production_id,
++        parent_entry->subtree->ptr->u.non_terminal.production_id,
+         entry->structural_child_index
+       );
+       is_visible = (alias_symbol != 0) || ts_subtree_visible(*entry->subtree);
+@@ -672,7 +672,7 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
+     const TSFieldMapEntry *field_map, *field_map_end;
+     ts_language_field_map(
+       self->tree->language,
+-      parent_entry->subtree->ptr->production_id,
++      parent_entry->subtree->ptr->u.non_terminal.production_id,
+       &field_map, &field_map_end
+     );
+     for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
diff --git a/gettext-tools/build-aux/tree-sitter-rust-portability.diff b/gettext-tools/build-aux/tree-sitter-rust-portability.diff

new file mode 100644 (file)

index 0000000..c919e24
--- /dev/null
+++ b/gettext-tools/build-aux/tree-sitter-rust-portability.diff
@@ -0,0 +1,32 @@
+diff --git a/src/parser.c b/src/parser.c
+index ff8fce9..edb5420 100644
+--- a/src/parser.c
++++ b/src/parser.c
+@@ -195927,8 +195927,10 @@ void tree_sitter_rust_external_scanner_deserialize(void *, const char *, unsigne
+ #define TS_PUBLIC
+ #elif defined(_WIN32)
+ #define TS_PUBLIC __declspec(dllexport)
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define TS_PUBLIC __attribute__((visibility("default")))
++#else
++#define TS_PUBLIC
+ #endif
+ 
+ TS_PUBLIC const TSLanguage *tree_sitter_rust(void) {
+diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h
+index 799f599..130b4d0 100644
+--- a/src/tree_sitter/parser.h
++++ b/src/tree_sitter/parser.h
+@@ -155,8 +155,10 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
+ 
+ #ifdef _MSC_VER
+ #define UNUSED __pragma(warning(suppress : 4101))
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define UNUSED __attribute__((unused))
++#else
++#define UNUSED
+ #endif
+ 
+ #define START_LEXER()           \
diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac

index 92165731466013d54bb2a124159e0dfd6a93c194..83581c6a47db233270223b2657a61e5912d76f85 100644 (file)
--- a/gettext-tools/configure.ac
+++ b/gettext-tools/configure.ac
@@ -560,8 +560,19 @@ changequote([,])dnl
  fi
  AC_SUBST([ARCHIVE_FORMAT])
  
+dnl Get versions of files brought in by autopull.sh.
  ARCHIVE_VERSION=0.23.1
  AC_SUBST([ARCHIVE_VERSION])
+dnl We don't offer a --with-tree-sitter-prefix option, but instead always use
+dnl the tree-sitter version included in the tarball. This is required because
+dnl there is no stability guarantee for grammar elements: grammar elements can
+dnl be changed, extended, or even removed in future tree-sitter-<lang> versions,
+dnl whereas the x-<lang>.c code assumes that certain grammar elements have a
+dnl specific structure. We may need to change x-<lang>.c so that it works with
+dnl a newer version of tree-sitter-<lang>.
+. $srcdir/tree-sitter.cfg
+AC_SUBST([TREE_SITTER_VERSION])
+AC_SUBST([TREE_SITTER_RUST_VERSION])
  
  PACKAGE_SUFFIX="-$ARCHIVE_VERSION"
  AC_SUBST([PACKAGE_SUFFIX])
diff --git a/gettext-tools/doc/Makefile.am b/gettext-tools/doc/Makefile.am

index 47574e1ecd06b20d6d03b6442ce12a2127251807..bc43d4b23f36e9d7504513e96848f92febd90ad2 100644 (file)
--- a/gettext-tools/doc/Makefile.am
+++ b/gettext-tools/doc/Makefile.am
@@ -1,5 +1,5 @@
  ## Makefile for the gettext-tools/doc subdirectory of GNU gettext
-## Copyright (C) 1995-2023 Free Software Foundation, Inc.
+## Copyright (C) 1995-2025 Free Software Foundation, Inc.
  ##
  ## This program is free software: you can redistribute it and/or modify
  ## it under the terms of the GNU General Public License as published by
@@ -69,6 +69,7 @@ gettext_TEXINFOS = \
    lang-clisp-c.texi \
    lang-elisp.texi \
    lang-librep.texi \
+  lang-rust.texi \
    lang-ruby.texi \
    lang-sh.texi \
      $(top_srcdir)/../gettext-runtime/doc/rt-gettext.texi \
diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi

index 680c7309dbe66c890d0f6d8441cadd929dcb7146..1af57ccd643f4ff6c89f374c420f34464dd4fc9d 100644 (file)
--- a/gettext-tools/doc/gettext.texi
+++ b/gettext-tools/doc/gettext.texi
@@ -91,7 +91,7 @@ This file provides documentation for GNU @code{gettext} utilities.
  It also serves as a reference for the free Translation Project.
  
  @copying
-Copyright (C) 1995-1998, 2001-2024 Free Software Foundation, Inc.
+Copyright (C) 1995-1998, 2001-2025 Free Software Foundation, Inc.
  
  This manual is free documentation.  It is dually licensed under the
  GNU FDL and the GNU GPL.  This means that you can redistribute this
@@ -126,7 +126,7 @@ A copy of the license is included in @ref{GNU GPL}.
  @page
  @vskip 0pt plus 1filll
  @c @insertcopying
-Copyright (C) 1995-1998, 2001-2024 Free Software Foundation, Inc.
+Copyright (C) 1995-1998, 2001-2025 Free Software Foundation, Inc.
  
  This manual is free documentation.  It is dually licensed under the
  GNU FDL and the GNU GPL.  This means that you can redistribute this
@@ -405,6 +405,7 @@ The Translator's View
  * lisp-format::                 Lisp Format Strings
  * elisp-format::                Emacs Lisp Format Strings
  * librep-format::               librep Format Strings
+* rust-format::                 Rust Format Strings
  * ruby-format::                 Ruby Format Strings
  * sh-format::                   Shell Format Strings
  * awk-format::                  awk Format Strings
@@ -435,6 +436,7 @@ Individual Programming Languages
  * clisp C::                     GNU clisp C sources
  * Emacs Lisp::                  Emacs Lisp
  * librep::                      librep
+* Rust::                        Rust
  * Ruby::                        Ruby
  * sh::                          sh - Shell Script
  * bash::                        bash - Bourne-Again Shell Script
@@ -1679,6 +1681,12 @@ Likewise for Emacs Lisp, see @ref{elisp-format}.
  @kwindex no-librep-format@r{ flag}
  Likewise for librep, see @ref{librep-format}.
  
+@item rust-format
+@kwindex rust-format@r{ flag}
+@itemx no-rust-format
+@kwindex no-rust-format@r{ flag}
+Likewise for Rust, see @ref{rust-format}.
+
  @item ruby-format
  @kwindex ruby-format@r{ flag}
  @itemx no-ruby-format
@@ -9857,6 +9865,7 @@ strings.
  * lisp-format::                 Lisp Format Strings
  * elisp-format::                Emacs Lisp Format Strings
  * librep-format::               librep Format Strings
+* rust-format::                 Rust Format Strings
  * ruby-format::                 Ruby Format Strings
  * sh-format::                   Shell Format Strings
  * awk-format::                  awk Format Strings
@@ -10042,6 +10051,16 @@ librep format strings are documented in the librep manual, section
  @url{http://librep.sourceforge.net/librep-manual.html#Formatted%20Output},
  @url{http://www.gwinnup.org/research/docs/librep.html#SEC122}.
  
+@node rust-format
+@subsection Rust Format Strings
+
+Rust format strings are those supported by the @code{formatx} library
+@url{https://crates.io/crates/formatx}.
+That is, they are the format strings supported by the @code{format!} built-in
+@url{https://doc.rust-lang.org/std/fmt/}
+with the restrictions listed in
+@url{https://crates.io/crates/formatx}, section "Limitations".
+
  @node ruby-format
  @subsection Ruby Format Strings
  
@@ -10310,6 +10329,7 @@ that language, and to combine the resulting files using @code{msgcat}.
  * clisp C::                     GNU clisp C sources
  * Emacs Lisp::                  Emacs Lisp
  * librep::                      librep
+* Rust::                        Rust
  * Ruby::                        Ruby
  * sh::                          sh - Shell Script
  * bash::                        bash - Bourne-Again Shell Script
@@ -10337,6 +10357,7 @@ that language, and to combine the resulting files using @code{msgcat}.
  @include lang-clisp-c.texi
  @include lang-elisp.texi
  @include lang-librep.texi
+@include lang-rust.texi
  @include lang-ruby.texi
  @include lang-sh.texi
  @include lang-bash.texi
diff --git a/gettext-tools/doc/lang-rust.texi b/gettext-tools/doc/lang-rust.texi

new file mode 100644 (file)

index 0000000..b4cb94a
--- /dev/null
+++ b/gettext-tools/doc/lang-rust.texi
@@ -0,0 +1,126 @@
+@c This file is part of the GNU gettext manual.
+@c Copyright (C) 1995-2025 Free Software Foundation, Inc.
+@c See the file gettext.texi for copying conditions.
+
+@node Rust
+@subsection Rust
+@cindex Rust
+
+@ignore
+Note: When we say "Rust" here, we mean the Rust programming language.
+According to the Rust trademark policy valid in 2024
+https://foundation.rust-lang.org/policies/logo-policy-and-media-guide/#trademark-policy
+our use of the term "Rust" in the code does not require approval:
+  "Stating accurately that software is written in the Rust programming
+   language, that it is compatible with the Rust programming language,
+   or that it contains the Rust programming language, is allowed. In
+   those cases, you may use the Rust trademarks to indicate this,
+   without prior approval. This is true both for non-commercial and
+   commercial uses."
+And our use of the term "Rust" in the documentation does not require
+approval either:
+  "Using the word “Rust” on websites, brochures, documentation, academic
+   papers, books, and product packaging to refer to the Rust programming
+   language or the Rust project is allowed."
+So, we do NOT have to use different spellings, such as "R*st" or "Crablang".
+@end ignore
+
+@table @asis
+@item RPMs
+rust, rust-cargo
+
+@item Ubuntu packages
+rustc, cargo
+@c optionally librust-gettext-rs-dev
+
+@item File extension
+@code{rs}
+
+@item String syntax
+@code{"abc"}, @code{r"abc"}, @code{r#"abc"#} etc.
+
+@item gettext shorthand
+---
+
+@item gettext/ngettext functions
+@code{gettext}, @code{ngettext}
+
+@item textdomain
+@code{textdomain} function
+
+@item bindtextdomain
+@code{bindtextdomain} function
+
+@item setlocale
+@code{setlocale} function
+
+@item Prerequisite
+@code{$ cargo add gettext-rs}
+
+@code{use gettextrs::*;}
+
+Note: We recommend the @samp{gettext-rs} crate.
+We do not recommend the @samp{gettext} crate, because
+(as of 2025) it does not handle
+catalog fallback (e.g. from @code{de_AT} to @code{de})
+nor the @code{LANGUAGE} environment variable.
+
+@item Use or emulate GNU gettext
+use
+
+@item Extractor
+@code{xgettext}
+
+@item Formatting with positions
+There are three common ways of doing string formatting in Rust:
+@itemize @bullet
+@item
+Using the built-ins @code{format!}, @code{println!}, etc.
+This facility supports only constant strings, known at compile-time.
+Thus it cannot be used with translated format strings.
+You would get an error such as
+``error: format argument must be a string literal''.
+@item
+Using the @code{strfmt} library.
+@c https://crates.io/crates/strfmt
+@c https://lib.rs/crates/strfmt
+@c https://github.com/vitiral/strfmt
+This facility cannot be recommended,
+because it does not support the case where
+some of the values are strings and some of the values are numbers
+(without an excessive amount of contortions).
+@item
+Using the @code{formatx} library.
+@c https://crates.io/crates/formatx
+@c https://lib.rs/crates/formatx
+@c https://github.com/clitic/formatx
+This is the one we recommend.
+@end itemize
+
+So, you have to convert the @code{format!}, @code{println!}, etc.
+invocations to use @code{formatx}.
+For example,
+@example
+println!("Hello @{@}, you got @{@} coins.", name, left);
+@end example
+@noindent
+becomes
+@example
+println!("@{@}", formatx!(gettext("Hello @{@}, you got @{@} coins."),
+                        name, left)
+               .unwrap());
+@end example
+
+For swapped positions, a translator may translate
+@code{"Hello @{@}, you got @{@} coins."}
+with
+@code{"Hello, @{1@} coins are left for you, @{0@}."}
+
+@item Portability
+fully portable
+
+@item po-mode marking
+---
+@end table
+
+@c Rust me harder. Cargo me to hell.
diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi

index ce78736e0815454c80310768519ce6d047ccf33b..299f5ef75f4b65c65140d275531684b015bf8935 100644 (file)
--- a/gettext-tools/doc/xgettext.texi
+++ b/gettext-tools/doc/xgettext.texi
@@ -1,5 +1,5 @@
  @c This file is part of the GNU gettext manual.
-@c Copyright (C) 1995-2024 Free Software Foundation, Inc.
+@c Copyright (C) 1995-2025 Free Software Foundation, Inc.
  @c See the file gettext.texi for copying conditions.
  
  @pindex xgettext
@@ -75,11 +75,11 @@ is written to standard output.
  Specifies the language of the input files.  The supported languages
  are @code{C}, @code{C++}, @code{ObjectiveC}, @code{PO}, @code{Shell},
  @code{Python}, @code{Lisp}, @code{EmacsLisp}, @code{librep}, @code{Scheme},
-@code{Guile},
-@code{Smalltalk}, @code{Java}, @code{JavaProperties}, @code{C#}, @code{awk},
-@code{YCP}, @code{Tcl}, @code{Perl}, @code{PHP}, @code{Ruby},
-@code{GCC-source}, @code{NXStringTable}, @code{RST}, @code{RSJ}, @code{Glade},
-@code{Lua}, @code{JavaScript}, @code{Vala}, @code{GSettings}, @code{Desktop}.
+@code{Guile}, @code{Smalltalk}, @code{Java}, @code{JavaProperties},
+@code{C#}, @code{Rust}, @code{Ruby}, @code{awk}, @code{YCP},
+@code{Tcl}, @code{Perl}, @code{PHP}, @code{GCC-source}, @code{NXStringTable},
+@code{RST}, @code{RSJ}, @code{Glade}, @code{Lua}, @code{JavaScript},
+@code{Vala}, @code{GSettings}, @code{Desktop}.
  
  @item -C
  @itemx --c++
diff --git a/gettext-tools/libgettextpo/Makefile.am b/gettext-tools/libgettextpo/Makefile.am

index 6472de63b073da339b7a8a817f1de1cd56541722..34a930e28289e7418eaa4d4292376bcb3e7f28a8 100644 (file)
--- a/gettext-tools/libgettextpo/Makefile.am
+++ b/gettext-tools/libgettextpo/Makefile.am
@@ -1,5 +1,5 @@
  ## Makefile for the gettext-tools/libgettextpo subdirectory of GNU gettext
-## Copyright (C) 1995-2024 Free Software Foundation, Inc.
+## Copyright (C) 1995-2025 Free Software Foundation, Inc.
  ##
  ## This program is free software: you can redistribute it and/or modify
  ## it under the terms of the GNU General Public License as published by
@@ -79,6 +79,7 @@ libgettextpo_la_AUXSOURCES = \
    ../src/format-lisp.c \
    ../src/format-elisp.c \
    ../src/format-librep.c \
+  ../src/format-rust.c \
    ../src/format-ruby.c \
    ../src/format-sh.c \
    ../src/format-awk.c \
diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES

index f877e5347cd0b6114c65dcb82d01304befca6af3..554667952837d1e92d2f8d71433b2ee98b285f51 100644 (file)
--- a/gettext-tools/src/FILES
+++ b/gettext-tools/src/FILES
@@ -236,6 +236,7 @@ format-scheme.c        Format string handling for Scheme.
  format-lisp.c          Format string handling for Common Lisp.
  format-elisp.c         Format string handling for Emacs Lisp.
  format-librep.c        Format string handling for librep.
+format-rust.c          Format string handling for Rust.
  format-ruby.c          Format string handling for Ruby.
  format-sh.c            Format string handling for Shell.
  format-awk.c           Format string handling for awk.
@@ -384,6 +385,9 @@ msgl-check.c
  | x-librep.h
  | x-librep.c
  |               String extractor for librep.
+| x-rust.h
+| x-rust.c
+|               String extractor for Rust.
  | x-ruby.h
  | x-ruby.c
  |               String extractor for Ruby.
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am

index cde2ad29f69aa03cd7a1425efb39166d82e4f057..c5aa1dacdcf623cdafb2a893b31bd0caa8be7aec 100644 (file)
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -1,5 +1,5 @@
  ## Makefile for the gettext-tools/src subdirectory of GNU gettext
-## Copyright (C) 1995-2024 Free Software Foundation, Inc.
+## Copyright (C) 1995-2025 Free Software Foundation, Inc.
  ##
  ## This program is free software: you can redistribute it and/or modify
  ## it under the terms of the GNU General Public License as published by
@@ -41,6 +41,8 @@ else
  noinst_LTLIBRARIES = libgettextsrc.la
  endif
  
+noinst_LIBRARIES = libxgettextts1.a libxgettextts2.a
+
  noinst_HEADERS = \
    pos.h message.h po-error.h xerror-handler.h po-xerror.h \
    read-po-internal.h po-charset.h read-po-lex.h \
@@ -78,6 +80,7 @@ noinst_HEADERS = \
    x-lisp.h \
    x-elisp.h \
    x-librep.h \
+  x-rust.h \
    x-ruby.h \
    x-sh.h \
    x-awk.h \
@@ -109,6 +112,7 @@ schemadir = $(pkgdatadir)/schema
  # The option -I$(top_srcdir) is needed so that woe32dll/export.h is found.
  AM_CPPFLAGS = \
    -I$(top_srcdir) \
+  -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include \
    -I../libgrep -I$(top_srcdir)/libgrep \
    -I../gnulib-lib -I$(top_srcdir)/gnulib-lib \
    -I../../gettext-runtime/intl -I$(top_srcdir)/../gettext-runtime/intl
@@ -189,6 +193,7 @@ FORMAT_SOURCE += \
    format-lisp.c \
    format-elisp.c \
    format-librep.c \
+  format-rust.c \
    format-ruby.c \
    format-sh.c \
    format-awk.c \
@@ -232,6 +237,19 @@ libgettextsrc_la_SOURCES = \
    locating-rules.c its.c \
    search-path.c
  
+# xgettext has some tree-sitter based backends.
+LIBXGETTEXTTS = libxgettextts2.a libxgettextts1.a
+libxgettextts1_a_SOURCES = \
+  ../tree-sitter-$(TREE_SITTER_VERSION)/lib/src/lib.c
+libxgettextts1_a_CPPFLAGS = \
+  -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include \
+  -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/src
+libxgettextts2_a_SOURCES = \
+  ../tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/rust-parser.c \
+  ../tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/rust-scanner.c
+libxgettextts2_a_CPPFLAGS = \
+  -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include
+
  # msggrep needs pattern matching.
  LIBGREP = ../libgrep/libgrep.a
  
@@ -277,6 +295,7 @@ xgettext_SOURCES += \
    x-lisp.c \
    x-elisp.c \
    x-librep.c \
+  x-rust.c \
    x-ruby.c \
    x-sh.c ../../gettext-runtime/src/escapes.h \
    x-awk.c \
@@ -428,7 +447,7 @@ msgcmp_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ @MSGMERGE_LIBM@ $(WOE32_LDADD
  msgfmt_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ $(WOE32_LDADD)
  msgmerge_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ @MSGMERGE_LIBM@ $(WOE32_LDADD) $(OPENMP_CFLAGS)
  msgunfmt_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ $(WOE32_LDADD)
-xgettext_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ @LTLIBICONV@ $(WOE32_LDADD)
+xgettext_LDADD = $(LIBXGETTEXTTS) libgettextsrc.la @INTL_MACOSX_LIBS@ @LTLIBICONV@ $(WOE32_LDADD)
  msgattrib_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ $(WOE32_LDADD)
  msgcat_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ $(WOE32_LDADD)
  msgcomm_LDADD = libgettextsrc.la @INTL_MACOSX_LIBS@ $(WOE32_LDADD)
@@ -446,7 +465,7 @@ msgcmp_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LD
  msgfmt_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD)
  msgmerge_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD)
  msgunfmt_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD)
-xgettext_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD)
+xgettext_DEPENDENCIES = $(LIBXGETTEXTTS) libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD)
  msgattrib_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD)
  msgcat_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD)
  msgcomm_DEPENDENCIES = libgettextsrc.la ../gnulib-lib/libgettextlib.la $(WOE32_LDADD)
diff --git a/gettext-tools/src/format-rust.c b/gettext-tools/src/format-rust.c

new file mode 100644 (file)

index 0000000..d478e83
--- /dev/null
+++ b/gettext-tools/src/format-rust.c
@@ -0,0 +1,477 @@
+/* Rust format strings.
+   Copyright (C) 2001-2025 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2025.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "format.h"
+#include "c-ctype.h"
+#include "unictype.h"
+#include "unistr.h"
+#include "xalloc.h"
+#include "xvasprintf.h"
+#include "format-invalid.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)
+
+/* Rust format strings are those supported by the 'formatx' library
+     <https://crates.io/crates/formatx>
+     <https://lib.rs/crates/formatx>
+     <https://github.com/clitic/formatx>
+   namely those supported by the 'format!' built-in
+     <https://doc.rust-lang.org/std/fmt/>
+   with the restrictions listed in
+     <https://crates.io/crates/formatx>, section "Limitations".
+
+   A format string directive here consists of
+     - an opening brace '{',
+     - an optional non-empty sequence of digits,
+     - optionally, a ':' and a format specifier, where a format specifier is
+       of the form [[fill]align][sign][#][0][minimumwidth][.precision][type]
+       where
+         - the fill character is any character,
+         - the align flag is one of '<', '>', '^',
+         - the sign is one of '+', '-',
+         - the # flag is '#',
+         - the 0 flag is '0',
+         - minimumwidth is a non-empty sequence of digits,
+         - precision is a non-empty sequence of digits,
+         - type is one of
+           - '?', for any type of argument,
+     - optionally, a sequence of Unicode (UTF-8) characters with property
+       White_Space,
+     - a closing brace '}'.
+   Brace characters '{' and '}' can be escaped by doubling them: '{{' and '}}'.
+
+   Numbered ('{m}') and unnumbered ('{}') argument specifications cannot be used
+   in the same string; that's unsupported (although it does not always lead to
+   an error at runtime, see <https://github.com/clitic/formatx/issues/7>).  */
+
+struct numbered_arg
+{
+  /* The number of the argument, 0-based.  */
+  unsigned int number;
+};
+
+struct spec
+{
+  unsigned int directives;
+  unsigned int numbered_arg_count;
+  struct numbered_arg *numbered;
+};
+
+
+/* All the parse_* functions (except parse_upto) follow the same
+   calling convention.  FORMATP shall point to the beginning of a token.
+   If parsing succeeds, FORMATP will point to the next character after
+   the token, and true is returned.  Otherwise, FORMATP will be
+   unchanged and false is returned.  */
+
+/* Comparison function for qsort: orders two 'struct numbered_arg' elements
+   by ascending 'number'.  Returns 1, -1 or 0.  */
+static int
+numbered_arg_compare (const void *p1, const void *p2)
+{
+  const struct numbered_arg *left = (const struct numbered_arg *) p1;
+  const struct numbered_arg *right = (const struct numbered_arg *) p2;
+
+  if (left->number > right->number)
+    return 1;
+  if (left->number < right->number)
+    return -1;
+  return 0;
+}
+
+/* Parses FORMAT as a Rust ('formatx' style) format string.
+   Returns a freshly allocated 'struct spec' that holds the number of
+   directives and the sorted, duplicate-free list of referenced argument
+   numbers.  If FORMAT is invalid, returns NULL after storing a message in
+   *INVALID_REASON.
+   FDI is passed to the FDI_SET macro, which records the FMTDIR_* indicators
+   of each directive.  TRANSLATED is not used by this parser.  */
+static void *
+format_parse (const char *format, bool translated, char *fdi,
+              char **invalid_reason)
+{
+  /* FDI_SET (see format.h) indexes FDI relative to 'format_start'.  It must
+     therefore denote the beginning of the entire format string, not the
+     beginning of the directive that is currently being parsed.  */
+  const char *const format_start = format;
+  struct spec spec;
+  unsigned int numbered_allocated;
+  bool seen_numbered_args;
+  unsigned int unnumbered_arg_count;
+  struct spec *result;
+
+  spec.directives = 0;
+  spec.numbered_arg_count = 0;
+  spec.numbered = NULL;
+  numbered_allocated = 0;
+  seen_numbered_args = false;
+  unnumbered_arg_count = 0;
+
+  for (; *format != '\0';)
+    {
+      /* Invariant: !seen_numbered_args || unnumbered_arg_count == 0.  */
+      if (*format == '{')
+        {
+          char c;
+
+          c = *++format;
+          if (c == '{')
+            {
+              /* An escaped '{'.  */
+            }
+          else
+            {
+              unsigned int arg_id;
+
+              /* The opening brace is the first byte of the directive.  */
+              FDI_SET (format - 1, FMTDIR_START);
+
+              if (c_isdigit (*format))
+                {
+                  /* Numbered and unnumbered specifications are exclusive.  */
+                  if (unnumbered_arg_count > 0)
+                    {
+                      *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                      FDI_SET (format, FMTDIR_ERROR);
+                      goto bad_format;
+                    }
+                  seen_numbered_args = true;
+
+                  arg_id = 0;
+                  do
+                    {
+                      if (arg_id >= UINT_MAX / 10)
+                        {
+                          *invalid_reason =
+                            xasprintf (_("In the directive number %u, the argument number is too large."), spec.directives);
+                          FDI_SET (format, FMTDIR_ERROR);
+                          goto bad_format;
+                        }
+                      /* Here arg_id <= floor(UINT_MAX/10) - 1.  */
+                      arg_id = arg_id * 10 + (*format - '0');
+                      /* Here arg_id < floor(UINT_MAX/10)*10 <= UINT_MAX.  */
+                      format++;
+                    }
+                  while (c_isdigit (*format));
+                }
+              else
+                {
+                  /* Numbered and unnumbered specifications are exclusive.  */
+                  if (seen_numbered_args)
+                    {
+                      *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                      FDI_SET (format - 1, FMTDIR_ERROR);
+                      goto bad_format;
+                    }
+
+                  arg_id = unnumbered_arg_count;
+                  unnumbered_arg_count++;
+                }
+
+              c = *format;
+              if (c == ':')
+                {
+                  format++;
+
+                  /* A format specifier is of the form:
+                     [[fill]align][sign][#][0][minimumwidth][.precision][type]  */
+
+                  if (*format == '\0')
+                    {
+                      *invalid_reason =
+                        xasprintf (_("The directive number %u is unterminated."),
+                                   spec.directives);
+                      FDI_SET (format - 1, FMTDIR_ERROR);
+                      goto bad_format;
+                    }
+
+                  /* Skip [[fill]align].  The fill character can be any
+                     character, including a multibyte UTF-8 one.  Therefore
+                     look at the byte that follows the first character, not
+                     merely at the second byte.  */
+                  {
+                    ucs4_t fill;
+                    int fill_len = u8_mbtouc (&fill,
+                                              (const uint8_t *) format,
+                                              strnlen (format, 4));
+
+                    if (fill_len > 0
+                        && (format[fill_len] == '<'
+                            || format[fill_len] == '>'
+                            || format[fill_len] == '^'))
+                      format += fill_len + 1;
+                    else if (*format == '<' || *format == '>'
+                             || *format == '^')
+                      format++;
+                  }
+
+                  if (*format == '+' || *format == '-')
+                    format++;
+                  if (*format == '#')
+                    format++;
+                  if (*format == '0')
+                    format++;
+
+                  /* Parse the optional minimumwidth.  */
+                  while (c_isdigit (*format))
+                    format++;
+
+                  /* Parse the optional .precision.  */
+                  if (*format == '.')
+                    {
+                      format++;
+                      if (c_isdigit (*format))
+                        do
+                          format++;
+                        while (c_isdigit (*format));
+                      else
+                        /* A '.' without digits is not a precision.  Step
+                           back, so that the check for '}' below fails.  */
+                        format--;
+                    }
+
+                  /* Parse the optional type.  */
+                  if (*format == '?')
+                    format++;
+                }
+
+              /* Skip Unicode (UTF-8) characters with property White_Space.  */
+              while (*format != '}')
+                {
+                  ucs4_t uc;
+                  int n = u8_mbtouc (&uc,
+                                     (const uint8_t *) format,
+                                     strnlen (format, 4));
+                  if (n > 0 && uc_is_property_white_space (uc))
+                    format += n;
+                  else
+                    break;
+                }
+
+              if (*format != '}')
+                {
+                  *invalid_reason =
+                    xasprintf (_("The directive number %u is unterminated."),
+                               spec.directives);
+                  FDI_SET (format - 1, FMTDIR_ERROR);
+                  goto bad_format;
+                }
+
+              spec.directives++;
+
+              if (numbered_allocated == spec.numbered_arg_count)
+                {
+                  numbered_allocated = 2 * numbered_allocated + 1;
+                  spec.numbered = (struct numbered_arg *) xrealloc (spec.numbered, numbered_allocated * sizeof (struct numbered_arg));
+                }
+              spec.numbered[spec.numbered_arg_count].number = arg_id;
+              spec.numbered_arg_count++;
+
+              /* The closing brace is the last byte of the directive.  */
+              FDI_SET (format, FMTDIR_END);
+            }
+        }
+      format++;
+    }
+
+  /* Sort the numbered argument array, and eliminate duplicates.  */
+  if (spec.numbered_arg_count > 1)
+    {
+      unsigned int i, j;
+
+      qsort (spec.numbered, spec.numbered_arg_count,
+             sizeof (struct numbered_arg), numbered_arg_compare);
+
+      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
+      for (i = j = 0; i < spec.numbered_arg_count; i++)
+        if (j > 0 && spec.numbered[i].number == spec.numbered[j-1].number)
+          ;
+        else
+          {
+            if (j < i)
+              spec.numbered[j].number = spec.numbered[i].number;
+            j++;
+          }
+      spec.numbered_arg_count = j;
+    }
+
+  result = XMALLOC (struct spec);
+  *result = spec;
+  return result;
+
+ bad_format:
+  /* free (NULL) is a no-op, so no NULL check is needed.  */
+  free (spec.numbered);
+  return NULL;
+}
+
+/* Releases the format string descriptor DESCR, as returned by format_parse:
+   first its array of numbered arguments, then the descriptor itself.  */
+static void
+format_free (void *descr)
+{
+  struct spec *parsed = descr;
+
+  free (parsed->numbered);
+  free (parsed);
+}
+
+/* Returns the number of format directives recorded in the format string
+   descriptor DESCR.  */
+static int
+format_get_number_of_directives (void *descr)
+{
+  return ((struct spec *) descr)->directives;
+}
+
+/* Verifies that the argument numbers used by the translation (MSGSTR_DESCR)
+   are consistent with those used by the original string (MSGID_DESCR).
+   Both descriptors hold arrays of argument numbers that format_parse has
+   sorted and freed from duplicates.
+     - An argument used in the msgstr but absent from the msgid is always
+       an error.
+     - An argument used in the msgid but absent from the msgstr is an error
+       if EQUALITY is true; otherwise at most one such argument is tolerated.
+   Problems are reported through ERROR_LOGGER (if non-NULL), which receives
+   ERROR_LOGGER_DATA as first argument; PRETTY_MSGID and PRETTY_MSGSTR name
+   the two strings in the messages.  Only the first problem is reported.
+   Returns true if an error was found, false otherwise.  */
+static bool
+format_check (void *msgid_descr, void *msgstr_descr, bool equality,
+              formatstring_error_logger_t error_logger, void *error_logger_data,
+              const char *pretty_msgid, const char *pretty_msgstr)
+{
+  struct spec *spec1 = (struct spec *) msgid_descr;
+  struct spec *spec2 = (struct spec *) msgstr_descr;
+  bool err = false;
+
+  if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
+    {
+      unsigned int i, j;
+      unsigned int n1 = spec1->numbered_arg_count;
+      unsigned int n2 = spec2->numbered_arg_count;
+      /* Argument numbers start at 0 here, therefore the number of the
+         ignored argument cannot double as the "one argument was already
+         ignored" flag: keep the two apart.  Both are only used if
+         !equality.  */
+      bool have_missing = false;
+      unsigned int missing = 0;
+
+      /* Check that the argument numbers are the same.
+         Both arrays are sorted.  We search for the first difference.  */
+      for (i = 0, j = 0; i < n1 || j < n2; )
+        {
+          /* cmp > 0: the next msgstr argument is missing in the msgid.
+             cmp < 0: the next msgid argument is missing in the msgstr.
+             cmp == 0: both strings use the same argument.  */
+          int cmp = (i >= n1 ? 1 :
+                     j >= n2 ? -1 :
+                     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
+                     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
+                     0);
+
+          if (cmp > 0)
+            {
+              if (error_logger)
+                error_logger (error_logger_data,
+                              _("a format specification for argument %u, as in '%s', doesn't exist in '%s'"),
+                              spec2->numbered[j].number, pretty_msgstr,
+                              pretty_msgid);
+              err = true;
+              break;
+            }
+          else if (cmp < 0)
+            {
+              if (equality)
+                {
+                  if (error_logger)
+                    error_logger (error_logger_data,
+                                  _("a format specification for argument %u doesn't exist in '%s'"),
+                                  spec1->numbered[i].number, pretty_msgstr);
+                  err = true;
+                  break;
+                }
+              else if (have_missing)
+                {
+                  /* A second argument is being dropped.  */
+                  if (error_logger)
+                    error_logger (error_logger_data,
+                                  _("a format specification for arguments %u and %u doesn't exist in '%s', only one argument may be ignored"),
+                                  missing, spec1->numbered[i].number,
+                                  pretty_msgstr);
+                  err = true;
+                  break;
+                }
+              else
+                {
+                  /* Tolerate the first dropped argument, but remember it.  */
+                  have_missing = true;
+                  missing = spec1->numbered[i].number;
+                  i++;
+                }
+            }
+          else
+            j++, i++;
+        }
+    }
+
+  return err;
+}
+
+
+/* The format string parser for Rust.  It bundles the functions defined
+   above, in the slot order of struct formatstring_parser.  The fourth slot
+   is NULL, i.e. no function is provided for it.
+   NOTE(review): presumably that slot is an optional hook of
+   struct formatstring_parser -- confirm against format.h.  */
+struct formatstring_parser formatstring_rust =
+{
+  format_parse,
+  format_free,
+  format_get_number_of_directives,
+  NULL,
+  format_check
+};
+
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+   format_parse for strings read from standard input.  */
+
+#include <stdio.h>
+
+/* Prints the argument list described by DESCR on standard output.
+   A NULL descriptor is shown as "INVALID".  Otherwise the output is a
+   parenthesized list with a "*" for each argument number that is used and
+   a "_" for each lower argument number that is not used.  */
+static void
+format_print (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+  unsigned int expected = 0;
+  unsigned int k;
+
+  if (spec == NULL)
+    {
+      fputs ("INVALID", stdout);
+      return;
+    }
+
+  fputs ("(", stdout);
+  for (k = 0; k < spec->numbered_arg_count; k++)
+    {
+      unsigned int number = spec->numbered[k].number;
+
+      if (k > 0)
+        fputs (" ", stdout);
+      /* The numbers must be strictly increasing.  */
+      if (number < expected)
+        abort ();
+      /* Fill the gap of unused argument numbers.  */
+      while (expected < number)
+        {
+          fputs ("_ ", stdout);
+          expected++;
+        }
+      fputs ("*", stdout);
+      expected = number + 1;
+    }
+  fputs (")", stdout);
+}
+
+/* Test driver: reads format strings from standard input, one per line,
+   parses each with format_parse and prints the resulting argument list,
+   followed by the reason if the string is invalid.  */
+int
+main (void)
+{
+  /* Line buffer, managed by getline and reused for all lines.  */
+  char *line = NULL;
+  size_t line_size = 0;
+
+  for (;;)
+    {
+      /* getline returns an ssize_t; an int could not hold every length.  */
+      ssize_t line_len;
+      char *invalid_reason;
+      void *descr;
+
+      line_len = getline (&line, &line_size, stdin);
+      if (line_len < 0)
+        break;
+      /* Strip the trailing newline, if any.  */
+      if (line_len > 0 && line[line_len - 1] == '\n')
+        line[--line_len] = '\0';
+
+      invalid_reason = NULL;
+      descr = format_parse (line, false, NULL, &invalid_reason);
+
+      format_print (descr);
+      printf ("\n");
+      if (descr == NULL)
+        printf ("%s\n", invalid_reason);
+      else
+        format_free (descr);
+
+      free (invalid_reason);
+    }
+
+  /* getline may have allocated the buffer even when it finally failed.  */
+  free (line);
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-rust.c ../gnulib-lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */
diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c

index 047160b120fd1c56aeac3eb88ddf24780c5ca94b..ff5f56d261ec4db13173a2277474a4cbebda8799 100644 (file)
--- a/gettext-tools/src/format.c
+++ b/gettext-tools/src/format.c
@@ -1,5 +1,5 @@
  /* Format strings.
-   Copyright (C) 2001-2010, 2012-2013, 2015, 2019-2020, 2023 Free Software Foundation, Inc.
+   Copyright (C) 2001-2025 Free Software Foundation, Inc.
     Written by Bruno Haible <haible@clisp.cons.org>, 2001.
  
     This program is free software: you can redistribute it and/or modify
@@ -47,6 +47,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] =
    /* format_lisp */             &formatstring_lisp,
    /* format_elisp */            &formatstring_elisp,
    /* format_librep */           &formatstring_librep,
+  /* format_rust */             &formatstring_rust,
    /* format_ruby */             &formatstring_ruby,
    /* format_sh */               &formatstring_sh,
    /* format_awk */              &formatstring_awk,
diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h

index d5c196b9188eb03b057b77ad49e8038f368c9323..fe0199b8a2381de4789139ebfe16f6991790eb45 100644 (file)
--- a/gettext-tools/src/format.h
+++ b/gettext-tools/src/format.h
@@ -113,6 +113,7 @@ extern DLL_VARIABLE struct formatstring_parser formatstring_scheme;
  extern DLL_VARIABLE struct formatstring_parser formatstring_lisp;
  extern DLL_VARIABLE struct formatstring_parser formatstring_elisp;
  extern DLL_VARIABLE struct formatstring_parser formatstring_librep;
+extern DLL_VARIABLE struct formatstring_parser formatstring_rust;
  extern DLL_VARIABLE struct formatstring_parser formatstring_ruby;
  extern DLL_VARIABLE struct formatstring_parser formatstring_sh;
  extern DLL_VARIABLE struct formatstring_parser formatstring_awk;
diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c

index 990c475c133dee33774f64b8dbe034c2fe7f91bd..ea1f61c029ac43a1a9b627883d62593a961aae36 100644 (file)
--- a/gettext-tools/src/message.c
+++ b/gettext-tools/src/message.c
@@ -1,5 +1,5 @@
  /* GNU gettext - internationalization aids
-   Copyright (C) 1995-2024 Free Software Foundation, Inc.
+   Copyright (C) 1995-2025 Free Software Foundation, Inc.
  
     This file was written by Peter Miller <millerp@canb.auug.org.au>
  
@@ -47,6 +47,7 @@ const char *const format_language[NFORMATS] =
    /* format_lisp */             "lisp",
    /* format_elisp */            "elisp",
    /* format_librep */           "librep",
+  /* format_rust */             "rust",
    /* format_ruby */             "ruby",
    /* format_sh */               "sh",
    /* format_awk */              "awk",
@@ -82,6 +83,7 @@ const char *const format_language_pretty[NFORMATS] =
    /* format_lisp */             "Lisp",
    /* format_elisp */            "Emacs Lisp",
    /* format_librep */           "librep",
+  /* format_rust */             "Rust",
    /* format_ruby */             "Ruby",
    /* format_sh */               "Shell",
    /* format_awk */              "awk",
diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h

index d0f1c8c985517ade3ed6bd74dd795375f24f138c..c7c9699ff21b6beb391d46934b7c5994fc0da843 100644 (file)
--- a/gettext-tools/src/message.h
+++ b/gettext-tools/src/message.h
@@ -1,5 +1,5 @@
  /* GNU gettext - internationalization aids
-   Copyright (C) 1995-2024 Free Software Foundation, Inc.
+   Copyright (C) 1995-2025 Free Software Foundation, Inc.
  
     This file was written by Peter Miller <millerp@canb.auug.org.au>
  
@@ -56,6 +56,7 @@ enum format_type
    format_lisp,
    format_elisp,
    format_librep,
+  format_rust,
    format_ruby,
    format_sh,
    format_awk,
@@ -75,7 +76,7 @@ enum format_type
    format_gfc_internal,
    format_ycp
  };
-#define NFORMATS 31     /* Number of format_type enum values.  */
+#define NFORMATS 32     /* Number of format_type enum values.  */
  extern DLL_VARIABLE const char *const format_language[NFORMATS];
  extern DLL_VARIABLE const char *const format_language_pretty[NFORMATS];
  
diff --git a/gettext-tools/src/x-rust.c b/gettext-tools/src/x-rust.c

new file mode 100644 (file)

index 0000000..c2a8c5c
--- /dev/null
+++ b/gettext-tools/src/x-rust.c
@@ -0,0 +1,1196 @@
+/* xgettext Rust backend.
+   Copyright (C) 2001-2025 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* Specification.  */
+#include "x-rust.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <error.h>
+#include "message.h"
+#include "string-desc.h"
+#include "string-buffer.h"
+#include "xgettext.h"
+#include "xg-pos.h"
+#include "xg-mixed-string.h"
+#include "xg-arglist-context.h"
+#include "xg-arglist-callshape.h"
+#include "xg-arglist-parser.h"
+#include "xg-message.h"
+#include "if-error.h"
+#include "xalloc.h"
+#include "string-buffer.h"
+#include "read-file.h"
+#include "unistr.h"
+#include "po-charset.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+/* Use tree-sitter.
+   Documentation: <https://tree-sitter.github.io/tree-sitter/using-parsers>  */
+#include <tree_sitter/api.h>
+extern const TSLanguage *tree_sitter_rust (void);
+
+
+/* The Rust syntax is defined in https://doc.rust-lang.org/1.6.0/reference.html.
+   String syntax:
+   https://doc.rust-lang.org/1.6.0/reference.html#character-and-string-literals
+ */
+
+#define DEBUG_RUST 0
+
+
+/* ====================== Keyword set customization.  ====================== */
+
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+static hash_table function_keywords;
+static hash_table macro_keywords;
+static bool default_keywords = true;
+
+
+/* Switches on the mode in which all strings are extracted, by setting the
+   file-level flag extract_all.  */
+void
+x_rust_extract_all ()
+{
+  extract_all = true;
+}
+
+
+/* Registers the keyword specification NAME.
+   NAME == NULL only disables the default keywords.
+   Otherwise NAME is split by split_keywordspec into an identifier part and
+   a call shape.  An identifier part that ends in '!' is registered, without
+   the '!', as a macro keyword; any other one as a function keyword.  */
+void
+x_rust_keyword (const char *name)
+{
+  if (name == NULL)
+    {
+      default_keywords = false;
+      return;
+    }
+
+  /* Create the hash tables on first use.  */
+  if (function_keywords.table == NULL)
+    hash_init (&function_keywords, 100);
+  if (macro_keywords.table == NULL)
+    hash_init (&macro_keywords, 100);
+
+  const char *spec_end;
+  struct callshape shape;
+  split_keywordspec (name, &spec_end, &shape);
+
+  /* The characters between name and spec_end should form a valid Rust
+     identifier, possibly with a trailing '!'.
+     A colon means an invalid parse in split_keywordspec().  */
+  const char *first_colon = strchr (name, ':');
+  if (first_colon != NULL && first_colon < spec_end)
+    return;
+
+  bool is_macro = (spec_end > name && spec_end[-1] == '!');
+  if (is_macro)
+    insert_keyword_callshape (&macro_keywords, name, spec_end - 1 - name,
+                              &shape);
+  else
+    insert_keyword_callshape (&function_keywords, name, spec_end - name,
+                              &shape);
+}
+
+/* Completes the keyword tables: unless the default keywords have been
+   disabled or were already registered, registers them now.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (!default_keywords)
+    return;
+
+  /* These are the functions defined by the 'gettext-rs' Rust package.
+     https://docs.rs/gettext-rs/latest/gettextrs/#functions  */
+  /* When adding new keywords here, also update the documentation in
+     xgettext.texi!  */
+  x_rust_keyword ("gettext");
+  x_rust_keyword ("dgettext:2");
+  x_rust_keyword ("dcgettext:2");
+  x_rust_keyword ("ngettext:1,2");
+  x_rust_keyword ("dngettext:2,3");
+  x_rust_keyword ("dcngettext:2,3");
+  x_rust_keyword ("pgettext:1c,2");
+  x_rust_keyword ("npgettext:1c,2,3");
+
+  /* Make sure the registration happens only once.  */
+  default_keywords = false;
+}
+
+/* The flag_table_rust is split into two tables, one for functions and one for
+   macros.  */
+flag_context_list_table_ty flag_table_rust_functions;
+flag_context_list_table_ty flag_table_rust_macros;
+
+/* Records the format string flags of the known functions and macros, by
+   passing each flag specification to xgettext_record_flag, in order.  */
+void
+init_flag_table_rust ()
+{
+  static const char *const flag_specs[] =
+    {
+      /* These are the functions defined by the 'gettext-rs' Rust package.
+         https://docs.rs/gettext-rs/latest/gettextrs/#functions  */
+      "gettext:1:pass-rust-format",
+      "dgettext:2:pass-rust-format",
+      "dcgettext:2:pass-rust-format",
+      "ngettext:1:pass-rust-format",
+      "ngettext:2:pass-rust-format",
+      "dngettext:2:pass-rust-format",
+      "dngettext:3:pass-rust-format",
+      "dcngettext:2:pass-rust-format",
+      "dcngettext:3:pass-rust-format",
+      "pgettext:2:pass-rust-format",
+      "npgettext:2:pass-rust-format",
+      "npgettext:3:pass-rust-format",
+      /* These are the functions whose argument is a format string.
+         https://github.com/clitic/formatx  */
+      "formatx!:1:rust-format"
+    };
+  size_t k;
+
+  for (k = 0; k < sizeof (flag_specs) / sizeof (flag_specs[0]); k++)
+    xgettext_record_flag (flag_specs[k]);
+}
+
+
+/* ======================== Parsing via tree-sitter. ======================== */
+/* To understand this code, look at
+     tree-sitter-rust/src/node-types.json
+   and
+     tree-sitter-rust/src/grammar.json
+ */
+
+/* The tree-sitter's language object.  */
+static const TSLanguage *ts_language;
+
+/* ------------------------- Node types and symbols ------------------------- */
+
+/* Returns the symbol that ts_language assigns to the node type NAME
+   (named or anonymous, according to IS_NAMED).  Aborts if the lookup
+   yields 0.  */
+static TSSymbol ts_language_symbol (const char *name, bool is_named)
+{
+  size_t name_len = strlen (name);
+  TSSymbol symbol =
+    ts_language_symbol_for_name (ts_language, name, name_len, is_named);
+
+  /* If we get here, the grammar has evolved in an incompatible way.  */
+  if (symbol == 0)
+    abort ();
+  return symbol;
+}
+
+/* Returns the field ID that ts_language assigns to the field NAME.
+   Aborts if the lookup yields 0.  */
+static TSFieldId ts_language_field (const char *name)
+{
+  size_t name_len = strlen (name);
+  TSFieldId field =
+    ts_language_field_id_for_name (ts_language, name, name_len);
+
+  /* If we get here, the grammar has evolved in an incompatible way.  */
+  if (field == 0)
+    abort ();
+  return field;
+}
+
+/* Optimization:
+   Instead of
+     strcmp (ts_node_type (node), "string_literal") == 0
+   it is faster to do
+     ts_node_symbol (node) == ts_symbol_string_literal
+ */
+static TSSymbol ts_symbol_line_comment;
+static TSSymbol ts_symbol_block_comment;
+static TSSymbol ts_symbol_string_literal;
+static TSSymbol ts_symbol_raw_string_literal;
+static TSSymbol ts_symbol_string_content;
+static TSSymbol ts_symbol_escape_sequence;
+static TSSymbol ts_symbol_identifier;
+static TSSymbol ts_symbol_call_expression;
+static TSSymbol ts_symbol_macro_invocation;
+static TSSymbol ts_symbol_arguments;
+static TSSymbol ts_symbol_token_tree;
+static TSSymbol ts_symbol_open_paren; /* ( */
+static TSSymbol ts_symbol_close_paren; /* ) */
+static TSSymbol ts_symbol_comma; /* , */
+static TSSymbol ts_symbol_exclam; /* ! */
+static TSFieldId ts_field_function;
+static TSFieldId ts_field_arguments;
+static TSFieldId ts_field_macro;
+
+/* Returns the line number of the start of NODE, that is, the row of its
+   start point plus 1.  */
+static inline size_t
+ts_node_line_number (TSNode node)
+{
+  TSPoint start = ts_node_start_point (node);
+
+  return start.row + 1;
+}
+
+/* -------------------------------- Comments -------------------------------- */
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+/* Saves a comment line: strips the leading and trailing spaces and tabs
+   from GIST and passes the remainder to savable_comment_add.  */
+static void save_comment_line (string_desc_t gist)
+{
+  size_t start = 0;
+  size_t end = sd_length (gist);
+
+  /* Skip the leading whitespace.  */
+  while (start < end
+         && (sd_char_at (gist, start) == ' '
+             || sd_char_at (gist, start) == '\t'))
+    start++;
+  /* Skip the trailing whitespace.  */
+  while (end > start
+         && (sd_char_at (gist, end - 1) == ' '
+             || sd_char_at (gist, end - 1) == '\t'))
+    end--;
+
+  savable_comment_add (sd_c (sd_substring (gist, start, end)));
+}
+
+/* Performs the comment bookkeeping for NODE, whose source text lies in
+   CONTENTS.
+   Updates savable_comment, last_comment_line, last_non_comment_line.
+   For the bookkeeping to be right, this function must be called
+     - for each node (not only the named nodes!),
+     - in depth-first traversal order.  */
+static void handle_comments (TSNode node, const char *contents)
+{
+  #if DEBUG_RUST
+  fprintf (stderr, "LCL=%d LNCL=%d node=[%s]|%s|\n", last_comment_line, last_non_comment_line, ts_node_type (node), ts_node_string (node));
+  #endif
+  if (last_comment_line < last_non_comment_line
+      && last_non_comment_line < ts_node_line_number (node))
+    /* We have skipped over a newline.  This newline terminated a line
+       with non-comment tokens, after the last comment line.  */
+    savable_comment_reset ();
+
+  TSSymbol symbol = ts_node_symbol (node);
+  if (symbol != ts_symbol_line_comment && symbol != ts_symbol_block_comment)
+    {
+      /* Not a comment.  */
+      last_non_comment_line = ts_node_line_number (node);
+      return;
+    }
+
+  /* The source text of the comment, including its delimiters.  */
+  string_desc_t text =
+    sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+                 (char *) contents + ts_node_start_byte (node));
+  size_t text_len = sd_length (text);
+
+  if (symbol == ts_symbol_line_comment)
+    {
+      /* It should start with two slashes.  */
+      if (text_len < 2
+          || sd_char_at (text, 0) != '/'
+          || sd_char_at (text, 1) != '/')
+        abort ();
+      save_comment_line (sd_substring (text, 2, text_len));
+    }
+  else
+    {
+      /* It should start and end with the C comment markers.  */
+      if (text_len < 4
+          || sd_char_at (text, 0) != '/'
+          || sd_char_at (text, 1) != '*'
+          || sd_char_at (text, text_len - 2) != '*'
+          || sd_char_at (text, text_len - 1) != '/')
+        abort ();
+      /* Split the part between the markers into lines.  Leading and
+         trailing whitespace of each line is removed by
+         save_comment_line.  */
+      string_desc_t rest = sd_substring (text, 2, text_len - 2);
+      ptrdiff_t nl_index;
+      while ((nl_index = sd_index (rest, '\n')) >= 0)
+        {
+          save_comment_line (sd_substring (rest, 0, nl_index));
+          rest = sd_substring (rest, nl_index + 1, sd_length (rest));
+        }
+      save_comment_line (rest);
+    }
+
+  last_comment_line = ts_node_end_point (node).row + 1;
+}
+
+/* ---------------------------- String literals ---------------------------- */
+
+/* Combines the pieces of a string_literal or raw_string_literal.
+   NODE is the literal's tree node; CONTENTS is the source text that the
+   byte offsets of the nodes refer to.
+   The named children of NODE are expected to be string_content and
+   escape_sequence nodes; any other child makes the function abort.
+   Escape sequences are replaced by the characters they denote; for an
+   escape sequence that is not recognized, a warning is emitted and nothing
+   is added to the result.
+   Returns a freshly allocated UTF-8 encoded string.  */
+static char *
+string_literal_value (TSNode node, const char *contents)
+{
+  if (ts_node_named_child_count (node) == 1)
+    {
+      TSNode subnode = ts_node_named_child (node, 0);
+      if (ts_node_symbol (subnode) == ts_symbol_string_content)
+        {
+          /* Optimize the frequent special case of a string literal
+             that is non-empty and has no escape sequences.  */
+          string_desc_t subnode_string =
+            sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                         (char *) contents + ts_node_start_byte (subnode));
+          return sd_c (subnode_string);
+        }
+    }
+
+  /* The general case.  */
+  struct string_buffer buffer;
+  sb_init (&buffer);
+  uint32_t count = ts_node_named_child_count (node);
+  /* True right after a backslash-newline escape sequence.  */
+  bool skip_leading_whitespace = false;
+  uint32_t i;
+  for (i = 0; i < count; i++)
+    {
+      TSNode subnode = ts_node_named_child (node, i);
+      if (ts_node_symbol (subnode) == ts_symbol_string_content)
+        {
+          string_desc_t subnode_string =
+            sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                         (char *) contents + ts_node_start_byte (subnode));
+          if (skip_leading_whitespace)
+            {
+              /* After backslash-newline, skip ASCII whitespace.  */
+              while (sd_length (subnode_string) > 0
+                     && (sd_char_at (subnode_string, 0) == ' '
+                         || sd_char_at (subnode_string, 0) == '\t'))
+                subnode_string = sd_substring (subnode_string, 1, sd_length (subnode_string));
+            }
+          /* Use the checking variant, like all other appends in this
+             function, so that no failure is silently ignored.  */
+          sb_xappend_desc (&buffer, subnode_string);
+          skip_leading_whitespace = false;
+        }
+      else if (ts_node_symbol (subnode) == ts_symbol_escape_sequence)
+        {
+          const char *escape_start = contents + ts_node_start_byte (subnode);
+          const char *escape_end = contents + ts_node_end_byte (subnode);
+          /* The escape sequence must start with a backslash.  */
+          if (!(escape_end - escape_start >= 2 && escape_start[0] == '\\'))
+            abort ();
+          skip_leading_whitespace = false;
+          /* tree-sitter's grammar.js allows more escape sequences than
+             the Rust documentation and the Rust compiler.  Give a warning
+             for those cases where the Rust compiler gives an error.  */
+          bool invalid = false;
+          if (escape_end - escape_start == 2)
+            {
+              /* A backslash followed by a single character.  */
+              switch (escape_start[1])
+                {
+                case '\\':
+                case '"':
+                case '\'': /* Not documented, but accepted by rustc.  */
+                  sb_xappend1 (&buffer, escape_start[1]);
+                  break;
+                case 'n':
+                  sb_xappend1 (&buffer, '\n');
+                  break;
+                case 'r':
+                  sb_xappend1 (&buffer, '\r');
+                  break;
+                case 't':
+                  sb_xappend1 (&buffer, '\t');
+                  break;
+                case '\n':
+                  /* Line continuation: contributes no character.  */
+                  skip_leading_whitespace = true;
+                  break;
+                default:
+                  invalid = true;
+                  break;
+                }
+            }
+          else if (escape_start[1] == 'x')
+            {
+              /* \xNN  */
+              unsigned int value = 0;
+              const char *p;
+              for (p = escape_start + 2; p < escape_end; p++)
+                {
+                  /* Only 2 hexadecimal digits are accepted.
+                     No overflow is possible.  */
+                  char c = *p;
+                  if (c >= '0' && c <= '9')
+                    value = (value << 4) + (c - '0');
+                  else if (c >= 'A' && c <= 'F')
+                    value = (value << 4) + (c - 'A' + 10);
+                  else if (c >= 'a' && c <= 'f')
+                    value = (value << 4) + (c - 'a' + 10);
+                  else
+                    invalid = true;
+                }
+              if (!invalid)
+                {
+                  uint8_t buf[6];
+                  int n = u8_uctomb (buf, value, sizeof (buf));
+                  if (n > 0)
+                    sb_xappend_desc (&buffer, sd_new_addr (n, (char *) buf));
+                  else
+                    invalid = true;
+                }
+            }
+          else if (escape_start[1] == 'u'
+                   && escape_end - escape_start > 4
+                   && escape_start[2] == '{' && escape_end[-1] == '}')
+            {
+              /* \u{N...}  */
+              unsigned int value = 0;
+              const char *p;
+              for (p = escape_start + 3; p < escape_end - 1; p++)
+                {
+                  char c = *p;
+                  if (c >= '0' && c <= '9')
+                    value = (value << 4) + (c - '0');
+                  else if (c >= 'A' && c <= 'F')
+                    value = (value << 4) + (c - 'A' + 10);
+                  else if (c >= 'a' && c <= 'f')
+                    value = (value << 4) + (c - 'a' + 10);
+                  else
+                    invalid = true;
+                  /* Stop at the first value beyond the Unicode range; this
+                     also keeps VALUE from overflowing.  */
+                  if (value >= 0x110000)
+                    invalid = true;
+                  if (invalid)
+                    break;
+                }
+              if (!invalid)
+                {
+                  uint8_t buf[6];
+                  int n = u8_uctomb (buf, value, sizeof (buf));
+                  if (n > 0)
+                    sb_xappend_desc (&buffer, sd_new_addr (n, (char *) buf));
+                  else
+                    invalid = true;
+                }
+            }
+          else
+            invalid = true;
+          if (invalid)
+            {
+              size_t line_number = ts_node_line_number (subnode);
+              if_error (IF_SEVERITY_WARNING,
+                        logical_file_name, line_number, (size_t)(-1), false,
+                        _("invalid escape sequence in string"));
+            }
+        }
+      else
+        abort ();
+    }
+  return sb_xdupfree_c (&buffer);
+}
+
+/* --------------------- Parsing and string extraction --------------------- */
+
+/* Maximum supported nesting depth.  */
+#define MAX_NESTING_DEPTH 1000
+
+static int nesting_depth;
+
+/* The file is parsed into an abstract syntax tree.  Scan the syntax tree,
+   looking for a keyword in identifier position of a call_expression or
+   macro_invocation, followed by a string among the arguments.
+   When we see this pattern, we have something to remember.
+
+     Normal handling: Look for
+       keyword ( ... msgid ... )
+     Plural handling: Look for
+       keyword ( ... msgid ... msgid_plural ... )
+
+   We handle macro_invocation separately from call_expression, because in
+   a macro_invocation spaces are allowed between the identifier and the '!'
+   (i.e. 'println !' is as valid as 'println!').  Looking for 'println!'
+   would make the code more complicated.
+
+   We use recursion because the arguments before msgid or between msgid
+   and msgid_plural can contain subexpressions of the same form.  */
+
+/* Forward declarations.  */
+static void extract_from_node (TSNode node,
+                               flag_region_ty *outer_region,
+                               message_list_ty *mlp,
+                               const char *contents);
+
+/* Extracts messages from the function call consisting of
+     - CALLEE_NODE: a tree node of type 'identifier',
+     - ARGS_NODE: a tree node of type 'arguments'.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_function_call (TSNode callee_node,
+                            TSNode args_node,
+                            flag_region_ty *outer_region,
+                            message_list_ty *mlp,
+                            const char *contents)
+{
+  uint32_t args_count = ts_node_child_count (args_node);
+
+  string_desc_t callee_name =
+    sd_new_addr (ts_node_end_byte (callee_node) - ts_node_start_byte (callee_node),
+                 (char *) contents + ts_node_start_byte (callee_node));
+
+  /* Context iterator.  */
+  flag_context_list_iterator_ty next_context_iter =
+    flag_context_list_iterator (
+      flag_context_list_table_lookup (
+        &flag_table_rust_functions,
+        sd_data (callee_name), sd_length (callee_name)));
+
+  void *keyword_value;
+  if (hash_find_entry (&function_keywords,
+                       sd_data (callee_name), sd_length (callee_name),
+                       &keyword_value)
+      == 0)
+    {
+      /* The callee has some information associated with it.  */
+      const struct callshapes *next_shapes = keyword_value;
+
+      /* We have a function, named by a relevant identifier, with an argument
+         list.  */
+
+      struct arglist_parser *argparser =
+        arglist_parser_alloc (mlp, next_shapes);
+
+      /* Current argument number.  */
+      uint32_t arg;
+      uint32_t i;
+
+      arg = 0;
+      for (i = 0; i < args_count; i++)
+        {
+          TSNode arg_node = ts_node_child (args_node, i);
+          handle_comments (arg_node, contents);
+          if (ts_node_is_named (arg_node)
+              && !(ts_node_symbol (arg_node) == ts_symbol_line_comment
+                   || ts_node_symbol (arg_node) == ts_symbol_block_comment))
+            {
+              arg++;
+              flag_region_ty *arg_region =
+                inheriting_region (outer_region,
+                                   flag_context_list_iterator_advance (
+                                     &next_context_iter));
+
+              bool already_extracted = false;
+              if (ts_node_symbol (arg_node) == ts_symbol_string_literal
+                  || ts_node_symbol (arg_node) == ts_symbol_raw_string_literal)
+                {
+                  lex_pos_ty pos;
+                  pos.file_name = logical_file_name;
+                  pos.line_number = ts_node_line_number (arg_node);
+
+                  char *string = string_literal_value (arg_node, contents);
+
+                  if (extract_all)
+                    {
+                      remember_a_message (mlp, NULL, string, true, false,
+                                          arg_region, &pos,
+                                          NULL, savable_comment, true);
+                      already_extracted = true;
+                    }
+                  else
+                    {
+                      mixed_string_ty *mixed_string =
+                        mixed_string_alloc_utf8 (string, lc_string,
+                                                 pos.file_name, pos.line_number);
+                      arglist_parser_remember (argparser, arg, mixed_string,
+                                               arg_region,
+                                               pos.file_name, pos.line_number,
+                                               savable_comment, true);
+                    }
+                }
+
+              if (!already_extracted)
+                {
+                  if (++nesting_depth > MAX_NESTING_DEPTH)
+                    if_error (IF_SEVERITY_FATAL_ERROR,
+                              logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                              _("too many open parentheses, brackets, or braces"));
+                  extract_from_node (arg_node,
+                                     arg_region,
+                                     mlp,
+                                     contents);
+                  nesting_depth--;
+                }
+
+              unref_region (arg_region);
+            }
+        }
+      arglist_parser_done (argparser, arg);
+      return;
+    }
+
+  /* Recurse.  */
+
+  /* Current argument number.  */
+  uint32_t arg;
+  uint32_t i;
+
+  arg = 0;
+  for (i = 0; i < args_count; i++)
+    {
+      TSNode arg_node = ts_node_child (args_node, i);
+      handle_comments (arg_node, contents);
+      if (ts_node_is_named (arg_node)
+          && !(ts_node_symbol (arg_node) == ts_symbol_line_comment
+               || ts_node_symbol (arg_node) == ts_symbol_block_comment))
+        {
+          arg++;
+          flag_region_ty *arg_region =
+            inheriting_region (outer_region,
+                               flag_context_list_iterator_advance (
+                                 &next_context_iter));
+
+          if (++nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                      _("too many open parentheses, brackets, or braces"));
+          extract_from_node (arg_node,
+                             arg_region,
+                             mlp,
+                             contents);
+          nesting_depth--;
+
+          unref_region (arg_region);
+        }
+    }
+}
+
+/* Extracts messages from a function call like syntax in a macro invocation,
+   consisting of
+     - CALLEE_NODE: a tree node of type 'identifier', or NULL for a mere
+       parenthesized expression,
+     - ARGS_NODE: a tree node of type 'token_tree'.
+   CALLEE_IS_MACRO tells whether the callee is looked up among the macros
+   (macro_keywords, flag_table_rust_macros) or among the functions
+   (function_keywords, flag_table_rust_functions).  It is not consulted
+   when CALLEE_NODE is NULL.
+   OUTER_REGION is the flag region from which the region of each argument
+   inherits.
+   CONTENTS is the text of the source file; the byte offsets of the nodes
+   index into it.
+   Arguments are delimited by the first child of ARGS_NODE and by every
+   comma token.  Within an argument, a nested token_tree is handled by a
+   recursive invocation: as a function call if the preceding non-comment
+   token is an identifier, as a macro invocation if the two preceding
+   non-comment tokens are an identifier and '!', and as a parenthesized
+   expression otherwise.
+   Extracted messages are added to MLP.
+   NOTE(review): in the case where a string literal is handed to
+   arglist_parser_remember (i.e. when extract_all is false), the buffer
+   returned by string_literal_value is not freed in this function; confirm
+   that mixed_string_alloc_utf8 or a later step takes care of it.  */
+static void
+extract_from_function_call_like (TSNode *callee_node, bool callee_is_macro,
+                                 TSNode args_node,
+                                 flag_region_ty *outer_region,
+                                 message_list_ty *mlp,
+                                 const char *contents)
+{
+  /* We have a macro, named by a relevant identifier, with an argument list.
+     The args_node contains the argument tokens (some of them of type
+     token_tree).  They don't contain 'call_expression' and such.  Instead,
+     we need to recognize function call expressions ourselves.  */
+  uint32_t args_count = ts_node_child_count (args_node);
+
+  /* Context iterator.  It is advanced once per argument and yields the flag
+     context of that argument.  For a named callee, the flag table is
+     consulted only if the argument list starts with '('; for a mere
+     parenthesized expression, the passthrough iterator is used.
+     keyword_value is the value found for the callee in the keywords table,
+     or NULL if the callee is not a keyword.  An entry that is present but
+     has a NULL value is treated as an internal error.  */
+  flag_context_list_iterator_ty next_context_iter;
+  void *keyword_value;
+
+  if (callee_node != NULL)
+    {
+      string_desc_t callee_name =
+        sd_new_addr (ts_node_end_byte (*callee_node) - ts_node_start_byte (*callee_node),
+                     (char *) contents + ts_node_start_byte (*callee_node));
+
+      next_context_iter =
+        (args_count >= 2
+         && ts_node_symbol (ts_node_child (args_node, 0)) == ts_symbol_open_paren
+         ? flag_context_list_iterator (
+             flag_context_list_table_lookup (
+               callee_is_macro ? &flag_table_rust_macros : &flag_table_rust_functions,
+               sd_data (callee_name), sd_length (callee_name)))
+         : null_context_list_iterator);
+      if (hash_find_entry (callee_is_macro ? &macro_keywords : &function_keywords,
+                           sd_data (callee_name), sd_length (callee_name),
+                           &keyword_value)
+          == 0)
+        {
+          if (keyword_value == NULL)
+            abort ();
+        }
+      else
+        keyword_value = NULL;
+    }
+  else
+    {
+      next_context_iter = passthrough_context_list_iterator;
+      keyword_value = NULL;
+    }
+
+  if (keyword_value != NULL)
+    {
+      /* The callee has some information associated with it.  */
+      const struct callshapes *next_shapes = keyword_value;
+
+      #if DEBUG_RUST
+      {
+        fprintf (stderr, "children:\n");
+        uint32_t i;
+        for (i = 0; i < args_count; i++)
+          fprintf (stderr, "%u -> [%s]|%s|\n", i, ts_node_type (ts_node_child (args_node, i)), ts_node_string (ts_node_child (args_node, i)));
+      }
+      #endif
+
+      /* We are only interested in argument lists of the form (<TOKENS>),
+         not [<TOKENS>] or {<TOKENS>}.  The other forms fall through to the
+         traversal without argument list parser, at the end of this
+         function.  */
+      if (args_count >= 2
+          && ts_node_symbol (ts_node_child (args_node, 0)) == ts_symbol_open_paren
+          && ts_node_symbol (ts_node_child (args_node, args_count - 1)) == ts_symbol_close_paren)
+        {
+          struct arglist_parser *argparser =
+            arglist_parser_alloc (mlp, next_shapes);
+          /* Current argument number.  It is 0 before the first argument and
+             counts from 1 afterwards.  */
+          uint32_t arg;
+          flag_region_ty *arg_region;
+          uint32_t i;
+          uint32_t prev2_token_in_same_arg;
+          uint32_t prev1_token_in_same_arg;
+
+          arg = 0;
+          for (i = 0; i < args_count; i++)
+            {
+              TSNode arg_node = ts_node_child (args_node, i);
+              handle_comments (arg_node, contents);
+              if (i == 0 || ts_node_symbol (arg_node) == ts_symbol_comma)
+                {
+                  /* The next argument starts here: at the opening
+                     parenthesis (i == 0) or at a comma.  Release the region
+                     of the previous argument, create the region of the new
+                     one, and forget the tokens seen so far.  The index 0
+                     means "no such token"; this is unambiguous because the
+                     child 0 always starts an argument and is never recorded
+                     as a previous token.  */
+                  arg++;
+                  if (i > 0)
+                    unref_region (arg_region);
+                  arg_region =
+                    inheriting_region (outer_region,
+                                       flag_context_list_iterator_advance (
+                                         &next_context_iter));
+                  prev2_token_in_same_arg = 0;
+                  prev1_token_in_same_arg = 0;
+                }
+              else
+                {
+                  bool already_extracted = false;
+                  if (ts_node_symbol (arg_node) == ts_symbol_string_literal
+                      || ts_node_symbol (arg_node) == ts_symbol_raw_string_literal)
+                    {
+                      lex_pos_ty pos;
+                      pos.file_name = logical_file_name;
+                      pos.line_number = ts_node_line_number (arg_node);
+
+                      char *string = string_literal_value (arg_node, contents);
+
+                      if (extract_all)
+                        {
+                          remember_a_message (mlp, NULL, string, true, false,
+                                              arg_region, &pos,
+                                              NULL, savable_comment, true);
+                          already_extracted = true;
+                        }
+                      else
+                        {
+                          mixed_string_ty *mixed_string =
+                            mixed_string_alloc_utf8 (string, lc_string,
+                                                     pos.file_name, pos.line_number);
+                          arglist_parser_remember (argparser, arg, mixed_string,
+                                                   arg_region,
+                                                   pos.file_name, pos.line_number,
+                                                   savable_comment, true);
+                        }
+                    }
+
+                  if (++nesting_depth > MAX_NESTING_DEPTH)
+                    if_error (IF_SEVERITY_FATAL_ERROR,
+                              logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                              _("too many open parentheses, brackets, or braces"));
+                  if (ts_node_symbol (arg_node) == ts_symbol_token_tree)
+                    {
+                      if (prev1_token_in_same_arg > 0
+                          && ts_node_symbol (ts_node_child (args_node, prev1_token_in_same_arg)) == ts_symbol_identifier)
+                        {
+                          /* A token sequence that looks like a function call.  */
+                          TSNode identifier_node = ts_node_child (args_node, prev1_token_in_same_arg);
+                          extract_from_function_call_like (
+                                             &identifier_node, false,
+                                             arg_node,
+                                             arg_region,
+                                             mlp,
+                                             contents);
+                        }
+                      else if (prev2_token_in_same_arg > 0
+                               && ts_node_symbol (ts_node_child (args_node, prev2_token_in_same_arg)) == ts_symbol_identifier
+                               && ts_node_symbol (ts_node_child (args_node, prev1_token_in_same_arg)) == ts_symbol_exclam)
+                        {
+                          /* A token sequence that looks like a macro invocation.  */
+                          TSNode identifier_node = ts_node_child (args_node, prev2_token_in_same_arg);
+                          extract_from_function_call_like (
+                                             &identifier_node, true,
+                                             arg_node,
+                                             arg_region,
+                                             mlp,
+                                             contents);
+                        }
+                      else
+                        /* A token sequence that looks like a parenthesized expression.  */
+                        extract_from_function_call_like (
+                                           NULL, false,
+                                           arg_node,
+                                           arg_region,
+                                           mlp,
+                                           contents);
+                    }
+                  else
+                    {
+                      if (!already_extracted)
+                        extract_from_node (arg_node,
+                                           arg_region,
+                                           mlp,
+                                           contents);
+                    }
+                  nesting_depth--;
+
+                  if (!(ts_node_symbol (arg_node) == ts_symbol_line_comment
+                        || ts_node_symbol (arg_node) == ts_symbol_block_comment))
+                    {
+                      prev2_token_in_same_arg = prev1_token_in_same_arg;
+                      prev1_token_in_same_arg = i;
+                    }
+                }
+            }
+          if (arg > 0)
+            unref_region (arg_region);
+          arglist_parser_done (argparser, arg);
+          return;
+        }
+    }
+
+  /* Recurse.  This point is reached when no keyword information applies to
+     the callee, or when the argument list is not of the form (<TOKENS>).
+     The tokens are traversed in the same way as above, but without an
+     argument list parser, and string literals are passed to
+     extract_from_node like any other token.  */
+
+  /* Current argument number.  */
+  uint32_t arg;
+  flag_region_ty *arg_region;
+  uint32_t i;
+  uint32_t prev2_token_in_same_arg;
+  uint32_t prev1_token_in_same_arg;
+
+  arg = 0;
+  for (i = 0; i < args_count; i++)
+    {
+      TSNode arg_node = ts_node_child (args_node, i);
+      handle_comments (arg_node, contents);
+      if (i == 0 || ts_node_symbol (arg_node) == ts_symbol_comma)
+        {
+          /* The next argument starts here: at the first child of ARGS_NODE
+             (i == 0) or at a comma.  The token history is reset, with
+             index 0 meaning "no such token".  */
+          arg++;
+          if (i > 0)
+            unref_region (arg_region);
+          arg_region =
+            inheriting_region (outer_region,
+                               flag_context_list_iterator_advance (
+                                 &next_context_iter));
+          prev2_token_in_same_arg = 0;
+          prev1_token_in_same_arg = 0;
+        }
+      else
+        {
+          if (++nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                      _("too many open parentheses, brackets, or braces"));
+          if (ts_node_symbol (arg_node) == ts_symbol_token_tree)
+            {
+              if (prev1_token_in_same_arg > 0
+                  && ts_node_symbol (ts_node_child (args_node, prev1_token_in_same_arg)) == ts_symbol_identifier)
+                {
+                  /* A token sequence that looks like a function call.  */
+                  TSNode identifier_node = ts_node_child (args_node, prev1_token_in_same_arg);
+                  extract_from_function_call_like (
+                                     &identifier_node, false,
+                                     arg_node,
+                                     arg_region,
+                                     mlp,
+                                     contents);
+                }
+              else if (prev2_token_in_same_arg > 0
+                       && ts_node_symbol (ts_node_child (args_node, prev2_token_in_same_arg)) == ts_symbol_identifier
+                       && ts_node_symbol (ts_node_child (args_node, prev1_token_in_same_arg)) == ts_symbol_exclam)
+                {
+                  /* A token sequence that looks like a macro invocation.  */
+                  TSNode identifier_node = ts_node_child (args_node, prev2_token_in_same_arg);
+                  extract_from_function_call_like (
+                                     &identifier_node, true,
+                                     arg_node,
+                                     arg_region,
+                                     mlp,
+                                     contents);
+                }
+              else
+                /* A token sequence that looks like a parenthesized expression.  */
+                extract_from_function_call_like (
+                                   NULL, false,
+                                   arg_node,
+                                   arg_region,
+                                   mlp,
+                                   contents);
+            }
+          else
+            extract_from_node (arg_node,
+                               arg_region,
+                               mlp,
+                               contents);
+          nesting_depth--;
+
+          if (!(ts_node_symbol (arg_node) == ts_symbol_line_comment
+                || ts_node_symbol (arg_node) == ts_symbol_block_comment))
+            {
+              prev2_token_in_same_arg = prev1_token_in_same_arg;
+              prev1_token_in_same_arg = i;
+            }
+        }
+    }
+  if (arg > 0)
+    unref_region (arg_region);
+}
+
+/* Extracts messages in the syntax tree NODE.
+   OUTER_REGION is the flag region that applies to NODE.
+   CONTENTS is the text of the source file; it is passed on to the helper
+   functions that need the text of a node.
+   The cases are:
+     - If extract_all is set and NODE is a string literal or raw string
+       literal, its value is remembered as a message.  Processing then
+       continues with the cases below.
+     - A 'call_expression' whose callee is a plain identifier and whose
+       arguments node is of type 'arguments' is handed to
+       extract_from_function_call.
+     - A 'macro_invocation' whose macro is a plain identifier and which has
+       a child of type 'token_tree' is handed to
+       extract_from_function_call_like.
+     - Otherwise, unless NODE is a comment, all children of NODE are visited
+       recursively, with the same OUTER_REGION.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_node (TSNode node,
+                   flag_region_ty *outer_region,
+                   message_list_ty *mlp,
+                   const char *contents)
+{
+  if (extract_all
+      && (ts_node_symbol (node) == ts_symbol_string_literal
+          || ts_node_symbol (node) == ts_symbol_raw_string_literal))
+    {
+      lex_pos_ty pos;
+      pos.file_name = logical_file_name;
+      pos.line_number = ts_node_line_number (node);
+
+      char *string = string_literal_value (node, contents);
+
+      remember_a_message (mlp, NULL, string, true, false,
+                          outer_region, &pos,
+                          NULL, savable_comment, true);
+    }
+
+  if (ts_node_symbol (node) == ts_symbol_call_expression
+      && ts_node_named_child_count (node) >= 2)
+    {
+      TSNode callee_node = ts_node_named_child (node, 0);
+      /* This is the field called 'function'.  The first named child and
+         the 'function' field are expected to be the same node; anything
+         else is treated as an internal error.  */
+      if (! ts_node_eq (ts_node_child_by_field_id (node, ts_field_function),
+                        callee_node))
+        abort ();
+      if (ts_node_symbol (callee_node) == ts_symbol_identifier)
+        {
+          TSNode args_node = ts_node_child_by_field_id (node, ts_field_arguments);
+          /* This is the field called 'arguments'.  */
+          if (ts_node_symbol (args_node) == ts_symbol_arguments)
+            {
+              /* Handle the potential comments between 'function' and 'arguments'.
+                 All children that precede the arguments node are passed to
+                 handle_comments.  */
+              {
+                uint32_t count = ts_node_child_count (node);
+                uint32_t i;
+                for (i = 0; i < count; i++)
+                  {
+                    TSNode subnode = ts_node_child (node, i);
+                    if (ts_node_eq (subnode, args_node))
+                      break;
+                    handle_comments (subnode, contents);
+                  }
+              }
+              extract_from_function_call (callee_node, args_node,
+                                          outer_region,
+                                          mlp,
+                                          contents);
+              return;
+            }
+        }
+    }
+
+  if (ts_node_symbol (node) == ts_symbol_macro_invocation
+      && ts_node_named_child_count (node) >= 2)
+    {
+      TSNode callee_node = ts_node_named_child (node, 0);
+      /* This is the field called 'macro'.  The first named child and the
+         'macro' field are expected to be the same node; anything else is
+         treated as an internal error.  */
+      if (! ts_node_eq (ts_node_child_by_field_id (node, ts_field_macro),
+                        callee_node))
+        abort ();
+      if (ts_node_symbol (callee_node) == ts_symbol_identifier)
+        {
+          /* We have to search for the args_node.
+             It is not always = ts_node_named_child (node, 1),
+             namely when there are comments before it.
+             The first child of type 'token_tree' is taken.  */
+          uint32_t count = ts_node_child_count (node);
+          uint32_t args_index;
+          for (args_index = 0; args_index < count; args_index++)
+            {
+              TSNode args_node = ts_node_child (node, args_index);
+              if (ts_node_symbol (args_node) == ts_symbol_token_tree)
+                {
+                  /* Handle the potential comments between 'macro' and the args_node.
+                     All children that precede the args_node are passed to
+                     handle_comments.  */
+                  {
+                    uint32_t i;
+                    for (i = 0; i < count; i++)
+                      {
+                        TSNode subnode = ts_node_child (node, i);
+                        if (ts_node_eq (subnode, args_node))
+                          break;
+                        handle_comments (subnode, contents);
+                      }
+                  }
+                  extract_from_function_call_like (&callee_node, true,
+                                                   args_node,
+                                                   outer_region,
+                                                   mlp,
+                                                   contents);
+                  return;
+                }
+            }
+        }
+    }
+
+  #if DEBUG_RUST
+  if (ts_node_symbol (node) == ts_symbol_call_expression)
+    {
+      TSNode subnode = ts_node_child_by_field_id (node, ts_field_function);
+      fprintf (stderr, "-> %s\n", ts_node_string (subnode));
+      if (ts_node_symbol (subnode) == ts_symbol_identifier)
+        {
+          string_desc_t subnode_string =
+            sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                         (char *) contents + ts_node_start_byte (subnode));
+          if (sd_equals (subnode_string, sd_from_c ("gettext")))
+            {
+              TSNode argsnode = ts_node_child_by_field_id (node, ts_field_arguments);
+              fprintf (stderr, "gettext arguments: %s\n", ts_node_string (argsnode));
+              fprintf (stderr, "gettext children:\n");
+              uint32_t count = ts_node_named_child_count (node);
+              uint32_t i;
+              for (i = 0; i < count; i++)
+                fprintf (stderr, "%u -> %s\n", i, ts_node_string (ts_node_named_child (node, i)));
+            }
+        }
+    }
+  if (ts_node_symbol (node) == ts_symbol_macro_invocation)
+    {
+      TSNode subnode = ts_node_child_by_field_id (node, ts_field_macro);
+      if (ts_node_symbol (subnode) == ts_symbol_identifier)
+        {
+          string_desc_t subnode_string =
+            sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                         (char *) contents + ts_node_start_byte (subnode));
+          fprintf (stderr, "identifier=%s\n", sd_c (subnode_string));
+          if (sd_equals (subnode_string, sd_from_c ("println")))
+            {
+              fprintf (stderr, "children:\n");
+              uint32_t count = ts_node_child_count (node);
+              uint32_t i;
+              for (i = 0; i < count; i++)
+                fprintf (stderr, "%u -> [%s]|%s|\n", i, ts_node_type (ts_node_child (node, i)), ts_node_string (ts_node_child (node, i)));
+            }
+        }
+    }
+  #endif
+
+  /* Recurse.  Comment nodes are not descended into.  Every child is first
+     passed to handle_comments and then visited with the same region; the
+     nesting_depth counter is incremented around each recursive call and a
+     fatal error is reported when it exceeds MAX_NESTING_DEPTH.  */
+  if (!(ts_node_symbol (node) == ts_symbol_line_comment
+        || ts_node_symbol (node) == ts_symbol_block_comment))
+    {
+      uint32_t count = ts_node_child_count (node);
+      uint32_t i;
+      for (i = 0; i < count; i++)
+        {
+          TSNode subnode = ts_node_child (node, i);
+          handle_comments (subnode, contents);
+          if (++nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name, ts_node_line_number (subnode), (size_t)(-1), false,
+                      _("too many open parentheses, brackets, or braces"));
+          extract_from_node (subnode,
+                             outer_region,
+                             mlp,
+                             contents);
+          nesting_depth--;
+       }
+    }
+}
+
+void
+extract_rust (FILE *f,
+              const char *real_filename, const char *logical_filename,
+              flag_context_list_table_ty *flag_table,
+              msgdomain_list_ty *mdlp)
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+
+  logical_file_name = xstrdup (logical_filename);
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  nesting_depth = 0;
+
+  init_keywords ();
+
+  if (ts_language == NULL)
+    {
+      ts_language = tree_sitter_rust ();
+      ts_symbol_line_comment       = ts_language_symbol ("line_comment", true);
+      ts_symbol_block_comment      = ts_language_symbol ("block_comment", true);
+      ts_symbol_string_literal     = ts_language_symbol ("string_literal", true);
+      ts_symbol_raw_string_literal = ts_language_symbol ("raw_string_literal", true);
+      ts_symbol_string_content     = ts_language_symbol ("string_content", true);
+      ts_symbol_escape_sequence    = ts_language_symbol ("escape_sequence", true);
+      ts_symbol_identifier         = ts_language_symbol ("identifier", true);
+      ts_symbol_call_expression    = ts_language_symbol ("call_expression", true);
+      ts_symbol_macro_invocation   = ts_language_symbol ("macro_invocation", true);
+      ts_symbol_arguments          = ts_language_symbol ("arguments", true);
+      ts_symbol_token_tree         = ts_language_symbol ("token_tree", true);
+      ts_symbol_open_paren         = ts_language_symbol ("(", false);
+      ts_symbol_close_paren        = ts_language_symbol (")", false);
+      ts_symbol_comma              = ts_language_symbol (",", false);
+      ts_symbol_exclam             = ts_language_symbol ("!", false);
+      ts_field_function  = ts_language_field ("function");
+      ts_field_arguments = ts_language_field ("arguments");
+      ts_field_macro     = ts_language_field ("macro");
+    }
+
+  /* Read the file into memory.
+     The file is read by name, through REAL_FILENAME; the stream F and the
+     FLAG_TABLE argument are not used in this function.  A read failure
+     terminates the program with an error message.  */
+  char *contents;
+  size_t contents_length;
+  contents = read_file (real_filename, 0, &contents_length);
+  if (contents == NULL)
+    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+           real_filename);
+
+  /* tree-sitter works only on files whose size fits in an uint32_t.
+     Larger files are rejected with an error, rather than being parsed
+     with a truncated length.  */
+  if (contents_length > 0xFFFFFFFFUL)
+    error (EXIT_FAILURE, 0, _("file \"%s\" is unsupported because too large"),
+           real_filename);
+
+  /* Rust source files are UTF-8 encoded.
+     <https://doc.rust-lang.org/1.6.0/reference.html#input-format>
+     A file that is not valid UTF-8 is rejected with an error.  Otherwise
+     UTF-8 is recorded as the encoding of the current source file.  */
+  if (u8_check ((uint8_t *) contents, contents_length) != NULL)
+    error (EXIT_FAILURE, 0,
+           _("file \"%s\" is invalid because not UTF-8 encoded"),
+           real_filename);
+  xgettext_current_source_encoding = po_charset_utf8;
+
+  /* Create a parser.  It is deleted again at the end of this function.  */
+  TSParser *parser = ts_parser_new ();
+
+  /* Set the parser's language.  The language object and the symbol and
+     field ids derived from it were initialized above, on the first
+     invocation of this function.  */
+  ts_parser_set_language (parser, ts_language);
+
+  /* Parse the file, producing a syntax tree.
+     The NULL argument is the old tree; there is none, since the file is
+     parsed from scratch.  */
+  TSTree *tree = ts_parser_parse_string (parser, NULL, contents, contents_length);
+
+  #if DEBUG_RUST
+  /* For debugging: Print the tree.  */
+  {
+    char *tree_as_string = ts_node_string (ts_tree_root_node (tree));
+    fprintf (stderr, "Syntax tree: %s\n", tree_as_string);
+    free (tree_as_string);
+  }
+  #endif
+
+  extract_from_node (ts_tree_root_node (tree),
+                     null_context_region (),
+                     mlp,
+                     contents);
+
+  ts_tree_delete (tree);
+  ts_parser_delete (parser);
+  free (contents);
+
+  logical_file_name = NULL;
+}
diff --git a/gettext-tools/src/x-rust.h b/gettext-tools/src/x-rust.h

new file mode 100644 (file)

index 0000000..10db553
--- /dev/null
+++ b/gettext-tools/src/x-rust.h
@@ -0,0 +1,57 @@
+/* xgettext Rust backend.
+   Copyright (C) 2002-2025 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025.  */
+
+
+#include <stdio.h>
+
+#include "message.h"
+#include "xg-arglist-context.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#define EXTENSIONS_RUST \
+  { "rs",     "Rust"  },                                                \
+
+#define SCANNERS_RUST \
+  { "Rust",             extract_rust, NULL,                             \
+                        NULL, &formatstring_rust, NULL },               \
+
+/* Scan a Rust file and add its translatable strings to mdlp.  */
+extern void extract_rust (FILE *fp, const char *real_filename,
+                          const char *logical_filename,
+                          flag_context_list_table_ty *flag_table,
+                          msgdomain_list_ty *mdlp);
+
+extern void x_rust_keyword (const char *keyword);
+extern void x_rust_extract_all (void);
+
+/* The flag_table_rust is split into two tables, one for functions and one for
+   macros.  */
+extern flag_context_list_table_ty flag_table_rust_functions;
+extern flag_context_list_table_ty flag_table_rust_macros;
+
+extern void init_flag_table_rust (void);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c

index fa8acf3053c6a18fbd7db67b9a296eeb338b9384..9ab408489e2a0ebe62543d11f343b1acfa2b0c34 100644 (file)
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -1,5 +1,5 @@
  /* Extracts strings from C source file to Uniforum style .po file.
-   Copyright (C) 1995-2024 Free Software Foundation, Inc.
+   Copyright (C) 1995-2025 Free Software Foundation, Inc.
     Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
  
     This program is free software: you can redistribute it and/or modify
@@ -109,6 +109,7 @@
  #include "x-lisp.h"
  #include "x-elisp.h"
  #include "x-librep.h"
+#include "x-rust.h"
  #include "x-ruby.h"
  #include "x-sh.h"
  #include "x-awk.h"
@@ -194,6 +195,10 @@ static flag_context_list_table_ty flag_table_scheme;
  static flag_context_list_table_ty flag_table_lisp;
  static flag_context_list_table_ty flag_table_elisp;
  static flag_context_list_table_ty flag_table_librep;
+#if 0 /* declared in x-rust.h */
+extern flag_context_list_table_ty flag_table_rust_functions;
+extern flag_context_list_table_ty flag_table_rust_macros;
+#endif
  static flag_context_list_table_ty flag_table_ruby;
  static flag_context_list_table_ty flag_table_sh;
  static flag_context_list_table_ty flag_table_awk;
@@ -380,6 +385,7 @@ main (int argc, char *argv[])
    init_flag_table_lisp ();
    init_flag_table_elisp ();
    init_flag_table_librep ();
+  init_flag_table_rust ();
    init_flag_table_ruby ();
    init_flag_table_sh ();
    init_flag_table_awk ();
@@ -413,6 +419,7 @@ main (int argc, char *argv[])
          x_tcl_extract_all ();
          x_perl_extract_all ();
          x_php_extract_all ();
+        x_rust_extract_all ();
          x_ruby_extract_all ();
          x_lua_extract_all ();
          x_javascript_extract_all ();
@@ -493,6 +500,7 @@ main (int argc, char *argv[])
          x_tcl_keyword (optarg);
          x_perl_keyword (optarg);
          x_php_keyword (optarg);
+        x_rust_keyword (optarg);
          x_ruby_keyword (optarg);
          x_lua_keyword (optarg);
          x_javascript_keyword (optarg);
@@ -1108,9 +1116,9 @@ Choice of input file language:\n"));
    -L, --language=NAME         recognise the specified language\n\
                                  (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\
                                  EmacsLisp, librep, Scheme, Guile, Smalltalk,\n\
-                                Java, JavaProperties, C#, awk, YCP, Tcl, Perl,\n\
-                                PHP, Ruby, GCC-source, NXStringTable, RST, RSJ,\n\
-                                Glade, Lua, JavaScript, Vala, Desktop)\n"));
+                                Java, JavaProperties, C#, Rust, Ruby, awk, YCP,\n\
+                                Tcl, Perl, PHP, GCC-source, NXStringTable, RST,\n\
+                                RSJ, Glade, Lua, JavaScript, Vala, Desktop)\n"));
        printf (_("\
    -C, --c++                   shorthand for --language=C++\n"));
        printf (_("\
@@ -1623,6 +1631,16 @@ xgettext_record_flag (const char *optionstring)
                                                      name_start, name_end,
                                                      argnum, value, pass);
                      break;
+                  case format_rust:
+                    if (name_end - name_start > 1 && name_end[-1] == '!')
+                      flag_context_list_table_insert (&flag_table_rust_macros, XFORMAT_PRIMARY,
+                                                      name_start, name_end - 1,
+                                                      argnum, value, pass);
+                    else
+                      flag_context_list_table_insert (&flag_table_rust_functions, XFORMAT_PRIMARY,
+                                                      name_start, name_end,
+                                                      argnum, value, pass);
+                    break;
                    case format_ruby:
                      flag_context_list_table_insert (&flag_table_ruby, XFORMAT_PRIMARY,
                                                      name_start, name_end,
@@ -2233,6 +2251,7 @@ language_to_extractor (const char *name)
      SCANNERS_LISP
      SCANNERS_ELISP
      SCANNERS_LIBREP
+    SCANNERS_RUST
      SCANNERS_RUBY
      SCANNERS_SH
      SCANNERS_AWK
@@ -2326,6 +2345,7 @@ extension_to_language (const char *extension)
      EXTENSIONS_LISP
      EXTENSIONS_ELISP
      EXTENSIONS_LIBREP
+    EXTENSIONS_RUST
      EXTENSIONS_RUBY
      EXTENSIONS_SH
      EXTENSIONS_AWK
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am

index 8408fce9c36be108a2a499a419baf2d508b39427..7e1f9af01c77756b4fa3d246fc78cb5add141fc0 100644 (file)
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am
@@ -1,5 +1,5 @@
  ## Makefile for the gettext-tools/tests subdirectory of GNU gettext
-## Copyright (C) 1995-2024 Free Software Foundation, Inc.
+## Copyright (C) 1995-2025 Free Software Foundation, Inc.
  ##
  ## This program is free software: you can redistribute it and/or modify
  ## it under the terms of the GNU General Public License as published by
@@ -157,6 +157,10 @@ TESTS = gettext-1 gettext-2 \
         xgettext-python-stackovfl-1 xgettext-python-stackovfl-2 \
         xgettext-python-stackovfl-3 xgettext-python-stackovfl-4 \
         xgettext-ruby-1 xgettext-ruby-2 \
+       xgettext-rust-1 xgettext-rust-2 xgettext-rust-3 xgettext-rust-4 \
+       xgettext-rust-5 xgettext-rust-6 xgettext-rust-7 \
+       xgettext-rust-stackovfl-1 xgettext-rust-stackovfl-2 \
+       xgettext-rust-stackovfl-3 xgettext-rust-stackovfl-4 \
         xgettext-scheme-1 xgettext-scheme-2 xgettext-scheme-3 \
         xgettext-scheme-4 xgettext-scheme-5 xgettext-scheme-6 \
         xgettext-scheme-7 xgettext-scheme-8 \
@@ -205,6 +209,7 @@ TESTS = gettext-1 gettext-2 \
         format-qt-1 format-qt-2 \
         format-qt-plural-1 format-qt-plural-2 \
         format-ruby-1 format-ruby-2 \
+       format-rust-1 format-rust-2 \
         format-scheme-1 format-scheme-2 \
         format-sh-1 format-sh-2 \
         format-tcl-1 format-tcl-2 \
diff --git a/gettext-tools/tests/format-rust-1 b/gettext-tools/tests/format-rust-1

new file mode 100755 (executable)

index 0000000..9e1d2ed
--- /dev/null
+++ b/gettext-tools/tests/format-rust-1
@@ -0,0 +1,65 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test recognition of Rust format strings.
+
+cat <<\EOF > f-rs-1.data
+# Invalid: no argument
+"abc"
+# Invalid: escaped braces
+"abc{{}}"
+# Valid: a numeric argument
+"abc{0}"
+# Invalid: a named argument
+"abc{value}"
+# Valid: an omitted number
+"abc{}"
+# Invalid: unterminated directive
+"abc{1"
+# Valid: format specifier
+"abc{1:0}"
+# Valid: format specifier
+"abc{1:<<-#012.34}"
+# Invalid: conversion in format specifier
+"abc{1:<<-#012.34e}"
+# Invalid: empty precision
+"abc{1:8.}"
+# Invalid: invalid format specifier
+"abc{1:<c>}"
+EOF
+# For each (comment, string) pair: extract gettext(string) and require 'rust-format' in the PO file iff the comment says "Valid:".
+: ${XGETTEXT=xgettext}
+n=0
+while read comment; do
+  read string
+  n=`expr $n + 1`
+  cat <<EOF > f-rs-1-$n.in
+gettext(${string});
+EOF
+  ${XGETTEXT} -L Rust -o f-rs-1-$n.po f-rs-1-$n.in || Exit 1
+  test -f f-rs-1-$n.po || Exit 1
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    if grep rust-format f-rs-1-$n.po > /dev/null; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    if grep rust-format f-rs-1-$n.po > /dev/null; then
+      fail=yes
+    else
+      :
+    fi
+  fi
+  if test -n "$fail"; then
+    echo "Format string recognition error:" 1>&2
+    cat f-rs-1-$n.in 1>&2
+    echo "Got:" 1>&2
+    cat f-rs-1-$n.po 1>&2
+    Exit 1
+  fi
+  rm -f f-rs-1-$n.in f-rs-1-$n.po
+done < f-rs-1.data
+
+Exit 0
diff --git a/gettext-tools/tests/format-rust-2 b/gettext-tools/tests/format-rust-2

new file mode 100755 (executable)

index 0000000..bbeb74e
--- /dev/null
+++ b/gettext-tools/tests/format-rust-2
@@ -0,0 +1,70 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test checking of Rust format strings.
+
+cat <<\EOF > f-rs-2.data
+# Valid: permutation
+msgid  "abc{}{}{}def"
+msgstr "xyz{1}{0}{2}"
+# Valid: permutation
+msgid  "abc{2}{0}{1}def"
+msgstr "xyz{1}{0}{2}"
+# Invalid: missing argument
+msgid  "abc{1}def{0}"
+msgstr "xyz{0}"
+# Invalid: missing argument
+msgid  "abc{0}def{1}"
+msgstr "xyz{1}"
+# Invalid: added argument
+msgid  "abc{}def"
+msgstr "xyz{}uvw{}"
+# Invalid: added argument
+msgid  "abc{0}def"
+msgstr "xyz{0}uvw{1}"
+# Valid: multiple reuse of same argument
+msgid  "{2} {0} {1}"
+msgstr "{1} {0} {2} {0}"
+# Valid: single reuse of same argument
+msgid  "{1} {0} {2} {0}"
+msgstr "{2} {0} {1}"
+# Valid: "{{" is an escape of "{"
+msgid  "abc{{{1}{2}"
+msgstr "{2}abc{1}"
+EOF
+# For each (comment, msgid, msgstr) triple: 'msgfmt --check-format' must succeed for "Valid:" entries and exit with status 1 otherwise.
+: ${MSGFMT=msgfmt}
+n=0
+while read comment; do
+  read msgid_line
+  read msgstr_line
+  n=`expr $n + 1`
+  cat <<EOF > f-rs-2-$n.po
+#, rust-format
+${msgid_line}
+${msgstr_line}
+EOF
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    if ${MSGFMT} --check-format -o f-rs-2-$n.mo f-rs-2-$n.po; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    ${MSGFMT} --check-format -o f-rs-2-$n.mo f-rs-2-$n.po 2> /dev/null
+    if test $? = 1; then
+      :
+    else
+      fail=yes
+    fi
+  fi
+  if test -n "$fail"; then
+    echo "Format string checking error:" 1>&2
+    cat f-rs-2-$n.po 1>&2
+    Exit 1
+  fi
+  rm -f f-rs-2-$n.po f-rs-2-$n.mo
+done < f-rs-2.data
+
+Exit 0
diff --git a/gettext-tools/tests/xgettext-rust-1 b/gettext-tools/tests/xgettext-rust-1

index 1c707cd386f3cecc2aa7915bb199b133682093a4..b9e088e94e5fdc383591a37e05bf5ba40a1227e9 100755 (executable)
--- a/gettext-tools/tests/xgettext-rust-1
+++ b/gettext-tools/tests/xgettext-rust-1
@@ -1,19 +1,18 @@
  #!/bin/sh
  . "${srcdir=.}/init.sh"; path_prepend_ . ../src
  
-# Some tests for Rust support
+# Test Rust support: Simple things.
  
  cat <<EOF > xg-rs-1.rs
  use gettext_rs::gettext;
  
  pub trait TestCase {
    fn TestCase() {
-    GettextResourceManager rm = new GettextResourceManager("test");
      // standard usage
      String test1 = gettext("Test String 1");
      /* C style comment */
      String test2 = gettext("Test String 2");
-    // "multiline" string
+    // Rust does not have string literal concatenation.
      String test3 = gettext("Test " +
      "String " +
      "3");
@@ -32,7 +31,7 @@ third line"#);
  EOF
  
  : ${XGETTEXT=xgettext}
-${XGETTEXT} --omit-header --no-location -c -d xg-rs-1.tmp xg-rs-1.rs || Exit 1
+${XGETTEXT} --omit-header --no-location -k'gettext!' -c -d xg-rs-1.tmp xg-rs-1.rs || Exit 1
  LC_ALL=C tr -d '\r' < xg-rs-1.tmp.po > xg-rs-1.po || Exit 1
  
  cat <<EOF > xg-rs-1.ok
@@ -44,10 +43,6 @@ msgstr ""
  msgid "Test String 2"
  msgstr ""
  
-#. "multiline" string
-msgid "Test String 3"
-msgstr ""
-
  #. macro
  msgid "Test String 4"
  msgstr ""
@@ -69,7 +64,6 @@ msgstr ""
  EOF
  
  : ${DIFF=diff}
-${DIFF} xg-rs-1.ok xg-rs-1.po
-result=$?
+${DIFF} xg-rs-1.ok xg-rs-1.po || Exit 1
  
-exit $result
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-2 b/gettext-tools/tests/xgettext-rust-2

index 1f861c29b2e0f06a22b741a621327eb7abb20859..5fa571e26b2bcd833409e4c88c39377e8a4eed3e 100755 (executable)
--- a/gettext-tools/tests/xgettext-rust-2
+++ b/gettext-tools/tests/xgettext-rust-2
@@ -1,23 +1,23 @@
  #!/bin/sh
  . "${srcdir=.}/init.sh"; path_prepend_ . ../src
  
-# More tests for Rust support: UTF-8 characters
+# Test Rust support: UTF-8 characters and Unicode escapes.
  
  cat <<\EOF > xg-rs-2.rs
  pub trait TestCase {
    fn main () {
-    catalog.gettext("Russian (Русский): Здравствуйте"));
-    catalog.gettext("Vietnamese (Tiếng Việt): Chào bạn"));
-    catalog.gettext("Japanese (日本語): こんにちは"));
-    catalog.gettext("Thai (ภาษาไทย): สวัสดีครับ"));
-    catalog.gettext("Script: 𝒞"));
-    catalog.gettext("Russian (\u{0420}\u{0443}\u{0441}\u{0441}\u{043a}\u{0438}\u{0439}): \u{0417}\u{0434}\u{0440}\u{0430}\u{0432}\u{0441}\u{0442}\u{0432}\u{0443}\u{0439}\u{0442}\u{0435}"));
-    catalog.gettext("Vietnamese (Ti\u{1ebf}ng Vi\u{1ec7}t): Ch\u{00e0}o b\u{1ea1}n"));
-    catalog.gettext("Japanese (\u{65e5}\u{672c}\u{8a9e}): \u{3053}\u{3093}\u{306b}\u{3061}\u{306f}"));
-    catalog.gettext("Thai (\u{0e20}\u{0e32}\u{0e29}\u{0e32}\u{0e44}\u{0e17}\u{0e22}): \u{0e2a}\u{0e27}\u{0e31}\u{0e2a}\u{0e14}\u{0e35}\u{0e04}\u{0e23}\u{0e31}\u{0e1a}"));
-    catalog.gettext("Script: \u{1d49e}"));
+    gettext("Russian (Русский): Здравствуйте"));
+    gettext("Vietnamese (Tiếng Việt): Chào bạn"));
+    gettext("Japanese (日本語): こんにちは"));
+    gettext("Thai (ภาษาไทย): สวัสดีครับ"));
+    gettext("Script: 𝒞"));
+    gettext("Russian (\u{0420}\u{0443}\u{0441}\u{0441}\u{043a}\u{0438}\u{0439}): \u{0417}\u{0434}\u{0440}\u{0430}\u{0432}\u{0441}\u{0442}\u{0432}\u{0443}\u{0439}\u{0442}\u{0435}"));
+    gettext("Vietnamese (Ti\u{1ebf}ng Vi\u{1ec7}t): Ch\u{00e0}o b\u{1ea1}n"));
+    gettext("Japanese (\u{65e5}\u{672c}\u{8a9e}): \u{3053}\u{3093}\u{306b}\u{3061}\u{306f}"));
+    gettext("Thai (\u{0e20}\u{0e32}\u{0e29}\u{0e32}\u{0e44}\u{0e17}\u{0e22}): \u{0e2a}\u{0e27}\u{0e31}\u{0e2a}\u{0e14}\u{0e35}\u{0e04}\u{0e23}\u{0e31}\u{0e1a}"));
+    gettext("Script: \u{1d49e}"));
      // And now a comment with Русский and 日本語 and Unicode escapes: B\u{00f6}se B\u{00fc}bchen
-    catalog.gettext("This string has a multilingual comment"));
+    gettext("This string has a multilingual comment"));
      // Unicode identifiers.
      String あ = "";
      String 𐀀 = "";
@@ -76,7 +76,6 @@ msgstr ""
  EOF
  
  : ${DIFF=diff}
-${DIFF} xg-rs-2.ok xg-rs-2.po
-result=$?
+${DIFF} xg-rs-2.ok xg-rs-2.po || Exit 1
  
-exit $result
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-3 b/gettext-tools/tests/xgettext-rust-3

index 85d453e65b9df557ff58c3708c5739691605fa24..4165ab0a700c7485f72dfcd1c4dc51acbb0b64ca 100755 (executable)
--- a/gettext-tools/tests/xgettext-rust-3
+++ b/gettext-tools/tests/xgettext-rust-3
@@ -1,7 +1,7 @@
  #!/bin/sh
  . "${srcdir=.}/init.sh"; path_prepend_ . ../src
  
-# More tests for Rust support: string syntax
+# Test Rust support: string syntax and escapes.
  
  LC_ALL=C tr '%' '\015' <<\EOF > xg-rs-3.rs
  pub trait TestCase {
@@ -23,8 +23,7 @@ pub trait TestCase {
      gettext("bel: \x07\n");
      gettext // Recognized despite comments
         ( /* Even across multiline
-comment! */ "this is a single " /* now comes the concatenation! */ + // after +
-         "long line");
+comment! */ "this is a single long line");
      // Byte string literals are extracted.
      gettext(b"byte 1");
      // In byte string literals, escape sequences are recognized.
@@ -36,19 +35,25 @@ comment! */ "this is a single " /* now comes the concatenation! */ + // after +
      // In raw string literals, only delimiters without the proper number of
      // hashes are recognized.
      gettext (r##"raw 3 ""test"##);
-    // Normal and raw string literals can be concatenated.
+    // Rust does not have string literal concatenation.
      gettext("left - \"quot" + r#"ation"" - right"#);
      // Character literals are not extracted.
      gettext('x');
-    // Invalid concatenations are not concatenated.
+    // Rust does not have string literal concatenation.
      gettext("fooba"+'r');
+    // In multiline strings, after backslash-newline, leading ASCII whitespace is ignored.
+    gettext("multiline\
+             with spaces\
+               with tabs\
+             with no-break spaces\
+　　　　　　　　with ideographic spaces");
    }
  }
  EOF
  
  : ${XGETTEXT=xgettext}
  # delete POT-Creation-Date: line because the date depends on local time.
-${XGETTEXT} --output xg-rs-3.tmp --add-location -c -kmygettext:2 xg-rs-3.rs 2>/dev/null || Exit 1
+${XGETTEXT} --output xg-rs-3.tmp --add-location -c xg-rs-3.rs 2>/dev/null || Exit 1
  sed -e '/\"POT-Creation-Date:.*/d' < xg-rs-3.tmp | LC_ALL=C tr -d '\r' > xg-rs-3.po || Exit 1
  
  cat <<\EOF > xg-rs-3.ok
@@ -75,12 +80,6 @@ msgstr ""
  msgid "Böse Bübchen"
  msgstr ""
  
-#. \u escapes with more than one u are invalid.
-#: xg-rs-3.rs:6
-#, rust-format
-msgid "Japanese: \\uu{6585}\\uuu{6723}語"
-msgstr ""
-
  #: xg-rs-3.rs:7
  msgid ""
  "embedded\n"
@@ -122,44 +121,40 @@ msgid "this is a single long line"
  msgstr ""
  
  #. Byte string literals are extracted.
-#: xg-rs-3.rs:23
+#: xg-rs-3.rs:22
  msgid "byte 1"
  msgstr ""
  
  #. In byte string literals, escape sequences are recognized.
-#: xg-rs-3.rs:25
+#: xg-rs-3.rs:24
  msgid "byte 2 \\ \\ \t \n"
  msgstr ""
  
  #. Raw string literals are extracted.
-#: xg-rs-3.rs:27
+#: xg-rs-3.rs:26
  msgid "raw 1"
  msgstr ""
  
  #. In raw string literals, no escape sequences are recognized.
-#: xg-rs-3.rs:29
+#: xg-rs-3.rs:28
  msgid "raw 2 \\u{005c} \\\\ \\t \\n \\'"
  msgstr ""
  
  #. In raw string literals, only delimiters without the proper number of
  #. hashes are recognized.
-#: xg-rs-3.rs:32
-msgid "raw 3 \"test"
+#: xg-rs-3.rs:31
+msgid "raw 3 \"\"test"
  msgstr ""
  
-#. Normal and raw string literals can be concatenated.
-#: xg-rs-3.rs:34
-msgid "left - \"quotation\" - right"
-msgstr ""
-
-#. Invalid concatenations are not concatenated.
-#: xg-rs-3.rs:38
-msgid "fooba"
+#. In multiline strings, after backslash-newline, leading ASCII whitespace is ignored.
+#: xg-rs-3.rs:39
+msgid ""
+"multilinewith spaceswith tabs             with no-break "
+"spaces　　　　　　　　with ideographic spaces"
  msgstr ""
  EOF
  
  : ${DIFF=diff}
-${DIFF} xg-rs-3.ok xg-rs-3.po
-result=$?
+${DIFF} xg-rs-3.ok xg-rs-3.po || Exit 1
  
-exit $result
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-4 b/gettext-tools/tests/xgettext-rust-4

index 3ed0ed1cb42bee2e54632626ccb29083c4b96fc8..3183a57707e71abc09c9b7b719a0733ea9a002f2 100755 (executable)
--- a/gettext-tools/tests/xgettext-rust-4
+++ b/gettext-tools/tests/xgettext-rust-4
@@ -1,7 +1,7 @@
  #!/bin/sh
  . "${srcdir=.}/init.sh"; path_prepend_ . ../src
  
-# Tests for Rust plural keyword support
+# Test Rust support: plurals.
  
  cat <<EOF > xg-rs-4.rs
  pub trait TestCase {
@@ -30,7 +30,6 @@ msgstr[1] ""
  EOF
  
  : ${DIFF=diff}
-${DIFF} xg-rs-4.ok xg-rs-4.po
-result=$?
+${DIFF} xg-rs-4.ok xg-rs-4.po || Exit 1
  
-exit $result
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-5 b/gettext-tools/tests/xgettext-rust-5

index edd98a29e672f75efd90ad5adcf7ac61e825e9a2..be3ec074cebefae9ec4e7b080dad91871b07844b 100755 (executable)
--- a/gettext-tools/tests/xgettext-rust-5
+++ b/gettext-tools/tests/xgettext-rust-5
@@ -3,16 +3,16 @@
  
  # Test Rust support: extraction of contexts.
  
-cat <<\EOF > xg-rs-7.rs
+cat <<\EOF > xg-rs-5.rs
  println!(gettext("help"));
  println!(pgettext("Help", "about"));
  EOF
  
  : ${XGETTEXT=xgettext}
-${XGETTEXT} --omit-header --no-location -d xg-rs-7.tmp xg-rs-7.rs || Exit 1
-LC_ALL=C tr -d '\r' < xg-rs-7.tmp.po > xg-rs-7.po || Exit 1
+${XGETTEXT} --omit-header --no-location -d xg-rs-5.tmp xg-rs-5.rs || Exit 1
+LC_ALL=C tr -d '\r' < xg-rs-5.tmp.po > xg-rs-5.po || Exit 1
  
-cat <<EOF > xg-rs-7.ok
+cat <<EOF > xg-rs-5.ok
  msgid "help"
  msgstr ""
  
@@ -22,7 +22,6 @@ msgstr ""
  EOF
  
  : ${DIFF=diff}
-${DIFF} xg-rs-7.ok xg-rs-7.po
-result=$?
+${DIFF} xg-rs-5.ok xg-rs-5.po || Exit 1
  
-exit $result
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-6 b/gettext-tools/tests/xgettext-rust-6

new file mode 100755 (executable)

index 0000000..d8c352a
--- /dev/null
+++ b/gettext-tools/tests/xgettext-rust-6
@@ -0,0 +1,57 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test Rust support: function call like syntax in macro invocations.
+
+cat <<\EOF > xg-rs-6.rs
+fn main ()
+{
+  tr! ("Hello {}");
+  tr! (gettext /*x*/ ("Hello1"));
+  tr! [foo(), gettext ("Hello2")];
+  tr ! (tr ! ("Hello3"));
+  tr! (foo(), tr! ("Hello4"));
+}
+EOF
+# Pass 1: 'tr!' is not a keyword, so only the gettext() calls inside the macro arguments are expected.
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -d xg-rs-6a.tmp xg-rs-6.rs || Exit 1
+LC_ALL=C tr -d '\r' < xg-rs-6a.tmp.po > xg-rs-6a.po || Exit 1
+
+cat <<EOF > xg-rs-6a.ok
+msgid "Hello1"
+msgstr ""
+
+msgid "Hello2"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-rs-6a.ok xg-rs-6a.po || Exit 1
+
+# Pass 2: with -k'tr!', the string arguments of the tr! invocations are expected as well.
+${XGETTEXT} --omit-header --no-location -k'tr!' -d xg-rs-6b.tmp xg-rs-6.rs || Exit 1
+LC_ALL=C tr -d '\r' < xg-rs-6b.tmp.po > xg-rs-6b.po || Exit 1
+
+cat <<EOF > xg-rs-6b.ok
+#, rust-format
+msgid "Hello {}"
+msgstr ""
+
+msgid "Hello1"
+msgstr ""
+
+msgid "Hello2"
+msgstr ""
+
+msgid "Hello3"
+msgstr ""
+
+msgid "Hello4"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-rs-6b.ok xg-rs-6b.po || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-7 b/gettext-tools/tests/xgettext-rust-7

new file mode 100755 (executable)

index 0000000..f892d29
--- /dev/null
+++ b/gettext-tools/tests/xgettext-rust-7
@@ -0,0 +1,226 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test Rust support: propagation of 'rust-format'.
+
+cat <<\EOF > xg-rs-7.rs
+  gettext ("Hello10"));
+  (gettext ("Hello11"));
+  ((gettext ("Hello12")));
+  gettext (gettext ("Hello13"));
+  (gettext (gettext ("Hello14")));
+  ((gettext (gettext ("Hello15"))));
+  gettext ((gettext ("Hello16")));
+  gettext (((gettext ("Hello17"))));
+  gettext (foo(), gettext ("Hello18"));
+
+  format (gettext ("Hello20"));
+  format ((gettext ("Hello21")));
+  format (((gettext ("Hello22"))));
+  format (gettext (gettext ("Hello23")));
+  format ((gettext (gettext ("Hello24"))));
+  format (((gettext (gettext ("Hello25")))));
+  format (gettext ((gettext ("Hello26"))));
+  format (gettext (((gettext ("Hello27")))));
+  format (gettext (foo(), gettext ("Hello28")));
+
+  println! ("{}", gettext ("Hello30"));
+  println! ("{}", (gettext ("Hello31")));
+  println! ("{}", ((gettext ("Hello32"))));
+  println! ("{}", gettext (gettext ("Hello33")));
+  println! ("{}", (gettext (gettext ("Hello34"))));
+  println! ("{}", ((gettext (gettext ("Hello35")))));
+  println! ("{}", gettext ((gettext ("Hello36"))));
+  println! ("{}", gettext (((gettext ("Hello37")))));
+  println! ("{}", gettext (foo(), gettext ("Hello38")));
+
+  formatx! (gettext ("Hello40"));
+  formatx! ((gettext ("Hello41")));
+  formatx! (((gettext ("Hello42"))));
+  formatx! (gettext (gettext ("Hello43")));
+  formatx! ((gettext (gettext ("Hello44"))));
+  formatx! (((gettext (gettext ("Hello45")))));
+  formatx! (gettext ((gettext ("Hello46"))));
+  formatx! (gettext (((gettext ("Hello47")))));
+  formatx! (gettext (foo(), gettext ("Hello48")));
+
+  println! ("{}", formatx! (gettext ("Hello50")));
+  println! ("{}", formatx! ((gettext ("Hello51"))));
+  println! ("{}", formatx! (((gettext ("Hello52")))));
+  println! ("{}", formatx! (gettext (gettext ("Hello53"))));
+  println! ("{}", formatx! ((gettext (gettext ("Hello54")))));
+  println! ("{}", formatx! (((gettext (gettext ("Hello55"))))));
+  println! ("{}", formatx! (gettext ((gettext ("Hello56")))));
+  println! ("{}", formatx! (gettext (((gettext ("Hello57"))))));
+  println! ("{}", formatx! (gettext (foo(), gettext ("Hello58"))));
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location --flag=format:1:rust-format -d xg-rs-7.tmp xg-rs-7.rs || Exit 1
+LC_ALL=C tr -d '\r' < xg-rs-7.tmp.po > xg-rs-7.po || Exit 1
+
+cat <<EOF > xg-rs-7.ok
+msgid "Hello10"
+msgstr ""
+
+msgid "Hello11"
+msgstr ""
+
+msgid "Hello12"
+msgstr ""
+
+msgid "Hello13"
+msgstr ""
+
+msgid "Hello14"
+msgstr ""
+
+msgid "Hello15"
+msgstr ""
+
+msgid "Hello16"
+msgstr ""
+
+msgid "Hello17"
+msgstr ""
+
+msgid "Hello18"
+msgstr ""
+
+#, rust-format
+msgid "Hello20"
+msgstr ""
+
+#, rust-format
+msgid "Hello21"
+msgstr ""
+
+#, rust-format
+msgid "Hello22"
+msgstr ""
+
+#, rust-format
+msgid "Hello23"
+msgstr ""
+
+#, rust-format
+msgid "Hello24"
+msgstr ""
+
+#, rust-format
+msgid "Hello25"
+msgstr ""
+
+#, rust-format
+msgid "Hello26"
+msgstr ""
+
+#, rust-format
+msgid "Hello27"
+msgstr ""
+
+msgid "Hello28"
+msgstr ""
+
+msgid "Hello30"
+msgstr ""
+
+msgid "Hello31"
+msgstr ""
+
+msgid "Hello32"
+msgstr ""
+
+msgid "Hello33"
+msgstr ""
+
+msgid "Hello34"
+msgstr ""
+
+msgid "Hello35"
+msgstr ""
+
+msgid "Hello36"
+msgstr ""
+
+msgid "Hello37"
+msgstr ""
+
+msgid "Hello38"
+msgstr ""
+
+#, rust-format
+msgid "Hello40"
+msgstr ""
+
+#, rust-format
+msgid "Hello41"
+msgstr ""
+
+#, rust-format
+msgid "Hello42"
+msgstr ""
+
+#, rust-format
+msgid "Hello43"
+msgstr ""
+
+#, rust-format
+msgid "Hello44"
+msgstr ""
+
+#, rust-format
+msgid "Hello45"
+msgstr ""
+
+#, rust-format
+msgid "Hello46"
+msgstr ""
+
+#, rust-format
+msgid "Hello47"
+msgstr ""
+
+msgid "Hello48"
+msgstr ""
+
+#, rust-format
+msgid "Hello50"
+msgstr ""
+
+#, rust-format
+msgid "Hello51"
+msgstr ""
+
+#, rust-format
+msgid "Hello52"
+msgstr ""
+
+#, rust-format
+msgid "Hello53"
+msgstr ""
+
+#, rust-format
+msgid "Hello54"
+msgstr ""
+
+#, rust-format
+msgid "Hello55"
+msgstr ""
+
+#, rust-format
+msgid "Hello56"
+msgstr ""
+
+#, rust-format
+msgid "Hello57"
+msgstr ""
+
+msgid "Hello58"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-rs-7.ok xg-rs-7.po || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-stackovfl-1 b/gettext-tools/tests/xgettext-rust-stackovfl-1

new file mode 100755 (executable)

index 0000000..8a38c7b
--- /dev/null
+++ b/gettext-tools/tests/xgettext-rust-stackovfl-1
@@ -0,0 +1,64 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test Rust support: stack overflow prevented by nesting depth check.
+
+cat <<\EOF > xg-rs-so-1.rs
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((
+gettext("Hello!")
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))
+;
+EOF
+
+# This input is expected to be accepted: extraction must succeed and yield the one message.
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -d xg-rs-so-1.tmp xg-rs-so-1.rs || Exit 1
+LC_ALL=C tr -d '\r' < xg-rs-so-1.tmp.po > xg-rs-so-1.po || Exit 1
+
+cat <<\EOF > xg-rs-so-1.ok
+msgid "Hello!"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-rs-so-1.ok xg-rs-so-1.po || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-stackovfl-2 b/gettext-tools/tests/xgettext-rust-stackovfl-2

new file mode 100755 (executable)

index 0000000..06edc8a
--- /dev/null
+++ b/gettext-tools/tests/xgettext-rust-stackovfl-2
@@ -0,0 +1,58 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test Rust support: stack overflow prevented by nesting depth check.
+
+cat <<\EOF > xg-rs-so-2.rs
+gettext
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+(((((((((((((((((((((((((((((((((((((((((((((((
+gettext("Hello!")
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+)))))))))))))))))))))))))))))))))))))))))))))))
+;
+EOF
+# xgettext is expected to fail with exit status 1 on this input; its captured stderr is echoed to stdout.
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -d xg-rs-so-2.tmp xg-rs-so-2.rs 2>xg-rs-so-2.err
+result=$?
+cat xg-rs-so-2.err
+test $result = 1 || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-stackovfl-3 b/gettext-tools/tests/xgettext-rust-stackovfl-3

new file mode 100755 (executable)

index 0000000..8209631
--- /dev/null
+++ b/gettext-tools/tests/xgettext-rust-stackovfl-3
@@ -0,0 +1,65 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test Rust support: stack overflow prevented by nesting depth check.
+
+cat <<\EOF > xg-rs-so-3.rs
+println!("{}",
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+(((((((((((((((((((((((((((((((((((((((((((((
+gettext("Hello!")
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+)))))))))))))))))))))))))))))))))))))))))))))
+);
+EOF
+
+# This input is expected to be accepted: extraction must succeed and yield the one message.
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -d xg-rs-so-3.tmp xg-rs-so-3.rs || Exit 1
+LC_ALL=C tr -d '\r' < xg-rs-so-3.tmp.po > xg-rs-so-3.po || Exit 1
+
+cat <<\EOF > xg-rs-so-3.ok
+msgid "Hello!"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-rs-so-3.ok xg-rs-so-3.po || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-rust-stackovfl-4 b/gettext-tools/tests/xgettext-rust-stackovfl-4

new file mode 100755 (executable)

index 0000000..2404cd7
--- /dev/null
+++ b/gettext-tools/tests/xgettext-rust-stackovfl-4
@@ -0,0 +1,58 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test Rust support: stack overflow prevented by nesting depth check.
+
+cat <<\EOF > xg-rs-so-4.rs
+println!("{}",
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((((((
+((((((((((((((((((((((((((((((((((((((((((((((
+gettext("Hello!")
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))))))
+))))))))))))))))))))))))))))))))))))))))))))))
+);
+EOF
+# xgettext is expected to fail with exit status 1 on this input; its captured stderr is echoed to stdout.
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -d xg-rs-so-4.tmp xg-rs-so-4.rs 2>xg-rs-so-4.err
+result=$?
+cat xg-rs-so-4.err
+test $result = 1 || Exit 1
+
+exit 0
diff --git a/gettext-tools/woe32dll/gettextsrc-exports.c b/gettext-tools/woe32dll/gettextsrc-exports.c

index 8d487a648324873f4cffea3857486bc3c0791474..1886968f5751ad3b1f00f3628ecbc988c93795df 100644 (file)
--- a/gettext-tools/woe32dll/gettextsrc-exports.c
+++ b/gettext-tools/woe32dll/gettextsrc-exports.c
@@ -1,5 +1,5 @@
  /* List of exported symbols of libgettextsrc on Cygwin and native Windows.
-   Copyright (C) 2006-2024 Free Software Foundation, Inc.
+   Copyright (C) 2006-2025 Free Software Foundation, Inc.
     Written by Bruno Haible <bruno@clisp.org>, 2006.
  
     This program is free software: you can redistribute it and/or modify
@@ -51,6 +51,7 @@ VARIABLE(formatstring_python_brace)
  VARIABLE(formatstring_qt)
  VARIABLE(formatstring_qt_plural)
  VARIABLE(formatstring_ruby)
+VARIABLE(formatstring_rust)
  VARIABLE(formatstring_scheme)
  VARIABLE(formatstring_sh)
  VARIABLE(formatstring_smalltalk)
author	Bruno Haible <bruno@clisp.org>
	Tue, 28 Jan 2025 10:11:56 +0000 (11:11 +0100)
committer	Bruno Haible <bruno@clisp.org>
	Tue, 28 Jan 2025 21:13:12 +0000 (22:13 +0100)
.gitignore		patch \| blob \| blame \| history
NEWS		patch \| blob \| blame \| history
autogen.sh		patch \| blob \| blame \| history
autopull.sh		patch \| blob \| blame \| history
check-copyright-headers		patch \| blob \| blame \| history
gettext-tools/Makefile.am		patch \| blob \| blame \| history
gettext-tools/build-aux/tree-sitter-portability.diff	[new file with mode: 0644]	patch \| blob
gettext-tools/build-aux/tree-sitter-rust-portability.diff	[new file with mode: 0644]	patch \| blob
gettext-tools/configure.ac		patch \| blob \| blame \| history
gettext-tools/doc/Makefile.am		patch \| blob \| blame \| history
gettext-tools/doc/gettext.texi		patch \| blob \| blame \| history
gettext-tools/doc/lang-rust.texi	[new file with mode: 0644]	patch \| blob
gettext-tools/doc/xgettext.texi		patch \| blob \| blame \| history
gettext-tools/libgettextpo/Makefile.am		patch \| blob \| blame \| history
gettext-tools/src/FILES		patch \| blob \| blame \| history
gettext-tools/src/Makefile.am		patch \| blob \| blame \| history
gettext-tools/src/format-rust.c	[new file with mode: 0644]	patch \| blob
gettext-tools/src/format.c		patch \| blob \| blame \| history
gettext-tools/src/format.h		patch \| blob \| blame \| history
gettext-tools/src/message.c		patch \| blob \| blame \| history
gettext-tools/src/message.h		patch \| blob \| blame \| history
gettext-tools/src/x-rust.c	[new file with mode: 0644]	patch \| blob
gettext-tools/src/x-rust.h	[new file with mode: 0644]	patch \| blob
gettext-tools/src/xgettext.c		patch \| blob \| blame \| history
gettext-tools/tests/Makefile.am		patch \| blob \| blame \| history
gettext-tools/tests/format-rust-1	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/format-rust-2	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-rust-1		patch \| blob \| blame \| history
gettext-tools/tests/xgettext-rust-2		patch \| blob \| blame \| history
gettext-tools/tests/xgettext-rust-3		patch \| blob \| blame \| history
gettext-tools/tests/xgettext-rust-4		patch \| blob \| blame \| history
gettext-tools/tests/xgettext-rust-5		patch \| blob \| blame \| history
gettext-tools/tests/xgettext-rust-6	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-rust-7	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-rust-stackovfl-1	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-rust-stackovfl-2	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-rust-stackovfl-3	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-rust-stackovfl-4	[new file with mode: 0755]	patch \| blob
gettext-tools/woe32dll/gettextsrc-exports.c		patch \| blob \| blame \| history