/gettext-tools/src/gettext.res
/gettext-tools/src/libxgettextts1.a
/gettext-tools/src/libxgettextts2.a
+/gettext-tools/src/libxgettextts3.a
+/gettext-tools/src/libxgettextts4.a
/gettext-tools/src/textstyle.h
/gettext-tools/src/textstyle/stdbool.h
/gettext-tools/src/textstyle/version.h
- 'msgfmt -c' now verifies the syntax of translations of Go format
strings.
- New examples 'hello-go' and 'hello-go-http' have been added.
+ * TypeScript:
+ - xgettext now supports TypeScript and TSX (= TypeScript with JSX
+ extensions).
Version 0.24 - February 2025
TREE_SITTER_VERSION=0.23.2
TREE_SITTER_GO_VERSION=0.23.4
TREE_SITTER_RUST_VERSION=0.23.2
+TREE_SITTER_TYPESCRIPT_VERSION=0.23.2
# Cache the relevant source code. Erase the rest of the tree-sitter projects.
test -d gettext-tools/tree-sitter-$TREE_SITTER_VERSION || {
func_git_clone_shallow tree-sitter https://github.com/tree-sitter/tree-sitter.git v$TREE_SITTER_VERSION
mv gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/src/scanner.c gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/src/rust-scanner.c
rm -rf tree-sitter-rust
}
+test -d gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION || {
+ func_git_clone_shallow tree-sitter-typescript https://github.com/tree-sitter/tree-sitter-typescript.git v$TREE_SITTER_TYPESCRIPT_VERSION
+ (cd tree-sitter-typescript && patch -p1) < gettext-tools/build-aux/tree-sitter-typescript-portability.diff
+ mkdir gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION
+ mkdir gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/common
+ mkdir gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript
+ mkdir gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx
+ mv tree-sitter-typescript/LICENSE gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/LICENSE
+ mv tree-sitter-typescript/common/scanner.h gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/common/scanner.h
+ mv tree-sitter-typescript/typescript/src gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src
+ mv tree-sitter-typescript/tsx/src gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src
+ mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src/parser.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src/ts-parser.c
+ mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src/scanner.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src/ts-scanner.c
+ mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/parser.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/tsx-parser.c
+ mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/scanner.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/tsx-scanner.c
+ rm -rf tree-sitter-typescript
+}
cat > gettext-tools/tree-sitter.cfg <<EOF
TREE_SITTER_VERSION=$TREE_SITTER_VERSION
TREE_SITTER_GO_VERSION=$TREE_SITTER_GO_VERSION
TREE_SITTER_RUST_VERSION=$TREE_SITTER_RUST_VERSION
+TREE_SITTER_TYPESCRIPT_VERSION=$TREE_SITTER_TYPESCRIPT_VERSION
EOF
dir0=`pwd`
tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/rust-scanner.c \
tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/tree_sitter/alloc.h \
tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/tree_sitter/array.h \
- tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/tree_sitter/parser.h
-
+ tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/tree_sitter/parser.h \
+ build-aux/tree-sitter-typescript-portability.diff \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/LICENSE \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/common/scanner.h \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src/ts-parser.c \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src/ts-scanner.c \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src/tree_sitter/alloc.h \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src/tree_sitter/array.h \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src/tree_sitter/parser.h \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tsx-parser.c \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tsx-scanner.c \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/alloc.h \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/array.h \
+ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/parser.h
# Files installed for the user.
--- /dev/null
+diff --git a/tsx/src/parser.c b/tsx/src/parser.c
+index faa8aa4..726e493 100644
+--- a/tsx/src/parser.c
++++ b/tsx/src/parser.c
+@@ -282905,8 +282905,10 @@ void tree_sitter_tsx_external_scanner_deserialize(void *, const char *, unsigned
+ #define TS_PUBLIC
+ #elif defined(_WIN32)
+ #define TS_PUBLIC __declspec(dllexport)
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define TS_PUBLIC __attribute__((visibility("default")))
++#else
++#define TS_PUBLIC
+ #endif
+
+ TS_PUBLIC const TSLanguage *tree_sitter_tsx(void) {
+diff --git a/tsx/src/tree_sitter/parser.h b/tsx/src/tree_sitter/parser.h
+index 799f599..130b4d0 100644
+--- a/tsx/src/tree_sitter/parser.h
++++ b/tsx/src/tree_sitter/parser.h
+@@ -155,8 +155,10 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
+
+ #ifdef _MSC_VER
+ #define UNUSED __pragma(warning(suppress : 4101))
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define UNUSED __attribute__((unused))
++#else
++#define UNUSED
+ #endif
+
+ #define START_LEXER() \
+diff --git a/typescript/src/parser.c b/typescript/src/parser.c
+index a88f8e1..b03339e 100644
+--- a/typescript/src/parser.c
++++ b/typescript/src/parser.c
+@@ -282387,8 +282387,10 @@ void tree_sitter_typescript_external_scanner_deserialize(void *, const char *, u
+ #define TS_PUBLIC
+ #elif defined(_WIN32)
+ #define TS_PUBLIC __declspec(dllexport)
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define TS_PUBLIC __attribute__((visibility("default")))
++#else
++#define TS_PUBLIC
+ #endif
+
+ TS_PUBLIC const TSLanguage *tree_sitter_typescript(void) {
+diff --git a/typescript/src/tree_sitter/parser.h b/typescript/src/tree_sitter/parser.h
+index 799f599..130b4d0 100644
+--- a/typescript/src/tree_sitter/parser.h
++++ b/typescript/src/tree_sitter/parser.h
+@@ -155,8 +155,10 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
+
+ #ifdef _MSC_VER
+ #define UNUSED __pragma(warning(suppress : 4101))
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define UNUSED __attribute__((unused))
++#else
++#define UNUSED
+ #endif
+
+ #define START_LEXER() \
AC_SUBST([TREE_SITTER_VERSION])
AC_SUBST([TREE_SITTER_GO_VERSION])
AC_SUBST([TREE_SITTER_RUST_VERSION])
+AC_SUBST([TREE_SITTER_TYPESCRIPT_VERSION])
PACKAGE_SUFFIX="-$ARCHIVE_VERSION"
AC_SUBST([PACKAGE_SUFFIX])
lang-java.texi \
lang-csharp.texi \
lang-javascript.texi \
+ lang-typescript.texi \
lang-scheme.texi \
lang-lisp.texi \
lang-clisp-c.texi \
* Java:: Java
* C#:: C#
* JavaScript:: JavaScript
+* TypeScript:: TypeScript
* Scheme:: GNU guile - Scheme
* Common Lisp:: GNU clisp - Common Lisp
* clisp C:: GNU clisp C sources
@cindex Java, string concatenation
@cindex C#, string concatenation
@cindex JavaScript, string concatenation
+@cindex TypeScript, string concatenation
@cindex Go, string concatenation
@cindex Ruby, string concatenation
@cindex Shell, string concatenation
In C#, string concatenation is denoted by the @samp{+} operator.
@c Reference: https://learn.microsoft.com/en-us/dotnet/csharp/how-to/concatenate-multiple-strings
@item
-In JavaScript, string concatenation is denoted by the @samp{+} operator.
+In JavaScript and TypeScript, string concatenation is denoted by the @samp{+} operator.
@c Reference: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Addition
@item
In Go, string concatenation is denoted by the @samp{+} operator.
* Java:: Java
* C#:: C#
* JavaScript:: JavaScript
+* TypeScript:: TypeScript
* Scheme:: GNU guile - Scheme
* Common Lisp:: GNU clisp - Common Lisp
* clisp C:: GNU clisp C sources
@include lang-java.texi
@include lang-csharp.texi
@include lang-javascript.texi
+@include lang-typescript.texi
@include lang-scheme.texi
@include lang-lisp.texi
@include lang-clisp-c.texi
--- /dev/null
+@c This file is part of the GNU gettext manual.
+@c Copyright (C) 1995-2025 Free Software Foundation, Inc.
+@c See the file gettext.texi for copying conditions.
+
+@node TypeScript
+@subsection TypeScript and TSX
+
+@table @asis
+@item RPMs
+js
+
+@item Ubuntu packages
+gjs
+
+@item File extension
+@code{ts} for TypeScript, @code{tsx} for TSX (TypeScript with JSX)
+
+@item String syntax
+@itemize @bullet
+
+@item @code{"abc"}
+
+@item @code{'abc'}
+
+@item @code{`abc`}
+
+@end itemize
+
+@item gettext shorthand
+@code{_("abc")}
+
+@item gettext/ngettext functions
+@code{gettext}, @code{dgettext}, @code{dcgettext}, @code{ngettext},
+@code{dngettext}
+
+@item textdomain
+@code{textdomain} function
+
+@item bindtextdomain
+@code{bindtextdomain} function
+
+@item setlocale
+automatic
+
+@item Prerequisite
+unknown
+
+@item Use or emulate GNU gettext
+use, or emulate
+
+@item Extractor
+@code{xgettext}
+
+@item Formatting with positions
+A @code{format} method on strings can be used.
+But since it is not standard in TypeScript,
+you have to enable it yourself. @c TODO How?
+
+@item Portability
+On platforms without gettext, the functions are not available.
+
+@item po-mode marking
+---
+@end table
@code{Java}, @code{JavaProperties},
@code{C#},
@code{JavaScript},
+@code{TypeScript}, @code{TSX},
@code{Scheme}, @code{Guile},
@code{Lisp},
@code{EmacsLisp},
Java,
C#,
JavaScript,
+TypeScript, TSX,
Scheme, Guile,
Lisp,
EmacsLisp,
@code{gettext.dngettext:2,3}, @code{gettext.dcngettext:2,3}.
@item
-For JavaScript: @code{_}, @code{gettext}, @code{dgettext:2},
+For JavaScript, TypeScript, TSX:
+@code{_}, @code{gettext}, @code{dgettext:2},
@code{dcgettext:2}, @code{ngettext:1,2}, @code{dngettext:2,3},
@code{pgettext:1c,2}, @code{dpgettext:2c,3}.
Java,
C#,
JavaScript,
+TypeScript, TSX,
Scheme, Guile,
Lisp,
EmacsLisp,
| x-javascript.h
| x-javascript.c
| String extractor for JavaScript.
+| x-typescript.h
+| x-typescript.c
+| x-typescript-impl.h
+| String extractor for TypeScript.
+| x-typescriptx.h
+| x-typescriptx.c
+| x-typescript-impl.h
+| String extractor for TSX.
| x-scheme.h
| x-scheme.c
| String extractor for Scheme.
noinst_LTLIBRARIES = libgettextsrc.la
endif
-noinst_LIBRARIES = libxgettextts1.a libxgettextts2.a
+noinst_LIBRARIES = $(LIBXGETTEXTTS)
noinst_HEADERS = \
pos.h message.h po-error.h xerror-handler.h po-xerror.h \
x-java.h \
x-csharp.h \
x-javascript.h \
+ x-typescript.h \
+ x-typescriptx.h \
+ x-typescript-impl.h \
x-scheme.h \
x-lisp.h \
x-elisp.h \
search-path.c
# xgettext has some tree-sitter based backends.
-LIBXGETTEXTTS = libxgettextts2.a libxgettextts1.a
+LIBXGETTEXTTS = libxgettextts2.a libxgettextts3.a libxgettextts4.a libxgettextts1.a
libxgettextts1_a_SOURCES = \
../tree-sitter-$(TREE_SITTER_VERSION)/lib/src/lib.c
libxgettextts1_a_CPPFLAGS = \
../tree-sitter-go-$(TREE_SITTER_GO_VERSION)/src/go-parser.c
libxgettextts2_a_CPPFLAGS = \
-I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include
+libxgettextts3_a_SOURCES = \
+ ../tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src/ts-parser.c \
+ ../tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src/ts-scanner.c
+libxgettextts3_a_CPPFLAGS = \
+ -I$(top_srcdir)/tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src \
+ -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include
+libxgettextts4_a_SOURCES = \
+ ../tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tsx-parser.c \
+ ../tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tsx-scanner.c
+libxgettextts4_a_CPPFLAGS = \
+ -I$(top_srcdir)/tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src \
+ -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include
# msggrep needs pattern matching.
LIBGREP = ../libgrep/libgrep.a
x-java.c \
x-csharp.c \
x-javascript.c \
+ x-typescript.c \
+ x-typescriptx.c \
x-scheme.c \
x-lisp.c \
x-elisp.c \
--- /dev/null
+/* xgettext TypeScript and TSX backends.
+ Copyright (C) 2001-2025 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025. */
+
+/* The languages TypeScript and TSX (= TypeScript with JSX) are very similar.
+ The extractor code is therefore nearly identical. */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <error.h>
+#include "message.h"
+#include "string-desc.h"
+#include "xstring-desc.h"
+#include "string-buffer-reversed.h"
+#include "xgettext.h"
+#include "xg-pos.h"
+#include "xg-mixed-string.h"
+#include "xg-arglist-context.h"
+#include "xg-arglist-callshape.h"
+#include "xg-arglist-parser.h"
+#include "xg-message.h"
+#include "if-error.h"
+#include "xalloc.h"
+#include "read-file.h"
+#include "unistr.h"
+#include "po-charset.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+/* Use tree-sitter.
+ Documentation: <https://tree-sitter.github.io/tree-sitter/using-parsers> */
+#include <tree_sitter/api.h>
+extern const TSLanguage *TREE_SITTER_LANGUAGE (void);
+
+
+/* The TypeScript syntax is defined in https://www.typescriptlang.org/docs/. */
+
+#define DEBUG_TYPESCRIPT 0
+
+
+/* ====================== Keyword set customization. ====================== */
+
+/* If true extract all strings. */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
+
+/* Requests that all strings be extracted: sets the file-level extract_all
+ flag to true.
+ NOTE(review): NOTE_OPTION_EXTRACT_ALL is presumably a macro that the
+ including file maps to the backend-specific function name - confirm. */
+void
+NOTE_OPTION_EXTRACT_ALL ()
+{
+ extract_all = true;
+}
+
+
+/* Registers the keyword specification NAME in the keywords hash table.
+   A NULL NAME instead disables the default keywords.  */
+void
+NOTE_OPTION_KEYWORD (const char *name)
+{
+  if (name == NULL)
+    {
+      default_keywords = false;
+      return;
+    }
+
+  /* The hash table is created lazily, on first use.  */
+  if (keywords.table == NULL)
+    hash_init (&keywords, 100);
+
+  const char *spec_end;
+  struct callshape spec_shape;
+  split_keywordspec (name, &spec_end, &spec_shape);
+
+  /* The characters between name and spec_end should form a valid identifier.
+     A colon in that range means an invalid parse in split_keywordspec().  */
+  const char *first_colon = strchr (name, ':');
+  if (!(first_colon != NULL && first_colon < spec_end))
+    insert_keyword_callshape (&keywords, name, spec_end - name, &spec_shape);
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.
+   Registers the default keywords once, unless they have been disabled.  */
+static void
+init_keywords ()
+{
+  /* Same as in x-javascript.c.  */
+  /* When adding new keywords here, also update the documentation in
+     xgettext.texi!  */
+  static const char * const default_specs[] =
+    {
+      "gettext",
+      "dgettext:2",
+      "dcgettext:2",
+      "ngettext:1,2",
+      "dngettext:2,3",
+      "pgettext:1c,2",
+      "dpgettext:2c,3",
+      "_"
+    };
+  size_t k;
+
+  if (!default_keywords)
+    return;
+
+  for (k = 0; k < sizeof (default_specs) / sizeof (default_specs[0]); k++)
+    NOTE_OPTION_KEYWORD (default_specs[k]);
+  /* Make sure the defaults are registered only once.  */
+  default_keywords = false;
+}
+
+/* Records the format-string flags of the default keywords, one
+   xgettext_record_flag call per keyword argument.  */
+void
+INIT_FLAG_TABLE ()
+{
+  /* Same as in x-javascript.c.  */
+  static const char * const flag_specs[] =
+    {
+      "gettext:1:pass-javascript-format",
+      "dgettext:2:pass-javascript-format",
+      "dcgettext:2:pass-javascript-format",
+      "ngettext:1:pass-javascript-format",
+      "ngettext:2:pass-javascript-format",
+      "dngettext:2:pass-javascript-format",
+      "dngettext:3:pass-javascript-format",
+      "pgettext:2:pass-javascript-format",
+      "dpgettext:3:pass-javascript-format",
+      "_:1:pass-javascript-format"
+    };
+  size_t k;
+
+  for (k = 0; k < sizeof (flag_specs) / sizeof (flag_specs[0]); k++)
+    xgettext_record_flag (flag_specs[k]);
+}
+
+
+/* ======================== Parsing via tree-sitter. ======================== */
+/* To understand this code, look at
+ tree-sitter-typescript/typescript/src/node-types.json
+ and
+ tree-sitter-typescript/typescript/src/grammar.json
+ */
+
+/* The tree-sitter's language object. */
+static const TSLanguage *ts_language;
+
+/* ------------------------- Node types and symbols ------------------------- */
+
+/* Returns the symbol of ts_language whose name is NAME.
+   IS_NAMED is passed through to ts_language_symbol_for_name.
+   Aborts if the lookup yields 0.  */
+static TSSymbol ts_language_symbol (const char *name, bool is_named)
+{
+  size_t name_length = strlen (name);
+  TSSymbol symbol =
+    ts_language_symbol_for_name (ts_language, name, name_length, is_named);
+
+  /* If the lookup fails, the grammar has evolved in an incompatible way.  */
+  if (symbol == 0)
+    abort ();
+  return symbol;
+}
+
+/* Returns the field id of ts_language whose name is NAME.
+   Aborts if the lookup yields 0.  */
+static TSFieldId ts_language_field (const char *name)
+{
+  size_t name_length = strlen (name);
+  TSFieldId field =
+    ts_language_field_id_for_name (ts_language, name, name_length);
+
+  /* If the lookup fails, the grammar has evolved in an incompatible way.  */
+  if (field == 0)
+    abort ();
+  return field;
+}
+
+/* Optimization:
+ Instead of
+ strcmp (ts_node_type (node), "string_literal") == 0
+ it is faster to do
+ ts_node_symbol (node) == ts_symbol_string_literal
+ */
+static TSSymbol ts_symbol_comment;
+static TSSymbol ts_symbol_string;
+static TSSymbol ts_symbol_string_fragment;
+static TSSymbol ts_symbol_escape_sequence;
+static TSSymbol ts_symbol_template_string;
+static TSSymbol ts_symbol_binary_expression;
+static TSSymbol ts_symbol_identifier;
+static TSSymbol ts_symbol_call_expression;
+static TSSymbol ts_symbol_arguments;
+static TSSymbol ts_symbol_plus; /* + */
+static TSFieldId ts_field_function;
+static TSFieldId ts_field_arguments;
+static TSFieldId ts_field_operator;
+static TSFieldId ts_field_left;
+static TSFieldId ts_field_right;
+
+/* Returns the line number of the start of NODE, counting from 1
+   (tree-sitter's row of the start point, plus 1).  */
+static inline size_t
+ts_node_line_number (TSNode node)
+{
+  TSPoint start = ts_node_start_point (node);
+  return start.row + 1;
+}
+
+/* -------------------------------- The file -------------------------------- */
+
+/* The entire contents of the file being analyzed. */
+static const char *contents;
+
+/* -------------------------------- Comments -------------------------------- */
+
+/* These are for tracking whether comments count as immediately before
+ keyword. */
+static int last_comment_line;
+static int last_non_comment_line;
+
+/* Saves a comment line.
+   GIST is the text of one comment line, without the comment markers.
+   Leading and trailing spaces and tabs are removed before the line is
+   passed to savable_comment_add.  */
+static void save_comment_line (string_desc_t gist)
+{
+  /* Remove leading whitespace.  */
+  while (sd_length (gist) > 0
+         && (sd_char_at (gist, 0) == ' '
+             || sd_char_at (gist, 0) == '\t'))
+    gist = sd_substring (gist, 1, sd_length (gist));
+  /* Remove trailing whitespace.  */
+  size_t len = sd_length (gist);
+  while (len > 0
+         && (sd_char_at (gist, len - 1) == ' '
+             || sd_char_at (gist, len - 1) == '\t'))
+    len--;
+  gist = sd_substring (gist, 0, len);
+  /* Make a temporary NUL-terminated copy.  Use xsd_c rather than sd_c, so
+     that an allocation failure is handled instead of passing NULL along.
+     savable_comment_add stores its own copy of the string (it appends to a
+     string list, which duplicates its argument), therefore the temporary
+     copy must be freed here, otherwise it leaks once per comment line.  */
+  char *line = xsd_c (gist);
+  savable_comment_add (line);
+  free (line);
+}
+
+/* Does the comment handling for NODE.
+ Updates savable_comment, last_comment_line, last_non_comment_line.
+ If NODE is a comment, its text (without the comment markers) is saved
+ line by line through save_comment_line; otherwise only the bookkeeping
+ of line numbers is updated.
+ It is important that this function gets called
+ - for each node (not only the named nodes!),
+ - in depth-first traversal order. */
+static void handle_comments (TSNode node)
+{
+ #if DEBUG_TYPESCRIPT && 0
+ fprintf (stderr, "LCL=%d LNCL=%d node=[%s]|%s|\n", last_comment_line, last_non_comment_line, ts_node_type (node), ts_node_string (node));
+ #endif
+ if (last_comment_line < last_non_comment_line
+ && last_non_comment_line < ts_node_line_number (node))
+ /* We have skipped over a newline. This newline terminated a line
+ with non-comment tokens, after the last comment line. */
+ savable_comment_reset ();
+
+ if (ts_node_symbol (node) == ts_symbol_comment)
+ {
+ /* ENTIRE is the source text of the comment node, from its start byte
+ to its end byte, i.e. including the comment markers. */
+ string_desc_t entire =
+ sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+ (char *) contents + ts_node_start_byte (node));
+ /* It should either start with two slashes... */
+ if (sd_length (entire) >= 2
+ && sd_char_at (entire, 0) == '/'
+ && sd_char_at (entire, 1) == '/')
+ {
+ save_comment_line (sd_substring (entire, 2, sd_length (entire)));
+ /* Remember the (1-based) line on which the comment ends. */
+ last_comment_line = ts_node_end_point (node).row + 1;
+ }
+ /* ... or it should start and end with the C comment markers. */
+ else if (sd_length (entire) >= 4
+ && sd_char_at (entire, 0) == '/'
+ && sd_char_at (entire, 1) == '*'
+ && sd_char_at (entire, sd_length (entire) - 2) == '*'
+ && sd_char_at (entire, sd_length (entire) - 1) == '/')
+ {
+ /* GIST is the comment text between the opening and closing
+ markers. */
+ string_desc_t gist = sd_substring (entire, 2, sd_length (entire) - 2);
+ /* Split into lines.
+ Remove leading and trailing whitespace from each line. */
+ for (;;)
+ {
+ ptrdiff_t nl_index = sd_index (gist, '\n');
+ if (nl_index >= 0)
+ {
+ save_comment_line (sd_substring (gist, 0, nl_index));
+ gist = sd_substring (gist, nl_index + 1, sd_length (gist));
+ }
+ else
+ {
+ /* No newline left: this is the last line of the comment. */
+ save_comment_line (gist);
+ break;
+ }
+ }
+ /* Remember the (1-based) line on which the comment ends. */
+ last_comment_line = ts_node_end_point (node).row + 1;
+ }
+ else
+ /* A comment node with neither of the two shapes is not expected. */
+ abort ();
+ }
+ else
+ /* Any other node counts as a non-comment token on its start line. */
+ last_non_comment_line = ts_node_line_number (node);
+}
+
+/* --------------------- string_buffer_reversed_unicode --------------------- */
+
+/* This type is like string_buffer_reversed with mostly UTF-8 contents, except
+ that it also handles Unicode surrogates: The combination of a low and a high
+ surrogate is converted to a single Unicode code point, whereas lone
+ surrogates are converted to U+FFFD (like 'struct mixed_string_buffer' does).
+ */
+struct string_buffer_reversed_unicode
+{
+ /* The underlying buffer, which is filled by prepending. */
+ struct string_buffer_reversed sbr;
+ /* The pending second half (0xDC00..0xDFFF) of a UTF-16 surrogate pair that
+ has been seen but not yet stored in sbr, or 0 if there is none. */
+ unsigned short utf16_surr;
+ /* The line number at which utf16_surr was seen. Only meaningful while
+ utf16_surr is non-zero. */
+ size_t utf16_surr_line_number;
+};
+
+/* Initializes a 'struct string_buffer_reversed_unicode':
+   no surrogate is pending, and the underlying buffer is initialized.  */
+static inline void
+sbru_init (struct string_buffer_reversed_unicode *buffer)
+{
+  /* Zero means "no pending surrogate".  */
+  buffer->utf16_surr = 0;
+  sbr_init (&buffer->sbr);
+}
+
+/* Auxiliary function: Handle the attempt to prepend a lone surrogate to
+   BUFFER.
+   UC is the surrogate, LINE_NUMBER the line on which it occurred.
+   Emits a warning and prepends U+FFFD in place of UC.  */
+static void
+sbru_prepend_lone_surrogate (struct string_buffer_reversed_unicode *buffer,
+                             ucs4_t uc, size_t line_number)
+{
+  /* Why U+FFFD: a half surrogate may be valid in a particular programming
+     language, but it is invalid in UTF-8:
+       - RFC 3629 says
+           "The definition of UTF-8 prohibits encoding character
+            numbers between U+D800 and U+DFFF".
+       - Unicode 4.0 chapter 3
+         <http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf>
+         section 3.9, p.77, says
+           "Because surrogate code points are not Unicode scalar
+            values, any UTF-8 byte sequence that would otherwise
+            map to code points D800..DFFF is ill-formed."
+         and in table 3-6, p. 78, does not mention D800..DFFF.
+       - The unicode.org FAQ question "How do I convert an unpaired
+         UTF-16 surrogate to UTF-8?" has the answer
+           "By representing such an unpaired surrogate on its own
+            as a 3-byte sequence, the resulting UTF-8 data stream
+            would become ill-formed."
+     Hence the replacement character is stored instead.  */
+  if_error (IF_SEVERITY_WARNING,
+            logical_file_name, line_number, (size_t)(-1), false,
+            _("lone surrogate U+%04X"), uc);
+
+  /* U+FFFD in UTF-8 encoding.  */
+  string_desc_t replacement = sd_new_addr (3, (char *) "\357\277\275");
+  sbr_xprepend_desc (&buffer->sbr, replacement);
+}
+
+/* Auxiliary function: Flush buffer->utf16_surr into buffer->sbr.
+   A pending surrogate that gets flushed this way has found no partner,
+   so it is handled as a lone surrogate.  Does nothing if none is pending.  */
+static inline void
+sbru_flush_utf16_surr (struct string_buffer_reversed_unicode *buffer)
+{
+  unsigned short pending = buffer->utf16_surr;
+
+  if (pending == 0)
+    return;
+
+  sbru_prepend_lone_surrogate (buffer, pending,
+                               buffer->utf16_surr_line_number);
+  buffer->utf16_surr = 0;
+}
+
+/* Prepends the character C to BUFFER.
+ A pending surrogate is flushed first (as a lone surrogate), since C
+ cannot pair with it. */
+static void
+sbru_xprepend1 (struct string_buffer_reversed_unicode *buffer, char c)
+{
+ sbru_flush_utf16_surr (buffer);
+ sbr_xprepend1 (&buffer->sbr, c);
+}
+
+/* Prepends the contents of the memory area S to BUFFER.
+ A pending surrogate is flushed first (as a lone surrogate). */
+static void
+sbru_xprepend_desc (struct string_buffer_reversed_unicode *buffer,
+ string_desc_t s)
+{
+ sbru_flush_utf16_surr (buffer);
+ sbr_xprepend_desc (&buffer->sbr, s);
+}
+
+/* Prepends a Unicode code point C to BUFFER.
+ NODE is only used to determine the line number for warnings.
+ Since the buffer is filled by prepending, the second half of a surrogate
+ pair (0xDC00..0xDFFF) arrives first and is kept pending in
+ buffer->utf16_surr until the next call shows whether the first half
+ (0xD800..0xDBFF) precedes it. */
+static void
+sbru_xprepend_unicode (struct string_buffer_reversed_unicode *buffer,
+ ucs4_t c, TSNode node)
+{
+ /* Test whether this character and the previous one form a Unicode
+ surrogate character pair. */
+ if (buffer->utf16_surr != 0 && (c >= 0xd800 && c < 0xdc00))
+ {
+ /* C is the first half and the pending value the second half:
+ combine them into a single code point. */
+ unsigned short utf16buf[2];
+ ucs4_t uc;
+
+ utf16buf[0] = c;
+ utf16buf[1] = buffer->utf16_surr;
+ if (u16_mbtouc (&uc, utf16buf, 2) != 2)
+ abort ();
+
+ /* Store the combined code point in UTF-8 encoding. */
+ uint8_t buf[6];
+ int n = u8_uctomb (buf, uc, sizeof (buf));
+ if (!(n > 0))
+ abort ();
+ sbr_xprepend_desc (&buffer->sbr, sd_new_addr (n, (char *) buf));
+
+ buffer->utf16_surr = 0;
+ }
+ else
+ {
+ /* No pair is formed: a pending second half, if any, is a lone
+ surrogate. */
+ sbru_flush_utf16_surr (buffer);
+
+ if (c >= 0xdc00 && c < 0xe000)
+ {
+ /* A second half: keep it pending, together with its line number. */
+ buffer->utf16_surr = c;
+ buffer->utf16_surr_line_number = ts_node_line_number (node);
+ }
+ else if (c >= 0xd800 && c < 0xdc00)
+ /* A first half without a pending second half: lone surrogate. */
+ sbru_prepend_lone_surrogate (buffer, c, ts_node_line_number (node));
+ else
+ {
+ /* Not a surrogate: store it in UTF-8 encoding. */
+ uint8_t buf[6];
+ int n = u8_uctomb (buf, c, sizeof (buf));
+ if (!(n > 0))
+ abort ();
+ sbr_xprepend_desc (&buffer->sbr, sd_new_addr (n, (char *) buf));
+ }
+ }
+}
+
+/* Returns the contents of BUFFER (with an added trailing NUL, that is,
+ as a C string), and frees all other memory held by BUFFER.
+ A still pending surrogate is flushed (as a lone surrogate) first.
+ The result is declared as never being NULL (see the attribute below).
+ It is the responsibility of the caller to free() the result. */
+static char *
+sbru_xdupfree_c (struct string_buffer_reversed_unicode *buffer)
+ _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE
+ _GL_ATTRIBUTE_RETURNS_NONNULL;
+static char *
+sbru_xdupfree_c (struct string_buffer_reversed_unicode *buffer)
+{
+ sbru_flush_utf16_surr (buffer);
+ return sbr_xdupfree_c (&buffer->sbr);
+}
+
+/* ---------------------------- String literals ---------------------------- */
+
+/* Determines whether NODE represents a string literal or the concatenation
+ of string literals (via the '+' operator).
+ A 'string' or 'template_string' node qualifies only if all its named
+ children are of type 'string_fragment' or 'escape_sequence'. */
+static bool
+is_string_literal (TSNode node)
+{
+ start:
+ if (ts_node_symbol (node) == ts_symbol_string
+ || ts_node_symbol (node) == ts_symbol_template_string)
+ {
+ /* Test whether all named children nodes are of type 'string_fragment' or
+ 'escape_sequence' (and thus none of type 'template_substitution' or
+ 'ERROR'). */
+ uint32_t count = ts_node_named_child_count (node);
+ uint32_t i;
+ for (i = 0; i < count; i++)
+ {
+ TSNode subnode = ts_node_named_child (node, i);
+ if (!(ts_node_symbol (subnode) == ts_symbol_string_fragment
+ || ts_node_symbol (subnode) == ts_symbol_escape_sequence))
+ return false;
+ }
+ return true;
+ }
+ if (ts_node_symbol (node) == ts_symbol_binary_expression
+ && ts_node_symbol (ts_node_child_by_field_id (node, ts_field_operator)) == ts_symbol_plus
+ /* Recurse into the left and right subnodes. */
+ && is_string_literal (ts_node_child_by_field_id (node, ts_field_right)))
+ {
+ /* The right operand was handled by the recursive call above. The left
+ operand is handled by jumping back to the start instead of a second
+ recursive call. */
+ /*return is_string_literal (ts_node_child_by_field_id (node, ts_field_left));*/
+ node = ts_node_child_by_field_id (node, ts_field_left);
+ goto start;
+ }
+ return false;
+}
+
+/* Prepends the string literal pieces from NODE to BUFFER.
+   NODE must be a 'string' or 'template_string' node whose named children are
+   all of type 'string_fragment' or 'escape_sequence', or a '+' expression
+   built from such nodes; any other node leads to abort ().
+   Escape sequences are replaced by the characters they denote.  For an
+   escape sequence that is not recognized, a warning is emitted and nothing
+   is added to BUFFER.  */
+static void
+string_literal_accumulate_pieces (TSNode node,
+                                  struct string_buffer_reversed_unicode *buffer)
+{
+ start:
+  if (ts_node_symbol (node) == ts_symbol_string
+      || ts_node_symbol (node) == ts_symbol_template_string)
+    {
+      uint32_t count = ts_node_named_child_count (node);
+      uint32_t i;
+      /* Walk the children from last to first, because BUFFER is filled by
+         prepending.  */
+      for (i = count; i > 0; )
+        {
+          i--;
+          TSNode subnode = ts_node_named_child (node, i);
+          if (ts_node_symbol (subnode) == ts_symbol_string_fragment)
+            {
+              string_desc_t subnode_string =
+                sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                             (char *) contents + ts_node_start_byte (subnode));
+              sbru_xprepend_desc (buffer, subnode_string);
+            }
+          else if (ts_node_symbol (subnode) == ts_symbol_escape_sequence)
+            {
+              const char *escape_start = contents + ts_node_start_byte (subnode);
+              const char *escape_end = contents + ts_node_end_byte (subnode);
+              /* The escape sequence must start with a backslash.  */
+              if (!(escape_end - escape_start >= 2 && escape_start[0] == '\\'))
+                abort ();
+              /* tree-sitter's grammar.js allows more escape sequences than the
+                 tsc compiler.  Give a warning for those cases where the tsc
+                 compiler gives an error.  */
+              bool invalid = false;
+              if (escape_end - escape_start == 2)
+                {
+                  /* Note: A one-digit octal escape such as \0 also has
+                     length 2 and therefore ends up in the default case.  */
+                  switch (escape_start[1])
+                    {
+                    case '\n':
+                      /* Backslash-newline: a line continuation.  */
+                      break;
+                    case '\\':
+                    case '"':
+                    /* \' is valid in every kind of string literal, and is
+                       needed inside '...' strings.  \` and \$ are the way to
+                       write a literal backquote or dollar sign in a template
+                       string.  Each denotes the character itself.  */
+                    case '\'':
+                    case '`':
+                    case '$':
+                      sbru_xprepend1 (buffer, escape_start[1]);
+                      break;
+                    case 'b':
+                      sbru_xprepend1 (buffer, 0x08);
+                      break;
+                    case 'f':
+                      sbru_xprepend1 (buffer, 0x0C);
+                      break;
+                    case 'n':
+                      sbru_xprepend1 (buffer, '\n');
+                      break;
+                    case 'r':
+                      sbru_xprepend1 (buffer, '\r');
+                      break;
+                    case 't':
+                      sbru_xprepend1 (buffer, '\t');
+                      break;
+                    case 'v':
+                      sbru_xprepend1 (buffer, 0x0B);
+                      break;
+                    default:
+                      invalid = true;
+                      break;
+                    }
+                }
+              else if (escape_end - escape_start == 3
+                       && escape_start[1] == '\r' && escape_start[2] == '\n')
+                /* Backslash-newline with a Windows CRLF.  */
+                ;
+              else if (escape_start[1] >= '0' && escape_start[1] <= '7')
+                {
+                  /* It's not clear whether octal escape sequences should be
+                     supported.  On one hand, they are supported in JavaScript.
+                     On the other hand, tsc says:
+                     "error TS1487: Octal escape sequences are not allowed."  */
+                  unsigned int value = 0;
+                  /* Only up to 3 octal digits are accepted.  */
+                  if (escape_end - escape_start <= 1 + 3)
+                    {
+                      const char *p;
+                      for (p = escape_start + 1; p < escape_end; p++)
+                        {
+                          /* No overflow is possible.  */
+                          char c = *p;
+                          if (c >= '0' && c <= '7')
+                            value = (value << 3) + (c - '0');
+                          else
+                            invalid = true;
+                        }
+                      if (value > 0xFF)
+                        invalid = true;
+                    }
+                  if (!invalid)
+                    sbru_xprepend1 (buffer, (unsigned char) value);
+                }
+              else if ((escape_start[1] == 'x' && escape_end - escape_start == 2 + 2)
+                       || (escape_start[1] == 'u' && escape_end - escape_start == 2 + 4))
+                {
+                  /* \xHH or \uHHHH.  */
+                  unsigned int value = 0;
+                  const char *p;
+                  for (p = escape_start + 2; p < escape_end; p++)
+                    {
+                      /* No overflow is possible.  */
+                      char c = *p;
+                      if (c >= '0' && c <= '9')
+                        value = (value << 4) + (c - '0');
+                      else if (c >= 'A' && c <= 'Z')
+                        value = (value << 4) + (c - 'A' + 10);
+                      else if (c >= 'a' && c <= 'z')
+                        value = (value << 4) + (c - 'a' + 10);
+                      else
+                        invalid = true;
+                    }
+                  if (escape_start[1] == 'x')
+                    {
+                      if (!invalid)
+                        sbru_xprepend1 (buffer, (unsigned char) value);
+                    }
+                  else
+                    sbru_xprepend_unicode (buffer, value, subnode);
+                }
+              else if (escape_start[1] == 'u'
+                       && escape_end - escape_start > 4
+                       && escape_start[2] == '{' && escape_end[-1] == '}')
+                {
+                  /* \u{H...}.  */
+                  unsigned int value = 0;
+                  const char *p;
+                  for (p = escape_start + 3; p < escape_end - 1; p++)
+                    {
+                      char c = *p;
+                      if (c >= '0' && c <= '9')
+                        value = (value << 4) + (c - '0');
+                      else if (c >= 'A' && c <= 'Z')
+                        value = (value << 4) + (c - 'A' + 10);
+                      else if (c >= 'a' && c <= 'z')
+                        value = (value << 4) + (c - 'a' + 10);
+                      else
+                        invalid = true;
+                      /* Stop at the first out-of-range value, so that VALUE
+                         cannot overflow.  */
+                      if (value >= 0x110000)
+                        invalid = true;
+                      if (invalid)
+                        break;
+                    }
+                  if (!invalid)
+                    sbru_xprepend_unicode (buffer, value, subnode);
+                }
+              else
+                invalid = true;
+              if (invalid)
+                {
+                  size_t line_number = ts_node_line_number (subnode);
+                  if_error (IF_SEVERITY_WARNING,
+                            logical_file_name, line_number, (size_t)(-1), false,
+                            _("invalid escape sequence in string"));
+                }
+            }
+          else
+            abort ();
+        }
+    }
+  else if (ts_node_symbol (node) == ts_symbol_binary_expression
+           && ts_node_symbol (ts_node_child_by_field_id (node, ts_field_operator)) == ts_symbol_plus)
+    {
+      /* Recurse into the left and right subnodes.
+         The right one comes first, because BUFFER is filled by prepending.
+         The left one is handled by jumping back to the start instead of a
+         second recursive call.  */
+      string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_right), buffer);
+      /*string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_left), buffer);*/
+      node = ts_node_child_by_field_id (node, ts_field_left);
+      goto start;
+    }
+  else
+    abort ();
+}
+
+/* Computes the value of a string literal NODE, i.e. of a 'string', a
+   'template_string', or a concatenation of such literals with '+'.
+   Returns a freshly allocated string.  It is mostly UTF-8 encoded: the
+   general case is delegated to string_literal_accumulate_pieces, which
+   may insert raw bytes for \x escapes.  */
+static char *
+string_literal_value (TSNode node)
+{
+  /* Fast path for the most frequent shape: a plain, non-empty string
+     literal without escape sequences, i.e. a 'string' node whose only
+     named child is a 'string_fragment'.  Its value is the source text of
+     that fragment, so it can be copied verbatim.  */
+  bool has_single_named_child =
+    (ts_node_symbol (node) == ts_symbol_string
+     && ts_node_named_child_count (node) == 1);
+  if (has_single_named_child)
+    {
+      TSNode fragment = ts_node_named_child (node, 0);
+      if (ts_node_symbol (fragment) == ts_symbol_string_fragment)
+        {
+          uint32_t from = ts_node_start_byte (fragment);
+          uint32_t to = ts_node_end_byte (fragment);
+          return xsd_c (sd_new_addr (to - from, (char *) contents + from));
+        }
+    }
+
+  /* Slow path: collect all pieces into a buffer, then duplicate the
+     buffer's contents as a C string and release the buffer.  */
+  struct string_buffer_reversed_unicode pieces;
+  sbru_init (&pieces);
+  string_literal_accumulate_pieces (node, &pieces);
+  return sbru_xdupfree_c (&pieces);
+}
+
+/* --------------------- Parsing and string extraction --------------------- */
+
+/* Context lookup table.
+   Set by EXTRACT from its flag_table argument and consulted by
+   extract_from_function_call to look up the flag contexts of a callee. */
+static flag_context_list_table_ty *flag_context_list_table;
+
+/* Maximum supported nesting depth.
+   Recursing deeper than this is reported as a fatal error. */
+#define MAX_NESTING_DEPTH 1000
+
+/* Current recursion depth of the tree walk.  Reset to 0 by EXTRACT,
+   incremented before and decremented after each recursive call of
+   extract_from_node. */
+static int nesting_depth;
+
+/* The file is parsed into an abstract syntax tree. Scan the syntax tree,
+ looking for a keyword in identifier position of a call_expression or
+ macro_invocation, followed by a string among the arguments.
+ When we see this pattern, we have something to remember.
+
+ Normal handling: Look for
+ keyword ( ... msgid ... )
+ Plural handling: Look for
+ keyword ( ... msgid ... msgid_plural ... )
+
+ We handle macro_invocation separately from call_expression, because in
+ a macro_invocation spaces are allowed between the identifier and the '!'
+ (i.e. 'println !' is as valid as 'println!'). Looking for 'println!'
+ would make the code more complicated.
+
+ We use recursion because the arguments before msgid or between msgid
+ and msgid_plural can contain subexpressions of the same form. */
+
+/* Forward declarations. */
+/* extract_from_node is needed before its definition because it and
+   extract_from_function_call call each other. */
+static void extract_from_node (TSNode node,
+ bool ignore,
+ flag_region_ty *outer_region,
+ message_list_ty *mlp);
+
+/* Extracts messages from the function call consisting of
+     - CALLEE_NODE: a tree node of type 'identifier',
+     - ARGS_NODE: a tree node of type 'arguments'.
+   OUTER_REGION is the flag region in effect around the call.  Each
+   argument gets its own region, derived from OUTER_REGION and from the
+   next flag context of the callee.
+   If the callee is a registered keyword, its string literal arguments are
+   handed to an argument list parser (or, when extract_all is set,
+   remembered directly).  All other arguments are scanned recursively, so
+   that nested calls are found as well.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_function_call (TSNode callee_node,
+                            TSNode args_node,
+                            flag_region_ty *outer_region,
+                            message_list_ty *mlp)
+{
+  uint32_t args_count = ts_node_child_count (args_node);
+
+  /* The callee's name, as a slice of the source file contents.  */
+  string_desc_t callee_name =
+    sd_new_addr (ts_node_end_byte (callee_node) - ts_node_start_byte (callee_node),
+                 (char *) contents + ts_node_start_byte (callee_node));
+
+  /* Context iterator.  */
+  flag_context_list_iterator_ty next_context_iter =
+    flag_context_list_iterator (
+      flag_context_list_table_lookup (
+        flag_context_list_table,
+        sd_data (callee_name), sd_length (callee_name)));
+
+  /* Determine whether the callee has some information associated with it,
+     i.e. whether it is a keyword.  Only then do we have a function, named
+     by a relevant identifier, with an argument list to parse.  */
+  bool is_keyword = false;
+  struct arglist_parser *argparser = NULL;
+  {
+    void *keyword_value;
+    if (hash_find_entry (&keywords,
+                         sd_data (callee_name), sd_length (callee_name),
+                         &keyword_value)
+        == 0)
+      {
+        const struct callshapes *next_shapes = keyword_value;
+        argparser = arglist_parser_alloc (mlp, next_shapes);
+        is_keyword = true;
+      }
+  }
+
+  /* Current argument number.  */
+  uint32_t arg = 0;
+  uint32_t i;
+
+  for (i = 0; i < args_count; i++)
+    {
+      TSNode arg_node = ts_node_child (args_node, i);
+      handle_comments (arg_node);
+      /* Only named nodes other than comments count as arguments; this
+         skips the punctuation children of the 'arguments' node.  */
+      if (ts_node_is_named (arg_node)
+          && ts_node_symbol (arg_node) != ts_symbol_comment)
+        {
+          arg++;
+          flag_region_ty *arg_region =
+            inheriting_region (outer_region,
+                               flag_context_list_iterator_advance (
+                                 &next_context_iter));
+
+          bool already_extracted = false;
+          if (is_keyword && is_string_literal (arg_node))
+            {
+              lex_pos_ty pos;
+              pos.file_name = logical_file_name;
+              pos.line_number = ts_node_line_number (arg_node);
+
+              char *string = string_literal_value (arg_node);
+
+              if (extract_all)
+                {
+                  /* STRING is handed over to remember_a_message.  */
+                  remember_a_message (mlp, NULL, string, true, false,
+                                      arg_region, &pos,
+                                      NULL, savable_comment, true);
+                  already_extracted = true;
+                }
+              else
+                {
+                  mixed_string_ty *mixed_string =
+                    mixed_string_alloc_utf8 (string, lc_string,
+                                             pos.file_name, pos.line_number);
+                  /* mixed_string_alloc_utf8 takes a 'const char *' and
+                     stores its own copy, so STRING is no longer needed.
+                     Free it to avoid a leak per keyword argument.  */
+                  free (string);
+                  arglist_parser_remember (argparser, arg, mixed_string,
+                                           arg_region,
+                                           pos.file_name, pos.line_number,
+                                           savable_comment, true);
+                }
+            }
+
+          if (!already_extracted)
+            {
+              /* Recurse into the argument, so that calls nested inside it
+                 are handled too.  */
+              if (++nesting_depth > MAX_NESTING_DEPTH)
+                if_error (IF_SEVERITY_FATAL_ERROR,
+                          logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                          _("too many open parentheses, brackets, or braces"));
+              extract_from_node (arg_node,
+                                 false,
+                                 arg_region,
+                                 mlp);
+              nesting_depth--;
+            }
+
+          unref_region (arg_region);
+        }
+    }
+
+  if (is_keyword)
+    arglist_parser_done (argparser, arg);
+}
+
+/* Walks the syntax tree rooted at NODE and extracts the messages in it.
+   IGNORE is true when NODE lies inside a string literal; in that case
+   extract_all does not remember NODE itself as a message.
+   OUTER_REGION is the flag region in effect for NODE.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_node (TSNode node,
+                   bool ignore,
+                   flag_region_ty *outer_region,
+                   message_list_ty *mlp)
+{
+  /* With extract_all, every string literal is a message.  */
+  if (extract_all && !ignore && is_string_literal (node))
+    {
+      lex_pos_ty where;
+      where.file_name = logical_file_name;
+      where.line_number = ts_node_line_number (node);
+
+      char *value = string_literal_value (node);
+
+      remember_a_message (mlp, NULL, value, true, false,
+                          outer_region, &where,
+                          NULL, savable_comment, true);
+    }
+
+  /* A call of the form  identifier ( arguments )  is handled by
+     extract_from_function_call.  */
+  if (ts_node_symbol (node) == ts_symbol_call_expression
+      && ts_node_named_child_count (node) >= 2)
+    {
+      TSNode function_node = ts_node_named_child (node, 0);
+      /* The first named child must be the field called 'function'.  */
+      if (! ts_node_eq (ts_node_child_by_field_id (node, ts_field_function),
+                        function_node))
+        abort ();
+      if (ts_node_symbol (function_node) == ts_symbol_identifier)
+        {
+          /* This is the field called 'arguments'.  */
+          TSNode arguments_node =
+            ts_node_child_by_field_id (node, ts_field_arguments);
+          if (ts_node_symbol (arguments_node) == ts_symbol_arguments)
+            {
+              /* Handle the potential comments between 'function' and
+                 'arguments': visit the children that precede the
+                 arguments node.  */
+              uint32_t n_children = ts_node_child_count (node);
+              uint32_t k = 0;
+              while (k < n_children)
+                {
+                  TSNode child = ts_node_child (node, k);
+                  if (ts_node_eq (child, arguments_node))
+                    break;
+                  handle_comments (child);
+                  k++;
+                }
+
+              extract_from_function_call (function_node, arguments_node,
+                                          outer_region,
+                                          mlp);
+              return;
+            }
+        }
+    }
+
+  #if DEBUG_TYPESCRIPT && 0
+  if (ts_node_symbol (node) == ts_symbol_call_expression)
+    {
+      TSNode subnode = ts_node_child_by_field_id (node, ts_field_function);
+      fprintf (stderr, "-> %s\n", ts_node_string (subnode));
+      if (ts_node_symbol (subnode) == ts_symbol_identifier)
+        {
+          string_desc_t subnode_string =
+            sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                         (char *) contents + ts_node_start_byte (subnode));
+          if (sd_equals (subnode_string, sd_from_c ("gettext")))
+            {
+              TSNode argsnode = ts_node_child_by_field_id (node, ts_field_arguments);
+              fprintf (stderr, "gettext arguments: %s\n", ts_node_string (argsnode));
+              fprintf (stderr, "gettext children:\n");
+              uint32_t count = ts_node_named_child_count (node);
+              uint32_t i;
+              for (i = 0; i < count; i++)
+                fprintf (stderr, "%u -> %s\n", i, ts_node_string (ts_node_named_child (node, i)));
+            }
+        }
+    }
+  #endif
+
+  /* Everything else: descend into all children, except below comments.  */
+  if (ts_node_symbol (node) == ts_symbol_comment)
+    return;
+
+  /* Once inside a string literal, stay in "ignore" mode for the whole
+     subtree.  */
+  if (!ignore)
+    ignore = is_string_literal (node);
+
+  uint32_t n_children = ts_node_child_count (node);
+  uint32_t k = 0;
+  while (k < n_children)
+    {
+      TSNode child = ts_node_child (node, k);
+      handle_comments (child);
+      if (++nesting_depth > MAX_NESTING_DEPTH)
+        if_error (IF_SEVERITY_FATAL_ERROR,
+                  logical_file_name, ts_node_line_number (child), (size_t)(-1), false,
+                  _("too many open parentheses, brackets, or braces"));
+      extract_from_node (child,
+                         ignore,
+                         outer_region,
+                         mlp);
+      nesting_depth--;
+      k++;
+    }
+}
+
+/* Scans a source file and adds its translatable strings to MDLP.
+   F is not used by this implementation; the file is read in its entirety
+   through REAL_FILENAME.
+   LOGICAL_FILENAME is the file name recorded in message positions and
+   diagnostics.
+   FLAG_TABLE is the flag context table used while scanning function calls.
+   Messages are added to the first message list of MDLP.
+   Exits with an error message if the file cannot be read, is too large
+   for tree-sitter, or is not valid UTF-8.  */
+void
+EXTRACT (FILE *f,
+         const char *real_filename, const char *logical_filename,
+         flag_context_list_table_ty *flag_table,
+         msgdomain_list_ty *mdlp)
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+
+  logical_file_name = xstrdup (logical_filename);
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  flag_context_list_table = flag_table;
+  nesting_depth = 0;
+
+  init_keywords ();
+
+  /* Look up the grammar's symbols and fields once, on first use.  */
+  if (ts_language == NULL)
+    {
+      ts_language = TREE_SITTER_LANGUAGE ();
+      ts_symbol_comment = ts_language_symbol ("comment", true);
+      ts_symbol_string = ts_language_symbol ("string", true);
+      ts_symbol_string_fragment = ts_language_symbol ("string_fragment", true);
+      ts_symbol_escape_sequence = ts_language_symbol ("escape_sequence", true);
+      ts_symbol_template_string = ts_language_symbol ("template_string", true);
+      ts_symbol_binary_expression = ts_language_symbol ("binary_expression", true);
+      ts_symbol_identifier = ts_language_symbol ("identifier", true);
+      ts_symbol_call_expression = ts_language_symbol ("call_expression", true);
+      ts_symbol_arguments = ts_language_symbol ("arguments", true);
+      ts_symbol_plus = ts_language_symbol ("+", false);
+      ts_field_function = ts_language_field ("function");
+      ts_field_arguments = ts_language_field ("arguments");
+      ts_field_operator = ts_language_field ("operator");
+      ts_field_left = ts_language_field ("left");
+      ts_field_right = ts_language_field ("right");
+    }
+
+  /* Read the file into memory.  */
+  char *contents_data;
+  size_t contents_length;
+  contents_data = read_file (real_filename, 0, &contents_length);
+  if (contents_data == NULL)
+    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+           real_filename);
+
+  /* tree-sitter works only on files whose size fits in an uint32_t.  */
+  if (contents_length > 0xFFFFFFFFUL)
+    error (EXIT_FAILURE, 0, _("file \"%s\" is unsupported because too large"),
+           real_filename);
+
+  /* TypeScript source files are usually UTF-8 encoded.  */
+  if (u8_check ((uint8_t *) contents_data, contents_length) != NULL)
+    error (EXIT_FAILURE, 0,
+           _("file \"%s\" is unsupported because not UTF-8 encoded"),
+           real_filename);
+  xgettext_current_source_encoding = po_charset_utf8;
+
+  /* Create a parser.  */
+  TSParser *parser = ts_parser_new ();
+
+  /* Set the parser's language.  This fails only if the grammar was
+     generated for an incompatible tree-sitter ABI version, which would be
+     a build problem, not a user error.  */
+  if (!ts_parser_set_language (parser, ts_language))
+    abort ();
+
+  /* Parse the file, producing a syntax tree.  A NULL result occurs only
+     when no language is assigned or parsing was cancelled; neither can
+     happen here, but don't dereference a NULL tree below.  */
+  TSTree *tree = ts_parser_parse_string (parser, NULL, contents_data, contents_length);
+  if (tree == NULL)
+    abort ();
+
+  #if DEBUG_TYPESCRIPT
+  /* For debugging: Print the tree.  */
+  {
+    char *tree_as_string = ts_node_string (ts_tree_root_node (tree));
+    fprintf (stderr, "Syntax tree: %s\n", tree_as_string);
+    free (tree_as_string);
+  }
+  #endif
+
+  /* Make the file contents available to the functions that slice node
+     texts out of it.  */
+  contents = contents_data;
+
+  extract_from_node (ts_tree_root_node (tree),
+                     false,
+                     null_context_region (),
+                     mlp);
+
+  ts_tree_delete (tree);
+  ts_parser_delete (parser);
+  free (contents_data);
+
+  logical_file_name = NULL;
+}
--- /dev/null
+/* xgettext TypeScript backend.
+ Copyright (C) 2001-2025 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* Specification. */
+#include "x-typescript.h"
+
+/* Instantiate the shared implementation for the TypeScript grammar.
+   The macros below give x-typescript-impl.h the names of the entry points
+   declared in x-typescript.h.  TREE_SITTER_LANGUAGE names the function
+   that provides the tree-sitter grammar (here: tree_sitter_typescript). */
+#define NOTE_OPTION_KEYWORD x_typescript_keyword
+#define NOTE_OPTION_EXTRACT_ALL x_typescript_extract_all
+#define INIT_FLAG_TABLE init_flag_table_typescript
+#define EXTRACT extract_typescript
+#define TREE_SITTER_LANGUAGE tree_sitter_typescript
+#include "x-typescript-impl.h"
--- /dev/null
+/* xgettext TypeScript backend.
+ Copyright (C) 2002-2025 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025. */
+
+
+#include <stdio.h>
+
+#include "message.h"
+#include "xg-arglist-context.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Entry for xgettext's table of file name extensions: files ending in
+   ".ts" are treated as language "TypeScript". */
+#define EXTENSIONS_TYPESCRIPT \
+ { "ts", "TypeScript" }, \
+
+/* Entry for xgettext's table of scanners: the language "TypeScript" is
+   scanned by extract_typescript with flag_table_typescript.  It refers to
+   formatstring_javascript, i.e. no TypeScript-specific format string
+   parser is named here. */
+#define SCANNERS_TYPESCRIPT \
+ { "TypeScript", extract_typescript, NULL, \
+ &flag_table_typescript, &formatstring_javascript, NULL }, \
+
+/* Scan a TypeScript file and add its translatable strings to mdlp. */
+extern void extract_typescript (FILE *fp, const char *real_filename,
+ const char *logical_filename,
+ flag_context_list_table_ty *flag_table,
+ msgdomain_list_ty *mdlp);
+
+/* Handlers for xgettext's keyword and extract-all options.
+   xgettext.c calls x_typescript_keyword with the option argument and
+   x_typescript_extract_all without arguments. */
+extern void x_typescript_keyword (const char *keyword);
+extern void x_typescript_extract_all (void);
+
+/* Called once by xgettext.c, next to the init_flag_table_* functions of
+   the other languages. */
+extern void init_flag_table_typescript (void);
+
+
+#ifdef __cplusplus
+}
+#endif
--- /dev/null
+/* xgettext TSX backend.
+ Copyright (C) 2001-2025 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* Specification. */
+#include "x-typescriptx.h"
+
+/* Instantiate the shared implementation for the TSX grammar.
+   The macros below give x-typescript-impl.h the names of the entry points
+   declared in x-typescriptx.h.  TREE_SITTER_LANGUAGE names the function
+   that provides the tree-sitter grammar (here: tree_sitter_tsx). */
+#define NOTE_OPTION_KEYWORD x_typescriptx_keyword
+#define NOTE_OPTION_EXTRACT_ALL x_typescriptx_extract_all
+#define INIT_FLAG_TABLE init_flag_table_typescriptx
+#define EXTRACT extract_typescriptx
+#define TREE_SITTER_LANGUAGE tree_sitter_tsx
+#include "x-typescript-impl.h"
--- /dev/null
+/* xgettext TSX backend.
+ Copyright (C) 2002-2025 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025. */
+
+
+#include <stdio.h>
+
+#include "message.h"
+#include "xg-arglist-context.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Entry for xgettext's table of file name extensions: files ending in
+   ".tsx" are treated as language "TSX". */
+#define EXTENSIONS_TYPESCRIPTX \
+ { "tsx", "TSX" }, \
+
+/* Entry for xgettext's table of scanners: the language "TSX" is scanned
+   by extract_typescriptx with flag_table_typescriptx.  It refers to
+   formatstring_javascript, i.e. no TSX-specific format string parser is
+   named here. */
+#define SCANNERS_TYPESCRIPTX \
+ { "TSX", extract_typescriptx, NULL, \
+ &flag_table_typescriptx, &formatstring_javascript, NULL }, \
+
+/* Scan a TSX file and add its translatable strings to mdlp. */
+extern void extract_typescriptx (FILE *fp, const char *real_filename,
+ const char *logical_filename,
+ flag_context_list_table_ty *flag_table,
+ msgdomain_list_ty *mdlp);
+
+/* Handlers for xgettext's keyword and extract-all options.
+   xgettext.c calls x_typescriptx_keyword with the option argument and
+   x_typescriptx_extract_all without arguments. */
+extern void x_typescriptx_keyword (const char *keyword);
+extern void x_typescriptx_extract_all (void);
+
+/* Called once by xgettext.c, next to the init_flag_table_* functions of
+   the other languages. */
+extern void init_flag_table_typescriptx (void);
+
+
+#ifdef __cplusplus
+}
+#endif
#include "x-java.h"
#include "x-csharp.h"
#include "x-javascript.h"
+#include "x-typescript.h"
+#include "x-typescriptx.h"
#include "x-scheme.h"
#include "x-lisp.h"
#include "x-elisp.h"
static flag_context_list_table_ty flag_table_java;
static flag_context_list_table_ty flag_table_csharp;
static flag_context_list_table_ty flag_table_javascript;
+static flag_context_list_table_ty flag_table_typescript;
+static flag_context_list_table_ty flag_table_typescriptx;
static flag_context_list_table_ty flag_table_scheme;
static flag_context_list_table_ty flag_table_lisp;
static flag_context_list_table_ty flag_table_elisp;
init_flag_table_java ();
init_flag_table_csharp ();
init_flag_table_javascript ();
+ init_flag_table_typescript ();
+ init_flag_table_typescriptx ();
init_flag_table_scheme ();
init_flag_table_lisp ();
init_flag_table_elisp ();
x_ruby_extract_all ();
x_lua_extract_all ();
x_javascript_extract_all ();
+ x_typescript_extract_all ();
+ x_typescriptx_extract_all ();
x_vala_extract_all ();
break;
x_ruby_keyword (optarg);
x_lua_keyword (optarg);
x_javascript_keyword (optarg);
+ x_typescript_keyword (optarg);
+ x_typescriptx_keyword (optarg);
x_vala_keyword (optarg);
x_desktop_keyword (optarg);
if (optarg == NULL)
printf (_("\
-L, --language=NAME recognise the specified language\n\
(C, C++, ObjectiveC, PO, Python, Java,\n\
- JavaProperties, C#, JavaScript, Scheme, Guile,\n\
- Lisp, EmacsLisp, librep, Rust, Go, Ruby, Shell,\n\
- awk, Lua, Smalltalk, Vala, Tcl, Perl, PHP,\n\
- GCC-source, YCP, NXStringTable, RST, RSJ,\n\
- Glade, GSettings, Desktop)\n"));
+ JavaProperties, C#, JavaScript, TypeScript, TSX,\n\
+ Scheme, Guile, Lisp, EmacsLisp, librep, Rust,\n\
+ Go, Ruby, Shell, awk, Lua, Smalltalk, Vala, Tcl,\n\
+ Perl, PHP, GCC-source, YCP, NXStringTable, RST,\n\
+ RSJ, Glade, GSettings, Desktop)\n"));
printf (_("\
-C, --c++ shorthand for --language=C++\n"));
printf (_("\
-a, --extract-all extract all strings\n"));
printf (_("\
(only languages C, C++, ObjectiveC, Python,\n\
- Java, C#, JavaScript, Scheme, Guile, Lisp,\n\
- EmacsLisp, librep, Rust, Go, Shell, awk, Lua,\n\
- Vala, Tcl, Perl, PHP, GCC-source, Glade,\n\
- GSettings)\n"));
+ Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
+ Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
+ awk, Lua, Vala, Tcl, Perl, PHP, GCC-source,\n\
+ Glade, GSettings)\n"));
printf (_("\
-kWORD, --keyword=WORD look for WORD as an additional keyword\n\
-k, --keyword do not to use default keywords\n"));
printf (_("\
(only languages C, C++, ObjectiveC, Python,\n\
- Java, C#, JavaScript, Scheme, Guile, Lisp,\n\
- EmacsLisp, librep, Rust, Go, Shell, awk, Lua,\n\
- Vala, Tcl, Perl, PHP, GCC-source, Glade,\n\
- GSettings, Desktop)\n"));
+ Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
+ Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
+ awk, Lua, Vala, Tcl, Perl, PHP, GCC-source,\n\
+ Glade, GSettings, Desktop)\n"));
printf (_("\
--flag=WORD:ARG:FLAG additional flag for strings inside the argument\n\
number ARG of keyword WORD\n"));
printf (_("\
(only languages C, C++, ObjectiveC, Python,\n\
- Java, C#, JavaScript, Scheme, Guile, Lisp,\n\
- EmacsLisp, librep, Rust, Go, Shell, awk, Lua,\n\
- Vala, Tcl, Perl, PHP, GCC-source, YCP)\n"));
+ Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
+ Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
+ awk, Lua, Vala, Tcl, Perl, PHP, GCC-source, YCP)\n"));
printf (_("\
--tag=WORD:FORMAT defines the behaviour of tagged template literals\n\
with tag WORD\n"));
flag_context_list_table_insert (&flag_table_javascript, XFORMAT_PRIMARY,
name_start, name_end,
argnum, value, pass);
+ flag_context_list_table_insert (&flag_table_typescript, XFORMAT_PRIMARY,
+ name_start, name_end,
+ argnum, value, pass);
+ flag_context_list_table_insert (&flag_table_typescriptx, XFORMAT_PRIMARY,
+ name_start, name_end,
+ argnum, value, pass);
break;
case format_scheme:
flag_context_list_table_insert (&flag_table_scheme, XFORMAT_PRIMARY,
SCANNERS_JAVA
SCANNERS_CSHARP
SCANNERS_JAVASCRIPT
+ SCANNERS_TYPESCRIPT
+ SCANNERS_TYPESCRIPTX
SCANNERS_SCHEME
SCANNERS_LISP
SCANNERS_ELISP
EXTENSIONS_JAVA
EXTENSIONS_CSHARP
EXTENSIONS_JAVASCRIPT
+ EXTENSIONS_TYPESCRIPT
+ EXTENSIONS_TYPESCRIPTX
EXTENSIONS_SCHEME
EXTENSIONS_LISP
EXTENSIONS_ELISP
xgettext-tcl-5 \
xgettext-tcl-stackovfl-1 xgettext-tcl-stackovfl-2 \
xgettext-tcl-stackovfl-3 xgettext-tcl-stackovfl-4 \
+ xgettext-typescript-1 xgettext-typescript-2 xgettext-typescript-3 \
+ xgettext-typescript-4 xgettext-typescript-5 xgettext-typescript-6 \
+ xgettext-typescript-7 \
xgettext-vala-1 xgettext-vala-2 xgettext-vala-3 xgettext-vala-4 \
xgettext-vala-5 xgettext-vala-6 xgettext-vala-7 \
xgettext-vala-stackovfl-1 xgettext-vala-stackovfl-2 \
#!/bin/sh
. "${srcdir=.}/init.sh"; path_prepend_ . ../src
-# Test of JavaScript JSX support.
+# Test of JavaScript E4X and JSX support.
cat <<\EOF > xg-js-6.js
class Foo extends React.Component {
}
}
+// Some E4X tests.
+
var x1 = <x1></x1>;
var s1 = _("Expected translation string #1");
var s2 = "foo";
return <a>{ 'b' }</a>;
}
var s10 = _("Expected translation string #9");
+
// Mixing JSX with template literals.
+
var s11 = 0;
var s12 = (
<div>
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of TypeScript support.
+
+cat <<\EOF > xg-ts-1.ts
+const s1: string = "Simple string, no gettext needed";
+const s2: string = _("Extract this first string");
+function foo(a: any): void {
+ const s3: string = `Prefix _(${_("Extract this second string")}) Postfix`;
+}
+const fooElement = document.getElementById("foo");
+if (fooElement && fooElement.innerHTML === _("Extract this third string")) {
+ /* _("This is a comment and must not be extracted!") */
+}
+/* TRANSLATORS: This is a translator comment. */
+gettext("Extract this fourth string");
+// TRANSLATORS: This is another translator comment.
+gettext("Extract this fifth string");
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments=TRANSLATORS: --no-location -o xg-ts-1.tmp xg-ts-1.ts 2>xg-ts-1.err
+test $? = 0 || { cat xg-ts-1.err; Exit 1; }
+func_filter_POT_Creation_Date xg-ts-1.tmp xg-ts-1.pot
+
+cat <<\EOF > xg-ts-1.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "Extract this first string"
+msgstr ""
+
+msgid "Extract this second string"
+msgstr ""
+
+msgid "Extract this third string"
+msgstr ""
+
+#. TRANSLATORS: This is a translator comment.
+msgid "Extract this fourth string"
+msgstr ""
+
+#. TRANSLATORS: This is another translator comment.
+msgid "Extract this fifth string"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-ts-1.ok xg-ts-1.pot
+result=$?
+
+exit $result
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of TypeScript support.
+# Playing with regex and division operator
+
+cat <<\EOF > xg-ts-2.ts
+// RegExp literals containing string quotes must not desync the parser
+const d: number = 1 / 2 / 4;
+const s: string = " x " + (/^\d/.exec("0815")?.[0] || "").replace(/[a-z]/g, '@');
+const s1: RegExpMatchArray | null = /"/.exec(_("RegExp test string #1"));
+const s2: RegExpMatchArray | null = /'/.exec(_("RegExp test string #2"));
+const s3: RegExpMatchArray | null = /['a-b]/.exec(_('RegExp test string #3'));
+const s4: RegExpMatchArray | null = /["a-b]/.exec(_('RegExp test string #4'));
+const s5: RegExpMatchArray | null = /[a-b']/.exec(_('RegExp test string #5'));
+const s6: RegExpMatchArray | null = /[a-b"]/.exec(_('RegExp test string #6'));
+const c: number = 35 / 2 / 8 + _( "RegExp test string #7").length / 32.0;
+const sizestr: string = Math.round(size/1024*factor)/factor+_( "RegExp test string #8");
+const cssClassType: string = attr.type.replace(/^.*\//, _('RegExp test string #9')).replace(/\./g, '-');
+const lookup: number = lookuptable[idx]/factor+_( "RegExp test string #10");
+function doit(): RegExpMatchArray | null {
+ return /\./.exec(_("RegExp test string #11"));
+}
+if (false)
+ /foo/.exec(_("RegExp test string #12"));
+else
+ /foo/.exec(_("RegExp test string #13"));
+const s7: boolean = /a\/\f\r\n\t\v\0\b\s\S\w\W\d\D\b\Bb/.test(_("RegExp test string #14"));
+const s8: RegExpExecArray | null = /(?=(a+))a*b\1/.exec(_("RegExp test string #15"));
+const s9: RegExpExecArray | null = /_\("a+"\)/.exec(_("RegExp test string #16"));
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-ts-2.tmp xg-ts-2.ts 2>xg-ts-2.err
+test $? = 0 || { cat xg-ts-2.err; Exit 1; }
+func_filter_POT_Creation_Date xg-ts-2.tmp xg-ts-2.pot
+
+cat <<\EOF > xg-ts-2.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "RegExp test string #1"
+msgstr ""
+
+msgid "RegExp test string #2"
+msgstr ""
+
+msgid "RegExp test string #3"
+msgstr ""
+
+msgid "RegExp test string #4"
+msgstr ""
+
+msgid "RegExp test string #5"
+msgstr ""
+
+msgid "RegExp test string #6"
+msgstr ""
+
+msgid "RegExp test string #7"
+msgstr ""
+
+msgid "RegExp test string #8"
+msgstr ""
+
+msgid "RegExp test string #9"
+msgstr ""
+
+msgid "RegExp test string #10"
+msgstr ""
+
+msgid "RegExp test string #11"
+msgstr ""
+
+msgid "RegExp test string #12"
+msgstr ""
+
+msgid "RegExp test string #13"
+msgstr ""
+
+msgid "RegExp test string #14"
+msgstr ""
+
+msgid "RegExp test string #15"
+msgstr ""
+
+msgid "RegExp test string #16"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-ts-2.ok xg-ts-2.pot
+result=$?
+
+exit $result
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of TypeScript support: string concatenation,
+# strings with embedded expressions.
+
+cat <<\EOF > xg-ts-3.ts
+let s0: string;
+let s1: string = _("Concatenation #1 " + "- String part added");
+let s2: string = _('Concatenation #2 ' + '- String part added');
+
+// a
+let s3: string = // b
+ _("This" + " whole " // c
+ + "string" + // d
+ ' should' + " be " + 'extracted');
+
+// Strings with embedded expressions, a.k.a. template literals.
+let t: string = "";
+let e1: string = _(`embedded_1_${foo}_bar`);
+let e2: string = _(`embedded_2_${_("embedded_2_sub1")}_bar_${_('embedded_2_sub2')}_baz`);
+let e3: string = _(`embedded_3`);
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-ts-3.tmp xg-ts-3.ts 2>xg-ts-3.err
+test $? = 0 || { cat xg-ts-3.err; Exit 1; }
+func_filter_POT_Creation_Date xg-ts-3.tmp xg-ts-3.pot
+
+cat <<\EOF > xg-ts-3.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "Concatenation #1 - String part added"
+msgstr ""
+
+msgid "Concatenation #2 - String part added"
+msgstr ""
+
+#. a
+#. b
+msgid "This whole string should be extracted"
+msgstr ""
+
+msgid "embedded_2_sub1"
+msgstr ""
+
+msgid "embedded_2_sub2"
+msgstr ""
+
+msgid "embedded_3"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-ts-3.ok xg-ts-3.pot
+result=$?
+
+exit $result
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of TypeScript escape sequences in string literals.
+
+cat <<\EOF > xg-ts-4.ts
+const s1: string = _("Unicode escape \u3042");
+const s2: string = _("Surrogate pair \uD835\uDC9C");
+const s3: string = _("Escape sequence \1411 \x622");
+const s4: string = _("Invalid escape sequence \xxx \y");
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-ts-4.tmp xg-ts-4.ts 2>xg-ts-4.err
+test $? = 0 || { cat xg-ts-4.err; Exit 1; }
+func_filter_POT_Creation_Date xg-ts-4.tmp xg-ts-4.pot
+
+cat <<\EOF > xg-ts-4.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "Unicode escape あ"
+msgstr ""
+
+msgid "Surrogate pair 𝒜"
+msgstr ""
+
+msgid "Escape sequence a1 b2"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-ts-4.ok xg-ts-4.pot
+result=$?
+
+exit $result
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of TypeScript Unicode support.
+
+cat <<\EOF > xg-ts-5.ts
+// The following excerpt is adapted from json2.js
+const cx: RegExp = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g;
+const escapable: RegExp = /[\\\"\x00-\x1f\x7f-\x9f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g;
+let gap: string | undefined;
+const txt1: string = _("Expected translation string #1");
+let indent: string | undefined;
+const meta: Record<string, string> = {
+ '\b': '\\b',
+ '\t': '\\t',
+ '\n': '\\n',
+ '\f': '\\f',
+ '\r': '\\r',
+ '"': '\\"',
+ '\\': '\\\\' + _("Expected translation string #2")
+};
+const txt2: string = _("Expected translation string #3");
+let rep: any;
+const matched: string = curnodepath.match(new RegExp(`^\\${path}\/([\\w\\s]+)`))
+ + _("Expected translation string #4");
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-ts-5.tmp xg-ts-5.ts 2>xg-ts-5.err
+test $? = 0 || { cat xg-ts-5.err; Exit 1; }
+func_filter_POT_Creation_Date xg-ts-5.tmp xg-ts-5.pot
+
+cat <<\EOF > xg-ts-5.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "Expected translation string #1"
+msgstr ""
+
+msgid "Expected translation string #2"
+msgstr ""
+
+msgid "Expected translation string #3"
+msgstr ""
+
+msgid "Expected translation string #4"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-ts-5.ok xg-ts-5.pot
+result=$?
+
+exit $result
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of TypeScript JSX support.
+
+cat <<\EOF > xg-ts-6.tsx
+import React from 'react';
+type FooProps = {};
+type FooState = {};
+class Foo extends React.Component<FooProps, FooState> {
+ render() {
+ return <div className="aClass" />;
+ }
+}
+type BarProps = {};
+type BarState = {};
+class Bar extends React.Component<BarProps, BarState> {
+ render() {
+ return (
+ <div>
+ <span className="someClass" />
+ { gettext('Expected translation string #0') }
+ </div>
+ );
+ }
+}
+const x1 = <x1></x1>;
+const s1: string = _("Expected translation string #1");
+const s2: string = "foo";
+const x2 = React.createElement(s2 as any, null, `foo ${s2} bar`);
+const x3 = (
+ <x3 a1="/">
+ <x4>{_("Expected translation string #2")}</x4>
+ </x3>
+);
+const x4 = (
+ <x5 a2="/">
+ {React.createElement(_("Expected translation string #3") as any)}
+ </x5>
+);
+const s9: string = _("Expected translation string #8");
+function fooFunction() {
+ return <a>{'b'}</a>;
+}
+const s10: string = _("Expected translation string #9");
+// Mixing JSX with template literals.
+const s11 = 0;
+const s12 = (
+ <div>
+ {_("Expected translation string #10")}
+ {`${_("Expected translation string #11")}`}
+ {_("Expected translation string #12")}
+ </div>
+);
+const s13: string = _("Expected translation string #13");
+const s14 = <div className={`${_("Expected translation string #14")}`} />;
+const s15: string = _("Expected translation string #15");
+const s16 = { a: 1, b: <div className={`${_("Expected translation string #16")}`} /> };
+const s17: string = _("Expected translation string #17");
+const s18 = `begin${<div>{_("Expected translation string #18")}</div>}end`;
+const s19: string = _("Expected translation string #19");
+const s20 = () => (
+ <Foo
+ a1={_("Expected translation string #20")}
+ a2={foo && <div>{_("Expected translation string #21")}</div>}
+ a3={_("Expected translation string #22")}
+ />
+);
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-ts-6.tmp xg-ts-6.tsx 2>xg-ts-6.err
+test $? = 0 || { cat xg-ts-6.err; Exit 1; }
+func_filter_POT_Creation_Date xg-ts-6.tmp xg-ts-6.pot
+
+cat <<\EOF > xg-ts-6.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "Expected translation string #0"
+msgstr ""
+
+msgid "Expected translation string #1"
+msgstr ""
+
+msgid "Expected translation string #2"
+msgstr ""
+
+msgid "Expected translation string #3"
+msgstr ""
+
+msgid "Expected translation string #8"
+msgstr ""
+
+msgid "Expected translation string #9"
+msgstr ""
+
+msgid "Expected translation string #10"
+msgstr ""
+
+msgid "Expected translation string #11"
+msgstr ""
+
+msgid "Expected translation string #12"
+msgstr ""
+
+msgid "Expected translation string #13"
+msgstr ""
+
+msgid "Expected translation string #14"
+msgstr ""
+
+msgid "Expected translation string #15"
+msgstr ""
+
+msgid "Expected translation string #16"
+msgstr ""
+
+msgid "Expected translation string #17"
+msgstr ""
+
+msgid "Expected translation string #18"
+msgstr ""
+
+msgid "Expected translation string #19"
+msgstr ""
+
+msgid "Expected translation string #20"
+msgstr ""
+
+msgid "Expected translation string #21"
+msgstr ""
+
+msgid "Expected translation string #22"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-ts-6.ok xg-ts-6.pot
+result=$?
+
+exit $result
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of TypeScript template literal support.
+
+cat <<\EOF > xg-ts-7.ts
+const s0: string = _(`A template literal without substitutions`);
+const s1: string = _(`A template literal with
+embedded
+newlines`);
+const s2: string = _(`A template literal with ${n} substitutions`);
+const s3: string = _(`A template literal with several substitutions: ${a} and ${b} and ${c} and so on`);
+const s4: string = `/${looks_like_regex}`;
+const s5: string = _('not part of a regex');
+const s6: string = `that's a valid string. ` + _('This too');
+const s7: string = _(tag`A template literal with a tag`);
+const s8: string = `a${`b${`c`+d}`}e`;
+const s9: string = _("a normal string");
+const s10: string = `abc${foo({}, _('should be extracted'))}xyz`;
+const f1 = function (): string {
+ return _("first normal string") + `${foo}` + _("second normal string");
+};
+const s11: string = _("another normal string");
+const s12: { property: string } = { property: `A template literal with ${n} substitution` };
+const s13: string = _("yet another normal string");
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --add-comments --no-location -o xg-ts-7.tmp xg-ts-7.ts 2>xg-ts-7.err
+test $? = 0 || { cat xg-ts-7.err; Exit 1; }
+func_filter_POT_Creation_Date xg-ts-7.tmp xg-ts-7.pot
+
+cat <<\EOF > xg-ts-7.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "A template literal without substitutions"
+msgstr ""
+
+msgid ""
+"A template literal with\n"
+"embedded\n"
+"newlines"
+msgstr ""
+
+msgid "not part of a regex"
+msgstr ""
+
+msgid "This too"
+msgstr ""
+
+msgid "a normal string"
+msgstr ""
+
+msgid "should be extracted"
+msgstr ""
+
+msgid "first normal string"
+msgstr ""
+
+msgid "second normal string"
+msgstr ""
+
+msgid "another normal string"
+msgstr ""
+
+msgid "yet another normal string"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-ts-7.ok xg-ts-7.pot
+result=$?
+
+exit $result