From: Bruno Haible Date: Tue, 11 Mar 2025 08:55:56 +0000 (+0100) Subject: Add TypeScript support. X-Git-Tag: v0.25~86 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=4fa2cffa9430d14cca4924ed235ff0d14742b563;p=thirdparty%2Fgettext.git Add TypeScript support. * autopull.sh: Check out tree-sitter-typescript. Set TREE_SITTER_TYPESCRIPT_VERSION. * gettext-tools/build-aux/tree-sitter-typescript-portability.diff: New file. * gettext-tools/configure.ac: Set TREE_SITTER_TYPESCRIPT_VERSION. * gettext-tools/Makefile.am (EXTRA_DIST): Add the tree-sitter-typescript source code and patch. * gettext-tools/doc/lang-typescript.texi: New file. * gettext-tools/doc/Makefile.am (gettext_TEXINFOS): Add it. * gettext-tools/doc/gettext.texi (No string concatenation): Mention string concatenation in TypeScript. (List of Programming Languages): Include lang-typescript.texi. * gettext-tools/doc/xgettext.texi: Document the -L TypeScript and -L TSX options. * gettext-tools/src/x-typescript-impl.h: New file. * gettext-tools/src/x-typescript.h: New file. * gettext-tools/src/x-typescript.c: New file. * gettext-tools/src/x-typescriptx.h: New file. * gettext-tools/src/x-typescriptx.c: New file. * gettext-tools/src/xgettext.c: Include x-typescript.h, x-typescriptx.h. (flag_table_typescript, flag_table_typescriptx): New variables. (main): Invoke init_flag_table_typescript, init_flag_table_typescriptx, x_typescript_extract_all, x_typescriptx_extract_all, x_typescript_keyword, x_typescriptx_keyword. (usage): Document the -L TypeScript and -L TSX options. (xgettext_record_flag): Support format_javascript with TypeScript and TSX. (language_to_extractor, extension_to_language): Support TypeScript and TSX. * gettext-tools/src/FILES: Mention x-typescript.h, x-typescript.c, x-typescriptx.h, x-typescriptx.c, x-typescript-impl.h. * gettext-tools/src/Makefile.am (noinst_LIBRARIES): Reference LIBXGETTEXTTS. (noinst_HEADERS): Add x-typescript.h, x-typescriptx.h, x-typescript-impl.h. 
(LIBXGETTEXTTS): Add libxgettextts3.a, libxgettextts4.a. (libxgettextts3_a_SOURCES, libxgettextts3_a_CPPFLAGS): New variables. (libxgettextts4_a_SOURCES, libxgettextts4_a_CPPFLAGS): New variables. (xgettext_SOURCES): Add x-typescript.c, x-typescriptx.c. * gettext-tools/tests/xgettext-javascript-6: Improve comments. * gettext-tools/tests/xgettext-typescript-1: New file, based on gettext-tools/tests/xgettext-javascript-1. * gettext-tools/tests/xgettext-typescript-2: New file, based on gettext-tools/tests/xgettext-javascript-2. * gettext-tools/tests/xgettext-typescript-3: New file, based on gettext-tools/tests/xgettext-javascript-3. * gettext-tools/tests/xgettext-typescript-4: New file, based on gettext-tools/tests/xgettext-javascript-4. * gettext-tools/tests/xgettext-typescript-5: New file, based on gettext-tools/tests/xgettext-javascript-5. * gettext-tools/tests/xgettext-typescript-6: New file, based on gettext-tools/tests/xgettext-javascript-6. * gettext-tools/tests/xgettext-typescript-7: New file, based on gettext-tools/tests/xgettext-javascript-7. * gettext-tools/tests/Makefile.am (TESTS): Add the new tests. * NEWS: Mention the TypeScript support. --- diff --git a/.gitignore b/.gitignore index a33f80400..728d7d6bd 100644 --- a/.gitignore +++ b/.gitignore @@ -954,6 +954,8 @@ core /gettext-tools/src/gettext.res /gettext-tools/src/libxgettextts1.a /gettext-tools/src/libxgettextts2.a +/gettext-tools/src/libxgettextts3.a +/gettext-tools/src/libxgettextts4.a /gettext-tools/src/textstyle.h /gettext-tools/src/textstyle/stdbool.h /gettext-tools/src/textstyle/version.h diff --git a/NEWS b/NEWS index 3d81292ba..8e55d85e9 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,9 @@ Version 0.25 - March 2025 - 'msgfmt -c' now verifies the syntax of translations of Go format strings. - New examples 'hello-go' and 'hello-go-http' have been added. + * TypeScript: + - xgettext now supports TypeScript and TSX (= TypeScript with JSX + extensions). 
Version 0.24 - February 2025 diff --git a/autopull.sh b/autopull.sh index 2b39f66c3..aea517eee 100755 --- a/autopull.sh +++ b/autopull.sh @@ -88,6 +88,7 @@ func_git_clone_shallow () TREE_SITTER_VERSION=0.23.2 TREE_SITTER_GO_VERSION=0.23.4 TREE_SITTER_RUST_VERSION=0.23.2 +TREE_SITTER_TYPESCRIPT_VERSION=0.23.2 # Cache the relevant source code. Erase the rest of the tree-sitter projects. test -d gettext-tools/tree-sitter-$TREE_SITTER_VERSION || { func_git_clone_shallow tree-sitter https://github.com/tree-sitter/tree-sitter.git v$TREE_SITTER_VERSION @@ -116,10 +117,28 @@ test -d gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION || { mv gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/src/scanner.c gettext-tools/tree-sitter-rust-$TREE_SITTER_RUST_VERSION/src/rust-scanner.c rm -rf tree-sitter-rust } +test -d gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION || { + func_git_clone_shallow tree-sitter-typescript https://github.com/tree-sitter/tree-sitter-typescript.git v$TREE_SITTER_TYPESCRIPT_VERSION + (cd tree-sitter-typescript && patch -p1) < gettext-tools/build-aux/tree-sitter-typescript-portability.diff + mkdir gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION + mkdir gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/common + mkdir gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript + mkdir gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx + mv tree-sitter-typescript/LICENSE gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/LICENSE + mv tree-sitter-typescript/common/scanner.h gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/common/scanner.h + mv tree-sitter-typescript/typescript/src gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src + mv tree-sitter-typescript/tsx/src gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src + mv 
gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src/parser.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src/ts-parser.c + mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src/scanner.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/typescript/src/ts-scanner.c + mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/parser.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/tsx-parser.c + mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/scanner.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/tsx-scanner.c + rm -rf tree-sitter-typescript +} cat > gettext-tools/tree-sitter.cfg <. AC_SUBST([TREE_SITTER_VERSION]) AC_SUBST([TREE_SITTER_GO_VERSION]) AC_SUBST([TREE_SITTER_RUST_VERSION]) +AC_SUBST([TREE_SITTER_TYPESCRIPT_VERSION]) PACKAGE_SUFFIX="-$ARCHIVE_VERSION" AC_SUBST([PACKAGE_SUFFIX]) diff --git a/gettext-tools/doc/Makefile.am b/gettext-tools/doc/Makefile.am index 8d975c673..1dfb89457 100644 --- a/gettext-tools/doc/Makefile.am +++ b/gettext-tools/doc/Makefile.am @@ -64,6 +64,7 @@ gettext_TEXINFOS = \ lang-java.texi \ lang-csharp.texi \ lang-javascript.texi \ + lang-typescript.texi \ lang-scheme.texi \ lang-lisp.texi \ lang-clisp-c.texi \ diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index edc27dc7d..4cb5e9692 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -432,6 +432,7 @@ Individual Programming Languages * Java:: Java * C#:: C# * JavaScript:: JavaScript +* TypeScript:: TypeScript * Scheme:: GNU guile - Scheme * Common Lisp:: GNU clisp - Common Lisp * clisp C:: GNU clisp C sources @@ -2323,6 +2324,7 @@ at runtime (or possibly at compile time, if the compiler supports that). 
@cindex Java, string concatenation @cindex C#, string concatenation @cindex JavaScript, string concatenation +@cindex TypeScript, string concatenation @cindex Go, string concatenation @cindex Ruby, string concatenation @cindex Shell, string concatenation @@ -2347,7 +2349,7 @@ In Java, string concatenation is denoted by the @samp{+} operator. In C#, string concatenation is denoted by the @samp{+} operator. @c Reference: https://learn.microsoft.com/en-us/dotnet/csharp/how-to/concatenate-multiple-strings @item -In JavaScript, string concatenation is denoted by the @samp{+} operator. +In JavaScript and TypeScript, string concatenation is denoted by the @samp{+} operator. @c Reference: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Addition @item In Go, string concatenation is denoted by the @samp{+} operator. @@ -10426,6 +10428,7 @@ that language, and to combine the resulting files using @code{msgcat}. * Java:: Java * C#:: C# * JavaScript:: JavaScript +* TypeScript:: TypeScript * Scheme:: GNU guile - Scheme * Common Lisp:: GNU clisp - Common Lisp * clisp C:: GNU clisp C sources @@ -10455,6 +10458,7 @@ that language, and to combine the resulting files using @code{msgcat}. @include lang-java.texi @include lang-csharp.texi @include lang-javascript.texi +@include lang-typescript.texi @include lang-scheme.texi @include lang-lisp.texi @include lang-clisp-c.texi diff --git a/gettext-tools/doc/lang-typescript.texi b/gettext-tools/doc/lang-typescript.texi new file mode 100644 index 000000000..5654f4637 --- /dev/null +++ b/gettext-tools/doc/lang-typescript.texi @@ -0,0 +1,64 @@ +@c This file is part of the GNU gettext manual. +@c Copyright (C) 1995-2025 Free Software Foundation, Inc. +@c See the file gettext.texi for copying conditions. 
+ +@node TypeScript +@subsection TypeScript and TSX + +@table @asis +@item RPMs +js + +@item Ubuntu packages +gjs + +@item File extension +@code{ts} for TypeScript, @code{tsx} for TSX (TypeScript with JSX) + +@item String syntax +@itemize @bullet + +@item @code{"abc"} + +@item @code{'abc'} + +@item @code{`abc`} + +@end itemize + +@item gettext shorthand +@code{_("abc")} + +@item gettext/ngettext functions +@code{gettext}, @code{dgettext}, @code{dcgettext}, @code{ngettext}, +@code{dngettext} + +@item textdomain +@code{textdomain} function + +@item bindtextdomain +@code{bindtextdomain} function + +@item setlocale +automatic + +@item Prerequisite +unknown + +@item Use or emulate GNU gettext +use, or emulate + +@item Extractor +@code{xgettext} + +@item Formatting with positions +A @code{format} method on strings can be used. +But since it is not standard in TypeScript, +you have to enable it yourself. @c TODO How? + +@item Portability +On platforms without gettext, the functions are not available. + +@item po-mode marking +--- +@end table diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi index b6b4e9801..cf49bfd06 100644 --- a/gettext-tools/doc/xgettext.texi +++ b/gettext-tools/doc/xgettext.texi @@ -79,6 +79,7 @@ Specifies the language of the input files. The supported languages are @code{Java}, @code{JavaProperties}, @code{C#}, @code{JavaScript}, +@code{TypeScript}, @code{TSX}, @code{Scheme}, @code{Guile}, @code{Lisp}, @code{EmacsLisp}, @@ -314,6 +315,7 @@ Python, Java, C#, JavaScript, +TypeScript, TSX, Scheme, Guile, Lisp, EmacsLisp, @@ -403,7 +405,8 @@ For Lua: @code{_}, @code{gettext.gettext}, @code{gettext.dgettext:2}, @code{gettext.dngettext:2,3}, @code{gettext.dcngettext:2,3}. @item -For JavaScript: @code{_}, @code{gettext}, @code{dgettext:2}, +For JavaScript, TypeScript, TSX: +@code{_}, @code{gettext}, @code{dgettext:2}, @code{dcgettext:2}, @code{ngettext:1,2}, @code{dngettext:2,3}, @code{pgettext:1c,2}, @code{dpgettext:2c,3}. 
@@ -471,6 +474,7 @@ Python, Java, C#, JavaScript, +TypeScript, TSX, Scheme, Guile, Lisp, EmacsLisp, diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES index e7f99d19f..053e9fc41 100644 --- a/gettext-tools/src/FILES +++ b/gettext-tools/src/FILES @@ -374,6 +374,14 @@ msgl-check.c | x-javascript.h | x-javascript.c | String extractor for JavaScript. +| x-typescript.h +| x-typescript.c +| x-typescript-impl.h +| String extractor for TypeScript. +| x-typescriptx.h +| x-typescriptx.c +| x-typescript-impl.h +| String extractor for TSX. | x-scheme.h | x-scheme.c | String extractor for Scheme. diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 12285c3dc..51c9efd52 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -41,7 +41,7 @@ else noinst_LTLIBRARIES = libgettextsrc.la endif -noinst_LIBRARIES = libxgettextts1.a libxgettextts2.a +noinst_LIBRARIES = $(LIBXGETTEXTTS) noinst_HEADERS = \ pos.h message.h po-error.h xerror-handler.h po-xerror.h \ @@ -76,6 +76,9 @@ noinst_HEADERS = \ x-java.h \ x-csharp.h \ x-javascript.h \ + x-typescript.h \ + x-typescriptx.h \ + x-typescript-impl.h \ x-scheme.h \ x-lisp.h \ x-elisp.h \ @@ -240,7 +243,7 @@ libgettextsrc_la_SOURCES = \ search-path.c # xgettext has some tree-sitter based backends. 
-LIBXGETTEXTTS = libxgettextts2.a libxgettextts1.a +LIBXGETTEXTTS = libxgettextts2.a libxgettextts3.a libxgettextts4.a libxgettextts1.a libxgettextts1_a_SOURCES = \ ../tree-sitter-$(TREE_SITTER_VERSION)/lib/src/lib.c libxgettextts1_a_CPPFLAGS = \ @@ -252,6 +255,18 @@ libxgettextts2_a_SOURCES = \ ../tree-sitter-go-$(TREE_SITTER_GO_VERSION)/src/go-parser.c libxgettextts2_a_CPPFLAGS = \ -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include +libxgettextts3_a_SOURCES = \ + ../tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src/ts-parser.c \ + ../tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src/ts-scanner.c +libxgettextts3_a_CPPFLAGS = \ + -I$(top_srcdir)/tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/typescript/src \ + -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include +libxgettextts4_a_SOURCES = \ + ../tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tsx-parser.c \ + ../tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tsx-scanner.c +libxgettextts4_a_CPPFLAGS = \ + -I$(top_srcdir)/tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src \ + -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include # msggrep needs pattern matching. LIBGREP = ../libgrep/libgrep.a @@ -294,6 +309,8 @@ xgettext_SOURCES += \ x-java.c \ x-csharp.c \ x-javascript.c \ + x-typescript.c \ + x-typescriptx.c \ x-scheme.c \ x-lisp.c \ x-elisp.c \ diff --git a/gettext-tools/src/x-typescript-impl.h b/gettext-tools/src/x-typescript-impl.h new file mode 100644 index 000000000..56b886fa9 --- /dev/null +++ b/gettext-tools/src/x-typescript-impl.h @@ -0,0 +1,1043 @@ +/* xgettext TypeScript and TSX backends. + Copyright (C) 2001-2025 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2025. */ + +/* The languages TypeScript and TSX (= TypeScript with JSX) are very similar. + The extractor code is therefore nearly identical. */ + +#include +#include +#include +#include +#include +#include + +#include +#include "message.h" +#include "string-desc.h" +#include "xstring-desc.h" +#include "string-buffer-reversed.h" +#include "xgettext.h" +#include "xg-pos.h" +#include "xg-mixed-string.h" +#include "xg-arglist-context.h" +#include "xg-arglist-callshape.h" +#include "xg-arglist-parser.h" +#include "xg-message.h" +#include "if-error.h" +#include "xalloc.h" +#include "read-file.h" +#include "unistr.h" +#include "po-charset.h" +#include "gettext.h" + +#define _(s) gettext(s) + +/* Use tree-sitter. + Documentation: */ +#include +extern const TSLanguage *TREE_SITTER_LANGUAGE (void); + + +/* The TypeScript syntax is defined in https://www.typescriptlang.org/docs/. */ + +#define DEBUG_TYPESCRIPT 0 + + +/* ====================== Keyword set customization. ====================== */ + +/* If true extract all strings. 
*/ +static bool extract_all = false; + +static hash_table keywords; +static bool default_keywords = true; + + +void +NOTE_OPTION_EXTRACT_ALL () +{ + extract_all = true; +} + + +void +NOTE_OPTION_KEYWORD (const char *name) +{ + if (name == NULL) + default_keywords = false; + else + { + const char *end; + struct callshape shape; + const char *colon; + + if (keywords.table == NULL) + hash_init (&keywords, 100); + + split_keywordspec (name, &end, &shape); + + /* The characters between name and end should form a valid identifier. + A colon means an invalid parse in split_keywordspec(). */ + colon = strchr (name, ':'); + if (colon == NULL || colon >= end) + insert_keyword_callshape (&keywords, name, end - name, &shape); + } +} + +/* Finish initializing the keywords hash table. + Called after argument processing, before each file is processed. */ +static void +init_keywords () +{ + if (default_keywords) + { + /* Same as in x-javascript.c. */ + /* When adding new keywords here, also update the documentation in + xgettext.texi! */ + NOTE_OPTION_KEYWORD ("gettext"); + NOTE_OPTION_KEYWORD ("dgettext:2"); + NOTE_OPTION_KEYWORD ("dcgettext:2"); + NOTE_OPTION_KEYWORD ("ngettext:1,2"); + NOTE_OPTION_KEYWORD ("dngettext:2,3"); + NOTE_OPTION_KEYWORD ("pgettext:1c,2"); + NOTE_OPTION_KEYWORD ("dpgettext:2c,3"); + NOTE_OPTION_KEYWORD ("_"); + default_keywords = false; + } +} + +void +INIT_FLAG_TABLE () +{ + /* Same as in x-javascript.c. 
*/ + xgettext_record_flag ("gettext:1:pass-javascript-format"); + xgettext_record_flag ("dgettext:2:pass-javascript-format"); + xgettext_record_flag ("dcgettext:2:pass-javascript-format"); + xgettext_record_flag ("ngettext:1:pass-javascript-format"); + xgettext_record_flag ("ngettext:2:pass-javascript-format"); + xgettext_record_flag ("dngettext:2:pass-javascript-format"); + xgettext_record_flag ("dngettext:3:pass-javascript-format"); + xgettext_record_flag ("pgettext:2:pass-javascript-format"); + xgettext_record_flag ("dpgettext:3:pass-javascript-format"); + xgettext_record_flag ("_:1:pass-javascript-format"); +} + + +/* ======================== Parsing via tree-sitter. ======================== */ +/* To understand this code, look at + tree-sitter-typescript/typescript/src/node-types.json + and + tree-sitter-typescript/typescript/src/grammar.json + */ + +/* The tree-sitter's language object. */ +static const TSLanguage *ts_language; + +/* ------------------------- Node types and symbols ------------------------- */ + +static TSSymbol ts_language_symbol (const char *name, bool is_named) +{ + TSSymbol result = + ts_language_symbol_for_name (ts_language, name, strlen (name), is_named); + if (result == 0) + /* If we get here, the grammar has evolved in an incompatible way. */ + abort (); + return result; +} + +static TSFieldId ts_language_field (const char *name) +{ + TSFieldId result = + ts_language_field_id_for_name (ts_language, name, strlen (name)); + if (result == 0) + /* If we get here, the grammar has evolved in an incompatible way. 
*/ + abort (); + return result; +} + +/* Optimization: + Instead of + strcmp (ts_node_type (node), "string") == 0 + it is faster to do + ts_node_symbol (node) == ts_symbol_string + */ +static TSSymbol ts_symbol_comment; +static TSSymbol ts_symbol_string; +static TSSymbol ts_symbol_string_fragment; +static TSSymbol ts_symbol_escape_sequence; +static TSSymbol ts_symbol_template_string; +static TSSymbol ts_symbol_binary_expression; +static TSSymbol ts_symbol_identifier; +static TSSymbol ts_symbol_call_expression; +static TSSymbol ts_symbol_arguments; +static TSSymbol ts_symbol_plus; /* + */ +static TSFieldId ts_field_function; +static TSFieldId ts_field_arguments; +static TSFieldId ts_field_operator; +static TSFieldId ts_field_left; +static TSFieldId ts_field_right; + +/* Returns the line number of the start of NODE, that is, the row of its + start point plus 1. */ +static inline size_t +ts_node_line_number (TSNode node) +{ + return ts_node_start_point (node).row + 1; +} + +/* -------------------------------- The file -------------------------------- */ + +/* The entire contents of the file being analyzed. */ +static const char *contents; + +/* -------------------------------- Comments -------------------------------- */ + +/* These are for tracking whether comments count as immediately before + keyword. */ +static int last_comment_line; +static int last_non_comment_line; + +/* Saves a comment line. */ +static void save_comment_line (string_desc_t gist) +{ + /* Remove leading whitespace. */ + while (sd_length (gist) > 0 + && (sd_char_at (gist, 0) == ' ' + || sd_char_at (gist, 0) == '\t')) + gist = sd_substring (gist, 1, sd_length (gist)); + /* Remove trailing whitespace. */ + size_t len = sd_length (gist); + while (len > 0 + && (sd_char_at (gist, len - 1) == ' ' + || sd_char_at (gist, len - 1) == '\t')) + len--; + gist = sd_substring (gist, 0, len); + savable_comment_add (sd_c (gist)); +} + +/* Does the comment handling for NODE. + Updates savable_comment, last_comment_line, last_non_comment_line. 
+ It is important that this function gets called + - for each node (not only the named nodes!), + - in depth-first traversal order. */ +static void handle_comments (TSNode node) +{ + #if DEBUG_TYPESCRIPT && 0 + fprintf (stderr, "LCL=%d LNCL=%d node=[%s]|%s|\n", last_comment_line, last_non_comment_line, ts_node_type (node), ts_node_string (node)); + #endif + if (last_comment_line < last_non_comment_line + && last_non_comment_line < ts_node_line_number (node)) + /* We have skipped over a newline. This newline terminated a line + with non-comment tokens, after the last comment line. */ + savable_comment_reset (); + + if (ts_node_symbol (node) == ts_symbol_comment) + { + string_desc_t entire = + sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node), + (char *) contents + ts_node_start_byte (node)); + /* It should either start with two slashes... */ + if (sd_length (entire) >= 2 + && sd_char_at (entire, 0) == '/' + && sd_char_at (entire, 1) == '/') + { + save_comment_line (sd_substring (entire, 2, sd_length (entire))); + last_comment_line = ts_node_end_point (node).row + 1; + } + /* ... or it should start and end with the C comment markers. */ + else if (sd_length (entire) >= 4 + && sd_char_at (entire, 0) == '/' + && sd_char_at (entire, 1) == '*' + && sd_char_at (entire, sd_length (entire) - 2) == '*' + && sd_char_at (entire, sd_length (entire) - 1) == '/') + { + string_desc_t gist = sd_substring (entire, 2, sd_length (entire) - 2); + /* Split into lines. + Remove leading and trailing whitespace from each line. 
*/ + for (;;) + { + ptrdiff_t nl_index = sd_index (gist, '\n'); + if (nl_index >= 0) + { + save_comment_line (sd_substring (gist, 0, nl_index)); + gist = sd_substring (gist, nl_index + 1, sd_length (gist)); + } + else + { + save_comment_line (gist); + break; + } + } + last_comment_line = ts_node_end_point (node).row + 1; + } + else + abort (); + } + else + last_non_comment_line = ts_node_line_number (node); +} + +/* --------------------- string_buffer_reversed_unicode --------------------- */ + +/* This type is like string_buffer_reversed with mostly UTF-8 contents, except + that it also handles Unicode surrogates: The combination of a low and a high + surrogate is converted to a single Unicode code point, whereas lone + surrogates are converted to U+FFFD (like 'struct mixed_string_buffer' does). + */ +struct string_buffer_reversed_unicode +{ + struct string_buffer_reversed sbr; + /* The second half of an UTF-16 surrogate character. */ + unsigned short utf16_surr; + /* Its line number. */ + size_t utf16_surr_line_number; +}; + +/* Initializes a 'struct string_buffer_reversed_unicode'. */ +static inline void +sbru_init (struct string_buffer_reversed_unicode *buffer) +{ + sbr_init (&buffer->sbr); + buffer->utf16_surr = 0; +} + +/* Auxiliary function: Handle the attempt to prepend a lone surrogate to + BUFFER. */ +static void +sbru_prepend_lone_surrogate (struct string_buffer_reversed_unicode *buffer, + ucs4_t uc, size_t line_number) +{ + /* A half surrogate is invalid, therefore use U+FFFD instead. + It may be valid in a particular programming language. + But a half surrogate is invalid in UTF-8: + - RFC 3629 says + "The definition of UTF-8 prohibits encoding character + numbers between U+D800 and U+DFFF". + - Unicode 4.0 chapter 3 + + section 3.9, p.77, says + "Because surrogate code points are not Unicode scalar + values, any UTF-8 byte sequence that would otherwise + map to code points D800..DFFF is ill-formed." + and in table 3-6, p. 
78, does not mention D800..DFFF. + - The unicode.org FAQ question "How do I convert an unpaired + UTF-16 surrogate to UTF-8?" has the answer + "By representing such an unpaired surrogate on its own + as a 3-byte sequence, the resulting UTF-8 data stream + would become ill-formed." + So use U+FFFD instead. */ + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("lone surrogate U+%04X"), uc); + string_desc_t fffd = /* U+FFFD in UTF-8 encoding. */ + sd_new_addr (3, (char *) "\357\277\275"); + sbr_xprepend_desc (&buffer->sbr, fffd); +} + +/* Auxiliary function: Flush buffer->utf16_surr into buffer->sbr. */ +static inline void +sbru_flush_utf16_surr (struct string_buffer_reversed_unicode *buffer) +{ + if (buffer->utf16_surr != 0) + { + sbru_prepend_lone_surrogate (buffer, + buffer->utf16_surr, + buffer->utf16_surr_line_number); + buffer->utf16_surr = 0; + } +} + +/* Prepends the character C to BUFFER. */ +static void +sbru_xprepend1 (struct string_buffer_reversed_unicode *buffer, char c) +{ + sbru_flush_utf16_surr (buffer); + sbr_xprepend1 (&buffer->sbr, c); +} + +/* Prepends the contents of the memory area S to BUFFER. */ +static void +sbru_xprepend_desc (struct string_buffer_reversed_unicode *buffer, + string_desc_t s) +{ + sbru_flush_utf16_surr (buffer); + sbr_xprepend_desc (&buffer->sbr, s); +} + +/* Prepends a Unicode code point C to BUFFER. */ +static void +sbru_xprepend_unicode (struct string_buffer_reversed_unicode *buffer, + ucs4_t c, TSNode node) +{ + /* Test whether this character and the previous one form a Unicode + surrogate character pair. 
*/ + if (buffer->utf16_surr != 0 && (c >= 0xd800 && c < 0xdc00)) + { + unsigned short utf16buf[2]; + ucs4_t uc; + + utf16buf[0] = c; + utf16buf[1] = buffer->utf16_surr; + if (u16_mbtouc (&uc, utf16buf, 2) != 2) + abort (); + + uint8_t buf[6]; + int n = u8_uctomb (buf, uc, sizeof (buf)); + if (!(n > 0)) + abort (); + sbr_xprepend_desc (&buffer->sbr, sd_new_addr (n, (char *) buf)); + + buffer->utf16_surr = 0; + } + else + { + sbru_flush_utf16_surr (buffer); + + if (c >= 0xdc00 && c < 0xe000) + { + buffer->utf16_surr = c; + buffer->utf16_surr_line_number = ts_node_line_number (node); + } + else if (c >= 0xd800 && c < 0xdc00) + sbru_prepend_lone_surrogate (buffer, c, ts_node_line_number (node)); + else + { + uint8_t buf[6]; + int n = u8_uctomb (buf, c, sizeof (buf)); + if (!(n > 0)) + abort (); + sbr_xprepend_desc (&buffer->sbr, sd_new_addr (n, (char *) buf)); + } + } +} + +/* Returns the contents of BUFFER (with an added trailing NUL, that is, + as a C string), and frees all other memory held by BUFFER. + Returns NULL if there was an error earlier. + It is the responsibility of the caller to free() the result. */ +static char * +sbru_xdupfree_c (struct string_buffer_reversed_unicode *buffer) + _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC_FREE + _GL_ATTRIBUTE_RETURNS_NONNULL; +static char * +sbru_xdupfree_c (struct string_buffer_reversed_unicode *buffer) +{ + sbru_flush_utf16_surr (buffer); + return sbr_xdupfree_c (&buffer->sbr); +} + +/* ---------------------------- String literals ---------------------------- */ + +/* Determines whether NODE represents a string literal or the concatenation + of string literals (via the '+' operator). */ +static bool +is_string_literal (TSNode node) +{ + start: + if (ts_node_symbol (node) == ts_symbol_string + || ts_node_symbol (node) == ts_symbol_template_string) + { + /* Test whether all named children nodes are of type 'string_fragment' or + 'escape_sequence' (and thus none of type 'template_substitution' or + 'ERROR'). 
*/ + uint32_t count = ts_node_named_child_count (node); + uint32_t i; + for (i = 0; i < count; i++) + { + TSNode subnode = ts_node_named_child (node, i); + if (!(ts_node_symbol (subnode) == ts_symbol_string_fragment + || ts_node_symbol (subnode) == ts_symbol_escape_sequence)) + return false; + } + return true; + } + if (ts_node_symbol (node) == ts_symbol_binary_expression + && ts_node_symbol (ts_node_child_by_field_id (node, ts_field_operator)) == ts_symbol_plus + /* Recurse into the left and right subnodes. */ + && is_string_literal (ts_node_child_by_field_id (node, ts_field_right))) + { + /*return is_string_literal (ts_node_child_by_field_id (node, ts_field_left));*/ + node = ts_node_child_by_field_id (node, ts_field_left); + goto start; + } + return false; +} + +/* Prepends the string literal pieces from NODE to BUFFER. */ +static void +string_literal_accumulate_pieces (TSNode node, + struct string_buffer_reversed_unicode *buffer) +{ + start: + if (ts_node_symbol (node) == ts_symbol_string + || ts_node_symbol (node) == ts_symbol_template_string) + { + uint32_t count = ts_node_named_child_count (node); + uint32_t i; + for (i = count; i > 0; ) + { + i--; + TSNode subnode = ts_node_named_child (node, i); + if (ts_node_symbol (subnode) == ts_symbol_string_fragment) + { + string_desc_t subnode_string = + sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode), + (char *) contents + ts_node_start_byte (subnode)); + sbru_xprepend_desc (buffer, subnode_string); + } + else if (ts_node_symbol (subnode) == ts_symbol_escape_sequence) + { + const char *escape_start = contents + ts_node_start_byte (subnode); + const char *escape_end = contents + ts_node_end_byte (subnode); + /* The escape sequence must start with a backslash. */ + if (!(escape_end - escape_start >= 2 && escape_start[0] == '\\')) + abort (); + /* tree-sitter's grammar.js allows more escape sequences than the + tsc compiler. Give a warning for those case where the tsc + compiler gives an error. 
*/ + bool invalid = false; + if (escape_end - escape_start == 2) + { + switch (escape_start[1]) + { + case '\n': + break; + case '\\': + case '"': + sbru_xprepend1 (buffer, escape_start[1]); + break; + case 'b': + sbru_xprepend1 (buffer, 0x08); + break; + case 'f': + sbru_xprepend1 (buffer, 0x0C); + break; + case 'n': + sbru_xprepend1 (buffer, '\n'); + break; + case 'r': + sbru_xprepend1 (buffer, '\r'); + break; + case 't': + sbru_xprepend1 (buffer, '\t'); + break; + case 'v': + sbru_xprepend1 (buffer, 0x0B); + break; + default: + invalid = true; + break; + } + } + else if (escape_end - escape_start == 3 + && escape_start[1] == '\r' && escape_start[2] == '\n') + /* Backslash-newline with a Windows CRLF. */ + ; + else if (escape_start[1] >= '0' && escape_start[1] <= '7') + { + /* It's not clear whether octal escape sequences should be + supported. On one hand, they are supported in JavaScript. + On the other hand, tsc says: + "error TS1487: Octal escape sequences are not allowed." */ + unsigned int value = 0; + /* Only up to 3 octal digits are accepted. */ + if (escape_end - escape_start <= 1 + 3) + { + const char *p; + for (p = escape_start + 1; p < escape_end; p++) + { + /* No overflow is possible. */ + char c = *p; + if (c >= '0' && c <= '7') + value = (value << 3) + (c - '0'); + else + invalid = true; + } + if (value > 0xFF) + invalid = true; + } + if (!invalid) + sbru_xprepend1 (buffer, (unsigned char) value); + } + else if ((escape_start[1] == 'x' && escape_end - escape_start == 2 + 2) + || (escape_start[1] == 'u' && escape_end - escape_start == 2 + 4)) + { + unsigned int value = 0; + const char *p; + for (p = escape_start + 2; p < escape_end; p++) + { + /* No overflow is possible. 
*/ + char c = *p; + if (c >= '0' && c <= '9') + value = (value << 4) + (c - '0'); + else if (c >= 'A' && c <= 'Z') + value = (value << 4) + (c - 'A' + 10); + else if (c >= 'a' && c <= 'z') + value = (value << 4) + (c - 'a' + 10); + else + invalid = true; + } + if (escape_start[1] == 'x') + { + if (!invalid) + sbru_xprepend1 (buffer, (unsigned char) value); + } + else + sbru_xprepend_unicode (buffer, value, subnode); + } + else if (escape_start[1] == 'u' + && escape_end - escape_start > 4 + && escape_start[2] == '{' && escape_end[-1] == '}') + { + unsigned int value = 0; + const char *p; + for (p = escape_start + 3; p < escape_end - 1; p++) + { + char c = *p; + if (c >= '0' && c <= '9') + value = (value << 4) + (c - '0'); + else if (c >= 'A' && c <= 'Z') + value = (value << 4) + (c - 'A' + 10); + else if (c >= 'a' && c <= 'z') + value = (value << 4) + (c - 'a' + 10); + else + invalid = true; + if (value >= 0x110000) + invalid = true; + if (invalid) + break; + } + if (!invalid) + sbru_xprepend_unicode (buffer, value, subnode); + } + else + invalid = true; + if (invalid) + { + size_t line_number = ts_node_line_number (subnode); + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("invalid escape sequence in string")); + } + } + else + abort (); + } + } + else if (ts_node_symbol (node) == ts_symbol_binary_expression + && ts_node_symbol (ts_node_child_by_field_id (node, ts_field_operator)) == ts_symbol_plus) + { + /* Recurse into the left and right subnodes. */ + string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_right), buffer); + /*string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_left), buffer);*/ + node = ts_node_child_by_field_id (node, ts_field_left); + goto start; + } + else + abort (); +} + +/* Combines the pieces of a string or template_string or concatenated + string literal. + Returns a freshly allocated, mostly UTF-8 encoded string. 
*/ +static char * +string_literal_value (TSNode node) +{ + if (ts_node_symbol (node) == ts_symbol_string + && ts_node_named_child_count (node) == 1) + { + TSNode subnode = ts_node_named_child (node, 0); + if (ts_node_symbol (subnode) == ts_symbol_string_fragment) + { + /* Optimize the frequent special case of a normal string literal + that is non-empty and has no escape sequences: its value is + exactly the bytes of its single string_fragment. */ + string_desc_t subnode_string = + sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode), + (char *) contents + ts_node_start_byte (subnode)); + return xsd_c (subnode_string); + } + } + + /* The general case: collect the pieces, from right to left, in a buffer + that is filled by prepending. */ + struct string_buffer_reversed_unicode buffer; + sbru_init (&buffer); + string_literal_accumulate_pieces (node, &buffer); + return sbru_xdupfree_c (&buffer); +} + +/* --------------------- Parsing and string extraction --------------------- */ + +/* Context lookup table. It is set from the flag_table argument at the + start of each extraction run. */ +static flag_context_list_table_ty *flag_context_list_table; + +/* Maximum supported nesting depth. Exceeding it is reported as a fatal + error. */ +#define MAX_NESTING_DEPTH 1000 + +static int nesting_depth; /* Current depth of nested extract_from_node calls. */ + +/* The file is parsed into an abstract syntax tree. Scan the syntax tree, + looking for a keyword in identifier position of a call_expression, + followed by a string among the arguments. + When we see this pattern, we have something to remember. + + Normal handling: Look for + keyword ( ... msgid ... ) + Plural handling: Look for + keyword ( ... msgid ... msgid_plural ... ) + + We use recursion because the arguments before msgid or between msgid + and msgid_plural can contain subexpressions of the same form. */ + +/* Forward declarations. 
*/ +static void extract_from_node (TSNode node, + bool ignore, + flag_region_ty *outer_region, + message_list_ty *mlp); + +/* Extracts messages from the function call consisting of + - CALLEE_NODE: a tree node of type 'identifier', + - ARGS_NODE: a tree node of type 'arguments'. + OUTER_REGION is the flag region of the enclosing context; each argument + gets its own region that inherits from it. + Extracted messages are added to MLP. */ +static void +extract_from_function_call (TSNode callee_node, + TSNode args_node, + flag_region_ty *outer_region, + message_list_ty *mlp) +{ + uint32_t args_count = ts_node_child_count (args_node); + + string_desc_t callee_name = + sd_new_addr (ts_node_end_byte (callee_node) - ts_node_start_byte (callee_node), + (char *) contents + ts_node_start_byte (callee_node)); + + /* Context iterator. It is advanced once per argument of this callee. */ + flag_context_list_iterator_ty next_context_iter = + flag_context_list_iterator ( + flag_context_list_table_lookup ( + flag_context_list_table, + sd_data (callee_name), sd_length (callee_name))); + + void *keyword_value; + if (hash_find_entry (&keywords, + sd_data (callee_name), sd_length (callee_name), + &keyword_value) + == 0) + { + /* The callee has some information associated with it: it is a keyword, + and the information are its call shapes. */ + const struct callshapes *next_shapes = keyword_value; + + /* We have a function, named by a relevant identifier, with an argument + list. */ + + struct arglist_parser *argparser = + arglist_parser_alloc (mlp, next_shapes); + + /* Current argument number. 
*/ + uint32_t arg; + uint32_t i; + + arg = 0; + for (i = 0; i < args_count; i++) + { + TSNode arg_node = ts_node_child (args_node, i); + handle_comments (arg_node); + if (ts_node_is_named (arg_node) + && ts_node_symbol (arg_node) != ts_symbol_comment) + { + arg++; /* ARG counts only the named, non-comment children, starting at 1. */ + flag_region_ty *arg_region = + inheriting_region (outer_region, + flag_context_list_iterator_advance ( + &next_context_iter)); + + bool already_extracted = false; + if (is_string_literal (arg_node)) + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = ts_node_line_number (arg_node); + + char *string = string_literal_value (arg_node); + + if (extract_all) + { + remember_a_message (mlp, NULL, string, true, false, + arg_region, &pos, + NULL, savable_comment, true); + already_extracted = true; + } + else + { + mixed_string_ty *mixed_string = + mixed_string_alloc_utf8 (string, lc_string, + pos.file_name, pos.line_number); /* NOTE(review): STRING is not freed in this branch; presumably mixed_string_alloc_utf8 copies it - confirm whether this leaks. */ + arglist_parser_remember (argparser, arg, mixed_string, + arg_region, + pos.file_name, pos.line_number, + savable_comment, true); + } + } + + if (!already_extracted) /* Also look for calls nested inside this argument. */ + { + if (++nesting_depth > MAX_NESTING_DEPTH) + if_error (IF_SEVERITY_FATAL_ERROR, + logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false, + _("too many open parentheses, brackets, or braces")); + extract_from_node (arg_node, + false, + arg_region, + mlp); + nesting_depth--; + } + + unref_region (arg_region); + } + } + arglist_parser_done (argparser, arg); + return; + } + + /* Recurse. The callee is not a keyword, therefore only the arguments + themselves are scanned. */ + + /* Current argument number. 
*/ + uint32_t arg; + uint32_t i; + + arg = 0; + for (i = 0; i < args_count; i++) + { + TSNode arg_node = ts_node_child (args_node, i); + handle_comments (arg_node); + if (ts_node_is_named (arg_node) + && ts_node_symbol (arg_node) != ts_symbol_comment) + { + arg++; + flag_region_ty *arg_region = + inheriting_region (outer_region, + flag_context_list_iterator_advance ( + &next_context_iter)); + + if (++nesting_depth > MAX_NESTING_DEPTH) + if_error (IF_SEVERITY_FATAL_ERROR, + logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false, + _("too many open parentheses, brackets, or braces")); + extract_from_node (arg_node, + false, + arg_region, + mlp); + nesting_depth--; + + unref_region (arg_region); + } + } +} + +/* Extracts messages in the syntax tree NODE. + If IGNORE is true, NODE lies inside a string literal that was already + considered, and is therefore not remembered by itself when all strings + are being extracted. + OUTER_REGION is the flag region that applies to NODE. + Extracted messages are added to MLP. */ +static void +extract_from_node (TSNode node, + bool ignore, + flag_region_ty *outer_region, + message_list_ty *mlp) +{ + if (extract_all && !ignore && is_string_literal (node)) + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = ts_node_line_number (node); + + char *string = string_literal_value (node); + + remember_a_message (mlp, NULL, string, true, false, + outer_region, &pos, + NULL, savable_comment, true); + } + + if (ts_node_symbol (node) == ts_symbol_call_expression + && ts_node_named_child_count (node) >= 2) + { + TSNode callee_node = ts_node_named_child (node, 0); + /* This is the field called 'function'. */ + if (! ts_node_eq (ts_node_child_by_field_id (node, ts_field_function), + callee_node)) + abort (); + if (ts_node_symbol (callee_node) == ts_symbol_identifier) + { + TSNode args_node = ts_node_child_by_field_id (node, ts_field_arguments); + /* This is the field called 'arguments'. */ + if (ts_node_symbol (args_node) == ts_symbol_arguments) + { + /* Handle the potential comments between 'function' and 'arguments'. 
*/ + { + uint32_t count = ts_node_child_count (node); + uint32_t i; + for (i = 0; i < count; i++) + { + TSNode subnode = ts_node_child (node, i); + if (ts_node_eq (subnode, args_node)) + break; + handle_comments (subnode); + } + } + extract_from_function_call (callee_node, args_node, + outer_region, + mlp); + return; + } + } + } + + #if DEBUG_TYPESCRIPT && 0 + if (ts_node_symbol (node) == ts_symbol_call_expression) + { + TSNode subnode = ts_node_child_by_field_id (node, ts_field_function); + fprintf (stderr, "-> %s\n", ts_node_string (subnode)); + if (ts_node_symbol (subnode) == ts_symbol_identifier) + { + string_desc_t subnode_string = + sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode), + (char *) contents + ts_node_start_byte (subnode)); + if (sd_equals (subnode_string, sd_from_c ("gettext"))) + { + TSNode argsnode = ts_node_child_by_field_id (node, ts_field_arguments); + fprintf (stderr, "gettext arguments: %s\n", ts_node_string (argsnode)); + fprintf (stderr, "gettext children:\n"); + uint32_t count = ts_node_named_child_count (node); + uint32_t i; + for (i = 0; i < count; i++) + fprintf (stderr, "%u -> %s\n", i, ts_node_string (ts_node_named_child (node, i))); + } + } + } + #endif + + /* Recurse into the children, except for comment nodes. The children of a + string literal are visited with IGNORE set. 
*/ + if (ts_node_symbol (node) != ts_symbol_comment) + { + ignore = ignore || is_string_literal (node); + uint32_t count = ts_node_child_count (node); + uint32_t i; + for (i = 0; i < count; i++) + { + TSNode subnode = ts_node_child (node, i); + handle_comments (subnode); + if (++nesting_depth > MAX_NESTING_DEPTH) + if_error (IF_SEVERITY_FATAL_ERROR, + logical_file_name, ts_node_line_number (subnode), (size_t)(-1), false, + _("too many open parentheses, brackets, or braces")); + extract_from_node (subnode, + ignore, + outer_region, + mlp); + nesting_depth--; + } + } +} + +void +EXTRACT (FILE *f, /* NOTE(review): F is not used below; the file is read again by name. */ + const char *real_filename, const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp) +{ /* Parses the file REAL_FILENAME and adds its messages to the first domain of MDLP. */ + message_list_ty *mlp = mdlp->item[0]->messages; + + logical_file_name = xstrdup (logical_filename); + + last_comment_line = -1; + last_non_comment_line = -1; + + flag_context_list_table = flag_table; + nesting_depth = 0; + + init_keywords (); + + if (ts_language == NULL) /* First call: look up the symbol and field ids once. */ + { + ts_language = TREE_SITTER_LANGUAGE (); + ts_symbol_comment = ts_language_symbol ("comment", true); + ts_symbol_string = ts_language_symbol ("string", true); + ts_symbol_string_fragment = ts_language_symbol ("string_fragment", true); + ts_symbol_escape_sequence = ts_language_symbol ("escape_sequence", true); + ts_symbol_template_string = ts_language_symbol ("template_string", true); + ts_symbol_binary_expression = ts_language_symbol ("binary_expression", true); + ts_symbol_identifier = ts_language_symbol ("identifier", true); + ts_symbol_call_expression = ts_language_symbol ("call_expression", true); + ts_symbol_arguments = ts_language_symbol ("arguments", true); + ts_symbol_plus = ts_language_symbol ("+", false); + ts_field_function = ts_language_field ("function"); + ts_field_arguments = ts_language_field ("arguments"); + ts_field_operator = ts_language_field ("operator"); + ts_field_left = ts_language_field ("left"); + ts_field_right = ts_language_field ("right"); + } + 
+ /* Read the file into memory. */ + char *contents_data; + size_t contents_length; + contents_data = read_file (real_filename, 0, &contents_length); + if (contents_data == NULL) + error (EXIT_FAILURE, errno, _("error while reading \"%s\""), + real_filename); + + /* tree-sitter works only on files whose size fits in an uint32_t. */ + if (contents_length > 0xFFFFFFFFUL) + error (EXIT_FAILURE, 0, _("file \"%s\" is unsupported because too large"), + real_filename); + + /* TypeScript source files are usually UTF-8 encoded. Other encodings are + rejected here. */ + if (u8_check ((uint8_t *) contents_data, contents_length) != NULL) + error (EXIT_FAILURE, 0, + _("file \"%s\" is unsupported because not UTF-8 encoded"), + real_filename); + xgettext_current_source_encoding = po_charset_utf8; + + /* Create a parser. */ + TSParser *parser = ts_parser_new (); + + /* Set the parser's language. */ + ts_parser_set_language (parser, ts_language); + + /* Parse the file, producing a syntax tree. + NOTE(review): the result is not checked for NULL - confirm that parsing + cannot fail here. */ + TSTree *tree = ts_parser_parse_string (parser, NULL, contents_data, contents_length); + + #if DEBUG_TYPESCRIPT + /* For debugging: Print the tree. */ + { + char *tree_as_string = ts_node_string (ts_tree_root_node (tree)); + fprintf (stderr, "Syntax tree: %s\n", tree_as_string); + free (tree_as_string); + } + #endif + + contents = contents_data; + + extract_from_node (ts_tree_root_node (tree), + false, + null_context_region (), + mlp); + + ts_tree_delete (tree); + ts_parser_delete (parser); + free (contents_data); + + logical_file_name = NULL; /* NOTE(review): the xstrdup'ed name is not freed; presumably the recorded positions still point to it - confirm. */ +} diff --git a/gettext-tools/src/x-typescript.c b/gettext-tools/src/x-typescript.c new file mode 100644 index 000000000..d15bb2fab --- /dev/null +++ b/gettext-tools/src/x-typescript.c @@ -0,0 +1,31 @@ +/* xgettext TypeScript backend. + Copyright (C) 2001-2025 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2025. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +/* Specification. */ +#include "x-typescript.h" + +#define NOTE_OPTION_KEYWORD x_typescript_keyword +#define NOTE_OPTION_EXTRACT_ALL x_typescript_extract_all +#define INIT_FLAG_TABLE init_flag_table_typescript +#define EXTRACT extract_typescript +#define TREE_SITTER_LANGUAGE tree_sitter_typescript +#include "x-typescript-impl.h" diff --git a/gettext-tools/src/x-typescript.h b/gettext-tools/src/x-typescript.h new file mode 100644 index 000000000..0e806e364 --- /dev/null +++ b/gettext-tools/src/x-typescript.h @@ -0,0 +1,52 @@ +/* xgettext TypeScript backend. + Copyright (C) 2002-2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2025. 
*/ + + +#include + +#include "message.h" +#include "xg-arglist-context.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#define EXTENSIONS_TYPESCRIPT \ + { "ts", "TypeScript" }, \ + +#define SCANNERS_TYPESCRIPT \ + { "TypeScript", extract_typescript, NULL, \ + &flag_table_typescript, &formatstring_javascript, NULL }, \ + +/* Scan a TypeScript file and add its translatable strings to mdlp. */ +extern void extract_typescript (FILE *fp, const char *real_filename, + const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp); + +extern void x_typescript_keyword (const char *keyword); +extern void x_typescript_extract_all (void); + +extern void init_flag_table_typescript (void); + + +#ifdef __cplusplus +} +#endif diff --git a/gettext-tools/src/x-typescriptx.c b/gettext-tools/src/x-typescriptx.c new file mode 100644 index 000000000..9b8139933 --- /dev/null +++ b/gettext-tools/src/x-typescriptx.c @@ -0,0 +1,31 @@ +/* xgettext TSX backend. + Copyright (C) 2001-2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2025. */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +/* Specification. 
*/ +#include "x-typescriptx.h" + +#define NOTE_OPTION_KEYWORD x_typescriptx_keyword +#define NOTE_OPTION_EXTRACT_ALL x_typescriptx_extract_all +#define INIT_FLAG_TABLE init_flag_table_typescriptx +#define EXTRACT extract_typescriptx +#define TREE_SITTER_LANGUAGE tree_sitter_tsx +#include "x-typescript-impl.h" diff --git a/gettext-tools/src/x-typescriptx.h b/gettext-tools/src/x-typescriptx.h new file mode 100644 index 000000000..e822d2a29 --- /dev/null +++ b/gettext-tools/src/x-typescriptx.h @@ -0,0 +1,52 @@ +/* xgettext TSX backend. + Copyright (C) 2002-2025 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* Written by Bruno Haible , 2025. */ + + +#include + +#include "message.h" +#include "xg-arglist-context.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +#define EXTENSIONS_TYPESCRIPTX \ + { "tsx", "TSX" }, \ + +#define SCANNERS_TYPESCRIPTX \ + { "TSX", extract_typescriptx, NULL, \ + &flag_table_typescriptx, &formatstring_javascript, NULL }, \ + +/* Scan a TSX file and add its translatable strings to mdlp. 
*/ +extern void extract_typescriptx (FILE *fp, const char *real_filename, + const char *logical_filename, + flag_context_list_table_ty *flag_table, + msgdomain_list_ty *mdlp); + +extern void x_typescriptx_keyword (const char *keyword); +extern void x_typescriptx_extract_all (void); + +extern void init_flag_table_typescriptx (void); + + +#ifdef __cplusplus +} +#endif diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c index c26dd0b03..a762f5fd6 100644 --- a/gettext-tools/src/xgettext.c +++ b/gettext-tools/src/xgettext.c @@ -107,6 +107,8 @@ #include "x-java.h" #include "x-csharp.h" #include "x-javascript.h" +#include "x-typescript.h" +#include "x-typescriptx.h" #include "x-scheme.h" #include "x-lisp.h" #include "x-elisp.h" @@ -194,6 +196,8 @@ static flag_context_list_table_ty flag_table_python; static flag_context_list_table_ty flag_table_java; static flag_context_list_table_ty flag_table_csharp; static flag_context_list_table_ty flag_table_javascript; +static flag_context_list_table_ty flag_table_typescript; +static flag_context_list_table_ty flag_table_typescriptx; static flag_context_list_table_ty flag_table_scheme; static flag_context_list_table_ty flag_table_lisp; static flag_context_list_table_ty flag_table_elisp; @@ -392,6 +396,8 @@ main (int argc, char *argv[]) init_flag_table_java (); init_flag_table_csharp (); init_flag_table_javascript (); + init_flag_table_typescript (); + init_flag_table_typescriptx (); init_flag_table_scheme (); init_flag_table_lisp (); init_flag_table_elisp (); @@ -436,6 +442,8 @@ main (int argc, char *argv[]) x_ruby_extract_all (); x_lua_extract_all (); x_javascript_extract_all (); + x_typescript_extract_all (); + x_typescriptx_extract_all (); x_vala_extract_all (); break; @@ -518,6 +526,8 @@ main (int argc, char *argv[]) x_ruby_keyword (optarg); x_lua_keyword (optarg); x_javascript_keyword (optarg); + x_typescript_keyword (optarg); + x_typescriptx_keyword (optarg); x_vala_keyword (optarg); x_desktop_keyword 
(optarg); if (optarg == NULL) @@ -1137,11 +1147,11 @@ Choice of input file language:\n")); printf (_("\ -L, --language=NAME recognise the specified language\n\ (C, C++, ObjectiveC, PO, Python, Java,\n\ - JavaProperties, C#, JavaScript, Scheme, Guile,\n\ - Lisp, EmacsLisp, librep, Rust, Go, Ruby, Shell,\n\ - awk, Lua, Smalltalk, Vala, Tcl, Perl, PHP,\n\ - GCC-source, YCP, NXStringTable, RST, RSJ,\n\ - Glade, GSettings, Desktop)\n")); + JavaProperties, C#, JavaScript, TypeScript, TSX,\n\ + Scheme, Guile, Lisp, EmacsLisp, librep, Rust,\n\ + Go, Ruby, Shell, awk, Lua, Smalltalk, Vala, Tcl,\n\ + Perl, PHP, GCC-source, YCP, NXStringTable, RST,\n\ + RSJ, Glade, GSettings, Desktop)\n")); printf (_("\ -C, --c++ shorthand for --language=C++\n")); printf (_("\ @@ -1181,27 +1191,27 @@ Language specific options:\n")); -a, --extract-all extract all strings\n")); printf (_("\ (only languages C, C++, ObjectiveC, Python,\n\ - Java, C#, JavaScript, Scheme, Guile, Lisp,\n\ - EmacsLisp, librep, Rust, Go, Shell, awk, Lua,\n\ - Vala, Tcl, Perl, PHP, GCC-source, Glade,\n\ - GSettings)\n")); + Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\ + Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\ + awk, Lua, Vala, Tcl, Perl, PHP, GCC-source,\n\ + Glade, GSettings)\n")); printf (_("\ -kWORD, --keyword=WORD look for WORD as an additional keyword\n\ -k, --keyword do not to use default keywords\n")); printf (_("\ (only languages C, C++, ObjectiveC, Python,\n\ - Java, C#, JavaScript, Scheme, Guile, Lisp,\n\ - EmacsLisp, librep, Rust, Go, Shell, awk, Lua,\n\ - Vala, Tcl, Perl, PHP, GCC-source, Glade,\n\ - GSettings, Desktop)\n")); + Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\ + Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\ + awk, Lua, Vala, Tcl, Perl, PHP, GCC-source,\n\ + Glade, GSettings, Desktop)\n")); printf (_("\ --flag=WORD:ARG:FLAG additional flag for strings inside the argument\n\ number ARG of keyword WORD\n")); printf (_("\ (only languages C, C++, ObjectiveC, Python,\n\ - 
Java, C#, JavaScript, Scheme, Guile, Lisp,\n\ - EmacsLisp, librep, Rust, Go, Shell, awk, Lua,\n\ - Vala, Tcl, Perl, PHP, GCC-source, YCP)\n")); + Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\ + Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\ + awk, Lua, Vala, Tcl, Perl, PHP, GCC-source, YCP)\n")); printf (_("\ --tag=WORD:FORMAT defines the behaviour of tagged template literals\n\ with tag WORD\n")); @@ -1664,6 +1674,12 @@ xgettext_record_flag (const char *optionstring) flag_context_list_table_insert (&flag_table_javascript, XFORMAT_PRIMARY, name_start, name_end, argnum, value, pass); + flag_context_list_table_insert (&flag_table_typescript, XFORMAT_PRIMARY, + name_start, name_end, + argnum, value, pass); + flag_context_list_table_insert (&flag_table_typescriptx, XFORMAT_PRIMARY, + name_start, name_end, + argnum, value, pass); break; case format_scheme: flag_context_list_table_insert (&flag_table_scheme, XFORMAT_PRIMARY, @@ -2319,6 +2335,8 @@ language_to_extractor (const char *name) SCANNERS_JAVA SCANNERS_CSHARP SCANNERS_JAVASCRIPT + SCANNERS_TYPESCRIPT + SCANNERS_TYPESCRIPTX SCANNERS_SCHEME SCANNERS_LISP SCANNERS_ELISP @@ -2414,6 +2432,8 @@ extension_to_language (const char *extension) EXTENSIONS_JAVA EXTENSIONS_CSHARP EXTENSIONS_JAVASCRIPT + EXTENSIONS_TYPESCRIPT + EXTENSIONS_TYPESCRIPTX EXTENSIONS_SCHEME EXTENSIONS_LISP EXTENSIONS_ELISP diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index 4a451b183..086e31754 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -182,6 +182,9 @@ TESTS = gettext-1 gettext-2 \ xgettext-tcl-5 \ xgettext-tcl-stackovfl-1 xgettext-tcl-stackovfl-2 \ xgettext-tcl-stackovfl-3 xgettext-tcl-stackovfl-4 \ + xgettext-typescript-1 xgettext-typescript-2 xgettext-typescript-3 \ + xgettext-typescript-4 xgettext-typescript-5 xgettext-typescript-6 \ + xgettext-typescript-7 \ xgettext-vala-1 xgettext-vala-2 xgettext-vala-3 xgettext-vala-4 \ xgettext-vala-5 xgettext-vala-6 
xgettext-vala-7 \ xgettext-vala-stackovfl-1 xgettext-vala-stackovfl-2 \ diff --git a/gettext-tools/tests/xgettext-javascript-6 b/gettext-tools/tests/xgettext-javascript-6 index de68018fe..d2016e9aa 100755 --- a/gettext-tools/tests/xgettext-javascript-6 +++ b/gettext-tools/tests/xgettext-javascript-6 @@ -1,7 +1,7 @@ #!/bin/sh . "${srcdir=.}/init.sh"; path_prepend_ . ../src -# Test of JavaScript JSX support. +# Test of JavaScript E4X and JSX support. cat <<\EOF > xg-js-6.js class Foo extends React.Component { @@ -22,6 +22,8 @@ class Bar extends React.Component { } } +// Some E4X tests. + var x1 = ; var s1 = _("Expected translation string #1"); var s2 = "foo"; @@ -48,7 +50,9 @@ function foo() { return { 'b' }; } var s10 = _("Expected translation string #9"); + // Mixing JSX with template literals. + var s11 = 0; var s12 = (
diff --git a/gettext-tools/tests/xgettext-typescript-1 b/gettext-tools/tests/xgettext-typescript-1 new file mode 100755 index 000000000..2c8595881 --- /dev/null +++ b/gettext-tools/tests/xgettext-typescript-1 @@ -0,0 +1,68 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test of TypeScript support. + +cat <<\EOF > xg-ts-1.ts +const s1: string = "Simple string, no gettext needed"; +const s2: string = _("Extract this first string"); +function foo(a: any): void { + const s3: string = `Prefix _(${_("Extract this second string")}) Postfix`; +} +const fooElement = document.getElementById("foo"); +if (fooElement && fooElement.innerHTML === _("Extract this third string")) { + /* _("This is a comment and must not be extracted!") */ +} +/* TRANSLATORS: This is a translator comment. */ +gettext("Extract this fourth string"); +// TRANSLATORS: This is another translator comment. +gettext("Extract this fifth string"); +EOF + +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments=TRANSLATORS: --no-location -o xg-ts-1.tmp xg-ts-1.ts 2>xg-ts-1.err +test $? = 0 || { cat xg-ts-1.err; Exit 1; } +func_filter_POT_Creation_Date xg-ts-1.tmp xg-ts-1.pot + +cat <<\EOF > xg-ts-1.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "Extract this first string" +msgstr "" + +msgid "Extract this second string" +msgstr "" + +msgid "Extract this third string" +msgstr "" + +#. TRANSLATORS: This is a translator comment. +msgid "Extract this fourth string" +msgstr "" + +#. TRANSLATORS: This is another translator comment. 
+msgid "Extract this fifth string" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-ts-1.ok xg-ts-1.pot +result=$? + +exit $result diff --git a/gettext-tools/tests/xgettext-typescript-2 b/gettext-tools/tests/xgettext-typescript-2 new file mode 100755 index 000000000..bfd4cc604 --- /dev/null +++ b/gettext-tools/tests/xgettext-typescript-2 @@ -0,0 +1,110 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test of TypeScript support. +# Playing with regex and division operator + +cat <<\EOF > xg-ts-2.ts +// RegExp literals containing string quotes must not desync the parser +const d: number = 1 / 2 / 4; +const s: string = " x " + (/^\d/.exec("0815")?.[0] || "").replace(/[a-z]/g, '@'); +const s1: RegExpMatchArray | null = /"/.exec(_("RegExp test string #1")); +const s2: RegExpMatchArray | null = /'/.exec(_("RegExp test string #2")); +const s3: RegExpMatchArray | null = /['a-b]/.exec(_('RegExp test string #3')); +const s4: RegExpMatchArray | null = /["a-b]/.exec(_('RegExp test string #4')); +const s5: RegExpMatchArray | null = /[a-b']/.exec(_('RegExp test string #5')); +const s6: RegExpMatchArray | null = /[a-b"]/.exec(_('RegExp test string #6')); +const c: number = 35 / 2 / 8 + _( "RegExp test string #7").length / 32.0; +const sizestr: string = Math.round(size/1024*factor)/factor+_( "RegExp test string #8"); +const cssClassType: string = attr.type.replace(/^.*\//, _('RegExp test string #9')).replace(/\./g, '-'); +const lookup: number = lookuptable[idx]/factor+_( "RegExp test string #10"); +function doit(): RegExpMatchArray | null { + return /\./.exec(_("RegExp test string #11")); +} +if (false) + /foo/.exec(_("RegExp test string #12")); +else + /foo/.exec(_("RegExp test string #13")); +const s7: boolean = /a\/\f\r\n\t\v\0\b\s\S\w\W\d\D\b\Bb/.test(_("RegExp test string #14")); +const s8: RegExpExecArray | null = /(?=(a+))a*b\1/.exec(_("RegExp test string #15")); +const s9: RegExpExecArray | null = /_\("a+"\)/.exec(_("RegExp test string #16")); +EOF + +: 
${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-ts-2.tmp xg-ts-2.ts 2>xg-ts-2.err +test $? = 0 || { cat xg-ts-2.err; Exit 1; } +func_filter_POT_Creation_Date xg-ts-2.tmp xg-ts-2.pot + +cat <<\EOF > xg-ts-2.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "RegExp test string #1" +msgstr "" + +msgid "RegExp test string #2" +msgstr "" + +msgid "RegExp test string #3" +msgstr "" + +msgid "RegExp test string #4" +msgstr "" + +msgid "RegExp test string #5" +msgstr "" + +msgid "RegExp test string #6" +msgstr "" + +msgid "RegExp test string #7" +msgstr "" + +msgid "RegExp test string #8" +msgstr "" + +msgid "RegExp test string #9" +msgstr "" + +msgid "RegExp test string #10" +msgstr "" + +msgid "RegExp test string #11" +msgstr "" + +msgid "RegExp test string #12" +msgstr "" + +msgid "RegExp test string #13" +msgstr "" + +msgid "RegExp test string #14" +msgstr "" + +msgid "RegExp test string #15" +msgstr "" + +msgid "RegExp test string #16" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-ts-2.ok xg-ts-2.pot +result=$? + +exit $result diff --git a/gettext-tools/tests/xgettext-typescript-3 b/gettext-tools/tests/xgettext-typescript-3 new file mode 100755 index 000000000..110f15aae --- /dev/null +++ b/gettext-tools/tests/xgettext-typescript-3 @@ -0,0 +1,74 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test of TypeScript support: string concatenation, +# strings with embedded expressions. 
+ +cat <<\EOF > xg-ts-3.ts +let s0: string; +let s1: string = _("Concatenation #1 " + "- String part added"); +let s2: string = _('Concatenation #2 ' + '- String part added'); + +// a +let s3: string = // b + _("This" + " whole " // c + + "string" + // d + ' should' + " be " + 'extracted'); + +// Strings with embedded expressions, a.k.a. template literals. +let t: string = ""; +let e1: string = _(`embedded_1_${foo}_bar`); +let e2: string = _(`embedded_2_${_("embedded_2_sub1")}_bar_${_('embedded_2_sub2')}_baz`); +let e3: string = _(`embedded_3`); +EOF + +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-ts-3.tmp xg-ts-3.ts 2>xg-ts-3.err +test $? = 0 || { cat xg-ts-3.err; Exit 1; } +func_filter_POT_Creation_Date xg-ts-3.tmp xg-ts-3.pot + +cat <<\EOF > xg-ts-3.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "Concatenation #1 - String part added" +msgstr "" + +msgid "Concatenation #2 - String part added" +msgstr "" + +#. a +#. b +msgid "This whole string should be extracted" +msgstr "" + +msgid "embedded_2_sub1" +msgstr "" + +msgid "embedded_2_sub2" +msgstr "" + +msgid "embedded_3" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-ts-3.ok xg-ts-3.pot +result=$? + +exit $result diff --git a/gettext-tools/tests/xgettext-typescript-4 b/gettext-tools/tests/xgettext-typescript-4 new file mode 100755 index 000000000..b725c7bcc --- /dev/null +++ b/gettext-tools/tests/xgettext-typescript-4 @@ -0,0 +1,51 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . 
../src + +# Test of TypeScript escape sequences in string literals. + +cat <<\EOF > xg-ts-4.ts +const s1: string = _("Unicode escape \u3042"); +const s2: string = _("Surrogate pair \uD835\uDC9C"); +const s3: string = _("Escape sequence \1411 \x622"); +const s4: string = _("Invalid escape sequence \xxx \y"); +EOF + +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-ts-4.tmp xg-ts-4.ts 2>xg-ts-4.err +test $? = 0 || { cat xg-ts-4.err; Exit 1; } +func_filter_POT_Creation_Date xg-ts-4.tmp xg-ts-4.pot + +cat <<\EOF > xg-ts-4.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "Unicode escape あ" +msgstr "" + +msgid "Surrogate pair 𝒜" +msgstr "" + +msgid "Escape sequence a1 b2" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-ts-4.ok xg-ts-4.pot +result=$? + +exit $result diff --git a/gettext-tools/tests/xgettext-typescript-5 b/gettext-tools/tests/xgettext-typescript-5 new file mode 100755 index 000000000..922f23582 --- /dev/null +++ b/gettext-tools/tests/xgettext-typescript-5 @@ -0,0 +1,69 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test of TypeScript Unicode support.
+ +cat <<\EOF > xg-ts-5.ts +// The following excerpt is adapted from json2.js +const cx: RegExp = /[\u0000\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g; +const escapable: RegExp = /[\\\"\x00-\x1f\x7f-\x9f\u00ad\u0600-\u0604\u070f\u17b4\u17b5\u200c-\u200f\u2028-\u202f\u2060-\u206f\ufeff\ufff0-\uffff]/g; +let gap: string | undefined; +const txt1: string = _("Expected translation string #1"); +let indent: string | undefined; +const meta: Record<string, string> = { + '\b': '\\b', + '\t': '\\t', + '\n': '\\n', + '\f': '\\f', + '\r': '\\r', + '"': '\\"', + '\\': '\\\\' + _("Expected translation string #2") +}; +const txt2: string = _("Expected translation string #3"); +let rep: any; +const matched: string = curnodepath.match(new RegExp(`^\\${path}\/([\\w\\s]+)`)) + + _("Expected translation string #4"); +EOF + +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-ts-5.tmp xg-ts-5.ts 2>xg-ts-5.err +test $? = 0 || { cat xg-ts-5.err; Exit 1; } +func_filter_POT_Creation_Date xg-ts-5.tmp xg-ts-5.pot + +cat <<\EOF > xg-ts-5.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "Expected translation string #1" +msgstr "" + +msgid "Expected translation string #2" +msgstr "" + +msgid "Expected translation string #3" +msgstr "" + +msgid "Expected translation string #4" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-ts-5.ok xg-ts-5.pot +result=$?
+ +exit $result diff --git a/gettext-tools/tests/xgettext-typescript-6 b/gettext-tools/tests/xgettext-typescript-6 new file mode 100755 index 000000000..75ffa4c47 --- /dev/null +++ b/gettext-tools/tests/xgettext-typescript-6 @@ -0,0 +1,157 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test of TypeScript JSX support. + +cat <<\EOF > xg-ts-6.tsx +import React from 'react'; +type FooProps = {}; +type FooState = {}; +class Foo extends React.Component<FooProps, FooState> { + render() { + return
; + } +} +type BarProps = {}; +type BarState = {}; +class Bar extends React.Component<BarProps, BarState> { + render() { + return ( +
+ + { gettext('Expected translation string #0') } +
+ ); + } +} +const x1 = ; +const s1: string = _("Expected translation string #1"); +const s2: string = "foo"; +const x2 = React.createElement(s2 as any, null, `foo ${s2} bar`); +const x3 = ( + + {_("Expected translation string #2")} + +); +const x4 = ( + + {React.createElement(_("Expected translation string #3") as any)} + +); +const s9: string = _("Expected translation string #8"); +function fooFunction() { + return {'b'}; +} +const s10: string = _("Expected translation string #9"); +// Mixing JSX with template literals. +const s11 = 0; +const s12 = ( +
+ {_("Expected translation string #10")} + {`${_("Expected translation string #11")}`} + {_("Expected translation string #12")} +
+); +const s13: string = _("Expected translation string #13"); +const s14 =
; +const s15: string = _("Expected translation string #15"); +const s16 = { a: 1, b:
}; +const s17: string = _("Expected translation string #17"); +const s18 = `begin${
{_("Expected translation string #18")}
}end`; +const s19: string = _("Expected translation string #19"); +const s20 = () => ( + {_("Expected translation string #21")}
} + a3={_("Expected translation string #22")} + /> +); +EOF + +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-ts-6.tmp xg-ts-6.tsx 2>xg-ts-6.err +test $? = 0 || { cat xg-ts-6.err; Exit 1; } +func_filter_POT_Creation_Date xg-ts-6.tmp xg-ts-6.pot + +cat <<\EOF > xg-ts-6.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "Expected translation string #0" +msgstr "" + +msgid "Expected translation string #1" +msgstr "" + +msgid "Expected translation string #2" +msgstr "" + +msgid "Expected translation string #3" +msgstr "" + +msgid "Expected translation string #8" +msgstr "" + +msgid "Expected translation string #9" +msgstr "" + +msgid "Expected translation string #10" +msgstr "" + +msgid "Expected translation string #11" +msgstr "" + +msgid "Expected translation string #12" +msgstr "" + +msgid "Expected translation string #13" +msgstr "" + +msgid "Expected translation string #14" +msgstr "" + +msgid "Expected translation string #15" +msgstr "" + +msgid "Expected translation string #16" +msgstr "" + +msgid "Expected translation string #17" +msgstr "" + +msgid "Expected translation string #18" +msgstr "" + +msgid "Expected translation string #19" +msgstr "" + +msgid "Expected translation string #20" +msgstr "" + +msgid "Expected translation string #21" +msgstr "" + +msgid "Expected translation string #22" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-ts-6.ok xg-ts-6.pot +result=$?
+ +exit $result diff --git a/gettext-tools/tests/xgettext-typescript-7 b/gettext-tools/tests/xgettext-typescript-7 new file mode 100755 index 000000000..fba11c4bf --- /dev/null +++ b/gettext-tools/tests/xgettext-typescript-7 @@ -0,0 +1,90 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test of TypeScript template literal support. + +cat <<\EOF > xg-ts-7.ts +const s0: string = _(`A template literal without substitutions`); +const s1: string = _(`A template literal with +embedded +newlines`); +const s2: string = _(`A template literal with ${n} substitutions`); +const s3: string = _(`A template literal with several substitutions: ${a} and ${b} and ${c} and so on`); +const s4: string = `/${looks_like_regex}`; +const s5: string = _('not part of a regex'); +const s6: string = `that's a valid string. ` + _('This too'); +const s7: string = _(tag`A template literal with a tag`); +const s8: string = `a${`b${`c`+d}`}e`; +const s9: string = _("a normal string"); +const s10: string = `abc${foo({}, _('should be extracted'))}xyz`; +const f1 = function (): string { + return _("first normal string") + `${foo}` + _("second normal string"); +}; +const s11: string = _("another normal string"); +const s12: { property: string } = { property: `A template literal with ${n} substitution` }; +const s13: string = _("yet another normal string"); +EOF + +: ${XGETTEXT=xgettext} +${XGETTEXT} --add-comments --no-location -o xg-ts-7.tmp xg-ts-7.ts 2>xg-ts-7.err +test $? = 0 || { cat xg-ts-7.err; Exit 1; } +func_filter_POT_Creation_Date xg-ts-7.tmp xg-ts-7.pot + +cat <<\EOF > xg-ts-7.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n" +"Language-Team: LANGUAGE <LL@li.org>\n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "A template literal without substitutions" +msgstr "" + +msgid "" +"A template literal with\n" +"embedded\n" +"newlines" +msgstr "" + +msgid "not part of a regex" +msgstr "" + +msgid "This too" +msgstr "" + +msgid "a normal string" +msgstr "" + +msgid "should be extracted" +msgstr "" + +msgid "first normal string" +msgstr "" + +msgid "second normal string" +msgstr "" + +msgid "another normal string" +msgstr "" + +msgid "yet another normal string" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-ts-7.ok xg-ts-7.pot +result=$? + +exit $result