From: Bruno Haible Date: Sun, 27 Jul 2025 22:07:10 +0000 (+0200) Subject: OCaml support: Add OCaml support in the tools. X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=66ce533273113468dad491f80ea248a37d39e72f;p=thirdparty%2Fgettext.git OCaml support: Add OCaml support in the tools. This implements several improvements, compared to "ocaml-gettext --action extract ...": - Comments are extracted. - '#, ocaml-format' annotation on f_ arguments. - Recognizes concatenated strings, such as ("abc" ^ "def"). - Supports (s_ ""). * autopull.sh: Check out tree-sitter-ocaml. Set TREE_SITTER_OCAML_VERSION. * gettext-tools/build-aux/tree-sitter-ocaml-portability.diff: New file. * gettext-tools/configure.ac: Set TREE_SITTER_OCAML_VERSION. * gettext-tools/Makefile.am (EXTRA_DIST): Add the tree-sitter-ocaml source code and patch. * gettext-tools/doc/gettext.texi (PO Files): Mention ocaml-format. (Translators for other Languages): New subsection "OCaml Format Strings". * gettext-tools/doc/lang-ocaml.texi: Mention all the supported *gettext functions. Mention format strings with positions. * gettext-tools/doc/xgettext.texi: Document the OCaml support in more places. * gettext-tools/src/message.h (format_ocaml): New enum value. (NFORMATS): Increment. * gettext-tools/src/message.c (format_language, format_language_pretty): Add an entry for format_ocaml. * gettext-tools/src/format-ocaml.c: New file, based on gettext-tools/src/format-awk.c. * gettext-tools/src/format.h (formatstring_ocaml): New declaration. * gettext-tools/src/format.c (formatstring_parsers): Add formatstring_ocaml. * gettext-tools/src/x-ocaml.h (SCANNERS_OCAML): Reference flag_table_ocaml and formatstring_ocaml. (extract_ocaml): Declare with first argument of type 'FILE *'. * gettext-tools/src/x-ocaml.c: Completely rewritten. * gettext-tools/src/xgettext.c (flag_table_ocaml): New variable. (usage): Document the OCaml support in more places. (xgettext_record_flag): Support format_ocaml. * gettext-tools/src/FILES: Mention format-ocaml.c. * gettext-tools/src/Makefile.am (FORMAT_SOURCE): Add format-ocaml.c. (LIBXGETTEXTTS): Add libxgettextts5.a. (libxgettextts5_a_*): New variables. * gettext-tools/libgettextpo/Makefile.am (libgettextpo_la_AUXSOURCES): Add format-ocaml.c. * gettext-tools/po/POTFILES.in: Add src/format-ocaml.c. * gettext-tools/tests/format-ocaml-1: New file. * gettext-tools/tests/format-ocaml-2: New file. * gettext-tools/tests/xgettext-ocaml-1: New file. * gettext-tools/tests/xgettext-ocaml-2: New file. * gettext-tools/tests/Makefile.am (TESTS): Add the new tests. * NEWS: Mention the OCaml support. --- diff --git a/.gitignore b/.gitignore index dc321b979..7aac1b569 100644 --- a/.gitignore +++ b/.gitignore @@ -718,6 +718,7 @@ autom4te.cache/ /gettext-tools/src/libxgettextts2.a /gettext-tools/src/libxgettextts3.a /gettext-tools/src/libxgettextts4.a +/gettext-tools/src/libxgettextts5.a /gettext-tools/src/libxgettextx.a /gettext-tools/src/msgfmt.net.exe /gettext-tools/src/msgfmt.net.exe.mdb diff --git a/NEWS b/NEWS index 56645e9eb..2fa1524e6 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,11 @@ +Version 0.27 - September 2025 + +# Programming languages support: + * OCaml: + - xgettext now supports OCaml. + - 'msgfmt -c' now verifies the syntax of translations of OCaml format + strings. + Version 0.26 - July 2025 # Programming languages support: diff --git a/autopull.sh b/autopull.sh index 6cc7e5aaf..31553df7f 100755 --- a/autopull.sh +++ b/autopull.sh @@ -89,6 +89,7 @@ TREE_SITTER_VERSION=0.23.2 TREE_SITTER_GO_VERSION=0.23.4 TREE_SITTER_RUST_VERSION=0.23.2 TREE_SITTER_TYPESCRIPT_VERSION=0.23.2 +TREE_SITTER_OCAML_VERSION=0.23.2 TREE_SITTER_D_VERSION=0.8.2 # Cache the relevant source code. Erase the rest of the tree-sitter projects. test -d gettext-tools/tree-sitter-$TREE_SITTER_VERSION || { @@ -135,6 +136,20 @@ test -d gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION || mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/scanner.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/tsx-scanner.c rm -rf tree-sitter-typescript } +test -d gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION || { + func_git_clone_shallow tree-sitter-ocaml https://github.com/tree-sitter/tree-sitter-ocaml.git v$TREE_SITTER_OCAML_VERSION + (cd tree-sitter-ocaml && patch -p1) < gettext-tools/build-aux/tree-sitter-ocaml-portability.diff + mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION + mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/common + mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars + mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml + mv tree-sitter-ocaml/LICENSE gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/LICENSE + mv tree-sitter-ocaml/common/scanner.h gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/common/scanner.h + mv tree-sitter-ocaml/grammars/ocaml/src gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src + mv gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/parser.c gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/ocaml-parser.c + mv gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/scanner.c gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/ocaml-scanner.c + rm -rf tree-sitter-ocaml +} test -d gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION || { func_git_clone_shallow tree-sitter-d https://github.com/gdamore/tree-sitter-d.git v$TREE_SITTER_D_VERSION (cd tree-sitter-d && patch -p1) < gettext-tools/build-aux/tree-sitter-d-portability.diff @@ -151,6 +166,7 @@ TREE_SITTER_VERSION=$TREE_SITTER_VERSION TREE_SITTER_GO_VERSION=$TREE_SITTER_GO_VERSION TREE_SITTER_RUST_VERSION=$TREE_SITTER_RUST_VERSION TREE_SITTER_TYPESCRIPT_VERSION=$TREE_SITTER_TYPESCRIPT_VERSION +TREE_SITTER_OCAML_VERSION=$TREE_SITTER_OCAML_VERSION TREE_SITTER_D_VERSION=$TREE_SITTER_D_VERSION EOF diff --git a/gettext-tools/Makefile.am b/gettext-tools/Makefile.am index bafc917f4..0cec542ce 100644 --- a/gettext-tools/Makefile.am +++ b/gettext-tools/Makefile.am @@ -100,6 +100,14 @@ EXTRA_DIST += \ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/alloc.h \ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/array.h \ tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/parser.h \ + build-aux/tree-sitter-ocaml-portability.diff \ + tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/LICENSE \ + tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/common/scanner.h \ + tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-parser.c \ + tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-scanner.c \ + tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/tree_sitter/alloc.h \ + tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/tree_sitter/array.h \ + tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/tree_sitter/parser.h \ build-aux/tree-sitter-d-portability.diff \ tree-sitter-d-$(TREE_SITTER_D_VERSION)/LICENSE \ tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/d-parser.c \ diff --git a/gettext-tools/build-aux/tree-sitter-ocaml-portability.diff b/gettext-tools/build-aux/tree-sitter-ocaml-portability.diff new file mode 100644 index 000000000..3a039094b --- /dev/null +++ b/gettext-tools/build-aux/tree-sitter-ocaml-portability.diff @@ -0,0 +1,32 @@ +diff --git a/grammars/ocaml/src/parser.c b/grammars/ocaml/src/parser.c +index 653700c..3ddc246 100644 +--- a/grammars/ocaml/src/parser.c ++++ b/grammars/ocaml/src/parser.c +@@ -1103727,8 +1103727,10 @@ void tree_sitter_ocaml_external_scanner_deserialize(void *, const char *, unsign + #define TS_PUBLIC + #elif defined(_WIN32) + #define TS_PUBLIC __declspec(dllexport) +-#else ++#elif defined __GNUC__ || defined __clang__ + #define TS_PUBLIC __attribute__((visibility("default"))) ++#else ++#define TS_PUBLIC + #endif + + TS_PUBLIC const TSLanguage *tree_sitter_ocaml(void) { +diff --git a/grammars/ocaml/src/tree_sitter/parser.h b/grammars/ocaml/src/tree_sitter/parser.h +index 799f599..130b4d0 100644 +--- a/grammars/ocaml/src/tree_sitter/parser.h ++++ b/grammars/ocaml/src/tree_sitter/parser.h +@@ -155,8 +155,10 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t + + #ifdef _MSC_VER + #define UNUSED __pragma(warning(suppress : 4101)) +-#else ++#elif defined __GNUC__ || defined __clang__ + #define UNUSED __attribute__((unused)) ++#else ++#define UNUSED + #endif + + #define START_LEXER() \ diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac index b4ca6275f..de1380a6e 100644 --- a/gettext-tools/configure.ac +++ b/gettext-tools/configure.ac @@ -637,6 +637,7 @@ AC_SUBST([TREE_SITTER_VERSION]) AC_SUBST([TREE_SITTER_GO_VERSION]) AC_SUBST([TREE_SITTER_RUST_VERSION]) AC_SUBST([TREE_SITTER_TYPESCRIPT_VERSION]) +AC_SUBST([TREE_SITTER_OCAML_VERSION]) AC_SUBST([TREE_SITTER_D_VERSION]) dnl Determine the directory name to use for version-specific data files. diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index 9b5018f35..5004314a6 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -426,6 +426,7 @@ The Translator's View * object-pascal-format:: Object Pascal Format Strings * modula2-format:: Modula-2 Format Strings * d-format:: D Format Strings +* ocaml-format:: OCaml Format Strings * smalltalk-format:: Smalltalk Format Strings * qt-format:: Qt Format Strings * qt-plural-format:: Qt Plural Format Strings @@ -1846,6 +1847,12 @@ Likewise for Modula-2, see @ref{modula2-format}. @kwindex no-d-format@r{ flag} Likewise for D, see @ref{d-format}. +@item ocaml-format +@kwindex ocaml-format@r{ flag} +@itemx no-ocaml-format +@kwindex no-ocaml-format@r{ flag} +Likewise for OCaml, see @ref{ocaml-format}. + @item smalltalk-format @kwindex smalltalk-format@r{ flag} @itemx no-smalltalk-format @@ -10149,6 +10156,7 @@ strings. * object-pascal-format:: Object Pascal Format Strings * modula2-format:: Modula-2 Format Strings * d-format:: D Format Strings +* ocaml-format:: OCaml Format Strings * smalltalk-format:: Smalltalk Format Strings * qt-format:: Qt Format Strings * qt-plural-format:: Qt Plural Format Strings @@ -10514,6 +10522,18 @@ D format strings are described in the documentation of the D module @code{std.format}, at @uref{https://dlang.org/library/std/format.html}. +@node ocaml-format +@subsection OCaml Format Strings + +OCaml format strings are described +in the documentation of the @code{Printf} module, +at @uref{https://ocaml.org/manual/5.3/api/Printf.html#VALfprintf}. +In translated strings (@code{msgstr} values), +arguments can be specified through a number, +using the syntax @samp{%@var{m}$} instead of @samp{%} +and @samp{*@var{m}$} instead of @samp{*} +to designate the argument number @var{m}. + @node smalltalk-format @subsection Smalltalk Format Strings diff --git a/gettext-tools/doc/lang-ocaml.texi b/gettext-tools/doc/lang-ocaml.texi index 24e76a90f..9e900bdfd 100644 --- a/gettext-tools/doc/lang-ocaml.texi +++ b/gettext-tools/doc/lang-ocaml.texi @@ -24,7 +24,13 @@ ocaml, opam @code{(s_ "abc")} or, for format strings, @code{(f_ "abc")} @item gettext/ngettext functions -@code{s_}, @code{f_}, @code{sn_}, @code{fn_} +@code{s_}, @code{f_}, @code{sn_}, @code{fn_}, and also +@code{gettext}, @code{fgettext}, +@code{dgettext}, @code{fdgettext}, +@code{dcgettext}, @code{fdcgettext}, +@code{ngettext}, @code{fngettext}, +@code{dngettext}, @code{fdngettext}, +@code{dcngettext}, @code{fdcngettext}. @item textdomain @code{textdomain} field in first parameter of @code{Gettext.Program} @@ -47,7 +53,7 @@ as second parameter of @code{Gettext.Program}) @code{xgettext} @item Formatting with positions ---- +@code{Printf.fprintf "%2$d %1$d"} @item Portability fully portable diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi index 423051bde..b089aa629 100644 --- a/gettext-tools/doc/xgettext.texi +++ b/gettext-tools/doc/xgettext.texi @@ -270,6 +270,7 @@ awk, Lua, Modula-2, D, +OCaml, Vala, Tcl, Perl, @@ -335,6 +336,7 @@ awk, Lua, Modula-2, D, +OCaml, Vala, Tcl, Perl, @@ -502,6 +504,7 @@ awk, Lua, Modula-2, D, +OCaml, Vala, Tcl, Perl, diff --git a/gettext-tools/libgettextpo/Makefile.am b/gettext-tools/libgettextpo/Makefile.am index 971d1364f..8a23571e8 100644 --- a/gettext-tools/libgettextpo/Makefile.am +++ b/gettext-tools/libgettextpo/Makefile.am @@ -89,6 +89,7 @@ libgettextpo_la_AUXSOURCES = \ ../src/format-pascal.c \ ../src/format-modula2.c \ ../src/format-d.c \ + ../src/format-ocaml.c \ ../src/format-smalltalk.c \ ../src/format-qt.c \ ../src/format-qt-plural.c \ diff --git a/gettext-tools/po/POTFILES.in b/gettext-tools/po/POTFILES.in index 0a2f0e1a9..2de342158 100644 --- a/gettext-tools/po/POTFILES.in +++ b/gettext-tools/po/POTFILES.in @@ -30,6 +30,7 @@ src/format-librep.c src/format-lisp.c src/format-lua.c src/format-modula2.c +src/format-ocaml.c src/format-pascal.c src/format-perl-brace.c src/format-perl.c diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES index b34f0cee3..997146bf8 100644 --- a/gettext-tools/src/FILES +++ b/gettext-tools/src/FILES @@ -246,6 +246,7 @@ format-lua.c Format string handling for Lua. format-pascal.c Format string handling for Object Pascal. format-modula2.c Format string handling for Modula-2. format-d.c Format string handling for D. +format-ocaml.c Format string handling for OCaml. format-smalltalk.c Format string handling for Smalltalk and YCP. format-qt.c Format string handling for Qt. format-qt-plural.c Format string handling for Qt plural forms. diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am index 0e66d1571..93469a11c 100644 --- a/gettext-tools/src/Makefile.am +++ b/gettext-tools/src/Makefile.am @@ -210,6 +210,7 @@ FORMAT_SOURCE += \ format-pascal.c \ format-modula2.c \ format-d.c \ + format-ocaml.c \ format-smalltalk.c \ format-qt.c \ format-qt-plural.c \ @@ -249,7 +250,7 @@ libgettextsrc_la_SOURCES = \ search-path.c # xgettext has some tree-sitter based backends. -LIBXGETTEXTTS = libxgettextts2.a libxgettextts3.a libxgettextts4.a libxgettextts1.a +LIBXGETTEXTTS = libxgettextts2.a libxgettextts3.a libxgettextts4.a libxgettextts5.a libxgettextts1.a libxgettextts1_a_SOURCES = \ ../tree-sitter-$(TREE_SITTER_VERSION)/lib/src/lib.c libxgettextts1_a_CPPFLAGS = \ @@ -279,6 +280,13 @@ libxgettextts4_a_CPPFLAGS = \ -I$(top_srcdir)/tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src \ -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include libxgettextts4_a_CFLAGS = $(AM_CFLAGS) @FOREIGN_WARN_CFLAGS@ @NO_EXPENSIVE_WARN_CFLAGS@ +libxgettextts5_a_SOURCES = \ + ../tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-parser.c \ + ../tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-scanner.c +libxgettextts5_a_CPPFLAGS = \ + -I$(top_srcdir)/tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src \ + -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include +libxgettextts5_a_CFLAGS = $(AM_CFLAGS) @FOREIGN_WARN_CFLAGS@ @NO_EXPENSIVE_WARN_CFLAGS@ # msggrep needs pattern matching. LIBGREP = ../libgrep/libgrep.a ../libgrep/gnulib-lib/libgnu.a diff --git a/gettext-tools/src/format-ocaml.c b/gettext-tools/src/format-ocaml.c new file mode 100644 index 000000000..acd0b6dd5 --- /dev/null +++ b/gettext-tools/src/format-ocaml.c @@ -0,0 +1,1100 @@ +/* OCaml format strings. + Copyright (C) 2001-2025 Free Software Foundation, Inc. + Written by Bruno Haible , 2025. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include +#include +#include + +#include "format.h" +#include "gettext.h" +#include "xalloc.h" +#include "format-invalid.h" +#include "c-ctype.h" +#include "xvasprintf.h" + +#define _(str) gettext (str) + +/* The OCaml format strings are described in the OCaml reference manual, + at https://ocaml.org/manual/5.3/api/Printf.html#VALfprintf . + They are implemented in ocaml-5.3.0/stdlib/scanf.ml. + + A directive + - starts with '%', + - [in msgstr only] is optionally followed by + a positive integer m, then '$' + - is optionally followed by a sequence of flags, each being one of + '+', '-', ' ', '0', '#', + - is optionally followed by a width specification: + a positive integer, or + '*', or + [in msgstr only] '*', then a positive integer, then '$', + - is optionally followed by a precision specification: + '.' then optionally: + a positive integer, or + '*', or + [in msgstr only] '*', then a positive integer, then '$', + - is finished by a specifier + - 'd', 'i', 'u', 'x', 'X', 'o', that need an integer argument, + - 'l' then 'd', 'i', 'u', 'x', 'X', 'o', that need an int32 argument, + - 'n' then 'd', 'i', 'u', 'x', 'X', 'o', that need an nativeint argument, + - 'L' then 'd', 'i', 'u', 'x', 'X', 'o', that need an int64 argument, + - 's', that needs a string argument, + - 'S', that needs a string argument and outputs it in OCaml syntax, + - 'c', that needs a character argument, + - 'C', that needs a character argument and outputs it in OCaml syntax, + - 'f', 'e', 'E', 'g', 'G', 'h', 'H', that need a floating-point argument, + - 'F', that needs a floating-point argument and outputs it in OCaml syntax, + - 'B', that needs a boolean argument, + - 'a', that takes a function (of type : out_channel -> unit) argument, + - 't', that takes two arguments: a function (of type : out_channel -> -> unit) + and a , + - '{' FMT '%}', that takes a format string argument without msgstr + extensions, expected to have the same signature as FMT, effectively + ignores it, and instead outputs the minimal format string with the + same signature as FMT: a concatenation of + - "%i" for an integer argument, + - "%li" for an int32 argument, + - "%ni" for a nativeint argument, + - "%Li" for an int64 argument, + - "%s" for a string argument, + - "%c" for a character argument, + - "%f" for a floating-point argument, + - "%B" for a boolean argument, + - "%a" for a function argument, + - "%t" for two arguments, as described above, + - '(' FMT '%)', that takes a format string argument without msgstr + extensions, expected to have the same signature as FMT, and a set + of arguments suitable for FMT, + - '!', '%', '@', ',', that take no argument. + Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot + be used in the same string. + */ + +enum format_arg_type +{ + FAT_NONE = 0, + /* Basic types */ + FAT_INTEGER = 1, + FAT_INT32 = 2, + FAT_NATIVEINT = 3, + FAT_INT64 = 4, + FAT_STRING = 5, + FAT_CHARACTER = 6, + FAT_FLOATINGPOINT = 7, + FAT_BOOLEAN = 8, + FAT_FUNCTION_A = 9, + FAT_FUNCTION_T = 10, /* first argument for %t */ + FAT_FUNCTION_T2 = 11, /* second argument for %t */ + FAT_FORMAT_STRING = 12, + /* Flags */ + FAT_OCAML_SYNTAX = 1 << 4, + FAT_OPTIONAL_OCAML_SYNTAX = 1 << 5, + /* Bitmasks */ + FAT_BASIC_MASK = (FAT_INTEGER | FAT_INT32 | FAT_NATIVEINT | FAT_INT64 + | FAT_STRING | FAT_CHARACTER | FAT_FLOATINGPOINT + | FAT_BOOLEAN | FAT_FUNCTION_A | FAT_FUNCTION_T + | FAT_FUNCTION_T2 | FAT_FORMAT_STRING) +}; +#ifdef __cplusplus +typedef int format_arg_type_t; +#else +typedef enum format_arg_type format_arg_type_t; +#endif + +struct numbered_arg +{ + size_t number; + format_arg_type_t type; + char *signature; /* for type == FAT_FORMAT_STRING */ +}; + +struct spec +{ + size_t directives; + size_t numbered_arg_count; + struct numbered_arg *numbered; +}; + + +static int +numbered_arg_compare (const void *p1, const void *p2) +{ + size_t n1 = ((const struct numbered_arg *) p1)->number; + size_t n2 = ((const struct numbered_arg *) p2)->number; + + return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0); +} + +/* Frees the memory held by *spec. */ +static void +destroy_spec (struct spec *spec) +{ + if (spec->numbered != NULL) + { + size_t i; + for (i = spec->numbered_arg_count; i > 0; ) + { + --i; + if (spec->numbered[i].type == FAT_FORMAT_STRING) + free (spec->numbered[i].signature); + } + free (spec->numbered); + } +} + +/* Returns the signature of a format string + as a freshly allocated string. */ +static char * +format_string_signature (const struct spec *spec) +{ + size_t len; + { + size_t i; + const struct numbered_arg *p; + len = spec->numbered_arg_count; + for (i = 0, p = spec->numbered; i < spec->numbered_arg_count; i++, p++) + if ((p->type & FAT_BASIC_MASK) == FAT_FORMAT_STRING) + len += strlen (p->signature) + 1; + } + char *signature = (char *) xmalloc (len + 1); + { + size_t i; + const struct numbered_arg *p; + char *s; + for (i = 0, p = spec->numbered, s = signature; + i < spec->numbered_arg_count; + i++, p++) + switch (p->type & FAT_BASIC_MASK) + { + case FAT_INTEGER: + *s++ = 'i'; + break; + case FAT_INT32: + *s++ = 'l'; + break; + case FAT_NATIVEINT: + *s++ = 'n'; + break; + case FAT_INT64: + *s++ = 'L'; + break; + case FAT_STRING: + *s++ = 's'; + break; + case FAT_CHARACTER: + *s++ = 'c'; + break; + case FAT_FLOATINGPOINT: + *s++ = 'f'; + break; + case FAT_BOOLEAN: + *s++ = 'B'; + break; + case FAT_FUNCTION_A: + *s++ = 'a'; + break; + case FAT_FUNCTION_T: + *s++ = 't'; + break; + case FAT_FUNCTION_T2: + break; + case FAT_FORMAT_STRING: + *s++ = '('; + memcpy (s, p->signature, strlen (p->signature)); + s += strlen (p->signature); + *s++ = ')'; + break; + default: + abort (); + } + *s = '\0'; + } + return signature; +} + +/* When a type is specified via format string substitution, e.g. "%(%s%)", both + the variant without OCaml syntax "%s" and the variant with OCaml syntax "%S" + are allowed. */ +static format_arg_type_t +type_without_translator_constraint (format_arg_type_t type) +{ + switch (type & FAT_BASIC_MASK) + { + case FAT_STRING: + case FAT_CHARACTER: + case FAT_FLOATINGPOINT: + return (type & FAT_BASIC_MASK) | FAT_OPTIONAL_OCAML_SYNTAX; + default: + return type; + } +} + +/* Parse a piece of format string, until the matching terminating format + directive is encountered. + spec is the global struct spec. + format is the remainder of the format string. + It is updated upon valid return. + terminator is '\0' at the top-level, otherwise '}' or ')'. + translated is true when msgstr extensions should be accepted. + fdi is an array to be filled with format directive indicators, or NULL. + If the format string is invalid, false is returned and *invalid_reason is + set to an error message explaining why. */ +static bool +parse_upto (struct spec *spec, + const char **formatp, + char terminator, bool translated, + char *fdi, char **invalid_reason) +{ + const char *format = *formatp; + const char *const format_start = format; + size_t numbered_allocated; + size_t unnumbered_arg_count; + + spec->directives = 0; + spec->numbered_arg_count = 0; + spec->numbered = NULL; + numbered_allocated = 0; + unnumbered_arg_count = 0; + + for (; *format != '\0';) + /* Invariant: spec->numbered_arg_count == 0 || unnumbered_arg_count == 0. */ + if (*format++ == '%') + { + /* A directive. */ + size_t number = 0; + format_arg_type_t type; + char *signature = NULL; + + FDI_SET (format - 1, FMTDIR_START); + spec->directives++; + + if (translated && c_isdigit (*format)) + { + const char *f = format; + size_t m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (c_isdigit (*f)); + + if (*f == '$') + { + if (m == 0) + { + *invalid_reason = INVALID_ARGNO_0 (spec->directives); + FDI_SET (f, FMTDIR_ERROR); + goto bad_format; + } + number = m; + format = ++f; + } + } + + /* Parse flags. */ + while (*format == ' ' || *format == '+' || *format == '-' + || *format == '#' || *format == '0') + format++; + + /* Parse width. */ + if (*format == '*') + { + size_t width_number = 0; + + format++; + + if (translated && c_isdigit (*format)) + { + const char *f = format; + size_t m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (c_isdigit (*f)); + + if (*f == '$') + { + if (m == 0) + { + *invalid_reason = + INVALID_WIDTH_ARGNO_0 (spec->directives); + FDI_SET (f, FMTDIR_ERROR); + goto bad_format; + } + width_number = m; + format = ++f; + } + } + + if (width_number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (unnumbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + FDI_SET (format - 1, FMTDIR_ERROR); + goto bad_format; + } + + if (numbered_allocated == spec->numbered_arg_count) + { + numbered_allocated = 2 * numbered_allocated + 1; + spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg)); + } + spec->numbered[spec->numbered_arg_count].number = width_number; + spec->numbered[spec->numbered_arg_count].type = FAT_INTEGER; + spec->numbered_arg_count++; + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (spec->numbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + FDI_SET (format - 1, FMTDIR_ERROR); + goto bad_format; + } + + if (numbered_allocated == unnumbered_arg_count) + { + numbered_allocated = 2 * numbered_allocated + 1; + spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg)); + } + spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec->numbered[unnumbered_arg_count].type = FAT_INTEGER; + unnumbered_arg_count++; + } + } + else if (c_isdigit (*format)) + { + do format++; while (c_isdigit (*format)); + } + + /* Parse precision. */ + if (*format == '.') + { + format++; + + if (*format == '*') + { + size_t precision_number = 0; + + format++; + + if (translated && c_isdigit (*format)) + { + const char *f = format; + size_t m = 0; + + do + { + m = 10 * m + (*f - '0'); + f++; + } + while (c_isdigit (*f)); + + if (*f == '$') + { + if (m == 0) + { + *invalid_reason = + INVALID_PRECISION_ARGNO_0 (spec->directives); + FDI_SET (f, FMTDIR_ERROR); + goto bad_format; + } + precision_number = m; + format = ++f; + } + } + + if (precision_number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (unnumbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + FDI_SET (format - 1, FMTDIR_ERROR); + goto bad_format; + } + + if (numbered_allocated == spec->numbered_arg_count) + { + numbered_allocated = 2 * numbered_allocated + 1; + spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg)); + } + spec->numbered[spec->numbered_arg_count].number = precision_number; + spec->numbered[spec->numbered_arg_count].type = FAT_INTEGER; + spec->numbered_arg_count++; + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (spec->numbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + FDI_SET (format - 1, FMTDIR_ERROR); + goto bad_format; + } + + if (numbered_allocated == unnumbered_arg_count) + { + numbered_allocated = 2 * numbered_allocated + 1; + spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg)); + } + spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec->numbered[unnumbered_arg_count].type = FAT_INTEGER; + unnumbered_arg_count++; + } + } + else if (c_isdigit (*format)) + { + do format++; while (c_isdigit (*format)); + } + } + + /* Parse the specifier. */ + enum format_arg_type integer_type = FAT_INTEGER; + if (*format == 'l') + { + integer_type = FAT_INT32; + format++; + } + else if (*format == 'n') + { + integer_type = FAT_NATIVEINT; + format++; + } + else if (*format == 'L') + { + integer_type = FAT_INT64; + format++; + } + + switch (*format) + { + case 'd': + case 'i': + case 'u': + case 'x': case 'X': + case 'o': + type = integer_type; + break; + default: + if (integer_type != FAT_INTEGER) + --format; + switch (*format) + { + case 's': + type = FAT_STRING; + break; + case 'S': + type = FAT_STRING | FAT_OCAML_SYNTAX; + break; + case 'c': + type = FAT_CHARACTER; + break; + case 'C': + type = FAT_CHARACTER | FAT_OCAML_SYNTAX; + break; + case 'f': + case 'e': case 'E': + case 'g': case 'G': + case 'h': case 'H': + type = FAT_FLOATINGPOINT; + break; + case 'F': + type = FAT_FLOATINGPOINT | FAT_OCAML_SYNTAX; + break; + case 'B': + type = FAT_BOOLEAN; + break; + case 'a': + type = FAT_FUNCTION_A; + break; + case 't': + type = FAT_FUNCTION_T; + break; + case '{': + { + struct spec sub_spec; + *formatp = format; + if (!parse_upto (&sub_spec, formatp, '}', false, + fdi, invalid_reason)) + { + FDI_SET (**formatp == '\0' ? *formatp - 1 : *formatp, + FMTDIR_ERROR); + goto bad_format; + } + format = *formatp; + type = FAT_FORMAT_STRING; + signature = format_string_signature (&sub_spec); + destroy_spec (&sub_spec); + } + break; + case '}': + if (terminator != '}') + { + *invalid_reason = + xasprintf (_("Found '%s' without matching '%s'."), "%}", "%{"); + FDI_SET (format - 1, FMTDIR_ERROR); + goto bad_format; + } + spec->directives--; + goto done; + case '(': + { + struct spec sub_spec; + *formatp = format; + if (!parse_upto (&sub_spec, formatp, ')', false, + fdi, invalid_reason)) + { + FDI_SET (**formatp == '\0' ? *formatp - 1 : *formatp, + FMTDIR_ERROR); + goto bad_format; + } + format = *formatp; + type = FAT_FORMAT_STRING; + signature = format_string_signature (&sub_spec); + + if (number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (unnumbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + FDI_SET (format, FMTDIR_ERROR); + goto bad_format; + } + + size_t new_numbered_arg_count = + spec->numbered_arg_count + 1 + sub_spec.numbered_arg_count; + if (numbered_allocated < new_numbered_arg_count) + { + numbered_allocated = 2 * numbered_allocated + 1; + if (numbered_allocated < new_numbered_arg_count) + numbered_allocated = new_numbered_arg_count; + spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg)); + } + spec->numbered[spec->numbered_arg_count].number = number; + spec->numbered[spec->numbered_arg_count].type = type; + spec->numbered[spec->numbered_arg_count].signature = signature; + spec->numbered_arg_count++; + for (size_t i = 0; i < sub_spec.numbered_arg_count; i++) + { + spec->numbered[spec->numbered_arg_count].number = number + sub_spec.numbered[i].number; + spec->numbered[spec->numbered_arg_count].type = + type_without_translator_constraint (sub_spec.numbered[i].type); + if (sub_spec.numbered[i].type == FAT_FORMAT_STRING) + spec->numbered[spec->numbered_arg_count].signature = sub_spec.numbered[i].signature; + spec->numbered_arg_count++; + } + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (spec->numbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + FDI_SET (format, FMTDIR_ERROR); + goto bad_format; + } + + size_t new_unnumbered_arg_count = + unnumbered_arg_count + 1 + sub_spec.numbered_arg_count; + if (numbered_allocated < new_unnumbered_arg_count) + { + numbered_allocated = 2 * numbered_allocated + 1; + if (numbered_allocated < new_unnumbered_arg_count) + numbered_allocated = new_unnumbered_arg_count; + spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg)); + } + spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec->numbered[unnumbered_arg_count].type = type; + spec->numbered[unnumbered_arg_count].signature = signature; + unnumbered_arg_count++; + for (size_t i = 0; i < sub_spec.numbered_arg_count; i++) + { + spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec->numbered[unnumbered_arg_count].type = + type_without_translator_constraint (sub_spec.numbered[i].type); + if (sub_spec.numbered[i].type == FAT_FORMAT_STRING) + spec->numbered[unnumbered_arg_count].signature = sub_spec.numbered[i].signature; + unnumbered_arg_count++; + } + } + + free (sub_spec.numbered); + } + goto done_specifier; + case ')': + if (terminator != ')') + { + *invalid_reason = + xasprintf (_("Found '%s' without matching '%s'."), "%)", "%("); + FDI_SET (format - 1, FMTDIR_ERROR); + goto bad_format; + } + spec->directives--; + goto done; + case '!': + case '%': + case '@': + case ',': + type = FAT_NONE; + break; + default: + if (*format == '\0') + { + *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); + FDI_SET (format - 1, FMTDIR_ERROR); + } + else + { + *invalid_reason = + INVALID_CONVERSION_SPECIFIER (spec->directives, *format); + FDI_SET (format, FMTDIR_ERROR); + } + goto bad_format; + } + break; + } + + if (type != FAT_NONE) + { + if (number) + { + /* Numbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (unnumbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + FDI_SET (format, FMTDIR_ERROR); + goto bad_format; + } + + size_t new_numbered_arg_count = + spec->numbered_arg_count + 1 + (type == FAT_FUNCTION_T); + if (numbered_allocated < new_numbered_arg_count) + { + numbered_allocated = 2 * numbered_allocated + 1; + if (numbered_allocated < new_numbered_arg_count) + numbered_allocated = new_numbered_arg_count; + spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg)); + } + spec->numbered[spec->numbered_arg_count].number = number; + spec->numbered[spec->numbered_arg_count].type = type; + if (type == FAT_FORMAT_STRING) + spec->numbered[spec->numbered_arg_count].signature = signature; + spec->numbered_arg_count++; + if (type == FAT_FUNCTION_T) + { + spec->numbered[spec->numbered_arg_count].number = number + 1; + spec->numbered[spec->numbered_arg_count].type = FAT_FUNCTION_T2; + spec->numbered_arg_count++; + } + } + else + { + /* Unnumbered argument. */ + + /* Numbered and unnumbered specifications are exclusive. */ + if (spec->numbered_arg_count > 0) + { + *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED (); + FDI_SET (format, FMTDIR_ERROR); + goto bad_format; + } + + size_t new_unnumbered_arg_count = + unnumbered_arg_count + 1 + (type == FAT_FUNCTION_T); + if (numbered_allocated < new_unnumbered_arg_count) + { + numbered_allocated = 2 * numbered_allocated + 1; + if (numbered_allocated < new_unnumbered_arg_count) + numbered_allocated = new_unnumbered_arg_count; + spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg)); + } + spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec->numbered[unnumbered_arg_count].type = type; + if (type == FAT_FORMAT_STRING) + spec->numbered[unnumbered_arg_count].signature = signature; + unnumbered_arg_count++; + if (type == FAT_FUNCTION_T) + { + spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1; + spec->numbered[unnumbered_arg_count].type = FAT_FUNCTION_T2; + unnumbered_arg_count++; + } + } + } + + done_specifier: + FDI_SET (format, FMTDIR_END); + + format++; + } + + if (terminator != '\0') + { + *invalid_reason = xasprintf (_("Found '%%%c' without matching '%%%c'."), + terminator == '}' ? '{' : '(', terminator); + goto bad_format; + } + + done: + /* Convert the unnumbered argument array to numbered arguments. */ + if (unnumbered_arg_count > 0) + spec->numbered_arg_count = unnumbered_arg_count; + /* Sort the numbered argument array, and eliminate duplicates. */ + else if (spec->numbered_arg_count > 1) + { + size_t i, j; + bool err; + + qsort (spec->numbered, spec->numbered_arg_count, + sizeof (struct numbered_arg), numbered_arg_compare); + + /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ + err = false; + for (i = j = 0; i < spec->numbered_arg_count; i++) + if (j > 0 && spec->numbered[i].number == spec->numbered[j-1].number) + { + format_arg_type_t type1 = spec->numbered[i].type; + format_arg_type_t type2 = spec->numbered[j-1].type; + format_arg_type_t type_both; + + if (((type1 == type2) + && (type1 != FAT_FORMAT_STRING + || strcmp (spec->numbered[i].signature, + spec->numbered[j-1].signature) == 0)) + || (((type1 | type2) & FAT_OPTIONAL_OCAML_SYNTAX) != 0 + && (((type1 & ~FAT_OPTIONAL_OCAML_SYNTAX) | FAT_OCAML_SYNTAX) + == ((type2 & ~FAT_OPTIONAL_OCAML_SYNTAX) | FAT_OCAML_SYNTAX)))) + type_both = (type1 | type2) & ~FAT_OPTIONAL_OCAML_SYNTAX; + else + { + /* Incompatible types. */ + type_both = FAT_NONE; + if (!err) + *invalid_reason = + INVALID_INCOMPATIBLE_ARG_TYPES (spec->numbered[i].number); + err = true; + } + + spec->numbered[j-1].type = type_both; + if (type_both == FAT_FORMAT_STRING) + free (spec->numbered[i].signature); + } + else + { + if (j < i) + { + spec->numbered[j].number = spec->numbered[i].number; + spec->numbered[j].type = spec->numbered[i].type; + if (spec->numbered[j].type == FAT_FORMAT_STRING) + spec->numbered[j].signature = spec->numbered[i].signature; + } + j++; + } + spec->numbered_arg_count = j; + if (err) + /* *invalid_reason has already been set above. */ + goto bad_format; + } + + *formatp = format; + return true; + + bad_format: + destroy_spec (spec); + return false; +} + +static void * +format_parse (const char *format, bool translated, char *fdi, + char **invalid_reason) +{ + struct spec spec; + struct spec *result; + + if (!parse_upto (&spec, &format, '\0', translated, fdi, invalid_reason)) + return NULL; + + result = XMALLOC (struct spec); + *result = spec; + return result; +} + +static void +format_free (void *descr) +{ + struct spec *spec = (struct spec *) descr; + + destroy_spec (spec); + free (spec); +} + +static int +format_get_number_of_directives (void *descr) +{ + struct spec *spec = (struct spec *) descr; + + return spec->directives; +} + +static bool +format_check (void *msgid_descr, void *msgstr_descr, bool equality, + formatstring_error_logger_t error_logger, void *error_logger_data, + const char *pretty_msgid, const char *pretty_msgstr) +{ + struct spec *spec1 = (struct spec *) msgid_descr; + struct spec *spec2 = (struct spec *) msgstr_descr; + bool err = false; + + if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0) + { + size_t i, j; + size_t n1 = spec1->numbered_arg_count; + size_t n2 = spec2->numbered_arg_count; + + /* Check that the argument numbers are the same. + Both arrays are sorted. We search for the first difference. */ + for (i = 0, j = 0; i < n1 || j < n2; ) + { + int cmp = (i >= n1 ? 1 : + j >= n2 ? -1 : + spec1->numbered[i].number > spec2->numbered[j].number ? 1 : + spec1->numbered[i].number < spec2->numbered[j].number ? -1 : + 0); + + if (cmp > 0) + { + if (error_logger) + error_logger (error_logger_data, + _("a format specification for argument %zu, as in '%s', doesn't exist in '%s'"), + spec2->numbered[j].number, pretty_msgstr, + pretty_msgid); + err = true; + break; + } + else if (cmp < 0) + { + if (equality) + { + if (error_logger) + error_logger (error_logger_data, + _("a format specification for argument %zu doesn't exist in '%s'"), + spec1->numbered[i].number, pretty_msgstr); + err = true; + break; + } + else + i++; + } + else + j++, i++; + } + /* Check that the argument types are essentially the same. */ + if (!err) + for (i = 0, j = 0; j < n2; ) + { + if (spec1->numbered[i].number == spec2->numbered[j].number) + { + format_arg_type_t type1 = spec1->numbered[i].type; + format_arg_type_t type2 = spec2->numbered[j].type; + + if (!(((type1 == type2) + && (type1 != FAT_FORMAT_STRING + || strcmp (spec1->numbered[i].signature, + spec2->numbered[j].signature) == 0)) + || ((type2 & FAT_OPTIONAL_OCAML_SYNTAX) != 0 + && (type2 & ~FAT_OPTIONAL_OCAML_SYNTAX) + == (type1 & ~FAT_OCAML_SYNTAX)))) + { + if (error_logger) + error_logger (error_logger_data, + _("format specifications in '%s' and '%s' for argument %zu are not the same"), + pretty_msgid, pretty_msgstr, + spec2->numbered[j].number); + err = true; + break; + } + j++, i++; + } + else + i++; + } + } + + return err; +} + + +struct formatstring_parser formatstring_ocaml = +{ + format_parse, + format_free, + format_get_number_of_directives, + NULL, + format_check +}; + + +#ifdef TEST + +/* Test program: Print the argument list specification returned by + format_parse for strings read from standard input. */ + +#include + +static void +format_print (void *descr) +{ + struct spec *spec = (struct spec *) descr; + size_t last; + size_t i; + + if (spec == NULL) + { + printf ("INVALID"); + return; + } + + printf ("("); + last = 1; + for (i = 0; i < spec->numbered_arg_count; i++) + { + size_t number = spec->numbered[i].number; + + if (i > 0) + printf (" "); + if (number < last) + abort (); + for (; last < number; last++) + printf ("_ "); + switch (spec->numbered[i].type & FAT_BASIC_MASK) + { + case FAT_INTEGER: + printf ("i"); + break; + case FAT_INT32: + printf ("l"); + break; + case FAT_NATIVEINT: + printf ("n"); + break; + case FAT_INT64: + printf ("L"); + break; + case FAT_STRING: + printf ("s"); + break; + case FAT_CHARACTER: + printf ("c"); + break; + case FAT_FLOATINGPOINT: + printf ("f"); + break; + case FAT_BOOLEAN: + printf ("B"); + break; + case FAT_FUNCTION_A: + printf ("a"); + break; + case FAT_FUNCTION_T: + printf ("t1"); + break; + case FAT_FUNCTION_T2: + printf ("t2"); + break; + case FAT_FORMAT_STRING: + printf ("\"%s\"", spec->numbered[i].signature); + break; + default: + abort (); + } + if (spec->numbered[i].type & FAT_OCAML_SYNTAX) + printf ("!"); + if (spec->numbered[i].type & FAT_OPTIONAL_OCAML_SYNTAX) + printf ("?"); + last = number + 1; + } + printf (")"); +} + +int +main () +{ + for (;;) + { + char *line = NULL; + size_t line_size = 0; + int line_len; + char *invalid_reason; + void *descr; + + line_len = getline (&line, &line_size, stdin); + if (line_len < 0) + break; + if (line_len > 0 && line[line_len - 1] == '\n') + line[--line_len] = '\0'; + + invalid_reason = NULL; + descr = format_parse (line, true, NULL, &invalid_reason); + + format_print (descr); + printf ("\n"); + if (descr == NULL) + printf ("%s\n", invalid_reason); + + free (invalid_reason); + free (line); + } + + return 0; +} + +/* + * For Emacs M-x compile + * Local Variables: + * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-ocaml.c ../gnulib-lib/libgettextlib.la" + * End: + */ + +#endif /* TEST */ diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c index 8df7aa1ef..928938684 100644 --- a/gettext-tools/src/format.c +++ b/gettext-tools/src/format.c @@ -57,6 +57,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] = /* format_pascal */ &formatstring_pascal, /* format_modula2 */ &formatstring_modula2, /* format_d */ &formatstring_d, + /* format_ocaml */ &formatstring_ocaml, /* format_smalltalk */ &formatstring_smalltalk, /* format_qt */ &formatstring_qt, /* format_qt_plural */ &formatstring_qt_plural, diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h index 02e1531f1..c262a292b 100644 --- a/gettext-tools/src/format.h +++ b/gettext-tools/src/format.h @@ -123,6 +123,7 @@ extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_lua; extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_pascal; extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_modula2; extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_d; +extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_ocaml; extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_smalltalk; extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_qt; extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_qt_plural; diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c index 7a605b350..6db1e1809 100644 --- a/gettext-tools/src/message.c +++ b/gettext-tools/src/message.c @@ -57,6 +57,7 @@ const char *const format_language[NFORMATS] = /* format_pascal */ "object-pascal", /* format_modula2 */ "modula2", /* format_d */ "d", + /* format_ocaml */ "ocaml", /* format_smalltalk */ "smalltalk", /* format_qt */ "qt", /* format_qt_plursl */ "qt-plural", @@ -97,6 +98,7 @@ const char *const format_language_pretty[NFORMATS] = /* format_pascal */ "Object Pascal", /* format_modula2 */ "Modula-2", /* format_d */ "D", + /* format_ocaml */ "OCaml", /* format_smalltalk */ "Smalltalk", /* format_qt */ "Qt", /* format_qt_plural */ "Qt plural", diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h index 5790295cd..147bbd864 100644 --- a/gettext-tools/src/message.h +++ b/gettext-tools/src/message.h @@ -66,6 +66,7 @@ enum format_type format_pascal, format_modula2, format_d, + format_ocaml, format_smalltalk, format_qt, format_qt_plural, @@ -80,7 +81,7 @@ enum format_type format_gfc_internal, format_ycp }; -#define NFORMATS 36 /* Number of format_type enum values. */ +#define NFORMATS 37 /* Number of format_type enum values. */ extern LIBGETTEXTSRC_DLL_VARIABLE const char *const format_language[NFORMATS]; extern LIBGETTEXTSRC_DLL_VARIABLE const char *const format_language_pretty[NFORMATS]; diff --git a/gettext-tools/src/x-ocaml.c b/gettext-tools/src/x-ocaml.c index 2a6b021f9..5151b7e89 100644 --- a/gettext-tools/src/x-ocaml.c +++ b/gettext-tools/src/x-ocaml.c @@ -22,121 +22,964 @@ /* Specification. */ #include "x-ocaml.h" +#include #include +#include #include #include +#include #include #include "message.h" -#include "clean-temp.h" -#include "concat-filename.h" -#include "execute.h" -#include "xvasprintf.h" -#include "x-po.h" +#include "string-desc.h" +#include "xstring-desc.h" +#include "string-buffer.h" #include "xgettext.h" +#include "xg-pos.h" +#include "xg-mixed-string.h" +#include "xg-arglist-context.h" +#include "xg-arglist-callshape.h" +#include "xg-arglist-parser.h" +#include "xg-message.h" +#include "if-error.h" +#include "xalloc.h" +#include "read-file.h" +#include "unistr.h" +#include "po-charset.h" #include "gettext.h" -/* A convenience macro. I don't like writing gettext() every time. */ -#define _(str) gettext (str) +#define _(s) gettext (s) -/* We don't parse OCaml directly, but instead rely on the 'ocaml-gettext' - program, that invokes the 'ocaml-xgettext' program. Both are contained - in the 'opam' package named 'gettext': - https://opam.ocaml.org/packages/gettext/ - https://github.com/gildor478/ocaml-gettext - https://github.com/gildor478/ocaml-gettext/blob/master/doc/reference-manual.md +/* Use tree-sitter. + Documentation: */ +#include +extern const TSLanguage *tree_sitter_ocaml (void); - Comments start with '(*' and end with '*)' and can be nested. - Reference: + +/* The OCaml syntax is defined in . + + String syntax: Strings are delimited by double-quotes or by {id| |id} pairs. + Backslash is the escape character. Among the escape sequences, there is in + particular backslash-newline-spaces_or_tabs and \u{nnnn}. + Reference: + + Comment syntax: Comments start with '(*' and end with '*)' and can be nested. + References: + */ +#define DEBUG_OCAML 0 + /* ====================== Keyword set customization. ====================== */ -/* This function currently has no effect. */ +/* If true extract all strings. */ +static bool extract_all = false; + +static hash_table keywords; +static bool default_keywords = true; + void x_ocaml_extract_all (void) { + extract_all = true; } -/* This function currently has no effect. */ void -x_ocaml_keyword (const char *keyword) +x_ocaml_keyword (const char *name) { + if (name == NULL) + default_keywords = false; + else + { + const char *end; + struct callshape shape; + const char *colon; + + if (keywords.table == NULL) + hash_init (&keywords, 100); + + split_keywordspec (name, &end, &shape); + + /* The characters between name and end should form a valid identifier. + A colon means an invalid parse in split_keywordspec(). */ + colon = strchr (name, ':'); + if (colon == NULL || colon >= end) + insert_keyword_callshape (&keywords, name, end - name, &shape); + } +} + +/* Finish initializing the keywords hash table. + Called after argument processing, before each file is processed. */ +static void +init_keywords () +{ + if (default_keywords) + { + /* Compatible with ocaml-gettext/src/bin/ocaml-xgettext/xgettext.ml. */ + /* When adding new keywords here, also update the documentation in + xgettext.texi! */ + x_ocaml_keyword ("s_"); + x_ocaml_keyword ("f_"); + x_ocaml_keyword ("sn_:1,2"); + x_ocaml_keyword ("fn_:1,2"); + x_ocaml_keyword ("gettext:2"); + x_ocaml_keyword ("fgettext:2"); + x_ocaml_keyword ("dgettext:3"); + x_ocaml_keyword ("fdgettext:3"); + x_ocaml_keyword ("dcgettext:3"); + x_ocaml_keyword ("fdcgettext:3"); + x_ocaml_keyword ("ngettext:2,3"); + x_ocaml_keyword ("fngettext:2,3"); + x_ocaml_keyword ("dngettext:3,4"); + x_ocaml_keyword ("fdngettext:3,4"); + x_ocaml_keyword ("dcngettext:3,4"); + x_ocaml_keyword ("fdcngettext:3,4"); + default_keywords = false; + } } /* This function currently has no effect. */ void init_flag_table_ocaml (void) { + /* Compatible with ocaml-gettext/src/bin/ocaml-xgettext/xgettext.ml. */ + xgettext_record_flag ("s_:1:impossible-ocaml-format"); + xgettext_record_flag ("f_:1:ocaml-format"); + xgettext_record_flag ("sn_:1:impossible-ocaml-format"); + xgettext_record_flag ("sn_:2:impossible-ocaml-format"); + xgettext_record_flag ("fn_:1:ocaml-format"); + xgettext_record_flag ("fn_:2:ocaml-format"); + xgettext_record_flag ("gettext:2:impossible-ocaml-format"); + xgettext_record_flag ("fgettext:2:ocaml-format"); + xgettext_record_flag ("dgettext:3:impossible-ocaml-format"); + xgettext_record_flag ("fdgettext:3:ocaml-format"); + xgettext_record_flag ("dcgettext:3:impossible-ocaml-format"); + xgettext_record_flag ("fdcgettext:3:ocaml-format"); + xgettext_record_flag ("ngettext:2:impossible-ocaml-format"); + xgettext_record_flag ("ngettext:3:impossible-ocaml-format"); + xgettext_record_flag ("fngettext:2:ocaml-format"); + xgettext_record_flag ("fngettext:3:ocaml-format"); + xgettext_record_flag ("dngettext:3:impossible-ocaml-format"); + xgettext_record_flag ("dngettext:4:impossible-ocaml-format"); + xgettext_record_flag ("fdngettext:3:ocaml-format"); + xgettext_record_flag ("fdngettext:4:ocaml-format"); + xgettext_record_flag ("dcngettext:3:impossible-ocaml-format"); + xgettext_record_flag ("dcngettext:4:impossible-ocaml-format"); + xgettext_record_flag ("fdcngettext:3:ocaml-format"); + xgettext_record_flag ("fdcngettext:4:ocaml-format"); +} + + +/* ======================== Parsing via tree-sitter. ======================== */ +/* To understand this code, look at + tree-sitter-ocaml/grammars/ocaml/src/node-types.json + and + tree-sitter-ocaml/grammars/ocaml/src/grammar.json + */ + +/* The tree-sitter's language object. */ +static const TSLanguage *ts_language; + +/* ------------------------- Node types and symbols ------------------------- */ + +static TSSymbol +ts_language_symbol (const char *name, bool is_named) +{ + TSSymbol result = + ts_language_symbol_for_name (ts_language, name, strlen (name), is_named); + if (result == 0) + /* If we get here, the grammar has evolved in an incompatible way. */ + abort (); + return result; +} + +static TSFieldId +ts_language_field (const char *name) +{ + TSFieldId result = + ts_language_field_id_for_name (ts_language, name, strlen (name)); + if (result == 0) + /* If we get here, the grammar has evolved in an incompatible way. */ + abort (); + return result; +} + +/* Optimization: + Instead of + strcmp (ts_node_type (node), "string") == 0 + it is faster to do + ts_node_symbol (node) == ts_symbol_string + */ +static TSSymbol ts_symbol_comment; +static TSSymbol ts_symbol_string; +static TSSymbol ts_symbol_string_content; +static TSSymbol ts_symbol_escape_sequence; +static TSSymbol ts_symbol_quoted_string; +static TSSymbol ts_symbol_quoted_string_content; +static TSSymbol ts_symbol_infix_expression; +static TSSymbol ts_symbol_concat_operator; +static TSSymbol ts_symbol_application_expression; +static TSSymbol ts_symbol_value_path; +static TSSymbol ts_symbol_value_name; +static TSSymbol ts_symbol_parenthesized_expression; +static TSSymbol ts_symbol_lparen; +static TSSymbol ts_symbol_rparen; +static TSFieldId ts_field_operator; +static TSFieldId ts_field_left; +static TSFieldId ts_field_right; +static TSFieldId ts_field_function; + +static inline size_t +ts_node_line_number (TSNode node) +{ + return ts_node_start_point (node).row + 1; } +/* -------------------------------- The file -------------------------------- */ -/* ========================= Extracting strings. ========================== */ +/* The entire contents of the file being analyzed. */ +static const char *contents; +/* -------------------------------- Comments -------------------------------- */ + +/* These are for tracking whether comments count as immediately before + keyword. */ +static int last_comment_line; +static int last_non_comment_line; + +/* Saves a comment line. */ +static void save_comment_line (string_desc_t gist) +{ + /* Remove leading whitespace. */ + while (sd_length (gist) > 0 + && (sd_char_at (gist, 0) == ' ' + || sd_char_at (gist, 0) == '\t')) + gist = sd_substring (gist, 1, sd_length (gist)); + /* Remove trailing whitespace. */ + size_t len = sd_length (gist); + while (len > 0 + && (sd_char_at (gist, len - 1) == ' ' + || sd_char_at (gist, len - 1) == '\t')) + len--; + gist = sd_substring (gist, 0, len); + savable_comment_add (sd_c (gist)); +} + +/* Does the comment handling for NODE. + Updates savable_comment, last_comment_line, last_non_comment_line. + It is important that this function gets called + - for each node (not only the named nodes!), + - in depth-first traversal order. */ +static void handle_comments (TSNode node) +{ + #if DEBUG_OCAML + fprintf (stderr, "LCL=%d LNCL=%d node=[%s]|%s|\n", last_comment_line, last_non_comment_line, ts_node_type (node), ts_node_string (node)); + #endif + if (last_comment_line < last_non_comment_line + && last_non_comment_line < ts_node_line_number (node)) + /* We have skipped over a newline. This newline terminated a line + with non-comment tokens, after the last comment line. */ + savable_comment_reset (); + + if (ts_node_symbol (node) == ts_symbol_comment) + { + string_desc_t entire = + sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node), + contents + ts_node_start_byte (node)); + /* It should start and end with the comment markers. */ + if (!(sd_length (entire) >= 4 + && sd_char_at (entire, 0) == '(' + && sd_char_at (entire, 1) == '*' + && sd_char_at (entire, sd_length (entire) - 2) == '*' + && sd_char_at (entire, sd_length (entire) - 1) == ')')) + abort (); + string_desc_t gist = sd_substring (entire, 2, sd_length (entire) - 2); + /* Split into lines. + Remove leading and trailing whitespace from each line. */ + for (;;) + { + ptrdiff_t nl_index = sd_index (gist, '\n'); + if (nl_index >= 0) + { + save_comment_line (sd_substring (gist, 0, nl_index)); + gist = sd_substring (gist, nl_index + 1, sd_length (gist)); + } + else + { + save_comment_line (gist); + break; + } + } + last_comment_line = ts_node_end_point (node).row + 1; + } + else + last_non_comment_line = ts_node_line_number (node); +} + +/* ---------------------------- String literals ---------------------------- */ + +/* Determines whether NODE represents the string concatenation operator '^'. */ +static bool +is_string_concatenation_operator (TSNode node) +{ + if (ts_node_symbol (node) == ts_symbol_concat_operator) + { + string_desc_t operator_string = + sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node), + contents + ts_node_start_byte (node)); + if (sd_equals (operator_string, sd_from_c ("^"))) + return true; + } + return false; +} + +/* Determines whether NODE represents a string literal or the concatenation + of string literals (via the '^' operator). */ static bool -is_not_header (const message_ty *mp) +is_string_literal (TSNode node) { - return !is_header (mp); + start: + if (ts_node_symbol (node) == ts_symbol_string + || ts_node_symbol (node) == ts_symbol_quoted_string) + return true; + if (ts_node_symbol (node) == ts_symbol_infix_expression + && is_string_concatenation_operator (ts_node_child_by_field_id (node, ts_field_operator)) + /* Recurse into the left and right subnodes. */ + && is_string_literal (ts_node_child_by_field_id (node, ts_field_left))) + { + /*return is_string_literal (ts_node_child_by_field_id (node, ts_field_right));*/ + node = ts_node_child_by_field_id (node, ts_field_right); + goto start; + } + if (ts_node_symbol (node) == ts_symbol_parenthesized_expression) + { + uint32_t count = ts_node_child_count (node); + if (count > 0 + && ts_node_symbol (ts_node_child (node, 0)) == ts_symbol_lparen + && ts_node_symbol (ts_node_child (node, count - 1)) == ts_symbol_rparen) + { + uint32_t subnodes = 0; + uint32_t last_subnode_index = 0; + uint32_t i; + for (i = 1; i < count - 1; i++) + { + TSNode subnode = ts_node_child (node, i); + if (ts_node_is_named (subnode) + && ts_node_symbol (subnode) != ts_symbol_comment) + { + subnodes++; + last_subnode_index = i; + } + } + if (subnodes == 1) + { + TSNode subnode = ts_node_child (node, last_subnode_index); + /* Recurse. */ + /*return is_string_literal (subnode);*/ + node = subnode; + goto start; + } + } + } + + return false; +} + +/* Appends the string literal pieces from NODE to BUFFER. */ +static void +string_literal_accumulate_pieces (TSNode node, + struct string_buffer *buffer) +{ + start: + if (ts_node_symbol (node) == ts_symbol_string) + { + uint32_t count = ts_node_named_child_count (node); + uint32_t i; + for (i = 0; i < count; i++) + { + TSNode subnode = ts_node_named_child (node, i); + if (ts_node_symbol (subnode) == ts_symbol_string_content) + { + const char *subnode_start = contents + ts_node_start_byte (subnode); + const char *subnode_end = contents + ts_node_end_byte (subnode); + uint32_t subcount = ts_node_child_count (subnode); + #if DEBUG_OCAML + { + fprintf (stderr, "string_content children:\n"); + uint32_t j; + for (j = 0; j < subcount; j++) + fprintf (stderr, "%u -> [%s]|%s|\n", j, ts_node_type (ts_node_child (subnode, j)), ts_node_string (ts_node_child (subnode, j))); + } + #endif + /* Iterate over the children nodes of type escape_sequence. + Other children nodes, such as conversion_specification or + pretty_printing_indication, can be ignored. */ + uint32_t j; + for (j = 0; j < subcount; j++) + { + TSNode subsubnode = ts_node_child (subnode, j); + if (ts_node_symbol (subsubnode) == ts_symbol_escape_sequence) + { + const char *escape_start = contents + ts_node_start_byte (subsubnode); + const char *escape_end = contents + ts_node_end_byte (subsubnode); + sb_xappend_desc (buffer, + sd_new_addr (escape_start - subnode_start, subnode_start)); + + /* The escape sequence must start with a backslash. */ + if (!(escape_end - escape_start >= 2 && escape_start[0] == '\\')) + abort (); + /* tree-sitter's grammar.js allows more escape sequences + than the OCaml system. Give a warning for those cases + where the OCaml system gives an error. */ + bool invalid = false; + if (escape_end - escape_start >= 2 + && (escape_start[1] == '\n' || escape_start[1] == '\r')) + /* backslash-newline-spaces_or_tabs */ + ; + else if (escape_end - escape_start == 2) + { + switch (escape_start[1]) + { + case '\\': + case '"': + case '\'': + case ' ': + sb_xappend1 (buffer, escape_start[1]); + break; + case 'n': + sb_xappend1 (buffer, '\n'); + break; + case 'r': + sb_xappend1 (buffer, '\r'); + break; + case 't': + sb_xappend1 (buffer, '\t'); + break; + case 'b': + sb_xappend1 (buffer, 0x08); + break; + default: + abort (); + } + } + else if (escape_end - escape_start == 4 + && (escape_start[1] >= '0' + && escape_start[1] <= '9')) + { + /* Only exactly 3 decimal digits are accepted. */ + unsigned int value = 0; + const char *p; + for (p = escape_start + 1; p < escape_end; p++) + { + /* No overflow is possible. */ + char c = *p; + if (c >= '0' && c <= '9') + value = value * 10 + (c - '0'); + else + abort (); + } + if (value > 0xFF) + invalid = true; + if (!invalid) + sb_xappend1 (buffer, (unsigned char) value); + } + else if (escape_end - escape_start == 4 + && escape_start[1] == 'x') + { + /* Only exactly 2 hexadecimal digits are accepted. */ + unsigned int value = 0; + const char *p; + for (p = escape_start + 2; p < escape_end; p++) + { + /* No overflow is possible. */ + char c = *p; + if (c >= '0' && c <= '9') + value = (value << 4) + (c - '0'); + else if (c >= 'A' && c <= 'Z') + value = (value << 4) + (c - 'A' + 10); + else if (c >= 'a' && c <= 'z') + value = (value << 4) + (c - 'a' + 10); + else + abort (); + } + sb_xappend1 (buffer, (unsigned char) value); + } + else if (escape_end - escape_start == 5 + && escape_start[1] == 'o') + { + /* Only exactly 3 octal digits are accepted. */ + unsigned int value = 0; + const char *p; + for (p = escape_start + 2; p < escape_end; p++) + { + /* No overflow is possible. */ + char c = *p; + if (c >= '0' && c <= '7') + value = (value << 3) + (c - '0'); + else + abort (); + } + if (value > 0xFF) + abort (); + sb_xappend1 (buffer, (unsigned char) value); + } + else if (escape_end - escape_start > 4 + && escape_start[1] == 'u' + && escape_start[2] == '{' + && escape_end[-1] == '}') + { + if (escape_end - escape_start <= 4 + 6) + { + /* 1 to 6 hexadecimal digits are accepted. */ + unsigned int value = 0; + const char *p; + for (p = escape_start + 3; p < escape_end - 1; p++) + { + /* No overflow is possible. */ + char c = *p; + if (c >= '0' && c <= '9') + value = (value << 4) + (c - '0'); + else if (c >= 'A' && c <= 'Z') + value = (value << 4) + (c - 'A' + 10); + else if (c >= 'a' && c <= 'z') + value = (value << 4) + (c - 'a' + 10); + else + abort (); + } + if (value >= 0x110000 + || (value >= 0xD800 && value <= 0xDFFF)) + invalid = true; + if (!invalid) + { + uint8_t buf[6]; + int n = u8_uctomb (buf, value, sizeof (buf)); + if (n > 0) + sb_xappend_desc (buffer, + sd_new_addr (n, (const char *) buf)); + else + invalid = true; + } + } + else + invalid = true; + } + else + abort (); + if (invalid) + { + size_t line_number = ts_node_line_number (subnode); + if_error (IF_SEVERITY_WARNING, + logical_file_name, line_number, (size_t)(-1), false, + _("invalid escape sequence in string")); + } + + subnode_start = escape_end; + } + } + sb_xappend_desc (buffer, + sd_new_addr (subnode_end - subnode_start, subnode_start)); + } + else + abort (); + } + } + else if (ts_node_symbol (node) == ts_symbol_quoted_string) + { + uint32_t count = ts_node_named_child_count (node); + uint32_t i; + for (i = 0; i < count; i++) + { + TSNode subnode = ts_node_named_child (node, i); + if (ts_node_symbol (subnode) == ts_symbol_quoted_string_content) + { + /* We can ignore the children nodes here, since none of them can + be of type escape_sequence. */ + string_desc_t subnode_string = + sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode), + contents + ts_node_start_byte (subnode)); + sb_xappend_desc (buffer, subnode_string); + } + } + } + else if (ts_node_symbol (node) == ts_symbol_infix_expression + && is_string_concatenation_operator (ts_node_child_by_field_id (node, ts_field_operator))) + { + /* Recurse into the left and right subnodes. */ + string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_left), buffer); + /*string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_right), buffer);*/ + node = ts_node_child_by_field_id (node, ts_field_right); + goto start; + } + else if (ts_node_symbol (node) == ts_symbol_parenthesized_expression) + { + uint32_t count = ts_node_child_count (node); + /* is_string_literal has already checked that the first child node is '(', + that the last child node is ')', and that in-between there is exactly + one non-comment node. */ + if (!(count > 0)) + abort (); + uint32_t i; + for (i = 1; i < count - 1; i++) + { + TSNode subnode = ts_node_child (node, i); + if (ts_node_is_named (subnode) + && ts_node_symbol (subnode) != ts_symbol_comment) + { + /* Recurse. */ + /*string_literal_accumulate_pieces (subnode, buffer);*/ + node = subnode; + goto start; + } + } + abort (); + } + else + abort (); +} + +/* Combines the pieces of a string literal or concatenated string literal. + Returns a freshly allocated, mostly UTF-8 encoded string. */ +static char * +string_literal_value (TSNode node) +{ + struct string_buffer buffer; + sb_init (&buffer); + string_literal_accumulate_pieces (node, &buffer); + return sb_xdupfree_c (&buffer); +} + +/* --------------------- Parsing and string extraction --------------------- */ + +/* Context lookup table. */ +static flag_context_list_table_ty *flag_context_list_table; + +/* Maximum supported nesting depth. */ +#define MAX_NESTING_DEPTH 1000 + +static int nesting_depth; + +/* The file is parsed into an abstract syntax tree. Scan the syntax tree, + looking for a keyword in function position of a application_expression, + followed by followed by a string among the arguments. + When we see this pattern, we have something to remember. + + Normal handling: Look for + keyword ... msgid ... + Plural handling: Look for + keyword ... msgid ... msgid_plural ... + + We use recursion because the arguments before msgid or between msgid + and msgid_plural can contain subexpressions of the same form. */ + +/* Forward declarations. */ +static void extract_from_node (TSNode node, + bool ignore, + flag_region_ty *outer_region, + message_list_ty *mlp); + +/* Extracts messages from the function application consisting of + - FUNCTION_NODE: a tree node of type 'value_path', + - FUNCTION_NAME_NODE: a tree node of type 'value_name', + the last named node of FUNCTION_NODE, + - ARGS_NODE: a tree node of type 'application_expression', + of which FUNCTION_NAME is the 'function' field. + Extracted messages are added to MLP. */ +static void +extract_from_function_call (TSNode function_node, + TSNode function_name_node, + TSNode args_node, + flag_region_ty *outer_region, + message_list_ty *mlp) +{ + uint32_t args_count = ts_node_child_count (args_node); + + string_desc_t function_name = + sd_new_addr (ts_node_end_byte (function_name_node) - ts_node_start_byte (function_name_node), + contents + ts_node_start_byte (function_name_node)); + + /* Context iterator. */ + flag_context_list_iterator_ty next_context_iter = + flag_context_list_iterator ( + flag_context_list_table_lookup ( + flag_context_list_table, + sd_data (function_name), sd_length (function_name))); + + /* Information associated with the callee. */ + const struct callshapes *next_shapes = NULL; + + /* Look in the keywords table. */ + void *keyword_value; + if (hash_find_entry (&keywords, + sd_data (function_name), sd_length (function_name), + &keyword_value) + == 0) + next_shapes = (const struct callshapes *) keyword_value; + + if (next_shapes != NULL) + { + /* We have a function, named by a relevant identifier, with an argument + list. */ + + struct arglist_parser *argparser = + arglist_parser_alloc (mlp, next_shapes); + + /* Current argument number. */ + uint32_t arg; + uint32_t i; + + arg = 0; + for (i = 0; i < args_count; i++) + { + TSNode arg_node = ts_node_child (args_node, i); + handle_comments (arg_node); + if (ts_node_is_named (arg_node) + && ts_node_symbol (arg_node) != ts_symbol_comment + && !ts_node_eq (arg_node, function_node)) + { + arg++; + flag_region_ty *arg_region = + inheriting_region (outer_region, + flag_context_list_iterator_advance ( + &next_context_iter)); + + bool already_extracted = false; + if (is_string_literal (arg_node)) + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = ts_node_line_number (arg_node); + + char *string = string_literal_value (arg_node); + + if (extract_all) + { + remember_a_message (mlp, NULL, string, true, false, + arg_region, &pos, + NULL, savable_comment, true); + already_extracted = true; + } + else + { + mixed_string_ty *mixed_string = + mixed_string_alloc_utf8 (string, lc_string, + pos.file_name, pos.line_number); + arglist_parser_remember (argparser, arg, mixed_string, + arg_region, + pos.file_name, pos.line_number, + savable_comment, true); + } + } + + if (!already_extracted) + { + if (++nesting_depth > MAX_NESTING_DEPTH) + if_error (IF_SEVERITY_FATAL_ERROR, + logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false, + _("too many open parentheses")); + extract_from_node (arg_node, + false, + arg_region, + mlp); + nesting_depth--; + } + + unref_region (arg_region); + } + } + arglist_parser_done (argparser, arg); + return; + } + + /* Recurse. */ + + uint32_t i; + + for (i = 0; i < args_count; i++) + { + TSNode arg_node = ts_node_child (args_node, i); + handle_comments (arg_node); + if (ts_node_is_named (arg_node) + && ts_node_symbol (arg_node) != ts_symbol_comment) + { + flag_region_ty *arg_region = + inheriting_region (outer_region, + flag_context_list_iterator_advance ( + &next_context_iter)); + + if (++nesting_depth > MAX_NESTING_DEPTH) + if_error (IF_SEVERITY_FATAL_ERROR, + logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false, + _("too many open parentheses")); + extract_from_node (arg_node, + false, + arg_region, + mlp); + nesting_depth--; + + unref_region (arg_region); + } + } +} + +/* Extracts messages in the syntax tree NODE. + Extracted messages are added to MLP. */ +static void +extract_from_node (TSNode node, + bool ignore, + flag_region_ty *outer_region, + message_list_ty *mlp) +{ + if (extract_all && !ignore && is_string_literal (node)) + { + lex_pos_ty pos; + pos.file_name = logical_file_name; + pos.line_number = ts_node_line_number (node); + + char *string = string_literal_value (node); + + remember_a_message (mlp, NULL, string, true, false, + outer_region, &pos, + NULL, savable_comment, true); + } + + if (ts_node_symbol (node) == ts_symbol_application_expression + && ts_node_named_child_count (node) >= 2) + { + TSNode function_node = ts_node_named_child (node, 0); + /* This is the field called 'function'. */ + if (! ts_node_eq (ts_node_child_by_field_id (node, ts_field_function), + function_node)) + abort (); + if (ts_node_symbol (function_node) == ts_symbol_value_path + && ts_node_named_child_count (function_node) > 0) + { + TSNode function_name_node = + ts_node_named_child (function_node, + ts_node_named_child_count (function_node) - 1); + if (ts_node_symbol (function_name_node) == ts_symbol_value_name) + { + extract_from_function_call (function_node, function_name_node, node, + outer_region, + mlp); + return; + } + } + } + + /* Recurse. */ + if (!(ts_node_symbol (node) == ts_symbol_comment)) + { + ignore = ignore || is_string_literal (node); + uint32_t count = ts_node_child_count (node); + uint32_t i; + for (i = 0; i < count; i++) + { + TSNode subnode = ts_node_child (node, i); + handle_comments (subnode); + if (++nesting_depth > MAX_NESTING_DEPTH) + if_error (IF_SEVERITY_FATAL_ERROR, + logical_file_name, ts_node_line_number (subnode), (size_t)(-1), false, + _("too many open parentheses, brackets, or braces")); + extract_from_node (subnode, + ignore, + outer_region, + mlp); + nesting_depth--; + } + } } void -extract_ocaml (const char *found_in_dir, const char *real_filename, - const char *logical_filename, +extract_ocaml (FILE *f, + const char *real_filename, const char *logical_filename, flag_context_list_table_ty *flag_table, msgdomain_list_ty *mdlp) { - /* Invoke - ocaml-gettext --action extract --extract-pot .pot real_filename */ + message_list_ty *mlp = mdlp->item[0]->messages; - /* First, create a temporary directory where this invocation can place its - output. */ - struct temp_dir *tmpdir = create_temp_dir ("ocgt", NULL, false); - if (tmpdir == NULL) - exit (EXIT_FAILURE); + logical_file_name = xstrdup (logical_filename); - /* Prepare the temporary POT file name. */ - char *temp_file_name = xconcatenated_filename (tmpdir->dir_name, "temp.pot", NULL); - register_temp_file (tmpdir, temp_file_name); + last_comment_line = -1; + last_non_comment_line = -1; - /* Invoke ocaml-gettext. */ - const char *progname = "ocaml-gettext"; - { - const char *argv[7]; - int exitstatus; - /* Prepare arguments. */ - argv[0] = progname; - argv[1] = "--action"; - argv[2] = "extract"; - argv[3] = "--extract-pot"; - argv[4] = temp_file_name; - argv[5] = logical_filename; - argv[6] = NULL; - exitstatus = execute (progname, progname, argv, NULL, found_in_dir, - true, false, false, false, true, false, NULL); - if (exitstatus != 0) - error (EXIT_FAILURE, 0, _("%s subprocess failed with exit code %d"), - progname, exitstatus); - } + flag_context_list_table = flag_table; + nesting_depth = 0; + + init_keywords (); + + if (ts_language == NULL) + { + ts_language = tree_sitter_ocaml (); + ts_symbol_comment = ts_language_symbol ("comment", true); + ts_symbol_string = ts_language_symbol ("string", true); + ts_symbol_string_content = ts_language_symbol ("string_content", true); + ts_symbol_escape_sequence = ts_language_symbol ("escape_sequence", true); + ts_symbol_quoted_string = ts_language_symbol ("quoted_string", true);; + ts_symbol_quoted_string_content = ts_language_symbol ("quoted_string_content", true);; + ts_symbol_infix_expression = ts_language_symbol ("infix_expression", true); + ts_symbol_concat_operator = ts_language_symbol ("concat_operator", true); + ts_symbol_application_expression = ts_language_symbol ("application_expression", true); + ts_symbol_value_path = ts_language_symbol ("value_path", true); + ts_symbol_value_name = ts_language_symbol ("value_name", true); + ts_symbol_parenthesized_expression = ts_language_symbol ("parenthesized_expression", true); + ts_symbol_lparen = ts_language_symbol ("(", false); + ts_symbol_rparen = ts_language_symbol (")", false); + ts_field_operator = ts_language_field ("operator"); + ts_field_left = ts_language_field ("left"); + ts_field_right = ts_language_field ("right"); + ts_field_function = ts_language_field ("function"); + } + + /* Read the file into memory. */ + char *contents_data; + size_t contents_length; + contents_data = read_file (real_filename, 0, &contents_length); + if (contents_data == NULL) + error (EXIT_FAILURE, errno, _("error while reading \"%s\""), + real_filename); + + /* tree-sitter works only on files whose size fits in an uint32_t. */ + if (contents_length > 0xFFFFFFFFUL) + error (EXIT_FAILURE, 0, _("file \"%s\" is unsupported because too large"), + real_filename); - /* Read the resulting POT file. */ + /* OCaml source files are "expected to be" UTF-8 encoded. + */ + if (u8_check ((uint8_t *) contents_data, contents_length) != NULL) + error (EXIT_FAILURE, 0, + _("file \"%s\" is invalid because not UTF-8 encoded"), + real_filename); + xgettext_current_source_encoding = po_charset_utf8; + + /* Create a parser. */ + TSParser *parser = ts_parser_new (); + + /* Set the parser's language. */ + ts_parser_set_language (parser, ts_language); + + /* Parse the file, producing a syntax tree. */ + TSTree *tree = ts_parser_parse_string (parser, NULL, contents_data, contents_length); + + #if DEBUG_OCAML + /* For debugging: Print the tree. */ { - FILE *fp = fopen (temp_file_name, "r"); - if (fp == NULL) - error (EXIT_FAILURE, 0, _("%s subprocess did not create the expected file"), - progname); - char *dummy_filename = xasprintf (_("(output from '%s')"), progname); - extract_po (fp, temp_file_name, dummy_filename, flag_table, mdlp); - fclose (fp); - free (dummy_filename); + char *tree_as_string = ts_node_string (ts_tree_root_node (tree)); + fprintf (stderr, "Syntax tree: %s\n", tree_as_string); + free (tree_as_string); } + #endif - cleanup_temp_dir (tmpdir); + contents = contents_data; - if (xgettext_omit_header) - { - /* Remove the header entry. */ - if (mdlp->nitems > 0) - message_list_remove_if_not (mdlp->item[0]->messages, is_not_header); - } + extract_from_node (ts_tree_root_node (tree), + false, + null_context_region (), + mlp); + + ts_tree_delete (tree); + ts_parser_delete (parser); + free (contents_data); + + logical_file_name = NULL; } diff --git a/gettext-tools/src/x-ocaml.h b/gettext-tools/src/x-ocaml.h index 2efa4574f..0504daef0 100644 --- a/gettext-tools/src/x-ocaml.h +++ b/gettext-tools/src/x-ocaml.h @@ -31,10 +31,11 @@ extern "C" { { "ml", "OCaml" }, \ #define SCANNERS_OCAML \ - { "OCaml", NULL, extract_ocaml, \ - NULL, NULL, NULL }, \ + { "OCaml", extract_ocaml, NULL, \ + &flag_table_ocaml, &formatstring_ocaml, NULL }, \ -extern void extract_ocaml (const char *found_in_dir, const char *real_filename, +/* Scan a OCaml file and add its translatable strings to mdlp. */ +extern void extract_ocaml (FILE *fp, const char *real_filename, const char *logical_filename, flag_context_list_table_ty *flag_table, msgdomain_list_ty *mdlp); diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c index 92b77add0..5f7fa51df 100644 --- a/gettext-tools/src/xgettext.c +++ b/gettext-tools/src/xgettext.c @@ -225,6 +225,7 @@ static flag_context_list_table_ty flag_table_awk; static flag_context_list_table_ty flag_table_lua; static flag_context_list_table_ty flag_table_modula2; static flag_context_list_table_ty flag_table_d; +static flag_context_list_table_ty flag_table_ocaml; static flag_context_list_table_ty flag_table_vala; static flag_context_list_table_ty flag_table_tcl; static flag_context_list_table_ty flag_table_perl; @@ -1234,8 +1235,8 @@ Language specific options:\n")); (only languages C, C++, ObjectiveC, Python,\n\ Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\ Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\ - awk, Lua, Modula-2, D, Vala, Tcl, Perl, PHP,\n\ - GCC-source, Glade, GSettings)\n")); + awk, Lua, Modula-2, D, OCaml, Vala, Tcl, Perl,\n\ + PHP, GCC-source, Glade, GSettings)\n")); printf (_("\ -kWORD, --keyword=WORD look for WORD as an additional keyword\n\ -k, --keyword do not to use default keywords\n")); @@ -1243,8 +1244,8 @@ Language specific options:\n")); (only languages C, C++, ObjectiveC, Python,\n\ Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\ Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\ - awk, Lua, Modula-2, D, Vala, Tcl, Perl, PHP,\n\ - GCC-source, Glade, GSettings, Desktop)\n")); + awk, Lua, Modula-2, D, OCaml, Vala, Tcl, Perl,\n\ + PHP, GCC-source, Glade, GSettings, Desktop)\n")); printf (_("\ --flag=WORD:ARG:FLAG additional flag for strings inside the argument\n\ number ARG of keyword WORD\n")); @@ -1252,8 +1253,8 @@ Language specific options:\n")); (only languages C, C++, ObjectiveC, Python,\n\ Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\ Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\ - awk, Lua, Modula-2, D, Vala, Tcl, Perl, PHP,\n\ - GCC-source, YCP)\n")); + awk, Lua, Modula-2, D, OCaml, Vala, Tcl, Perl,\n\ + PHP, GCC-source, YCP)\n")); printf (_("\ --tag=WORD:FORMAT defines the behaviour of tagged template literals\n\ with tag WORD\n")); @@ -1786,6 +1787,11 @@ xgettext_record_flag (const char *optionstring) name_start, name_end, argnum, value, pass); break; + case format_ocaml: + flag_context_list_table_insert (&flag_table_ocaml, XFORMAT_PRIMARY, + name_start, name_end, + argnum, value, pass); + break; case format_smalltalk: break; case format_qt: diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am index f31668999..3ff4ef642 100644 --- a/gettext-tools/tests/Makefile.am +++ b/gettext-tools/tests/Makefile.am @@ -145,6 +145,7 @@ TESTS = gettext-1 gettext-2 \ xgettext-lua-stackovfl-1 xgettext-lua-stackovfl-2 \ xgettext-lua-stackovfl-3 xgettext-lua-stackovfl-4 \ xgettext-modula2-1 xgettext-modula2-2 \ + xgettext-ocaml-1 xgettext-ocaml-2 \ xgettext-objc-1 xgettext-objc-2 \ xgettext-perl-1 xgettext-perl-2 xgettext-perl-3 xgettext-perl-4 \ xgettext-perl-5 xgettext-perl-6 xgettext-perl-7 xgettext-perl-8 \ @@ -214,6 +215,7 @@ TESTS = gettext-1 gettext-2 \ format-lisp-1 format-lisp-2 \ format-lua-1 format-lua-2 \ format-modula2-1 format-modula2-2 \ + format-ocaml-1 format-ocaml-2 \ format-php-1 format-php-2 format-php-3 \ format-python-1 format-python-2 format-python-3 \ format-python-brace-1 format-python-brace-2 \ diff --git a/gettext-tools/tests/format-ocaml-1 b/gettext-tools/tests/format-ocaml-1 new file mode 100755 index 000000000..0a9b4dc45 --- /dev/null +++ b/gettext-tools/tests/format-ocaml-1 @@ -0,0 +1,231 @@ +#! /bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test recognition of OCaml format strings. + +cat <<\EOF > f-oc-1.data +# Valid: no argument +"abc%!" +# Valid: no argument +"abc%%" +# Valid: no argument +"abc%@" +# Valid: no argument +"abc%," +# Valid: one integer argument +"abc%d" +# Valid: one integer argument +"abc%i" +# Valid: one integer argument +"abc%u" +# Valid: one integer argument +"abc%x" +# Valid: one integer argument +"abc%X" +# Valid: one integer argument +"abc%o" +# Valid: one int32 argument +"abc%ld" +# Valid: one int32 argument +"abc%li" +# Valid: one int32 argument +"abc%lu" +# Valid: one int32 argument +"abc%lx" +# Valid: one int32 argument +"abc%lX" +# Valid: one int32 argument +"abc%lo" +# Valid: one nativeint argument +"abc%nd" +# Valid: one nativeint argument +"abc%ni" +# Valid: one nativeint argument +"abc%nu" +# Valid: one nativeint argument +"abc%nx" +# Valid: one nativeint argument +"abc%nX" +# Valid: one nativeint argument +"abc%no" +# Valid: one int64 argument +"abc%Ld" +# Valid: one int64 argument +"abc%Li" +# Valid: one int64 argument +"abc%Lu" +# Valid: one int64 argument +"abc%Lx" +# Valid: one int64 argument +"abc%LX" +# Valid: one int64 argument +"abc%Lo" +# Valid: one string argument +"abc%s" +# Valid: one string argument +"abc%S" +# Valid: one character argument +"abc%c" +# Valid: one character argument +"abc%C" +# Valid: one floating-point argument +"abc%f" +# Valid: one floating-point argument +"abc%e" +# Valid: one floating-point argument +"abc%E" +# Valid: one floating-point argument +"abc%g" +# Valid: one floating-point argument +"abc%G" +# Valid: one floating-point argument +"abc%h" +# Valid: one floating-point argument +"abc%H" +# Valid: one floating-point argument +"abc%F" +# Valid: one boolean argument +"abc%B" +# Valid: one function argument +"abc%a" +# Valid: one function-with-arg argument +"abc%t" +# Valid: one format-string argument +"abc%{%s%}" +# Valid: one format-string argument +"abc%{%S%}" +# Valid: one format-string argument and one string argument +"abc%(%s%)" +# Valid: one format-string argument and one string argument +"abc%(%S%)" +# Valid: one argument with flags +"abc%0#g" +# Valid: one argument with width +"abc%2g" +# Valid: one argument with width +"abc%*g" +# Valid: one argument with precision +"abc%.4g" +# Valid: one argument with precision +"abc%.*g" +# Valid: one argument with width and precision +"abc%14.4g" +# Valid: one argument with width and precision +"abc%14.*g" +# Valid: one argument with width and precision +"abc%*.4g" +# Valid: one argument with width and precision +"abc%*.*g" +# Invalid: unterminated +"abc%" +# Invalid: unterminated +"abc%l" +# Invalid: unterminated +"abc%n" +# Invalid: unterminated +"abc%L" +# Invalid: unknown format specifier +"abc%y" +# Invalid: unknown format specifier +"abc%A" +# Invalid: unknown format specifier +"abc%T" +# Invalid: %{ and %} don't match +"abc%{%s" +# Invalid: %{ and %} don't match +"abc%s%}" +# Invalid: %{ and %} don't match +"abc%s%}%{" +# Invalid: %( and %) don't match +"abc%(%s" +# Invalid: %( and %) don't match +"abc%s%)" +# Invalid: %( and %) don't match +"abc%s%)%(" +# Invalid: %{%} and %(%) nesting +"abc%{%(%}%)" +# Invalid: %{%} and %(%) nesting +"abc%(%{%)%}" +# Invalid: flags after width +"abc%*0g" +# Valid: null precision +"abc%.f" +# Invalid: twice precision +"abc%.4.2g" +# Valid: three arguments +"abc%d%u%u" +# Valid only in msgstr: a numbered argument +"abc%1$d" +# Invalid: zero +"abc%0$d" +# Valid only in msgstr: two-digit numbered arguments +"abc%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz" +# Invalid: unterminated number +"abc%1" +# Invalid: flags before number +"abc%+1$d" +# Valid only in msgstr: three arguments, two with same number +"abc%1$4x,%2$c,%1$u" +# Invalid: argument with conflicting types +"abc%1$4x,%2$c,%1$s" +# Valid only in msgstr: no conflicting types +"abc%2$t%1$d" +# Invalid: argument with conflicting types +"abc%1$t%2$d" +# Valid only in msgstr: no conflict +"abc%1$4x,%2$c,%1$u" +# Invalid: mixing of numbered and unnumbered arguments +"abc%d%2$x" +# Valid only in msgstr: numbered argument with constant precision +"abc%1$.9x" +# Invalid: mixing of numbered and unnumbered arguments +"abc%1$.*x" +# Valid only in msgstr: missing non-final argument +"abc%2$x%3$s" +# Valid only in msgstr: permutation +"abc%2$ddef%1$d" +# Valid only in msgstr: multiple uses of same argument +"abc%2$xdef%1$sghi%2$x" +# Valid only in msgstr: one argument with width +"abc%2$#*1$g" +# Valid only in msgstr: one argument with width and precision +"abc%3$*2$.*1$g" +# Invalid: zero +"abc%2$*0$.*1$g" +EOF + +: ${XGETTEXT=xgettext} +n=0 +while read comment; do + read string + n=`expr $n + 1` + cat < f-oc-1-$n.in +let a = f_ ${string} +EOF + ${XGETTEXT} -L OCaml -o f-oc-1-$n.po f-oc-1-$n.in || Exit 1 + test -f f-oc-1-$n.po || Exit 1 + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if grep ocaml-format f-oc-1-$n.po > /dev/null; then + : + else + fail=yes + fi + else + if grep ocaml-format f-oc-1-$n.po > /dev/null; then + fail=yes + else + : + fi + fi + if test -n "$fail"; then + echo "Format string recognition error:" 1>&2 + cat f-oc-1-$n.in 1>&2 + echo "Got:" 1>&2 + cat f-oc-1-$n.po 1>&2 + Exit 1 + fi + rm -f f-oc-1-$n.in f-oc-1-$n.po +done < f-oc-1.data + +Exit 0 diff --git a/gettext-tools/tests/format-ocaml-2 b/gettext-tools/tests/format-ocaml-2 new file mode 100755 index 000000000..b6d220489 --- /dev/null +++ b/gettext-tools/tests/format-ocaml-2 @@ -0,0 +1,718 @@ +#! /bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test checking of OCaml format strings. + +cat <<\EOF > f-oc-2.data +# Valid: %% doesn't count +msgid "abc%%def" +msgstr "xyz" +# Invalid: invalid msgstr +msgid "abc%%def" +msgstr "xyz%" +# Valid: same arguments +msgid "abc%s%gdef" +msgstr "xyz%s%g" +# Valid: same arguments, with different widths +msgid "abc%2sdef" +msgstr "xyz%3s" +# Valid: same arguments but in numbered syntax +msgid "abc%d" +msgstr "xyz%1$d" +# Valid: same arguments but in numbered syntax +msgid "abc%s%gdef" +msgstr "xyz%1$s%2$g" +# Valid: permutation +msgid "abc%s%g%cdef" +msgstr "xyz%3$c%2$g%1$s" +# Invalid: too few arguments +msgid "abc%sdef%u" +msgstr "xyz%1$s" +# Invalid: too few arguments +msgid "abc%sdef%u" +msgstr "xyz%s" +# Invalid: too many arguments +msgid "abc%udef" +msgstr "xyz%uvw%c" +# Valid: same numbered arguments, with different widths +msgid "abc%4s%5s" +msgstr "xyz%2$4s%1$5s" +# Invalid: missing final argument +msgid "abc%udef%s" +msgstr "xyz%1$u" +# Invalid: missing non-final argument +msgid "abc%sdef%u" +msgstr "xyz%2$u" +# Invalid: added argument +msgid "abc%udef" +msgstr "xyz%1$uvw%2$c" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%i" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%u" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%x" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%X" +# Valid: type compatibility +msgid "abc%d" +msgstr "xyz%o" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%u" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%x" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%X" +# Valid: type compatibility +msgid "abc%i" +msgstr "xyz%o" +# Valid: type compatibility +msgid "abc%u" +msgstr "xyz%x" +# Valid: type compatibility +msgid "abc%u" +msgstr "xyz%X" +# Valid: type compatibility +msgid "abc%u" +msgstr "xyz%o" +# Valid: type compatibility +msgid "abc%x" +msgstr "xyz%X" +# Valid: type compatibility +msgid "abc%x" +msgstr "xyz%o" +# Valid: type compatibility +msgid "abc%X" +msgstr "xyz%o" +# Valid: type compatibility +msgid "abc%ld" +msgstr "xyz%li" +# Valid: type compatibility +msgid "abc%ld" +msgstr "xyz%lu" +# Valid: type compatibility +msgid "abc%ld" +msgstr "xyz%lx" +# Valid: type compatibility +msgid "abc%ld" +msgstr "xyz%lX" +# Valid: type compatibility +msgid "abc%ld" +msgstr "xyz%lo" +# Valid: type compatibility +msgid "abc%li" +msgstr "xyz%lu" +# Valid: type compatibility +msgid "abc%li" +msgstr "xyz%lx" +# Valid: type compatibility +msgid "abc%li" +msgstr "xyz%lX" +# Valid: type compatibility +msgid "abc%li" +msgstr "xyz%lo" +# Valid: type compatibility +msgid "abc%lu" +msgstr "xyz%lx" +# Valid: type compatibility +msgid "abc%lu" +msgstr "xyz%lX" +# Valid: type compatibility +msgid "abc%lu" +msgstr "xyz%lo" +# Valid: type compatibility +msgid "abc%lx" +msgstr "xyz%lX" +# Valid: type compatibility +msgid "abc%lx" +msgstr "xyz%lo" +# Valid: type compatibility +msgid "abc%lX" +msgstr "xyz%lo" +# Valid: type compatibility +msgid "abc%nd" +msgstr "xyz%ni" +# Valid: type compatibility +msgid "abc%nd" +msgstr "xyz%nu" +# Valid: type compatibility +msgid "abc%nd" +msgstr "xyz%nx" +# Valid: type compatibility +msgid "abc%nd" +msgstr "xyz%nX" +# Valid: type compatibility +msgid "abc%nd" +msgstr "xyz%no" +# Valid: type compatibility +msgid "abc%ni" +msgstr "xyz%nu" +# Valid: type compatibility +msgid "abc%ni" +msgstr "xyz%nx" +# Valid: type compatibility +msgid "abc%ni" +msgstr "xyz%nX" +# Valid: type compatibility +msgid "abc%ni" +msgstr "xyz%no" +# Valid: type compatibility +msgid "abc%nu" +msgstr "xyz%nx" +# Valid: type compatibility +msgid "abc%nu" +msgstr "xyz%nX" +# Valid: type compatibility +msgid "abc%nu" +msgstr "xyz%no" +# Valid: type compatibility +msgid "abc%nx" +msgstr "xyz%nX" +# Valid: type compatibility +msgid "abc%nx" +msgstr "xyz%no" +# Valid: type compatibility +msgid "abc%nX" +msgstr "xyz%no" +# Valid: type compatibility +msgid "abc%Ld" +msgstr "xyz%Li" +# Valid: type compatibility +msgid "abc%Ld" +msgstr "xyz%Lu" +# Valid: type compatibility +msgid "abc%Ld" +msgstr "xyz%Lx" +# Valid: type compatibility +msgid "abc%Ld" +msgstr "xyz%LX" +# Valid: type compatibility +msgid "abc%Ld" +msgstr "xyz%Lo" +# Valid: type compatibility +msgid "abc%Li" +msgstr "xyz%Lu" +# Valid: type compatibility +msgid "abc%Li" +msgstr "xyz%Lx" +# Valid: type compatibility +msgid "abc%Li" +msgstr "xyz%LX" +# Valid: type compatibility +msgid "abc%Li" +msgstr "xyz%Lo" +# Valid: type compatibility +msgid "abc%Lu" +msgstr "xyz%Lx" +# Valid: type compatibility +msgid "abc%Lu" +msgstr "xyz%LX" +# Valid: type compatibility +msgid "abc%Lu" +msgstr "xyz%Lo" +# Valid: type compatibility +msgid "abc%Lx" +msgstr "xyz%LX" +# Valid: type compatibility +msgid "abc%Lx" +msgstr "xyz%Lo" +# Valid: type compatibility +msgid "abc%LX" +msgstr "xyz%Lo" +# Valid: type compatibility +msgid "abc%f" +msgstr "xyz%e" +# Valid: type compatibility +msgid "abc%f" +msgstr "xyz%E" +# Valid: type compatibility +msgid "abc%f" +msgstr "xyz%g" +# Valid: type compatibility +msgid "abc%f" +msgstr "xyz%G" +# Valid: type compatibility +msgid "abc%f" +msgstr "xyz%h" +# Valid: type compatibility +msgid "abc%f" +msgstr "xyz%H" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%E" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%g" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%G" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%h" +# Valid: type compatibility +msgid "abc%e" +msgstr "xyz%H" +# Valid: type compatibility +msgid "abc%E" +msgstr "xyz%g" +# Valid: type compatibility +msgid "abc%E" +msgstr "xyz%G" +# Valid: type compatibility +msgid "abc%E" +msgstr "xyz%h" +# Valid: type compatibility +msgid "abc%E" +msgstr "xyz%H" +# Valid: type compatibility +msgid "abc%g" +msgstr "xyz%G" +# Valid: type compatibility +msgid "abc%g" +msgstr "xyz%h" +# Valid: type compatibility +msgid "abc%g" +msgstr "xyz%H" +# Valid: type compatibility +msgid "abc%G" +msgstr "xyz%h" +# Valid: type compatibility +msgid "abc%G" +msgstr "xyz%H" +# Valid: type compatibility +msgid "abc%h" +msgstr "xyz%H" +# Valid: type compatibility +msgid "abc%{%s%}" +msgstr "xyz%{%S%}" +# Valid: type compatibility +msgid "abc%{%S%}" +msgstr "xyz%{%s%}" +# Valid: type compatibility +msgid "abc%{%c%}" +msgstr "xyz%{%C%}" +# Valid: type compatibility +msgid "abc%{%C%}" +msgstr "xyz%{%c%}" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%ld" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%nd" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%Ld" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%c" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%C" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%F" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%d" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%nd" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%Ld" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%c" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%C" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%F" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%ld" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%Ld" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%c" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%C" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%F" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%nd" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%s" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%c" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%C" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%F" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%Ld" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%S" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%c" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%C" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%F" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%s" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%S" +msgstr "xyz%c" +# Invalid: type incompatibility +msgid "abc%S" +msgstr "xyz%C" +# Invalid: type incompatibility +msgid "abc%S" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%S" +msgstr "xyz%F" +# Invalid: type incompatibility +msgid "abc%S" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%S" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%S" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%S" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%S" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%C" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%F" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%c" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%C" +msgstr "xyz%f" +# Invalid: type incompatibility +msgid "abc%C" +msgstr "xyz%F" +# Invalid: type incompatibility +msgid "abc%C" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%C" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%C" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%C" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%C" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%f" +msgstr "xyz%F" +# Invalid: type incompatibility +msgid "abc%f" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%f" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%f" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%f" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%f" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%F" +msgstr "xyz%B" +# Invalid: type incompatibility +msgid "abc%F" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%F" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%F" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%F" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%B" +msgstr "xyz%a" +# Invalid: type incompatibility +msgid "abc%B" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%B" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%B" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%a" +msgstr "xyz%t" +# Invalid: type incompatibility +msgid "abc%a" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%a" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility +msgid "abc%t" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility +msgid "abc%t" +msgstr "xyz%(%s%)" +# Invalid: type incompatibility of format strings +msgid "abc%(hhh%s%i%)def" +msgstr "xyz%(hhh%s%)%i" +# Invalid: too many arguments +msgid "abc%{%s%}" +msgstr "xyz%(%s%)" +# Invalid: too few arguments +msgid "abc%(%s%)" +msgstr "xyz%{%s%}" +# Invalid: type incompatibility for width +msgid "abc%g%*g" +msgstr "xyz%*g%g" +# Invalid: zero +msgid "abc" +msgstr "xyz%0$d" +# Valid: two-digit numbered arguments +msgid "abc%d%d%d%d%d%d%d%d%d%d%d" +msgstr "xyz%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz" +# Invalid: unterminated number +msgid "abc" +msgstr "xyz%1" +# Invalid: flags before number +msgid "abc%d" +msgstr "xyz%+1$d" +# Valid: three arguments, two with same number +msgid "abc%d%c" +msgstr "xyz%1$4x,%2$c,%1$u" +# Invalid: argument with conflicting types +msgid "abc%d%c" +msgstr "xyz%1$4x,%2$c,%1$s" +# Valid: no conflicting types +msgid "abc%d%t" +msgstr "xyz%2$t%1$d" +# Invalid: argument with conflicting types +msgid "abc%t%d" +msgstr "xyz%1$t%2$d" +# Valid: no conflict +msgid "abc%u%c%a" +msgstr "xyz%1$4x,%2$c,%1$u,%3$a" +# Invalid: mixing of numbered and unnumbered arguments +msgid "abc%d%x" +msgstr "xyz%d%2$x" +# Valid: numbered argument with constant precision +msgid "abc%.9x" +msgstr "xyz%1$.9x" +# Invalid: mixing of numbered and unnumbered arguments +msgid "abc%d%d" +msgstr "xyz%1$.*x" +# Invalid: missing non-final argument +msgid "abc%d%x%s" +msgstr "xyz%2$x%3$s" +# Valid: permutation +msgid "abc%d%d" +msgstr "xyz%2$ddef%1$d" +# Valid: multiple uses of same argument +msgid "abc%s%x" +msgstr "xyz%2$xdef%1$sghi%2$x" +# Valid: one argument with width +msgid "abc%i%g" +msgstr "xyz%2$#*1$g" +# Valid: one argument with width and precision +msgid "abc%i%i%g" +msgstr "xyz%3$*2$.*1$g" +# Invalid: zero +msgid "abc%i%g" +msgstr "xyz%2$*0$.*1$g" +EOF + +: ${MSGFMT=msgfmt} +n=0 +while read comment; do + read msgid_line + read msgstr_line + n=`expr $n + 1` + cat < f-oc-2-$n.po +#, ocaml-format +${msgid_line} +${msgstr_line} +EOF + fail= + if echo "$comment" | grep 'Valid:' > /dev/null; then + if ${MSGFMT} --check-format -o f-oc-2-$n.mo f-oc-2-$n.po; then + : + else + fail=yes + fi + else + ${MSGFMT} --check-format -o f-oc-2-$n.mo f-oc-2-$n.po 2> /dev/null + if test $? = 1; then + : + else + fail=yes + fi + fi + if test -n "$fail"; then + echo "Format string checking error:" 1>&2 + cat f-oc-2-$n.po 1>&2 + Exit 1 + fi + rm -f f-oc-2-$n.po f-oc-2-$n.mo +done < f-oc-2.data + +Exit 0 diff --git a/gettext-tools/tests/xgettext-ocaml-1 b/gettext-tools/tests/xgettext-ocaml-1 new file mode 100755 index 000000000..a0ecd5d84 --- /dev/null +++ b/gettext-tools/tests/xgettext-ocaml-1 @@ -0,0 +1,102 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test OCaml support: Simple things. + +cat <<\EOF > xg-oc-1.ml + (* A one-line comment. *) + let test1 = s_ "Test String 1" in + (* Two one-line *) + (* comments *) + let test2 = (s_ "Test String 2") in + (* A multi-line + comment. *) + let test3 = s_("Test String 3") in + (* A (* nesting *) (* one-line *) comment. *) + let test4 = s_ "Test String 4" in + (* A (* + nesting + *) + (* + multi-line *) + comment. *) + let test5 = s_ "Test String 5" in + (* + s_("Not extracted"); + *) + let dummy = 0 in + (* OCaml has string literal concatenation. *) + let test6 = s_("Test " ^ + "String " + ^ "6") in + (* Empty string. *) + let test7 = s_ "" in + + (* sprintf expects a format string. *) + sprintf (f_ "weight %u") w + + print_string(s_ "Test string 11 %s") + let n = 2 in + let s = "2" in + print_string(sprintf(fn_ "%0s piece of cake" "%s pieces of cake" n) s) +EOF + +: ${XGETTEXT=xgettext} +${XGETTEXT} --omit-header --no-location -c -d xg-oc-1.tmp xg-oc-1.ml || Exit 1 +LC_ALL=C tr -d '\r' < xg-oc-1.tmp.po > xg-oc-1.po || Exit 1 + +cat <<\EOF > xg-oc-1.ok +#. A one-line comment. +msgid "Test String 1" +msgstr "" + +#. Two one-line +#. comments +msgid "Test String 2" +msgstr "" + +#. A multi-line +#. comment. +msgid "Test String 3" +msgstr "" + +#. A (* nesting *) (* one-line *) comment. +msgid "Test String 4" +msgstr "" + +#. A (* +#. nesting +#. *) +#. (* +#. multi-line *) +#. comment. +msgid "Test String 5" +msgstr "" + +#. OCaml has string literal concatenation. +msgid "Test String 6" +msgstr "" + +#. Empty string. +msgid "" +msgstr "" + +#. sprintf expects a format string. +#, ocaml-format +msgid "weight %u" +msgstr "" + +msgid "Test string 11 %s" +msgstr "" + +#, ocaml-format +msgid "%0s piece of cake" +msgid_plural "%s pieces of cake" +msgstr[0] "" +msgstr[1] "" +EOF + +: ${DIFF=diff} +${DIFF} xg-oc-1.ok xg-oc-1.po || Exit 1 + +exit 0 diff --git a/gettext-tools/tests/xgettext-ocaml-2 b/gettext-tools/tests/xgettext-ocaml-2 new file mode 100755 index 000000000..30f9d9b85 --- /dev/null +++ b/gettext-tools/tests/xgettext-ocaml-2 @@ -0,0 +1,58 @@ +#!/bin/sh +. "${srcdir=.}/init.sh"; path_prepend_ . ../src + +# Test OCaml support: String literals. + +cat <<\EOF > xg-oc-2.ml +let s1 = s_ {abracadabra|Test string 1 abc\ndef\tghi\\jkl\ + mno\u{20AC}pqr|xyz|abracadabra} in +let s2 = s_ "Test string 2 abc\ndef\tghi\\jkl\ + mno\u{20AC}pqr\ stu\'vwx" in +let s3 = s_ ("Test string 3" ^ " abc" ^ "def") in +let s4 = s_ "Test string 4 \0377\064\x404\o1006\u{1F603}" in +EOF + +: ${XGETTEXT=xgettext} +${XGETTEXT} --no-location -d xg-oc-2.tmp xg-oc-2.ml || Exit 1 +func_filter_POT_Creation_Date xg-oc-2.tmp.po xg-oc-2.pot + +cat <<\EOF > xg-oc-2.ok +# SOME DESCRIPTIVE TITLE. +# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the PACKAGE package. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"Language: \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" + +msgid "" +"Test string 1 abc\\ndef\\tghi\\\\jkl\\\n" +" mno\\u{20AC}pqr|xyz" +msgstr "" + +msgid "" +"Test string 2 abc\n" +"def\tghi\\jklmno€pqr stu'vwx" +msgstr "" + +msgid "Test string 3 abcdef" +msgstr "" + +msgid "Test string 4 %7@@4@6😃" +msgstr "" +EOF + +: ${DIFF=diff} +${DIFF} xg-oc-2.ok xg-oc-2.pot || Exit 1 + +exit 0