OCaml support: Add OCaml support in the tools.

author Bruno Haible <bruno@clisp.org>

Sun, 27 Jul 2025 22:07:10 +0000 (00:07 +0200)

committer Bruno Haible <bruno@clisp.org>

Sun, 27 Jul 2025 22:09:35 +0000 (00:09 +0200)
author Bruno Haible <bruno@clisp.org>
Sun, 27 Jul 2025 22:07:10 +0000 (00:07 +0200)
committer Bruno Haible <bruno@clisp.org>
Sun, 27 Jul 2025 22:09:35 +0000 (00:09 +0200)
diff --git a/.gitignore b/.gitignore

index dc321b97963251ea526b43bca50f88a6a9d68f35..7aac1b569a2d5cf4b32edaacea837e66dabac1b9 100644 (file)
--- a/.gitignore
+++ b/.gitignore
@@ -718,6 +718,7 @@ autom4te.cache/
  /gettext-tools/src/libxgettextts2.a
  /gettext-tools/src/libxgettextts3.a
  /gettext-tools/src/libxgettextts4.a
+/gettext-tools/src/libxgettextts5.a
  /gettext-tools/src/libxgettextx.a
  /gettext-tools/src/msgfmt.net.exe
  /gettext-tools/src/msgfmt.net.exe.mdb
diff --git a/NEWS b/NEWS

index 56645e9ebdbc7c5a989d7d9ae005ee94de52d805..2fa1524e63d950ec8fa6da1f07609a576e73b4c7 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,11 @@
+Version 0.27 - September 2025
+
+# Programming languages support:
+  * OCaml:
+    - xgettext now supports OCaml.
+    - 'msgfmt -c' now verifies the syntax of translations of OCaml format
+      strings.
+
  Version 0.26 - July 2025
  
  # Programming languages support:
diff --git a/autopull.sh b/autopull.sh

index 6cc7e5aaf95061ad1b70b8c620865afc484cff70..31553df7f2500f7c259c153a007bd4c8e6c4e7e5 100755 (executable)
--- a/autopull.sh
+++ b/autopull.sh
@@ -89,6 +89,7 @@ TREE_SITTER_VERSION=0.23.2
  TREE_SITTER_GO_VERSION=0.23.4
  TREE_SITTER_RUST_VERSION=0.23.2
  TREE_SITTER_TYPESCRIPT_VERSION=0.23.2
+TREE_SITTER_OCAML_VERSION=0.23.2
  TREE_SITTER_D_VERSION=0.8.2
  # Cache the relevant source code. Erase the rest of the tree-sitter projects.
  test -d gettext-tools/tree-sitter-$TREE_SITTER_VERSION || {
@@ -135,6 +136,20 @@ test -d gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION ||
    mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/scanner.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/tsx-scanner.c
    rm -rf tree-sitter-typescript
  }
+test -d gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION || {
+  func_git_clone_shallow tree-sitter-ocaml https://github.com/tree-sitter/tree-sitter-ocaml.git v$TREE_SITTER_OCAML_VERSION
+  (cd tree-sitter-ocaml && patch -p1) < gettext-tools/build-aux/tree-sitter-ocaml-portability.diff
+  mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION
+  mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/common
+  mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars
+  mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml
+  mv tree-sitter-ocaml/LICENSE gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/LICENSE
+  mv tree-sitter-ocaml/common/scanner.h gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/common/scanner.h
+  mv tree-sitter-ocaml/grammars/ocaml/src gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src
+  mv gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/parser.c gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/ocaml-parser.c
+  mv gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/scanner.c gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/ocaml-scanner.c
+  rm -rf tree-sitter-ocaml
+}
  test -d gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION || {
    func_git_clone_shallow tree-sitter-d https://github.com/gdamore/tree-sitter-d.git v$TREE_SITTER_D_VERSION
    (cd tree-sitter-d && patch -p1) < gettext-tools/build-aux/tree-sitter-d-portability.diff
@@ -151,6 +166,7 @@ TREE_SITTER_VERSION=$TREE_SITTER_VERSION
  TREE_SITTER_GO_VERSION=$TREE_SITTER_GO_VERSION
  TREE_SITTER_RUST_VERSION=$TREE_SITTER_RUST_VERSION
  TREE_SITTER_TYPESCRIPT_VERSION=$TREE_SITTER_TYPESCRIPT_VERSION
+TREE_SITTER_OCAML_VERSION=$TREE_SITTER_OCAML_VERSION
  TREE_SITTER_D_VERSION=$TREE_SITTER_D_VERSION
  EOF
  
diff --git a/gettext-tools/Makefile.am b/gettext-tools/Makefile.am

index bafc917f48ed29fa94a0089646256fe26e04dd2d..0cec542ce3f06872289980075ac9cb68cb938c62 100644 (file)
--- a/gettext-tools/Makefile.am
+++ b/gettext-tools/Makefile.am
@@ -100,6 +100,14 @@ EXTRA_DIST += \
    tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/alloc.h \
    tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/array.h \
    tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/parser.h \
+  build-aux/tree-sitter-ocaml-portability.diff \
+  tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/LICENSE \
+  tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/common/scanner.h \
+  tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-parser.c \
+  tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-scanner.c \
+  tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/tree_sitter/alloc.h \
+  tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/tree_sitter/array.h \
+  tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/tree_sitter/parser.h \
    build-aux/tree-sitter-d-portability.diff \
    tree-sitter-d-$(TREE_SITTER_D_VERSION)/LICENSE \
    tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/d-parser.c \
diff --git a/gettext-tools/build-aux/tree-sitter-ocaml-portability.diff b/gettext-tools/build-aux/tree-sitter-ocaml-portability.diff

new file mode 100644 (file)

index 0000000..3a03909
--- /dev/null
+++ b/gettext-tools/build-aux/tree-sitter-ocaml-portability.diff
@@ -0,0 +1,32 @@
+diff --git a/grammars/ocaml/src/parser.c b/grammars/ocaml/src/parser.c
+index 653700c..3ddc246 100644
+--- a/grammars/ocaml/src/parser.c
++++ b/grammars/ocaml/src/parser.c
+@@ -1103727,8 +1103727,10 @@ void tree_sitter_ocaml_external_scanner_deserialize(void *, const char *, unsign
+ #define TS_PUBLIC
+ #elif defined(_WIN32)
+ #define TS_PUBLIC __declspec(dllexport)
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define TS_PUBLIC __attribute__((visibility("default")))
++#else
++#define TS_PUBLIC
+ #endif
+ 
+ TS_PUBLIC const TSLanguage *tree_sitter_ocaml(void) {
+diff --git a/grammars/ocaml/src/tree_sitter/parser.h b/grammars/ocaml/src/tree_sitter/parser.h
+index 799f599..130b4d0 100644
+--- a/grammars/ocaml/src/tree_sitter/parser.h
++++ b/grammars/ocaml/src/tree_sitter/parser.h
+@@ -155,8 +155,10 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
+ 
+ #ifdef _MSC_VER
+ #define UNUSED __pragma(warning(suppress : 4101))
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define UNUSED __attribute__((unused))
++#else
++#define UNUSED
+ #endif
+ 
+ #define START_LEXER()           \
diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac

index b4ca6275fb3fcb06cfc02c2d74ffe257757c4b08..de1380a6e401e6bdd1969cbe3cb8114c690acb6c 100644 (file)
--- a/gettext-tools/configure.ac
+++ b/gettext-tools/configure.ac
@@ -637,6 +637,7 @@ AC_SUBST([TREE_SITTER_VERSION])
  AC_SUBST([TREE_SITTER_GO_VERSION])
  AC_SUBST([TREE_SITTER_RUST_VERSION])
  AC_SUBST([TREE_SITTER_TYPESCRIPT_VERSION])
+AC_SUBST([TREE_SITTER_OCAML_VERSION])
  AC_SUBST([TREE_SITTER_D_VERSION])
  
  dnl Determine the directory name to use for version-specific data files.
diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi

index 9b5018f35bf092715c8a9a3047e0a1848608c4d4..5004314a67a275e28f8dbc1236e542f18a405ea3 100644 (file)
--- a/gettext-tools/doc/gettext.texi
+++ b/gettext-tools/doc/gettext.texi
@@ -426,6 +426,7 @@ The Translator's View
  * object-pascal-format::        Object Pascal Format Strings
  * modula2-format::              Modula-2 Format Strings
  * d-format::                    D Format Strings
+* ocaml-format::                OCaml Format Strings
  * smalltalk-format::            Smalltalk Format Strings
  * qt-format::                   Qt Format Strings
  * qt-plural-format::            Qt Plural Format Strings
@@ -1846,6 +1847,12 @@ Likewise for Modula-2, see @ref{modula2-format}.
  @kwindex no-d-format@r{ flag}
  Likewise for D, see @ref{d-format}.
  
+@item ocaml-format
+@kwindex ocaml-format@r{ flag}
+@itemx no-ocaml-format
+@kwindex no-ocaml-format@r{ flag}
+Likewise for OCaml, see @ref{ocaml-format}.
+
  @item smalltalk-format
  @kwindex smalltalk-format@r{ flag}
  @itemx no-smalltalk-format
@@ -10149,6 +10156,7 @@ strings.
  * object-pascal-format::        Object Pascal Format Strings
  * modula2-format::              Modula-2 Format Strings
  * d-format::                    D Format Strings
+* ocaml-format::                OCaml Format Strings
  * smalltalk-format::            Smalltalk Format Strings
  * qt-format::                   Qt Format Strings
  * qt-plural-format::            Qt Plural Format Strings
@@ -10514,6 +10522,18 @@ D format strings are described
  in the documentation of the D module @code{std.format},
  at @uref{https://dlang.org/library/std/format.html}.
  
+@node ocaml-format
+@subsection OCaml Format Strings
+
+OCaml format strings are described
+in the documentation of the @code{Printf} module,
+at @uref{https://ocaml.org/manual/5.3/api/Printf.html#VALfprintf}.
+In translated strings (@code{msgstr} values),
+arguments can be specified through a number,
+using the syntax @samp{%@var{m}$} instead of @samp{%}
+and @samp{*@var{m}$} instead of @samp{*}
+to designate the argument number @var{m}.
+
  @node smalltalk-format
  @subsection Smalltalk Format Strings
  
diff --git a/gettext-tools/doc/lang-ocaml.texi b/gettext-tools/doc/lang-ocaml.texi

index 24e76a90fe40ee32962604293475a1803cb12996..9e900bdfde3113d5f6a3132ed360ff02bb398435 100644 (file)
--- a/gettext-tools/doc/lang-ocaml.texi
+++ b/gettext-tools/doc/lang-ocaml.texi
@@ -24,7 +24,13 @@ ocaml, opam
  @code{(s_ "abc")} or, for format strings, @code{(f_ "abc")}
  
  @item gettext/ngettext functions
-@code{s_}, @code{f_}, @code{sn_}, @code{fn_}
+@code{s_}, @code{f_}, @code{sn_}, @code{fn_}, and also
+@code{gettext}, @code{fgettext},
+@code{dgettext}, @code{fdgettext},
+@code{dcgettext}, @code{fdcgettext},
+@code{ngettext}, @code{fngettext},
+@code{dngettext}, @code{fdngettext},
+@code{dcngettext}, @code{fdcngettext}.
  
  @item textdomain
  @code{textdomain} field in first parameter of @code{Gettext.Program}
@@ -47,7 +53,7 @@ as second parameter of @code{Gettext.Program})
  @code{xgettext}
  
  @item Formatting with positions
----
+@code{Printf.fprintf "%2$d %1$d"}
  
  @item Portability
  fully portable
diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi

index 423051bde148e52ae281db53469d6cdabf018d9c..b089aa629ebd8402dd50457a14ab12b4a7efe6ea 100644 (file)
--- a/gettext-tools/doc/xgettext.texi
+++ b/gettext-tools/doc/xgettext.texi
@@ -270,6 +270,7 @@ awk,
  Lua,
  Modula-2,
  D,
+OCaml,
  Vala,
  Tcl,
  Perl,
@@ -335,6 +336,7 @@ awk,
  Lua,
  Modula-2,
  D,
+OCaml,
  Vala,
  Tcl,
  Perl,
@@ -502,6 +504,7 @@ awk,
  Lua,
  Modula-2,
  D,
+OCaml,
  Vala,
  Tcl,
  Perl,
diff --git a/gettext-tools/libgettextpo/Makefile.am b/gettext-tools/libgettextpo/Makefile.am

index 971d1364f5e4e2281a700c2387cb846c2c37f8e1..8a23571e8b3862e05285bb7546364ea18767a96e 100644 (file)
--- a/gettext-tools/libgettextpo/Makefile.am
+++ b/gettext-tools/libgettextpo/Makefile.am
@@ -89,6 +89,7 @@ libgettextpo_la_AUXSOURCES = \
    ../src/format-pascal.c \
    ../src/format-modula2.c \
    ../src/format-d.c \
+  ../src/format-ocaml.c \
    ../src/format-smalltalk.c \
    ../src/format-qt.c \
    ../src/format-qt-plural.c \
diff --git a/gettext-tools/po/POTFILES.in b/gettext-tools/po/POTFILES.in

index 0a2f0e1a97e2a2dddc5973568b58e38e664adda6..2de342158d0ac269999baa664e49d90b9eadab90 100644 (file)
--- a/gettext-tools/po/POTFILES.in
+++ b/gettext-tools/po/POTFILES.in
@@ -30,6 +30,7 @@ src/format-librep.c
  src/format-lisp.c
  src/format-lua.c
  src/format-modula2.c
+src/format-ocaml.c
  src/format-pascal.c
  src/format-perl-brace.c
  src/format-perl.c
diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES

index b34f0cee30bb095aa9c58940841872721f4290e2..997146bf88fdd1e98b697d892eff2e69604323e6 100644 (file)
--- a/gettext-tools/src/FILES
+++ b/gettext-tools/src/FILES
@@ -246,6 +246,7 @@ format-lua.c           Format string handling for Lua.
  format-pascal.c        Format string handling for Object Pascal.
  format-modula2.c       Format string handling for Modula-2.
  format-d.c             Format string handling for D.
+format-ocaml.c         Format string handling for OCaml.
  format-smalltalk.c     Format string handling for Smalltalk and YCP.
  format-qt.c            Format string handling for Qt.
  format-qt-plural.c     Format string handling for Qt plural forms.
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am

index 0e66d1571b0f916a16b2d7f835b1da7e47ef0136..93469a11c412b5d37413b41ad82f7639b0c84a34 100644 (file)
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -210,6 +210,7 @@ FORMAT_SOURCE += \
    format-pascal.c \
    format-modula2.c \
    format-d.c \
+  format-ocaml.c \
    format-smalltalk.c \
    format-qt.c \
    format-qt-plural.c \
@@ -249,7 +250,7 @@ libgettextsrc_la_SOURCES = \
    search-path.c
  
  # xgettext has some tree-sitter based backends.
-LIBXGETTEXTTS = libxgettextts2.a libxgettextts3.a libxgettextts4.a libxgettextts1.a
+LIBXGETTEXTTS = libxgettextts2.a libxgettextts3.a libxgettextts4.a libxgettextts5.a libxgettextts1.a
  libxgettextts1_a_SOURCES = \
    ../tree-sitter-$(TREE_SITTER_VERSION)/lib/src/lib.c
  libxgettextts1_a_CPPFLAGS = \
@@ -279,6 +280,13 @@ libxgettextts4_a_CPPFLAGS = \
    -I$(top_srcdir)/tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src \
    -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include
  libxgettextts4_a_CFLAGS = $(AM_CFLAGS) @FOREIGN_WARN_CFLAGS@ @NO_EXPENSIVE_WARN_CFLAGS@
+libxgettextts5_a_SOURCES = \
+  ../tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-parser.c \
+  ../tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-scanner.c
+libxgettextts5_a_CPPFLAGS = \
+  -I$(top_srcdir)/tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src \
+  -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include
+libxgettextts5_a_CFLAGS = $(AM_CFLAGS) @FOREIGN_WARN_CFLAGS@ @NO_EXPENSIVE_WARN_CFLAGS@
  
  # msggrep needs pattern matching.
  LIBGREP = ../libgrep/libgrep.a ../libgrep/gnulib-lib/libgnu.a
diff --git a/gettext-tools/src/format-ocaml.c b/gettext-tools/src/format-ocaml.c

new file mode 100644 (file)

index 0000000..acd0b6d
--- /dev/null
+++ b/gettext-tools/src/format-ocaml.c
@@ -0,0 +1,1100 @@
+/* OCaml format strings.
+   Copyright (C) 2001-2025 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2025.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "format.h"
+#include "gettext.h"
+#include "xalloc.h"
+#include "format-invalid.h"
+#include "c-ctype.h"
+#include "xvasprintf.h"
+
+#define _(str) gettext (str)
+
+/* The OCaml format strings are described in the OCaml reference manual,
+   at https://ocaml.org/manual/5.3/api/Printf.html#VALfprintf .
+   They are implemented in ocaml-5.3.0/stdlib/scanf.ml.
+
+   A directive
+   - starts with '%',
+   - [in msgstr only] is optionally followed by
+       a positive integer m, then '$'
+   - is optionally followed by a sequence of flags, each being one of
+       '+', '-', ' ', '0', '#',
+   - is optionally followed by a width specification:
+       a positive integer, or
+       '*', or
+       [in msgstr only] '*', then a positive integer, then '$',
+   - is optionally followed by a precision specification:
+       '.' then optionally:
+         a positive integer, or
+         '*', or
+         [in msgstr only] '*', then a positive integer, then '$',
+   - is finished by a specifier
+       - 'd', 'i', 'u', 'x', 'X', 'o', that need an integer argument,
+       - 'l' then 'd', 'i', 'u', 'x', 'X', 'o', that need an int32 argument,
+       - 'n' then 'd', 'i', 'u', 'x', 'X', 'o', that need an nativeint argument,
+       - 'L' then 'd', 'i', 'u', 'x', 'X', 'o', that need an int64 argument,
+       - 's', that needs a string argument,
+       - 'S', that needs a string argument and outputs it in OCaml syntax,
+       - 'c', that needs a character argument,
+       - 'C', that needs a character argument and outputs it in OCaml syntax,
+       - 'f', 'e', 'E', 'g', 'G', 'h', 'H', that need a floating-point argument,
+       - 'F', that needs a floating-point argument and outputs it in OCaml syntax,
+       - 'B', that needs a boolean argument,
+       - 'a', that takes a function (of type : out_channel -> unit) argument,
+       - 't', that takes two arguments: a function (of type : out_channel -> <T> -> unit)
+              and a <T>,
+       - '{' FMT '%}', that takes a format string argument without msgstr
+         extensions, expected to have the same signature as FMT, effectively
+         ignores it, and instead outputs the minimal format string with the
+         same signature as FMT: a concatenation of
+           - "%i" for an integer argument,
+           - "%li" for an int32 argument,
+           - "%ni" for a nativeint argument,
+           - "%Li" for an int64 argument,
+           - "%s" for a string argument,
+           - "%c" for a character argument,
+           - "%f" for a floating-point argument,
+           - "%B" for a boolean argument,
+           - "%a" for a function argument,
+           - "%t" for two arguments, as described above,
+       - '(' FMT '%)', that takes a format string argument without msgstr
+         extensions, expected to have the same signature as FMT, and a set
+         of arguments suitable for FMT,
+       - '!', '%', '@', ',', that take no argument.
+   Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
+   be used in the same string.
+ */
+
+enum format_arg_type
+{
+  FAT_NONE              = 0,
+  /* Basic types */
+  FAT_INTEGER           = 1,
+  FAT_INT32             = 2,
+  FAT_NATIVEINT         = 3,
+  FAT_INT64             = 4,
+  FAT_STRING            = 5,
+  FAT_CHARACTER         = 6,
+  FAT_FLOATINGPOINT     = 7,
+  FAT_BOOLEAN           = 8,
+  FAT_FUNCTION_A        = 9,
+  FAT_FUNCTION_T        = 10, /* first argument for %t */
+  FAT_FUNCTION_T2       = 11, /* second argument for %t */
+  FAT_FORMAT_STRING     = 12,
+  /* Flags */
+  FAT_OCAML_SYNTAX          = 1 << 4,
+  FAT_OPTIONAL_OCAML_SYNTAX = 1 << 5,
+  /* Bitmasks */
+  FAT_BASIC_MASK        = (FAT_INTEGER | FAT_INT32 | FAT_NATIVEINT | FAT_INT64
+                           | FAT_STRING | FAT_CHARACTER | FAT_FLOATINGPOINT
+                           | FAT_BOOLEAN | FAT_FUNCTION_A | FAT_FUNCTION_T
+                           | FAT_FUNCTION_T2 | FAT_FORMAT_STRING)
+};
+#ifdef __cplusplus
+typedef int format_arg_type_t;
+#else
+typedef enum format_arg_type format_arg_type_t;
+#endif
+
+struct numbered_arg
+{
+  size_t number;
+  format_arg_type_t type;
+  char *signature;        /* for type == FAT_FORMAT_STRING */
+};
+
+struct spec
+{
+  size_t directives;
+  size_t numbered_arg_count;
+  struct numbered_arg *numbered;
+};
+
+
+static int
+numbered_arg_compare (const void *p1, const void *p2)
+{
+  size_t n1 = ((const struct numbered_arg *) p1)->number;
+  size_t n2 = ((const struct numbered_arg *) p2)->number;
+
+  return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
+}
+
+/* Frees the memory held by *spec.  */
+static void
+destroy_spec (struct spec *spec)
+{
+  if (spec->numbered != NULL)
+    {
+      size_t i;
+      for (i = spec->numbered_arg_count; i > 0; )
+        {
+          --i;
+          if (spec->numbered[i].type == FAT_FORMAT_STRING)
+            free (spec->numbered[i].signature);
+        }
+      free (spec->numbered);
+    }
+}
+
+/* Returns the signature of a format string
+   as a freshly allocated string.  */
+static char *
+format_string_signature (const struct spec *spec)
+{
+  size_t len;
+  {
+    size_t i;
+    const struct numbered_arg *p;
+    len = spec->numbered_arg_count;
+    for (i = 0, p = spec->numbered; i < spec->numbered_arg_count; i++, p++)
+      if ((p->type & FAT_BASIC_MASK) == FAT_FORMAT_STRING)
+        len += strlen (p->signature) + 1;
+  }
+  char *signature = (char *) xmalloc (len + 1);
+  {
+    size_t i;
+    const struct numbered_arg *p;
+    char *s;
+    for (i = 0, p = spec->numbered, s = signature;
+         i < spec->numbered_arg_count;
+         i++, p++)
+      switch (p->type & FAT_BASIC_MASK)
+        {
+        case FAT_INTEGER:
+          *s++ = 'i';
+          break;
+        case FAT_INT32:
+          *s++ = 'l';
+          break;
+        case FAT_NATIVEINT:
+          *s++ = 'n';
+          break;
+        case FAT_INT64:
+          *s++ = 'L';
+          break;
+        case FAT_STRING:
+          *s++ = 's';
+          break;
+        case FAT_CHARACTER:
+          *s++ = 'c';
+          break;
+        case FAT_FLOATINGPOINT:
+          *s++ = 'f';
+          break;
+        case FAT_BOOLEAN:
+          *s++ = 'B';
+          break;
+        case FAT_FUNCTION_A:
+          *s++ = 'a';
+          break;
+        case FAT_FUNCTION_T:
+          *s++ = 't';
+          break;
+        case FAT_FUNCTION_T2:
+          break;
+        case FAT_FORMAT_STRING:
+          *s++ = '(';
+          memcpy (s, p->signature, strlen (p->signature));
+          s += strlen (p->signature);
+          *s++ = ')';
+          break;
+        default:
+          abort ();
+        }
+    *s = '\0';
+  }
+  return signature;
+}
+
+/* When a type is specified via format string substitution, e.g. "%(%s%)", both
+   the variant without OCaml syntax "%s" and the variant with OCaml syntax "%S"
+   are allowed.  */
+static format_arg_type_t
+type_without_translator_constraint (format_arg_type_t type)
+{
+  switch (type & FAT_BASIC_MASK)
+    {
+    case FAT_STRING:
+    case FAT_CHARACTER:
+    case FAT_FLOATINGPOINT:
+      return (type & FAT_BASIC_MASK) | FAT_OPTIONAL_OCAML_SYNTAX;
+    default:
+      return type;
+    }
+}
+
+/* Parse a piece of format string, until the matching terminating format
+   directive is encountered.
+   spec is the global struct spec.
+   format is the remainder of the format string.
+   It is updated upon valid return.
+   terminator is '\0' at the top-level, otherwise '}' or ')'.
+   translated is true when msgstr extensions should be accepted.
+   fdi is an array to be filled with format directive indicators, or NULL.
+   If the format string is invalid, false is returned and *invalid_reason is
+   set to an error message explaining why.  */
+static bool
+parse_upto (struct spec *spec,
+            const char **formatp,
+            char terminator, bool translated,
+            char *fdi, char **invalid_reason)
+{
+  const char *format = *formatp;
+  const char *const format_start = format;
+  size_t numbered_allocated;
+  size_t unnumbered_arg_count;
+
+  spec->directives = 0;
+  spec->numbered_arg_count = 0;
+  spec->numbered = NULL;
+  numbered_allocated = 0;
+  unnumbered_arg_count = 0;
+
+  for (; *format != '\0';)
+    /* Invariant: spec->numbered_arg_count == 0 || unnumbered_arg_count == 0.  */
+    if (*format++ == '%')
+      {
+        /* A directive.  */
+        size_t number = 0;
+        format_arg_type_t type;
+        char *signature = NULL;
+
+        FDI_SET (format - 1, FMTDIR_START);
+        spec->directives++;
+
+        if (translated && c_isdigit (*format))
+          {
+            const char *f = format;
+            size_t m = 0;
+
+            do
+              {
+                m = 10 * m + (*f - '0');
+                f++;
+              }
+            while (c_isdigit (*f));
+
+            if (*f == '$')
+              {
+                if (m == 0)
+                  {
+                    *invalid_reason = INVALID_ARGNO_0 (spec->directives);
+                    FDI_SET (f, FMTDIR_ERROR);
+                    goto bad_format;
+                  }
+                number = m;
+                format = ++f;
+              }
+          }
+
+        /* Parse flags.  */
+        while (*format == ' ' || *format == '+' || *format == '-'
+               || *format == '#' || *format == '0')
+          format++;
+
+        /* Parse width.  */
+        if (*format == '*')
+          {
+            size_t width_number = 0;
+
+            format++;
+
+            if (translated && c_isdigit (*format))
+              {
+                const char *f = format;
+                size_t m = 0;
+
+                do
+                  {
+                    m = 10 * m + (*f - '0');
+                    f++;
+                  }
+                while (c_isdigit (*f));
+
+                if (*f == '$')
+                  {
+                    if (m == 0)
+                      {
+                        *invalid_reason =
+                          INVALID_WIDTH_ARGNO_0 (spec->directives);
+                        FDI_SET (f, FMTDIR_ERROR);
+                        goto bad_format;
+                      }
+                    width_number = m;
+                    format = ++f;
+                  }
+              }
+
+            if (width_number)
+              {
+                /* Numbered argument.  */
+
+                /* Numbered and unnumbered specifications are exclusive.  */
+                if (unnumbered_arg_count > 0)
+                  {
+                    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                    FDI_SET (format - 1, FMTDIR_ERROR);
+                    goto bad_format;
+                  }
+
+                if (numbered_allocated == spec->numbered_arg_count)
+                  {
+                    numbered_allocated = 2 * numbered_allocated + 1;
+                    spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+                  }
+                spec->numbered[spec->numbered_arg_count].number = width_number;
+                spec->numbered[spec->numbered_arg_count].type = FAT_INTEGER;
+                spec->numbered_arg_count++;
+              }
+            else
+              {
+                /* Unnumbered argument.  */
+
+                /* Numbered and unnumbered specifications are exclusive.  */
+                if (spec->numbered_arg_count > 0)
+                  {
+                    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                    FDI_SET (format - 1, FMTDIR_ERROR);
+                    goto bad_format;
+                  }
+
+                if (numbered_allocated == unnumbered_arg_count)
+                  {
+                    numbered_allocated = 2 * numbered_allocated + 1;
+                    spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+                  }
+                spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+                spec->numbered[unnumbered_arg_count].type = FAT_INTEGER;
+                unnumbered_arg_count++;
+              }
+          }
+        else if (c_isdigit (*format))
+          {
+            do format++; while (c_isdigit (*format));
+          }
+
+        /* Parse precision.  */
+        if (*format == '.')
+          {
+            format++;
+
+            if (*format == '*')
+              {
+                size_t precision_number = 0;
+
+                format++;
+
+                if (translated && c_isdigit (*format))
+                  {
+                    const char *f = format;
+                    size_t m = 0;
+
+                    do
+                      {
+                        m = 10 * m + (*f - '0');
+                        f++;
+                      }
+                    while (c_isdigit (*f));
+
+                    if (*f == '$')
+                      {
+                        if (m == 0)
+                          {
+                            *invalid_reason =
+                              INVALID_PRECISION_ARGNO_0 (spec->directives);
+                            FDI_SET (f, FMTDIR_ERROR);
+                            goto bad_format;
+                          }
+                        precision_number = m;
+                        format = ++f;
+                      }
+                  }
+
+                if (precision_number)
+                  {
+                    /* Numbered argument.  */
+
+                    /* Numbered and unnumbered specifications are exclusive.  */
+                    if (unnumbered_arg_count > 0)
+                      {
+                        *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                        FDI_SET (format - 1, FMTDIR_ERROR);
+                        goto bad_format;
+                      }
+
+                    if (numbered_allocated == spec->numbered_arg_count)
+                      {
+                        numbered_allocated = 2 * numbered_allocated + 1;
+                        spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+                      }
+                    spec->numbered[spec->numbered_arg_count].number = precision_number;
+                    spec->numbered[spec->numbered_arg_count].type = FAT_INTEGER;
+                    spec->numbered_arg_count++;
+                  }
+                else
+                  {
+                    /* Unnumbered argument.  */
+
+                    /* Numbered and unnumbered specifications are exclusive.  */
+                    if (spec->numbered_arg_count > 0)
+                      {
+                        *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                        FDI_SET (format - 1, FMTDIR_ERROR);
+                        goto bad_format;
+                      }
+
+                    if (numbered_allocated == unnumbered_arg_count)
+                      {
+                        numbered_allocated = 2 * numbered_allocated + 1;
+                        spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+                      }
+                    spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+                    spec->numbered[unnumbered_arg_count].type = FAT_INTEGER;
+                    unnumbered_arg_count++;
+                  }
+              }
+            else if (c_isdigit (*format))
+              {
+                do format++; while (c_isdigit (*format));
+              }
+          }
+
+        /* Parse the specifier.  */
+        enum format_arg_type integer_type = FAT_INTEGER;
+        if (*format == 'l')
+          {
+            integer_type = FAT_INT32;
+            format++;
+          }
+        else if (*format == 'n')
+          {
+            integer_type = FAT_NATIVEINT;
+            format++;
+          }
+        else if (*format == 'L')
+          {
+            integer_type = FAT_INT64;
+            format++;
+          }
+
+        switch (*format)
+          {
+          case 'd':
+          case 'i':
+          case 'u':
+          case 'x': case 'X':
+          case 'o':
+            type = integer_type;
+            break;
+          default:
+            if (integer_type != FAT_INTEGER)
+              --format;
+            switch (*format)
+              {
+              case 's':
+                type = FAT_STRING;
+                break;
+              case 'S':
+                type = FAT_STRING | FAT_OCAML_SYNTAX;
+                break;
+              case 'c':
+                type = FAT_CHARACTER;
+                break;
+              case 'C':
+                type = FAT_CHARACTER | FAT_OCAML_SYNTAX;
+                break;
+              case 'f':
+              case 'e': case 'E':
+              case 'g': case 'G':
+              case 'h': case 'H':
+                type = FAT_FLOATINGPOINT;
+                break;
+              case 'F':
+                type = FAT_FLOATINGPOINT | FAT_OCAML_SYNTAX;
+                break;
+              case 'B':
+                type = FAT_BOOLEAN;
+                break;
+              case 'a':
+                type = FAT_FUNCTION_A;
+                break;
+              case 't':
+                type = FAT_FUNCTION_T;
+                break;
+              case '{':
+                {
+                  struct spec sub_spec;
+                  *formatp = format;
+                  if (!parse_upto (&sub_spec, formatp, '}', false,
+                                   fdi, invalid_reason))
+                    {
+                      FDI_SET (**formatp == '\0' ? *formatp - 1 : *formatp,
+                               FMTDIR_ERROR);
+                      goto bad_format;
+                    }
+                  format = *formatp;
+                  type = FAT_FORMAT_STRING;
+                  signature = format_string_signature (&sub_spec);
+                  destroy_spec (&sub_spec);
+                }
+                break;
+              case '}':
+                if (terminator != '}')
+                  {
+                    *invalid_reason =
+                      xasprintf (_("Found '%s' without matching '%s'."), "%}", "%{");
+                    FDI_SET (format - 1, FMTDIR_ERROR);
+                    goto bad_format;
+                  }
+                spec->directives--;
+                goto done;
+              case '(':
+                {
+                  struct spec sub_spec;
+                  *formatp = format;
+                  if (!parse_upto (&sub_spec, formatp, ')', false,
+                                   fdi, invalid_reason))
+                    {
+                      FDI_SET (**formatp == '\0' ? *formatp - 1 : *formatp,
+                               FMTDIR_ERROR);
+                      goto bad_format;
+                    }
+                  format = *formatp;
+                  type = FAT_FORMAT_STRING;
+                  signature = format_string_signature (&sub_spec);
+
+                  if (number)
+                    {
+                      /* Numbered argument.  */
+
+                      /* Numbered and unnumbered specifications are exclusive.  */
+                      if (unnumbered_arg_count > 0)
+                        {
+                          *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                          FDI_SET (format, FMTDIR_ERROR);
+                          goto bad_format;
+                        }
+
+                      size_t new_numbered_arg_count =
+                        spec->numbered_arg_count + 1 + sub_spec.numbered_arg_count;
+                      if (numbered_allocated < new_numbered_arg_count)
+                        {
+                          numbered_allocated = 2 * numbered_allocated + 1;
+                          if (numbered_allocated < new_numbered_arg_count)
+                            numbered_allocated = new_numbered_arg_count;
+                          spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+                        }
+                      spec->numbered[spec->numbered_arg_count].number = number;
+                      spec->numbered[spec->numbered_arg_count].type = type;
+                      spec->numbered[spec->numbered_arg_count].signature = signature;
+                      spec->numbered_arg_count++;
+                      for (size_t i = 0; i < sub_spec.numbered_arg_count; i++)
+                        {
+                          spec->numbered[spec->numbered_arg_count].number = number + sub_spec.numbered[i].number;
+                          spec->numbered[spec->numbered_arg_count].type =
+                            type_without_translator_constraint (sub_spec.numbered[i].type);
+                          if (sub_spec.numbered[i].type == FAT_FORMAT_STRING)
+                            spec->numbered[spec->numbered_arg_count].signature = sub_spec.numbered[i].signature;
+                          spec->numbered_arg_count++;
+                        }
+                    }
+                  else
+                    {
+                      /* Unnumbered argument.  */
+
+                      /* Numbered and unnumbered specifications are exclusive.  */
+                      if (spec->numbered_arg_count > 0)
+                        {
+                          *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                          FDI_SET (format, FMTDIR_ERROR);
+                          goto bad_format;
+                        }
+
+                      size_t new_unnumbered_arg_count =
+                        unnumbered_arg_count + 1 + sub_spec.numbered_arg_count;
+                      if (numbered_allocated < new_unnumbered_arg_count)
+                        {
+                          numbered_allocated = 2 * numbered_allocated + 1;
+                          if (numbered_allocated < new_unnumbered_arg_count)
+                            numbered_allocated = new_unnumbered_arg_count;
+                          spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+                        }
+                      spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+                      spec->numbered[unnumbered_arg_count].type = type;
+                      spec->numbered[unnumbered_arg_count].signature = signature;
+                      unnumbered_arg_count++;
+                      for (size_t i = 0; i < sub_spec.numbered_arg_count; i++)
+                        {
+                          spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+                          spec->numbered[unnumbered_arg_count].type =
+                            type_without_translator_constraint (sub_spec.numbered[i].type);
+                          if (sub_spec.numbered[i].type == FAT_FORMAT_STRING)
+                            spec->numbered[unnumbered_arg_count].signature = sub_spec.numbered[i].signature;
+                          unnumbered_arg_count++;
+                        }
+                    }
+
+                  free (sub_spec.numbered);
+                }
+                goto done_specifier;
+              case ')':
+                if (terminator != ')')
+                  {
+                    *invalid_reason =
+                      xasprintf (_("Found '%s' without matching '%s'."), "%)", "%(");
+                    FDI_SET (format - 1, FMTDIR_ERROR);
+                    goto bad_format;
+                  }
+                spec->directives--;
+                goto done;
+              case '!':
+              case '%':
+              case '@':
+              case ',':
+                type = FAT_NONE;
+                break;
+              default:
+                if (*format == '\0')
+                  {
+                    *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+                    FDI_SET (format - 1, FMTDIR_ERROR);
+                  }
+                else
+                  {
+                    *invalid_reason =
+                      INVALID_CONVERSION_SPECIFIER (spec->directives, *format);
+                    FDI_SET (format, FMTDIR_ERROR);
+                  }
+                goto bad_format;
+              }
+            break;
+          }
+
+        if (type != FAT_NONE)
+          {
+            if (number)
+              {
+                /* Numbered argument.  */
+
+                /* Numbered and unnumbered specifications are exclusive.  */
+                if (unnumbered_arg_count > 0)
+                  {
+                    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                    FDI_SET (format, FMTDIR_ERROR);
+                    goto bad_format;
+                  }
+
+                size_t new_numbered_arg_count =
+                  spec->numbered_arg_count + 1 + (type == FAT_FUNCTION_T);
+                if (numbered_allocated < new_numbered_arg_count)
+                  {
+                    numbered_allocated = 2 * numbered_allocated + 1;
+                    if (numbered_allocated < new_numbered_arg_count)
+                      numbered_allocated = new_numbered_arg_count;
+                    spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+                  }
+                spec->numbered[spec->numbered_arg_count].number = number;
+                spec->numbered[spec->numbered_arg_count].type = type;
+                if (type == FAT_FORMAT_STRING)
+                  spec->numbered[spec->numbered_arg_count].signature = signature;
+                spec->numbered_arg_count++;
+                if (type == FAT_FUNCTION_T)
+                  {
+                    spec->numbered[spec->numbered_arg_count].number = number + 1;
+                    spec->numbered[spec->numbered_arg_count].type = FAT_FUNCTION_T2;
+                    spec->numbered_arg_count++;
+                  }
+              }
+            else
+              {
+                /* Unnumbered argument.  */
+
+                /* Numbered and unnumbered specifications are exclusive.  */
+                if (spec->numbered_arg_count > 0)
+                  {
+                    *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+                    FDI_SET (format, FMTDIR_ERROR);
+                    goto bad_format;
+                  }
+
+                size_t new_unnumbered_arg_count =
+                  unnumbered_arg_count + 1 + (type == FAT_FUNCTION_T);
+                if (numbered_allocated < new_unnumbered_arg_count)
+                  {
+                    numbered_allocated = 2 * numbered_allocated + 1;
+                    if (numbered_allocated < new_unnumbered_arg_count)
+                      numbered_allocated = new_unnumbered_arg_count;
+                    spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+                  }
+                spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+                spec->numbered[unnumbered_arg_count].type = type;
+                if (type == FAT_FORMAT_STRING)
+                  spec->numbered[unnumbered_arg_count].signature = signature;
+                unnumbered_arg_count++;
+                if (type == FAT_FUNCTION_T)
+                  {
+                    spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+                    spec->numbered[unnumbered_arg_count].type = FAT_FUNCTION_T2;
+                    unnumbered_arg_count++;
+                  }
+              }
+          }
+
+       done_specifier:
+        FDI_SET (format, FMTDIR_END);
+
+        format++;
+      }
+
+  if (terminator != '\0')
+    {
+      *invalid_reason = xasprintf (_("Found '%%%c' without matching '%%%c'."),
+                                   terminator == '}' ? '{' : '(', terminator);
+      goto bad_format;
+    }
+
+ done:
+  /* Convert the unnumbered argument array to numbered arguments.  */
+  if (unnumbered_arg_count > 0)
+    spec->numbered_arg_count = unnumbered_arg_count;
+  /* Sort the numbered argument array, and eliminate duplicates.  */
+  else if (spec->numbered_arg_count > 1)
+    {
+      size_t i, j;
+      bool err;
+
+      qsort (spec->numbered, spec->numbered_arg_count,
+             sizeof (struct numbered_arg), numbered_arg_compare);
+
+      /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i.  */
+      err = false;
+      for (i = j = 0; i < spec->numbered_arg_count; i++)
+        if (j > 0 && spec->numbered[i].number == spec->numbered[j-1].number)
+          {
+            format_arg_type_t type1 = spec->numbered[i].type;
+            format_arg_type_t type2 = spec->numbered[j-1].type;
+            format_arg_type_t type_both;
+
+            if (((type1 == type2)
+                 && (type1 != FAT_FORMAT_STRING
+                     || strcmp (spec->numbered[i].signature,
+                                spec->numbered[j-1].signature) == 0))
+                || (((type1 | type2) & FAT_OPTIONAL_OCAML_SYNTAX) != 0
+                    && (((type1 & ~FAT_OPTIONAL_OCAML_SYNTAX) | FAT_OCAML_SYNTAX)
+                        == ((type2 & ~FAT_OPTIONAL_OCAML_SYNTAX) | FAT_OCAML_SYNTAX))))
+              type_both = (type1 | type2) & ~FAT_OPTIONAL_OCAML_SYNTAX;
+            else
+              {
+                /* Incompatible types.  */
+                type_both = FAT_NONE;
+                if (!err)
+                  *invalid_reason =
+                    INVALID_INCOMPATIBLE_ARG_TYPES (spec->numbered[i].number);
+                err = true;
+              }
+
+            spec->numbered[j-1].type = type_both;
+            if (type_both == FAT_FORMAT_STRING)
+              free (spec->numbered[i].signature);
+          }
+        else
+          {
+            if (j < i)
+              {
+                spec->numbered[j].number = spec->numbered[i].number;
+                spec->numbered[j].type = spec->numbered[i].type;
+                if (spec->numbered[j].type == FAT_FORMAT_STRING)
+                  spec->numbered[j].signature = spec->numbered[i].signature;
+              }
+            j++;
+          }
+      spec->numbered_arg_count = j;
+      if (err)
+        /* *invalid_reason has already been set above.  */
+        goto bad_format;
+    }
+
+  *formatp = format;
+  return true;
+
+ bad_format:
+  destroy_spec (spec);
+  return false;
+}
+
+static void *
+format_parse (const char *format, bool translated, char *fdi,
+              char **invalid_reason)
+{
+  struct spec spec;
+  struct spec *result;
+
+  if (!parse_upto (&spec, &format, '\0', translated, fdi, invalid_reason))
+    return NULL;
+
+  result = XMALLOC (struct spec);
+  *result = spec;
+  return result;
+}
+
+static void
+format_free (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+
+  destroy_spec (spec);
+  free (spec);
+}
+
+static int
+format_get_number_of_directives (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+
+  return spec->directives;
+}
+
+static bool
+format_check (void *msgid_descr, void *msgstr_descr, bool equality,
+              formatstring_error_logger_t error_logger, void *error_logger_data,
+              const char *pretty_msgid, const char *pretty_msgstr)
+{
+  struct spec *spec1 = (struct spec *) msgid_descr;
+  struct spec *spec2 = (struct spec *) msgstr_descr;
+  bool err = false;
+
+  if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
+    {
+      size_t i, j;
+      size_t n1 = spec1->numbered_arg_count;
+      size_t n2 = spec2->numbered_arg_count;
+
+      /* Check that the argument numbers are the same.
+         Both arrays are sorted.  We search for the first difference.  */
+      for (i = 0, j = 0; i < n1 || j < n2; )
+        {
+          int cmp = (i >= n1 ? 1 :
+                     j >= n2 ? -1 :
+                     spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
+                     spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
+                     0);
+
+          if (cmp > 0)
+            {
+              if (error_logger)
+                error_logger (error_logger_data,
+                              _("a format specification for argument %zu, as in '%s', doesn't exist in '%s'"),
+                              spec2->numbered[j].number, pretty_msgstr,
+                              pretty_msgid);
+              err = true;
+              break;
+            }
+          else if (cmp < 0)
+            {
+              if (equality)
+                {
+                  if (error_logger)
+                    error_logger (error_logger_data,
+                                  _("a format specification for argument %zu doesn't exist in '%s'"),
+                                  spec1->numbered[i].number, pretty_msgstr);
+                  err = true;
+                  break;
+                }
+              else
+                i++;
+            }
+          else
+            j++, i++;
+        }
+      /* Check that the argument types are essentially the same.  */
+      if (!err)
+        for (i = 0, j = 0; j < n2; )
+          {
+            if (spec1->numbered[i].number == spec2->numbered[j].number)
+              {
+                format_arg_type_t type1 = spec1->numbered[i].type;
+                format_arg_type_t type2 = spec2->numbered[j].type;
+
+                if (!(((type1 == type2)
+                       && (type1 != FAT_FORMAT_STRING
+                           || strcmp (spec1->numbered[i].signature,
+                                      spec2->numbered[j].signature) == 0))
+                      || ((type2 & FAT_OPTIONAL_OCAML_SYNTAX) != 0
+                          && (type2 & ~FAT_OPTIONAL_OCAML_SYNTAX)
+                             == (type1 & ~FAT_OCAML_SYNTAX))))
+                  {
+                    if (error_logger)
+                      error_logger (error_logger_data,
+                                    _("format specifications in '%s' and '%s' for argument %zu are not the same"),
+                                    pretty_msgid, pretty_msgstr,
+                                    spec2->numbered[j].number);
+                    err = true;
+                    break;
+                  }
+                j++, i++;
+              }
+            else
+              i++;
+          }
+    }
+
+  return err;
+}
+
+
+struct formatstring_parser formatstring_ocaml =
+{
+  format_parse,
+  format_free,
+  format_get_number_of_directives,
+  NULL,
+  format_check
+};
+
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+   format_parse for strings read from standard input.  */
+
+#include <stdio.h>
+
+static void
+format_print (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+  size_t last;
+  size_t i;
+
+  if (spec == NULL)
+    {
+      printf ("INVALID");
+      return;
+    }
+
+  printf ("(");
+  last = 1;
+  for (i = 0; i < spec->numbered_arg_count; i++)
+    {
+      size_t number = spec->numbered[i].number;
+
+      if (i > 0)
+        printf (" ");
+      if (number < last)
+        abort ();
+      for (; last < number; last++)
+        printf ("_ ");
+      switch (spec->numbered[i].type & FAT_BASIC_MASK)
+        {
+        case FAT_INTEGER:
+          printf ("i");
+          break;
+        case FAT_INT32:
+          printf ("l");
+          break;
+        case FAT_NATIVEINT:
+          printf ("n");
+          break;
+        case FAT_INT64:
+          printf ("L");
+          break;
+        case FAT_STRING:
+          printf ("s");
+          break;
+        case FAT_CHARACTER:
+          printf ("c");
+          break;
+        case FAT_FLOATINGPOINT:
+          printf ("f");
+          break;
+        case FAT_BOOLEAN:
+          printf ("B");
+          break;
+        case FAT_FUNCTION_A:
+          printf ("a");
+          break;
+        case FAT_FUNCTION_T:
+          printf ("t1");
+          break;
+        case FAT_FUNCTION_T2:
+          printf ("t2");
+          break;
+        case FAT_FORMAT_STRING:
+          printf ("\"%s\"", spec->numbered[i].signature);
+          break;
+        default:
+          abort ();
+        }
+      if (spec->numbered[i].type & FAT_OCAML_SYNTAX)
+        printf ("!");
+      if (spec->numbered[i].type & FAT_OPTIONAL_OCAML_SYNTAX)
+        printf ("?");
+      last = number + 1;
+    }
+  printf (")");
+}
+
+int
+main ()
+{
+  for (;;)
+    {
+      char *line = NULL;
+      size_t line_size = 0;
+      int line_len;
+      char *invalid_reason;
+      void *descr;
+
+      line_len = getline (&line, &line_size, stdin);
+      if (line_len < 0)
+        break;
+      if (line_len > 0 && line[line_len - 1] == '\n')
+        line[--line_len] = '\0';
+
+      invalid_reason = NULL;
+      descr = format_parse (line, true, NULL, &invalid_reason);
+
+      format_print (descr);
+      printf ("\n");
+      if (descr == NULL)
+        printf ("%s\n", invalid_reason);
+
+      free (invalid_reason);
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-ocaml.c ../gnulib-lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */
diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c

index 8df7aa1efae8445b1c3810f7146c0155dba20055..928938684f4d3987e519d9259ba7a11e37d22e20 100644 (file)
--- a/gettext-tools/src/format.c
+++ b/gettext-tools/src/format.c
@@ -57,6 +57,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] =
    /* format_pascal */           &formatstring_pascal,
    /* format_modula2 */          &formatstring_modula2,
    /* format_d */                &formatstring_d,
+  /* format_ocaml */            &formatstring_ocaml,
    /* format_smalltalk */        &formatstring_smalltalk,
    /* format_qt */               &formatstring_qt,
    /* format_qt_plural */        &formatstring_qt_plural,
diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h

index 02e1531f1d1b3e96c919196498bb1b3ed011573c..c262a292ba249e50d6318883fde514b1aba70dfa 100644 (file)
--- a/gettext-tools/src/format.h
+++ b/gettext-tools/src/format.h
@@ -123,6 +123,7 @@ extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_lua;
  extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_pascal;
  extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_modula2;
  extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_d;
+extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_ocaml;
  extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_smalltalk;
  extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_qt;
  extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_qt_plural;
diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c

index 7a605b35029e75986f0c0db3f0c7d277289cf7b1..6db1e1809240a612c0bbbd22f2a72277c1a0c583 100644 (file)
--- a/gettext-tools/src/message.c
+++ b/gettext-tools/src/message.c
@@ -57,6 +57,7 @@ const char *const format_language[NFORMATS] =
    /* format_pascal */           "object-pascal",
    /* format_modula2 */          "modula2",
    /* format_d */                "d",
+  /* format_ocaml */            "ocaml",
    /* format_smalltalk */        "smalltalk",
    /* format_qt */               "qt",
    /* format_qt_plursl */        "qt-plural",
@@ -97,6 +98,7 @@ const char *const format_language_pretty[NFORMATS] =
    /* format_pascal */           "Object Pascal",
    /* format_modula2 */          "Modula-2",
    /* format_d */                "D",
+  /* format_ocaml */            "OCaml",
    /* format_smalltalk */        "Smalltalk",
    /* format_qt */               "Qt",
    /* format_qt_plural */        "Qt plural",
diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h

index 5790295cd40f10da8dc518137ce7c255f9ea90d0..147bbd864d209605ffc36b6a8c77647732bb1fdb 100644 (file)
--- a/gettext-tools/src/message.h
+++ b/gettext-tools/src/message.h
@@ -66,6 +66,7 @@ enum format_type
    format_pascal,
    format_modula2,
    format_d,
+  format_ocaml,
    format_smalltalk,
    format_qt,
    format_qt_plural,
@@ -80,7 +81,7 @@ enum format_type
    format_gfc_internal,
    format_ycp
  };
-#define NFORMATS 36     /* Number of format_type enum values.  */
+#define NFORMATS 37     /* Number of format_type enum values.  */
  extern LIBGETTEXTSRC_DLL_VARIABLE const char *const format_language[NFORMATS];
  extern LIBGETTEXTSRC_DLL_VARIABLE const char *const format_language_pretty[NFORMATS];
  
diff --git a/gettext-tools/src/x-ocaml.c b/gettext-tools/src/x-ocaml.c

index 2a6b021f96586f84ec386e313c61a039dbfc2860..5151b7e89514be095333f061d5e58629b36007cf 100644 (file)
--- a/gettext-tools/src/x-ocaml.c
+++ b/gettext-tools/src/x-ocaml.c
@@ -22,121 +22,964 @@
  /* Specification.  */
  #include "x-ocaml.h"
  
+#include <errno.h>
  #include <stdbool.h>
+#include <stdint.h>
  #include <stdio.h>
  #include <stdlib.h>
+#include <string.h>
  
  #include <error.h>
  #include "message.h"
-#include "clean-temp.h"
-#include "concat-filename.h"
-#include "execute.h"
-#include "xvasprintf.h"
-#include "x-po.h"
+#include "string-desc.h"
+#include "xstring-desc.h"
+#include "string-buffer.h"
  #include "xgettext.h"
+#include "xg-pos.h"
+#include "xg-mixed-string.h"
+#include "xg-arglist-context.h"
+#include "xg-arglist-callshape.h"
+#include "xg-arglist-parser.h"
+#include "xg-message.h"
+#include "if-error.h"
+#include "xalloc.h"
+#include "read-file.h"
+#include "unistr.h"
+#include "po-charset.h"
  #include "gettext.h"
  
-/* A convenience macro.  I don't like writing gettext() every time.  */
-#define _(str) gettext (str)
+#define _(s) gettext (s)
  
-/* We don't parse OCaml directly, but instead rely on the 'ocaml-gettext'
-   program, that invokes the 'ocaml-xgettext' program.  Both are contained
-   in the 'opam' package named 'gettext':
-     https://opam.ocaml.org/packages/gettext/
-     https://github.com/gildor478/ocaml-gettext
-     https://github.com/gildor478/ocaml-gettext/blob/master/doc/reference-manual.md
+/* Use tree-sitter.
+   Documentation: <https://tree-sitter.github.io/tree-sitter/using-parsers>  */
+#include <tree_sitter/api.h>
+extern const TSLanguage *tree_sitter_ocaml (void);
  
-   Comments start with '(*' and end with '*)' and can be nested.
-   Reference: <https://ocaml.org/docs/tour-of-ocaml>
+
+/* The OCaml syntax is defined in <https://ocaml.org/docs/language>.
+
+   String syntax: Strings are delimited by double-quotes or by {id| |id} pairs.
+   Backslash is the escape character. Among the escape sequences, there is in
+   particular backslash-newline-spaces_or_tabs and \u{nnnn}.
+   Reference: <https://ocaml.org/manual/5.3/lex.html#sss:stringliterals>
+
+   Comment syntax: Comments start with '(*' and end with '*)' and can be nested.
+   References: <https://ocaml.org/manual/5.3/lex.html#sss:lex:comments>
+               <https://ocaml.org/docs/tour-of-ocaml>
   */
  
+#define DEBUG_OCAML 0
+
  
  /* ====================== Keyword set customization.  ====================== */
  
-/* This function currently has no effect.  */
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
  void
  x_ocaml_extract_all (void)
  {
+  extract_all = true;
  }
  
-/* This function currently has no effect.  */
  void
-x_ocaml_keyword (const char *keyword)
+x_ocaml_keyword (const char *name)
  {
+  if (name == NULL)
+    default_keywords = false;
+  else
+    {
+      const char *end;
+      struct callshape shape;
+      const char *colon;
+
+      if (keywords.table == NULL)
+        hash_init (&keywords, 100);
+
+      split_keywordspec (name, &end, &shape);
+
+      /* The characters between name and end should form a valid identifier.
+         A colon means an invalid parse in split_keywordspec().  */
+      colon = strchr (name, ':');
+      if (colon == NULL || colon >= end)
+        insert_keyword_callshape (&keywords, name, end - name, &shape);
+    }
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.  */
+static void
+init_keywords ()
+{
+  if (default_keywords)
+    {
+      /* Compatible with ocaml-gettext/src/bin/ocaml-xgettext/xgettext.ml.  */
+      /* When adding new keywords here, also update the documentation in
+         xgettext.texi!  */
+      x_ocaml_keyword ("s_");
+      x_ocaml_keyword ("f_");
+      x_ocaml_keyword ("sn_:1,2");
+      x_ocaml_keyword ("fn_:1,2");
+      x_ocaml_keyword ("gettext:2");
+      x_ocaml_keyword ("fgettext:2");
+      x_ocaml_keyword ("dgettext:3");
+      x_ocaml_keyword ("fdgettext:3");
+      x_ocaml_keyword ("dcgettext:3");
+      x_ocaml_keyword ("fdcgettext:3");
+      x_ocaml_keyword ("ngettext:2,3");
+      x_ocaml_keyword ("fngettext:2,3");
+      x_ocaml_keyword ("dngettext:3,4");
+      x_ocaml_keyword ("fdngettext:3,4");
+      x_ocaml_keyword ("dcngettext:3,4");
+      x_ocaml_keyword ("fdcngettext:3,4");
+      default_keywords = false;
+    }
  }
  
  /* This function currently has no effect.  */
  void
  init_flag_table_ocaml (void)
  {
+  /* Compatible with ocaml-gettext/src/bin/ocaml-xgettext/xgettext.ml.  */
+  xgettext_record_flag ("s_:1:impossible-ocaml-format");
+  xgettext_record_flag ("f_:1:ocaml-format");
+  xgettext_record_flag ("sn_:1:impossible-ocaml-format");
+  xgettext_record_flag ("sn_:2:impossible-ocaml-format");
+  xgettext_record_flag ("fn_:1:ocaml-format");
+  xgettext_record_flag ("fn_:2:ocaml-format");
+  xgettext_record_flag ("gettext:2:impossible-ocaml-format");
+  xgettext_record_flag ("fgettext:2:ocaml-format");
+  xgettext_record_flag ("dgettext:3:impossible-ocaml-format");
+  xgettext_record_flag ("fdgettext:3:ocaml-format");
+  xgettext_record_flag ("dcgettext:3:impossible-ocaml-format");
+  xgettext_record_flag ("fdcgettext:3:ocaml-format");
+  xgettext_record_flag ("ngettext:2:impossible-ocaml-format");
+  xgettext_record_flag ("ngettext:3:impossible-ocaml-format");
+  xgettext_record_flag ("fngettext:2:ocaml-format");
+  xgettext_record_flag ("fngettext:3:ocaml-format");
+  xgettext_record_flag ("dngettext:3:impossible-ocaml-format");
+  xgettext_record_flag ("dngettext:4:impossible-ocaml-format");
+  xgettext_record_flag ("fdngettext:3:ocaml-format");
+  xgettext_record_flag ("fdngettext:4:ocaml-format");
+  xgettext_record_flag ("dcngettext:3:impossible-ocaml-format");
+  xgettext_record_flag ("dcngettext:4:impossible-ocaml-format");
+  xgettext_record_flag ("fdcngettext:3:ocaml-format");
+  xgettext_record_flag ("fdcngettext:4:ocaml-format");
+}
+
+
+/* ======================== Parsing via tree-sitter. ======================== */
+/* To understand this code, look at
+     tree-sitter-ocaml/grammars/ocaml/src/node-types.json
+   and
+     tree-sitter-ocaml/grammars/ocaml/src/grammar.json
+ */
+
+/* The tree-sitter's language object.  */
+static const TSLanguage *ts_language;
+
+/* ------------------------- Node types and symbols ------------------------- */
+
+static TSSymbol
+ts_language_symbol (const char *name, bool is_named)
+{
+  TSSymbol result =
+    ts_language_symbol_for_name (ts_language, name, strlen (name), is_named);
+  if (result == 0)
+    /* If we get here, the grammar has evolved in an incompatible way.  */
+    abort ();
+  return result;
+}
+
+static TSFieldId
+ts_language_field (const char *name)
+{
+  TSFieldId result =
+    ts_language_field_id_for_name (ts_language, name, strlen (name));
+  if (result == 0)
+    /* If we get here, the grammar has evolved in an incompatible way.  */
+    abort ();
+  return result;
+}
+
+/* Optimization:
+   Instead of
+     strcmp (ts_node_type (node), "string") == 0
+   it is faster to do
+     ts_node_symbol (node) == ts_symbol_string
+ */
+static TSSymbol ts_symbol_comment;
+static TSSymbol ts_symbol_string;
+static TSSymbol ts_symbol_string_content;
+static TSSymbol ts_symbol_escape_sequence;
+static TSSymbol ts_symbol_quoted_string;
+static TSSymbol ts_symbol_quoted_string_content;
+static TSSymbol ts_symbol_infix_expression;
+static TSSymbol ts_symbol_concat_operator;
+static TSSymbol ts_symbol_application_expression;
+static TSSymbol ts_symbol_value_path;
+static TSSymbol ts_symbol_value_name;
+static TSSymbol ts_symbol_parenthesized_expression;
+static TSSymbol ts_symbol_lparen;
+static TSSymbol ts_symbol_rparen;
+static TSFieldId ts_field_operator;
+static TSFieldId ts_field_left;
+static TSFieldId ts_field_right;
+static TSFieldId ts_field_function;
+
+static inline size_t
+ts_node_line_number (TSNode node)
+{
+  return ts_node_start_point (node).row + 1;
  }
  
+/* -------------------------------- The file -------------------------------- */
  
-/* ========================= Extracting strings.  ========================== */
+/* The entire contents of the file being analyzed.  */
+static const char *contents;
  
+/* -------------------------------- Comments -------------------------------- */
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+/* Saves a comment line.  */
+static void save_comment_line (string_desc_t gist)
+{
+  /* Remove leading whitespace.  */
+  while (sd_length (gist) > 0
+         && (sd_char_at (gist, 0) == ' '
+             || sd_char_at (gist, 0) == '\t'))
+    gist = sd_substring (gist, 1, sd_length (gist));
+  /* Remove trailing whitespace.  */
+  size_t len = sd_length (gist);
+  while (len > 0
+         && (sd_char_at (gist, len - 1) == ' '
+             || sd_char_at (gist, len - 1) == '\t'))
+    len--;
+  gist = sd_substring (gist, 0, len);
+  savable_comment_add (sd_c (gist));
+}
+
+/* Does the comment handling for NODE.
+   Updates savable_comment, last_comment_line, last_non_comment_line.
+   It is important that this function gets called
+     - for each node (not only the named nodes!),
+     - in depth-first traversal order.  */
+static void handle_comments (TSNode node)
+{
+  #if DEBUG_OCAML
+  fprintf (stderr, "LCL=%d LNCL=%d node=[%s]|%s|\n", last_comment_line, last_non_comment_line, ts_node_type (node), ts_node_string (node));
+  #endif
+  if (last_comment_line < last_non_comment_line
+      && last_non_comment_line < ts_node_line_number (node))
+    /* We have skipped over a newline.  This newline terminated a line
+       with non-comment tokens, after the last comment line.  */
+    savable_comment_reset ();
+
+  if (ts_node_symbol (node) == ts_symbol_comment)
+    {
+      string_desc_t entire =
+        sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+                     contents + ts_node_start_byte (node));
+      /* It should start and end with the comment markers.  */
+      if (!(sd_length (entire) >= 4
+            && sd_char_at (entire, 0) == '('
+            && sd_char_at (entire, 1) == '*'
+            && sd_char_at (entire, sd_length (entire) - 2) == '*'
+            && sd_char_at (entire, sd_length (entire) - 1) == ')'))
+        abort ();
+      string_desc_t gist = sd_substring (entire, 2, sd_length (entire) - 2);
+      /* Split into lines.
+         Remove leading and trailing whitespace from each line.  */
+      for (;;)
+        {
+          ptrdiff_t nl_index = sd_index (gist, '\n');
+          if (nl_index >= 0)
+            {
+              save_comment_line (sd_substring (gist, 0, nl_index));
+              gist = sd_substring (gist, nl_index + 1, sd_length (gist));
+            }
+          else
+            {
+              save_comment_line (gist);
+              break;
+            }
+        }
+      last_comment_line = ts_node_end_point (node).row + 1;
+    }
+  else
+    last_non_comment_line = ts_node_line_number (node);
+}
+
+/* ---------------------------- String literals ---------------------------- */
+
+/* Determines whether NODE represents the string concatenation operator '^'.  */
+static bool
+is_string_concatenation_operator (TSNode node)
+{
+  if (ts_node_symbol (node) == ts_symbol_concat_operator)
+    {
+      string_desc_t operator_string =
+        sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+                     contents + ts_node_start_byte (node));
+      if (sd_equals (operator_string, sd_from_c ("^")))
+        return true;
+    }
+  return false;
+}
+
+/* Determines whether NODE represents a string literal or the concatenation
+   of string literals (via the '^' operator).  */
  static bool
-is_not_header (const message_ty *mp)
+is_string_literal (TSNode node)
  {
-  return !is_header (mp);
+ start:
+  if (ts_node_symbol (node) == ts_symbol_string
+      || ts_node_symbol (node) == ts_symbol_quoted_string)
+    return true;
+  if (ts_node_symbol (node) == ts_symbol_infix_expression
+      && is_string_concatenation_operator (ts_node_child_by_field_id (node, ts_field_operator))
+      /* Recurse into the left and right subnodes.  */
+      && is_string_literal (ts_node_child_by_field_id (node, ts_field_left)))
+    {
+      /*return is_string_literal (ts_node_child_by_field_id (node, ts_field_right));*/
+      node = ts_node_child_by_field_id (node, ts_field_right);
+      goto start;
+    }
+  if (ts_node_symbol (node) == ts_symbol_parenthesized_expression)
+    {
+      uint32_t count = ts_node_child_count (node);
+      if (count > 0
+          && ts_node_symbol (ts_node_child (node, 0)) == ts_symbol_lparen
+          && ts_node_symbol (ts_node_child (node, count - 1)) == ts_symbol_rparen)
+        {
+          uint32_t subnodes = 0;
+          uint32_t last_subnode_index = 0;
+          uint32_t i;
+          for (i = 1; i < count - 1; i++)
+            {
+              TSNode subnode = ts_node_child (node, i);
+              if (ts_node_is_named (subnode)
+                  && ts_node_symbol (subnode) != ts_symbol_comment)
+                {
+                  subnodes++;
+                  last_subnode_index = i;
+                }
+            }
+          if (subnodes == 1)
+            {
+              TSNode subnode = ts_node_child (node, last_subnode_index);
+              /* Recurse.  */
+              /*return is_string_literal (subnode);*/
+              node = subnode;
+              goto start;
+            }
+        }
+    }
+
+  return false;
+}
+
+/* Appends the string literal pieces from NODE to BUFFER.  */
+static void
+string_literal_accumulate_pieces (TSNode node,
+                                  struct string_buffer *buffer)
+{
+ start:
+  if (ts_node_symbol (node) == ts_symbol_string)
+    {
+      uint32_t count = ts_node_named_child_count (node);
+      uint32_t i;
+      for (i = 0; i < count; i++)
+        {
+          TSNode subnode = ts_node_named_child (node, i);
+          if (ts_node_symbol (subnode) == ts_symbol_string_content)
+            {
+              const char *subnode_start = contents + ts_node_start_byte (subnode);
+              const char *subnode_end = contents + ts_node_end_byte (subnode);
+              uint32_t subcount = ts_node_child_count (subnode);
+              #if DEBUG_OCAML
+              {
+                fprintf (stderr, "string_content children:\n");
+                uint32_t j;
+                for (j = 0; j < subcount; j++)
+                  fprintf (stderr, "%u -> [%s]|%s|\n", j, ts_node_type (ts_node_child (subnode, j)), ts_node_string (ts_node_child (subnode, j)));
+              }
+              #endif
+              /* Iterate over the children nodes of type escape_sequence.
+                 Other children nodes, such as conversion_specification or
+                 pretty_printing_indication, can be ignored.  */
+              uint32_t j;
+              for (j = 0; j < subcount; j++)
+                {
+                  TSNode subsubnode = ts_node_child (subnode, j);
+                  if (ts_node_symbol (subsubnode) == ts_symbol_escape_sequence)
+                    {
+                      const char *escape_start = contents + ts_node_start_byte (subsubnode);
+                      const char *escape_end = contents + ts_node_end_byte (subsubnode);
+                      sb_xappend_desc (buffer,
+                                       sd_new_addr (escape_start - subnode_start, subnode_start));
+
+                      /* The escape sequence must start with a backslash.  */
+                      if (!(escape_end - escape_start >= 2 && escape_start[0] == '\\'))
+                        abort ();
+                      /* tree-sitter's grammar.js allows more escape sequences
+                         than the OCaml system.  Give a warning for those cases
+                         where the OCaml system gives an error.  */
+                      bool invalid = false;
+                      if (escape_end - escape_start >= 2
+                          && (escape_start[1] == '\n' || escape_start[1] == '\r'))
+                        /* backslash-newline-spaces_or_tabs  */
+                        ;
+                      else if (escape_end - escape_start == 2)
+                        {
+                          switch (escape_start[1])
+                            {
+                            case '\\':
+                            case '"':
+                            case '\'':
+                            case ' ':
+                              sb_xappend1 (buffer, escape_start[1]);
+                              break;
+                            case 'n':
+                              sb_xappend1 (buffer, '\n');
+                              break;
+                            case 'r':
+                              sb_xappend1 (buffer, '\r');
+                              break;
+                            case 't':
+                              sb_xappend1 (buffer, '\t');
+                              break;
+                            case 'b':
+                              sb_xappend1 (buffer, 0x08);
+                              break;
+                            default:
+                              abort ();
+                            }
+                        }
+                      else if (escape_end - escape_start == 4
+                               && (escape_start[1] >= '0'
+                                   && escape_start[1] <= '9'))
+                        {
+                          /* Only exactly 3 decimal digits are accepted.  */
+                          unsigned int value = 0;
+                          const char *p;
+                          for (p = escape_start + 1; p < escape_end; p++)
+                            {
+                              /* No overflow is possible.  */
+                              char c = *p;
+                              if (c >= '0' && c <= '9')
+                                value = value * 10 + (c - '0');
+                              else
+                                abort ();
+                            }
+                          if (value > 0xFF)
+                            invalid = true;
+                          if (!invalid)
+                            sb_xappend1 (buffer, (unsigned char) value);
+                        }
+                      else if (escape_end - escape_start == 4
+                               && escape_start[1] == 'x')
+                        {
+                          /* Only exactly 2 hexadecimal digits are accepted.  */
+                          unsigned int value = 0;
+                          const char *p;
+                          for (p = escape_start + 2; p < escape_end; p++)
+                            {
+                              /* No overflow is possible.  */
+                              char c = *p;
+                              if (c >= '0' && c <= '9')
+                                value = (value << 4) + (c - '0');
+                              else if (c >= 'A' && c <= 'Z')
+                                value = (value << 4) + (c - 'A' + 10);
+                              else if (c >= 'a' && c <= 'z')
+                                value = (value << 4) + (c - 'a' + 10);
+                              else
+                                abort ();
+                            }
+                          sb_xappend1 (buffer, (unsigned char) value);
+                        }
+                      else if (escape_end - escape_start == 5
+                               && escape_start[1] == 'o')
+                        {
+                          /* Only exactly 3 octal digits are accepted.  */
+                          unsigned int value = 0;
+                          const char *p;
+                          for (p = escape_start + 2; p < escape_end; p++)
+                            {
+                              /* No overflow is possible.  */
+                              char c = *p;
+                              if (c >= '0' && c <= '7')
+                                value = (value << 3) + (c - '0');
+                              else
+                                abort ();
+                            }
+                          if (value > 0xFF)
+                            abort ();
+                          sb_xappend1 (buffer, (unsigned char) value);
+                        }
+                      else if (escape_end - escape_start > 4
+                               && escape_start[1] == 'u'
+                               && escape_start[2] == '{'
+                               && escape_end[-1] == '}')
+                        {
+                          if (escape_end - escape_start <= 4 + 6)
+                            {
+                              /* 1 to 6 hexadecimal digits are accepted.  */
+                              unsigned int value = 0;
+                              const char *p;
+                              for (p = escape_start + 3; p < escape_end - 1; p++)
+                                {
+                                  /* No overflow is possible.  */
+                                  char c = *p;
+                                  if (c >= '0' && c <= '9')
+                                    value = (value << 4) + (c - '0');
+                                  else if (c >= 'A' && c <= 'Z')
+                                    value = (value << 4) + (c - 'A' + 10);
+                                  else if (c >= 'a' && c <= 'z')
+                                    value = (value << 4) + (c - 'a' + 10);
+                                  else
+                                    abort ();
+                                }
+                              if (value >= 0x110000
+                                  || (value >= 0xD800 && value <= 0xDFFF))
+                                invalid = true;
+                              if (!invalid)
+                                {
+                                  uint8_t buf[6];
+                                  int n = u8_uctomb (buf, value, sizeof (buf));
+                                  if (n > 0)
+                                    sb_xappend_desc (buffer,
+                                                     sd_new_addr (n, (const char *) buf));
+                                  else
+                                    invalid = true;
+                                }
+                            }
+                          else
+                            invalid = true;
+                        }
+                      else
+                        abort ();
+                      if (invalid)
+                        {
+                          size_t line_number = ts_node_line_number (subnode);
+                          if_error (IF_SEVERITY_WARNING,
+                                    logical_file_name, line_number, (size_t)(-1), false,
+                                    _("invalid escape sequence in string"));
+                        }
+
+                      subnode_start = escape_end;
+                    }
+                }
+              sb_xappend_desc (buffer,
+                               sd_new_addr (subnode_end - subnode_start, subnode_start));
+            }
+          else
+            abort ();
+        }
+    }
+  else if (ts_node_symbol (node) == ts_symbol_quoted_string)
+    {
+      uint32_t count = ts_node_named_child_count (node);
+      uint32_t i;
+      for (i = 0; i < count; i++)
+        {
+          TSNode subnode = ts_node_named_child (node, i);
+          if (ts_node_symbol (subnode) == ts_symbol_quoted_string_content)
+            {
+              /* We can ignore the children nodes here, since none of them can
+                 be of type escape_sequence.  */
+              string_desc_t subnode_string =
+                sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                             contents + ts_node_start_byte (subnode));
+              sb_xappend_desc (buffer, subnode_string);
+            }
+        }
+    }
+  else if (ts_node_symbol (node) == ts_symbol_infix_expression
+           && is_string_concatenation_operator (ts_node_child_by_field_id (node, ts_field_operator)))
+    {
+      /* Recurse into the left and right subnodes.  */
+      string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_left), buffer);
+      /*string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_right), buffer);*/
+      node = ts_node_child_by_field_id (node, ts_field_right);
+      goto start;
+    }
+  else if (ts_node_symbol (node) == ts_symbol_parenthesized_expression)
+    {
+      uint32_t count = ts_node_child_count (node);
+      /* is_string_literal has already checked that the first child node is '(',
+         that the last child node is ')', and that in-between there is exactly
+         one non-comment node.  */
+      if (!(count > 0))
+        abort ();
+      uint32_t i;
+      for (i = 1; i < count - 1; i++)
+        {
+          TSNode subnode = ts_node_child (node, i);
+          if (ts_node_is_named (subnode)
+              && ts_node_symbol (subnode) != ts_symbol_comment)
+            {
+              /* Recurse.  */
+              /*string_literal_accumulate_pieces (subnode, buffer);*/
+              node = subnode;
+              goto start;
+            }
+        }
+      abort ();
+    }
+  else
+    abort ();
+}
+
+/* Combines the pieces of a string literal or concatenated string literal.
+   Returns a freshly allocated, mostly UTF-8 encoded string.  */
+static char *
+string_literal_value (TSNode node)
+{
+  struct string_buffer buffer;
+  sb_init (&buffer);
+  string_literal_accumulate_pieces (node, &buffer);
+  return sb_xdupfree_c (&buffer);
+}
+
+/* --------------------- Parsing and string extraction --------------------- */
+
+/* Context lookup table.  */
+static flag_context_list_table_ty *flag_context_list_table;
+
+/* Maximum supported nesting depth.  */
+#define MAX_NESTING_DEPTH 1000
+
+static int nesting_depth;
+
+/* The file is parsed into an abstract syntax tree.  Scan the syntax tree,
+   looking for a keyword in function position of a application_expression,
+   followed by followed by a string among the arguments.
+   When we see this pattern, we have something to remember.
+
+     Normal handling: Look for
+       keyword ... msgid ...
+     Plural handling: Look for
+       keyword ... msgid ... msgid_plural ...
+
+   We use recursion because the arguments before msgid or between msgid
+   and msgid_plural can contain subexpressions of the same form.  */
+
+/* Forward declarations.  */
+static void extract_from_node (TSNode node,
+                               bool ignore,
+                               flag_region_ty *outer_region,
+                               message_list_ty *mlp);
+
+/* Extracts messages from the function application consisting of
+     - FUNCTION_NODE: a tree node of type 'value_path',
+     - FUNCTION_NAME_NODE: a tree node of type 'value_name',
+       the last named node of FUNCTION_NODE,
+     - ARGS_NODE: a tree node of type 'application_expression',
+       of which FUNCTION_NAME is the 'function' field.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_function_call (TSNode function_node,
+                            TSNode function_name_node,
+                            TSNode args_node,
+                            flag_region_ty *outer_region,
+                            message_list_ty *mlp)
+{
+  uint32_t args_count = ts_node_child_count (args_node);
+
+  string_desc_t function_name =
+    sd_new_addr (ts_node_end_byte (function_name_node) - ts_node_start_byte (function_name_node),
+                 contents + ts_node_start_byte (function_name_node));
+
+  /* Context iterator.  */
+  flag_context_list_iterator_ty next_context_iter =
+    flag_context_list_iterator (
+      flag_context_list_table_lookup (
+        flag_context_list_table,
+        sd_data (function_name), sd_length (function_name)));
+
+  /* Information associated with the callee.  */
+  const struct callshapes *next_shapes = NULL;
+
+  /* Look in the keywords table.  */
+  void *keyword_value;
+  if (hash_find_entry (&keywords,
+                       sd_data (function_name), sd_length (function_name),
+                       &keyword_value)
+      == 0)
+    next_shapes = (const struct callshapes *) keyword_value;
+
+  if (next_shapes != NULL)
+    {
+      /* We have a function, named by a relevant identifier, with an argument
+         list.  */
+
+      struct arglist_parser *argparser =
+        arglist_parser_alloc (mlp, next_shapes);
+
+      /* Current argument number.  */
+      uint32_t arg;
+      uint32_t i;
+
+      arg = 0;
+      for (i = 0; i < args_count; i++)
+        {
+          TSNode arg_node = ts_node_child (args_node, i);
+          handle_comments (arg_node);
+          if (ts_node_is_named (arg_node)
+              && ts_node_symbol (arg_node) != ts_symbol_comment
+              && !ts_node_eq (arg_node, function_node))
+            {
+              arg++;
+              flag_region_ty *arg_region =
+                inheriting_region (outer_region,
+                                   flag_context_list_iterator_advance (
+                                     &next_context_iter));
+
+              bool already_extracted = false;
+              if (is_string_literal (arg_node))
+                {
+                  lex_pos_ty pos;
+                  pos.file_name = logical_file_name;
+                  pos.line_number = ts_node_line_number (arg_node);
+
+                  char *string = string_literal_value (arg_node);
+
+                  if (extract_all)
+                    {
+                      remember_a_message (mlp, NULL, string, true, false,
+                                          arg_region, &pos,
+                                          NULL, savable_comment, true);
+                      already_extracted = true;
+                    }
+                  else
+                    {
+                      mixed_string_ty *mixed_string =
+                        mixed_string_alloc_utf8 (string, lc_string,
+                                                 pos.file_name, pos.line_number);
+                      arglist_parser_remember (argparser, arg, mixed_string,
+                                               arg_region,
+                                               pos.file_name, pos.line_number,
+                                               savable_comment, true);
+                    }
+                }
+
+              if (!already_extracted)
+                {
+                  if (++nesting_depth > MAX_NESTING_DEPTH)
+                    if_error (IF_SEVERITY_FATAL_ERROR,
+                              logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                              _("too many open parentheses"));
+                  extract_from_node (arg_node,
+                                     false,
+                                     arg_region,
+                                     mlp);
+                  nesting_depth--;
+                }
+
+              unref_region (arg_region);
+            }
+        }
+      arglist_parser_done (argparser, arg);
+      return;
+    }
+
+  /* Recurse.  */
+
+  uint32_t i;
+
+  for (i = 0; i < args_count; i++)
+    {
+      TSNode arg_node = ts_node_child (args_node, i);
+      handle_comments (arg_node);
+      if (ts_node_is_named (arg_node)
+          && ts_node_symbol (arg_node) != ts_symbol_comment)
+        {
+          flag_region_ty *arg_region =
+            inheriting_region (outer_region,
+                               flag_context_list_iterator_advance (
+                                 &next_context_iter));
+
+          if (++nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                      _("too many open parentheses"));
+          extract_from_node (arg_node,
+                             false,
+                             arg_region,
+                             mlp);
+          nesting_depth--;
+
+          unref_region (arg_region);
+        }
+    }
+}
+
+/* Extracts messages in the syntax tree NODE.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_node (TSNode node,
+                   bool ignore,
+                   flag_region_ty *outer_region,
+                   message_list_ty *mlp)
+{
+  if (extract_all && !ignore && is_string_literal (node))
+    {
+      lex_pos_ty pos;
+      pos.file_name = logical_file_name;
+      pos.line_number = ts_node_line_number (node);
+
+      char *string = string_literal_value (node);
+
+      remember_a_message (mlp, NULL, string, true, false,
+                          outer_region, &pos,
+                          NULL, savable_comment, true);
+    }
+
+  if (ts_node_symbol (node) == ts_symbol_application_expression
+      && ts_node_named_child_count (node) >= 2)
+    {
+      TSNode function_node = ts_node_named_child (node, 0);
+      /* This is the field called 'function'.  */
+      if (! ts_node_eq (ts_node_child_by_field_id (node, ts_field_function),
+                        function_node))
+        abort ();
+      if (ts_node_symbol (function_node) == ts_symbol_value_path
+          && ts_node_named_child_count (function_node) > 0)
+        {
+          TSNode function_name_node =
+            ts_node_named_child (function_node,
+                                 ts_node_named_child_count (function_node) - 1);
+          if (ts_node_symbol (function_name_node) == ts_symbol_value_name)
+            {
+              extract_from_function_call (function_node, function_name_node, node,
+                                          outer_region,
+                                          mlp);
+              return;
+            }
+        }
+    }
+
+  /* Recurse.  */
+  if (!(ts_node_symbol (node) == ts_symbol_comment))
+    {
+      ignore = ignore || is_string_literal (node);
+      uint32_t count = ts_node_child_count (node);
+      uint32_t i;
+      for (i = 0; i < count; i++)
+        {
+          TSNode subnode = ts_node_child (node, i);
+          handle_comments (subnode);
+          if (++nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name, ts_node_line_number (subnode), (size_t)(-1), false,
+                      _("too many open parentheses, brackets, or braces"));
+          extract_from_node (subnode,
+                             ignore,
+                             outer_region,
+                             mlp);
+          nesting_depth--;
+       }
+    }
  }
  
  void
-extract_ocaml (const char *found_in_dir, const char *real_filename,
-               const char *logical_filename,
+extract_ocaml (FILE *f,
+               const char *real_filename, const char *logical_filename,
                 flag_context_list_table_ty *flag_table,
                 msgdomain_list_ty *mdlp)
  {
-  /* Invoke
-       ocaml-gettext --action extract --extract-pot <temp>.pot real_filename  */
+  message_list_ty *mlp = mdlp->item[0]->messages;
  
-  /* First, create a temporary directory where this invocation can place its
-     output.  */
-  struct temp_dir *tmpdir = create_temp_dir ("ocgt", NULL, false);
-  if (tmpdir == NULL)
-    exit (EXIT_FAILURE);
+  logical_file_name = xstrdup (logical_filename);
  
-  /* Prepare the temporary POT file name.  */
-  char *temp_file_name = xconcatenated_filename (tmpdir->dir_name, "temp.pot", NULL);
-  register_temp_file (tmpdir, temp_file_name);
+  last_comment_line = -1;
+  last_non_comment_line = -1;
  
-  /* Invoke ocaml-gettext.  */
-  const char *progname = "ocaml-gettext";
-  {
-    const char *argv[7];
-    int exitstatus;
-    /* Prepare arguments.  */
-    argv[0] = progname;
-    argv[1] = "--action";
-    argv[2] = "extract";
-    argv[3] = "--extract-pot";
-    argv[4] = temp_file_name;
-    argv[5] = logical_filename;
-    argv[6] = NULL;
-    exitstatus = execute (progname, progname, argv, NULL, found_in_dir,
-                          true, false, false, false, true, false, NULL);
-    if (exitstatus != 0)
-      error (EXIT_FAILURE, 0, _("%s subprocess failed with exit code %d"),
-             progname, exitstatus);
-  }
+  flag_context_list_table = flag_table;
+  nesting_depth = 0;
+
+  init_keywords ();
+
+  if (ts_language == NULL)
+    {
+      ts_language = tree_sitter_ocaml ();
+      ts_symbol_comment                  = ts_language_symbol ("comment", true);
+      ts_symbol_string                   = ts_language_symbol ("string", true);
+      ts_symbol_string_content           = ts_language_symbol ("string_content", true);
+      ts_symbol_escape_sequence          = ts_language_symbol ("escape_sequence", true);
+      ts_symbol_quoted_string            = ts_language_symbol ("quoted_string", true);;
+      ts_symbol_quoted_string_content    = ts_language_symbol ("quoted_string_content", true);;
+      ts_symbol_infix_expression         = ts_language_symbol ("infix_expression", true);
+      ts_symbol_concat_operator          = ts_language_symbol ("concat_operator", true);
+      ts_symbol_application_expression   = ts_language_symbol ("application_expression", true);
+      ts_symbol_value_path               = ts_language_symbol ("value_path", true);
+      ts_symbol_value_name               = ts_language_symbol ("value_name", true);
+      ts_symbol_parenthesized_expression = ts_language_symbol ("parenthesized_expression", true);
+      ts_symbol_lparen                   = ts_language_symbol ("(", false);
+      ts_symbol_rparen                   = ts_language_symbol (")", false);
+      ts_field_operator = ts_language_field ("operator");
+      ts_field_left     = ts_language_field ("left");
+      ts_field_right    = ts_language_field ("right");
+      ts_field_function = ts_language_field ("function");
+    }
+
+  /* Read the file into memory.  */
+  char *contents_data;
+  size_t contents_length;
+  contents_data = read_file (real_filename, 0, &contents_length);
+  if (contents_data == NULL)
+    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+           real_filename);
+
+  /* tree-sitter works only on files whose size fits in an uint32_t.  */
+  if (contents_length > 0xFFFFFFFFUL)
+    error (EXIT_FAILURE, 0, _("file \"%s\" is unsupported because too large"),
+           real_filename);
  
-  /* Read the resulting POT file.  */
+  /* OCaml source files are "expected to be" UTF-8 encoded.
+     <https://ocaml.org/manual/5.3/lex.html#sss:lex:text-encoding>  */
+  if (u8_check ((uint8_t *) contents_data, contents_length) != NULL)
+    error (EXIT_FAILURE, 0,
+           _("file \"%s\" is invalid because not UTF-8 encoded"),
+           real_filename);
+  xgettext_current_source_encoding = po_charset_utf8;
+
+  /* Create a parser.  */
+  TSParser *parser = ts_parser_new ();
+
+  /* Set the parser's language.  */
+  ts_parser_set_language (parser, ts_language);
+
+  /* Parse the file, producing a syntax tree.  */
+  TSTree *tree = ts_parser_parse_string (parser, NULL, contents_data, contents_length);
+
+  #if DEBUG_OCAML
+  /* For debugging: Print the tree.  */
    {
-    FILE *fp = fopen (temp_file_name, "r");
-    if (fp == NULL)
-      error (EXIT_FAILURE, 0, _("%s subprocess did not create the expected file"),
-             progname);
-    char *dummy_filename = xasprintf (_("(output from '%s')"), progname);
-    extract_po (fp, temp_file_name, dummy_filename, flag_table, mdlp);
-    fclose (fp);
-    free (dummy_filename);
+    char *tree_as_string = ts_node_string (ts_tree_root_node (tree));
+    fprintf (stderr, "Syntax tree: %s\n", tree_as_string);
+    free (tree_as_string);
    }
+  #endif
  
-  cleanup_temp_dir (tmpdir);
+  contents = contents_data;
  
-  if (xgettext_omit_header)
-    {
-      /* Remove the header entry.  */
-      if (mdlp->nitems > 0)
-        message_list_remove_if_not (mdlp->item[0]->messages, is_not_header);
-    }
+  extract_from_node (ts_tree_root_node (tree),
+                     false,
+                     null_context_region (),
+                     mlp);
+
+  ts_tree_delete (tree);
+  ts_parser_delete (parser);
+  free (contents_data);
+
+  logical_file_name = NULL;
  }
diff --git a/gettext-tools/src/x-ocaml.h b/gettext-tools/src/x-ocaml.h

index 2efa4574f0010a24fc4bb40ec973eb4bae5e4898..0504daef079a15e5066a9c31dc06c32881e92df4 100644 (file)
--- a/gettext-tools/src/x-ocaml.h
+++ b/gettext-tools/src/x-ocaml.h
@@ -31,10 +31,11 @@ extern "C" {
    { "ml",     "OCaml"  },                                               \
  
  #define SCANNERS_OCAML \
-  { "OCaml",            NULL, extract_ocaml,                            \
-                        NULL, NULL, NULL },                             \
+  { "OCaml",            extract_ocaml, NULL,                            \
+                        &flag_table_ocaml, &formatstring_ocaml, NULL }, \
  
-extern void extract_ocaml (const char *found_in_dir, const char *real_filename,
+/* Scan a OCaml file and add its translatable strings to mdlp.  */
+extern void extract_ocaml (FILE *fp, const char *real_filename,
                             const char *logical_filename,
                             flag_context_list_table_ty *flag_table,
                             msgdomain_list_ty *mdlp);
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c

index 92b77add06b3fa0e164c9e53b00b98b3f337d9d8..5f7fa51df60f05507eec94ad8af2de160c17aaf1 100644 (file)
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -225,6 +225,7 @@ static flag_context_list_table_ty flag_table_awk;
  static flag_context_list_table_ty flag_table_lua;
  static flag_context_list_table_ty flag_table_modula2;
  static flag_context_list_table_ty flag_table_d;
+static flag_context_list_table_ty flag_table_ocaml;
  static flag_context_list_table_ty flag_table_vala;
  static flag_context_list_table_ty flag_table_tcl;
  static flag_context_list_table_ty flag_table_perl;
@@ -1234,8 +1235,8 @@ Language specific options:\n"));
                                  (only languages C, C++, ObjectiveC, Python,\n\
                                  Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
                                  Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
-                                awk, Lua, Modula-2, D, Vala, Tcl, Perl, PHP,\n\
-                                GCC-source, Glade, GSettings)\n"));
+                                awk, Lua, Modula-2, D, OCaml, Vala, Tcl, Perl,\n\
+                                PHP, GCC-source, Glade, GSettings)\n"));
        printf (_("\
    -kWORD, --keyword=WORD      look for WORD as an additional keyword\n\
    -k, --keyword               do not to use default keywords\n"));
@@ -1243,8 +1244,8 @@ Language specific options:\n"));
                                  (only languages C, C++, ObjectiveC, Python,\n\
                                  Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
                                  Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
-                                awk, Lua, Modula-2, D, Vala, Tcl, Perl, PHP,\n\
-                                GCC-source, Glade, GSettings, Desktop)\n"));
+                                awk, Lua, Modula-2, D, OCaml, Vala, Tcl, Perl,\n\
+                                PHP, GCC-source, Glade, GSettings, Desktop)\n"));
        printf (_("\
        --flag=WORD:ARG:FLAG    additional flag for strings inside the argument\n\
                                number ARG of keyword WORD\n"));
@@ -1252,8 +1253,8 @@ Language specific options:\n"));
                                  (only languages C, C++, ObjectiveC, Python,\n\
                                  Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
                                  Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
-                                awk, Lua, Modula-2, D, Vala, Tcl, Perl, PHP,\n\
-                                GCC-source, YCP)\n"));
+                                awk, Lua, Modula-2, D, OCaml, Vala, Tcl, Perl,\n\
+                                PHP, GCC-source, YCP)\n"));
        printf (_("\
        --tag=WORD:FORMAT       defines the behaviour of tagged template literals\n\
                                with tag WORD\n"));
@@ -1786,6 +1787,11 @@ xgettext_record_flag (const char *optionstring)
                                                      name_start, name_end,
                                                      argnum, value, pass);
                      break;
+                  case format_ocaml:
+                    flag_context_list_table_insert (&flag_table_ocaml, XFORMAT_PRIMARY,
+                                                    name_start, name_end,
+                                                    argnum, value, pass);
+                    break;
                    case format_smalltalk:
                      break;
                    case format_qt:
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am

index f3166899936e718114b7423dce0fc06bb7eb2aa5..3ff4ef642a76010f4e869fbb86127f9525a7d332 100644 (file)
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am
@@ -145,6 +145,7 @@ TESTS = gettext-1 gettext-2 \
         xgettext-lua-stackovfl-1 xgettext-lua-stackovfl-2 \
         xgettext-lua-stackovfl-3 xgettext-lua-stackovfl-4 \
         xgettext-modula2-1 xgettext-modula2-2 \
+       xgettext-ocaml-1 xgettext-ocaml-2 \
         xgettext-objc-1 xgettext-objc-2 \
         xgettext-perl-1 xgettext-perl-2 xgettext-perl-3 xgettext-perl-4 \
         xgettext-perl-5 xgettext-perl-6 xgettext-perl-7 xgettext-perl-8 \
@@ -214,6 +215,7 @@ TESTS = gettext-1 gettext-2 \
         format-lisp-1 format-lisp-2 \
         format-lua-1 format-lua-2 \
         format-modula2-1 format-modula2-2 \
+       format-ocaml-1 format-ocaml-2 \
         format-php-1 format-php-2 format-php-3 \
         format-python-1 format-python-2 format-python-3 \
         format-python-brace-1 format-python-brace-2 \
diff --git a/gettext-tools/tests/format-ocaml-1 b/gettext-tools/tests/format-ocaml-1

new file mode 100755 (executable)

index 0000000..0a9b4dc
--- /dev/null
+++ b/gettext-tools/tests/format-ocaml-1
@@ -0,0 +1,231 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test recognition of OCaml format strings.
+
+cat <<\EOF > f-oc-1.data
+# Valid: no argument
+"abc%!"
+# Valid: no argument
+"abc%%"
+# Valid: no argument
+"abc%@"
+# Valid: no argument
+"abc%,"
+# Valid: one integer argument
+"abc%d"
+# Valid: one integer argument
+"abc%i"
+# Valid: one integer argument
+"abc%u"
+# Valid: one integer argument
+"abc%x"
+# Valid: one integer argument
+"abc%X"
+# Valid: one integer argument
+"abc%o"
+# Valid: one int32 argument
+"abc%ld"
+# Valid: one int32 argument
+"abc%li"
+# Valid: one int32 argument
+"abc%lu"
+# Valid: one int32 argument
+"abc%lx"
+# Valid: one int32 argument
+"abc%lX"
+# Valid: one int32 argument
+"abc%lo"
+# Valid: one nativeint argument
+"abc%nd"
+# Valid: one nativeint argument
+"abc%ni"
+# Valid: one nativeint argument
+"abc%nu"
+# Valid: one nativeint argument
+"abc%nx"
+# Valid: one nativeint argument
+"abc%nX"
+# Valid: one nativeint argument
+"abc%no"
+# Valid: one int64 argument
+"abc%Ld"
+# Valid: one int64 argument
+"abc%Li"
+# Valid: one int64 argument
+"abc%Lu"
+# Valid: one int64 argument
+"abc%Lx"
+# Valid: one int64 argument
+"abc%LX"
+# Valid: one int64 argument
+"abc%Lo"
+# Valid: one string argument
+"abc%s"
+# Valid: one string argument
+"abc%S"
+# Valid: one character argument
+"abc%c"
+# Valid: one character argument
+"abc%C"
+# Valid: one floating-point argument
+"abc%f"
+# Valid: one floating-point argument
+"abc%e"
+# Valid: one floating-point argument
+"abc%E"
+# Valid: one floating-point argument
+"abc%g"
+# Valid: one floating-point argument
+"abc%G"
+# Valid: one floating-point argument
+"abc%h"
+# Valid: one floating-point argument
+"abc%H"
+# Valid: one floating-point argument
+"abc%F"
+# Valid: one boolean argument
+"abc%B"
+# Valid: one function argument
+"abc%a"
+# Valid: one function-with-arg argument
+"abc%t"
+# Valid: one format-string argument
+"abc%{%s%}"
+# Valid: one format-string argument
+"abc%{%S%}"
+# Valid: one format-string argument and one string argument
+"abc%(%s%)"
+# Valid: one format-string argument and one string argument
+"abc%(%S%)"
+# Valid: one argument with flags
+"abc%0#g"
+# Valid: one argument with width
+"abc%2g"
+# Valid: one argument with width
+"abc%*g"
+# Valid: one argument with precision
+"abc%.4g"
+# Valid: one argument with precision
+"abc%.*g"
+# Valid: one argument with width and precision
+"abc%14.4g"
+# Valid: one argument with width and precision
+"abc%14.*g"
+# Valid: one argument with width and precision
+"abc%*.4g"
+# Valid: one argument with width and precision
+"abc%*.*g"
+# Invalid: unterminated
+"abc%"
+# Invalid: unterminated
+"abc%l"
+# Invalid: unterminated
+"abc%n"
+# Invalid: unterminated
+"abc%L"
+# Invalid: unknown format specifier
+"abc%y"
+# Invalid: unknown format specifier
+"abc%A"
+# Invalid: unknown format specifier
+"abc%T"
+# Invalid: %{ and %} don't match
+"abc%{%s"
+# Invalid: %{ and %} don't match
+"abc%s%}"
+# Invalid: %{ and %} don't match
+"abc%s%}%{"
+# Invalid: %( and %) don't match
+"abc%(%s"
+# Invalid: %( and %) don't match
+"abc%s%)"
+# Invalid: %( and %) don't match
+"abc%s%)%("
+# Invalid: %{%} and %(%) nesting
+"abc%{%(%}%)"
+# Invalid: %{%} and %(%) nesting
+"abc%(%{%)%}"
+# Invalid: flags after width
+"abc%*0g"
+# Valid: null precision
+"abc%.f"
+# Invalid: twice precision
+"abc%.4.2g"
+# Valid: three arguments
+"abc%d%u%u"
+# Valid only in msgstr: a numbered argument
+"abc%1$d"
+# Invalid: zero
+"abc%0$d"
+# Valid only in msgstr: two-digit numbered arguments
+"abc%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz"
+# Invalid: unterminated number
+"abc%1"
+# Invalid: flags before number
+"abc%+1$d"
+# Valid only in msgstr: three arguments, two with same number
+"abc%1$4x,%2$c,%1$u"
+# Invalid: argument with conflicting types
+"abc%1$4x,%2$c,%1$s"
+# Valid only in msgstr: no conflicting types
+"abc%2$t%1$d"
+# Invalid: argument with conflicting types
+"abc%1$t%2$d"
+# Valid only in msgstr: no conflict
+"abc%1$4x,%2$c,%1$u"
+# Invalid: mixing of numbered and unnumbered arguments
+"abc%d%2$x"
+# Valid only in msgstr: numbered argument with constant precision
+"abc%1$.9x"
+# Invalid: mixing of numbered and unnumbered arguments
+"abc%1$.*x"
+# Valid only in msgstr: missing non-final argument
+"abc%2$x%3$s"
+# Valid only in msgstr: permutation
+"abc%2$ddef%1$d"
+# Valid only in msgstr: multiple uses of same argument
+"abc%2$xdef%1$sghi%2$x"
+# Valid only in msgstr: one argument with width
+"abc%2$#*1$g"
+# Valid only in msgstr: one argument with width and precision
+"abc%3$*2$.*1$g"
+# Invalid: zero
+"abc%2$*0$.*1$g"
+EOF
+
+: ${XGETTEXT=xgettext}
+n=0
+while read comment; do
+  read string
+  n=`expr $n + 1`
+  cat <<EOF > f-oc-1-$n.in
+let a = f_ ${string}
+EOF
+  ${XGETTEXT} -L OCaml -o f-oc-1-$n.po f-oc-1-$n.in || Exit 1
+  test -f f-oc-1-$n.po || Exit 1
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    if grep ocaml-format f-oc-1-$n.po > /dev/null; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    if grep ocaml-format f-oc-1-$n.po > /dev/null; then
+      fail=yes
+    else
+      :
+    fi
+  fi
+  if test -n "$fail"; then
+    echo "Format string recognition error:" 1>&2
+    cat f-oc-1-$n.in 1>&2
+    echo "Got:" 1>&2
+    cat f-oc-1-$n.po 1>&2
+    Exit 1
+  fi
+  rm -f f-oc-1-$n.in f-oc-1-$n.po
+done < f-oc-1.data
+
+Exit 0
diff --git a/gettext-tools/tests/format-ocaml-2 b/gettext-tools/tests/format-ocaml-2

new file mode 100755 (executable)

index 0000000..b6d2204
--- /dev/null
+++ b/gettext-tools/tests/format-ocaml-2
@@ -0,0 +1,718 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test checking of OCaml format strings.
+
+cat <<\EOF > f-oc-2.data
+# Valid: %% doesn't count
+msgid  "abc%%def"
+msgstr "xyz"
+# Invalid: invalid msgstr
+msgid  "abc%%def"
+msgstr "xyz%"
+# Valid: same arguments
+msgid  "abc%s%gdef"
+msgstr "xyz%s%g"
+# Valid: same arguments, with different widths
+msgid  "abc%2sdef"
+msgstr "xyz%3s"
+# Valid: same arguments but in numbered syntax
+msgid  "abc%d"
+msgstr "xyz%1$d"
+# Valid: same arguments but in numbered syntax
+msgid  "abc%s%gdef"
+msgstr "xyz%1$s%2$g"
+# Valid: permutation
+msgid  "abc%s%g%cdef"
+msgstr "xyz%3$c%2$g%1$s"
+# Invalid: too few arguments
+msgid  "abc%sdef%u"
+msgstr "xyz%1$s"
+# Invalid: too few arguments
+msgid  "abc%sdef%u"
+msgstr "xyz%s"
+# Invalid: too many arguments
+msgid  "abc%udef"
+msgstr "xyz%uvw%c"
+# Valid: same numbered arguments, with different widths
+msgid  "abc%4s%5s"
+msgstr "xyz%2$4s%1$5s"
+# Invalid: missing final argument
+msgid  "abc%udef%s"
+msgstr "xyz%1$u"
+# Invalid: missing non-final argument
+msgid  "abc%sdef%u"
+msgstr "xyz%2$u"
+# Invalid: added argument
+msgid  "abc%udef"
+msgstr "xyz%1$uvw%2$c"
+# Valid: type compatibility
+msgid  "abc%d"
+msgstr "xyz%i"
+# Valid: type compatibility
+msgid  "abc%d"
+msgstr "xyz%u"
+# Valid: type compatibility
+msgid  "abc%d"
+msgstr "xyz%x"
+# Valid: type compatibility
+msgid  "abc%d"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid  "abc%d"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid  "abc%i"
+msgstr "xyz%u"
+# Valid: type compatibility
+msgid  "abc%i"
+msgstr "xyz%x"
+# Valid: type compatibility
+msgid  "abc%i"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid  "abc%i"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid  "abc%u"
+msgstr "xyz%x"
+# Valid: type compatibility
+msgid  "abc%u"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid  "abc%u"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid  "abc%x"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid  "abc%x"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid  "abc%X"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid  "abc%ld"
+msgstr "xyz%li"
+# Valid: type compatibility
+msgid  "abc%ld"
+msgstr "xyz%lu"
+# Valid: type compatibility
+msgid  "abc%ld"
+msgstr "xyz%lx"
+# Valid: type compatibility
+msgid  "abc%ld"
+msgstr "xyz%lX"
+# Valid: type compatibility
+msgid  "abc%ld"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid  "abc%li"
+msgstr "xyz%lu"
+# Valid: type compatibility
+msgid  "abc%li"
+msgstr "xyz%lx"
+# Valid: type compatibility
+msgid  "abc%li"
+msgstr "xyz%lX"
+# Valid: type compatibility
+msgid  "abc%li"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid  "abc%lu"
+msgstr "xyz%lx"
+# Valid: type compatibility
+msgid  "abc%lu"
+msgstr "xyz%lX"
+# Valid: type compatibility
+msgid  "abc%lu"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid  "abc%lx"
+msgstr "xyz%lX"
+# Valid: type compatibility
+msgid  "abc%lx"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid  "abc%lX"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid  "abc%nd"
+msgstr "xyz%ni"
+# Valid: type compatibility
+msgid  "abc%nd"
+msgstr "xyz%nu"
+# Valid: type compatibility
+msgid  "abc%nd"
+msgstr "xyz%nx"
+# Valid: type compatibility
+msgid  "abc%nd"
+msgstr "xyz%nX"
+# Valid: type compatibility
+msgid  "abc%nd"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid  "abc%ni"
+msgstr "xyz%nu"
+# Valid: type compatibility
+msgid  "abc%ni"
+msgstr "xyz%nx"
+# Valid: type compatibility
+msgid  "abc%ni"
+msgstr "xyz%nX"
+# Valid: type compatibility
+msgid  "abc%ni"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid  "abc%nu"
+msgstr "xyz%nx"
+# Valid: type compatibility
+msgid  "abc%nu"
+msgstr "xyz%nX"
+# Valid: type compatibility
+msgid  "abc%nu"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid  "abc%nx"
+msgstr "xyz%nX"
+# Valid: type compatibility
+msgid  "abc%nx"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid  "abc%nX"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid  "abc%Ld"
+msgstr "xyz%Li"
+# Valid: type compatibility
+msgid  "abc%Ld"
+msgstr "xyz%Lu"
+# Valid: type compatibility
+msgid  "abc%Ld"
+msgstr "xyz%Lx"
+# Valid: type compatibility
+msgid  "abc%Ld"
+msgstr "xyz%LX"
+# Valid: type compatibility
+msgid  "abc%Ld"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid  "abc%Li"
+msgstr "xyz%Lu"
+# Valid: type compatibility
+msgid  "abc%Li"
+msgstr "xyz%Lx"
+# Valid: type compatibility
+msgid  "abc%Li"
+msgstr "xyz%LX"
+# Valid: type compatibility
+msgid  "abc%Li"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid  "abc%Lu"
+msgstr "xyz%Lx"
+# Valid: type compatibility
+msgid  "abc%Lu"
+msgstr "xyz%LX"
+# Valid: type compatibility
+msgid  "abc%Lu"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid  "abc%Lx"
+msgstr "xyz%LX"
+# Valid: type compatibility
+msgid  "abc%Lx"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid  "abc%LX"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid  "abc%f"
+msgstr "xyz%e"
+# Valid: type compatibility
+msgid  "abc%f"
+msgstr "xyz%E"
+# Valid: type compatibility
+msgid  "abc%f"
+msgstr "xyz%g"
+# Valid: type compatibility
+msgid  "abc%f"
+msgstr "xyz%G"
+# Valid: type compatibility
+msgid  "abc%f"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid  "abc%f"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%E"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%g"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%G"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid  "abc%E"
+msgstr "xyz%g"
+# Valid: type compatibility
+msgid  "abc%E"
+msgstr "xyz%G"
+# Valid: type compatibility
+msgid  "abc%E"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid  "abc%E"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid  "abc%g"
+msgstr "xyz%G"
+# Valid: type compatibility
+msgid  "abc%g"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid  "abc%g"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid  "abc%G"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid  "abc%G"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid  "abc%h"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid  "abc%{%s%}"
+msgstr "xyz%{%S%}"
+# Valid: type compatibility
+msgid  "abc%{%S%}"
+msgstr "xyz%{%s%}"
+# Valid: type compatibility
+msgid  "abc%{%c%}"
+msgstr "xyz%{%C%}"
+# Valid: type compatibility
+msgid  "abc%{%C%}"
+msgstr "xyz%{%c%}"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%ld"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%nd"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%Ld"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%nd"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%Ld"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%ld"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%Ld"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%nd"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%Ld"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%S"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid  "abc%S"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid  "abc%S"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid  "abc%S"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid  "abc%S"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%S"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%S"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%S"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%S"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%C"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid  "abc%C"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid  "abc%C"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%C"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%C"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%C"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%C"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%f"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid  "abc%f"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%f"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%f"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%f"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%f"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%F"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid  "abc%F"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%F"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%F"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%F"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%B"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid  "abc%B"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%B"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%B"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%a"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid  "abc%a"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%a"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%t"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid  "abc%t"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility of format strings
+msgid  "abc%(hhh%s%i%)def"
+msgstr "xyz%(hhh%s%)%i"
+# Invalid: too many arguments
+msgid  "abc%{%s%}"
+msgstr "xyz%(%s%)"
+# Invalid: too few arguments
+msgid  "abc%(%s%)"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility for width
+msgid  "abc%g%*g"
+msgstr "xyz%*g%g"
+# Invalid: zero
+msgid  "abc"
+msgstr "xyz%0$d"
+# Valid: two-digit numbered arguments
+msgid  "abc%d%d%d%d%d%d%d%d%d%d%d"
+msgstr "xyz%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz"
+# Invalid: unterminated number
+msgid  "abc"
+msgstr "xyz%1"
+# Invalid: flags before number
+msgid  "abc%d"
+msgstr "xyz%+1$d"
+# Valid: three arguments, two with same number
+msgid  "abc%d%c"
+msgstr "xyz%1$4x,%2$c,%1$u"
+# Invalid: argument with conflicting types
+msgid  "abc%d%c"
+msgstr "xyz%1$4x,%2$c,%1$s"
+# Valid: no conflicting types
+msgid  "abc%d%t"
+msgstr "xyz%2$t%1$d"
+# Invalid: argument with conflicting types
+msgid  "abc%t%d"
+msgstr "xyz%1$t%2$d"
+# Valid: no conflict
+msgid  "abc%u%c%a"
+msgstr "xyz%1$4x,%2$c,%1$u,%3$a"
+# Invalid: mixing of numbered and unnumbered arguments
+msgid  "abc%d%x"
+msgstr "xyz%d%2$x"
+# Valid: numbered argument with constant precision
+msgid  "abc%.9x"
+msgstr "xyz%1$.9x"
+# Invalid: mixing of numbered and unnumbered arguments
+msgid  "abc%d%d"
+msgstr "xyz%1$.*x"
+# Invalid: missing non-final argument
+msgid  "abc%d%x%s"
+msgstr "xyz%2$x%3$s"
+# Valid: permutation
+msgid  "abc%d%d"
+msgstr "xyz%2$ddef%1$d"
+# Valid: multiple uses of same argument
+msgid  "abc%s%x"
+msgstr "xyz%2$xdef%1$sghi%2$x"
+# Valid: one argument with width
+msgid  "abc%i%g"
+msgstr "xyz%2$#*1$g"
+# Valid: one argument with width and precision
+msgid  "abc%i%i%g"
+msgstr "xyz%3$*2$.*1$g"
+# Invalid: zero
+msgid  "abc%i%g"
+msgstr "xyz%2$*0$.*1$g"
+EOF
+
+: ${MSGFMT=msgfmt}
+n=0
+while read comment; do
+  read msgid_line
+  read msgstr_line
+  n=`expr $n + 1`
+  cat <<EOF > f-oc-2-$n.po
+#, ocaml-format
+${msgid_line}
+${msgstr_line}
+EOF
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    if ${MSGFMT} --check-format -o f-oc-2-$n.mo f-oc-2-$n.po; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    ${MSGFMT} --check-format -o f-oc-2-$n.mo f-oc-2-$n.po 2> /dev/null
+    if test $? = 1; then
+      :
+    else
+      fail=yes
+    fi
+  fi
+  if test -n "$fail"; then
+    echo "Format string checking error:" 1>&2
+    cat f-oc-2-$n.po 1>&2
+    Exit 1
+  fi
+  rm -f f-oc-2-$n.po f-oc-2-$n.mo
+done < f-oc-2.data
+
+Exit 0
diff --git a/gettext-tools/tests/xgettext-ocaml-1 b/gettext-tools/tests/xgettext-ocaml-1

new file mode 100755 (executable)

index 0000000..a0ecd5d
--- /dev/null
+++ b/gettext-tools/tests/xgettext-ocaml-1
@@ -0,0 +1,102 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test OCaml support: Simple things.
+
+cat <<\EOF > xg-oc-1.ml
+  (* A one-line comment. *)
+  let test1 = s_ "Test String 1" in
+  (* Two one-line *)
+  (* comments *)
+  let test2 = (s_ "Test String 2") in
+  (* A multi-line
+     comment.  *)
+  let test3 = s_("Test String 3") in
+  (* A (* nesting *) (* one-line *) comment. *)
+  let test4 = s_ "Test String 4" in
+  (* A (*
+     nesting
+     *)
+     (*
+     multi-line *)
+     comment. *)
+  let test5 = s_ "Test String 5" in
+  (*
+    s_("Not extracted");
+  *)
+  let dummy = 0 in
+  (* OCaml has string literal concatenation. *)
+  let test6 = s_("Test " ^
+  "String "
+  ^ "6") in
+  (* Empty string. *)
+  let test7 = s_ "" in
+
+  (* sprintf expects a format string. *)
+  sprintf (f_ "weight %u") w
+
+  print_string(s_ "Test string 11 %s")
+  let n = 2 in
+  let s = "2" in
+  print_string(sprintf(fn_ "%0s piece of cake" "%s pieces of cake" n) s)
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -c -d xg-oc-1.tmp xg-oc-1.ml || Exit 1
+LC_ALL=C tr -d '\r' < xg-oc-1.tmp.po > xg-oc-1.po || Exit 1
+
+cat <<\EOF > xg-oc-1.ok
+#. A one-line comment.
+msgid "Test String 1"
+msgstr ""
+
+#. Two one-line
+#. comments
+msgid "Test String 2"
+msgstr ""
+
+#. A multi-line
+#. comment.
+msgid "Test String 3"
+msgstr ""
+
+#. A (* nesting *) (* one-line *) comment.
+msgid "Test String 4"
+msgstr ""
+
+#. A (*
+#. nesting
+#. *)
+#. (*
+#. multi-line *)
+#. comment.
+msgid "Test String 5"
+msgstr ""
+
+#. OCaml has string literal concatenation.
+msgid "Test String 6"
+msgstr ""
+
+#. Empty string.
+msgid ""
+msgstr ""
+
+#. sprintf expects a format string.
+#, ocaml-format
+msgid "weight %u"
+msgstr ""
+
+msgid "Test string 11 %s"
+msgstr ""
+
+#, ocaml-format
+msgid "%0s piece of cake"
+msgid_plural "%s pieces of cake"
+msgstr[0] ""
+msgstr[1] ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-oc-1.ok xg-oc-1.po || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-ocaml-2 b/gettext-tools/tests/xgettext-ocaml-2

new file mode 100755 (executable)

index 0000000..30f9d9b
--- /dev/null
+++ b/gettext-tools/tests/xgettext-ocaml-2
@@ -0,0 +1,58 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test OCaml support: String literals.
+
+cat <<\EOF > xg-oc-2.ml
+let s1 = s_ {abracadabra|Test string 1 abc\ndef\tghi\\jkl\
+   mno\u{20AC}pqr|xyz|abracadabra} in
+let s2 = s_ "Test string 2 abc\ndef\tghi\\jkl\
+   mno\u{20AC}pqr\ stu\'vwx" in
+let s3 = s_ ("Test string 3" ^ " abc" ^ "def") in
+let s4 = s_ "Test string 4 \0377\064\x404\o1006\u{1F603}" in
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --no-location -d xg-oc-2.tmp xg-oc-2.ml || Exit 1
+func_filter_POT_Creation_Date xg-oc-2.tmp.po xg-oc-2.pot
+
+cat <<\EOF > xg-oc-2.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid ""
+"Test string 1 abc\\ndef\\tghi\\\\jkl\\\n"
+"   mno\\u{20AC}pqr|xyz"
+msgstr ""
+
+msgid ""
+"Test string 2 abc\n"
+"def\tghi\\jklmno€pqr stu'vwx"
+msgstr ""
+
+msgid "Test string 3 abcdef"
+msgstr ""
+
+msgid "Test string 4 %7@@4@6😃"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-oc-2.ok xg-oc-2.pot || Exit 1
+
+exit 0
author	Bruno Haible <bruno@clisp.org>
	Sun, 27 Jul 2025 22:07:10 +0000 (00:07 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 27 Jul 2025 22:09:35 +0000 (00:09 +0200)
.gitignore		patch \| blob \| blame \| history
NEWS		patch \| blob \| blame \| history
autopull.sh		patch \| blob \| blame \| history
gettext-tools/Makefile.am		patch \| blob \| blame \| history
gettext-tools/build-aux/tree-sitter-ocaml-portability.diff	[new file with mode: 0644]	patch \| blob
gettext-tools/configure.ac		patch \| blob \| blame \| history
gettext-tools/doc/gettext.texi		patch \| blob \| blame \| history
gettext-tools/doc/lang-ocaml.texi		patch \| blob \| blame \| history
gettext-tools/doc/xgettext.texi		patch \| blob \| blame \| history
gettext-tools/libgettextpo/Makefile.am		patch \| blob \| blame \| history
gettext-tools/po/POTFILES.in		patch \| blob \| blame \| history
gettext-tools/src/FILES		patch \| blob \| blame \| history
gettext-tools/src/Makefile.am		patch \| blob \| blame \| history
gettext-tools/src/format-ocaml.c	[new file with mode: 0644]	patch \| blob
gettext-tools/src/format.c		patch \| blob \| blame \| history
gettext-tools/src/format.h		patch \| blob \| blame \| history
gettext-tools/src/message.c		patch \| blob \| blame \| history
gettext-tools/src/message.h		patch \| blob \| blame \| history
gettext-tools/src/x-ocaml.c		patch \| blob \| blame \| history
gettext-tools/src/x-ocaml.h		patch \| blob \| blame \| history
gettext-tools/src/xgettext.c		patch \| blob \| blame \| history
gettext-tools/tests/Makefile.am		patch \| blob \| blame \| history
gettext-tools/tests/format-ocaml-1	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/format-ocaml-2	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-ocaml-1	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-ocaml-2	[new file with mode: 0755]	patch \| blob