/gettext-tools/src/libxgettextts2.a
/gettext-tools/src/libxgettextts3.a
/gettext-tools/src/libxgettextts4.a
+/gettext-tools/src/libxgettextts5.a
/gettext-tools/src/libxgettextx.a
/gettext-tools/src/msgfmt.net.exe
/gettext-tools/src/msgfmt.net.exe.mdb
+Version 0.27 - September 2025
+
+# Programming languages support:
+ * OCaml:
+ - xgettext now supports OCaml.
+ - 'msgfmt -c' now verifies the syntax of translations of OCaml format
+ strings.
+
Version 0.26 - July 2025
# Programming languages support:
TREE_SITTER_GO_VERSION=0.23.4
TREE_SITTER_RUST_VERSION=0.23.2
TREE_SITTER_TYPESCRIPT_VERSION=0.23.2
+TREE_SITTER_OCAML_VERSION=0.23.2
TREE_SITTER_D_VERSION=0.8.2
# Cache the relevant source code. Erase the rest of the tree-sitter projects.
test -d gettext-tools/tree-sitter-$TREE_SITTER_VERSION || {
mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/scanner.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/tsx-scanner.c
rm -rf tree-sitter-typescript
}
+test -d gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION || {
+ func_git_clone_shallow tree-sitter-ocaml https://github.com/tree-sitter/tree-sitter-ocaml.git v$TREE_SITTER_OCAML_VERSION
+ (cd tree-sitter-ocaml && patch -p1) < gettext-tools/build-aux/tree-sitter-ocaml-portability.diff
+ mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION
+ mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/common
+ mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars
+ mkdir gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml
+ mv tree-sitter-ocaml/LICENSE gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/LICENSE
+ mv tree-sitter-ocaml/common/scanner.h gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/common/scanner.h
+ mv tree-sitter-ocaml/grammars/ocaml/src gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src
+ mv gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/parser.c gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/ocaml-parser.c
+ mv gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/scanner.c gettext-tools/tree-sitter-ocaml-$TREE_SITTER_OCAML_VERSION/grammars/ocaml/src/ocaml-scanner.c
+ rm -rf tree-sitter-ocaml
+}
test -d gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION || {
func_git_clone_shallow tree-sitter-d https://github.com/gdamore/tree-sitter-d.git v$TREE_SITTER_D_VERSION
(cd tree-sitter-d && patch -p1) < gettext-tools/build-aux/tree-sitter-d-portability.diff
TREE_SITTER_GO_VERSION=$TREE_SITTER_GO_VERSION
TREE_SITTER_RUST_VERSION=$TREE_SITTER_RUST_VERSION
TREE_SITTER_TYPESCRIPT_VERSION=$TREE_SITTER_TYPESCRIPT_VERSION
+TREE_SITTER_OCAML_VERSION=$TREE_SITTER_OCAML_VERSION
TREE_SITTER_D_VERSION=$TREE_SITTER_D_VERSION
EOF
tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/alloc.h \
tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/array.h \
tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/parser.h \
+ build-aux/tree-sitter-ocaml-portability.diff \
+ tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/LICENSE \
+ tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/common/scanner.h \
+ tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-parser.c \
+ tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-scanner.c \
+ tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/tree_sitter/alloc.h \
+ tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/tree_sitter/array.h \
+ tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/tree_sitter/parser.h \
build-aux/tree-sitter-d-portability.diff \
tree-sitter-d-$(TREE_SITTER_D_VERSION)/LICENSE \
tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/d-parser.c \
--- /dev/null
+diff --git a/grammars/ocaml/src/parser.c b/grammars/ocaml/src/parser.c
+index 653700c..3ddc246 100644
+--- a/grammars/ocaml/src/parser.c
++++ b/grammars/ocaml/src/parser.c
+@@ -1103727,8 +1103727,10 @@ void tree_sitter_ocaml_external_scanner_deserialize(void *, const char *, unsign
+ #define TS_PUBLIC
+ #elif defined(_WIN32)
+ #define TS_PUBLIC __declspec(dllexport)
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define TS_PUBLIC __attribute__((visibility("default")))
++#else
++#define TS_PUBLIC
+ #endif
+
+ TS_PUBLIC const TSLanguage *tree_sitter_ocaml(void) {
+diff --git a/grammars/ocaml/src/tree_sitter/parser.h b/grammars/ocaml/src/tree_sitter/parser.h
+index 799f599..130b4d0 100644
+--- a/grammars/ocaml/src/tree_sitter/parser.h
++++ b/grammars/ocaml/src/tree_sitter/parser.h
+@@ -155,8 +155,10 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
+
+ #ifdef _MSC_VER
+ #define UNUSED __pragma(warning(suppress : 4101))
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define UNUSED __attribute__((unused))
++#else
++#define UNUSED
+ #endif
+
+ #define START_LEXER() \
AC_SUBST([TREE_SITTER_GO_VERSION])
AC_SUBST([TREE_SITTER_RUST_VERSION])
AC_SUBST([TREE_SITTER_TYPESCRIPT_VERSION])
+AC_SUBST([TREE_SITTER_OCAML_VERSION])
AC_SUBST([TREE_SITTER_D_VERSION])
dnl Determine the directory name to use for version-specific data files.
* object-pascal-format:: Object Pascal Format Strings
* modula2-format:: Modula-2 Format Strings
* d-format:: D Format Strings
+* ocaml-format:: OCaml Format Strings
* smalltalk-format:: Smalltalk Format Strings
* qt-format:: Qt Format Strings
* qt-plural-format:: Qt Plural Format Strings
@kwindex no-d-format@r{ flag}
Likewise for D, see @ref{d-format}.
+@item ocaml-format
+@kwindex ocaml-format@r{ flag}
+@itemx no-ocaml-format
+@kwindex no-ocaml-format@r{ flag}
+Likewise for OCaml, see @ref{ocaml-format}.
+
@item smalltalk-format
@kwindex smalltalk-format@r{ flag}
@itemx no-smalltalk-format
* object-pascal-format:: Object Pascal Format Strings
* modula2-format:: Modula-2 Format Strings
* d-format:: D Format Strings
+* ocaml-format:: OCaml Format Strings
* smalltalk-format:: Smalltalk Format Strings
* qt-format:: Qt Format Strings
* qt-plural-format:: Qt Plural Format Strings
in the documentation of the D module @code{std.format},
at @uref{https://dlang.org/library/std/format.html}.
+@node ocaml-format
+@subsection OCaml Format Strings
+
+OCaml format strings are described
+in the documentation of the @code{Printf} module,
+at @uref{https://ocaml.org/manual/5.3/api/Printf.html#VALfprintf}.
+In translated strings (@code{msgstr} values),
+arguments can be specified through a number,
+using the syntax @samp{%@var{m}$} instead of @samp{%}
+and @samp{*@var{m}$} instead of @samp{*}
+to designate the argument number @var{m}.
+
@node smalltalk-format
@subsection Smalltalk Format Strings
@code{(s_ "abc")} or, for format strings, @code{(f_ "abc")}
@item gettext/ngettext functions
-@code{s_}, @code{f_}, @code{sn_}, @code{fn_}
+@code{s_}, @code{f_}, @code{sn_}, @code{fn_}, and also
+@code{gettext}, @code{fgettext},
+@code{dgettext}, @code{fdgettext},
+@code{dcgettext}, @code{fdcgettext},
+@code{ngettext}, @code{fngettext},
+@code{dngettext}, @code{fdngettext},
+@code{dcngettext}, @code{fdcngettext}.
@item textdomain
@code{textdomain} field in first parameter of @code{Gettext.Program}
@code{xgettext}
@item Formatting with positions
----
+@code{Printf.fprintf "%2$d %1$d"}
@item Portability
fully portable
Lua,
Modula-2,
D,
+OCaml,
Vala,
Tcl,
Perl,
Lua,
Modula-2,
D,
+OCaml,
Vala,
Tcl,
Perl,
Lua,
Modula-2,
D,
+OCaml,
Vala,
Tcl,
Perl,
../src/format-pascal.c \
../src/format-modula2.c \
../src/format-d.c \
+ ../src/format-ocaml.c \
../src/format-smalltalk.c \
../src/format-qt.c \
../src/format-qt-plural.c \
src/format-lisp.c
src/format-lua.c
src/format-modula2.c
+src/format-ocaml.c
src/format-pascal.c
src/format-perl-brace.c
src/format-perl.c
format-pascal.c Format string handling for Object Pascal.
format-modula2.c Format string handling for Modula-2.
format-d.c Format string handling for D.
+format-ocaml.c Format string handling for OCaml.
format-smalltalk.c Format string handling for Smalltalk and YCP.
format-qt.c Format string handling for Qt.
format-qt-plural.c Format string handling for Qt plural forms.
format-pascal.c \
format-modula2.c \
format-d.c \
+ format-ocaml.c \
format-smalltalk.c \
format-qt.c \
format-qt-plural.c \
search-path.c
# xgettext has some tree-sitter based backends.
-LIBXGETTEXTTS = libxgettextts2.a libxgettextts3.a libxgettextts4.a libxgettextts1.a
+LIBXGETTEXTTS = libxgettextts2.a libxgettextts3.a libxgettextts4.a libxgettextts5.a libxgettextts1.a
libxgettextts1_a_SOURCES = \
../tree-sitter-$(TREE_SITTER_VERSION)/lib/src/lib.c
libxgettextts1_a_CPPFLAGS = \
-I$(top_srcdir)/tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src \
-I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include
libxgettextts4_a_CFLAGS = $(AM_CFLAGS) @FOREIGN_WARN_CFLAGS@ @NO_EXPENSIVE_WARN_CFLAGS@
+libxgettextts5_a_SOURCES = \
+ ../tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-parser.c \
+ ../tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src/ocaml-scanner.c
+libxgettextts5_a_CPPFLAGS = \
+ -I$(top_srcdir)/tree-sitter-ocaml-$(TREE_SITTER_OCAML_VERSION)/grammars/ocaml/src \
+ -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include
+libxgettextts5_a_CFLAGS = $(AM_CFLAGS) @FOREIGN_WARN_CFLAGS@ @NO_EXPENSIVE_WARN_CFLAGS@
# msggrep needs pattern matching.
LIBGREP = ../libgrep/libgrep.a ../libgrep/gnulib-lib/libgnu.a
--- /dev/null
+/* OCaml format strings.
+ Copyright (C) 2001-2025 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2025.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "format.h"
+#include "gettext.h"
+#include "xalloc.h"
+#include "format-invalid.h"
+#include "c-ctype.h"
+#include "xvasprintf.h"
+
+#define _(str) gettext (str)
+
+/* The OCaml format strings are described in the OCaml reference manual,
+ at https://ocaml.org/manual/5.3/api/Printf.html#VALfprintf .
+ They are implemented in ocaml-5.3.0/stdlib/scanf.ml.
+
+ A directive
+ - starts with '%',
+ - [in msgstr only] is optionally followed by
+ a positive integer m, then '$'
+ - is optionally followed by a sequence of flags, each being one of
+ '+', '-', ' ', '0', '#',
+ - is optionally followed by a width specification:
+ a positive integer, or
+ '*', or
+ [in msgstr only] '*', then a positive integer, then '$',
+ - is optionally followed by a precision specification:
+ '.' then optionally:
+ a positive integer, or
+ '*', or
+ [in msgstr only] '*', then a positive integer, then '$',
+ - is finished by a specifier
+ - 'd', 'i', 'u', 'x', 'X', 'o', that need an integer argument,
+ - 'l' then 'd', 'i', 'u', 'x', 'X', 'o', that need an int32 argument,
+ - 'n' then 'd', 'i', 'u', 'x', 'X', 'o', that need an nativeint argument,
+ - 'L' then 'd', 'i', 'u', 'x', 'X', 'o', that need an int64 argument,
+ - 's', that needs a string argument,
+ - 'S', that needs a string argument and outputs it in OCaml syntax,
+ - 'c', that needs a character argument,
+ - 'C', that needs a character argument and outputs it in OCaml syntax,
+ - 'f', 'e', 'E', 'g', 'G', 'h', 'H', that need a floating-point argument,
+ - 'F', that needs a floating-point argument and outputs it in OCaml syntax,
+ - 'B', that needs a boolean argument,
+ - 'a', that takes a function (of type : out_channel -> unit) argument,
+ - 't', that takes two arguments: a function (of type : out_channel -> <T> -> unit)
+ and a <T>,
+ - '{' FMT '%}', that takes a format string argument without msgstr
+ extensions, expected to have the same signature as FMT, effectively
+ ignores it, and instead outputs the minimal format string with the
+ same signature as FMT: a concatenation of
+ - "%i" for an integer argument,
+ - "%li" for an int32 argument,
+ - "%ni" for a nativeint argument,
+ - "%Li" for an int64 argument,
+ - "%s" for a string argument,
+ - "%c" for a character argument,
+ - "%f" for a floating-point argument,
+ - "%B" for a boolean argument,
+ - "%a" for a function argument,
+ - "%t" for two arguments, as described above,
+ - '(' FMT '%)', that takes a format string argument without msgstr
+ extensions, expected to have the same signature as FMT, and a set
+ of arguments suitable for FMT,
+ - '!', '%', '@', ',', that take no argument.
+ Numbered ('%m$' or '*m$') and unnumbered argument specifications cannot
+ be used in the same string.
+ */
+
+enum format_arg_type
+{
+ FAT_NONE = 0,
+ /* Basic types */
+ FAT_INTEGER = 1,
+ FAT_INT32 = 2,
+ FAT_NATIVEINT = 3,
+ FAT_INT64 = 4,
+ FAT_STRING = 5,
+ FAT_CHARACTER = 6,
+ FAT_FLOATINGPOINT = 7,
+ FAT_BOOLEAN = 8,
+ FAT_FUNCTION_A = 9,
+ FAT_FUNCTION_T = 10, /* first argument for %t */
+ FAT_FUNCTION_T2 = 11, /* second argument for %t */
+ FAT_FORMAT_STRING = 12,
+ /* Flags */
+ FAT_OCAML_SYNTAX = 1 << 4,
+ FAT_OPTIONAL_OCAML_SYNTAX = 1 << 5,
+ /* Bitmasks */
+ FAT_BASIC_MASK = (FAT_INTEGER | FAT_INT32 | FAT_NATIVEINT | FAT_INT64
+ | FAT_STRING | FAT_CHARACTER | FAT_FLOATINGPOINT
+ | FAT_BOOLEAN | FAT_FUNCTION_A | FAT_FUNCTION_T
+ | FAT_FUNCTION_T2 | FAT_FORMAT_STRING)
+};
+#ifdef __cplusplus
+typedef int format_arg_type_t;
+#else
+typedef enum format_arg_type format_arg_type_t;
+#endif
+
+struct numbered_arg
+{
+ size_t number;
+ format_arg_type_t type;
+ char *signature; /* for type == FAT_FORMAT_STRING */
+};
+
+struct spec
+{
+ size_t directives;
+ size_t numbered_arg_count;
+ struct numbered_arg *numbered;
+};
+
+
+static int
+numbered_arg_compare (const void *p1, const void *p2)
+{
+ size_t n1 = ((const struct numbered_arg *) p1)->number;
+ size_t n2 = ((const struct numbered_arg *) p2)->number;
+
+ return (n1 > n2 ? 1 : n1 < n2 ? -1 : 0);
+}
+
+/* Frees the memory held by *spec. */
+static void
+destroy_spec (struct spec *spec)
+{
+ if (spec->numbered != NULL)
+ {
+ size_t i;
+ for (i = spec->numbered_arg_count; i > 0; )
+ {
+ --i;
+ if (spec->numbered[i].type == FAT_FORMAT_STRING)
+ free (spec->numbered[i].signature);
+ }
+ free (spec->numbered);
+ }
+}
+
+/* Returns the signature of a format string
+ as a freshly allocated string. */
+static char *
+format_string_signature (const struct spec *spec)
+{
+ size_t len;
+ {
+ size_t i;
+ const struct numbered_arg *p;
+ len = spec->numbered_arg_count;
+ for (i = 0, p = spec->numbered; i < spec->numbered_arg_count; i++, p++)
+ if ((p->type & FAT_BASIC_MASK) == FAT_FORMAT_STRING)
+ len += strlen (p->signature) + 1;
+ }
+ char *signature = (char *) xmalloc (len + 1);
+ {
+ size_t i;
+ const struct numbered_arg *p;
+ char *s;
+ for (i = 0, p = spec->numbered, s = signature;
+ i < spec->numbered_arg_count;
+ i++, p++)
+ switch (p->type & FAT_BASIC_MASK)
+ {
+ case FAT_INTEGER:
+ *s++ = 'i';
+ break;
+ case FAT_INT32:
+ *s++ = 'l';
+ break;
+ case FAT_NATIVEINT:
+ *s++ = 'n';
+ break;
+ case FAT_INT64:
+ *s++ = 'L';
+ break;
+ case FAT_STRING:
+ *s++ = 's';
+ break;
+ case FAT_CHARACTER:
+ *s++ = 'c';
+ break;
+ case FAT_FLOATINGPOINT:
+ *s++ = 'f';
+ break;
+ case FAT_BOOLEAN:
+ *s++ = 'B';
+ break;
+ case FAT_FUNCTION_A:
+ *s++ = 'a';
+ break;
+ case FAT_FUNCTION_T:
+ *s++ = 't';
+ break;
+ case FAT_FUNCTION_T2:
+ break;
+ case FAT_FORMAT_STRING:
+ *s++ = '(';
+ memcpy (s, p->signature, strlen (p->signature));
+ s += strlen (p->signature);
+ *s++ = ')';
+ break;
+ default:
+ abort ();
+ }
+ *s = '\0';
+ }
+ return signature;
+}
+
+/* When a type is specified via format string substitution, e.g. "%(%s%)", both
+ the variant without OCaml syntax "%s" and the variant with OCaml syntax "%S"
+ are allowed. */
+static format_arg_type_t
+type_without_translator_constraint (format_arg_type_t type)
+{
+ switch (type & FAT_BASIC_MASK)
+ {
+ case FAT_STRING:
+ case FAT_CHARACTER:
+ case FAT_FLOATINGPOINT:
+ return (type & FAT_BASIC_MASK) | FAT_OPTIONAL_OCAML_SYNTAX;
+ default:
+ return type;
+ }
+}
+
+/* Parse a piece of format string, until the matching terminating format
+ directive is encountered.
+ spec is the global struct spec.
+ format is the remainder of the format string.
+ It is updated upon valid return.
+ terminator is '\0' at the top-level, otherwise '}' or ')'.
+ translated is true when msgstr extensions should be accepted.
+ fdi is an array to be filled with format directive indicators, or NULL.
+ If the format string is invalid, false is returned and *invalid_reason is
+ set to an error message explaining why. */
+static bool
+parse_upto (struct spec *spec,
+ const char **formatp,
+ char terminator, bool translated,
+ char *fdi, char **invalid_reason)
+{
+ const char *format = *formatp;
+ const char *const format_start = format;
+ size_t numbered_allocated;
+ size_t unnumbered_arg_count;
+
+ spec->directives = 0;
+ spec->numbered_arg_count = 0;
+ spec->numbered = NULL;
+ numbered_allocated = 0;
+ unnumbered_arg_count = 0;
+
+ for (; *format != '\0';)
+ /* Invariant: spec->numbered_arg_count == 0 || unnumbered_arg_count == 0. */
+ if (*format++ == '%')
+ {
+ /* A directive. */
+ size_t number = 0;
+ format_arg_type_t type;
+ char *signature = NULL;
+
+ FDI_SET (format - 1, FMTDIR_START);
+ spec->directives++;
+
+ if (translated && c_isdigit (*format))
+ {
+ const char *f = format;
+ size_t m = 0;
+
+ do
+ {
+ m = 10 * m + (*f - '0');
+ f++;
+ }
+ while (c_isdigit (*f));
+
+ if (*f == '$')
+ {
+ if (m == 0)
+ {
+ *invalid_reason = INVALID_ARGNO_0 (spec->directives);
+ FDI_SET (f, FMTDIR_ERROR);
+ goto bad_format;
+ }
+ number = m;
+ format = ++f;
+ }
+ }
+
+ /* Parse flags. */
+ while (*format == ' ' || *format == '+' || *format == '-'
+ || *format == '#' || *format == '0')
+ format++;
+
+ /* Parse width. */
+ if (*format == '*')
+ {
+ size_t width_number = 0;
+
+ format++;
+
+ if (translated && c_isdigit (*format))
+ {
+ const char *f = format;
+ size_t m = 0;
+
+ do
+ {
+ m = 10 * m + (*f - '0');
+ f++;
+ }
+ while (c_isdigit (*f));
+
+ if (*f == '$')
+ {
+ if (m == 0)
+ {
+ *invalid_reason =
+ INVALID_WIDTH_ARGNO_0 (spec->directives);
+ FDI_SET (f, FMTDIR_ERROR);
+ goto bad_format;
+ }
+ width_number = m;
+ format = ++f;
+ }
+ }
+
+ if (width_number)
+ {
+ /* Numbered argument. */
+
+ /* Numbered and unnumbered specifications are exclusive. */
+ if (unnumbered_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+ FDI_SET (format - 1, FMTDIR_ERROR);
+ goto bad_format;
+ }
+
+ if (numbered_allocated == spec->numbered_arg_count)
+ {
+ numbered_allocated = 2 * numbered_allocated + 1;
+ spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+ }
+ spec->numbered[spec->numbered_arg_count].number = width_number;
+ spec->numbered[spec->numbered_arg_count].type = FAT_INTEGER;
+ spec->numbered_arg_count++;
+ }
+ else
+ {
+ /* Unnumbered argument. */
+
+ /* Numbered and unnumbered specifications are exclusive. */
+ if (spec->numbered_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+ FDI_SET (format - 1, FMTDIR_ERROR);
+ goto bad_format;
+ }
+
+ if (numbered_allocated == unnumbered_arg_count)
+ {
+ numbered_allocated = 2 * numbered_allocated + 1;
+ spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+ }
+ spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+ spec->numbered[unnumbered_arg_count].type = FAT_INTEGER;
+ unnumbered_arg_count++;
+ }
+ }
+ else if (c_isdigit (*format))
+ {
+ do format++; while (c_isdigit (*format));
+ }
+
+ /* Parse precision. */
+ if (*format == '.')
+ {
+ format++;
+
+ if (*format == '*')
+ {
+ size_t precision_number = 0;
+
+ format++;
+
+ if (translated && c_isdigit (*format))
+ {
+ const char *f = format;
+ size_t m = 0;
+
+ do
+ {
+ m = 10 * m + (*f - '0');
+ f++;
+ }
+ while (c_isdigit (*f));
+
+ if (*f == '$')
+ {
+ if (m == 0)
+ {
+ *invalid_reason =
+ INVALID_PRECISION_ARGNO_0 (spec->directives);
+ FDI_SET (f, FMTDIR_ERROR);
+ goto bad_format;
+ }
+ precision_number = m;
+ format = ++f;
+ }
+ }
+
+ if (precision_number)
+ {
+ /* Numbered argument. */
+
+ /* Numbered and unnumbered specifications are exclusive. */
+ if (unnumbered_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+ FDI_SET (format - 1, FMTDIR_ERROR);
+ goto bad_format;
+ }
+
+ if (numbered_allocated == spec->numbered_arg_count)
+ {
+ numbered_allocated = 2 * numbered_allocated + 1;
+ spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+ }
+ spec->numbered[spec->numbered_arg_count].number = precision_number;
+ spec->numbered[spec->numbered_arg_count].type = FAT_INTEGER;
+ spec->numbered_arg_count++;
+ }
+ else
+ {
+ /* Unnumbered argument. */
+
+ /* Numbered and unnumbered specifications are exclusive. */
+ if (spec->numbered_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+ FDI_SET (format - 1, FMTDIR_ERROR);
+ goto bad_format;
+ }
+
+ if (numbered_allocated == unnumbered_arg_count)
+ {
+ numbered_allocated = 2 * numbered_allocated + 1;
+ spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+ }
+ spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+ spec->numbered[unnumbered_arg_count].type = FAT_INTEGER;
+ unnumbered_arg_count++;
+ }
+ }
+ else if (c_isdigit (*format))
+ {
+ do format++; while (c_isdigit (*format));
+ }
+ }
+
+ /* Parse the specifier. */
+ enum format_arg_type integer_type = FAT_INTEGER;
+ if (*format == 'l')
+ {
+ integer_type = FAT_INT32;
+ format++;
+ }
+ else if (*format == 'n')
+ {
+ integer_type = FAT_NATIVEINT;
+ format++;
+ }
+ else if (*format == 'L')
+ {
+ integer_type = FAT_INT64;
+ format++;
+ }
+
+ switch (*format)
+ {
+ case 'd':
+ case 'i':
+ case 'u':
+ case 'x': case 'X':
+ case 'o':
+ type = integer_type;
+ break;
+ default:
+ if (integer_type != FAT_INTEGER)
+ --format;
+ switch (*format)
+ {
+ case 's':
+ type = FAT_STRING;
+ break;
+ case 'S':
+ type = FAT_STRING | FAT_OCAML_SYNTAX;
+ break;
+ case 'c':
+ type = FAT_CHARACTER;
+ break;
+ case 'C':
+ type = FAT_CHARACTER | FAT_OCAML_SYNTAX;
+ break;
+ case 'f':
+ case 'e': case 'E':
+ case 'g': case 'G':
+ case 'h': case 'H':
+ type = FAT_FLOATINGPOINT;
+ break;
+ case 'F':
+ type = FAT_FLOATINGPOINT | FAT_OCAML_SYNTAX;
+ break;
+ case 'B':
+ type = FAT_BOOLEAN;
+ break;
+ case 'a':
+ type = FAT_FUNCTION_A;
+ break;
+ case 't':
+ type = FAT_FUNCTION_T;
+ break;
+ case '{':
+ {
+ struct spec sub_spec;
+ *formatp = format;
+ if (!parse_upto (&sub_spec, formatp, '}', false,
+ fdi, invalid_reason))
+ {
+ FDI_SET (**formatp == '\0' ? *formatp - 1 : *formatp,
+ FMTDIR_ERROR);
+ goto bad_format;
+ }
+ format = *formatp;
+ type = FAT_FORMAT_STRING;
+ signature = format_string_signature (&sub_spec);
+ destroy_spec (&sub_spec);
+ }
+ break;
+ case '}':
+ if (terminator != '}')
+ {
+ *invalid_reason =
+ xasprintf (_("Found '%s' without matching '%s'."), "%}", "%{");
+ FDI_SET (format - 1, FMTDIR_ERROR);
+ goto bad_format;
+ }
+ spec->directives--;
+ goto done;
+ case '(':
+ {
+ struct spec sub_spec;
+ *formatp = format;
+ if (!parse_upto (&sub_spec, formatp, ')', false,
+ fdi, invalid_reason))
+ {
+ FDI_SET (**formatp == '\0' ? *formatp - 1 : *formatp,
+ FMTDIR_ERROR);
+ goto bad_format;
+ }
+ format = *formatp;
+ type = FAT_FORMAT_STRING;
+ signature = format_string_signature (&sub_spec);
+
+ if (number)
+ {
+ /* Numbered argument. */
+
+ /* Numbered and unnumbered specifications are exclusive. */
+ if (unnumbered_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+ FDI_SET (format, FMTDIR_ERROR);
+ goto bad_format;
+ }
+
+ size_t new_numbered_arg_count =
+ spec->numbered_arg_count + 1 + sub_spec.numbered_arg_count;
+ if (numbered_allocated < new_numbered_arg_count)
+ {
+ numbered_allocated = 2 * numbered_allocated + 1;
+ if (numbered_allocated < new_numbered_arg_count)
+ numbered_allocated = new_numbered_arg_count;
+ spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+ }
+ spec->numbered[spec->numbered_arg_count].number = number;
+ spec->numbered[spec->numbered_arg_count].type = type;
+ spec->numbered[spec->numbered_arg_count].signature = signature;
+ spec->numbered_arg_count++;
+ for (size_t i = 0; i < sub_spec.numbered_arg_count; i++)
+ {
+ spec->numbered[spec->numbered_arg_count].number = number + sub_spec.numbered[i].number;
+ spec->numbered[spec->numbered_arg_count].type =
+ type_without_translator_constraint (sub_spec.numbered[i].type);
+ if (sub_spec.numbered[i].type == FAT_FORMAT_STRING)
+ spec->numbered[spec->numbered_arg_count].signature = sub_spec.numbered[i].signature;
+ spec->numbered_arg_count++;
+ }
+ }
+ else
+ {
+ /* Unnumbered argument. */
+
+ /* Numbered and unnumbered specifications are exclusive. */
+ if (spec->numbered_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+ FDI_SET (format, FMTDIR_ERROR);
+ goto bad_format;
+ }
+
+ size_t new_unnumbered_arg_count =
+ unnumbered_arg_count + 1 + sub_spec.numbered_arg_count;
+ if (numbered_allocated < new_unnumbered_arg_count)
+ {
+ numbered_allocated = 2 * numbered_allocated + 1;
+ if (numbered_allocated < new_unnumbered_arg_count)
+ numbered_allocated = new_unnumbered_arg_count;
+ spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+ }
+ spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+ spec->numbered[unnumbered_arg_count].type = type;
+ spec->numbered[unnumbered_arg_count].signature = signature;
+ unnumbered_arg_count++;
+ for (size_t i = 0; i < sub_spec.numbered_arg_count; i++)
+ {
+ spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+ spec->numbered[unnumbered_arg_count].type =
+ type_without_translator_constraint (sub_spec.numbered[i].type);
+ if (sub_spec.numbered[i].type == FAT_FORMAT_STRING)
+ spec->numbered[unnumbered_arg_count].signature = sub_spec.numbered[i].signature;
+ unnumbered_arg_count++;
+ }
+ }
+
+ free (sub_spec.numbered);
+ }
+ goto done_specifier;
+ case ')':
+ if (terminator != ')')
+ {
+ *invalid_reason =
+ xasprintf (_("Found '%s' without matching '%s'."), "%)", "%(");
+ FDI_SET (format - 1, FMTDIR_ERROR);
+ goto bad_format;
+ }
+ spec->directives--;
+ goto done;
+ case '!':
+ case '%':
+ case '@':
+ case ',':
+ type = FAT_NONE;
+ break;
+ default:
+ if (*format == '\0')
+ {
+ *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+ FDI_SET (format - 1, FMTDIR_ERROR);
+ }
+ else
+ {
+ *invalid_reason =
+ INVALID_CONVERSION_SPECIFIER (spec->directives, *format);
+ FDI_SET (format, FMTDIR_ERROR);
+ }
+ goto bad_format;
+ }
+ break;
+ }
+
+ if (type != FAT_NONE)
+ {
+ if (number)
+ {
+ /* Numbered argument. */
+
+ /* Numbered and unnumbered specifications are exclusive. */
+ if (unnumbered_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+ FDI_SET (format, FMTDIR_ERROR);
+ goto bad_format;
+ }
+
+ size_t new_numbered_arg_count =
+ spec->numbered_arg_count + 1 + (type == FAT_FUNCTION_T);
+ if (numbered_allocated < new_numbered_arg_count)
+ {
+ numbered_allocated = 2 * numbered_allocated + 1;
+ if (numbered_allocated < new_numbered_arg_count)
+ numbered_allocated = new_numbered_arg_count;
+ spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+ }
+ spec->numbered[spec->numbered_arg_count].number = number;
+ spec->numbered[spec->numbered_arg_count].type = type;
+ if (type == FAT_FORMAT_STRING)
+ spec->numbered[spec->numbered_arg_count].signature = signature;
+ spec->numbered_arg_count++;
+ if (type == FAT_FUNCTION_T)
+ {
+ spec->numbered[spec->numbered_arg_count].number = number + 1;
+ spec->numbered[spec->numbered_arg_count].type = FAT_FUNCTION_T2;
+ spec->numbered_arg_count++;
+ }
+ }
+ else
+ {
+ /* Unnumbered argument. */
+
+ /* Numbered and unnumbered specifications are exclusive. */
+ if (spec->numbered_arg_count > 0)
+ {
+ *invalid_reason = INVALID_MIXES_NUMBERED_UNNUMBERED ();
+ FDI_SET (format, FMTDIR_ERROR);
+ goto bad_format;
+ }
+
+ size_t new_unnumbered_arg_count =
+ unnumbered_arg_count + 1 + (type == FAT_FUNCTION_T);
+ if (numbered_allocated < new_unnumbered_arg_count)
+ {
+ numbered_allocated = 2 * numbered_allocated + 1;
+ if (numbered_allocated < new_unnumbered_arg_count)
+ numbered_allocated = new_unnumbered_arg_count;
+ spec->numbered = (struct numbered_arg *) xrealloc (spec->numbered, numbered_allocated * sizeof (struct numbered_arg));
+ }
+ spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+ spec->numbered[unnumbered_arg_count].type = type;
+ if (type == FAT_FORMAT_STRING)
+ spec->numbered[unnumbered_arg_count].signature = signature;
+ unnumbered_arg_count++;
+ if (type == FAT_FUNCTION_T)
+ {
+ spec->numbered[unnumbered_arg_count].number = unnumbered_arg_count + 1;
+ spec->numbered[unnumbered_arg_count].type = FAT_FUNCTION_T2;
+ unnumbered_arg_count++;
+ }
+ }
+ }
+
+ done_specifier:
+ FDI_SET (format, FMTDIR_END);
+
+ format++;
+ }
+
+ if (terminator != '\0')
+ {
+ *invalid_reason = xasprintf (_("Found '%%%c' without matching '%%%c'."),
+ terminator == '}' ? '{' : '(', terminator);
+ goto bad_format;
+ }
+
+ done:
+ /* Convert the unnumbered argument array to numbered arguments. */
+ if (unnumbered_arg_count > 0)
+ spec->numbered_arg_count = unnumbered_arg_count;
+ /* Sort the numbered argument array, and eliminate duplicates. */
+ else if (spec->numbered_arg_count > 1)
+ {
+ size_t i, j;
+ bool err;
+
+ qsort (spec->numbered, spec->numbered_arg_count,
+ sizeof (struct numbered_arg), numbered_arg_compare);
+
+ /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */
+ err = false;
+ for (i = j = 0; i < spec->numbered_arg_count; i++)
+ if (j > 0 && spec->numbered[i].number == spec->numbered[j-1].number)
+ {
+ format_arg_type_t type1 = spec->numbered[i].type;
+ format_arg_type_t type2 = spec->numbered[j-1].type;
+ format_arg_type_t type_both;
+
+ if (((type1 == type2)
+ && (type1 != FAT_FORMAT_STRING
+ || strcmp (spec->numbered[i].signature,
+ spec->numbered[j-1].signature) == 0))
+ || (((type1 | type2) & FAT_OPTIONAL_OCAML_SYNTAX) != 0
+ && (((type1 & ~FAT_OPTIONAL_OCAML_SYNTAX) | FAT_OCAML_SYNTAX)
+ == ((type2 & ~FAT_OPTIONAL_OCAML_SYNTAX) | FAT_OCAML_SYNTAX))))
+ type_both = (type1 | type2) & ~FAT_OPTIONAL_OCAML_SYNTAX;
+ else
+ {
+ /* Incompatible types. */
+ type_both = FAT_NONE;
+ if (!err)
+ *invalid_reason =
+ INVALID_INCOMPATIBLE_ARG_TYPES (spec->numbered[i].number);
+ err = true;
+ }
+
+ spec->numbered[j-1].type = type_both;
+ if (type_both == FAT_FORMAT_STRING)
+ free (spec->numbered[i].signature);
+ }
+ else
+ {
+ if (j < i)
+ {
+ spec->numbered[j].number = spec->numbered[i].number;
+ spec->numbered[j].type = spec->numbered[i].type;
+ if (spec->numbered[j].type == FAT_FORMAT_STRING)
+ spec->numbered[j].signature = spec->numbered[i].signature;
+ }
+ j++;
+ }
+ spec->numbered_arg_count = j;
+ if (err)
+ /* *invalid_reason has already been set above. */
+ goto bad_format;
+ }
+
+ *formatp = format;
+ return true;
+
+ bad_format:
+ destroy_spec (spec);
+ return false;
+}
+
+static void *
+format_parse (const char *format, bool translated, char *fdi,
+ char **invalid_reason)
+{
+ struct spec spec;
+ struct spec *result;
+
+ if (!parse_upto (&spec, &format, '\0', translated, fdi, invalid_reason))
+ return NULL;
+
+ result = XMALLOC (struct spec);
+ *result = spec;
+ return result;
+}
+
+static void
+format_free (void *descr)
+{
+ struct spec *spec = (struct spec *) descr;
+
+ destroy_spec (spec);
+ free (spec);
+}
+
+static int
+format_get_number_of_directives (void *descr)
+{
+ struct spec *spec = (struct spec *) descr;
+
+ return spec->directives;
+}
+
+static bool
+format_check (void *msgid_descr, void *msgstr_descr, bool equality,
+ formatstring_error_logger_t error_logger, void *error_logger_data,
+ const char *pretty_msgid, const char *pretty_msgstr)
+{
+ struct spec *spec1 = (struct spec *) msgid_descr;
+ struct spec *spec2 = (struct spec *) msgstr_descr;
+ bool err = false;
+
+ if (spec1->numbered_arg_count + spec2->numbered_arg_count > 0)
+ {
+ size_t i, j;
+ size_t n1 = spec1->numbered_arg_count;
+ size_t n2 = spec2->numbered_arg_count;
+
+ /* Check that the argument numbers are the same.
+ Both arrays are sorted. We search for the first difference. */
+ for (i = 0, j = 0; i < n1 || j < n2; )
+ {
+ int cmp = (i >= n1 ? 1 :
+ j >= n2 ? -1 :
+ spec1->numbered[i].number > spec2->numbered[j].number ? 1 :
+ spec1->numbered[i].number < spec2->numbered[j].number ? -1 :
+ 0);
+
+ if (cmp > 0)
+ {
+ if (error_logger)
+ error_logger (error_logger_data,
+ _("a format specification for argument %zu, as in '%s', doesn't exist in '%s'"),
+ spec2->numbered[j].number, pretty_msgstr,
+ pretty_msgid);
+ err = true;
+ break;
+ }
+ else if (cmp < 0)
+ {
+ if (equality)
+ {
+ if (error_logger)
+ error_logger (error_logger_data,
+ _("a format specification for argument %zu doesn't exist in '%s'"),
+ spec1->numbered[i].number, pretty_msgstr);
+ err = true;
+ break;
+ }
+ else
+ i++;
+ }
+ else
+ j++, i++;
+ }
+ /* Check that the argument types are essentially the same. */
+ if (!err)
+ for (i = 0, j = 0; j < n2; )
+ {
+ if (spec1->numbered[i].number == spec2->numbered[j].number)
+ {
+ format_arg_type_t type1 = spec1->numbered[i].type;
+ format_arg_type_t type2 = spec2->numbered[j].type;
+
+ if (!(((type1 == type2)
+ && (type1 != FAT_FORMAT_STRING
+ || strcmp (spec1->numbered[i].signature,
+ spec2->numbered[j].signature) == 0))
+ || ((type2 & FAT_OPTIONAL_OCAML_SYNTAX) != 0
+ && (type2 & ~FAT_OPTIONAL_OCAML_SYNTAX)
+ == (type1 & ~FAT_OCAML_SYNTAX))))
+ {
+ if (error_logger)
+ error_logger (error_logger_data,
+ _("format specifications in '%s' and '%s' for argument %zu are not the same"),
+ pretty_msgid, pretty_msgstr,
+ spec2->numbered[j].number);
+ err = true;
+ break;
+ }
+ j++, i++;
+ }
+ else
+ i++;
+ }
+ }
+
+ return err;
+}
+
+
+struct formatstring_parser formatstring_ocaml =
+{
+ format_parse,
+ format_free,
+ format_get_number_of_directives,
+ NULL,
+ format_check
+};
+
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+ format_parse for strings read from standard input. */
+
+#include <stdio.h>
+
+static void
+format_print (void *descr)
+{
+ struct spec *spec = (struct spec *) descr;
+ size_t last;
+ size_t i;
+
+ if (spec == NULL)
+ {
+ printf ("INVALID");
+ return;
+ }
+
+ printf ("(");
+ last = 1;
+ for (i = 0; i < spec->numbered_arg_count; i++)
+ {
+ size_t number = spec->numbered[i].number;
+
+ if (i > 0)
+ printf (" ");
+ if (number < last)
+ abort ();
+ for (; last < number; last++)
+ printf ("_ ");
+ switch (spec->numbered[i].type & FAT_BASIC_MASK)
+ {
+ case FAT_INTEGER:
+ printf ("i");
+ break;
+ case FAT_INT32:
+ printf ("l");
+ break;
+ case FAT_NATIVEINT:
+ printf ("n");
+ break;
+ case FAT_INT64:
+ printf ("L");
+ break;
+ case FAT_STRING:
+ printf ("s");
+ break;
+ case FAT_CHARACTER:
+ printf ("c");
+ break;
+ case FAT_FLOATINGPOINT:
+ printf ("f");
+ break;
+ case FAT_BOOLEAN:
+ printf ("B");
+ break;
+ case FAT_FUNCTION_A:
+ printf ("a");
+ break;
+ case FAT_FUNCTION_T:
+ printf ("t1");
+ break;
+ case FAT_FUNCTION_T2:
+ printf ("t2");
+ break;
+ case FAT_FORMAT_STRING:
+ printf ("\"%s\"", spec->numbered[i].signature);
+ break;
+ default:
+ abort ();
+ }
+ if (spec->numbered[i].type & FAT_OCAML_SYNTAX)
+ printf ("!");
+ if (spec->numbered[i].type & FAT_OPTIONAL_OCAML_SYNTAX)
+ printf ("?");
+ last = number + 1;
+ }
+ printf (")");
+}
+
+int
+main ()
+{
+ for (;;)
+ {
+ char *line = NULL;
+ size_t line_size = 0;
+ int line_len;
+ char *invalid_reason;
+ void *descr;
+
+ line_len = getline (&line, &line_size, stdin);
+ if (line_len < 0)
+ break;
+ if (line_len > 0 && line[line_len - 1] == '\n')
+ line[--line_len] = '\0';
+
+ invalid_reason = NULL;
+ descr = format_parse (line, true, NULL, &invalid_reason);
+
+ format_print (descr);
+ printf ("\n");
+ if (descr == NULL)
+ printf ("%s\n", invalid_reason);
+
+ free (invalid_reason);
+ free (line);
+ }
+
+ return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-ocaml.c ../gnulib-lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */
/* format_pascal */ &formatstring_pascal,
/* format_modula2 */ &formatstring_modula2,
/* format_d */ &formatstring_d,
+ /* format_ocaml */ &formatstring_ocaml,
/* format_smalltalk */ &formatstring_smalltalk,
/* format_qt */ &formatstring_qt,
/* format_qt_plural */ &formatstring_qt_plural,
extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_pascal;
extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_modula2;
extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_d;
+extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_ocaml;
extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_smalltalk;
extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_qt;
extern LIBGETTEXTSRC_DLL_VARIABLE struct formatstring_parser formatstring_qt_plural;
/* format_pascal */ "object-pascal",
/* format_modula2 */ "modula2",
/* format_d */ "d",
+ /* format_ocaml */ "ocaml",
/* format_smalltalk */ "smalltalk",
/* format_qt */ "qt",
/* format_qt_plursl */ "qt-plural",
/* format_pascal */ "Object Pascal",
/* format_modula2 */ "Modula-2",
/* format_d */ "D",
+ /* format_ocaml */ "OCaml",
/* format_smalltalk */ "Smalltalk",
/* format_qt */ "Qt",
/* format_qt_plural */ "Qt plural",
format_pascal,
format_modula2,
format_d,
+ format_ocaml,
format_smalltalk,
format_qt,
format_qt_plural,
format_gfc_internal,
format_ycp
};
-#define NFORMATS 36 /* Number of format_type enum values. */
+#define NFORMATS 37 /* Number of format_type enum values. */
extern LIBGETTEXTSRC_DLL_VARIABLE const char *const format_language[NFORMATS];
extern LIBGETTEXTSRC_DLL_VARIABLE const char *const format_language_pretty[NFORMATS];
/* Specification. */
#include "x-ocaml.h"
+#include <errno.h>
#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <error.h>
#include "message.h"
-#include "clean-temp.h"
-#include "concat-filename.h"
-#include "execute.h"
-#include "xvasprintf.h"
-#include "x-po.h"
+#include "string-desc.h"
+#include "xstring-desc.h"
+#include "string-buffer.h"
#include "xgettext.h"
+#include "xg-pos.h"
+#include "xg-mixed-string.h"
+#include "xg-arglist-context.h"
+#include "xg-arglist-callshape.h"
+#include "xg-arglist-parser.h"
+#include "xg-message.h"
+#include "if-error.h"
+#include "xalloc.h"
+#include "read-file.h"
+#include "unistr.h"
+#include "po-charset.h"
#include "gettext.h"
-/* A convenience macro. I don't like writing gettext() every time. */
-#define _(str) gettext (str)
+#define _(s) gettext (s)
-/* We don't parse OCaml directly, but instead rely on the 'ocaml-gettext'
- program, that invokes the 'ocaml-xgettext' program. Both are contained
- in the 'opam' package named 'gettext':
- https://opam.ocaml.org/packages/gettext/
- https://github.com/gildor478/ocaml-gettext
- https://github.com/gildor478/ocaml-gettext/blob/master/doc/reference-manual.md
+/* Use tree-sitter.
+ Documentation: <https://tree-sitter.github.io/tree-sitter/using-parsers> */
+#include <tree_sitter/api.h>
+extern const TSLanguage *tree_sitter_ocaml (void);
- Comments start with '(*' and end with '*)' and can be nested.
- Reference: <https://ocaml.org/docs/tour-of-ocaml>
+
+/* The OCaml syntax is defined in <https://ocaml.org/docs/language>.
+
+ String syntax: Strings are delimited by double-quotes or by {id| |id} pairs.
+ Backslash is the escape character. Among the escape sequences, there is in
+ particular backslash-newline-spaces_or_tabs and \u{nnnn}.
+ Reference: <https://ocaml.org/manual/5.3/lex.html#sss:stringliterals>
+
+ Comment syntax: Comments start with '(*' and end with '*)' and can be nested.
+ References: <https://ocaml.org/manual/5.3/lex.html#sss:lex:comments>
+ <https://ocaml.org/docs/tour-of-ocaml>
*/
+#define DEBUG_OCAML 0
+
/* ====================== Keyword set customization. ====================== */
-/* This function currently has no effect. */
+/* If true extract all strings. */
+static bool extract_all = false;
+
+static hash_table keywords;
+static bool default_keywords = true;
+
void
x_ocaml_extract_all (void)
{
+ extract_all = true;
}
-/* This function currently has no effect. */
void
-x_ocaml_keyword (const char *keyword)
+x_ocaml_keyword (const char *name)
{
+ if (name == NULL)
+ default_keywords = false;
+ else
+ {
+ const char *end;
+ struct callshape shape;
+ const char *colon;
+
+ if (keywords.table == NULL)
+ hash_init (&keywords, 100);
+
+ split_keywordspec (name, &end, &shape);
+
+ /* The characters between name and end should form a valid identifier.
+ A colon means an invalid parse in split_keywordspec(). */
+ colon = strchr (name, ':');
+ if (colon == NULL || colon >= end)
+ insert_keyword_callshape (&keywords, name, end - name, &shape);
+ }
+}
+
+/* Finish initializing the keywords hash table.
+ Called after argument processing, before each file is processed. */
+static void
+init_keywords ()
+{
+ if (default_keywords)
+ {
+ /* Compatible with ocaml-gettext/src/bin/ocaml-xgettext/xgettext.ml. */
+ /* When adding new keywords here, also update the documentation in
+ xgettext.texi! */
+ x_ocaml_keyword ("s_");
+ x_ocaml_keyword ("f_");
+ x_ocaml_keyword ("sn_:1,2");
+ x_ocaml_keyword ("fn_:1,2");
+ x_ocaml_keyword ("gettext:2");
+ x_ocaml_keyword ("fgettext:2");
+ x_ocaml_keyword ("dgettext:3");
+ x_ocaml_keyword ("fdgettext:3");
+ x_ocaml_keyword ("dcgettext:3");
+ x_ocaml_keyword ("fdcgettext:3");
+ x_ocaml_keyword ("ngettext:2,3");
+ x_ocaml_keyword ("fngettext:2,3");
+ x_ocaml_keyword ("dngettext:3,4");
+ x_ocaml_keyword ("fdngettext:3,4");
+ x_ocaml_keyword ("dcngettext:3,4");
+ x_ocaml_keyword ("fdcngettext:3,4");
+ default_keywords = false;
+ }
}
/* This function currently has no effect. */
void
init_flag_table_ocaml (void)
{
+ /* Compatible with ocaml-gettext/src/bin/ocaml-xgettext/xgettext.ml. */
+ xgettext_record_flag ("s_:1:impossible-ocaml-format");
+ xgettext_record_flag ("f_:1:ocaml-format");
+ xgettext_record_flag ("sn_:1:impossible-ocaml-format");
+ xgettext_record_flag ("sn_:2:impossible-ocaml-format");
+ xgettext_record_flag ("fn_:1:ocaml-format");
+ xgettext_record_flag ("fn_:2:ocaml-format");
+ xgettext_record_flag ("gettext:2:impossible-ocaml-format");
+ xgettext_record_flag ("fgettext:2:ocaml-format");
+ xgettext_record_flag ("dgettext:3:impossible-ocaml-format");
+ xgettext_record_flag ("fdgettext:3:ocaml-format");
+ xgettext_record_flag ("dcgettext:3:impossible-ocaml-format");
+ xgettext_record_flag ("fdcgettext:3:ocaml-format");
+ xgettext_record_flag ("ngettext:2:impossible-ocaml-format");
+ xgettext_record_flag ("ngettext:3:impossible-ocaml-format");
+ xgettext_record_flag ("fngettext:2:ocaml-format");
+ xgettext_record_flag ("fngettext:3:ocaml-format");
+ xgettext_record_flag ("dngettext:3:impossible-ocaml-format");
+ xgettext_record_flag ("dngettext:4:impossible-ocaml-format");
+ xgettext_record_flag ("fdngettext:3:ocaml-format");
+ xgettext_record_flag ("fdngettext:4:ocaml-format");
+ xgettext_record_flag ("dcngettext:3:impossible-ocaml-format");
+ xgettext_record_flag ("dcngettext:4:impossible-ocaml-format");
+ xgettext_record_flag ("fdcngettext:3:ocaml-format");
+ xgettext_record_flag ("fdcngettext:4:ocaml-format");
+}
+
+
+/* ======================== Parsing via tree-sitter. ======================== */
+/* To understand this code, look at
+ tree-sitter-ocaml/grammars/ocaml/src/node-types.json
+ and
+ tree-sitter-ocaml/grammars/ocaml/src/grammar.json
+ */
+
+/* The tree-sitter's language object. */
+static const TSLanguage *ts_language;
+
+/* ------------------------- Node types and symbols ------------------------- */
+
+static TSSymbol
+ts_language_symbol (const char *name, bool is_named)
+{
+ TSSymbol result =
+ ts_language_symbol_for_name (ts_language, name, strlen (name), is_named);
+ if (result == 0)
+ /* If we get here, the grammar has evolved in an incompatible way. */
+ abort ();
+ return result;
+}
+
+static TSFieldId
+ts_language_field (const char *name)
+{
+ TSFieldId result =
+ ts_language_field_id_for_name (ts_language, name, strlen (name));
+ if (result == 0)
+ /* If we get here, the grammar has evolved in an incompatible way. */
+ abort ();
+ return result;
+}
+
+/* Optimization:
+ Instead of
+ strcmp (ts_node_type (node), "string") == 0
+ it is faster to do
+ ts_node_symbol (node) == ts_symbol_string
+ */
+static TSSymbol ts_symbol_comment;
+static TSSymbol ts_symbol_string;
+static TSSymbol ts_symbol_string_content;
+static TSSymbol ts_symbol_escape_sequence;
+static TSSymbol ts_symbol_quoted_string;
+static TSSymbol ts_symbol_quoted_string_content;
+static TSSymbol ts_symbol_infix_expression;
+static TSSymbol ts_symbol_concat_operator;
+static TSSymbol ts_symbol_application_expression;
+static TSSymbol ts_symbol_value_path;
+static TSSymbol ts_symbol_value_name;
+static TSSymbol ts_symbol_parenthesized_expression;
+static TSSymbol ts_symbol_lparen;
+static TSSymbol ts_symbol_rparen;
+static TSFieldId ts_field_operator;
+static TSFieldId ts_field_left;
+static TSFieldId ts_field_right;
+static TSFieldId ts_field_function;
+
+static inline size_t
+ts_node_line_number (TSNode node)
+{
+ return ts_node_start_point (node).row + 1;
}
+/* -------------------------------- The file -------------------------------- */
-/* ========================= Extracting strings. ========================== */
+/* The entire contents of the file being analyzed. */
+static const char *contents;
+/* -------------------------------- Comments -------------------------------- */
+
+/* These are for tracking whether comments count as immediately before
+ keyword. */
+static int last_comment_line;
+static int last_non_comment_line;
+
+/* Saves a comment line. */
+static void save_comment_line (string_desc_t gist)
+{
+ /* Remove leading whitespace. */
+ while (sd_length (gist) > 0
+ && (sd_char_at (gist, 0) == ' '
+ || sd_char_at (gist, 0) == '\t'))
+ gist = sd_substring (gist, 1, sd_length (gist));
+ /* Remove trailing whitespace. */
+ size_t len = sd_length (gist);
+ while (len > 0
+ && (sd_char_at (gist, len - 1) == ' '
+ || sd_char_at (gist, len - 1) == '\t'))
+ len--;
+ gist = sd_substring (gist, 0, len);
+ savable_comment_add (sd_c (gist));
+}
+
+/* Does the comment handling for NODE.
+ Updates savable_comment, last_comment_line, last_non_comment_line.
+ It is important that this function gets called
+ - for each node (not only the named nodes!),
+ - in depth-first traversal order. */
+static void handle_comments (TSNode node)
+{
+ #if DEBUG_OCAML
+ fprintf (stderr, "LCL=%d LNCL=%d node=[%s]|%s|\n", last_comment_line, last_non_comment_line, ts_node_type (node), ts_node_string (node));
+ #endif
+ if (last_comment_line < last_non_comment_line
+ && last_non_comment_line < ts_node_line_number (node))
+ /* We have skipped over a newline. This newline terminated a line
+ with non-comment tokens, after the last comment line. */
+ savable_comment_reset ();
+
+ if (ts_node_symbol (node) == ts_symbol_comment)
+ {
+ string_desc_t entire =
+ sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+ contents + ts_node_start_byte (node));
+ /* It should start and end with the comment markers. */
+ if (!(sd_length (entire) >= 4
+ && sd_char_at (entire, 0) == '('
+ && sd_char_at (entire, 1) == '*'
+ && sd_char_at (entire, sd_length (entire) - 2) == '*'
+ && sd_char_at (entire, sd_length (entire) - 1) == ')'))
+ abort ();
+ string_desc_t gist = sd_substring (entire, 2, sd_length (entire) - 2);
+ /* Split into lines.
+ Remove leading and trailing whitespace from each line. */
+ for (;;)
+ {
+ ptrdiff_t nl_index = sd_index (gist, '\n');
+ if (nl_index >= 0)
+ {
+ save_comment_line (sd_substring (gist, 0, nl_index));
+ gist = sd_substring (gist, nl_index + 1, sd_length (gist));
+ }
+ else
+ {
+ save_comment_line (gist);
+ break;
+ }
+ }
+ last_comment_line = ts_node_end_point (node).row + 1;
+ }
+ else
+ last_non_comment_line = ts_node_line_number (node);
+}
+
+/* ---------------------------- String literals ---------------------------- */
+
+/* Determines whether NODE represents the string concatenation operator '^'. */
+static bool
+is_string_concatenation_operator (TSNode node)
+{
+ if (ts_node_symbol (node) == ts_symbol_concat_operator)
+ {
+ string_desc_t operator_string =
+ sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+ contents + ts_node_start_byte (node));
+ if (sd_equals (operator_string, sd_from_c ("^")))
+ return true;
+ }
+ return false;
+}
+
+/* Determines whether NODE represents a string literal or the concatenation
+ of string literals (via the '^' operator). */
static bool
-is_not_header (const message_ty *mp)
+is_string_literal (TSNode node)
{
- return !is_header (mp);
+ start:
+ if (ts_node_symbol (node) == ts_symbol_string
+ || ts_node_symbol (node) == ts_symbol_quoted_string)
+ return true;
+ if (ts_node_symbol (node) == ts_symbol_infix_expression
+ && is_string_concatenation_operator (ts_node_child_by_field_id (node, ts_field_operator))
+ /* Recurse into the left and right subnodes. */
+ && is_string_literal (ts_node_child_by_field_id (node, ts_field_left)))
+ {
+ /*return is_string_literal (ts_node_child_by_field_id (node, ts_field_right));*/
+ node = ts_node_child_by_field_id (node, ts_field_right);
+ goto start;
+ }
+ if (ts_node_symbol (node) == ts_symbol_parenthesized_expression)
+ {
+ uint32_t count = ts_node_child_count (node);
+ if (count > 0
+ && ts_node_symbol (ts_node_child (node, 0)) == ts_symbol_lparen
+ && ts_node_symbol (ts_node_child (node, count - 1)) == ts_symbol_rparen)
+ {
+ uint32_t subnodes = 0;
+ uint32_t last_subnode_index = 0;
+ uint32_t i;
+ for (i = 1; i < count - 1; i++)
+ {
+ TSNode subnode = ts_node_child (node, i);
+ if (ts_node_is_named (subnode)
+ && ts_node_symbol (subnode) != ts_symbol_comment)
+ {
+ subnodes++;
+ last_subnode_index = i;
+ }
+ }
+ if (subnodes == 1)
+ {
+ TSNode subnode = ts_node_child (node, last_subnode_index);
+ /* Recurse. */
+ /*return is_string_literal (subnode);*/
+ node = subnode;
+ goto start;
+ }
+ }
+ }
+
+ return false;
+}
+
+/* Appends the string literal pieces from NODE to BUFFER. */
+static void
+string_literal_accumulate_pieces (TSNode node,
+ struct string_buffer *buffer)
+{
+ start:
+ if (ts_node_symbol (node) == ts_symbol_string)
+ {
+ uint32_t count = ts_node_named_child_count (node);
+ uint32_t i;
+ for (i = 0; i < count; i++)
+ {
+ TSNode subnode = ts_node_named_child (node, i);
+ if (ts_node_symbol (subnode) == ts_symbol_string_content)
+ {
+ const char *subnode_start = contents + ts_node_start_byte (subnode);
+ const char *subnode_end = contents + ts_node_end_byte (subnode);
+ uint32_t subcount = ts_node_child_count (subnode);
+ #if DEBUG_OCAML
+ {
+ fprintf (stderr, "string_content children:\n");
+ uint32_t j;
+ for (j = 0; j < subcount; j++)
+ fprintf (stderr, "%u -> [%s]|%s|\n", j, ts_node_type (ts_node_child (subnode, j)), ts_node_string (ts_node_child (subnode, j)));
+ }
+ #endif
+ /* Iterate over the children nodes of type escape_sequence.
+ Other children nodes, such as conversion_specification or
+ pretty_printing_indication, can be ignored. */
+ uint32_t j;
+ for (j = 0; j < subcount; j++)
+ {
+ TSNode subsubnode = ts_node_child (subnode, j);
+ if (ts_node_symbol (subsubnode) == ts_symbol_escape_sequence)
+ {
+ const char *escape_start = contents + ts_node_start_byte (subsubnode);
+ const char *escape_end = contents + ts_node_end_byte (subsubnode);
+ sb_xappend_desc (buffer,
+ sd_new_addr (escape_start - subnode_start, subnode_start));
+
+ /* The escape sequence must start with a backslash. */
+ if (!(escape_end - escape_start >= 2 && escape_start[0] == '\\'))
+ abort ();
+ /* tree-sitter's grammar.js allows more escape sequences
+ than the OCaml system. Give a warning for those cases
+ where the OCaml system gives an error. */
+ bool invalid = false;
+ if (escape_end - escape_start >= 2
+ && (escape_start[1] == '\n' || escape_start[1] == '\r'))
+ /* backslash-newline-spaces_or_tabs */
+ ;
+ else if (escape_end - escape_start == 2)
+ {
+ switch (escape_start[1])
+ {
+ case '\\':
+ case '"':
+ case '\'':
+ case ' ':
+ sb_xappend1 (buffer, escape_start[1]);
+ break;
+ case 'n':
+ sb_xappend1 (buffer, '\n');
+ break;
+ case 'r':
+ sb_xappend1 (buffer, '\r');
+ break;
+ case 't':
+ sb_xappend1 (buffer, '\t');
+ break;
+ case 'b':
+ sb_xappend1 (buffer, 0x08);
+ break;
+ default:
+ abort ();
+ }
+ }
+ else if (escape_end - escape_start == 4
+ && (escape_start[1] >= '0'
+ && escape_start[1] <= '9'))
+ {
+ /* Only exactly 3 decimal digits are accepted. */
+ unsigned int value = 0;
+ const char *p;
+ for (p = escape_start + 1; p < escape_end; p++)
+ {
+ /* No overflow is possible. */
+ char c = *p;
+ if (c >= '0' && c <= '9')
+ value = value * 10 + (c - '0');
+ else
+ abort ();
+ }
+ if (value > 0xFF)
+ invalid = true;
+ if (!invalid)
+ sb_xappend1 (buffer, (unsigned char) value);
+ }
+ else if (escape_end - escape_start == 4
+ && escape_start[1] == 'x')
+ {
+ /* Only exactly 2 hexadecimal digits are accepted. */
+ unsigned int value = 0;
+ const char *p;
+ for (p = escape_start + 2; p < escape_end; p++)
+ {
+ /* No overflow is possible. */
+ char c = *p;
+ if (c >= '0' && c <= '9')
+ value = (value << 4) + (c - '0');
+ else if (c >= 'A' && c <= 'Z')
+ value = (value << 4) + (c - 'A' + 10);
+ else if (c >= 'a' && c <= 'z')
+ value = (value << 4) + (c - 'a' + 10);
+ else
+ abort ();
+ }
+ sb_xappend1 (buffer, (unsigned char) value);
+ }
+ else if (escape_end - escape_start == 5
+ && escape_start[1] == 'o')
+ {
+ /* Only exactly 3 octal digits are accepted. */
+ unsigned int value = 0;
+ const char *p;
+ for (p = escape_start + 2; p < escape_end; p++)
+ {
+ /* No overflow is possible. */
+ char c = *p;
+ if (c >= '0' && c <= '7')
+ value = (value << 3) + (c - '0');
+ else
+ abort ();
+ }
+ if (value > 0xFF)
+ abort ();
+ sb_xappend1 (buffer, (unsigned char) value);
+ }
+ else if (escape_end - escape_start > 4
+ && escape_start[1] == 'u'
+ && escape_start[2] == '{'
+ && escape_end[-1] == '}')
+ {
+ if (escape_end - escape_start <= 4 + 6)
+ {
+ /* 1 to 6 hexadecimal digits are accepted. */
+ unsigned int value = 0;
+ const char *p;
+ for (p = escape_start + 3; p < escape_end - 1; p++)
+ {
+ /* No overflow is possible. */
+ char c = *p;
+ if (c >= '0' && c <= '9')
+ value = (value << 4) + (c - '0');
+ else if (c >= 'A' && c <= 'Z')
+ value = (value << 4) + (c - 'A' + 10);
+ else if (c >= 'a' && c <= 'z')
+ value = (value << 4) + (c - 'a' + 10);
+ else
+ abort ();
+ }
+ if (value >= 0x110000
+ || (value >= 0xD800 && value <= 0xDFFF))
+ invalid = true;
+ if (!invalid)
+ {
+ uint8_t buf[6];
+ int n = u8_uctomb (buf, value, sizeof (buf));
+ if (n > 0)
+ sb_xappend_desc (buffer,
+ sd_new_addr (n, (const char *) buf));
+ else
+ invalid = true;
+ }
+ }
+ else
+ invalid = true;
+ }
+ else
+ abort ();
+ if (invalid)
+ {
+ size_t line_number = ts_node_line_number (subnode);
+ if_error (IF_SEVERITY_WARNING,
+ logical_file_name, line_number, (size_t)(-1), false,
+ _("invalid escape sequence in string"));
+ }
+
+ subnode_start = escape_end;
+ }
+ }
+ sb_xappend_desc (buffer,
+ sd_new_addr (subnode_end - subnode_start, subnode_start));
+ }
+ else
+ abort ();
+ }
+ }
+ else if (ts_node_symbol (node) == ts_symbol_quoted_string)
+ {
+ uint32_t count = ts_node_named_child_count (node);
+ uint32_t i;
+ for (i = 0; i < count; i++)
+ {
+ TSNode subnode = ts_node_named_child (node, i);
+ if (ts_node_symbol (subnode) == ts_symbol_quoted_string_content)
+ {
+ /* We can ignore the children nodes here, since none of them can
+ be of type escape_sequence. */
+ string_desc_t subnode_string =
+ sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+ contents + ts_node_start_byte (subnode));
+ sb_xappend_desc (buffer, subnode_string);
+ }
+ }
+ }
+ else if (ts_node_symbol (node) == ts_symbol_infix_expression
+ && is_string_concatenation_operator (ts_node_child_by_field_id (node, ts_field_operator)))
+ {
+ /* Recurse into the left and right subnodes. */
+ string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_left), buffer);
+ /*string_literal_accumulate_pieces (ts_node_child_by_field_id (node, ts_field_right), buffer);*/
+ node = ts_node_child_by_field_id (node, ts_field_right);
+ goto start;
+ }
+ else if (ts_node_symbol (node) == ts_symbol_parenthesized_expression)
+ {
+ uint32_t count = ts_node_child_count (node);
+ /* is_string_literal has already checked that the first child node is '(',
+ that the last child node is ')', and that in-between there is exactly
+ one non-comment node. */
+ if (!(count > 0))
+ abort ();
+ uint32_t i;
+ for (i = 1; i < count - 1; i++)
+ {
+ TSNode subnode = ts_node_child (node, i);
+ if (ts_node_is_named (subnode)
+ && ts_node_symbol (subnode) != ts_symbol_comment)
+ {
+ /* Recurse. */
+ /*string_literal_accumulate_pieces (subnode, buffer);*/
+ node = subnode;
+ goto start;
+ }
+ }
+ abort ();
+ }
+ else
+ abort ();
+}
+
+/* Combines the pieces of a string literal or concatenated string literal.
+ Returns a freshly allocated, mostly UTF-8 encoded string. */
+static char *
+string_literal_value (TSNode node)
+{
+ struct string_buffer buffer;
+ sb_init (&buffer);
+ string_literal_accumulate_pieces (node, &buffer);
+ return sb_xdupfree_c (&buffer);
+}
+
+/* --------------------- Parsing and string extraction --------------------- */
+
+/* Context lookup table. */
+static flag_context_list_table_ty *flag_context_list_table;
+
+/* Maximum supported nesting depth. */
+#define MAX_NESTING_DEPTH 1000
+
+static int nesting_depth;
+
+/* The file is parsed into an abstract syntax tree. Scan the syntax tree,
+ looking for a keyword in function position of a application_expression,
+ followed by followed by a string among the arguments.
+ When we see this pattern, we have something to remember.
+
+ Normal handling: Look for
+ keyword ... msgid ...
+ Plural handling: Look for
+ keyword ... msgid ... msgid_plural ...
+
+ We use recursion because the arguments before msgid or between msgid
+ and msgid_plural can contain subexpressions of the same form. */
+
+/* Forward declarations. */
+static void extract_from_node (TSNode node,
+ bool ignore,
+ flag_region_ty *outer_region,
+ message_list_ty *mlp);
+
+/* Extracts messages from the function application consisting of
+ - FUNCTION_NODE: a tree node of type 'value_path',
+ - FUNCTION_NAME_NODE: a tree node of type 'value_name',
+ the last named node of FUNCTION_NODE,
+ - ARGS_NODE: a tree node of type 'application_expression',
+ of which FUNCTION_NAME is the 'function' field.
+ Extracted messages are added to MLP. */
+static void
+extract_from_function_call (TSNode function_node,
+ TSNode function_name_node,
+ TSNode args_node,
+ flag_region_ty *outer_region,
+ message_list_ty *mlp)
+{
+ uint32_t args_count = ts_node_child_count (args_node);
+
+ string_desc_t function_name =
+ sd_new_addr (ts_node_end_byte (function_name_node) - ts_node_start_byte (function_name_node),
+ contents + ts_node_start_byte (function_name_node));
+
+ /* Context iterator. */
+ flag_context_list_iterator_ty next_context_iter =
+ flag_context_list_iterator (
+ flag_context_list_table_lookup (
+ flag_context_list_table,
+ sd_data (function_name), sd_length (function_name)));
+
+ /* Information associated with the callee. */
+ const struct callshapes *next_shapes = NULL;
+
+ /* Look in the keywords table. */
+ void *keyword_value;
+ if (hash_find_entry (&keywords,
+ sd_data (function_name), sd_length (function_name),
+ &keyword_value)
+ == 0)
+ next_shapes = (const struct callshapes *) keyword_value;
+
+ if (next_shapes != NULL)
+ {
+ /* We have a function, named by a relevant identifier, with an argument
+ list. */
+
+ struct arglist_parser *argparser =
+ arglist_parser_alloc (mlp, next_shapes);
+
+ /* Current argument number. */
+ uint32_t arg;
+ uint32_t i;
+
+ arg = 0;
+ for (i = 0; i < args_count; i++)
+ {
+ TSNode arg_node = ts_node_child (args_node, i);
+ handle_comments (arg_node);
+ if (ts_node_is_named (arg_node)
+ && ts_node_symbol (arg_node) != ts_symbol_comment
+ && !ts_node_eq (arg_node, function_node))
+ {
+ arg++;
+ flag_region_ty *arg_region =
+ inheriting_region (outer_region,
+ flag_context_list_iterator_advance (
+ &next_context_iter));
+
+ bool already_extracted = false;
+ if (is_string_literal (arg_node))
+ {
+ lex_pos_ty pos;
+ pos.file_name = logical_file_name;
+ pos.line_number = ts_node_line_number (arg_node);
+
+ char *string = string_literal_value (arg_node);
+
+ if (extract_all)
+ {
+ remember_a_message (mlp, NULL, string, true, false,
+ arg_region, &pos,
+ NULL, savable_comment, true);
+ already_extracted = true;
+ }
+ else
+ {
+ mixed_string_ty *mixed_string =
+ mixed_string_alloc_utf8 (string, lc_string,
+ pos.file_name, pos.line_number);
+ arglist_parser_remember (argparser, arg, mixed_string,
+ arg_region,
+ pos.file_name, pos.line_number,
+ savable_comment, true);
+ }
+ }
+
+ if (!already_extracted)
+ {
+ if (++nesting_depth > MAX_NESTING_DEPTH)
+ if_error (IF_SEVERITY_FATAL_ERROR,
+ logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+ _("too many open parentheses"));
+ extract_from_node (arg_node,
+ false,
+ arg_region,
+ mlp);
+ nesting_depth--;
+ }
+
+ unref_region (arg_region);
+ }
+ }
+ arglist_parser_done (argparser, arg);
+ return;
+ }
+
+ /* Recurse. */
+
+ uint32_t i;
+
+ for (i = 0; i < args_count; i++)
+ {
+ TSNode arg_node = ts_node_child (args_node, i);
+ handle_comments (arg_node);
+ if (ts_node_is_named (arg_node)
+ && ts_node_symbol (arg_node) != ts_symbol_comment)
+ {
+ flag_region_ty *arg_region =
+ inheriting_region (outer_region,
+ flag_context_list_iterator_advance (
+ &next_context_iter));
+
+ if (++nesting_depth > MAX_NESTING_DEPTH)
+ if_error (IF_SEVERITY_FATAL_ERROR,
+ logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+ _("too many open parentheses"));
+ extract_from_node (arg_node,
+ false,
+ arg_region,
+ mlp);
+ nesting_depth--;
+
+ unref_region (arg_region);
+ }
+ }
+}
+
+/* Extracts messages in the syntax tree NODE.
+ Extracted messages are added to MLP. */
+static void
+extract_from_node (TSNode node,
+ bool ignore,
+ flag_region_ty *outer_region,
+ message_list_ty *mlp)
+{
+ if (extract_all && !ignore && is_string_literal (node))
+ {
+ lex_pos_ty pos;
+ pos.file_name = logical_file_name;
+ pos.line_number = ts_node_line_number (node);
+
+ char *string = string_literal_value (node);
+
+ remember_a_message (mlp, NULL, string, true, false,
+ outer_region, &pos,
+ NULL, savable_comment, true);
+ }
+
+ if (ts_node_symbol (node) == ts_symbol_application_expression
+ && ts_node_named_child_count (node) >= 2)
+ {
+ TSNode function_node = ts_node_named_child (node, 0);
+ /* This is the field called 'function'. */
+ if (! ts_node_eq (ts_node_child_by_field_id (node, ts_field_function),
+ function_node))
+ abort ();
+ if (ts_node_symbol (function_node) == ts_symbol_value_path
+ && ts_node_named_child_count (function_node) > 0)
+ {
+ TSNode function_name_node =
+ ts_node_named_child (function_node,
+ ts_node_named_child_count (function_node) - 1);
+ if (ts_node_symbol (function_name_node) == ts_symbol_value_name)
+ {
+ extract_from_function_call (function_node, function_name_node, node,
+ outer_region,
+ mlp);
+ return;
+ }
+ }
+ }
+
+ /* Recurse. */
+ if (!(ts_node_symbol (node) == ts_symbol_comment))
+ {
+ ignore = ignore || is_string_literal (node);
+ uint32_t count = ts_node_child_count (node);
+ uint32_t i;
+ for (i = 0; i < count; i++)
+ {
+ TSNode subnode = ts_node_child (node, i);
+ handle_comments (subnode);
+ if (++nesting_depth > MAX_NESTING_DEPTH)
+ if_error (IF_SEVERITY_FATAL_ERROR,
+ logical_file_name, ts_node_line_number (subnode), (size_t)(-1), false,
+ _("too many open parentheses, brackets, or braces"));
+ extract_from_node (subnode,
+ ignore,
+ outer_region,
+ mlp);
+ nesting_depth--;
+ }
+ }
}
void
-extract_ocaml (const char *found_in_dir, const char *real_filename,
- const char *logical_filename,
+extract_ocaml (FILE *f,
+ const char *real_filename, const char *logical_filename,
flag_context_list_table_ty *flag_table,
msgdomain_list_ty *mdlp)
{
- /* Invoke
- ocaml-gettext --action extract --extract-pot <temp>.pot real_filename */
+ message_list_ty *mlp = mdlp->item[0]->messages;
- /* First, create a temporary directory where this invocation can place its
- output. */
- struct temp_dir *tmpdir = create_temp_dir ("ocgt", NULL, false);
- if (tmpdir == NULL)
- exit (EXIT_FAILURE);
+ logical_file_name = xstrdup (logical_filename);
- /* Prepare the temporary POT file name. */
- char *temp_file_name = xconcatenated_filename (tmpdir->dir_name, "temp.pot", NULL);
- register_temp_file (tmpdir, temp_file_name);
+ last_comment_line = -1;
+ last_non_comment_line = -1;
- /* Invoke ocaml-gettext. */
- const char *progname = "ocaml-gettext";
- {
- const char *argv[7];
- int exitstatus;
- /* Prepare arguments. */
- argv[0] = progname;
- argv[1] = "--action";
- argv[2] = "extract";
- argv[3] = "--extract-pot";
- argv[4] = temp_file_name;
- argv[5] = logical_filename;
- argv[6] = NULL;
- exitstatus = execute (progname, progname, argv, NULL, found_in_dir,
- true, false, false, false, true, false, NULL);
- if (exitstatus != 0)
- error (EXIT_FAILURE, 0, _("%s subprocess failed with exit code %d"),
- progname, exitstatus);
- }
+ flag_context_list_table = flag_table;
+ nesting_depth = 0;
+
+ init_keywords ();
+
+ if (ts_language == NULL)
+ {
+ ts_language = tree_sitter_ocaml ();
+ ts_symbol_comment = ts_language_symbol ("comment", true);
+ ts_symbol_string = ts_language_symbol ("string", true);
+ ts_symbol_string_content = ts_language_symbol ("string_content", true);
+ ts_symbol_escape_sequence = ts_language_symbol ("escape_sequence", true);
+ ts_symbol_quoted_string = ts_language_symbol ("quoted_string", true);;
+ ts_symbol_quoted_string_content = ts_language_symbol ("quoted_string_content", true);;
+ ts_symbol_infix_expression = ts_language_symbol ("infix_expression", true);
+ ts_symbol_concat_operator = ts_language_symbol ("concat_operator", true);
+ ts_symbol_application_expression = ts_language_symbol ("application_expression", true);
+ ts_symbol_value_path = ts_language_symbol ("value_path", true);
+ ts_symbol_value_name = ts_language_symbol ("value_name", true);
+ ts_symbol_parenthesized_expression = ts_language_symbol ("parenthesized_expression", true);
+ ts_symbol_lparen = ts_language_symbol ("(", false);
+ ts_symbol_rparen = ts_language_symbol (")", false);
+ ts_field_operator = ts_language_field ("operator");
+ ts_field_left = ts_language_field ("left");
+ ts_field_right = ts_language_field ("right");
+ ts_field_function = ts_language_field ("function");
+ }
+
+ /* Read the file into memory. */
+ char *contents_data;
+ size_t contents_length;
+ contents_data = read_file (real_filename, 0, &contents_length);
+ if (contents_data == NULL)
+ error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+ real_filename);
+
+ /* tree-sitter works only on files whose size fits in an uint32_t. */
+ if (contents_length > 0xFFFFFFFFUL)
+ error (EXIT_FAILURE, 0, _("file \"%s\" is unsupported because too large"),
+ real_filename);
- /* Read the resulting POT file. */
+ /* OCaml source files are "expected to be" UTF-8 encoded.
+ <https://ocaml.org/manual/5.3/lex.html#sss:lex:text-encoding> */
+ if (u8_check ((uint8_t *) contents_data, contents_length) != NULL)
+ error (EXIT_FAILURE, 0,
+ _("file \"%s\" is invalid because not UTF-8 encoded"),
+ real_filename);
+ xgettext_current_source_encoding = po_charset_utf8;
+
+ /* Create a parser. */
+ TSParser *parser = ts_parser_new ();
+
+ /* Set the parser's language. */
+ ts_parser_set_language (parser, ts_language);
+
+ /* Parse the file, producing a syntax tree. */
+ TSTree *tree = ts_parser_parse_string (parser, NULL, contents_data, contents_length);
+
+ #if DEBUG_OCAML
+ /* For debugging: Print the tree. */
{
- FILE *fp = fopen (temp_file_name, "r");
- if (fp == NULL)
- error (EXIT_FAILURE, 0, _("%s subprocess did not create the expected file"),
- progname);
- char *dummy_filename = xasprintf (_("(output from '%s')"), progname);
- extract_po (fp, temp_file_name, dummy_filename, flag_table, mdlp);
- fclose (fp);
- free (dummy_filename);
+ char *tree_as_string = ts_node_string (ts_tree_root_node (tree));
+ fprintf (stderr, "Syntax tree: %s\n", tree_as_string);
+ free (tree_as_string);
}
+ #endif
- cleanup_temp_dir (tmpdir);
+ contents = contents_data;
- if (xgettext_omit_header)
- {
- /* Remove the header entry. */
- if (mdlp->nitems > 0)
- message_list_remove_if_not (mdlp->item[0]->messages, is_not_header);
- }
+ extract_from_node (ts_tree_root_node (tree),
+ false,
+ null_context_region (),
+ mlp);
+
+ ts_tree_delete (tree);
+ ts_parser_delete (parser);
+ free (contents_data);
+
+ logical_file_name = NULL;
}
{ "ml", "OCaml" }, \
#define SCANNERS_OCAML \
- { "OCaml", NULL, extract_ocaml, \
- NULL, NULL, NULL }, \
+ { "OCaml", extract_ocaml, NULL, \
+ &flag_table_ocaml, &formatstring_ocaml, NULL }, \
-extern void extract_ocaml (const char *found_in_dir, const char *real_filename,
+/* Scan a OCaml file and add its translatable strings to mdlp. */
+extern void extract_ocaml (FILE *fp, const char *real_filename,
const char *logical_filename,
flag_context_list_table_ty *flag_table,
msgdomain_list_ty *mdlp);
static flag_context_list_table_ty flag_table_lua;
static flag_context_list_table_ty flag_table_modula2;
static flag_context_list_table_ty flag_table_d;
+static flag_context_list_table_ty flag_table_ocaml;
static flag_context_list_table_ty flag_table_vala;
static flag_context_list_table_ty flag_table_tcl;
static flag_context_list_table_ty flag_table_perl;
(only languages C, C++, ObjectiveC, Python,\n\
Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
- awk, Lua, Modula-2, D, Vala, Tcl, Perl, PHP,\n\
- GCC-source, Glade, GSettings)\n"));
+ awk, Lua, Modula-2, D, OCaml, Vala, Tcl, Perl,\n\
+ PHP, GCC-source, Glade, GSettings)\n"));
printf (_("\
-kWORD, --keyword=WORD look for WORD as an additional keyword\n\
-k, --keyword do not to use default keywords\n"));
(only languages C, C++, ObjectiveC, Python,\n\
Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
- awk, Lua, Modula-2, D, Vala, Tcl, Perl, PHP,\n\
- GCC-source, Glade, GSettings, Desktop)\n"));
+ awk, Lua, Modula-2, D, OCaml, Vala, Tcl, Perl,\n\
+ PHP, GCC-source, Glade, GSettings, Desktop)\n"));
printf (_("\
--flag=WORD:ARG:FLAG additional flag for strings inside the argument\n\
number ARG of keyword WORD\n"));
(only languages C, C++, ObjectiveC, Python,\n\
Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
- awk, Lua, Modula-2, D, Vala, Tcl, Perl, PHP,\n\
- GCC-source, YCP)\n"));
+ awk, Lua, Modula-2, D, OCaml, Vala, Tcl, Perl,\n\
+ PHP, GCC-source, YCP)\n"));
printf (_("\
--tag=WORD:FORMAT defines the behaviour of tagged template literals\n\
with tag WORD\n"));
name_start, name_end,
argnum, value, pass);
break;
+ case format_ocaml:
+ flag_context_list_table_insert (&flag_table_ocaml, XFORMAT_PRIMARY,
+ name_start, name_end,
+ argnum, value, pass);
+ break;
case format_smalltalk:
break;
case format_qt:
xgettext-lua-stackovfl-1 xgettext-lua-stackovfl-2 \
xgettext-lua-stackovfl-3 xgettext-lua-stackovfl-4 \
xgettext-modula2-1 xgettext-modula2-2 \
+ xgettext-ocaml-1 xgettext-ocaml-2 \
xgettext-objc-1 xgettext-objc-2 \
xgettext-perl-1 xgettext-perl-2 xgettext-perl-3 xgettext-perl-4 \
xgettext-perl-5 xgettext-perl-6 xgettext-perl-7 xgettext-perl-8 \
format-lisp-1 format-lisp-2 \
format-lua-1 format-lua-2 \
format-modula2-1 format-modula2-2 \
+ format-ocaml-1 format-ocaml-2 \
format-php-1 format-php-2 format-php-3 \
format-python-1 format-python-2 format-python-3 \
format-python-brace-1 format-python-brace-2 \
--- /dev/null
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test recognition of OCaml format strings.
+
+cat <<\EOF > f-oc-1.data
+# Valid: no argument
+"abc%!"
+# Valid: no argument
+"abc%%"
+# Valid: no argument
+"abc%@"
+# Valid: no argument
+"abc%,"
+# Valid: one integer argument
+"abc%d"
+# Valid: one integer argument
+"abc%i"
+# Valid: one integer argument
+"abc%u"
+# Valid: one integer argument
+"abc%x"
+# Valid: one integer argument
+"abc%X"
+# Valid: one integer argument
+"abc%o"
+# Valid: one int32 argument
+"abc%ld"
+# Valid: one int32 argument
+"abc%li"
+# Valid: one int32 argument
+"abc%lu"
+# Valid: one int32 argument
+"abc%lx"
+# Valid: one int32 argument
+"abc%lX"
+# Valid: one int32 argument
+"abc%lo"
+# Valid: one nativeint argument
+"abc%nd"
+# Valid: one nativeint argument
+"abc%ni"
+# Valid: one nativeint argument
+"abc%nu"
+# Valid: one nativeint argument
+"abc%nx"
+# Valid: one nativeint argument
+"abc%nX"
+# Valid: one nativeint argument
+"abc%no"
+# Valid: one int64 argument
+"abc%Ld"
+# Valid: one int64 argument
+"abc%Li"
+# Valid: one int64 argument
+"abc%Lu"
+# Valid: one int64 argument
+"abc%Lx"
+# Valid: one int64 argument
+"abc%LX"
+# Valid: one int64 argument
+"abc%Lo"
+# Valid: one string argument
+"abc%s"
+# Valid: one string argument
+"abc%S"
+# Valid: one character argument
+"abc%c"
+# Valid: one character argument
+"abc%C"
+# Valid: one floating-point argument
+"abc%f"
+# Valid: one floating-point argument
+"abc%e"
+# Valid: one floating-point argument
+"abc%E"
+# Valid: one floating-point argument
+"abc%g"
+# Valid: one floating-point argument
+"abc%G"
+# Valid: one floating-point argument
+"abc%h"
+# Valid: one floating-point argument
+"abc%H"
+# Valid: one floating-point argument
+"abc%F"
+# Valid: one boolean argument
+"abc%B"
+# Valid: one function argument
+"abc%a"
+# Valid: one function-with-arg argument
+"abc%t"
+# Valid: one format-string argument
+"abc%{%s%}"
+# Valid: one format-string argument
+"abc%{%S%}"
+# Valid: one format-string argument and one string argument
+"abc%(%s%)"
+# Valid: one format-string argument and one string argument
+"abc%(%S%)"
+# Valid: one argument with flags
+"abc%0#g"
+# Valid: one argument with width
+"abc%2g"
+# Valid: one argument with width
+"abc%*g"
+# Valid: one argument with precision
+"abc%.4g"
+# Valid: one argument with precision
+"abc%.*g"
+# Valid: one argument with width and precision
+"abc%14.4g"
+# Valid: one argument with width and precision
+"abc%14.*g"
+# Valid: one argument with width and precision
+"abc%*.4g"
+# Valid: one argument with width and precision
+"abc%*.*g"
+# Invalid: unterminated
+"abc%"
+# Invalid: unterminated
+"abc%l"
+# Invalid: unterminated
+"abc%n"
+# Invalid: unterminated
+"abc%L"
+# Invalid: unknown format specifier
+"abc%y"
+# Invalid: unknown format specifier
+"abc%A"
+# Invalid: unknown format specifier
+"abc%T"
+# Invalid: %{ and %} don't match
+"abc%{%s"
+# Invalid: %{ and %} don't match
+"abc%s%}"
+# Invalid: %{ and %} don't match
+"abc%s%}%{"
+# Invalid: %( and %) don't match
+"abc%(%s"
+# Invalid: %( and %) don't match
+"abc%s%)"
+# Invalid: %( and %) don't match
+"abc%s%)%("
+# Invalid: %{%} and %(%) nesting
+"abc%{%(%}%)"
+# Invalid: %{%} and %(%) nesting
+"abc%(%{%)%}"
+# Invalid: flags after width
+"abc%*0g"
+# Valid: null precision
+"abc%.f"
+# Invalid: twice precision
+"abc%.4.2g"
+# Valid: three arguments
+"abc%d%u%u"
+# Valid only in msgstr: a numbered argument
+"abc%1$d"
+# Invalid: zero
+"abc%0$d"
+# Valid only in msgstr: two-digit numbered arguments
+"abc%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz"
+# Invalid: unterminated number
+"abc%1"
+# Invalid: flags before number
+"abc%+1$d"
+# Valid only in msgstr: three arguments, two with same number
+"abc%1$4x,%2$c,%1$u"
+# Invalid: argument with conflicting types
+"abc%1$4x,%2$c,%1$s"
+# Valid only in msgstr: no conflicting types
+"abc%2$t%1$d"
+# Invalid: argument with conflicting types
+"abc%1$t%2$d"
+# Valid only in msgstr: no conflict
+"abc%1$4x,%2$c,%1$u"
+# Invalid: mixing of numbered and unnumbered arguments
+"abc%d%2$x"
+# Valid only in msgstr: numbered argument with constant precision
+"abc%1$.9x"
+# Invalid: mixing of numbered and unnumbered arguments
+"abc%1$.*x"
+# Valid only in msgstr: missing non-final argument
+"abc%2$x%3$s"
+# Valid only in msgstr: permutation
+"abc%2$ddef%1$d"
+# Valid only in msgstr: multiple uses of same argument
+"abc%2$xdef%1$sghi%2$x"
+# Valid only in msgstr: one argument with width
+"abc%2$#*1$g"
+# Valid only in msgstr: one argument with width and precision
+"abc%3$*2$.*1$g"
+# Invalid: zero
+"abc%2$*0$.*1$g"
+EOF
+
+: ${XGETTEXT=xgettext}
+n=0
+while read comment; do
+ read string
+ n=`expr $n + 1`
+ cat <<EOF > f-oc-1-$n.in
+let a = f_ ${string}
+EOF
+ ${XGETTEXT} -L OCaml -o f-oc-1-$n.po f-oc-1-$n.in || Exit 1
+ test -f f-oc-1-$n.po || Exit 1
+ fail=
+ if echo "$comment" | grep 'Valid:' > /dev/null; then
+ if grep ocaml-format f-oc-1-$n.po > /dev/null; then
+ :
+ else
+ fail=yes
+ fi
+ else
+ if grep ocaml-format f-oc-1-$n.po > /dev/null; then
+ fail=yes
+ else
+ :
+ fi
+ fi
+ if test -n "$fail"; then
+ echo "Format string recognition error:" 1>&2
+ cat f-oc-1-$n.in 1>&2
+ echo "Got:" 1>&2
+ cat f-oc-1-$n.po 1>&2
+ Exit 1
+ fi
+ rm -f f-oc-1-$n.in f-oc-1-$n.po
+done < f-oc-1.data
+
+Exit 0
--- /dev/null
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test checking of OCaml format strings.
+
+cat <<\EOF > f-oc-2.data
+# Valid: %% doesn't count
+msgid "abc%%def"
+msgstr "xyz"
+# Invalid: invalid msgstr
+msgid "abc%%def"
+msgstr "xyz%"
+# Valid: same arguments
+msgid "abc%s%gdef"
+msgstr "xyz%s%g"
+# Valid: same arguments, with different widths
+msgid "abc%2sdef"
+msgstr "xyz%3s"
+# Valid: same arguments but in numbered syntax
+msgid "abc%d"
+msgstr "xyz%1$d"
+# Valid: same arguments but in numbered syntax
+msgid "abc%s%gdef"
+msgstr "xyz%1$s%2$g"
+# Valid: permutation
+msgid "abc%s%g%cdef"
+msgstr "xyz%3$c%2$g%1$s"
+# Invalid: too few arguments
+msgid "abc%sdef%u"
+msgstr "xyz%1$s"
+# Invalid: too few arguments
+msgid "abc%sdef%u"
+msgstr "xyz%s"
+# Invalid: too many arguments
+msgid "abc%udef"
+msgstr "xyz%uvw%c"
+# Valid: same numbered arguments, with different widths
+msgid "abc%4s%5s"
+msgstr "xyz%2$4s%1$5s"
+# Invalid: missing final argument
+msgid "abc%udef%s"
+msgstr "xyz%1$u"
+# Invalid: missing non-final argument
+msgid "abc%sdef%u"
+msgstr "xyz%2$u"
+# Invalid: added argument
+msgid "abc%udef"
+msgstr "xyz%1$uvw%2$c"
+# Valid: type compatibility
+msgid "abc%d"
+msgstr "xyz%i"
+# Valid: type compatibility
+msgid "abc%d"
+msgstr "xyz%u"
+# Valid: type compatibility
+msgid "abc%d"
+msgstr "xyz%x"
+# Valid: type compatibility
+msgid "abc%d"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid "abc%d"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid "abc%i"
+msgstr "xyz%u"
+# Valid: type compatibility
+msgid "abc%i"
+msgstr "xyz%x"
+# Valid: type compatibility
+msgid "abc%i"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid "abc%i"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid "abc%u"
+msgstr "xyz%x"
+# Valid: type compatibility
+msgid "abc%u"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid "abc%u"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid "abc%x"
+msgstr "xyz%X"
+# Valid: type compatibility
+msgid "abc%x"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid "abc%X"
+msgstr "xyz%o"
+# Valid: type compatibility
+msgid "abc%ld"
+msgstr "xyz%li"
+# Valid: type compatibility
+msgid "abc%ld"
+msgstr "xyz%lu"
+# Valid: type compatibility
+msgid "abc%ld"
+msgstr "xyz%lx"
+# Valid: type compatibility
+msgid "abc%ld"
+msgstr "xyz%lX"
+# Valid: type compatibility
+msgid "abc%ld"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid "abc%li"
+msgstr "xyz%lu"
+# Valid: type compatibility
+msgid "abc%li"
+msgstr "xyz%lx"
+# Valid: type compatibility
+msgid "abc%li"
+msgstr "xyz%lX"
+# Valid: type compatibility
+msgid "abc%li"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid "abc%lu"
+msgstr "xyz%lx"
+# Valid: type compatibility
+msgid "abc%lu"
+msgstr "xyz%lX"
+# Valid: type compatibility
+msgid "abc%lu"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid "abc%lx"
+msgstr "xyz%lX"
+# Valid: type compatibility
+msgid "abc%lx"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid "abc%lX"
+msgstr "xyz%lo"
+# Valid: type compatibility
+msgid "abc%nd"
+msgstr "xyz%ni"
+# Valid: type compatibility
+msgid "abc%nd"
+msgstr "xyz%nu"
+# Valid: type compatibility
+msgid "abc%nd"
+msgstr "xyz%nx"
+# Valid: type compatibility
+msgid "abc%nd"
+msgstr "xyz%nX"
+# Valid: type compatibility
+msgid "abc%nd"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid "abc%ni"
+msgstr "xyz%nu"
+# Valid: type compatibility
+msgid "abc%ni"
+msgstr "xyz%nx"
+# Valid: type compatibility
+msgid "abc%ni"
+msgstr "xyz%nX"
+# Valid: type compatibility
+msgid "abc%ni"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid "abc%nu"
+msgstr "xyz%nx"
+# Valid: type compatibility
+msgid "abc%nu"
+msgstr "xyz%nX"
+# Valid: type compatibility
+msgid "abc%nu"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid "abc%nx"
+msgstr "xyz%nX"
+# Valid: type compatibility
+msgid "abc%nx"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid "abc%nX"
+msgstr "xyz%no"
+# Valid: type compatibility
+msgid "abc%Ld"
+msgstr "xyz%Li"
+# Valid: type compatibility
+msgid "abc%Ld"
+msgstr "xyz%Lu"
+# Valid: type compatibility
+msgid "abc%Ld"
+msgstr "xyz%Lx"
+# Valid: type compatibility
+msgid "abc%Ld"
+msgstr "xyz%LX"
+# Valid: type compatibility
+msgid "abc%Ld"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid "abc%Li"
+msgstr "xyz%Lu"
+# Valid: type compatibility
+msgid "abc%Li"
+msgstr "xyz%Lx"
+# Valid: type compatibility
+msgid "abc%Li"
+msgstr "xyz%LX"
+# Valid: type compatibility
+msgid "abc%Li"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid "abc%Lu"
+msgstr "xyz%Lx"
+# Valid: type compatibility
+msgid "abc%Lu"
+msgstr "xyz%LX"
+# Valid: type compatibility
+msgid "abc%Lu"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid "abc%Lx"
+msgstr "xyz%LX"
+# Valid: type compatibility
+msgid "abc%Lx"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid "abc%LX"
+msgstr "xyz%Lo"
+# Valid: type compatibility
+msgid "abc%f"
+msgstr "xyz%e"
+# Valid: type compatibility
+msgid "abc%f"
+msgstr "xyz%E"
+# Valid: type compatibility
+msgid "abc%f"
+msgstr "xyz%g"
+# Valid: type compatibility
+msgid "abc%f"
+msgstr "xyz%G"
+# Valid: type compatibility
+msgid "abc%f"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid "abc%f"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid "abc%e"
+msgstr "xyz%E"
+# Valid: type compatibility
+msgid "abc%e"
+msgstr "xyz%g"
+# Valid: type compatibility
+msgid "abc%e"
+msgstr "xyz%G"
+# Valid: type compatibility
+msgid "abc%e"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid "abc%e"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid "abc%E"
+msgstr "xyz%g"
+# Valid: type compatibility
+msgid "abc%E"
+msgstr "xyz%G"
+# Valid: type compatibility
+msgid "abc%E"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid "abc%E"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid "abc%g"
+msgstr "xyz%G"
+# Valid: type compatibility
+msgid "abc%g"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid "abc%g"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid "abc%G"
+msgstr "xyz%h"
+# Valid: type compatibility
+msgid "abc%G"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid "abc%h"
+msgstr "xyz%H"
+# Valid: type compatibility
+msgid "abc%{%s%}"
+msgstr "xyz%{%S%}"
+# Valid: type compatibility
+msgid "abc%{%S%}"
+msgstr "xyz%{%s%}"
+# Valid: type compatibility
+msgid "abc%{%c%}"
+msgstr "xyz%{%C%}"
+# Valid: type compatibility
+msgid "abc%{%C%}"
+msgstr "xyz%{%c%}"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%ld"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%nd"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%Ld"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%d"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%nd"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%Ld"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%ld"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%Ld"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%nd"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%s"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%Ld"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%S"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%s"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%S"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid "abc%S"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid "abc%S"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid "abc%S"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid "abc%S"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%S"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%S"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%S"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%S"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%C"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%c"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%C"
+msgstr "xyz%f"
+# Invalid: type incompatibility
+msgid "abc%C"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid "abc%C"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%C"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%C"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%C"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%C"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%f"
+msgstr "xyz%F"
+# Invalid: type incompatibility
+msgid "abc%f"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%f"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%f"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%f"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%f"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%F"
+msgstr "xyz%B"
+# Invalid: type incompatibility
+msgid "abc%F"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%F"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%F"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%F"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%B"
+msgstr "xyz%a"
+# Invalid: type incompatibility
+msgid "abc%B"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%B"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%B"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%a"
+msgstr "xyz%t"
+# Invalid: type incompatibility
+msgid "abc%a"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%a"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid "abc%t"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility
+msgid "abc%t"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility of format strings
+msgid "abc%(hhh%s%i%)def"
+msgstr "xyz%(hhh%s%)%i"
+# Invalid: too many arguments
+msgid "abc%{%s%}"
+msgstr "xyz%(%s%)"
+# Invalid: too few arguments
+msgid "abc%(%s%)"
+msgstr "xyz%{%s%}"
+# Invalid: type incompatibility for width
+msgid "abc%g%*g"
+msgstr "xyz%*g%g"
+# Invalid: zero
+msgid "abc"
+msgstr "xyz%0$d"
+# Valid: two-digit numbered arguments
+msgid "abc%d%d%d%d%d%d%d%d%d%d%d"
+msgstr "xyz%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz"
+# Invalid: unterminated number
+msgid "abc"
+msgstr "xyz%1"
+# Invalid: flags before number
+msgid "abc%d"
+msgstr "xyz%+1$d"
+# Valid: three arguments, two with same number
+msgid "abc%d%c"
+msgstr "xyz%1$4x,%2$c,%1$u"
+# Invalid: argument with conflicting types
+msgid "abc%d%c"
+msgstr "xyz%1$4x,%2$c,%1$s"
+# Valid: no conflicting types
+msgid "abc%d%t"
+msgstr "xyz%2$t%1$d"
+# Invalid: argument with conflicting types
+msgid "abc%t%d"
+msgstr "xyz%1$t%2$d"
+# Valid: no conflict
+msgid "abc%u%c%a"
+msgstr "xyz%1$4x,%2$c,%1$u,%3$a"
+# Invalid: mixing of numbered and unnumbered arguments
+msgid "abc%d%x"
+msgstr "xyz%d%2$x"
+# Valid: numbered argument with constant precision
+msgid "abc%.9x"
+msgstr "xyz%1$.9x"
+# Invalid: mixing of numbered and unnumbered arguments
+msgid "abc%d%d"
+msgstr "xyz%1$.*x"
+# Invalid: missing non-final argument
+msgid "abc%d%x%s"
+msgstr "xyz%2$x%3$s"
+# Valid: permutation
+msgid "abc%d%d"
+msgstr "xyz%2$ddef%1$d"
+# Valid: multiple uses of same argument
+msgid "abc%s%x"
+msgstr "xyz%2$xdef%1$sghi%2$x"
+# Valid: one argument with width
+msgid "abc%i%g"
+msgstr "xyz%2$#*1$g"
+# Valid: one argument with width and precision
+msgid "abc%i%i%g"
+msgstr "xyz%3$*2$.*1$g"
+# Invalid: zero
+msgid "abc%i%g"
+msgstr "xyz%2$*0$.*1$g"
+EOF
+
+: ${MSGFMT=msgfmt}
+n=0
+while read comment; do
+ read msgid_line
+ read msgstr_line
+ n=`expr $n + 1`
+ cat <<EOF > f-oc-2-$n.po
+#, ocaml-format
+${msgid_line}
+${msgstr_line}
+EOF
+ fail=
+ if echo "$comment" | grep 'Valid:' > /dev/null; then
+ if ${MSGFMT} --check-format -o f-oc-2-$n.mo f-oc-2-$n.po; then
+ :
+ else
+ fail=yes
+ fi
+ else
+ ${MSGFMT} --check-format -o f-oc-2-$n.mo f-oc-2-$n.po 2> /dev/null
+ if test $? = 1; then
+ :
+ else
+ fail=yes
+ fi
+ fi
+ if test -n "$fail"; then
+ echo "Format string checking error:" 1>&2
+ cat f-oc-2-$n.po 1>&2
+ Exit 1
+ fi
+ rm -f f-oc-2-$n.po f-oc-2-$n.mo
+done < f-oc-2.data
+
+Exit 0
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test OCaml support: Simple things.
+
+cat <<\EOF > xg-oc-1.ml
+ (* A one-line comment. *)
+ let test1 = s_ "Test String 1" in
+ (* Two one-line *)
+ (* comments *)
+ let test2 = (s_ "Test String 2") in
+ (* A multi-line
+ comment. *)
+ let test3 = s_("Test String 3") in
+ (* A (* nesting *) (* one-line *) comment. *)
+ let test4 = s_ "Test String 4" in
+ (* A (*
+ nesting
+ *)
+ (*
+ multi-line *)
+ comment. *)
+ let test5 = s_ "Test String 5" in
+ (*
+ s_("Not extracted");
+ *)
+ let dummy = 0 in
+ (* OCaml has string literal concatenation. *)
+ let test6 = s_("Test " ^
+ "String "
+ ^ "6") in
+ (* Empty string. *)
+ let test7 = s_ "" in
+
+ (* sprintf expects a format string. *)
+ sprintf (f_ "weight %u") w
+
+ print_string(s_ "Test string 11 %s")
+ let n = 2 in
+ let s = "2" in
+ print_string(sprintf(fn_ "%0s piece of cake" "%s pieces of cake" n) s)
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -c -d xg-oc-1.tmp xg-oc-1.ml || Exit 1
+LC_ALL=C tr -d '\r' < xg-oc-1.tmp.po > xg-oc-1.po || Exit 1
+
+cat <<\EOF > xg-oc-1.ok
+#. A one-line comment.
+msgid "Test String 1"
+msgstr ""
+
+#. Two one-line
+#. comments
+msgid "Test String 2"
+msgstr ""
+
+#. A multi-line
+#. comment.
+msgid "Test String 3"
+msgstr ""
+
+#. A (* nesting *) (* one-line *) comment.
+msgid "Test String 4"
+msgstr ""
+
+#. A (*
+#. nesting
+#. *)
+#. (*
+#. multi-line *)
+#. comment.
+msgid "Test String 5"
+msgstr ""
+
+#. OCaml has string literal concatenation.
+msgid "Test String 6"
+msgstr ""
+
+#. Empty string.
+msgid ""
+msgstr ""
+
+#. sprintf expects a format string.
+#, ocaml-format
+msgid "weight %u"
+msgstr ""
+
+msgid "Test string 11 %s"
+msgstr ""
+
+#, ocaml-format
+msgid "%0s piece of cake"
+msgid_plural "%s pieces of cake"
+msgstr[0] ""
+msgstr[1] ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-oc-1.ok xg-oc-1.po || Exit 1
+
+exit 0
--- /dev/null
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test OCaml support: String literals.
+
+cat <<\EOF > xg-oc-2.ml
+let s1 = s_ {abracadabra|Test string 1 abc\ndef\tghi\\jkl\
+ mno\u{20AC}pqr|xyz|abracadabra} in
+let s2 = s_ "Test string 2 abc\ndef\tghi\\jkl\
+ mno\u{20AC}pqr\ stu\'vwx" in
+let s3 = s_ ("Test string 3" ^ " abc" ^ "def") in
+let s4 = s_ "Test string 4 \0377\064\x404\o1006\u{1F603}" in
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --no-location -d xg-oc-2.tmp xg-oc-2.ml || Exit 1
+func_filter_POT_Creation_Date xg-oc-2.tmp.po xg-oc-2.pot
+
+cat <<\EOF > xg-oc-2.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid ""
+"Test string 1 abc\\ndef\\tghi\\\\jkl\\\n"
+" mno\\u{20AC}pqr|xyz"
+msgstr ""
+
+msgid ""
+"Test string 2 abc\n"
+"def\tghi\\jklmno€pqr stu'vwx"
+msgstr ""
+
+msgid "Test string 3 abcdef"
+msgstr ""
+
+msgid "Test string 4 %7@@4@6😃"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-oc-2.ok xg-oc-2.pot || Exit 1
+
+exit 0