D support: Add D support in the tools.

author Bruno Haible <bruno@clisp.org>

Tue, 1 Apr 2025 09:51:08 +0000 (11:51 +0200)

committer Bruno Haible <bruno@clisp.org>

Tue, 1 Apr 2025 11:01:05 +0000 (13:01 +0200)
author Bruno Haible <bruno@clisp.org>
Tue, 1 Apr 2025 09:51:08 +0000 (11:51 +0200)
committer Bruno Haible <bruno@clisp.org>
Tue, 1 Apr 2025 11:01:05 +0000 (13:01 +0200)
diff --git a/NEWS b/NEWS

index 8464209280d605429892d62628e3f49c9cda37d4..9967b7e4100f7055a27b7ec19c63955757506356 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,9 @@ Version 0.25 - April 2025
    * D:
      - A new library libintl_d.a contains the runtime for using GNU gettext
        message catalogs in the D programming language.
+    - xgettext now supports D.
+    - 'msgfmt -c' now verifies the syntax of translations of D format
+      strings.
  
  Version 0.24 - February 2025
  
diff --git a/autogen.sh b/autogen.sh

index 3a3c3a1f012ae2c90c1683bd3ad3ede009599089..a77372f48962eea1731ac49e0c76d14d69ef8a63 100755 (executable)
--- a/autogen.sh
+++ b/autogen.sh
@@ -275,7 +275,11 @@ if ! $skip_gnulib; then
      unistr/u8-mbtouc
      unistr/u8-mbtoucr
      unistr/u8-uctomb
+    unistr/u16-check
+    unistr/u16-to-u8
      unistr/u16-mbtouc
+    unistr/u32-check
+    unistr/u32-to-u8
      uniwidth/width
      unlocked-io
      unsetenv
diff --git a/autopull.sh b/autopull.sh

index aea517eee977da13939dddf8954e18206e21ad1e..d3629082559e4c759399b9a07b5ccb39f9cd919b 100755 (executable)
--- a/autopull.sh
+++ b/autopull.sh
@@ -89,6 +89,7 @@ TREE_SITTER_VERSION=0.23.2
  TREE_SITTER_GO_VERSION=0.23.4
  TREE_SITTER_RUST_VERSION=0.23.2
  TREE_SITTER_TYPESCRIPT_VERSION=0.23.2
+TREE_SITTER_D_VERSION=0.8.2
  # Cache the relevant source code. Erase the rest of the tree-sitter projects.
  test -d gettext-tools/tree-sitter-$TREE_SITTER_VERSION || {
    func_git_clone_shallow tree-sitter https://github.com/tree-sitter/tree-sitter.git v$TREE_SITTER_VERSION
@@ -134,11 +135,22 @@ test -d gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION ||
    mv gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/scanner.c gettext-tools/tree-sitter-typescript-$TREE_SITTER_TYPESCRIPT_VERSION/tsx/src/tsx-scanner.c
    rm -rf tree-sitter-typescript
  }
+test -d gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION || {
+  func_git_clone_shallow tree-sitter-d https://github.com/gdamore/tree-sitter-d.git v$TREE_SITTER_D_VERSION
+  (cd tree-sitter-d && patch -p1) < gettext-tools/build-aux/tree-sitter-d-portability.diff
+  mkdir gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION
+  mv tree-sitter-d/LICENSE.txt gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION/LICENSE
+  mv tree-sitter-d/src gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION/src
+  mv gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION/src/parser.c gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION/src/d-parser.c
+  mv gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION/src/scanner.c gettext-tools/tree-sitter-d-$TREE_SITTER_D_VERSION/src/d-scanner.c
+  rm -rf tree-sitter-d
+}
  cat > gettext-tools/tree-sitter.cfg <<EOF
  TREE_SITTER_VERSION=$TREE_SITTER_VERSION
  TREE_SITTER_GO_VERSION=$TREE_SITTER_GO_VERSION
  TREE_SITTER_RUST_VERSION=$TREE_SITTER_RUST_VERSION
  TREE_SITTER_TYPESCRIPT_VERSION=$TREE_SITTER_TYPESCRIPT_VERSION
+TREE_SITTER_D_VERSION=$TREE_SITTER_D_VERSION
  EOF
  
  dir0=`pwd`
diff --git a/gettext-tools/Makefile.am b/gettext-tools/Makefile.am

index 55308d4f8ffeb8a449e33f01ee059e76cbd31889..c12ca968b1386967fcb11bd2ead773074bdc96d5 100644 (file)
--- a/gettext-tools/Makefile.am
+++ b/gettext-tools/Makefile.am
@@ -99,7 +99,14 @@ EXTRA_DIST += \
    tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tsx-scanner.c \
    tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/alloc.h \
    tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/array.h \
-  tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/parser.h
+  tree-sitter-typescript-$(TREE_SITTER_TYPESCRIPT_VERSION)/tsx/src/tree_sitter/parser.h \
+  build-aux/tree-sitter-d-portability.diff \
+  tree-sitter-d-$(TREE_SITTER_D_VERSION)/LICENSE \
+  tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/d-parser.c \
+  tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/d-scanner.c \
+  tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/tree_sitter/alloc.h \
+  tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/tree_sitter/array.h \
+  tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/tree_sitter/parser.h
  
  # Files installed for the user.
  
diff --git a/gettext-tools/build-aux/tree-sitter-d-portability.diff b/gettext-tools/build-aux/tree-sitter-d-portability.diff

new file mode 100644 (file)

index 0000000..161ab0c
--- /dev/null
+++ b/gettext-tools/build-aux/tree-sitter-d-portability.diff
@@ -0,0 +1,32 @@
+diff --git a/src/parser.c b/src/parser.c
+index 7ce433c..702c5be 100644
+--- a/src/parser.c
++++ b/src/parser.c
+@@ -673670,8 +673670,10 @@ void tree_sitter_d_external_scanner_deserialize(void *, const char *, unsigned);
+ #define TS_PUBLIC
+ #elif defined(_WIN32)
+ #define TS_PUBLIC __declspec(dllexport)
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define TS_PUBLIC __attribute__((visibility("default")))
++#else
++#define TS_PUBLIC
+ #endif
+ 
+ TS_PUBLIC const TSLanguage *tree_sitter_d(void) {
+diff --git a/src/tree_sitter/parser.h b/src/tree_sitter/parser.h
+index 799f599..130b4d0 100644
+--- a/src/tree_sitter/parser.h
++++ b/src/tree_sitter/parser.h
+@@ -155,8 +155,10 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
+ 
+ #ifdef _MSC_VER
+ #define UNUSED __pragma(warning(suppress : 4101))
+-#else
++#elif defined __GNUC__ || defined __clang__
+ #define UNUSED __attribute__((unused))
++#else
++#define UNUSED
+ #endif
+ 
+ #define START_LEXER()           \
diff --git a/gettext-tools/configure.ac b/gettext-tools/configure.ac

index 71f797792e18fbe7afb9b47dabd1180af892cfcb..a7c51d40f6d6486f4fe269947c23e6839118522c 100644 (file)
--- a/gettext-tools/configure.ac
+++ b/gettext-tools/configure.ac
@@ -575,6 +575,7 @@ AC_SUBST([TREE_SITTER_VERSION])
  AC_SUBST([TREE_SITTER_GO_VERSION])
  AC_SUBST([TREE_SITTER_RUST_VERSION])
  AC_SUBST([TREE_SITTER_TYPESCRIPT_VERSION])
+AC_SUBST([TREE_SITTER_D_VERSION])
  
  PACKAGE_SUFFIX="-$ARCHIVE_VERSION"
  AC_SUBST([PACKAGE_SUFFIX])
diff --git a/gettext-tools/doc/Makefile.am b/gettext-tools/doc/Makefile.am

index 1dfb894573ba813548da745148d4bbbf96ebf9a0..1b26b832e92f2eb2ee11eab90f7b3ad26c3e8fb7 100644 (file)
--- a/gettext-tools/doc/Makefile.am
+++ b/gettext-tools/doc/Makefile.am
@@ -81,6 +81,7 @@ gettext_TEXINFOS = \
    lang-gawk.texi \
    lang-lua.texi \
    lang-pascal.texi \
+  lang-d.texi \
    lang-smalltalk.texi \
    lang-vala.texi \
    lang-wxwidgets.texi \
diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi

index e90c6c95b3f9c179fe27e963ff0ae56c22646abd..85aa8c9f2c5dfe5997c9cd1bb8f005db82d58bc9 100644 (file)
--- a/gettext-tools/doc/gettext.texi
+++ b/gettext-tools/doc/gettext.texi
@@ -412,6 +412,7 @@ The Translator's View
  * awk-format::                  awk Format Strings
  * lua-format::                  Lua Format Strings
  * object-pascal-format::        Object Pascal Format Strings
+* d-format::                    D Format Strings
  * smalltalk-format::            Smalltalk Format Strings
  * qt-format::                   Qt Format Strings
  * qt-plural-format::            Qt Plural Format Strings
@@ -1750,6 +1751,12 @@ Likewise for Lua, see @ref{lua-format}.
  @kwindex no-object-pascal-format@r{ flag}
  Likewise for Object Pascal, see @ref{object-pascal-format}.
  
+@item d-format
+@kwindex d-format@r{ flag}
+@itemx no-d-format
+@kwindex no-d-format@r{ flag}
+Likewise for D, see @ref{d-format}.
+
  @item smalltalk-format
  @kwindex smalltalk-format@r{ flag}
  @itemx no-smalltalk-format
@@ -2329,6 +2336,7 @@ at runtime (or possibly at compile time, if the compiler supports that).
  @cindex Shell, string concatenation
  @cindex awk, string concatenation
  @cindex Lua, string concatenation
+@cindex D, string concatenation
  @cindex Smalltalk, string concatenation
  @cindex Vala, string concatenation
  @cindex Perl, string concatenation
@@ -2368,6 +2376,9 @@ In awk, string concatenation is denoted by mere juxtaposition of strings.
  In Lua, string concatenation is denoted by the @samp{..} operator.
  @c Reference: https://www.lua.org/pil/3.4.html
  @item
+In D, string concatenation is denoted by the @samp{~} operator.
+@c Reference: https://dlang.org/spec/expression.html#cat_expressions
+@item
  In Smalltalk, string concatenation is denoted by the @samp{,} operator.
  @c Reference: https://rmod-files.lille.inria.fr/FreeBooks/ByExample/14%20-%20Chapter%2012%20-%20Strings.pdf
  @item
@@ -2426,6 +2437,7 @@ but no formatting function is called.
  @cindex TypeScript, strings with embedded expressions
  @cindex Ruby, strings with embedded expressions
  @cindex Shell, strings with embedded expressions
+@cindex D, strings with embedded expressions
  @cindex Tcl, strings with embedded expressions
  @cindex Perl, strings with embedded expressions
  @cindex PHP, strings with embedded expressions
@@ -2459,6 +2471,10 @@ references to variables, along with default values and string operations.
  Such as @code{"Hello, $name!"} or @code{"Hello, $@{name@}!"}.
  @c Reference: https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_03
  @item
+In D, @emph{interpolation expression sequences} can contain expressions.
+Such as @code{i"Hello, $(name)!"}.
+@c Reference: https://dlang.org/spec/istring.html
+@item
  In Tcl, strings are subject to @emph{variable substitution}.
  Such as @code{"Hello, $name!"}.
  @c Reference: https://wiki.tcl-lang.org/page/Dodekalogue
@@ -9928,6 +9944,7 @@ strings.
  * awk-format::                  awk Format Strings
  * lua-format::                  Lua Format Strings
  * object-pascal-format::        Object Pascal Format Strings
+* d-format::                    D Format Strings
  * smalltalk-format::            Smalltalk Format Strings
  * qt-format::                   Qt Format Strings
  * qt-plural-format::            Qt Plural Format Strings
@@ -10221,6 +10238,13 @@ Object Pascal format strings are described in the documentation of the
  Free Pascal runtime library, section Format,
  @uref{https://www.freepascal.org/docs-html/rtl/sysutils/format.html}.
  
+@node d-format
+@subsection D Format Strings
+
+D format strings are described
+in the documentation of the D module @code{std.format},
+at @uref{https://dlang.org/library/std/format.html}.
+
  @node smalltalk-format
  @subsection Smalltalk Format Strings
  
@@ -10440,6 +10464,7 @@ that language, and to combine the resulting files using @code{msgcat}.
  * gawk::                        GNU awk
  * Lua::                         Lua
  * Pascal::                      Pascal - Free Pascal Compiler
+* D::                           D
  * Smalltalk::                   GNU Smalltalk
  * Vala::                        Vala
  * wxWidgets::                   wxWidgets library
@@ -10470,6 +10495,7 @@ that language, and to combine the resulting files using @code{msgcat}.
  @include lang-gawk.texi
  @include lang-lua.texi
  @include lang-pascal.texi
+@include lang-d.texi
  @include lang-smalltalk.texi
  @include lang-vala.texi
  @include lang-wxwidgets.texi
diff --git a/gettext-tools/doc/lang-d.texi b/gettext-tools/doc/lang-d.texi

new file mode 100644 (file)

index 0000000..9c3b497
--- /dev/null
+++ b/gettext-tools/doc/lang-d.texi
@@ -0,0 +1,72 @@
+@c This file is part of the GNU gettext manual.
+@c Copyright (C) 1995-2025 Free Software Foundation, Inc.
+@c See the file gettext.texi for copying conditions.
+
+@node D
+@subsection D
+@cindex D
+
+@table @asis
+@item RPMs
+gcc-gdc or ldc
+
+@item Ubuntu packages
+gdc or ldc
+
+@item File extension
+@code{d}
+
+@item String syntax
+@c https://dlang.org/spec/lex.html#string_literals
+@code{r"abc"}, @code{`abc`}, @code{"abc"},
+@code{q"[abc]"}, @code{q"(abc)"}, @code{q"<abc>"}, @code{q"@{abc@}"},
+@code{q@{abc@}}, @code{x"6A 6B 6C"}
+
+@item gettext shorthand
+@code{_("abc")}
+
+@item gettext/ngettext functions
+@code{gettext}, @code{dgettext}, @code{dcgettext},
+@code{ngettext}, @code{dngettext}, @code{dcngettext}
+
+Note that the @code{ngettext}-like functions need to take
+two argument strings that consume the same number of arguments.
+For example, you cannot write
+@code{format(ngettext("a piece", "%d pieces", n), n)}
+because in the singular case,
+@code{format} would treat the unused argument as an error and
+throw an exception.
+As a workaround, you need to convert @code{n} to a string and
+format that string with precision zero:
+@code{format(ngettext("%.0sa piece", "%s pieces", n), to!string(n))}
+or
+@code{format(ngettext("%.0sa piece", "%s pieces", n), text(n))}
+
+@item textdomain
+@code{textdomain} function
+
+@item bindtextdomain
+@code{bindtextdomain} function
+
+@item setlocale
+Programmer must call @code{setlocale (LC_ALL, "")}
+
+@item Prerequisite
+@code{import gnu.libintl;}
+@*@code{alias _ = gettext;}
+
+@item Use or emulate GNU gettext
+Use
+
+@item Extractor
+@code{xgettext -k_ --flag=_:1:pass-c-format --flag=_:1:pass-d-format}
+
+@item Formatting with positions
+@code{fprintf "%2$d %1$d"}, @code{format "%2$d %1$d"}
+
+@item Portability
+fully portable
+
+@item po-mode marking
+---
+@end table
diff --git a/gettext-tools/doc/xgettext.texi b/gettext-tools/doc/xgettext.texi

index 1928c28ef1688cf2009341162e5c06ae77c091c7..7e877f678031a2fc0d03c0e59c0a54e8e583846f 100644 (file)
--- a/gettext-tools/doc/xgettext.texi
+++ b/gettext-tools/doc/xgettext.texi
@@ -90,6 +90,7 @@ Specifies the language of the input files.  The supported languages are
  @code{Shell},
  @code{awk},
  @code{Lua},
+@code{D},
  @code{Smalltalk},
  @code{Vala},
  @code{Tcl},
@@ -262,6 +263,7 @@ Go,
  Shell,
  awk,
  Lua,
+D,
  Vala,
  Tcl,
  Perl,
@@ -325,6 +327,7 @@ Go,
  Shell,
  awk,
  Lua,
+D,
  Vala,
  Tcl,
  Perl,
@@ -404,6 +407,10 @@ For Lua: @code{_}, @code{gettext.gettext}, @code{gettext.dgettext:2},
  @code{gettext.dcgettext:2}, @code{gettext.ngettext:1,2},
  @code{gettext.dngettext:2,3}, @code{gettext.dcngettext:2,3}.
  
+@item
+For D: @code{gettext}, @code{dgettext:2}, @code{dcgettext:2},
+@code{ngettext:1,2}, @code{dngettext:2,3}, @code{dcngettext:2,3}.
+
  @item
  For JavaScript, TypeScript, TSX:
  @code{_}, @code{gettext}, @code{dgettext:2},
@@ -484,6 +491,7 @@ Go,
  Shell,
  awk,
  Lua,
+D,
  Vala,
  Tcl,
  Perl,
diff --git a/gettext-tools/libgettextpo/Makefile.am b/gettext-tools/libgettextpo/Makefile.am

index 25731ecd02bd47071d7ad6d10fbc41df9d89d3f9..1a85edc02a65b1248c25c3e31cc540a4fb0373d6 100644 (file)
--- a/gettext-tools/libgettextpo/Makefile.am
+++ b/gettext-tools/libgettextpo/Makefile.am
@@ -86,6 +86,7 @@ libgettextpo_la_AUXSOURCES = \
    ../src/format-awk.c \
    ../src/format-lua.c \
    ../src/format-pascal.c \
+  ../src/format-d.c \
    ../src/format-smalltalk.c \
    ../src/format-qt.c \
    ../src/format-qt-plural.c \
diff --git a/gettext-tools/po/POTFILES.in b/gettext-tools/po/POTFILES.in

index 7422d5eade28e09187100cdeb30091a0bb5b31c9..bc9925313985183e8c9a768b51b2a96ad0999514 100644 (file)
--- a/gettext-tools/po/POTFILES.in
+++ b/gettext-tools/po/POTFILES.in
@@ -15,6 +15,7 @@ src/format-c++-brace.c
  src/format-c.c
  src/format-c-parse.h
  src/format-csharp.c
+src/format-d.c
  src/format-elisp.c
  src/format-gcc-internal.c
  src/format-gfc-internal.c
@@ -91,6 +92,7 @@ src/write-xml.c
  src/x-awk.c
  src/x-c.c
  src/x-csharp.c
+src/x-d.c
  src/x-desktop.c
  src/x-elisp.c
  src/xerror-handler.c
diff --git a/gettext-tools/src/FILES b/gettext-tools/src/FILES

index 053e9fc4162e05b7482920934bff33d1675a5005..b32a76da2f54c991aa9e5746aad45ea981ba660e 100644 (file)
--- a/gettext-tools/src/FILES
+++ b/gettext-tools/src/FILES
@@ -243,6 +243,7 @@ format-sh.c            Format string handling for Shell.
  format-awk.c           Format string handling for awk.
  format-lua.c           Format string handling for Lua.
  format-pascal.c        Format string handling for Object Pascal.
+format-d.c             Format string handling for D.
  format-smalltalk.c     Format string handling for Smalltalk and YCP.
  format-qt.c            Format string handling for Qt.
  format-qt-plural.c     Format string handling for Qt plural forms.
@@ -412,6 +413,10 @@ msgl-check.c
  | x-lua.h
  | x-lua.c
  |               String extractor for Lua.
+| x-d.h
+| x-d.c
+| html5-entities.h
+|               String extractor for D.
  | x-smalltalk.h
  | x-smalltalk.c
  |               String extractor for Smalltalk.
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am

index 51c9efd52bc492e5bd93195f1e335ae0056ac9a3..dc7b4e18c88f16c396cbd66a89b7b1bfe6d426de 100644 (file)
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -89,6 +89,7 @@ noinst_HEADERS = \
    x-sh.h \
    x-awk.h \
    x-lua.h \
+  x-d.h html5-entities.h \
    x-smalltalk.h \
    x-vala.h \
    x-tcl.h \
@@ -204,6 +205,7 @@ FORMAT_SOURCE += \
    format-awk.c \
    format-lua.c \
    format-pascal.c \
+  format-d.c \
    format-smalltalk.c \
    format-qt.c \
    format-qt-plural.c \
@@ -252,7 +254,9 @@ libxgettextts1_a_CPPFLAGS = \
  libxgettextts2_a_SOURCES = \
    ../tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/rust-parser.c \
    ../tree-sitter-rust-$(TREE_SITTER_RUST_VERSION)/src/rust-scanner.c \
-  ../tree-sitter-go-$(TREE_SITTER_GO_VERSION)/src/go-parser.c
+  ../tree-sitter-go-$(TREE_SITTER_GO_VERSION)/src/go-parser.c \
+  ../tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/d-parser.c \
+  ../tree-sitter-d-$(TREE_SITTER_D_VERSION)/src/d-scanner.c
  libxgettextts2_a_CPPFLAGS = \
    -I$(top_srcdir)/tree-sitter-$(TREE_SITTER_VERSION)/lib/include
  libxgettextts3_a_SOURCES = \
@@ -321,6 +325,7 @@ xgettext_SOURCES += \
    x-sh.c ../../gettext-runtime/src/escapes.h \
    x-awk.c \
    x-lua.c \
+  x-d.c \
    x-smalltalk.c \
    x-vala.c \
    x-tcl.c \
diff --git a/gettext-tools/src/format-d.c b/gettext-tools/src/format-d.c

new file mode 100644 (file)

index 0000000..da97724
--- /dev/null
+++ b/gettext-tools/src/format-d.c
@@ -0,0 +1,2422 @@
+/* D format strings.
+   Copyright (C) 2001-2025 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2025.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "format.h"
+#include "attribute.h"
+#include "c-ctype.h"
+#include "gcd.h"
+#include "xalloc.h"
+#include "xvasprintf.h"
+#include "format-invalid.h"
+#include "minmax.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)
+
+
+/* Assertion macro.  Could be defined to empty for speed.
+   Aborts the program if EXPR is false.  The do { } while (0) wrapper makes
+   the expansion a single statement, so that 'ASSERT (x);' composes safely
+   with if/else at the call site and leaves no stray empty statement.  */
+#define ASSERT(expr) do { if (!(expr)) abort (); } while (0)
+
+
+/* D format strings are described in the description of the std.format module
+   <https://dlang.org/library/std/format.html> and implemented in
+   gcc-14.2.0/libphobos/src/std/format/spec.d
+   gcc-14.2.0/libphobos/src/std/format/write.d
+   gcc-14.2.0/libphobos/src/std/format/internal/write.d .
+
+   A format string consists of literal text (that is output verbatim), doubled
+   percent-signs ('%%', that lead to a single percent-sign when output), and
+   directives.
+   A directive
+   - starts with '%',
+   - is optionally followed by
+       a positive integer m, then '$', or
+       a positive integer m, then ':', then a positive integer m₂ ≥ m, then '$', or
+       a positive integer m, then ':', then '$',
+   - is optionally followed by a sequence of flags, each being one of
+       '+', '-', ' ', '0', '#', '=',
+   - is optionally followed by a width specification:
+       a positive integer, or
+       '*', or
+       '*', then a positive integer, then '$',
+   - is optionally followed by a precision specification:
+       '.' then optionally:
+         a positive integer, or
+         '*', or
+         '*', then a positive integer, then '$',
+   - is optionally followed by a separator specification:
+       ',' then optionally:
+         a positive integer, or
+         '*',
+       then optionally a '?',
+   - is followed by
+       either a format specifier
+       or a compound specifier:
+         - a '(',
+         - a format string that eats 1 or 2 arguments,
+         - optionally '%|' then literal text, possibly with doubled
+           percent-signs,
+         - '%)'.
+ */
+
+/* Data structure describing format string derived constraints for an
+   argument list.  It is a recursive list structure.  Structure sharing
+   is not allowed.  */
+
+/* Tells whether an argument position is mandatory or whether the argument
+   list may already be finished at that position.  */
+enum format_cdr_type
+{
+  FCT_REQUIRED, /* The format argument list cannot end before this argument.  */
+  FCT_OPTIONAL  /* The format argument list may end before this argument.  */
+};
+
+/* Set of value categories that are acceptable for an argument.
+   The members are bit flags that are combined with '|'; FAT_NONE is the
+   empty set and FAT_ANY_TYPE is the union of all nine categories.  */
+enum format_arg_type
+{
+  FAT_NONE           = 0,
+  FAT_BOOL           = 1 << 0,
+  FAT_INTEGER        = 1 << 1,
+  FAT_FLOATINGPOINT  = 1 << 2,
+  FAT_CHAR           = 1 << 3,
+  FAT_ARRAY          = 1 << 4, /* string or array */
+  FAT_ASSOCIATIVE    = 1 << 5,
+  FAT_IRANGE         = 1 << 6, /* irange or simd */
+  FAT_STRUCT         = 1 << 7, /* struct or class or union */
+  FAT_POINTER        = 1 << 8, /* pointer or null */
+  /* Note: enum are not listed here, since enum values can be formatted with
+     any specifier available for their base type.  */
+  FAT_ANY_TYPE       = (FAT_BOOL | FAT_INTEGER | FAT_FLOATINGPOINT | FAT_CHAR
+                        | FAT_ARRAY | FAT_ASSOCIATIVE | FAT_IRANGE | FAT_STRUCT
+                        | FAT_POINTER),
+  /* A flag (bit 9 is not assigned): */
+  FAT_ELEMENTWISE    = 1 << 10,
+  /* Combination of allowed types and flag: */
+  FAT_ELEMENTWISE_1  = FAT_ELEMENTWISE | FAT_ARRAY | FAT_IRANGE,
+  FAT_ELEMENTWISE_2  = FAT_ELEMENTWISE | FAT_ASSOCIATIVE
+};
+
+/* Constraint on one argument, or on a run of 'repcount' consecutive
+   arguments that all share the same constraint.  */
+struct format_arg
+{
+  unsigned int repcount; /* Number of consecutive arguments this constraint
+                            applies to.  Normally 1, but unconstrained
+                            arguments are often repeated.  */
+  enum format_cdr_type presence; /* Can the argument list end right before
+                                    this argument?  */
+  enum format_arg_type type;    /* Possible values for this argument.  */
+  struct format_arg_list *list; /* For FAT_ELEMENTWISE.  Only accessed by the
+                                   copy/free/compare code when the
+                                   FAT_ELEMENTWISE bit is set in 'type'.  */
+};
+
+/* A growable array of argument constraints.  */
+struct segment
+{
+  unsigned int count;   /* Number of format_arg records used.  */
+  unsigned int allocated; /* Number of format_arg records that 'element' has
+                             room for; count <= allocated.  */
+  struct format_arg *element;   /* Argument constraints.  */
+  unsigned int length; /* Number of arguments represented by this segment.
+                          This is the sum of all repcounts in the segment.  */
+};
+
+/* Constraints on a whole argument list: a finite initial part followed by
+   an optional, endlessly repeated part.  */
+struct format_arg_list
+{
+  /* The constraints for the potentially infinite argument list are assumed
+     to become ultimately periodic.  Such a periodic sequence can be split into
+     an initial segment and an endlessly repeated loop segment.
+     A finite sequence is represented entirely in the initial segment; the
+     loop segment is empty.
+     In this file, the loop segment is always either empty or has length 1.
+     But it is not worth exploiting this property: The code is more future-proof
+     in the general form, shared with format-lisp.c and format-scheme.c.  */
+
+  struct segment initial;       /* Initial arguments segment.  */
+  struct segment repeated;      /* Endlessly repeated segment.  */
+};
+
+/* Descriptor of a format string.
+   NOTE(review): the code that fills this in is not shown next to this
+   definition; the field descriptions below are assumptions to be confirmed
+   against the parser.  */
+struct spec
+{
+  unsigned int directives;      /* Presumably the number of directives found
+                                   in the format string — TODO confirm.  */
+  struct format_arg_list *list; /* Presumably the argument list constraints
+                                   derived from the format string — TODO
+                                   confirm.  */
+};
+
+
+/* Forward declaration of local functions.  */
+static void verify_list (const struct format_arg_list *list);
+static void free_list (struct format_arg_list *list);
+static struct format_arg_list * copy_list (const struct format_arg_list *list);
+static bool equal_list (const struct format_arg_list *list1,
+                        const struct format_arg_list *list2);
+static struct format_arg_list * make_intersected_list
+                                               (struct format_arg_list *list1,
+                                                struct format_arg_list *list2);
+
+
+/* ======================= Verify a format_arg_list ======================= */
+
+/* Check the invariants of a single argument constraint: its repcount must
+   be positive, and the sublist of an elementwise constraint must itself be
+   a valid list.  Aborts (through ASSERT) on violation.  */
+static void
+verify_element (const struct format_arg * e)
+{
+  const bool has_sublist = (e->type & FAT_ELEMENTWISE) != 0;
+
+  ASSERT (e->repcount > 0);
+  if (has_sublist)
+    verify_list (e->list);
+}
+
+/* Check the invariants of one segment: no more records are in use than
+   are allocated, every record is valid, and the recorded length equals the
+   sum of the repcounts.  */
+static void
+verify_segment (const struct segment *seg)
+{
+  unsigned int sum = 0;
+  unsigned int k;
+
+  ASSERT (seg->count <= seg->allocated);
+  for (k = 0; k < seg->count; k++)
+    {
+      const struct format_arg *e = &seg->element[k];
+
+      verify_element (e);
+      sum += e->repcount;
+    }
+  ASSERT (sum == seg->length);
+}
+
+/* Verify the invariants of an argument list: first those of the initial
+   segment, then those of the repeated segment.  */
+/* Memory effects: none.  */
+static void
+verify_list (const struct format_arg_list *list)
+{
+  verify_segment (&list->initial);
+  verify_segment (&list->repeated);
+}
+
+/* Assertion macro.  Could be defined to empty for speed.  */
+#define VERIFY_LIST(list) verify_list (list)
+
+
+/* ======================== Free a format_arg_list ======================== */
+
+/* Release what an argument list element owns.  Only elementwise elements
+   own something, namely their sublist; for all others this is a no-op.  */
+static inline void
+free_element (struct format_arg *element)
+{
+  if ((element->type & FAT_ELEMENTWISE) == 0)
+    return;
+  free_list (element->list);
+}
+
+/* Free the contents of an argument list.  */
+/* Memory effects: Frees the element arrays of both segments of list and,
+   through free_element, the contents of the sublists of elementwise
+   elements.  The struct that list points to is not passed to free() here.
+   NOTE(review): copy_list allocates that struct with XMALLOC, so a caller
+   apparently has to release it separately — confirm against the callers.  */
+static void
+free_list (struct format_arg_list *list)
+{
+  unsigned int i;
+
+  for (i = 0; i < list->initial.count; i++)
+    free_element (&list->initial.element[i]);
+  /* free (NULL) is a no-op, so an empty segment needs no special case.  */
+  free (list->initial.element);
+
+  for (i = 0; i < list->repeated.count; i++)
+    free_element (&list->repeated.element[i]);
+  free (list->repeated.element);
+}
+
+
+/* ======================== Copy a format_arg_list ======================== */
+
+/* Fill *newelement with a deep copy of *oldelement.
+   The 'list' member is duplicated (through copy_list) only for elementwise
+   constraints; for all others it is not assigned.  */
+static inline void
+copy_element (struct format_arg *newelement,
+              const struct format_arg *oldelement)
+{
+  const enum format_arg_type type = oldelement->type;
+
+  newelement->repcount = oldelement->repcount;
+  newelement->presence = oldelement->presence;
+  newelement->type = type;
+  if ((type & FAT_ELEMENTWISE) != 0)
+    newelement->list = copy_list (oldelement->list);
+}
+
+/* Deep-copy the segment *src into *dst.  The new array has exactly
+   src->count slots; an empty segment gets a NULL array.  */
+static void
+copy_segment (struct segment *dst, const struct segment *src)
+{
+  unsigned int total = 0;
+  unsigned int k;
+
+  dst->count = dst->allocated = src->count;
+  if (src->count > 0)
+    {
+      dst->element = XNMALLOC (dst->allocated, struct format_arg);
+      for (k = 0; k < src->count; k++)
+        {
+          copy_element (&dst->element[k], &src->element[k]);
+          total += src->element[k].repcount;
+        }
+    }
+  else
+    dst->element = NULL;
+  /* The recorded length must agree with the sum of the repcounts.  */
+  ASSERT (total == src->length);
+  dst->length = total;
+}
+
+/* Return a deep copy of an argument list.  Both the input and the result
+   are checked with VERIFY_LIST.  */
+/* Memory effects: Freshly allocated result.  */
+static struct format_arg_list *
+copy_list (const struct format_arg_list *list)
+{
+  struct format_arg_list *result;
+
+  VERIFY_LIST (list);
+
+  result = XMALLOC (struct format_arg_list);
+  copy_segment (&result->initial, &list->initial);
+  copy_segment (&result->repeated, &list->repeated);
+
+  VERIFY_LIST (result);
+
+  return result;
+}
+
+
+/* ===================== Compare two format_arg_lists ===================== */
+
+/* Tests whether two normalized argument constraints are equivalent,
+   ignoring the repcount: presence and type must match, and for elementwise
+   constraints the sublists must be equal as well.  */
+static bool
+equal_element (const struct format_arg * e1, const struct format_arg * e2)
+{
+  if (e1->presence != e2->presence || e1->type != e2->type)
+    return false;
+  if (e1->type & FAT_ELEMENTWISE)
+    return equal_list (e1->list, e2->list);
+  return true;
+}
+
+/* Tests whether two normalized segments consist of the same sequence of
+   constraints, with the same repcounts.  */
+static bool
+equal_segment (const struct segment *s1, const struct segment *s2)
+{
+  unsigned int k;
+
+  if (s1->count != s2->count)
+    return false;
+  for (k = 0; k < s1->count; k++)
+    {
+      const struct format_arg *a = &s1->element[k];
+      const struct format_arg *b = &s2->element[k];
+
+      if (a->repcount != b->repcount || !equal_element (a, b))
+        return false;
+    }
+  return true;
+}
+
+/* Tests whether two normalized argument list constraints are equivalent,
+   that is, whether their initial segments and their repeated segments
+   match pairwise.  */
+/* Memory effects: none.  */
+static bool
+equal_list (const struct format_arg_list *list1,
+            const struct format_arg_list *list2)
+{
+  VERIFY_LIST (list1);
+  VERIFY_LIST (list2);
+
+  return (equal_segment (&list1->initial, &list2->initial)
+          && equal_segment (&list1->repeated, &list2->repeated));
+}
+
+
+/* ===================== Incremental memory allocation ===================== */
+
+/* Ensure seg->allocated >= newcount.
+   When the array has to grow, its capacity is at least doubled, so that a
+   sequence of appends does not reallocate on every step.  Does nothing if
+   the segment is already large enough.  */
+static void
+ensure_segment_alloc (struct segment *seg, unsigned int newcount)
+{
+  if (newcount > seg->allocated)
+    {
+      seg->allocated = MAX (2 * seg->allocated + 1, newcount);
+      /* xnrealloc checks the count * size product for overflow, which a
+         plain multiplication passed to xrealloc would not.  */
+      seg->element =
+        (struct format_arg *)
+        xnrealloc (seg->element, seg->allocated, sizeof (struct format_arg));
+    }
+}
+
+/* Ensure list->initial.allocated >= newcount.  */
+static inline void
+ensure_initial_alloc (struct format_arg_list *list, unsigned int newcount)
+{
+  ensure_segment_alloc (&list->initial, newcount);
+}
+
+/* Ensure list->initial.allocated > list->initial.count.  */
+static inline void
+grow_initial_alloc (struct format_arg_list *list)
+{
+  ensure_segment_alloc (&list->initial, list->initial.count + 1);
+}
+
+/* Ensure list->repeated.allocated >= newcount.  */
+static inline void
+ensure_repeated_alloc (struct format_arg_list *list, unsigned int newcount)
+{
+  ensure_segment_alloc (&list->repeated, newcount);
+}
+
+/* Ensure list->repeated.allocated > list->repeated.count.  */
+static inline void
+grow_repeated_alloc (struct format_arg_list *list)
+{
+  ensure_segment_alloc (&list->repeated, list->repeated.count + 1);
+}
+
+
+/* ====================== Normalize a format_arg_list ====================== */
+
+/* Normalize an argument list constraint, assuming all sublists are already
+   normalized.
+   Step 1 merges adjacent equal elements within list->initial and within
+   list->repeated.  Step 2 shortens list->repeated when it consists of
+   several copies of a shorter period.  Step 3 moves elements from the end
+   of list->initial into list->repeated where they match the end of the
+   loop.  Steps 2 and 3 are skipped when list->repeated is empty.  */
+/* Memory effects: Destructively modifies list.  */
+static void
+normalize_outermost_list (struct format_arg_list *list)
+{
+  unsigned int n, i, j;
+
+  /* Step 1: Combine adjacent elements.
+     Copy from i to j, keeping 0 <= j <= i.
+     An element equal to the last one kept only adds its repcount to that
+     one and is then freed, so the segment's length does not change.  */
+
+  n = list->initial.count;
+  for (i = j = 0; i < n; i++)
+    if (j > 0
+        && equal_element (&list->initial.element[i],
+                          &list->initial.element[j-1]))
+      {
+        list->initial.element[j-1].repcount +=
+          list->initial.element[i].repcount;
+        free_element (&list->initial.element[i]);
+      }
+    else
+      {
+        if (j < i)
+          list->initial.element[j] = list->initial.element[i];
+        j++;
+      }
+  list->initial.count = j;
+
+  n = list->repeated.count;
+  for (i = j = 0; i < n; i++)
+    if (j > 0
+        && equal_element (&list->repeated.element[i],
+                          &list->repeated.element[j-1]))
+      {
+        list->repeated.element[j-1].repcount +=
+          list->repeated.element[i].repcount;
+        free_element (&list->repeated.element[i]);
+      }
+    else
+      {
+        if (j < i)
+          list->repeated.element[j] = list->repeated.element[i];
+        j++;
+      }
+  list->repeated.count = j;
+
+  /* Nothing more to be done if the loop segment is empty.  */
+  if (list->repeated.count > 0)
+    {
+      unsigned int m, repcount0_extra;
+
+      /* Step 2: Reduce the loop period.
+         If the first and the last element of the loop are equal, the last
+         one is treated as a continuation of the first one: it is left out
+         of the period (n is decremented) and its repcount is remembered in
+         repcount0_extra.  */
+      n = list->repeated.count;
+      repcount0_extra = 0;
+      if (n > 1
+          && equal_element (&list->repeated.element[0],
+                            &list->repeated.element[n-1]))
+        {
+          repcount0_extra = list->repeated.element[n-1].repcount;
+          n--;
+        }
+      /* Proceed as if the loop period were n, with
+         list->repeated.element[0].repcount incremented by repcount0_extra.  */
+      for (m = 2; m <= n / 2; m++)
+        if ((n % m) == 0)
+          {
+            /* m is a divisor of n.  Try to reduce the loop period to m.  */
+            bool ok = true;
+
+            for (i = 0; i < n - m; i++)
+              if (!((list->repeated.element[i].repcount
+                     + (i == 0 ? repcount0_extra : 0)
+                     == list->repeated.element[i+m].repcount)
+                    && equal_element (&list->repeated.element[i],
+                                      &list->repeated.element[i+m])))
+                {
+                  ok = false;
+                  break;
+                }
+            if (ok)
+              {
+                for (i = m; i < n; i++)
+                  free_element (&list->repeated.element[i]);
+                if (n < list->repeated.count)
+                  list->repeated.element[m] = list->repeated.element[n];
+                list->repeated.count = list->repeated.count - n + m;
+                list->repeated.length /= n / m;
+                break;
+              }
+          }
+      if (list->repeated.count == 1)
+        {
+          /* The loop has period 1.  Normalize the repcount.  */
+          list->repeated.element[0].repcount = 1;
+          list->repeated.length = 1;
+        }
+
+      /* Step 3: Roll as much as possible of the initial segment's tail
+         into the loop.  */
+      if (list->repeated.count == 1)
+        {
+          if (list->initial.count > 0
+              && equal_element (&list->initial.element[list->initial.count-1],
+                                &list->repeated.element[0]))
+            {
+              /* Roll the last element of the initial segment into the loop.
+                 Its repcount is irrelevant.  The second-to-last element is
+                 certainly different and doesn't need to be considered.  */
+              list->initial.length -=
+                list->initial.element[list->initial.count-1].repcount;
+              free_element (&list->initial.element[list->initial.count-1]);
+              list->initial.count--;
+            }
+        }
+      else
+        {
+          while (list->initial.count > 0
+                 && equal_element (&list->initial.element[list->initial.count-1],
+                                   &list->repeated.element[list->repeated.count-1]))
+            {
+              /* Number of positions that can be moved in this round: limited
+                 by both the initial segment's last element and the loop's
+                 last element.  */
+              unsigned int moved_repcount =
+                MIN (list->initial.element[list->initial.count-1].repcount,
+                     list->repeated.element[list->repeated.count-1].repcount);
+
+              /* Add the element at the start of list->repeated.  */
+              if (equal_element (&list->repeated.element[0],
+                                 &list->repeated.element[list->repeated.count-1]))
+                list->repeated.element[0].repcount += moved_repcount;
+              else
+                {
+                  unsigned int newcount = list->repeated.count + 1;
+                  ensure_repeated_alloc (list, newcount);
+                  for (i = newcount - 1; i > 0; i--)
+                    list->repeated.element[i] = list->repeated.element[i-1];
+                  list->repeated.count = newcount;
+                  copy_element (&list->repeated.element[0],
+                                &list->repeated.element[list->repeated.count-1]);
+                  list->repeated.element[0].repcount = moved_repcount;
+                }
+
+              /* Remove the element from the end of list->repeated.  */
+              list->repeated.element[list->repeated.count-1].repcount -=
+                moved_repcount;
+              if (list->repeated.element[list->repeated.count-1].repcount == 0)
+                {
+                  free_element (&list->repeated.element[list->repeated.count-1]);
+                  list->repeated.count--;
+                }
+
+              /* Remove the element from the end of list->initial.  */
+              list->initial.element[list->initial.count-1].repcount -=
+                moved_repcount;
+              if (list->initial.element[list->initial.count-1].repcount == 0)
+                {
+                  free_element (&list->initial.element[list->initial.count-1]);
+                  list->initial.count--;
+                }
+              list->initial.length -= moved_repcount;
+            }
+        }
+    }
+}
+
+/* Bring an argument list constraint into normal form, including all the
+   sublists nested inside its elements.  */
+/* Memory effects: Destructively modifies list.  */
+static void
+normalize_list (struct format_arg_list *list)
+{
+  unsigned int count, k;
+
+  VERIFY_LIST (list);
+
+  /* Sublists come first, so that the outermost pass below only sees
+     elements whose sublists are already normalized.  */
+  count = list->initial.count;
+  for (k = 0; k < count; k++)
+    {
+      struct format_arg *arg = &list->initial.element[k];
+
+      if (arg->type & FAT_ELEMENTWISE)
+        normalize_list (arg->list);
+    }
+
+  count = list->repeated.count;
+  for (k = 0; k < count; k++)
+    {
+      struct format_arg *arg = &list->repeated.element[k];
+
+      if (arg->type & FAT_ELEMENTWISE)
+        normalize_list (arg->list);
+    }
+
+  /* Finally the list itself.  */
+  normalize_outermost_list (list);
+
+  VERIFY_LIST (list);
+}
+
+
+/* ===================== Unconstrained and empty lists ===================== */
+
+/* It's easier to allocate these on demand, than to be careful not to
+   accidentally modify statically allocated lists.  */
+
+
+/* Build a list constraint that accepts anything: no initial segment, and
+   a loop consisting of one optional argument of any type.  */
+/* Memory effects: Freshly allocated result.  */
+static struct format_arg_list *
+make_unconstrained_list ()
+{
+  struct format_arg_list *result = XMALLOC (struct format_arg_list);
+  struct format_arg *any = XNMALLOC (1, struct format_arg);
+
+  /* The single loop element.  */
+  any->repcount = 1;
+  any->presence = FCT_OPTIONAL;
+  any->type = FAT_ANY_TYPE;
+
+  /* Empty initial segment.  */
+  result->initial.count = 0;
+  result->initial.allocated = 0;
+  result->initial.element = NULL;
+  result->initial.length = 0;
+
+  /* Loop of period 1.  */
+  result->repeated.count = 1;
+  result->repeated.allocated = 1;
+  result->repeated.element = any;
+  result->repeated.length = 1;
+
+  VERIFY_LIST (result);
+
+  return result;
+}
+
+
+/* Build a list constraint that accepts only the empty argument list:
+   neither an initial segment nor a loop.  */
+/* Memory effects: Freshly allocated result.  */
+static struct format_arg_list *
+make_empty_list ()
+{
+  struct format_arg_list *empty = XMALLOC (struct format_arg_list);
+
+  /* No initial segment.  */
+  empty->initial.element = NULL;
+  empty->initial.allocated = 0;
+  empty->initial.count = 0;
+  empty->initial.length = 0;
+
+  /* No loop segment either.  */
+  empty->repeated.element = NULL;
+  empty->repeated.allocated = 0;
+  empty->repeated.count = 0;
+  empty->repeated.length = 0;
+
+  VERIFY_LIST (empty);
+
+  return empty;
+}
+
+
+/* Tell whether a list constraint has neither initial nor repeated
+   elements.  */
+/* Memory effects: none.  */
+MAYBE_UNUSED static bool
+is_empty_list (const struct format_arg_list *list)
+{
+  if (list->initial.count != 0)
+    return false;
+  if (list->repeated.count != 0)
+    return false;
+  return true;
+}
+
+
+/* ======================== format_arg_list surgery ======================== */
+
+/* Replace list->repeated by m consecutive copies of itself, where m >= 1.
+   Both its element count and its length get multiplied by m.
+   Assumes list->repeated.count > 0.  */
+/* Memory effects: list is destructively modified.  */
+static void
+unfold_loop (struct format_arg_list *list, unsigned int m)
+{
+  unsigned int period, total, pass, src, dst;
+
+  if (m <= 1)
+    /* A single copy is what we already have.  */
+    return;
+
+  period = list->repeated.count;
+  total = period * m;
+  ensure_repeated_alloc (list, total);
+
+  /* Append m - 1 further copies of the first PERIOD elements.  */
+  dst = period;
+  for (pass = 1; pass < m; pass++)
+    for (src = 0; src < period; src++)
+      {
+        copy_element (&list->repeated.element[dst],
+                      &list->repeated.element[src]);
+        dst++;
+      }
+
+  list->repeated.count = total;
+  list->repeated.length *= m;
+}
+
+/* Ensure list->initial.length := m, where m >= list->initial.length.
+   Assumes list->repeated.count > 0.
+   The initial segment is extended by elements copied from the loop and,
+   where necessary, the loop is rotated so that it starts at the position
+   where the initial segment now ends.  Nothing is done if
+   m == list->initial.length.  */
+/* Memory effects: list is destructively modified.  */
+static void
+rotate_loop (struct format_arg_list *list, unsigned int m)
+{
+  if (m == list->initial.length)
+    return;
+
+  if (list->repeated.count == 1)
+    {
+      /* Instead of multiple copies of list->repeated.element[0], a single
+         copy with higher repcount is appended to list->initial.  */
+      unsigned int i, newcount;
+
+      newcount = list->initial.count + 1;
+      ensure_initial_alloc (list, newcount);
+      i = list->initial.count;
+      copy_element (&list->initial.element[i], &list->repeated.element[0]);
+      list->initial.element[i].repcount = m - list->initial.length;
+      list->initial.count = newcount;
+      list->initial.length = m;
+    }
+  else
+    {
+      unsigned int n = list->repeated.length;
+
+      /* Write m = list->initial.length + q * n + r with 0 <= r < n.  */
+      unsigned int q = (m - list->initial.length) / n;
+      unsigned int r = (m - list->initial.length) % n;
+
+      /* Determine how many entries of list->repeated are needed for
+         length r.  Afterwards, s is the number of whole entries and t is
+         the number of positions still missing after these s entries.  */
+      unsigned int s;
+      unsigned int t;
+
+      for (t = r, s = 0;
+           s < list->repeated.count && t >= list->repeated.element[s].repcount;
+           t -= list->repeated.element[s].repcount, s++)
+        ;
+
+      /* s must be < list->repeated.count, otherwise r would have been >= n.  */
+      ASSERT (s < list->repeated.count);
+
+      /* So we need to add to list->initial:
+         q full copies of list->repeated,
+         plus the s first elements of list->repeated,
+         plus, if t > 0, a splitoff of list->repeated.element[s].  */
+      {
+        unsigned int i, j, k, newcount;
+
+        i = list->initial.count;
+        newcount = i + q * list->repeated.count + s + (t > 0 ? 1 : 0);
+        ensure_initial_alloc (list, newcount);
+        for (k = 0; k < q; k++)
+          for (j = 0; j < list->repeated.count; j++, i++)
+            copy_element (&list->initial.element[i],
+                          &list->repeated.element[j]);
+        for (j = 0; j < s; j++, i++)
+          copy_element (&list->initial.element[i], &list->repeated.element[j]);
+        if (t > 0)
+          {
+            /* Here j == s, left over from the preceding loop.  */
+            copy_element (&list->initial.element[i],
+                          &list->repeated.element[j]);
+            list->initial.element[i].repcount = t;
+            i++;
+          }
+        ASSERT (i == newcount);
+        list->initial.count = newcount;
+        /* The new length of the initial segment is
+           = list->initial.length
+             + q * list->repeated.length
+             + list->repeated[0..s-1].repcount + t
+           = list->initial.length + q * n + r
+           = m.
+         */
+        list->initial.length = m;
+      }
+
+      /* And rotate list->repeated.
+         The new loop starts with old element s (shortened by t if it was
+         split), continues with old elements s+1.., then old elements
+         0..s-1, and, if t > 0, ends with the splitoff of repcount t.  The
+         elements are moved by struct assignment; only the splitoff is a
+         copy made with copy_element.  */
+      if (r > 0)
+        {
+          unsigned int i, j, oldcount, newcount;
+          struct format_arg *newelement;
+
+          oldcount = list->repeated.count;
+          newcount = list->repeated.count + (t > 0 ? 1 : 0);
+          newelement = XNMALLOC (newcount, struct format_arg);
+          i = 0;
+          for (j = s; j < oldcount; j++, i++)
+            newelement[i] = list->repeated.element[j];
+          for (j = 0; j < s; j++, i++)
+            newelement[i] = list->repeated.element[j];
+          if (t > 0)
+            {
+              copy_element (&newelement[oldcount], &newelement[0]);
+              newelement[0].repcount -= t;
+              newelement[oldcount].repcount = t;
+            }
+          free (list->repeated.element);
+          list->repeated.element = newelement;
+          list->repeated.count = newcount;
+        }
+    }
+}
+
+
+/* Ensure index n in the initial segment falls on a split between elements,
+   i.e. if 0 < n < list->initial.length, then n-1 and n are covered by two
+   different adjacent elements.
+   If n > list->initial.length, the initial segment is first extended to
+   length n through rotate_loop; this requires list->repeated.count > 0.
+   Return the index in list->initial.element of the element that starts at
+   position n; this is list->initial.count if n is the length of the
+   initial segment.  */
+/* Memory effects: list is destructively modified.  */
+static unsigned int
+initial_splitelement (struct format_arg_list *list, unsigned int n)
+{
+  unsigned int s;
+  unsigned int t;
+  unsigned int oldrepcount;
+  unsigned int newcount;
+  unsigned int i;
+
+  VERIFY_LIST (list);
+
+  if (n > list->initial.length)
+    {
+      ASSERT (list->repeated.count > 0);
+      rotate_loop (list, n);
+      ASSERT (n <= list->initial.length);
+    }
+
+  /* Determine how many entries of list->initial need to be skipped.
+     Afterwards, s is the number of whole entries before position n, and t
+     is the offset of position n within entry s.  */
+  for (t = n, s = 0;
+       s < list->initial.count && t >= list->initial.element[s].repcount;
+       t -= list->initial.element[s].repcount, s++)
+    ;
+
+  if (t == 0)
+    return s;
+
+  ASSERT (s < list->initial.count);
+
+  /* Split the entry into two entries: the first one keeps repcount t, the
+     second one is a copy with the remaining repcount.  */
+  oldrepcount = list->initial.element[s].repcount;
+  newcount = list->initial.count + 1;
+  ensure_initial_alloc (list, newcount);
+  for (i = list->initial.count - 1; i > s; i--)
+    list->initial.element[i+1] = list->initial.element[i];
+  copy_element (&list->initial.element[s+1], &list->initial.element[s]);
+  list->initial.element[s].repcount = t;
+  list->initial.element[s+1].repcount = oldrepcount - t;
+  list->initial.count = newcount;
+
+  VERIFY_LIST (list);
+
+  return s+1;
+}
+
+
+/* Ensure index n in the initial segment is not shared.  Return its index.
+   I.e. afterwards the element of list->initial that covers position n has
+   repcount 1, and the return value is that element's index in
+   list->initial.element.
+   If n >= list->initial.length, the initial segment is first extended to
+   length n + 1 through rotate_loop; this requires
+   list->repeated.count > 0.  */
+/* Memory effects: list is destructively modified.  */
+MAYBE_UNUSED static unsigned int
+initial_unshare (struct format_arg_list *list, unsigned int n)
+{
+  /* This does the same side effects as
+       initial_splitelement (list, n);
+       initial_splitelement (list, n + 1);
+   */
+  unsigned int s;
+  unsigned int t;
+
+  VERIFY_LIST (list);
+
+  if (n >= list->initial.length)
+    {
+      ASSERT (list->repeated.count > 0);
+      rotate_loop (list, n + 1);
+      ASSERT (n < list->initial.length);
+    }
+
+  /* Determine how many entries of list->initial need to be skipped.
+     Afterwards, entry s covers position n, at offset t within the entry.  */
+  for (t = n, s = 0;
+       s < list->initial.count && t >= list->initial.element[s].repcount;
+       t -= list->initial.element[s].repcount, s++)
+    ;
+
+  /* s must be < list->initial.count.  */
+  ASSERT (s < list->initial.count);
+
+  if (list->initial.element[s].repcount > 1)
+    {
+      /* Split the entry into at most three entries: for indices < n,
+         for index n, and for indices > n.
+         Two entries suffice when position n is the first (t == 0) or the
+         last (t == oldrepcount - 1) position covered by the entry.  */
+      unsigned int oldrepcount = list->initial.element[s].repcount;
+      unsigned int newcount =
+        list->initial.count + (t == 0 || t == oldrepcount - 1 ? 1 : 2);
+      ensure_initial_alloc (list, newcount);
+      if (t == 0 || t == oldrepcount - 1)
+        {
+          unsigned int i;
+
+          for (i = list->initial.count - 1; i > s; i--)
+            list->initial.element[i+1] = list->initial.element[i];
+          copy_element (&list->initial.element[s+1], &list->initial.element[s]);
+          if (t == 0)
+            {
+              list->initial.element[s].repcount = 1;
+              list->initial.element[s+1].repcount = oldrepcount - 1;
+            }
+          else
+            {
+              list->initial.element[s].repcount = oldrepcount - 1;
+              list->initial.element[s+1].repcount = 1;
+            }
+        }
+      else
+        {
+          unsigned int i;
+
+          for (i = list->initial.count - 1; i > s; i--)
+            list->initial.element[i+2] = list->initial.element[i];
+          copy_element (&list->initial.element[s+2], &list->initial.element[s]);
+          copy_element (&list->initial.element[s+1], &list->initial.element[s]);
+          list->initial.element[s].repcount = t;
+          list->initial.element[s+1].repcount = 1;
+          list->initial.element[s+2].repcount = oldrepcount - 1 - t;
+        }
+      list->initial.count = newcount;
+      if (t > 0)
+        s++;
+    }
+
+  /* Now the entry for index n has repcount 1.  */
+  ASSERT (list->initial.element[s].repcount == 1);
+
+  VERIFY_LIST (list);
+
+  return s;
+}
+
+
+/* ================= Intersection of two format_arg_lists ================= */
+
+/* Create the intersection (i.e. combined constraints) of two argument
+   constraints.  Return false if the intersection is empty, i.e. if the
+   two constraints give a contradiction.
+   The result is stored in *re.  re->presence is set in all cases, also
+   when false is returned; re->repcount is not touched.  When true is
+   returned, re->type is set, and re->list is set if the resulting type
+   has a FAT_ELEMENTWISE bit.  */
+/* Memory effects: Freshly allocated element's sublist.  */
+static bool
+make_intersected_element (struct format_arg *re,
+                          const struct format_arg * e1,
+                          const struct format_arg * e2)
+{
+  /* Intersect the presence constraints: the argument is required if either
+     side requires it.  */
+  if (e1->presence == FCT_REQUIRED || e2->presence == FCT_REQUIRED)
+    re->presence = FCT_REQUIRED;
+  else
+    re->presence = FCT_OPTIONAL;
+
+  /* Intersect the arg types.
+     FAT_ANY_TYPE on one side means that the other side's type (and
+     sublist, if any) is taken over unchanged.  */
+  if (e1->type == FAT_ANY_TYPE)
+    {
+      re->type = e2->type;
+      if (e2->type & FAT_ELEMENTWISE)
+        re->list = copy_list (e2->list);
+    }
+  else if (e2->type == FAT_ANY_TYPE)
+    {
+      re->type = e1->type;
+      if (e1->type & FAT_ELEMENTWISE)
+        re->list = copy_list (e1->list);
+    }
+  else if (e1->type & e2->type & FAT_ELEMENTWISE)
+    {
+      if ((e1->type == FAT_ELEMENTWISE_1 && e2->type == FAT_ELEMENTWISE_1)
+          || (e1->type == FAT_ELEMENTWISE_2 && e2->type == FAT_ELEMENTWISE_2))
+        {
+          re->type = e1->type;
+          re->list = make_intersected_list (copy_list (e1->list),
+                                            copy_list (e2->list));
+          if (re->list == NULL)
+            return false;
+        }
+      else
+        return false;
+    }
+  else
+    {
+      re->type = e1->type & e2->type;
+      if (re->type == FAT_NONE)
+        return false;
+      if (e1->type & FAT_ELEMENTWISE)
+        {
+          re->type |= FAT_ELEMENTWISE;
+          re->list = copy_list (e1->list);
+        }
+      else if (e2->type & FAT_ELEMENTWISE)
+        {
+          re->type |= FAT_ELEMENTWISE;
+          re->list = copy_list (e2->list);
+        }
+    }
+
+  return true;
+}
+
+/* Turn the loop of list into a one-time tail of its initial segment:
+   move all elements of list->repeated to the end of list->initial, and
+   leave list->repeated empty.  Does nothing if list->repeated is already
+   empty.  */
+/* Memory effects: list is destructively modified.  */
+static void
+append_repeated_to_initial (struct format_arg_list *list)
+{
+  unsigned int src, dst, total;
+
+  if (list->repeated.count == 0)
+    return;
+
+  total = list->initial.count + list->repeated.count;
+  ensure_initial_alloc (list, total);
+
+  /* The elements are moved by struct assignment, not duplicated.  */
+  dst = list->initial.count;
+  for (src = 0; src < list->repeated.count; src++)
+    {
+      list->initial.element[dst] = list->repeated.element[src];
+      dst++;
+    }
+  list->initial.count = total;
+  list->initial.length += list->repeated.length;
+
+  /* Only the array itself is released.  */
+  free (list->repeated.element);
+  list->repeated.element = NULL;
+  list->repeated.allocated = 0;
+  list->repeated.count = 0;
+  list->repeated.length = 0;
+}
+
+/* Recover from a contradiction found while building a format_arg_list.
+   The list has only an initial segment; its repeated segment is empty.
+   Trailing FCT_REQUIRED elements are dropped until an FCT_OPTIONAL element
+   is found; the list is then cut off so that it ends one position before
+   the end of that element, and returned.  If there is no FCT_OPTIONAL
+   element at all, the result is NULL.  */
+/* Memory effects: list is destructively modified.  If NULL is returned,
+   list is freed.  */
+static struct format_arg_list *
+backtrack_in_initial (struct format_arg_list *list)
+{
+  ASSERT (list->repeated.count == 0);
+
+  for (;;)
+    {
+      struct format_arg *last;
+
+      if (list->initial.count == 0)
+        break;
+
+      last = &list->initial.element[list->initial.count - 1];
+      if (last->presence != FCT_REQUIRED)
+        {
+          /* An FCT_OPTIONAL element.  The list must end here: give up one
+             position of it.  */
+          list->initial.length--;
+          if (last->repcount > 1)
+            last->repcount--;
+          else
+            {
+              free_element (last);
+              list->initial.count--;
+            }
+          VERIFY_LIST (list);
+          return list;
+        }
+
+      /* A required element cannot be kept.  Drop it entirely.  */
+      list->initial.length -= last->repcount;
+      free_element (last);
+      list->initial.count--;
+    }
+
+  /* Nothing optional was found.  */
+  free_list (list);
+  return NULL;
+}
+
+/* Create the intersection (i.e. combined constraints) of two argument list
+   constraints.  Free both argument lists when done.  Return NULL if the
+   intersection is empty, i.e. if the two constraints give a contradiction.
+   Before being freed, list1 and list2 are modified: their loops get
+   unfolded and rotated so that both lists have the same shape, and the
+   repcounts of their elements are decremented while the elements are
+   consumed.  */
+/* Memory effects: list1 and list2 are freed.  The result, if non-NULL, is
+   freshly allocated.  */
+static struct format_arg_list *
+make_intersected_list (struct format_arg_list *list1,
+                       struct format_arg_list *list2)
+{
+  struct format_arg_list *result;
+
+  VERIFY_LIST (list1);
+  VERIFY_LIST (list2);
+
+  if (list1->repeated.length > 0 && list2->repeated.length > 0)
+    /* Step 1: Ensure list1->repeated.length == list2->repeated.length.  */
+    {
+      unsigned int n1 = list1->repeated.length;
+      unsigned int n2 = list2->repeated.length;
+      unsigned int g = gcd (n1, n2);
+      unsigned int m1 = n2 / g; /* = lcm(n1,n2) / n1 */
+      unsigned int m2 = n1 / g; /* = lcm(n1,n2) / n2 */
+
+      unfold_loop (list1, m1);
+      unfold_loop (list2, m2);
+      /* Now list1->repeated.length = list2->repeated.length = lcm(n1,n2).  */
+    }
+
+  if (list1->repeated.length > 0 || list2->repeated.length > 0)
+    /* Step 2: Ensure the initial segment of the result can be computed
+       from the initial segments of list1 and list2.  If both have a
+       repeated segment, this means to ensure
+       list1->initial.length == list2->initial.length.  */
+    {
+      unsigned int m = MAX (list1->initial.length, list2->initial.length);
+
+      if (list1->repeated.length > 0)
+        rotate_loop (list1, m);
+      if (list2->repeated.length > 0)
+        rotate_loop (list2, m);
+    }
+
+  if (list1->repeated.length > 0 && list2->repeated.length > 0)
+    {
+      ASSERT (list1->initial.length == list2->initial.length);
+      ASSERT (list1->repeated.length == list2->repeated.length);
+    }
+
+  /* Step 3: Allocate the result.  */
+  result = XMALLOC (struct format_arg_list);
+  result->initial.count = 0;
+  result->initial.allocated = 0;
+  result->initial.element = NULL;
+  result->initial.length = 0;
+  result->repeated.count = 0;
+  result->repeated.allocated = 0;
+  result->repeated.element = NULL;
+  result->repeated.length = 0;
+
+  /* Step 4: Elementwise intersection of list1->initial, list2->initial.
+     e1 and e2 walk through the two element arrays; c1 and c2 count the
+     elements that are not yet fully consumed.  */
+  {
+    struct format_arg *e1;
+    struct format_arg *e2;
+    unsigned int c1;
+    unsigned int c2;
+
+    e1 = list1->initial.element; c1 = list1->initial.count;
+    e2 = list2->initial.element; c2 = list2->initial.count;
+    while (c1 > 0 && c2 > 0)
+      {
+        struct format_arg *re;
+
+        /* Ensure room in result->initial.  */
+        grow_initial_alloc (result);
+        re = &result->initial.element[result->initial.count];
+        re->repcount = MIN (e1->repcount, e2->repcount);
+
+        /* Intersect the argument types.  */
+        if (!make_intersected_element (re, e1, e2))
+          {
+            /* If re->presence == FCT_OPTIONAL, the result list ends here.  */
+            if (re->presence == FCT_REQUIRED)
+              /* Contradiction.  Backtrack.  */
+              result = backtrack_in_initial (result);
+            goto done;
+          }
+
+        result->initial.count++;
+        result->initial.length += re->repcount;
+
+        e1->repcount -= re->repcount;
+        if (e1->repcount == 0)
+          {
+            e1++;
+            c1--;
+          }
+        e2->repcount -= re->repcount;
+        if (e2->repcount == 0)
+          {
+            e2++;
+            c2--;
+          }
+      }
+
+    if (list1->repeated.count == 0 && list2->repeated.count == 0)
+      {
+        /* Intersecting two finite lists.  */
+        if (c1 > 0)
+          {
+            /* list1 longer than list2.  */
+            if (e1->presence == FCT_REQUIRED)
+              /* Contradiction.  Backtrack.  */
+              result = backtrack_in_initial (result);
+          }
+        else if (c2 > 0)
+          {
+            /* list2 longer than list1.  */
+            if (e2->presence == FCT_REQUIRED)
+              /* Contradiction.  Backtrack.  */
+              result = backtrack_in_initial (result);
+          }
+        goto done;
+      }
+    else if (list1->repeated.count == 0)
+      {
+        /* Intersecting a finite and an infinite list.  */
+        ASSERT (c1 == 0);
+        if ((c2 > 0 ? e2->presence : list2->repeated.element[0].presence)
+            == FCT_REQUIRED)
+          /* Contradiction.  Backtrack.  */
+          result = backtrack_in_initial (result);
+        goto done;
+      }
+    else if (list2->repeated.count == 0)
+      {
+        /* Intersecting an infinite and a finite list.  */
+        ASSERT (c2 == 0);
+        if ((c1 > 0 ? e1->presence : list1->repeated.element[0].presence)
+            == FCT_REQUIRED)
+          /* Contradiction.  Backtrack.  */
+          result = backtrack_in_initial (result);
+        goto done;
+      }
+    /* Intersecting two infinite lists.  */
+    ASSERT (c1 == 0 && c2 == 0);
+  }
+
+  /* Step 5: Elementwise intersection of list1->repeated, list2->repeated.  */
+  {
+    struct format_arg *e1;
+    struct format_arg *e2;
+    unsigned int c1;
+    unsigned int c2;
+
+    e1 = list1->repeated.element; c1 = list1->repeated.count;
+    e2 = list2->repeated.element; c2 = list2->repeated.count;
+    while (c1 > 0 && c2 > 0)
+      {
+        struct format_arg *re;
+
+        /* Ensure room in result->repeated.  */
+        grow_repeated_alloc (result);
+        re = &result->repeated.element[result->repeated.count];
+        re->repcount = MIN (e1->repcount, e2->repcount);
+
+        /* Intersect the argument types.  */
+        if (!make_intersected_element (re, e1, e2))
+          {
+            /* Fetch re->presence now: append_repeated_to_initial may free
+               the array that re points into.  */
+            bool re_is_required = re->presence == FCT_REQUIRED;
+
+            append_repeated_to_initial (result);
+
+            /* If re->presence == FCT_OPTIONAL, the result list ends here.  */
+            if (re_is_required)
+              /* Contradiction.  Backtrack.  */
+              result = backtrack_in_initial (result);
+
+            goto done;
+          }
+
+        result->repeated.count++;
+        result->repeated.length += re->repcount;
+
+        e1->repcount -= re->repcount;
+        if (e1->repcount == 0)
+          {
+            e1++;
+            c1--;
+          }
+        e2->repcount -= re->repcount;
+        if (e2->repcount == 0)
+          {
+            e2++;
+            c2--;
+          }
+      }
+    ASSERT (c1 == 0 && c2 == 0);
+  }
+
+ done:
+  free_list (list1);
+  free_list (list2);
+  if (result != NULL)
+    {
+      /* Undo the loop unfolding and unrolling done above.  */
+      normalize_outermost_list (result);
+      VERIFY_LIST (result);
+    }
+  return result;
+}
+
+
+/* Intersect an argument list constraint with the empty list.
+   The outcome depends only on the first element of list: if it is
+   FCT_REQUIRED, there is a contradiction and NULL is returned; otherwise
+   (also if list has no elements at all) the result is an empty list.  */
+/* Memory effects: The result, if non-NULL, is freshly allocated.  */
+MAYBE_UNUSED static struct format_arg_list *
+make_intersection_with_empty_list (struct format_arg_list *list)
+{
+#if 0 /* equivalent but slower */
+  return make_intersected_list (copy_list (list), make_empty_list ());
+#else
+  const struct format_arg *first;
+
+  /* Locate the element that covers position 0, if there is one.  */
+  if (list->initial.count > 0)
+    first = &list->initial.element[0];
+  else if (list->repeated.count > 0)
+    first = &list->repeated.element[0];
+  else
+    first = NULL;
+
+  if (first != NULL && first->presence == FCT_REQUIRED)
+    return NULL;
+
+  return make_empty_list ();
+#endif
+}
+
+
+/* Intersect two argument list constraints, each of which may be NULL.
+   NULL denotes a contradiction (an impossible situation); intersecting
+   with it yields NULL again.  */
+/* Memory effects: list1 and list2 are freed if non-NULL.  The result,
+   if non-NULL, is freshly allocated.  */
+MAYBE_UNUSED static struct format_arg_list *
+intersection (struct format_arg_list *list1, struct format_arg_list *list2)
+{
+  if (list1 != NULL && list2 != NULL)
+    return make_intersected_list (list1, list2);
+
+  /* At least one side is a contradiction.  Release the other side, if
+     there is one.  */
+  if (list1 != NULL)
+    free_list (list1);
+  if (list2 != NULL)
+    free_list (list2);
+
+  return NULL;
+}
+
+
+/* ===================== Union of two format_arg_lists ===================== */
+
+/* Form the union of an argument list constraint and the empty list.
+   If the argument at position 0 is FCT_REQUIRED, it is made FCT_OPTIONAL;
+   otherwise the list is left as it is.  */
+/* Memory effects: list is consumed: it is modified in place and handed
+   back as the result.  */
+MAYBE_UNUSED static struct format_arg_list *
+make_union_with_empty_list (struct format_arg_list *list)
+{
+  bool first_is_required;
+
+  VERIFY_LIST (list);
+
+  /* Look at the element that covers position 0, if there is one.  */
+  if (list->initial.count > 0)
+    first_is_required =
+      (list->initial.element[0].presence == FCT_REQUIRED);
+  else
+    first_is_required =
+      (list->repeated.count > 0
+       && list->repeated.element[0].presence == FCT_REQUIRED);
+
+  if (first_is_required)
+    {
+      /* Isolate position 0 in an element of its own, then relax it.  */
+      initial_splitelement (list, 1);
+      ASSERT (list->initial.count > 0);
+      ASSERT (list->initial.element[0].repcount == 1);
+      ASSERT (list->initial.element[0].presence == FCT_REQUIRED);
+      list->initial.element[0].presence = FCT_OPTIONAL;
+
+      /* list->initial.element[0] and list->initial.element[1] may have
+         become equal and need merging.  */
+      normalize_outermost_list (list);
+    }
+
+  VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* =========== Adding specific constraints to a format_arg_list =========== */
+
+
+/* Tell whether all of the arguments 0..n of a list are required, i.e.
+   whether the first n+1 presence flags are FCT_REQUIRED.  */
+MAYBE_UNUSED static bool
+is_required (const struct format_arg_list *list, unsigned int n)
+{
+  unsigned int idx;
+  /* Number of positions that still have to be checked.  */
+  unsigned int remaining = n + 1;
+
+  /* First the initial segment: elements that lie entirely within the
+     positions to check.  */
+  idx = 0;
+  while (idx < list->initial.count
+         && remaining >= list->initial.element[idx].repcount)
+    {
+      if (list->initial.element[idx].presence != FCT_REQUIRED)
+        return false;
+      remaining -= list->initial.element[idx].repcount;
+      idx++;
+    }
+
+  if (remaining == 0)
+    return true;
+
+  /* An element of the initial segment that covers the remaining positions
+     and more decides the question.  */
+  if (idx < list->initial.count)
+    return list->initial.element[idx].presence == FCT_REQUIRED;
+
+  /* The initial segment is exhausted.  Without a loop, the remaining
+     positions do not exist.  */
+  if (list->repeated.count == 0)
+    return false;
+
+  /* Then one pass through the repeated segment, in the same way.  */
+  idx = 0;
+  while (idx < list->repeated.count
+         && remaining >= list->repeated.element[idx].repcount)
+    {
+      if (list->repeated.element[idx].presence != FCT_REQUIRED)
+        return false;
+      remaining -= list->repeated.element[idx].repcount;
+      idx++;
+    }
+
+  if (remaining > 0 && idx < list->repeated.count)
+    return list->repeated.element[idx].presence == FCT_REQUIRED;
+
+  /* Either nothing remains to be checked, or the whole repeated segment
+     consists only of FCT_REQUIRED elements.  In the latter case, no matter
+     how many more passes through list->repeated it would take to use up
+     the remaining positions, the answer is true.  */
+  return true;
+}
+
+
+/* Constrain an argument list so that the arguments 0...n are all present.
+   NULL stands for an impossible situation, i.e. a contradiction.  */
+/* Memory effects: list is consumed.  The result is either the modified list
+   or, after list has been freed, NULL.  */
+static struct format_arg_list *
+add_required_constraint (struct format_arg_list *list, unsigned int n)
+{
+  unsigned int idx;
+  unsigned int remaining;
+
+  if (list == NULL)
+    return NULL;
+
+  VERIFY_LIST (list);
+
+  if (!(list->repeated.count > 0 || list->initial.length > n))
+    {
+      /* The list cannot have more than n arguments, yet n+1 of them are
+         demanded.  Contradiction.  */
+      free_list (list);
+      return NULL;
+    }
+
+  /* Make an element boundary coincide with position n+1.  */
+  initial_splitelement (list, n + 1);
+
+  /* Mark the elements that make up the first n+1 arguments.  */
+  idx = 0;
+  remaining = n + 1;
+  while (remaining > 0)
+    {
+      struct format_arg *e = &list->initial.element[idx];
+
+      e->presence = FCT_REQUIRED;
+      remaining -= e->repcount;
+      idx++;
+    }
+
+  VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* Constrain an argument list so that the argument n is never present, i.e.
+   so that the list ends before position n.  NULL stands for an impossible
+   situation, i.e. a contradiction.  */
+/* Memory effects: list is consumed.  The result is the truncated list or
+   whatever backtrack_in_initial makes of it.  */
+static struct format_arg_list *
+add_end_constraint (struct format_arg_list *list, unsigned int n)
+{
+  unsigned int cut;
+  unsigned int k;
+  bool n_was_required;
+
+  if (list == NULL)
+    return NULL;
+
+  VERIFY_LIST (list);
+
+  /* Nothing to do if the list already has at most n arguments.  */
+  if (list->repeated.count == 0 && list->initial.length <= n)
+    return list;
+
+  cut = initial_splitelement (list, n);
+
+  /* Before anything is discarded, look up whether argument n itself was
+     required.  It lives in the initial segment if n < list->initial.length,
+     and at the start of the repeated segment otherwise.  */
+  if (cut < list->initial.count)
+    n_was_required = (list->initial.element[cut].presence == FCT_REQUIRED);
+  else
+    n_was_required = (list->repeated.element[0].presence == FCT_REQUIRED);
+
+  /* Drop the initial elements from position n on.  */
+  for (k = cut; k < list->initial.count; k++)
+    {
+      list->initial.length -= list->initial.element[k].repcount;
+      free_element (&list->initial.element[k]);
+    }
+  list->initial.count = cut;
+
+  /* Drop the entire repeated segment.  */
+  for (k = 0; k < list->repeated.count; k++)
+    free_element (&list->repeated.element[k]);
+  free (list->repeated.element);
+  list->repeated.element = NULL;
+  list->repeated.allocated = 0;
+  list->repeated.count = 0;
+  list->repeated.length = 0;
+
+  /* Removing a required argument needs the backtracking step.  */
+  return (n_was_required ? backtrack_in_initial (list) : list);
+}
+
+
+/* Constrain an argument list so that the arguments n1..n2 (n1 <= n2) are of
+   a given list type or (if sublist is NULL) of a given non-list type.  NULL
+   stands for an impossible situation, i.e. a contradiction.  Relies on a
+   preceding add_required_constraint (list, n2).  */
+/* Memory effects: list is consumed.  The result is the modified list or
+   whatever add_end_constraint makes of it.  */
+static struct format_arg_list *
+add_type_constraint (struct format_arg_list *list,
+                     unsigned int n1, unsigned int n2,
+                     enum format_arg_type type,
+                     struct format_arg_list *sublist)
+{
+  struct format_arg constraint;
+  unsigned int idx;
+  unsigned int pos;
+
+  if (list == NULL)
+    return NULL;
+
+  /* Thanks to the preceding add_required_constraint, list->initial.length
+     is at least n2+1.  Put element boundaries at n1 and at n2+1.  */
+  idx = initial_splitelement (list, n1);
+  initial_splitelement (list, n2 + 1);
+
+  constraint.presence = FCT_OPTIONAL;
+  constraint.type = type;
+  constraint.list = sublist;
+
+  /* Narrow down, one by one, the elements that cover the indices n1..n2.  */
+  for (pos = n1; pos <= n2; idx++)
+    {
+      struct format_arg *target = &list->initial.element[idx];
+      struct format_arg merged;
+
+      if (!make_intersected_element (&merged, target, &constraint))
+        {
+          /* The argument at pos cannot satisfy the constraint; the list
+             has to end before it.  */
+          list = add_end_constraint (list, pos);
+          break;
+        }
+      free_element (target);
+      target->type = merged.type;
+      target->list = merged.list;
+      pos += target->repcount;
+    }
+
+  if (list != NULL)
+    VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* Constrain an argument list so that all the arguments n, n+1, n+2, ...,
+   as far as they exist, are of a given list type or (if sublist is NULL) of
+   a given non-list type.  NULL stands for an impossible situation, i.e. a
+   contradiction.  */
+/* Memory effects: list is consumed.  The result is the modified list or
+   whatever add_end_constraint makes of it.  */
+static struct format_arg_list *
+add_repeated_opt_type_constraint (struct format_arg_list *list,
+                                  unsigned int n,
+                                  enum format_arg_type type,
+                                  struct format_arg_list *sublist)
+{
+  struct format_arg constraint;
+  unsigned int idx;
+  /* Set as soon as some element cannot satisfy the constraint.  At that
+     moment, n is the position of the first argument of that element.  */
+  bool clash = false;
+
+  if (list == NULL)
+    return NULL;
+
+  idx = initial_splitelement (list, n);
+
+  constraint.presence = FCT_OPTIONAL;
+  constraint.type = type;
+  constraint.list = sublist;
+
+  /* Narrow down the initial elements that cover the indices >= n.  */
+  while (idx < list->initial.count)
+    {
+      struct format_arg *target = &list->initial.element[idx];
+      struct format_arg merged;
+
+      if (!make_intersected_element (&merged, target, &constraint))
+        {
+          clash = true;
+          break;
+        }
+      free_element (target);
+      target->type = merged.type;
+      target->list = merged.list;
+      n += target->repcount;
+      idx++;
+    }
+
+  /* Narrow down the repeated elements.  */
+  if (!clash)
+    {
+      for (idx = 0; idx < list->repeated.count; idx++)
+        {
+          struct format_arg *target = &list->repeated.element[idx];
+          struct format_arg merged;
+
+          if (!make_intersected_element (&merged, target, &constraint))
+            {
+              clash = true;
+              break;
+            }
+          free_element (target);
+          target->type = merged.type;
+          target->list = merged.list;
+          n += target->repcount;
+        }
+    }
+
+  /* On a clash, the list has to end before the offending argument.  */
+  if (clash)
+    list = add_end_constraint (list, n);
+
+  if (list != NULL)
+    VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* ============= Subroutines used by the format string parser ============= */
+
+/* Constrain *LISTP so that the arguments 0..position2 are present and the
+   arguments position1..position2 are of the given type.  A sublist is
+   expected exactly when the type is an element-wise one.  */
+static void
+add_req_type_constraint (struct format_arg_list **listp,
+                         unsigned int position1, unsigned int position2,
+                         enum format_arg_type type,
+                         struct format_arg_list *sublist)
+{
+  const bool elementwise = (type & FAT_ELEMENTWISE) != 0;
+
+  *listp = add_required_constraint (*listp, position2);
+
+  if (elementwise)
+    {
+      ASSERT (sublist != NULL);
+    }
+  else
+    {
+      ASSERT (sublist == NULL);
+    }
+
+  /* Non-element-wise types never carry a sublist.  */
+  *listp = add_type_constraint (*listp, position1, position2, type,
+                                elementwise ? sublist : NULL);
+}
+
+
+/* ======================= The format string parser ======================= */
+
+/* Constructors for the error messages specific to this parser.  Each macro
+   expands to an xasprintf or xstrdup call on a translatable message; the
+   parser stores the result in *invalid_reason.  */
+
+/* A position range 'm:n$' with m > n.  */
+#define INVALID_ARGNO_ORDER(directive_number) \
+  xasprintf (_("In the directive number %u, the first argument number is greater than the second argument number."), directive_number)
+
+/* A compound specifier whose argument list has a repeated part.  */
+#define INVALID_COMPOUND_VARARG(directive_number) \
+  xasprintf (_("In the directive number %u, the compound specifier consumes a variable number of arguments."), directive_number)
+
+/* A compound specifier that consumes neither 1 nor 2 arguments.  */
+#define INVALID_COMPOUND_ARGCOUNT(directive_number, num_arguments) \
+  xasprintf (_("In the directive number %u, the compound specifier consumes %u arguments."), directive_number, num_arguments)
+
+/* A '%|' encountered while not inside a compound specifier.  */
+#define INVALID_BAR_OUTSIDE_COMPOUND() \
+  xstrdup (_("Found '%|' outside of '%(...%)'."))
+
+/* The string ended inside the delimiter part that follows '%|'.  */
+#define INVALID_UNTERMINATED_COMPOUND() \
+  xstrdup (_("The string ends in the middle of a compound specifier."))
+
+/* Something other than '%%' or '%)' in the delimiter part after '%|'.  */
+#define INVALID_COMPOUND_DELIMITER(directive_number) \
+  xasprintf (_("In the directive number %u, there is an invalid directive in the delimiter part of a compound specifier."), directive_number)
+
+/* Unbalanced '%(' or '%)'.  */
+#define INVALID_NESTING(found_char, notfound_char) \
+  xasprintf (_("Found '%%%c' without matching '%%%c'."), found_char, notfound_char)
+
+/* A directive needs a next argument after a preceding directive has
+   already claimed all arguments up to the last one.  */
+#define INVALID_ARG_PAST_LAST(directive_number) \
+  xasprintf (_("The directive number %u references an argument after the last argument."), directive_number)
+
+/* Drop any earlier definition of this macro before supplying the wording
+   used by this parser.  */
+#undef INVALID_INCOMPATIBLE_ARG_TYPES
+#define INVALID_INCOMPATIBLE_ARG_TYPES() \
+  xstrdup (_("The string refers to some argument in incompatible ways."))
+
+/* Parse a piece of format string, until the matching terminating format
+   directive is encountered.
+   spec is the struct spec to fill: its directive counter is incremented and
+   its argument list is constrained according to the directives found.
+   spec->list may be NULL afterwards if the directives contradict each other
+   and one of them ranges up to the last argument.
+   *formatp is the remainder of the format string.  It is updated upon valid
+   return; inside a compound specifier it is then left pointing at the ')'
+   of the terminating '%)'.
+   compound is true inside a compound specifier.
+   fdi is an array to be filled with format directive indicators, or NULL.
+   Its first element corresponds to the first character of *formatp.
+   If the format string is invalid, false is returned and *invalid_reason is
+   set to an error message explaining why.  */
+static bool
+parse_upto (struct spec *spec,
+            const char **formatp, bool compound,
+            char *fdi, char **invalid_reason)
+{
+  const char *format = *formatp;
+  /* Used by the FDI_SET macro.  */
+  const char *const format_start = format;
+  /* Number of arguments consumed so far, or UINT_MAX once a directive has
+     claimed all arguments up to the last one.  */
+  unsigned int arg_count = 0;
+
+  for (; *format != '\0'; )
+    {
+      char c = *format++;
+
+      if (c == '%')
+        {
+          FDI_SET (format - 1, FMTDIR_START);
+
+          /* Count number of directives.  */
+          spec->directives++;
+
+          if (*format == '\0')
+            {
+              *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+              FDI_SET (format - 1, FMTDIR_ERROR);
+              return false;
+            }
+          if (*format == '%')
+            /* A doubled percent-sign.  */
+            format++;
+          else
+            {
+              /* A directive.  */
+              unsigned int first_number = 0;
+              unsigned int second_number = 0;
+              bool second_is_last = false;
+              unsigned int width_number = 0;
+              bool width_from_arg = false;
+              unsigned int precision_number = 0;
+              bool precision_from_arg = false;
+              bool separator_digits_from_arg = false;
+              bool separator_char_from_arg = false;
+
+              /* Parse position: 'm$', 'm:n$' or 'm:$'.  */
+              if (c_isdigit (*format))
+                {
+                  const char *f = format;
+                  unsigned int m = 0;
+
+                  do
+                    {
+                      m = 10 * m + (*f - '0');
+                      f++;
+                    }
+                  while (c_isdigit (*f));
+
+                  if (*f == '$')
+                    {
+                      if (m == 0)
+                        {
+                          *invalid_reason = INVALID_ARGNO_0 (spec->directives);
+                          FDI_SET (f, FMTDIR_ERROR);
+                          return false;
+                        }
+                      first_number = m;
+                      format = ++f;
+                    }
+                  else if (*f == ':')
+                    {
+                      f++;
+                      if (c_isdigit (*f))
+                        {
+                          unsigned int m2 = 0;
+
+                          do
+                            {
+                              m2 = 10 * m2 + (*f - '0');
+                              f++;
+                            }
+                          while (c_isdigit (*f));
+
+                          if (*f == '$')
+                            {
+                              if (m2 == 0)
+                                {
+                                  *invalid_reason = INVALID_ARGNO_0 (spec->directives);
+                                  FDI_SET (f, FMTDIR_ERROR);
+                                  return false;
+                                }
+                              if (m > m2)
+                                {
+                                  *invalid_reason = INVALID_ARGNO_ORDER (spec->directives);
+                                  FDI_SET (f, FMTDIR_ERROR);
+                                  return false;
+                                }
+                              first_number = m;
+                              second_number = m2;
+                              format = ++f;
+                            }
+                        }
+                      else if (*f == '$')
+                        {
+                          first_number = m;
+                          second_is_last = true;
+                          format = ++f;
+                        }
+                    }
+                }
+
+              /* Parse flags.  */
+              while (*format == ' ' || *format == '+' || *format == '-'
+                     || *format == '#' || *format == '0' || *format == '=')
+                format++;
+
+              /* Parse width.  */
+              if (c_isdigit (*format))
+                {
+                  do format++; while (c_isdigit (*format));
+                }
+              else if (*format == '*')
+                {
+                  format++;
+                  if (c_isdigit (*format))
+                    {
+                      const char *f = format;
+                      unsigned int m = 0;
+
+                      do
+                        {
+                          m = 10 * m + (*f - '0');
+                          f++;
+                        }
+                      while (c_isdigit (*f));
+
+                      if (*f == '$')
+                        {
+                          if (m == 0)
+                            {
+                              *invalid_reason = INVALID_WIDTH_ARGNO_0 (spec->directives);
+                              FDI_SET (f, FMTDIR_ERROR);
+                              return false;
+                            }
+                          width_number = m;
+                          format = ++f;
+                        }
+                    }
+                  if (width_number == 0)
+                    width_from_arg = true;
+                }
+
+              /* Parse precision.  */
+              if (*format == '.')
+                {
+                  format++;
+
+                  if (c_isdigit (*format))
+                    {
+                      do format++; while (c_isdigit (*format));
+                    }
+                  else if (*format == '*')
+                    {
+                      format++;
+                      if (c_isdigit (*format))
+                        {
+                          const char *f = format;
+                          unsigned int m = 0;
+
+                          do
+                            {
+                              m = 10 * m + (*f - '0');
+                              f++;
+                            }
+                          while (c_isdigit (*f));
+
+                          if (*f == '$')
+                            {
+                              if (m == 0)
+                                {
+                                  /* Blame the precision, not the width.  */
+                                  *invalid_reason = INVALID_PRECISION_ARGNO_0 (spec->directives);
+                                  FDI_SET (f, FMTDIR_ERROR);
+                                  return false;
+                                }
+                              precision_number = m;
+                              format = ++f;
+                            }
+                        }
+                      if (precision_number == 0)
+                        precision_from_arg = true;
+                    }
+                }
+
+              /* Parse separator.  */
+              if (*format == ',')
+                {
+                  format++;
+
+                  if (c_isdigit (*format))
+                    {
+                      do format++; while (c_isdigit (*format));
+                    }
+                  else if (*format == '*')
+                    {
+                      format++;
+                      separator_digits_from_arg = true;
+                    }
+
+                  if (*format == '?')
+                    {
+                      format++;
+                      separator_char_from_arg = true;
+                    }
+                }
+
+              enum format_arg_type type;
+              /* Argument list of a compound specifier.  It is owned by this
+                 block: the constraints added below do not take it over, so
+                 it must be freed on every way out.  */
+              struct format_arg_list *elementwise_list = NULL;
+
+              /* Parse specifier.  */
+              switch (*format)
+                {
+                case 's':
+                  type = FAT_BOOL | FAT_INTEGER | FAT_FLOATINGPOINT | FAT_CHAR | FAT_ARRAY | FAT_ASSOCIATIVE | FAT_IRANGE | FAT_STRUCT | FAT_POINTER;
+                  break;
+                case 'c':
+                  type = FAT_CHAR;
+                  break;
+                case 'd': case 'u': case 'b': case 'o':
+                  type = FAT_BOOL | FAT_INTEGER | FAT_CHAR;
+                  break;
+                case 'x': case 'X':
+                  type = FAT_BOOL | FAT_INTEGER | FAT_CHAR | FAT_POINTER;
+                  break;
+                case 'e': case 'E': case 'f': case 'F':
+                case 'g': case 'G': case 'a': case 'A':
+                  type = FAT_INTEGER | FAT_FLOATINGPOINT;
+                  break;
+                case 'r':
+                  type = FAT_BOOL | FAT_INTEGER | FAT_FLOATINGPOINT | FAT_CHAR | FAT_ARRAY | FAT_IRANGE;
+                  break;
+                case '(':
+                  /* A compound specifier.  */
+                  format++;
+                  {
+                    struct spec sub_spec;
+                    sub_spec.directives = 0;
+                    sub_spec.list = make_unconstrained_list ();
+                    *formatp = format;
+                    /* FDI_SET computes its index relative to the local
+                       format_start.  The nested invocation starts anew at
+                       format, so hand it the matching part of fdi.  */
+                    if (!parse_upto (&sub_spec, formatp, true,
+                                     fdi != NULL
+                                     ? fdi + (format - format_start)
+                                     : NULL,
+                                     invalid_reason))
+                      {
+                        FDI_SET (**formatp == '\0' ? *formatp - 1 : *formatp,
+                                 FMTDIR_ERROR);
+                        if (sub_spec.list != NULL)
+                          free_list (sub_spec.list);
+                        return false;
+                      }
+                    format = *formatp;
+                    elementwise_list = sub_spec.list;
+                    if (elementwise_list == NULL)
+                      {
+                        /* The directives inside the compound specifier
+                           contradict each other, and one of them ranges up
+                           to the last argument.  */
+                        *invalid_reason = INVALID_INCOMPATIBLE_ARG_TYPES ();
+                        FDI_SET (format - 1, FMTDIR_ERROR);
+                        return false;
+                      }
+                    if (elementwise_list->repeated.count > 0)
+                      {
+                        /* Test case: "%(%1:$s%)"  */
+                        *invalid_reason = INVALID_COMPOUND_VARARG (spec->directives);
+                        FDI_SET (format - 1, FMTDIR_ERROR);
+                        free_list (elementwise_list);
+                        return false;
+                      }
+                    if (elementwise_list->initial.length == 1)
+                      type = FAT_ELEMENTWISE_1;
+                    else if (elementwise_list->initial.length == 2)
+                      type = FAT_ELEMENTWISE_2;
+                    else
+                      {
+                        /* Test case: "%(%s %s %s%)"  */
+                        *invalid_reason = INVALID_COMPOUND_ARGCOUNT (spec->directives, elementwise_list->initial.length);
+                        FDI_SET (format - 1, FMTDIR_ERROR);
+                        free_list (elementwise_list);
+                        return false;
+                      }
+                  }
+                  break;
+                case '|':
+                  if (!compound)
+                    {
+                      *invalid_reason = INVALID_BAR_OUTSIDE_COMPOUND ();
+                      FDI_SET (format, FMTDIR_ERROR);
+                      return false;
+                    }
+                  /* Parse the second part of a compound specifier.  */
+                  format++;
+                  for (;;)
+                    {
+                      if (*format == '\0')
+                        {
+                          *invalid_reason = INVALID_UNTERMINATED_COMPOUND ();
+                          FDI_SET (format - 1, FMTDIR_ERROR);
+                          return false;
+                        }
+                      if (*format == '%')
+                        {
+                          format++;
+                          if (*format == '%')
+                            format++;
+                          else if (*format == ')')
+                            break;
+                          else
+                            {
+                              *invalid_reason = INVALID_COMPOUND_DELIMITER (spec->directives);
+                              FDI_SET (format, FMTDIR_ERROR);
+                              return false;
+                            }
+                        }
+                      else
+                        format++;
+                    }
+                  /* Here (*format == ')').  */
+                  FALLTHROUGH;
+                case ')':
+                  if (!compound)
+                    {
+                      *invalid_reason = INVALID_NESTING (')', '(');
+                      FDI_SET (format, FMTDIR_ERROR);
+                      return false;
+                    }
+                  goto done;
+                default:
+                  if (*format == '\0')
+                    {
+                      *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+                      FDI_SET (format - 1, FMTDIR_ERROR);
+                    }
+                  else
+                    {
+                      *invalid_reason = INVALID_CONVERSION_SPECIFIER (spec->directives, *format);
+                      FDI_SET (format, FMTDIR_ERROR);
+                    }
+                  return false;
+                }
+
+              /* The width argument, if any.  */
+              if (width_number > 0)
+                {
+                  add_req_type_constraint (&spec->list, width_number - 1, width_number - 1,
+                                           FAT_INTEGER, NULL);
+                  if (arg_count < width_number)
+                    arg_count = width_number;
+                }
+              else if (width_from_arg)
+                {
+                  if (arg_count == UINT_MAX)
+                    goto arg_past_last;
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           FAT_INTEGER, NULL);
+                  arg_count++;
+                }
+
+              /* The precision argument, if any.  */
+              if (precision_number > 0)
+                {
+                  add_req_type_constraint (&spec->list, precision_number - 1, precision_number - 1,
+                                           FAT_INTEGER, NULL);
+                  if (arg_count < precision_number)
+                    arg_count = precision_number;
+                }
+              else if (precision_from_arg)
+                {
+                  if (arg_count == UINT_MAX)
+                    goto arg_past_last;
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           FAT_INTEGER, NULL);
+                  arg_count++;
+                }
+
+              /* The separator arguments, if any.  */
+              if (separator_digits_from_arg)
+                {
+                  if (arg_count == UINT_MAX)
+                    goto arg_past_last;
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           FAT_INTEGER, NULL);
+                  arg_count++;
+                }
+
+              if (separator_char_from_arg)
+                {
+                  if (arg_count == UINT_MAX)
+                    goto arg_past_last;
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           FAT_CHAR, NULL);
+                  arg_count++;
+                }
+
+              /* The argument or arguments to be formatted.  */
+              if (first_number > 0)
+                {
+                  if (second_number > 0)
+                    {
+                      add_req_type_constraint (&spec->list, first_number - 1, second_number - 1,
+                                               type, elementwise_list);
+                      if (arg_count < second_number)
+                        arg_count = second_number;
+                    }
+                  else if (second_is_last)
+                    {
+                      add_req_type_constraint (&spec->list, first_number - 1, first_number - 1,
+                                               type, elementwise_list);
+                      spec->list = add_repeated_opt_type_constraint (spec->list, first_number,
+                                                                     type, elementwise_list);
+                      arg_count = UINT_MAX;
+                    }
+                  else
+                    {
+                      add_req_type_constraint (&spec->list, first_number - 1, first_number - 1,
+                                               type, elementwise_list);
+                      if (arg_count < first_number)
+                        arg_count = first_number;
+                    }
+                }
+              else
+                {
+                  if (arg_count == UINT_MAX)
+                    goto arg_past_last;
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           type, elementwise_list);
+                  arg_count++;
+                }
+
+              if (type & FAT_ELEMENTWISE)
+                free_list (elementwise_list);
+
+              FDI_SET (format, FMTDIR_END);
+
+              format++;
+              continue;
+
+             arg_past_last:
+              /* The directive needs a next argument, but a preceding
+                 directive has already claimed all arguments up to the last
+                 one.  Here format points to the specifier character.  */
+              *invalid_reason = INVALID_ARG_PAST_LAST (spec->directives);
+              FDI_SET (format, FMTDIR_ERROR);
+              if (elementwise_list != NULL)
+                free_list (elementwise_list);
+              return false;
+            }
+        }
+    }
+
+  if (compound)
+    {
+      *invalid_reason = INVALID_NESTING ('(', ')');
+      return false;
+    }
+
+ done:
+  *formatp = format;
+
+  /* Extra arguments at the end are not allowed.  */
+  if (arg_count != UINT_MAX)
+    {
+      spec->list = add_end_constraint (spec->list, arg_count);
+      if (spec->list == NULL)
+        {
+          /* Contradictory argument type information.  The caller relies on
+             *invalid_reason being set whenever the result is false.  */
+          *invalid_reason = INVALID_INCOMPATIBLE_ARG_TYPES ();
+          return false;
+        }
+    }
+
+  return true;
+}
+
+
+/* ============== Top level format string handling functions ============== */
+
+/* Parse the format string FORMAT.
+   translated is not used by this parser.
+   fdi is an array to be filled with format directive indicators, or NULL.
+   Returns a freshly allocated struct spec, or NULL if the format string is
+   invalid or refers to some argument in incompatible ways.  For
+   incompatible argument types, *invalid_reason is set here; for an invalid
+   format string, it is left as parse_upto set it.  */
+static void *
+format_parse (const char *format, bool translated, char *fdi,
+              char **invalid_reason)
+{
+  struct spec spec;
+  struct spec *result;
+
+  spec.directives = 0;
+  spec.list = make_unconstrained_list ();
+
+  if (!parse_upto (&spec, &format, false,
+                   fdi, invalid_reason))
+    {
+      /* Invalid format string.  Release the argument list built so far,
+         unless a contradiction has already disposed of it.  */
+      if (spec.list != NULL)
+        free_list (spec.list);
+      return NULL;
+    }
+
+  if (spec.list == NULL)
+    {
+      /* Contradictory argument type information.  */
+      *invalid_reason = INVALID_INCOMPATIBLE_ARG_TYPES ();
+      return NULL;
+    }
+
+  /* Normalize the result.  */
+  normalize_list (spec.list);
+
+  result = XMALLOC (struct spec);
+  *result = spec;
+  return result;
+}
+
+/* Release a descriptor returned by format_parse: its argument list and the
+   struct spec itself.  */
+static void
+format_free (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+
+  free_list (spec->list);
+  /* The struct was allocated by format_parse.  */
+  free (spec);
+}
+
+/* Return the directive count recorded in a descriptor returned by
+   format_parse.  */
+static int
+format_get_number_of_directives (void *descr)
+{
+  const struct spec *parsed = descr;
+
+  return parsed->directives;
+}
+
+/* Compare the argument lists of a msgid and of a msgstr descriptor.
+   Returns true if they do not match; in that case the mismatch is reported
+   through error_logger, if that is non-NULL.  */
+static bool
+format_check (void *msgid_descr, void *msgstr_descr, bool equality,
+              formatstring_error_logger_t error_logger, void *error_logger_data,
+              const char *pretty_msgid, const char *pretty_msgstr)
+{
+  struct spec *id_spec = msgid_descr;
+  struct spec *str_spec = msgstr_descr;
+  bool mismatch;
+
+  /* The formatting functions in the D module std.format treat an unused
+     argument at the end of the argument list as an error.  Hence the
+     translator must not omit any of the arguments, and the check is always
+     done for equality.
+     With two or more directives, this could be relaxed.  Example:
+       "%2$s bought a piece." vs. "%2$s bought %1$d pieces."
+     There, the unused argument (argument 1) is not at the end of the
+     argument list.  But that is of no use in the more frequent case
+       "a piece" vs. "%d pieces"
+     for which the documentation recommends the zero-precision workaround:
+       "%.0sa piece" vs. "%s pieces"
+   */
+  equality = true;
+
+  if (equality)
+    {
+      mismatch = !equal_list (id_spec->list, str_spec->list);
+      if (mismatch && error_logger)
+        error_logger (error_logger_data,
+                      _("format specifications in '%s' and '%s' are not equivalent"),
+                      pretty_msgid, pretty_msgstr);
+    }
+  else
+    {
+      /* The msgstr arguments have to be a subset of the msgid arguments:
+         intersecting with the msgid list must not change the msgstr list.  */
+      struct format_arg_list *common =
+        make_intersected_list (copy_list (id_spec->list),
+                               copy_list (str_spec->list));
+
+      if (common != NULL)
+        {
+          normalize_list (common);
+          mismatch = !equal_list (common, str_spec->list);
+        }
+      else
+        mismatch = true;
+
+      if (mismatch && error_logger)
+        error_logger (error_logger_data,
+                      _("format specifications in '%s' are not a subset of those in '%s'"),
+                      pretty_msgstr, pretty_msgid);
+    }
+
+  return mismatch;
+}
+
+
+/* The format string parser descriptor for D format strings.  The members
+   are initialized positionally with the functions defined above.  */
+struct formatstring_parser formatstring_d =
+{
+  format_parse,
+  format_free,
+  format_get_number_of_directives,
+  NULL, /* not provided; NOTE(review): confirm in format.h which member this is */
+  format_check
+};
+
+
+/* ============================= Testing code ============================= */
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+   format_parse for strings read from standard input.  */
+
+#include <stdio.h>
+
+static void print_list (struct format_arg_list *list);
+
+/* Print one argument list element: ". " in front of an optional one, then
+   either the element-wise arity followed by the sublist, or "*" for any
+   type, or one letter per type bit.  */
+static void
+print_element (struct format_arg *element)
+{
+  /* The letters of the individual type bits, in output order.  */
+  static const struct { unsigned int bit; const char *letter; } type_letters[] =
+    {
+      { FAT_BOOL,          "b" },
+      { FAT_INTEGER,       "i" },
+      { FAT_FLOATINGPOINT, "f" },
+      { FAT_CHAR,          "c" },
+      { FAT_ARRAY,         "a" },
+      { FAT_ASSOCIATIVE,   "@" },
+      { FAT_IRANGE,        "r" },
+      { FAT_STRUCT,        "s" },
+      { FAT_POINTER,       "p" }
+    };
+  size_t k;
+
+  if (element->presence == FCT_OPTIONAL)
+    printf (". ");
+  else if (element->presence != FCT_REQUIRED)
+    abort ();
+
+  if (element->type == FAT_NONE)
+    abort ();
+
+  if (element->type & FAT_ELEMENTWISE)
+    {
+      if (element->type == FAT_ELEMENTWISE_1)
+        printf ("1");
+      else if (element->type == FAT_ELEMENTWISE_2)
+        printf ("2");
+      else
+        abort ();
+      print_list (element->list);
+      return;
+    }
+
+  if (element->type == FAT_ANY_TYPE)
+    {
+      printf ("*");
+      return;
+    }
+
+  for (k = 0; k < sizeof type_letters / sizeof type_letters[0]; k++)
+    if (element->type & type_letters[k].bit)
+      fputs (type_letters[k].letter, stdout);
+}
+
+/* Print an argument list in parentheses: the initial elements, each as
+   often as its repcount says, then, if there is a repeated segment, " |"
+   and the repeated elements.  */
+static void
+print_list (struct format_arg_list *list)
+{
+  unsigned int idx;
+  unsigned int rep;
+
+  printf ("(");
+
+  for (idx = 0; idx < list->initial.count; idx++)
+    {
+      struct format_arg *e = &list->initial.element[idx];
+
+      for (rep = 0; rep < e->repcount; rep++)
+        {
+          /* A separator goes before everything but the very first slot.  */
+          if (!(idx == 0 && rep == 0))
+            printf (" ");
+          print_element (e);
+        }
+    }
+
+  if (list->repeated.count > 0)
+    {
+      printf (" |");
+      for (idx = 0; idx < list->repeated.count; idx++)
+        {
+          struct format_arg *e = &list->repeated.element[idx];
+
+          for (rep = 0; rep < e->repcount; rep++)
+            {
+              printf (" ");
+              print_element (e);
+            }
+        }
+    }
+
+  printf (")");
+}
+
+/* Print the argument list of a descriptor returned by format_parse, or
+   "INVALID" for a NULL descriptor.  */
+static void
+format_print (void *descr)
+{
+  const struct spec *parsed = descr;
+
+  if (parsed != NULL)
+    print_list (parsed->list);
+  else
+    printf ("INVALID");
+}
+
+/* Read format strings from standard input, one per line, and print for
+   each the argument list specification returned by format_parse, followed
+   by the error message if the string is invalid.  */
+int
+main (void)
+{
+  for (;;)
+    {
+      char *line = NULL;
+      size_t line_size = 0;
+      /* getline returns an ssize_t; an int could truncate it.  */
+      ssize_t line_len;
+      char *invalid_reason;
+      void *descr;
+
+      line_len = getline (&line, &line_size, stdin);
+      if (line_len < 0)
+        {
+          /* getline may have allocated a buffer even though it failed.  */
+          free (line);
+          break;
+        }
+      if (line_len > 0 && line[line_len - 1] == '\n')
+        line[--line_len] = '\0';
+
+      invalid_reason = NULL;
+      descr = format_parse (line, false, NULL, &invalid_reason);
+
+      format_print (descr);
+      printf ("\n");
+      if (descr == NULL)
+        /* Passing a null pointer for %s would be undefined behavior.  */
+        printf ("%s\n",
+                invalid_reason != NULL ? invalid_reason : "(no reason given)");
+      else
+        format_free (descr);
+
+      free (invalid_reason);
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-d.c ../gnulib-lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */
diff --git a/gettext-tools/src/format.c b/gettext-tools/src/format.c

index e713586ebfb01301fff39af8fccf0860ddfa532b..c8388ddc2e36e91a327707427db36e8a15ff60e5 100644 (file)
--- a/gettext-tools/src/format.c
+++ b/gettext-tools/src/format.c
@@ -54,6 +54,7 @@ struct formatstring_parser *formatstring_parsers[NFORMATS] =
    /* format_awk */              &formatstring_awk,
    /* format_lua */              &formatstring_lua,
    /* format_pascal */           &formatstring_pascal,
+  /* format_d */                &formatstring_d,
    /* format_smalltalk */        &formatstring_smalltalk,
    /* format_qt */               &formatstring_qt,
    /* format_qt_plural */        &formatstring_qt_plural,
diff --git a/gettext-tools/src/format.h b/gettext-tools/src/format.h

index 1634bfbbf59bf32b812fce3fff671f7d1453421c..4bf8a1856ee57ac8f456cfe54b2398c4805a2d16 100644 (file)
--- a/gettext-tools/src/format.h
+++ b/gettext-tools/src/format.h
@@ -120,6 +120,7 @@ extern DLL_VARIABLE struct formatstring_parser formatstring_sh;
  extern DLL_VARIABLE struct formatstring_parser formatstring_awk;
  extern DLL_VARIABLE struct formatstring_parser formatstring_lua;
  extern DLL_VARIABLE struct formatstring_parser formatstring_pascal;
+extern DLL_VARIABLE struct formatstring_parser formatstring_d;
  extern DLL_VARIABLE struct formatstring_parser formatstring_smalltalk;
  extern DLL_VARIABLE struct formatstring_parser formatstring_qt;
  extern DLL_VARIABLE struct formatstring_parser formatstring_qt_plural;
diff --git a/gettext-tools/src/html5-entities.h b/gettext-tools/src/html5-entities.h

new file mode 100644 (file)

index 0000000..f5a73a8
--- /dev/null
+++ b/gettext-tools/src/html5-entities.h
@@ -0,0 +1,2195 @@
+/* List of HTML 5 entities.
+   Copyright (C) 2025 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025.  */
+/* Data extracted from
+   <https://html.spec.whatwg.org/multipage/named-characters.html>  */
+
+/* Table of HTML5 named character entities.
+   Split into two tables, one for name lengths <= 15, one for name lengths > 15,
+   in order to reduce the binary size of the tables.
+   Each of the tables is sorted by name in ascending order.  */
+static const struct { const char name[15 + 1]; const char value[6 + 1]; } html5short[] =
+{
+  { "AElig", "Æ" }, /* U+00C6 */
+  { "AMP", "&" }, /* U+0026 */
+  { "Aacute", "Á" }, /* U+00C1 */
+  { "Abreve", "Ă" }, /* U+0102 */
+  { "Acirc", "Â" }, /* U+00C2 */
+  { "Acy", "А" }, /* U+0410 */
+  { "Afr", "𝔄" }, /* U+1D504 */
+  { "Agrave", "À" }, /* U+00C0 */
+  { "Alpha", "Α" }, /* U+0391 */
+  { "Amacr", "Ā" }, /* U+0100 */
+  { "And", "⩓" }, /* U+2A53 */
+  { "Aogon", "Ą" }, /* U+0104 */
+  { "Aopf", "𝔸" }, /* U+1D538 */
+  { "ApplyFunction", "⁡" }, /* U+2061 */
+  { "Aring", "Å" }, /* U+00C5 */
+  { "Ascr", "𝒜" }, /* U+1D49C */
+  { "Assign", "≔" }, /* U+2254 */
+  { "Atilde", "Ã" }, /* U+00C3 */
+  { "Auml", "Ä" }, /* U+00C4 */
+  { "Backslash", "∖" }, /* U+2216 */
+  { "Barv", "⫧" }, /* U+2AE7 */
+  { "Barwed", "⌆" }, /* U+2306 */
+  { "Bcy", "Б" }, /* U+0411 */
+  { "Because", "∵" }, /* U+2235 */
+  { "Bernoullis", "ℬ" }, /* U+212C */
+  { "Beta", "Β" }, /* U+0392 */
+  { "Bfr", "𝔅" }, /* U+1D505 */
+  { "Bopf", "𝔹" }, /* U+1D539 */
+  { "Breve", "˘" }, /* U+02D8 */
+  { "Bscr", "ℬ" }, /* U+212C */
+  { "Bumpeq", "≎" }, /* U+224E */
+  { "CHcy", "Ч" }, /* U+0427 */
+  { "COPY", "©" }, /* U+00A9 */
+  { "Cacute", "Ć" }, /* U+0106 */
+  { "Cap", "⋒" }, /* U+22D2 */
+  { "Cayleys", "ℭ" }, /* U+212D */
+  { "Ccaron", "Č" }, /* U+010C */
+  { "Ccedil", "Ç" }, /* U+00C7 */
+  { "Ccirc", "Ĉ" }, /* U+0108 */
+  { "Cconint", "∰" }, /* U+2230 */
+  { "Cdot", "Ċ" }, /* U+010A */
+  { "Cedilla", "¸" }, /* U+00B8 */
+  { "CenterDot", "·" }, /* U+00B7 */
+  { "Cfr", "ℭ" }, /* U+212D */
+  { "Chi", "Χ" }, /* U+03A7 */
+  { "CircleDot", "⊙" }, /* U+2299 */
+  { "CircleMinus", "⊖" }, /* U+2296 */
+  { "CirclePlus", "⊕" }, /* U+2295 */
+  { "CircleTimes", "⊗" }, /* U+2297 */
+  { "CloseCurlyQuote", "’" }, /* U+2019 */
+  { "Colon", "∷" }, /* U+2237 */
+  { "Colone", "⩴" }, /* U+2A74 */
+  { "Congruent", "≡" }, /* U+2261 */
+  { "Conint", "∯" }, /* U+222F */
+  { "ContourIntegral", "∮" }, /* U+222E */
+  { "Copf", "ℂ" }, /* U+2102 */
+  { "Coproduct", "∐" }, /* U+2210 */
+  { "Cross", "⨯" }, /* U+2A2F */
+  { "Cscr", "𝒞" }, /* U+1D49E */
+  { "Cup", "⋓" }, /* U+22D3 */
+  { "CupCap", "≍" }, /* U+224D */
+  { "DD", "ⅅ" }, /* U+2145 */
+  { "DDotrahd", "⤑" }, /* U+2911 */
+  { "DJcy", "Ђ" }, /* U+0402 */
+  { "DScy", "Ѕ" }, /* U+0405 */
+  { "DZcy", "Џ" }, /* U+040F */
+  { "Dagger", "‡" }, /* U+2021 */
+  { "Darr", "↡" }, /* U+21A1 */
+  { "Dashv", "⫤" }, /* U+2AE4 */
+  { "Dcaron", "Ď" }, /* U+010E */
+  { "Dcy", "Д" }, /* U+0414 */
+  { "Del", "∇" }, /* U+2207 */
+  { "Delta", "Δ" }, /* U+0394 */
+  { "Dfr", "𝔇" }, /* U+1D507 */
+  { "DiacriticalDot", "˙" }, /* U+02D9 */
+  { "Diamond", "⋄" }, /* U+22C4 */
+  { "DifferentialD", "ⅆ" }, /* U+2146 */
+  { "Dopf", "𝔻" }, /* U+1D53B */
+  { "Dot", "¨" }, /* U+00A8 */
+  { "DotDot", "⃜" }, /* U+20DC */
+  { "DotEqual", "≐" }, /* U+2250 */
+  { "DoubleDot", "¨" }, /* U+00A8 */
+  { "DoubleDownArrow", "⇓" }, /* U+21D3 */
+  { "DoubleLeftArrow", "⇐" }, /* U+21D0 */
+  { "DoubleLeftTee", "⫤" }, /* U+2AE4 */
+  { "DoubleRightTee", "⊨" }, /* U+22A8 */
+  { "DoubleUpArrow", "⇑" }, /* U+21D1 */
+  { "DownArrow", "↓" }, /* U+2193 */
+  { "DownArrowBar", "⤓" }, /* U+2913 */
+  { "DownBreve", "̑" }, /* U+0311 */
+  { "DownLeftVector", "↽" }, /* U+21BD */
+  { "DownRightVector", "⇁" }, /* U+21C1 */
+  { "DownTee", "⊤" }, /* U+22A4 */
+  { "DownTeeArrow", "↧" }, /* U+21A7 */
+  { "Downarrow", "⇓" }, /* U+21D3 */
+  { "Dscr", "𝒟" }, /* U+1D49F */
+  { "Dstrok", "Đ" }, /* U+0110 */
+  { "ENG", "Ŋ" }, /* U+014A */
+  { "ETH", "Ð" }, /* U+00D0 */
+  { "Eacute", "É" }, /* U+00C9 */
+  { "Ecaron", "Ě" }, /* U+011A */
+  { "Ecirc", "Ê" }, /* U+00CA */
+  { "Ecy", "Э" }, /* U+042D */
+  { "Edot", "Ė" }, /* U+0116 */
+  { "Efr", "𝔈" }, /* U+1D508 */
+  { "Egrave", "È" }, /* U+00C8 */
+  { "Element", "∈" }, /* U+2208 */
+  { "Emacr", "Ē" }, /* U+0112 */
+  { "Eogon", "Ę" }, /* U+0118 */
+  { "Eopf", "𝔼" }, /* U+1D53C */
+  { "Epsilon", "Ε" }, /* U+0395 */
+  { "Equal", "⩵" }, /* U+2A75 */
+  { "EqualTilde", "≂" }, /* U+2242 */
+  { "Equilibrium", "⇌" }, /* U+21CC */
+  { "Escr", "ℰ" }, /* U+2130 */
+  { "Esim", "⩳" }, /* U+2A73 */
+  { "Eta", "Η" }, /* U+0397 */
+  { "Euml", "Ë" }, /* U+00CB */
+  { "Exists", "∃" }, /* U+2203 */
+  { "ExponentialE", "ⅇ" }, /* U+2147 */
+  { "Fcy", "Ф" }, /* U+0424 */
+  { "Ffr", "𝔉" }, /* U+1D509 */
+  { "Fopf", "𝔽" }, /* U+1D53D */
+  { "ForAll", "∀" }, /* U+2200 */
+  { "Fouriertrf", "ℱ" }, /* U+2131 */
+  { "Fscr", "ℱ" }, /* U+2131 */
+  { "GJcy", "Ѓ" }, /* U+0403 */
+  { "GT", ">" }, /* U+003E */
+  { "Gamma", "Γ" }, /* U+0393 */
+  { "Gammad", "Ϝ" }, /* U+03DC */
+  { "Gbreve", "Ğ" }, /* U+011E */
+  { "Gcedil", "Ģ" }, /* U+0122 */
+  { "Gcirc", "Ĝ" }, /* U+011C */
+  { "Gcy", "Г" }, /* U+0413 */
+  { "Gdot", "Ġ" }, /* U+0120 */
+  { "Gfr", "𝔊" }, /* U+1D50A */
+  { "Gg", "⋙" }, /* U+22D9 */
+  { "Gopf", "𝔾" }, /* U+1D53E */
+  { "GreaterEqual", "≥" }, /* U+2265 */
+  { "GreaterGreater", "⪢" }, /* U+2AA2 */
+  { "GreaterLess", "≷" }, /* U+2277 */
+  { "GreaterTilde", "≳" }, /* U+2273 */
+  { "Gscr", "𝒢" }, /* U+1D4A2 */
+  { "Gt", "≫" }, /* U+226B */
+  { "HARDcy", "Ъ" }, /* U+042A */
+  { "Hacek", "ˇ" }, /* U+02C7 */
+  { "Hat", "^" }, /* U+005E */
+  { "Hcirc", "Ĥ" }, /* U+0124 */
+  { "Hfr", "ℌ" }, /* U+210C */
+  { "HilbertSpace", "ℋ" }, /* U+210B */
+  { "Hopf", "ℍ" }, /* U+210D */
+  { "HorizontalLine", "─" }, /* U+2500 */
+  { "Hscr", "ℋ" }, /* U+210B */
+  { "Hstrok", "Ħ" }, /* U+0126 */
+  { "HumpDownHump", "≎" }, /* U+224E */
+  { "HumpEqual", "≏" }, /* U+224F */
+  { "IEcy", "Е" }, /* U+0415 */
+  { "IJlig", "Ĳ" }, /* U+0132 */
+  { "IOcy", "Ё" }, /* U+0401 */
+  { "Iacute", "Í" }, /* U+00CD */
+  { "Icirc", "Î" }, /* U+00CE */
+  { "Icy", "И" }, /* U+0418 */
+  { "Idot", "İ" }, /* U+0130 */
+  { "Ifr", "ℑ" }, /* U+2111 */
+  { "Igrave", "Ì" }, /* U+00CC */
+  { "Im", "ℑ" }, /* U+2111 */
+  { "Imacr", "Ī" }, /* U+012A */
+  { "ImaginaryI", "ⅈ" }, /* U+2148 */
+  { "Implies", "⇒" }, /* U+21D2 */
+  { "Int", "∬" }, /* U+222C */
+  { "Integral", "∫" }, /* U+222B */
+  { "Intersection", "⋂" }, /* U+22C2 */
+  { "InvisibleComma", "⁣" }, /* U+2063 */
+  { "InvisibleTimes", "⁢" }, /* U+2062 */
+  { "Iogon", "Į" }, /* U+012E */
+  { "Iopf", "𝕀" }, /* U+1D540 */
+  { "Iota", "Ι" }, /* U+0399 */
+  { "Iscr", "ℐ" }, /* U+2110 */
+  { "Itilde", "Ĩ" }, /* U+0128 */
+  { "Iukcy", "І" }, /* U+0406 */
+  { "Iuml", "Ï" }, /* U+00CF */
+  { "Jcirc", "Ĵ" }, /* U+0134 */
+  { "Jcy", "Й" }, /* U+0419 */
+  { "Jfr", "𝔍" }, /* U+1D50D */
+  { "Jopf", "𝕁" }, /* U+1D541 */
+  { "Jscr", "𝒥" }, /* U+1D4A5 */
+  { "Jsercy", "Ј" }, /* U+0408 */
+  { "Jukcy", "Є" }, /* U+0404 */
+  { "KHcy", "Х" }, /* U+0425 */
+  { "KJcy", "Ќ" }, /* U+040C */
+  { "Kappa", "Κ" }, /* U+039A */
+  { "Kcedil", "Ķ" }, /* U+0136 */
+  { "Kcy", "К" }, /* U+041A */
+  { "Kfr", "𝔎" }, /* U+1D50E */
+  { "Kopf", "𝕂" }, /* U+1D542 */
+  { "Kscr", "𝒦" }, /* U+1D4A6 */
+  { "LJcy", "Љ" }, /* U+0409 */
+  { "LT", "<" }, /* U+003C */
+  { "Lacute", "Ĺ" }, /* U+0139 */
+  { "Lambda", "Λ" }, /* U+039B */
+  { "Lang", "⟪" }, /* U+27EA */
+  { "Laplacetrf", "ℒ" }, /* U+2112 */
+  { "Larr", "↞" }, /* U+219E */
+  { "Lcaron", "Ľ" }, /* U+013D */
+  { "Lcedil", "Ļ" }, /* U+013B */
+  { "Lcy", "Л" }, /* U+041B */
+  { "LeftArrow", "←" }, /* U+2190 */
+  { "LeftArrowBar", "⇤" }, /* U+21E4 */
+  { "LeftCeiling", "⌈" }, /* U+2308 */
+  { "LeftDownVector", "⇃" }, /* U+21C3 */
+  { "LeftFloor", "⌊" }, /* U+230A */
+  { "LeftRightArrow", "↔" }, /* U+2194 */
+  { "LeftRightVector", "⥎" }, /* U+294E */
+  { "LeftTee", "⊣" }, /* U+22A3 */
+  { "LeftTeeArrow", "↤" }, /* U+21A4 */
+  { "LeftTeeVector", "⥚" }, /* U+295A */
+  { "LeftTriangle", "⊲" }, /* U+22B2 */
+  { "LeftTriangleBar", "⧏" }, /* U+29CF */
+  { "LeftUpTeeVector", "⥠" }, /* U+2960 */
+  { "LeftUpVector", "↿" }, /* U+21BF */
+  { "LeftUpVectorBar", "⥘" }, /* U+2958 */
+  { "LeftVector", "↼" }, /* U+21BC */
+  { "LeftVectorBar", "⥒" }, /* U+2952 */
+  { "Leftarrow", "⇐" }, /* U+21D0 */
+  { "Leftrightarrow", "⇔" }, /* U+21D4 */
+  { "LessFullEqual", "≦" }, /* U+2266 */
+  { "LessGreater", "≶" }, /* U+2276 */
+  { "LessLess", "⪡" }, /* U+2AA1 */
+  { "LessSlantEqual", "⩽" }, /* U+2A7D */
+  { "LessTilde", "≲" }, /* U+2272 */
+  { "Lfr", "𝔏" }, /* U+1D50F */
+  { "Ll", "⋘" }, /* U+22D8 */
+  { "Lleftarrow", "⇚" }, /* U+21DA */
+  { "Lmidot", "Ŀ" }, /* U+013F */
+  { "LongLeftArrow", "⟵" }, /* U+27F5 */
+  { "LongRightArrow", "⟶" }, /* U+27F6 */
+  { "Longleftarrow", "⟸" }, /* U+27F8 */
+  { "Longrightarrow", "⟹" }, /* U+27F9 */
+  { "Lopf", "𝕃" }, /* U+1D543 */
+  { "LowerLeftArrow", "↙" }, /* U+2199 */
+  { "LowerRightArrow", "↘" }, /* U+2198 */
+  { "Lscr", "ℒ" }, /* U+2112 */
+  { "Lsh", "↰" }, /* U+21B0 */
+  { "Lstrok", "Ł" }, /* U+0141 */
+  { "Lt", "≪" }, /* U+226A */
+  { "Map", "⤅" }, /* U+2905 */
+  { "Mcy", "М" }, /* U+041C */
+  { "MediumSpace", " " }, /* U+205F */
+  { "Mellintrf", "ℳ" }, /* U+2133 */
+  { "Mfr", "𝔐" }, /* U+1D510 */
+  { "MinusPlus", "∓" }, /* U+2213 */
+  { "Mopf", "𝕄" }, /* U+1D544 */
+  { "Mscr", "ℳ" }, /* U+2133 */
+  { "Mu", "Μ" }, /* U+039C */
+  { "NJcy", "Њ" }, /* U+040A */
+  { "Nacute", "Ń" }, /* U+0143 */
+  { "Ncaron", "Ň" }, /* U+0147 */
+  { "Ncedil", "Ņ" }, /* U+0145 */
+  { "Ncy", "Н" }, /* U+041D */
+  { "NestedLessLess", "≪" }, /* U+226A */
+  { "NewLine", "\n" }, /* U+000A */
+  { "Nfr", "𝔑" }, /* U+1D511 */
+  { "NoBreak", "⁠" }, /* U+2060 */
+  { "Nopf", "ℕ" }, /* U+2115 */
+  { "Not", "⫬" }, /* U+2AEC */
+  { "NotCongruent", "≢" }, /* U+2262 */
+  { "NotCupCap", "≭" }, /* U+226D */
+  { "NotElement", "∉" }, /* U+2209 */
+  { "NotEqual", "≠" }, /* U+2260 */
+  { "NotEqualTilde", "≂̸" }, /* U+2242 U+0338 */
+  { "NotExists", "∄" }, /* U+2204 */
+  { "NotGreater", "≯" }, /* U+226F */
+  { "NotGreaterEqual", "≱" }, /* U+2271 */
+  { "NotGreaterLess", "≹" }, /* U+2279 */
+  { "NotGreaterTilde", "≵" }, /* U+2275 */
+  { "NotHumpDownHump", "≎̸" }, /* U+224E U+0338 */
+  { "NotHumpEqual", "≏̸" }, /* U+224F U+0338 */
+  { "NotLeftTriangle", "⋪" }, /* U+22EA */
+  { "NotLess", "≮" }, /* U+226E */
+  { "NotLessEqual", "≰" }, /* U+2270 */
+  { "NotLessGreater", "≸" }, /* U+2278 */
+  { "NotLessLess", "≪̸" }, /* U+226A U+0338 */
+  { "NotLessTilde", "≴" }, /* U+2274 */
+  { "NotPrecedes", "⊀" }, /* U+2280 */
+  { "NotSquareSubset", "⊏̸" }, /* U+228F U+0338 */
+  { "NotSubset", "⊂⃒" }, /* U+2282 U+20D2 */
+  { "NotSubsetEqual", "⊈" }, /* U+2288 */
+  { "NotSucceeds", "⊁" }, /* U+2281 */
+  { "NotSuperset", "⊃⃒" }, /* U+2283 U+20D2 */
+  { "NotTilde", "≁" }, /* U+2241 */
+  { "NotTildeEqual", "≄" }, /* U+2244 */
+  { "NotTildeTilde", "≉" }, /* U+2249 */
+  { "NotVerticalBar", "∤" }, /* U+2224 */
+  { "Nscr", "𝒩" }, /* U+1D4A9 */
+  { "Ntilde", "Ñ" }, /* U+00D1 */
+  { "Nu", "Ν" }, /* U+039D */
+  { "OElig", "Œ" }, /* U+0152 */
+  { "Oacute", "Ó" }, /* U+00D3 */
+  { "Ocirc", "Ô" }, /* U+00D4 */
+  { "Ocy", "О" }, /* U+041E */
+  { "Odblac", "Ő" }, /* U+0150 */
+  { "Ofr", "𝔒" }, /* U+1D512 */
+  { "Ograve", "Ò" }, /* U+00D2 */
+  { "Omacr", "Ō" }, /* U+014C */
+  { "Omega", "Ω" }, /* U+03A9 */
+  { "Omicron", "Ο" }, /* U+039F */
+  { "Oopf", "𝕆" }, /* U+1D546 */
+  { "OpenCurlyQuote", "‘" }, /* U+2018 */
+  { "Or", "⩔" }, /* U+2A54 */
+  { "Oscr", "𝒪" }, /* U+1D4AA */
+  { "Oslash", "Ø" }, /* U+00D8 */
+  { "Otilde", "Õ" }, /* U+00D5 */
+  { "Otimes", "⨷" }, /* U+2A37 */
+  { "Ouml", "Ö" }, /* U+00D6 */
+  { "OverBar", "‾" }, /* U+203E */
+  { "OverBrace", "⏞" }, /* U+23DE */
+  { "OverBracket", "⎴" }, /* U+23B4 */
+  { "OverParenthesis", "⏜" }, /* U+23DC */
+  { "PartialD", "∂" }, /* U+2202 */
+  { "Pcy", "П" }, /* U+041F */
+  { "Pfr", "𝔓" }, /* U+1D513 */
+  { "Phi", "Φ" }, /* U+03A6 */
+  { "Pi", "Π" }, /* U+03A0 */
+  { "PlusMinus", "±" }, /* U+00B1 */
+  { "Poincareplane", "ℌ" }, /* U+210C */
+  { "Popf", "ℙ" }, /* U+2119 */
+  { "Pr", "⪻" }, /* U+2ABB */
+  { "Precedes", "≺" }, /* U+227A */
+  { "PrecedesEqual", "⪯" }, /* U+2AAF */
+  { "PrecedesTilde", "≾" }, /* U+227E */
+  { "Prime", "″" }, /* U+2033 */
+  { "Product", "∏" }, /* U+220F */
+  { "Proportion", "∷" }, /* U+2237 */
+  { "Proportional", "∝" }, /* U+221D */
+  { "Pscr", "𝒫" }, /* U+1D4AB */
+  { "Psi", "Ψ" }, /* U+03A8 */
+  { "QUOT", "\"" }, /* U+0022 */
+  { "Qfr", "𝔔" }, /* U+1D514 */
+  { "Qopf", "ℚ" }, /* U+211A */
+  { "Qscr", "𝒬" }, /* U+1D4AC */
+  { "RBarr", "⤐" }, /* U+2910 */
+  { "REG", "®" }, /* U+00AE */
+  { "Racute", "Ŕ" }, /* U+0154 */
+  { "Rang", "⟫" }, /* U+27EB */
+  { "Rarr", "↠" }, /* U+21A0 */
+  { "Rarrtl", "⤖" }, /* U+2916 */
+  { "Rcaron", "Ř" }, /* U+0158 */
+  { "Rcedil", "Ŗ" }, /* U+0156 */
+  { "Rcy", "Р" }, /* U+0420 */
+  { "Re", "ℜ" }, /* U+211C */
+  { "ReverseElement", "∋" }, /* U+220B */
+  { "Rfr", "ℜ" }, /* U+211C */
+  { "Rho", "Ρ" }, /* U+03A1 */
+  { "RightArrow", "→" }, /* U+2192 */
+  { "RightArrowBar", "⇥" }, /* U+21E5 */
+  { "RightCeiling", "⌉" }, /* U+2309 */
+  { "RightDownVector", "⇂" }, /* U+21C2 */
+  { "RightFloor", "⌋" }, /* U+230B */
+  { "RightTee", "⊢" }, /* U+22A2 */
+  { "RightTeeArrow", "↦" }, /* U+21A6 */
+  { "RightTeeVector", "⥛" }, /* U+295B */
+  { "RightTriangle", "⊳" }, /* U+22B3 */
+  { "RightUpVector", "↾" }, /* U+21BE */
+  { "RightVector", "⇀" }, /* U+21C0 */
+  { "RightVectorBar", "⥓" }, /* U+2953 */
+  { "Rightarrow", "⇒" }, /* U+21D2 */
+  { "Ropf", "ℝ" }, /* U+211D */
+  { "RoundImplies", "⥰" }, /* U+2970 */
+  { "Rrightarrow", "⇛" }, /* U+21DB */
+  { "Rscr", "ℛ" }, /* U+211B */
+  { "Rsh", "↱" }, /* U+21B1 */
+  { "RuleDelayed", "⧴" }, /* U+29F4 */
+  { "SHCHcy", "Щ" }, /* U+0429 */
+  { "SHcy", "Ш" }, /* U+0428 */
+  { "SOFTcy", "Ь" }, /* U+042C */
+  { "Sacute", "Ś" }, /* U+015A */
+  { "Sc", "⪼" }, /* U+2ABC */
+  { "Scaron", "Š" }, /* U+0160 */
+  { "Scedil", "Ş" }, /* U+015E */
+  { "Scirc", "Ŝ" }, /* U+015C */
+  { "Scy", "С" }, /* U+0421 */
+  { "Sfr", "𝔖" }, /* U+1D516 */
+  { "ShortDownArrow", "↓" }, /* U+2193 */
+  { "ShortLeftArrow", "←" }, /* U+2190 */
+  { "ShortRightArrow", "→" }, /* U+2192 */
+  { "ShortUpArrow", "↑" }, /* U+2191 */
+  { "Sigma", "Σ" }, /* U+03A3 */
+  { "SmallCircle", "∘" }, /* U+2218 */
+  { "Sopf", "𝕊" }, /* U+1D54A */
+  { "Sqrt", "√" }, /* U+221A */
+  { "Square", "□" }, /* U+25A1 */
+  { "SquareSubset", "⊏" }, /* U+228F */
+  { "SquareSuperset", "⊐" }, /* U+2290 */
+  { "SquareUnion", "⊔" }, /* U+2294 */
+  { "Sscr", "𝒮" }, /* U+1D4AE */
+  { "Star", "⋆" }, /* U+22C6 */
+  { "Sub", "⋐" }, /* U+22D0 */
+  { "Subset", "⋐" }, /* U+22D0 */
+  { "SubsetEqual", "⊆" }, /* U+2286 */
+  { "Succeeds", "≻" }, /* U+227B */
+  { "SucceedsEqual", "⪰" }, /* U+2AB0 */
+  { "SucceedsTilde", "≿" }, /* U+227F */
+  { "SuchThat", "∋" }, /* U+220B */
+  { "Sum", "∑" }, /* U+2211 */
+  { "Sup", "⋑" }, /* U+22D1 */
+  { "Superset", "⊃" }, /* U+2283 */
+  { "SupersetEqual", "⊇" }, /* U+2287 */
+  { "Supset", "⋑" }, /* U+22D1 */
+  { "THORN", "Þ" }, /* U+00DE */
+  { "TRADE", "™" }, /* U+2122 */
+  { "TSHcy", "Ћ" }, /* U+040B */
+  { "TScy", "Ц" }, /* U+0426 */
+  { "Tab", "   " }, /* U+0009 */
+  { "Tau", "Τ" }, /* U+03A4 */
+  { "Tcaron", "Ť" }, /* U+0164 */
+  { "Tcedil", "Ţ" }, /* U+0162 */
+  { "Tcy", "Т" }, /* U+0422 */
+  { "Tfr", "𝔗" }, /* U+1D517 */
+  { "Therefore", "∴" }, /* U+2234 */
+  { "Theta", "Θ" }, /* U+0398 */
+  { "ThickSpace", "  " }, /* U+205F U+200A */
+  { "ThinSpace", " " }, /* U+2009 */
+  { "Tilde", "∼" }, /* U+223C */
+  { "TildeEqual", "≃" }, /* U+2243 */
+  { "TildeFullEqual", "≅" }, /* U+2245 */
+  { "TildeTilde", "≈" }, /* U+2248 */
+  { "Topf", "𝕋" }, /* U+1D54B */
+  { "TripleDot", "⃛" }, /* U+20DB */
+  { "Tscr", "𝒯" }, /* U+1D4AF */
+  { "Tstrok", "Ŧ" }, /* U+0166 */
+  { "Uacute", "Ú" }, /* U+00DA */
+  { "Uarr", "↟" }, /* U+219F */
+  { "Uarrocir", "⥉" }, /* U+2949 */
+  { "Ubrcy", "Ў" }, /* U+040E */
+  { "Ubreve", "Ŭ" }, /* U+016C */
+  { "Ucirc", "Û" }, /* U+00DB */
+  { "Ucy", "У" }, /* U+0423 */
+  { "Udblac", "Ű" }, /* U+0170 */
+  { "Ufr", "𝔘" }, /* U+1D518 */
+  { "Ugrave", "Ù" }, /* U+00D9 */
+  { "Umacr", "Ū" }, /* U+016A */
+  { "UnderBar", "_" }, /* U+005F */
+  { "UnderBrace", "⏟" }, /* U+23DF */
+  { "UnderBracket", "⎵" }, /* U+23B5 */
+  { "Union", "⋃" }, /* U+22C3 */
+  { "UnionPlus", "⊎" }, /* U+228E */
+  { "Uogon", "Ų" }, /* U+0172 */
+  { "Uopf", "𝕌" }, /* U+1D54C */
+  { "UpArrow", "↑" }, /* U+2191 */
+  { "UpArrowBar", "⤒" }, /* U+2912 */
+  { "UpDownArrow", "↕" }, /* U+2195 */
+  { "UpEquilibrium", "⥮" }, /* U+296E */
+  { "UpTee", "⊥" }, /* U+22A5 */
+  { "UpTeeArrow", "↥" }, /* U+21A5 */
+  { "Uparrow", "⇑" }, /* U+21D1 */
+  { "Updownarrow", "⇕" }, /* U+21D5 */
+  { "UpperLeftArrow", "↖" }, /* U+2196 */
+  { "UpperRightArrow", "↗" }, /* U+2197 */
+  { "Upsi", "ϒ" }, /* U+03D2 */
+  { "Upsilon", "Υ" }, /* U+03A5 */
+  { "Uring", "Ů" }, /* U+016E */
+  { "Uscr", "𝒰" }, /* U+1D4B0 */
+  { "Utilde", "Ũ" }, /* U+0168 */
+  { "Uuml", "Ü" }, /* U+00DC */
+  { "VDash", "⊫" }, /* U+22AB */
+  { "Vbar", "⫫" }, /* U+2AEB */
+  { "Vcy", "В" }, /* U+0412 */
+  { "Vdash", "⊩" }, /* U+22A9 */
+  { "Vdashl", "⫦" }, /* U+2AE6 */
+  { "Vee", "⋁" }, /* U+22C1 */
+  { "Verbar", "‖" }, /* U+2016 */
+  { "Vert", "‖" }, /* U+2016 */
+  { "VerticalBar", "∣" }, /* U+2223 */
+  { "VerticalLine", "|" }, /* U+007C */
+  { "VerticalTilde", "≀" }, /* U+2240 */
+  { "VeryThinSpace", " " }, /* U+200A */
+  { "Vfr", "𝔙" }, /* U+1D519 */
+  { "Vopf", "𝕍" }, /* U+1D54D */
+  { "Vscr", "𝒱" }, /* U+1D4B1 */
+  { "Vvdash", "⊪" }, /* U+22AA */
+  { "Wcirc", "Ŵ" }, /* U+0174 */
+  { "Wedge", "⋀" }, /* U+22C0 */
+  { "Wfr", "𝔚" }, /* U+1D51A */
+  { "Wopf", "𝕎" }, /* U+1D54E */
+  { "Wscr", "𝒲" }, /* U+1D4B2 */
+  { "Xfr", "𝔛" }, /* U+1D51B */
+  { "Xi", "Ξ" }, /* U+039E */
+  { "Xopf", "𝕏" }, /* U+1D54F */
+  { "Xscr", "𝒳" }, /* U+1D4B3 */
+  { "YAcy", "Я" }, /* U+042F */
+  { "YIcy", "Ї" }, /* U+0407 */
+  { "YUcy", "Ю" }, /* U+042E */
+  { "Yacute", "Ý" }, /* U+00DD */
+  { "Ycirc", "Ŷ" }, /* U+0176 */
+  { "Ycy", "Ы" }, /* U+042B */
+  { "Yfr", "𝔜" }, /* U+1D51C */
+  { "Yopf", "𝕐" }, /* U+1D550 */
+  { "Yscr", "𝒴" }, /* U+1D4B4 */
+  { "Yuml", "Ÿ" }, /* U+0178 */
+  { "ZHcy", "Ж" }, /* U+0416 */
+  { "Zacute", "Ź" }, /* U+0179 */
+  { "Zcaron", "Ž" }, /* U+017D */
+  { "Zcy", "З" }, /* U+0417 */
+  { "Zdot", "Ż" }, /* U+017B */
+  { "ZeroWidthSpace", "" }, /* U+200B */
+  { "Zeta", "Ζ" }, /* U+0396 */
+  { "Zfr", "ℨ" }, /* U+2128 */
+  { "Zopf", "ℤ" }, /* U+2124 */
+  { "Zscr", "𝒵" }, /* U+1D4B5 */
+  { "aacute", "á" }, /* U+00E1 */
+  { "abreve", "ă" }, /* U+0103 */
+  { "ac", "∾" }, /* U+223E */
+  { "acE", "∾̳" }, /* U+223E U+0333 */
+  { "acd", "∿" }, /* U+223F */
+  { "acirc", "â" }, /* U+00E2 */
+  { "acute", "´" }, /* U+00B4 */
+  { "acy", "а" }, /* U+0430 */
+  { "aelig", "æ" }, /* U+00E6 */
+  { "af", "⁡" }, /* U+2061 */
+  { "afr", "𝔞" }, /* U+1D51E */
+  { "agrave", "à" }, /* U+00E0 */
+  { "alefsym", "ℵ" }, /* U+2135 */
+  { "aleph", "ℵ" }, /* U+2135 */
+  { "alpha", "α" }, /* U+03B1 */
+  { "amacr", "ā" }, /* U+0101 */
+  { "amalg", "⨿" }, /* U+2A3F */
+  { "amp", "&" }, /* U+0026 */
+  { "and", "∧" }, /* U+2227 */
+  { "andand", "⩕" }, /* U+2A55 */
+  { "andd", "⩜" }, /* U+2A5C */
+  { "andslope", "⩘" }, /* U+2A58 */
+  { "andv", "⩚" }, /* U+2A5A */
+  { "ang", "∠" }, /* U+2220 */
+  { "ange", "⦤" }, /* U+29A4 */
+  { "angle", "∠" }, /* U+2220 */
+  { "angmsd", "∡" }, /* U+2221 */
+  { "angmsdaa", "⦨" }, /* U+29A8 */
+  { "angmsdab", "⦩" }, /* U+29A9 */
+  { "angmsdac", "⦪" }, /* U+29AA */
+  { "angmsdad", "⦫" }, /* U+29AB */
+  { "angmsdae", "⦬" }, /* U+29AC */
+  { "angmsdaf", "⦭" }, /* U+29AD */
+  { "angmsdag", "⦮" }, /* U+29AE */
+  { "angmsdah", "⦯" }, /* U+29AF */
+  { "angrt", "∟" }, /* U+221F */
+  { "angrtvb", "⊾" }, /* U+22BE */
+  { "angrtvbd", "⦝" }, /* U+299D */
+  { "angsph", "∢" }, /* U+2222 */
+  { "angst", "Å" }, /* U+00C5 */
+  { "angzarr", "⍼" }, /* U+237C */
+  { "aogon", "ą" }, /* U+0105 */
+  { "aopf", "𝕒" }, /* U+1D552 */
+  { "ap", "≈" }, /* U+2248 */
+  { "apE", "⩰" }, /* U+2A70 */
+  { "apacir", "⩯" }, /* U+2A6F */
+  { "ape", "≊" }, /* U+224A */
+  { "apid", "≋" }, /* U+224B */
+  { "apos", "'" }, /* U+0027 */
+  { "approx", "≈" }, /* U+2248 */
+  { "approxeq", "≊" }, /* U+224A */
+  { "aring", "å" }, /* U+00E5 */
+  { "ascr", "𝒶" }, /* U+1D4B6 */
+  { "ast", "*" }, /* U+002A */
+  { "asymp", "≈" }, /* U+2248 */
+  { "asympeq", "≍" }, /* U+224D */
+  { "atilde", "ã" }, /* U+00E3 */
+  { "auml", "ä" }, /* U+00E4 */
+  { "awconint", "∳" }, /* U+2233 */
+  { "awint", "⨑" }, /* U+2A11 */
+  { "bNot", "⫭" }, /* U+2AED */
+  { "backcong", "≌" }, /* U+224C */
+  { "backepsilon", "϶" }, /* U+03F6 */
+  { "backprime", "‵" }, /* U+2035 */
+  { "backsim", "∽" }, /* U+223D */
+  { "backsimeq", "⋍" }, /* U+22CD */
+  { "barvee", "⊽" }, /* U+22BD */
+  { "barwed", "⌅" }, /* U+2305 */
+  { "barwedge", "⌅" }, /* U+2305 */
+  { "bbrk", "⎵" }, /* U+23B5 */
+  { "bbrktbrk", "⎶" }, /* U+23B6 */
+  { "bcong", "≌" }, /* U+224C */
+  { "bcy", "б" }, /* U+0431 */
+  { "bdquo", "„" }, /* U+201E */
+  { "becaus", "∵" }, /* U+2235 */
+  { "because", "∵" }, /* U+2235 */
+  { "bemptyv", "⦰" }, /* U+29B0 */
+  { "bepsi", "϶" }, /* U+03F6 */
+  { "bernou", "ℬ" }, /* U+212C */
+  { "beta", "β" }, /* U+03B2 */
+  { "beth", "ℶ" }, /* U+2136 */
+  { "between", "≬" }, /* U+226C */
+  { "bfr", "𝔟" }, /* U+1D51F */
+  { "bigcap", "⋂" }, /* U+22C2 */
+  { "bigcirc", "◯" }, /* U+25EF */
+  { "bigcup", "⋃" }, /* U+22C3 */
+  { "bigodot", "⨀" }, /* U+2A00 */
+  { "bigoplus", "⨁" }, /* U+2A01 */
+  { "bigotimes", "⨂" }, /* U+2A02 */
+  { "bigsqcup", "⨆" }, /* U+2A06 */
+  { "bigstar", "★" }, /* U+2605 */
+  { "bigtriangledown", "▽" }, /* U+25BD */
+  { "bigtriangleup", "△" }, /* U+25B3 */
+  { "biguplus", "⨄" }, /* U+2A04 */
+  { "bigvee", "⋁" }, /* U+22C1 */
+  { "bigwedge", "⋀" }, /* U+22C0 */
+  { "bkarow", "⤍" }, /* U+290D */
+  { "blacklozenge", "⧫" }, /* U+29EB */
+  { "blacksquare", "▪" }, /* U+25AA */
+  { "blacktriangle", "▴" }, /* U+25B4 */
+  { "blank", "␣" }, /* U+2423 */
+  { "blk12", "▒" }, /* U+2592 */
+  { "blk14", "░" }, /* U+2591 */
+  { "blk34", "▓" }, /* U+2593 */
+  { "block", "█" }, /* U+2588 */
+  { "bne", "=⃥" }, /* U+003D U+20E5 */
+  { "bnequiv", "≡⃥" }, /* U+2261 U+20E5 */
+  { "bnot", "⌐" }, /* U+2310 */
+  { "bopf", "𝕓" }, /* U+1D553 */
+  { "bot", "⊥" }, /* U+22A5 */
+  { "bottom", "⊥" }, /* U+22A5 */
+  { "bowtie", "⋈" }, /* U+22C8 */
+  { "boxDL", "╗" }, /* U+2557 */
+  { "boxDR", "╔" }, /* U+2554 */
+  { "boxDl", "╖" }, /* U+2556 */
+  { "boxDr", "╓" }, /* U+2553 */
+  { "boxH", "═" }, /* U+2550 */
+  { "boxHD", "╦" }, /* U+2566 */
+  { "boxHU", "╩" }, /* U+2569 */
+  { "boxHd", "╤" }, /* U+2564 */
+  { "boxHu", "╧" }, /* U+2567 */
+  { "boxUL", "╝" }, /* U+255D */
+  { "boxUR", "╚" }, /* U+255A */
+  { "boxUl", "╜" }, /* U+255C */
+  { "boxUr", "╙" }, /* U+2559 */
+  { "boxV", "║" }, /* U+2551 */
+  { "boxVH", "╬" }, /* U+256C */
+  { "boxVL", "╣" }, /* U+2563 */
+  { "boxVR", "╠" }, /* U+2560 */
+  { "boxVh", "╫" }, /* U+256B */
+  { "boxVl", "╢" }, /* U+2562 */
+  { "boxVr", "╟" }, /* U+255F */
+  { "boxbox", "⧉" }, /* U+29C9 */
+  { "boxdL", "╕" }, /* U+2555 */
+  { "boxdR", "╒" }, /* U+2552 */
+  { "boxdl", "┐" }, /* U+2510 */
+  { "boxdr", "┌" }, /* U+250C */
+  { "boxh", "─" }, /* U+2500 */
+  { "boxhD", "╥" }, /* U+2565 */
+  { "boxhU", "╨" }, /* U+2568 */
+  { "boxhd", "┬" }, /* U+252C */
+  { "boxhu", "┴" }, /* U+2534 */
+  { "boxminus", "⊟" }, /* U+229F */
+  { "boxplus", "⊞" }, /* U+229E */
+  { "boxtimes", "⊠" }, /* U+22A0 */
+  { "boxuL", "╛" }, /* U+255B */
+  { "boxuR", "╘" }, /* U+2558 */
+  { "boxul", "┘" }, /* U+2518 */
+  { "boxur", "└" }, /* U+2514 */
+  { "boxv", "│" }, /* U+2502 */
+  { "boxvH", "╪" }, /* U+256A */
+  { "boxvL", "╡" }, /* U+2561 */
+  { "boxvR", "╞" }, /* U+255E */
+  { "boxvh", "┼" }, /* U+253C */
+  { "boxvl", "┤" }, /* U+2524 */
+  { "boxvr", "├" }, /* U+251C */
+  { "bprime", "‵" }, /* U+2035 */
+  { "breve", "˘" }, /* U+02D8 */
+  { "brvbar", "¦" }, /* U+00A6 */
+  { "bscr", "𝒷" }, /* U+1D4B7 */
+  { "bsemi", "⁏" }, /* U+204F */
+  { "bsim", "∽" }, /* U+223D */
+  { "bsime", "⋍" }, /* U+22CD */
+  { "bsol", "\\" }, /* U+005C */
+  { "bsolb", "⧅" }, /* U+29C5 */
+  { "bsolhsub", "⟈" }, /* U+27C8 */
+  { "bull", "•" }, /* U+2022 */
+  { "bullet", "•" }, /* U+2022 */
+  { "bump", "≎" }, /* U+224E */
+  { "bumpE", "⪮" }, /* U+2AAE */
+  { "bumpe", "≏" }, /* U+224F */
+  { "bumpeq", "≏" }, /* U+224F */
+  { "cacute", "ć" }, /* U+0107 */
+  { "cap", "∩" }, /* U+2229 */
+  { "capand", "⩄" }, /* U+2A44 */
+  { "capbrcup", "⩉" }, /* U+2A49 */
+  { "capcap", "⩋" }, /* U+2A4B */
+  { "capcup", "⩇" }, /* U+2A47 */
+  { "capdot", "⩀" }, /* U+2A40 */
+  { "caps", "∩︀" }, /* U+2229 U+FE00 */
+  { "caret", "⁁" }, /* U+2041 */
+  { "caron", "ˇ" }, /* U+02C7 */
+  { "ccaps", "⩍" }, /* U+2A4D */
+  { "ccaron", "č" }, /* U+010D */
+  { "ccedil", "ç" }, /* U+00E7 */
+  { "ccirc", "ĉ" }, /* U+0109 */
+  { "ccups", "⩌" }, /* U+2A4C */
+  { "ccupssm", "⩐" }, /* U+2A50 */
+  { "cdot", "ċ" }, /* U+010B */
+  { "cedil", "¸" }, /* U+00B8 */
+  { "cemptyv", "⦲" }, /* U+29B2 */
+  { "cent", "¢" }, /* U+00A2 */
+  { "centerdot", "·" }, /* U+00B7 */
+  { "cfr", "𝔠" }, /* U+1D520 */
+  { "chcy", "ч" }, /* U+0447 */
+  { "check", "✓" }, /* U+2713 */
+  { "checkmark", "✓" }, /* U+2713 */
+  { "chi", "χ" }, /* U+03C7 */
+  { "cir", "○" }, /* U+25CB */
+  { "cirE", "⧃" }, /* U+29C3 */
+  { "circ", "ˆ" }, /* U+02C6 */
+  { "circeq", "≗" }, /* U+2257 */
+  { "circlearrowleft", "↺" }, /* U+21BA */
+  { "circledR", "®" }, /* U+00AE */
+  { "circledS", "Ⓢ" }, /* U+24C8 */
+  { "circledast", "⊛" }, /* U+229B */
+  { "circledcirc", "⊚" }, /* U+229A */
+  { "circleddash", "⊝" }, /* U+229D */
+  { "cire", "≗" }, /* U+2257 */
+  { "cirfnint", "⨐" }, /* U+2A10 */
+  { "cirmid", "⫯" }, /* U+2AEF */
+  { "cirscir", "⧂" }, /* U+29C2 */
+  { "clubs", "♣" }, /* U+2663 */
+  { "clubsuit", "♣" }, /* U+2663 */
+  { "colon", ":" }, /* U+003A */
+  { "colone", "≔" }, /* U+2254 */
+  { "coloneq", "≔" }, /* U+2254 */
+  { "comma", "," }, /* U+002C */
+  { "commat", "@" }, /* U+0040 */
+  { "comp", "∁" }, /* U+2201 */
+  { "compfn", "∘" }, /* U+2218 */
+  { "complement", "∁" }, /* U+2201 */
+  { "complexes", "ℂ" }, /* U+2102 */
+  { "cong", "≅" }, /* U+2245 */
+  { "congdot", "⩭" }, /* U+2A6D */
+  { "conint", "∮" }, /* U+222E */
+  { "copf", "𝕔" }, /* U+1D554 */
+  { "coprod", "∐" }, /* U+2210 */
+  { "copy", "©" }, /* U+00A9 */
+  { "copysr", "℗" }, /* U+2117 */
+  { "crarr", "↵" }, /* U+21B5 */
+  { "cross", "✗" }, /* U+2717 */
+  { "cscr", "𝒸" }, /* U+1D4B8 */
+  { "csub", "⫏" }, /* U+2ACF */
+  { "csube", "⫑" }, /* U+2AD1 */
+  { "csup", "⫐" }, /* U+2AD0 */
+  { "csupe", "⫒" }, /* U+2AD2 */
+  { "ctdot", "⋯" }, /* U+22EF */
+  { "cudarrl", "⤸" }, /* U+2938 */
+  { "cudarrr", "⤵" }, /* U+2935 */
+  { "cuepr", "⋞" }, /* U+22DE */
+  { "cuesc", "⋟" }, /* U+22DF */
+  { "cularr", "↶" }, /* U+21B6 */
+  { "cularrp", "⤽" }, /* U+293D */
+  { "cup", "∪" }, /* U+222A */
+  { "cupbrcap", "⩈" }, /* U+2A48 */
+  { "cupcap", "⩆" }, /* U+2A46 */
+  { "cupcup", "⩊" }, /* U+2A4A */
+  { "cupdot", "⊍" }, /* U+228D */
+  { "cupor", "⩅" }, /* U+2A45 */
+  { "cups", "∪︀" }, /* U+222A U+FE00 */
+  { "curarr", "↷" }, /* U+21B7 */
+  { "curarrm", "⤼" }, /* U+293C */
+  { "curlyeqprec", "⋞" }, /* U+22DE */
+  { "curlyeqsucc", "⋟" }, /* U+22DF */
+  { "curlyvee", "⋎" }, /* U+22CE */
+  { "curlywedge", "⋏" }, /* U+22CF */
+  { "curren", "¤" }, /* U+00A4 */
+  { "curvearrowleft", "↶" }, /* U+21B6 */
+  { "curvearrowright", "↷" }, /* U+21B7 */
+  { "cuvee", "⋎" }, /* U+22CE */
+  { "cuwed", "⋏" }, /* U+22CF */
+  { "cwconint", "∲" }, /* U+2232 */
+  { "cwint", "∱" }, /* U+2231 */
+  { "cylcty", "⌭" }, /* U+232D */
+  { "dArr", "⇓" }, /* U+21D3 */
+  { "dHar", "⥥" }, /* U+2965 */
+  { "dagger", "†" }, /* U+2020 */
+  { "daleth", "ℸ" }, /* U+2138 */
+  { "darr", "↓" }, /* U+2193 */
+  { "dash", "‐" }, /* U+2010 */
+  { "dashv", "⊣" }, /* U+22A3 */
+  { "dbkarow", "⤏" }, /* U+290F */
+  { "dblac", "˝" }, /* U+02DD */
+  { "dcaron", "ď" }, /* U+010F */
+  { "dcy", "д" }, /* U+0434 */
+  { "dd", "ⅆ" }, /* U+2146 */
+  { "ddagger", "‡" }, /* U+2021 */
+  { "ddarr", "⇊" }, /* U+21CA */
+  { "ddotseq", "⩷" }, /* U+2A77 */
+  { "deg", "°" }, /* U+00B0 */
+  { "delta", "δ" }, /* U+03B4 */
+  { "demptyv", "⦱" }, /* U+29B1 */
+  { "dfisht", "⥿" }, /* U+297F */
+  { "dfr", "𝔡" }, /* U+1D521 */
+  { "dharl", "⇃" }, /* U+21C3 */
+  { "dharr", "⇂" }, /* U+21C2 */
+  { "diam", "⋄" }, /* U+22C4 */
+  { "diamond", "⋄" }, /* U+22C4 */
+  { "diamondsuit", "♦" }, /* U+2666 */
+  { "diams", "♦" }, /* U+2666 */
+  { "die", "¨" }, /* U+00A8 */
+  { "digamma", "ϝ" }, /* U+03DD */
+  { "disin", "⋲" }, /* U+22F2 */
+  { "div", "÷" }, /* U+00F7 */
+  { "divide", "÷" }, /* U+00F7 */
+  { "divideontimes", "⋇" }, /* U+22C7 */
+  { "divonx", "⋇" }, /* U+22C7 */
+  { "djcy", "ђ" }, /* U+0452 */
+  { "dlcorn", "⌞" }, /* U+231E */
+  { "dlcrop", "⌍" }, /* U+230D */
+  { "dollar", "$" }, /* U+0024 */
+  { "dopf", "𝕕" }, /* U+1D555 */
+  { "dot", "˙" }, /* U+02D9 */
+  { "doteq", "≐" }, /* U+2250 */
+  { "doteqdot", "≑" }, /* U+2251 */
+  { "dotminus", "∸" }, /* U+2238 */
+  { "dotplus", "∔" }, /* U+2214 */
+  { "dotsquare", "⊡" }, /* U+22A1 */
+  { "doublebarwedge", "⌆" }, /* U+2306 */
+  { "downarrow", "↓" }, /* U+2193 */
+  { "downdownarrows", "⇊" }, /* U+21CA */
+  { "downharpoonleft", "⇃" }, /* U+21C3 */
+  { "drbkarow", "⤐" }, /* U+2910 */
+  { "drcorn", "⌟" }, /* U+231F */
+  { "drcrop", "⌌" }, /* U+230C */
+  { "dscr", "𝒹" }, /* U+1D4B9 */
+  { "dscy", "ѕ" }, /* U+0455 */
+  { "dsol", "⧶" }, /* U+29F6 */
+  { "dstrok", "đ" }, /* U+0111 */
+  { "dtdot", "⋱" }, /* U+22F1 */
+  { "dtri", "▿" }, /* U+25BF */
+  { "dtrif", "▾" }, /* U+25BE */
+  { "duarr", "⇵" }, /* U+21F5 */
+  { "duhar", "⥯" }, /* U+296F */
+  { "dwangle", "⦦" }, /* U+29A6 */
+  { "dzcy", "џ" }, /* U+045F */
+  { "dzigrarr", "⟿" }, /* U+27FF */
+  { "eDDot", "⩷" }, /* U+2A77 */
+  { "eDot", "≑" }, /* U+2251 */
+  { "eacute", "é" }, /* U+00E9 */
+  { "easter", "⩮" }, /* U+2A6E */
+  { "ecaron", "ě" }, /* U+011B */
+  { "ecir", "≖" }, /* U+2256 */
+  { "ecirc", "ê" }, /* U+00EA */
+  { "ecolon", "≕" }, /* U+2255 */
+  { "ecy", "э" }, /* U+044D */
+  { "edot", "ė" }, /* U+0117 */
+  { "ee", "ⅇ" }, /* U+2147 */
+  { "efDot", "≒" }, /* U+2252 */
+  { "efr", "𝔢" }, /* U+1D522 */
+  { "eg", "⪚" }, /* U+2A9A */
+  { "egrave", "è" }, /* U+00E8 */
+  { "egs", "⪖" }, /* U+2A96 */
+  { "egsdot", "⪘" }, /* U+2A98 */
+  { "el", "⪙" }, /* U+2A99 */
+  { "elinters", "⏧" }, /* U+23E7 */
+  { "ell", "ℓ" }, /* U+2113 */
+  { "els", "⪕" }, /* U+2A95 */
+  { "elsdot", "⪗" }, /* U+2A97 */
+  { "emacr", "ē" }, /* U+0113 */
+  { "empty", "∅" }, /* U+2205 */
+  { "emptyset", "∅" }, /* U+2205 */
+  { "emptyv", "∅" }, /* U+2205 */
+  { "emsp", " " }, /* U+2003 */
+  { "emsp13", " " }, /* U+2004 */
+  { "emsp14", " " }, /* U+2005 */
+  { "eng", "ŋ" }, /* U+014B */
+  { "ensp", " " }, /* U+2002 */
+  { "eogon", "ę" }, /* U+0119 */
+  { "eopf", "𝕖" }, /* U+1D556 */
+  { "epar", "⋕" }, /* U+22D5 */
+  { "eparsl", "⧣" }, /* U+29E3 */
+  { "eplus", "⩱" }, /* U+2A71 */
+  { "epsi", "ε" }, /* U+03B5 */
+  { "epsilon", "ε" }, /* U+03B5 */
+  { "epsiv", "ϵ" }, /* U+03F5 */
+  { "eqcirc", "≖" }, /* U+2256 */
+  { "eqcolon", "≕" }, /* U+2255 */
+  { "eqsim", "≂" }, /* U+2242 */
+  { "eqslantgtr", "⪖" }, /* U+2A96 */
+  { "eqslantless", "⪕" }, /* U+2A95 */
+  { "equals", "=" }, /* U+003D */
+  { "equest", "≟" }, /* U+225F */
+  { "equiv", "≡" }, /* U+2261 */
+  { "equivDD", "⩸" }, /* U+2A78 */
+  { "eqvparsl", "⧥" }, /* U+29E5 */
+  { "erDot", "≓" }, /* U+2253 */
+  { "erarr", "⥱" }, /* U+2971 */
+  { "escr", "ℯ" }, /* U+212F */
+  { "esdot", "≐" }, /* U+2250 */
+  { "esim", "≂" }, /* U+2242 */
+  { "eta", "η" }, /* U+03B7 */
+  { "eth", "ð" }, /* U+00F0 */
+  { "euml", "ë" }, /* U+00EB */
+  { "euro", "€" }, /* U+20AC */
+  { "excl", "!" }, /* U+0021 */
+  { "exist", "∃" }, /* U+2203 */
+  { "expectation", "ℰ" }, /* U+2130 */
+  { "exponentiale", "ⅇ" }, /* U+2147 */
+  { "fallingdotseq", "≒" }, /* U+2252 */
+  { "fcy", "ф" }, /* U+0444 */
+  { "female", "♀" }, /* U+2640 */
+  { "ffilig", "ﬃ" }, /* U+FB03 */
+  { "fflig", "ﬀ" }, /* U+FB00 */
+  { "ffllig", "ﬄ" }, /* U+FB04 */
+  { "ffr", "𝔣" }, /* U+1D523 */
+  { "filig", "ﬁ" }, /* U+FB01 */
+  { "fjlig", "fj" }, /* U+0066 U+006A */
+  { "flat", "♭" }, /* U+266D */
+  { "fllig", "ﬂ" }, /* U+FB02 */
+  { "fltns", "▱" }, /* U+25B1 */
+  { "fnof", "ƒ" }, /* U+0192 */
+  { "fopf", "𝕗" }, /* U+1D557 */
+  { "forall", "∀" }, /* U+2200 */
+  { "fork", "⋔" }, /* U+22D4 */
+  { "forkv", "⫙" }, /* U+2AD9 */
+  { "fpartint", "⨍" }, /* U+2A0D */
+  { "frac12", "½" }, /* U+00BD */
+  { "frac13", "⅓" }, /* U+2153 */
+  { "frac14", "¼" }, /* U+00BC */
+  { "frac15", "⅕" }, /* U+2155 */
+  { "frac16", "⅙" }, /* U+2159 */
+  { "frac18", "⅛" }, /* U+215B */
+  { "frac23", "⅔" }, /* U+2154 */
+  { "frac25", "⅖" }, /* U+2156 */
+  { "frac34", "¾" }, /* U+00BE */
+  { "frac35", "⅗" }, /* U+2157 */
+  { "frac38", "⅜" }, /* U+215C */
+  { "frac45", "⅘" }, /* U+2158 */
+  { "frac56", "⅚" }, /* U+215A */
+  { "frac58", "⅝" }, /* U+215D */
+  { "frac78", "⅞" }, /* U+215E */
+  { "frasl", "⁄" }, /* U+2044 */
+  { "frown", "⌢" }, /* U+2322 */
+  { "fscr", "𝒻" }, /* U+1D4BB */
+  { "gE", "≧" }, /* U+2267 */
+  { "gEl", "⪌" }, /* U+2A8C */
+  { "gacute", "ǵ" }, /* U+01F5 */
+  { "gamma", "γ" }, /* U+03B3 */
+  { "gammad", "ϝ" }, /* U+03DD */
+  { "gap", "⪆" }, /* U+2A86 */
+  { "gbreve", "ğ" }, /* U+011F */
+  { "gcirc", "ĝ" }, /* U+011D */
+  { "gcy", "г" }, /* U+0433 */
+  { "gdot", "ġ" }, /* U+0121 */
+  { "ge", "≥" }, /* U+2265 */
+  { "gel", "⋛" }, /* U+22DB */
+  { "geq", "≥" }, /* U+2265 */
+  { "geqq", "≧" }, /* U+2267 */
+  { "geqslant", "⩾" }, /* U+2A7E */
+  { "ges", "⩾" }, /* U+2A7E */
+  { "gescc", "⪩" }, /* U+2AA9 */
+  { "gesdot", "⪀" }, /* U+2A80 */
+  { "gesdoto", "⪂" }, /* U+2A82 */
+  { "gesdotol", "⪄" }, /* U+2A84 */
+  { "gesl", "⋛︀" }, /* U+22DB U+FE00 */
+  { "gesles", "⪔" }, /* U+2A94 */
+  { "gfr", "𝔤" }, /* U+1D524 */
+  { "gg", "≫" }, /* U+226B */
+  { "ggg", "⋙" }, /* U+22D9 */
+  { "gimel", "ℷ" }, /* U+2137 */
+  { "gjcy", "ѓ" }, /* U+0453 */
+  { "gl", "≷" }, /* U+2277 */
+  { "glE", "⪒" }, /* U+2A92 */
+  { "gla", "⪥" }, /* U+2AA5 */
+  { "glj", "⪤" }, /* U+2AA4 */
+  { "gnE", "≩" }, /* U+2269 */
+  { "gnap", "⪊" }, /* U+2A8A */
+  { "gnapprox", "⪊" }, /* U+2A8A */
+  { "gne", "⪈" }, /* U+2A88 */
+  { "gneq", "⪈" }, /* U+2A88 */
+  { "gneqq", "≩" }, /* U+2269 */
+  { "gnsim", "⋧" }, /* U+22E7 */
+  { "gopf", "𝕘" }, /* U+1D558 */
+  { "grave", "`" }, /* U+0060 */
+  { "gscr", "ℊ" }, /* U+210A */
+  { "gsim", "≳" }, /* U+2273 */
+  { "gsime", "⪎" }, /* U+2A8E */
+  { "gsiml", "⪐" }, /* U+2A90 */
+  { "gt", ">" }, /* U+003E */
+  { "gtcc", "⪧" }, /* U+2AA7 */
+  { "gtcir", "⩺" }, /* U+2A7A */
+  { "gtdot", "⋗" }, /* U+22D7 */
+  { "gtlPar", "⦕" }, /* U+2995 */
+  { "gtquest", "⩼" }, /* U+2A7C */
+  { "gtrapprox", "⪆" }, /* U+2A86 */
+  { "gtrarr", "⥸" }, /* U+2978 */
+  { "gtrdot", "⋗" }, /* U+22D7 */
+  { "gtreqless", "⋛" }, /* U+22DB */
+  { "gtreqqless", "⪌" }, /* U+2A8C */
+  { "gtrless", "≷" }, /* U+2277 */
+  { "gtrsim", "≳" }, /* U+2273 */
+  { "gvertneqq", "≩︀" }, /* U+2269 U+FE00 */
+  { "gvnE", "≩︀" }, /* U+2269 U+FE00 */
+  { "hArr", "⇔" }, /* U+21D4 */
+  { "hairsp", " " }, /* U+200A */
+  { "half", "½" }, /* U+00BD */
+  { "hamilt", "ℋ" }, /* U+210B */
+  { "hardcy", "ъ" }, /* U+044A */
+  { "harr", "↔" }, /* U+2194 */
+  { "harrcir", "⥈" }, /* U+2948 */
+  { "harrw", "↭" }, /* U+21AD */
+  { "hbar", "ℏ" }, /* U+210F */
+  { "hcirc", "ĥ" }, /* U+0125 */
+  { "hearts", "♥" }, /* U+2665 */
+  { "heartsuit", "♥" }, /* U+2665 */
+  { "hellip", "…" }, /* U+2026 */
+  { "hercon", "⊹" }, /* U+22B9 */
+  { "hfr", "𝔥" }, /* U+1D525 */
+  { "hksearow", "⤥" }, /* U+2925 */
+  { "hkswarow", "⤦" }, /* U+2926 */
+  { "hoarr", "⇿" }, /* U+21FF */
+  { "homtht", "∻" }, /* U+223B */
+  { "hookleftarrow", "↩" }, /* U+21A9 */
+  { "hookrightarrow", "↪" }, /* U+21AA */
+  { "hopf", "𝕙" }, /* U+1D559 */
+  { "horbar", "―" }, /* U+2015 */
+  { "hscr", "𝒽" }, /* U+1D4BD */
+  { "hslash", "ℏ" }, /* U+210F */
+  { "hstrok", "ħ" }, /* U+0127 */
+  { "hybull", "⁃" }, /* U+2043 */
+  { "hyphen", "‐" }, /* U+2010 */
+  { "iacute", "í" }, /* U+00ED */
+  { "ic", "⁣" }, /* U+2063 */
+  { "icirc", "î" }, /* U+00EE */
+  { "icy", "и" }, /* U+0438 */
+  { "iecy", "е" }, /* U+0435 */
+  { "iexcl", "¡" }, /* U+00A1 */
+  { "iff", "⇔" }, /* U+21D4 */
+  { "ifr", "𝔦" }, /* U+1D526 */
+  { "igrave", "ì" }, /* U+00EC */
+  { "ii", "ⅈ" }, /* U+2148 */
+  { "iiiint", "⨌" }, /* U+2A0C */
+  { "iiint", "∭" }, /* U+222D */
+  { "iinfin", "⧜" }, /* U+29DC */
+  { "iiota", "℩" }, /* U+2129 */
+  { "ijlig", "ĳ" }, /* U+0133 */
+  { "imacr", "ī" }, /* U+012B */
+  { "image", "ℑ" }, /* U+2111 */
+  { "imagline", "ℐ" }, /* U+2110 */
+  { "imagpart", "ℑ" }, /* U+2111 */
+  { "imath", "ı" }, /* U+0131 */
+  { "imof", "⊷" }, /* U+22B7 */
+  { "imped", "Ƶ" }, /* U+01B5 */
+  { "in", "∈" }, /* U+2208 */
+  { "incare", "℅" }, /* U+2105 */
+  { "infin", "∞" }, /* U+221E */
+  { "infintie", "⧝" }, /* U+29DD */
+  { "inodot", "ı" }, /* U+0131 */
+  { "int", "∫" }, /* U+222B */
+  { "intcal", "⊺" }, /* U+22BA */
+  { "integers", "ℤ" }, /* U+2124 */
+  { "intercal", "⊺" }, /* U+22BA */
+  { "intlarhk", "⨗" }, /* U+2A17 */
+  { "intprod", "⨼" }, /* U+2A3C */
+  { "iocy", "ё" }, /* U+0451 */
+  { "iogon", "į" }, /* U+012F */
+  { "iopf", "𝕚" }, /* U+1D55A */
+  { "iota", "ι" }, /* U+03B9 */
+  { "iprod", "⨼" }, /* U+2A3C */
+  { "iquest", "¿" }, /* U+00BF */
+  { "iscr", "𝒾" }, /* U+1D4BE */
+  { "isin", "∈" }, /* U+2208 */
+  { "isinE", "⋹" }, /* U+22F9 */
+  { "isindot", "⋵" }, /* U+22F5 */
+  { "isins", "⋴" }, /* U+22F4 */
+  { "isinsv", "⋳" }, /* U+22F3 */
+  { "isinv", "∈" }, /* U+2208 */
+  { "it", "⁢" }, /* U+2062 */
+  { "itilde", "ĩ" }, /* U+0129 */
+  { "iukcy", "і" }, /* U+0456 */
+  { "iuml", "ï" }, /* U+00EF */
+  { "jcirc", "ĵ" }, /* U+0135 */
+  { "jcy", "й" }, /* U+0439 */
+  { "jfr", "𝔧" }, /* U+1D527 */
+  { "jmath", "ȷ" }, /* U+0237 */
+  { "jopf", "𝕛" }, /* U+1D55B */
+  { "jscr", "𝒿" }, /* U+1D4BF */
+  { "jsercy", "ј" }, /* U+0458 */
+  { "jukcy", "є" }, /* U+0454 */
+  { "kappa", "κ" }, /* U+03BA */
+  { "kappav", "ϰ" }, /* U+03F0 */
+  { "kcedil", "ķ" }, /* U+0137 */
+  { "kcy", "к" }, /* U+043A */
+  { "kfr", "𝔨" }, /* U+1D528 */
+  { "kgreen", "ĸ" }, /* U+0138 */
+  { "khcy", "х" }, /* U+0445 */
+  { "kjcy", "ќ" }, /* U+045C */
+  { "kopf", "𝕜" }, /* U+1D55C */
+  { "kscr", "𝓀" }, /* U+1D4C0 */
+  { "lAarr", "⇚" }, /* U+21DA */
+  { "lArr", "⇐" }, /* U+21D0 */
+  { "lAtail", "⤛" }, /* U+291B */
+  { "lBarr", "⤎" }, /* U+290E */
+  { "lE", "≦" }, /* U+2266 */
+  { "lEg", "⪋" }, /* U+2A8B */
+  { "lHar", "⥢" }, /* U+2962 */
+  { "lacute", "ĺ" }, /* U+013A */
+  { "laemptyv", "⦴" }, /* U+29B4 */
+  { "lagran", "ℒ" }, /* U+2112 */
+  { "lambda", "λ" }, /* U+03BB */
+  { "lang", "⟨" }, /* U+27E8 */
+  { "langd", "⦑" }, /* U+2991 */
+  { "langle", "⟨" }, /* U+27E8 */
+  { "lap", "⪅" }, /* U+2A85 */
+  { "laquo", "«" }, /* U+00AB */
+  { "larr", "←" }, /* U+2190 */
+  { "larrb", "⇤" }, /* U+21E4 */
+  { "larrbfs", "⤟" }, /* U+291F */
+  { "larrfs", "⤝" }, /* U+291D */
+  { "larrhk", "↩" }, /* U+21A9 */
+  { "larrlp", "↫" }, /* U+21AB */
+  { "larrpl", "⤹" }, /* U+2939 */
+  { "larrsim", "⥳" }, /* U+2973 */
+  { "larrtl", "↢" }, /* U+21A2 */
+  { "lat", "⪫" }, /* U+2AAB */
+  { "latail", "⤙" }, /* U+2919 */
+  { "late", "⪭" }, /* U+2AAD */
+  { "lates", "⪭︀" }, /* U+2AAD U+FE00 */
+  { "lbarr", "⤌" }, /* U+290C */
+  { "lbbrk", "❲" }, /* U+2772 */
+  { "lbrace", "{" }, /* U+007B */
+  { "lbrack", "[" }, /* U+005B */
+  { "lbrke", "⦋" }, /* U+298B */
+  { "lbrksld", "⦏" }, /* U+298F */
+  { "lbrkslu", "⦍" }, /* U+298D */
+  { "lcaron", "ľ" }, /* U+013E */
+  { "lcedil", "ļ" }, /* U+013C */
+  { "lceil", "⌈" }, /* U+2308 */
+  { "lcub", "{" }, /* U+007B */
+  { "lcy", "л" }, /* U+043B */
+  { "ldca", "⤶" }, /* U+2936 */
+  { "ldquo", "“" }, /* U+201C */
+  { "ldquor", "„" }, /* U+201E */
+  { "ldrdhar", "⥧" }, /* U+2967 */
+  { "ldrushar", "⥋" }, /* U+294B */
+  { "ldsh", "↲" }, /* U+21B2 */
+  { "le", "≤" }, /* U+2264 */
+  { "leftarrow", "←" }, /* U+2190 */
+  { "leftarrowtail", "↢" }, /* U+21A2 */
+  { "leftharpoondown", "↽" }, /* U+21BD */
+  { "leftharpoonup", "↼" }, /* U+21BC */
+  { "leftleftarrows", "⇇" }, /* U+21C7 */
+  { "leftrightarrow", "↔" }, /* U+2194 */
+  { "leftrightarrows", "⇆" }, /* U+21C6 */
+  { "leftthreetimes", "⋋" }, /* U+22CB */
+  { "leg", "⋚" }, /* U+22DA */
+  { "leq", "≤" }, /* U+2264 */
+  { "leqq", "≦" }, /* U+2266 */
+  { "leqslant", "⩽" }, /* U+2A7D */
+  { "les", "⩽" }, /* U+2A7D */
+  { "lescc", "⪨" }, /* U+2AA8 */
+  { "lesdot", "⩿" }, /* U+2A7F */
+  { "lesdoto", "⪁" }, /* U+2A81 */
+  { "lesdotor", "⪃" }, /* U+2A83 */
+  { "lesg", "⋚︀" }, /* U+22DA U+FE00 */
+  { "lesges", "⪓" }, /* U+2A93 */
+  { "lessapprox", "⪅" }, /* U+2A85 */
+  { "lessdot", "⋖" }, /* U+22D6 */
+  { "lesseqgtr", "⋚" }, /* U+22DA */
+  { "lesseqqgtr", "⪋" }, /* U+2A8B */
+  { "lessgtr", "≶" }, /* U+2276 */
+  { "lesssim", "≲" }, /* U+2272 */
+  { "lfisht", "⥼" }, /* U+297C */
+  { "lfloor", "⌊" }, /* U+230A */
+  { "lfr", "𝔩" }, /* U+1D529 */
+  { "lg", "≶" }, /* U+2276 */
+  { "lgE", "⪑" }, /* U+2A91 */
+  { "lhard", "↽" }, /* U+21BD */
+  { "lharu", "↼" }, /* U+21BC */
+  { "lharul", "⥪" }, /* U+296A */
+  { "lhblk", "▄" }, /* U+2584 */
+  { "ljcy", "љ" }, /* U+0459 */
+  { "ll", "≪" }, /* U+226A */
+  { "llarr", "⇇" }, /* U+21C7 */
+  { "llcorner", "⌞" }, /* U+231E */
+  { "llhard", "⥫" }, /* U+296B */
+  { "lltri", "◺" }, /* U+25FA */
+  { "lmidot", "ŀ" }, /* U+0140 */
+  { "lmoust", "⎰" }, /* U+23B0 */
+  { "lmoustache", "⎰" }, /* U+23B0 */
+  { "lnE", "≨" }, /* U+2268 */
+  { "lnap", "⪉" }, /* U+2A89 */
+  { "lnapprox", "⪉" }, /* U+2A89 */
+  { "lne", "⪇" }, /* U+2A87 */
+  { "lneq", "⪇" }, /* U+2A87 */
+  { "lneqq", "≨" }, /* U+2268 */
+  { "lnsim", "⋦" }, /* U+22E6 */
+  { "loang", "⟬" }, /* U+27EC */
+  { "loarr", "⇽" }, /* U+21FD */
+  { "lobrk", "⟦" }, /* U+27E6 */
+  { "longleftarrow", "⟵" }, /* U+27F5 */
+  { "longmapsto", "⟼" }, /* U+27FC */
+  { "longrightarrow", "⟶" }, /* U+27F6 */
+  { "looparrowleft", "↫" }, /* U+21AB */
+  { "looparrowright", "↬" }, /* U+21AC */
+  { "lopar", "⦅" }, /* U+2985 */
+  { "lopf", "𝕝" }, /* U+1D55D */
+  { "loplus", "⨭" }, /* U+2A2D */
+  { "lotimes", "⨴" }, /* U+2A34 */
+  { "lowast", "∗" }, /* U+2217 */
+  { "lowbar", "_" }, /* U+005F */
+  { "loz", "◊" }, /* U+25CA */
+  { "lozenge", "◊" }, /* U+25CA */
+  { "lozf", "⧫" }, /* U+29EB */
+  { "lpar", "(" }, /* U+0028 */
+  { "lparlt", "⦓" }, /* U+2993 */
+  { "lrarr", "⇆" }, /* U+21C6 */
+  { "lrcorner", "⌟" }, /* U+231F */
+  { "lrhar", "⇋" }, /* U+21CB */
+  { "lrhard", "⥭" }, /* U+296D */
+  { "lrm", "\342\200\216" }, /* U+200E */
+  { "lrtri", "⊿" }, /* U+22BF */
+  { "lsaquo", "‹" }, /* U+2039 */
+  { "lscr", "𝓁" }, /* U+1D4C1 */
+  { "lsh", "↰" }, /* U+21B0 */
+  { "lsim", "≲" }, /* U+2272 */
+  { "lsime", "⪍" }, /* U+2A8D */
+  { "lsimg", "⪏" }, /* U+2A8F */
+  { "lsqb", "[" }, /* U+005B */
+  { "lsquo", "‘" }, /* U+2018 */
+  { "lsquor", "‚" }, /* U+201A */
+  { "lstrok", "ł" }, /* U+0142 */
+  { "lt", "<" }, /* U+003C */
+  { "ltcc", "⪦" }, /* U+2AA6 */
+  { "ltcir", "⩹" }, /* U+2A79 */
+  { "ltdot", "⋖" }, /* U+22D6 */
+  { "lthree", "⋋" }, /* U+22CB */
+  { "ltimes", "⋉" }, /* U+22C9 */
+  { "ltlarr", "⥶" }, /* U+2976 */
+  { "ltquest", "⩻" }, /* U+2A7B */
+  { "ltrPar", "⦖" }, /* U+2996 */
+  { "ltri", "◃" }, /* U+25C3 */
+  { "ltrie", "⊴" }, /* U+22B4 */
+  { "ltrif", "◂" }, /* U+25C2 */
+  { "lurdshar", "⥊" }, /* U+294A */
+  { "luruhar", "⥦" }, /* U+2966 */
+  { "lvertneqq", "≨︀" }, /* U+2268 U+FE00 */
+  { "lvnE", "≨︀" }, /* U+2268 U+FE00 */
+  { "mDDot", "∺" }, /* U+223A */
+  { "macr", "¯" }, /* U+00AF */
+  { "male", "♂" }, /* U+2642 */
+  { "malt", "✠" }, /* U+2720 */
+  { "maltese", "✠" }, /* U+2720 */
+  { "map", "↦" }, /* U+21A6 */
+  { "mapsto", "↦" }, /* U+21A6 */
+  { "mapstodown", "↧" }, /* U+21A7 */
+  { "mapstoleft", "↤" }, /* U+21A4 */
+  { "mapstoup", "↥" }, /* U+21A5 */
+  { "marker", "▮" }, /* U+25AE */
+  { "mcomma", "⨩" }, /* U+2A29 */
+  { "mcy", "м" }, /* U+043C */
+  { "mdash", "—" }, /* U+2014 */
+  { "measuredangle", "∡" }, /* U+2221 */
+  { "mfr", "𝔪" }, /* U+1D52A */
+  { "mho", "℧" }, /* U+2127 */
+  { "micro", "µ" }, /* U+00B5 */
+  { "mid", "∣" }, /* U+2223 */
+  { "midast", "*" }, /* U+002A */
+  { "midcir", "⫰" }, /* U+2AF0 */
+  { "middot", "·" }, /* U+00B7 */
+  { "minus", "−" }, /* U+2212 */
+  { "minusb", "⊟" }, /* U+229F */
+  { "minusd", "∸" }, /* U+2238 */
+  { "minusdu", "⨪" }, /* U+2A2A */
+  { "mlcp", "⫛" }, /* U+2ADB */
+  { "mldr", "…" }, /* U+2026 */
+  { "mnplus", "∓" }, /* U+2213 */
+  { "models", "⊧" }, /* U+22A7 */
+  { "mopf", "𝕞" }, /* U+1D55E */
+  { "mp", "∓" }, /* U+2213 */
+  { "mscr", "𝓂" }, /* U+1D4C2 */
+  { "mstpos", "∾" }, /* U+223E */
+  { "mu", "μ" }, /* U+03BC */
+  { "multimap", "⊸" }, /* U+22B8 */
+  { "mumap", "⊸" }, /* U+22B8 */
+  { "nGg", "⋙̸" }, /* U+22D9 U+0338 */
+  { "nGt", "≫⃒" }, /* U+226B U+20D2 */
+  { "nGtv", "≫̸" }, /* U+226B U+0338 */
+  { "nLeftarrow", "⇍" }, /* U+21CD */
+  { "nLeftrightarrow", "⇎" }, /* U+21CE */
+  { "nLl", "⋘̸" }, /* U+22D8 U+0338 */
+  { "nLt", "≪⃒" }, /* U+226A U+20D2 */
+  { "nLtv", "≪̸" }, /* U+226A U+0338 */
+  { "nRightarrow", "⇏" }, /* U+21CF */
+  { "nVDash", "⊯" }, /* U+22AF */
+  { "nVdash", "⊮" }, /* U+22AE */
+  { "nabla", "∇" }, /* U+2207 */
+  { "nacute", "ń" }, /* U+0144 */
+  { "nang", "∠⃒" }, /* U+2220 U+20D2 */
+  { "nap", "≉" }, /* U+2249 */
+  { "napE", "⩰̸" }, /* U+2A70 U+0338 */
+  { "napid", "≋̸" }, /* U+224B U+0338 */
+  { "napos", "ŉ" }, /* U+0149 */
+  { "napprox", "≉" }, /* U+2249 */
+  { "natur", "♮" }, /* U+266E */
+  { "natural", "♮" }, /* U+266E */
+  { "naturals", "ℕ" }, /* U+2115 */
+  { "nbsp", " " }, /* U+00A0 */
+  { "nbump", "≎̸" }, /* U+224E U+0338 */
+  { "nbumpe", "≏̸" }, /* U+224F U+0338 */
+  { "ncap", "⩃" }, /* U+2A43 */
+  { "ncaron", "ň" }, /* U+0148 */
+  { "ncedil", "ņ" }, /* U+0146 */
+  { "ncong", "≇" }, /* U+2247 */
+  { "ncongdot", "⩭̸" }, /* U+2A6D U+0338 */
+  { "ncup", "⩂" }, /* U+2A42 */
+  { "ncy", "н" }, /* U+043D */
+  { "ndash", "–" }, /* U+2013 */
+  { "ne", "≠" }, /* U+2260 */
+  { "neArr", "⇗" }, /* U+21D7 */
+  { "nearhk", "⤤" }, /* U+2924 */
+  { "nearr", "↗" }, /* U+2197 */
+  { "nearrow", "↗" }, /* U+2197 */
+  { "nedot", "≐̸" }, /* U+2250 U+0338 */
+  { "nequiv", "≢" }, /* U+2262 */
+  { "nesear", "⤨" }, /* U+2928 */
+  { "nesim", "≂̸" }, /* U+2242 U+0338 */
+  { "nexist", "∄" }, /* U+2204 */
+  { "nexists", "∄" }, /* U+2204 */
+  { "nfr", "𝔫" }, /* U+1D52B */
+  { "ngE", "≧̸" }, /* U+2267 U+0338 */
+  { "nge", "≱" }, /* U+2271 */
+  { "ngeq", "≱" }, /* U+2271 */
+  { "ngeqq", "≧̸" }, /* U+2267 U+0338 */
+  { "ngeqslant", "⩾̸" }, /* U+2A7E U+0338 */
+  { "nges", "⩾̸" }, /* U+2A7E U+0338 */
+  { "ngsim", "≵" }, /* U+2275 */
+  { "ngt", "≯" }, /* U+226F */
+  { "ngtr", "≯" }, /* U+226F */
+  { "nhArr", "⇎" }, /* U+21CE */
+  { "nharr", "↮" }, /* U+21AE */
+  { "nhpar", "⫲" }, /* U+2AF2 */
+  { "ni", "∋" }, /* U+220B */
+  { "nis", "⋼" }, /* U+22FC */
+  { "nisd", "⋺" }, /* U+22FA */
+  { "niv", "∋" }, /* U+220B */
+  { "njcy", "њ" }, /* U+045A */
+  { "nlArr", "⇍" }, /* U+21CD */
+  { "nlE", "≦̸" }, /* U+2266 U+0338 */
+  { "nlarr", "↚" }, /* U+219A */
+  { "nldr", "‥" }, /* U+2025 */
+  { "nle", "≰" }, /* U+2270 */
+  { "nleftarrow", "↚" }, /* U+219A */
+  { "nleftrightarrow", "↮" }, /* U+21AE */
+  { "nleq", "≰" }, /* U+2270 */
+  { "nleqq", "≦̸" }, /* U+2266 U+0338 */
+  { "nleqslant", "⩽̸" }, /* U+2A7D U+0338 */
+  { "nles", "⩽̸" }, /* U+2A7D U+0338 */
+  { "nless", "≮" }, /* U+226E */
+  { "nlsim", "≴" }, /* U+2274 */
+  { "nlt", "≮" }, /* U+226E */
+  { "nltri", "⋪" }, /* U+22EA */
+  { "nltrie", "⋬" }, /* U+22EC */
+  { "nmid", "∤" }, /* U+2224 */
+  { "nopf", "𝕟" }, /* U+1D55F */
+  { "not", "¬" }, /* U+00AC */
+  { "notin", "∉" }, /* U+2209 */
+  { "notinE", "⋹̸" }, /* U+22F9 U+0338 */
+  { "notindot", "⋵̸" }, /* U+22F5 U+0338 */
+  { "notinva", "∉" }, /* U+2209 */
+  { "notinvb", "⋷" }, /* U+22F7 */
+  { "notinvc", "⋶" }, /* U+22F6 */
+  { "notni", "∌" }, /* U+220C */
+  { "notniva", "∌" }, /* U+220C */
+  { "notnivb", "⋾" }, /* U+22FE */
+  { "notnivc", "⋽" }, /* U+22FD */
+  { "npar", "∦" }, /* U+2226 */
+  { "nparallel", "∦" }, /* U+2226 */
+  { "nparsl", "⫽⃥" }, /* U+2AFD U+20E5 */
+  { "npart", "∂̸" }, /* U+2202 U+0338 */
+  { "npolint", "⨔" }, /* U+2A14 */
+  { "npr", "⊀" }, /* U+2280 */
+  { "nprcue", "⋠" }, /* U+22E0 */
+  { "npre", "⪯̸" }, /* U+2AAF U+0338 */
+  { "nprec", "⊀" }, /* U+2280 */
+  { "npreceq", "⪯̸" }, /* U+2AAF U+0338 */
+  { "nrArr", "⇏" }, /* U+21CF */
+  { "nrarr", "↛" }, /* U+219B */
+  { "nrarrc", "⤳̸" }, /* U+2933 U+0338 */
+  { "nrarrw", "↝̸" }, /* U+219D U+0338 */
+  { "nrightarrow", "↛" }, /* U+219B */
+  { "nrtri", "⋫" }, /* U+22EB */
+  { "nrtrie", "⋭" }, /* U+22ED */
+  { "nsc", "⊁" }, /* U+2281 */
+  { "nsccue", "⋡" }, /* U+22E1 */
+  { "nsce", "⪰̸" }, /* U+2AB0 U+0338 */
+  { "nscr", "𝓃" }, /* U+1D4C3 */
+  { "nshortmid", "∤" }, /* U+2224 */
+  { "nshortparallel", "∦" }, /* U+2226 */
+  { "nsim", "≁" }, /* U+2241 */
+  { "nsime", "≄" }, /* U+2244 */
+  { "nsimeq", "≄" }, /* U+2244 */
+  { "nsmid", "∤" }, /* U+2224 */
+  { "nspar", "∦" }, /* U+2226 */
+  { "nsqsube", "⋢" }, /* U+22E2 */
+  { "nsqsupe", "⋣" }, /* U+22E3 */
+  { "nsub", "⊄" }, /* U+2284 */
+  { "nsubE", "⫅̸" }, /* U+2AC5 U+0338 */
+  { "nsube", "⊈" }, /* U+2288 */
+  { "nsubset", "⊂⃒" }, /* U+2282 U+20D2 */
+  { "nsubseteq", "⊈" }, /* U+2288 */
+  { "nsubseteqq", "⫅̸" }, /* U+2AC5 U+0338 */
+  { "nsucc", "⊁" }, /* U+2281 */
+  { "nsucceq", "⪰̸" }, /* U+2AB0 U+0338 */
+  { "nsup", "⊅" }, /* U+2285 */
+  { "nsupE", "⫆̸" }, /* U+2AC6 U+0338 */
+  { "nsupe", "⊉" }, /* U+2289 */
+  { "nsupset", "⊃⃒" }, /* U+2283 U+20D2 */
+  { "nsupseteq", "⊉" }, /* U+2289 */
+  { "nsupseteqq", "⫆̸" }, /* U+2AC6 U+0338 */
+  { "ntgl", "≹" }, /* U+2279 */
+  { "ntilde", "ñ" }, /* U+00F1 */
+  { "ntlg", "≸" }, /* U+2278 */
+  { "ntriangleleft", "⋪" }, /* U+22EA */
+  { "ntrianglelefteq", "⋬" }, /* U+22EC */
+  { "ntriangleright", "⋫" }, /* U+22EB */
+  { "nu", "ν" }, /* U+03BD */
+  { "num", "#" }, /* U+0023 */
+  { "numero", "№" }, /* U+2116 */
+  { "numsp", " " }, /* U+2007 */
+  { "nvDash", "⊭" }, /* U+22AD */
+  { "nvHarr", "⤄" }, /* U+2904 */
+  { "nvap", "≍⃒" }, /* U+224D U+20D2 */
+  { "nvdash", "⊬" }, /* U+22AC */
+  { "nvge", "≥⃒" }, /* U+2265 U+20D2 */
+  { "nvgt", ">⃒" }, /* U+003E U+20D2 */
+  { "nvinfin", "⧞" }, /* U+29DE */
+  { "nvlArr", "⤂" }, /* U+2902 */
+  { "nvle", "≤⃒" }, /* U+2264 U+20D2 */
+  { "nvlt", "<⃒" }, /* U+003C U+20D2 */
+  { "nvltrie", "⊴⃒" }, /* U+22B4 U+20D2 */
+  { "nvrArr", "⤃" }, /* U+2903 */
+  { "nvrtrie", "⊵⃒" }, /* U+22B5 U+20D2 */
+  { "nvsim", "∼⃒" }, /* U+223C U+20D2 */
+  { "nwArr", "⇖" }, /* U+21D6 */
+  { "nwarhk", "⤣" }, /* U+2923 */
+  { "nwarr", "↖" }, /* U+2196 */
+  { "nwarrow", "↖" }, /* U+2196 */
+  { "nwnear", "⤧" }, /* U+2927 */
+  { "oS", "Ⓢ" }, /* U+24C8 */
+  { "oacute", "ó" }, /* U+00F3 */
+  { "oast", "⊛" }, /* U+229B */
+  { "ocir", "⊚" }, /* U+229A */
+  { "ocirc", "ô" }, /* U+00F4 */
+  { "ocy", "о" }, /* U+043E */
+  { "odash", "⊝" }, /* U+229D */
+  { "odblac", "ő" }, /* U+0151 */
+  { "odiv", "⨸" }, /* U+2A38 */
+  { "odot", "⊙" }, /* U+2299 */
+  { "odsold", "⦼" }, /* U+29BC */
+  { "oelig", "œ" }, /* U+0153 */
+  { "ofcir", "⦿" }, /* U+29BF */
+  { "ofr", "𝔬" }, /* U+1D52C */
+  { "ogon", "˛" }, /* U+02DB */
+  { "ograve", "ò" }, /* U+00F2 */
+  { "ogt", "⧁" }, /* U+29C1 */
+  { "ohbar", "⦵" }, /* U+29B5 */
+  { "ohm", "Ω" }, /* U+03A9 */
+  { "oint", "∮" }, /* U+222E */
+  { "olarr", "↺" }, /* U+21BA */
+  { "olcir", "⦾" }, /* U+29BE */
+  { "olcross", "⦻" }, /* U+29BB */
+  { "oline", "‾" }, /* U+203E */
+  { "olt", "⧀" }, /* U+29C0 */
+  { "omacr", "ō" }, /* U+014D */
+  { "omega", "ω" }, /* U+03C9 */
+  { "omicron", "ο" }, /* U+03BF */
+  { "omid", "⦶" }, /* U+29B6 */
+  { "ominus", "⊖" }, /* U+2296 */
+  { "oopf", "𝕠" }, /* U+1D560 */
+  { "opar", "⦷" }, /* U+29B7 */
+  { "operp", "⦹" }, /* U+29B9 */
+  { "oplus", "⊕" }, /* U+2295 */
+  { "or", "∨" }, /* U+2228 */
+  { "orarr", "↻" }, /* U+21BB */
+  { "ord", "⩝" }, /* U+2A5D */
+  { "order", "ℴ" }, /* U+2134 */
+  { "orderof", "ℴ" }, /* U+2134 */
+  { "ordf", "ª" }, /* U+00AA */
+  { "ordm", "º" }, /* U+00BA */
+  { "origof", "⊶" }, /* U+22B6 */
+  { "oror", "⩖" }, /* U+2A56 */
+  { "orslope", "⩗" }, /* U+2A57 */
+  { "orv", "⩛" }, /* U+2A5B */
+  { "oscr", "ℴ" }, /* U+2134 */
+  { "oslash", "ø" }, /* U+00F8 */
+  { "osol", "⊘" }, /* U+2298 */
+  { "otilde", "õ" }, /* U+00F5 */
+  { "otimes", "⊗" }, /* U+2297 */
+  { "otimesas", "⨶" }, /* U+2A36 */
+  { "ouml", "ö" }, /* U+00F6 */
+  { "ovbar", "⌽" }, /* U+233D */
+  { "par", "∥" }, /* U+2225 */
+  { "para", "¶" }, /* U+00B6 */
+  { "parallel", "∥" }, /* U+2225 */
+  { "parsim", "⫳" }, /* U+2AF3 */
+  { "parsl", "⫽" }, /* U+2AFD */
+  { "part", "∂" }, /* U+2202 */
+  { "pcy", "п" }, /* U+043F */
+  { "percnt", "%" }, /* U+0025 */
+  { "period", "." }, /* U+002E */
+  { "permil", "‰" }, /* U+2030 */
+  { "perp", "⊥" }, /* U+22A5 */
+  { "pertenk", "‱" }, /* U+2031 */
+  { "pfr", "𝔭" }, /* U+1D52D */
+  { "phi", "φ" }, /* U+03C6 */
+  { "phiv", "ϕ" }, /* U+03D5 */
+  { "phmmat", "ℳ" }, /* U+2133 */
+  { "phone", "☎" }, /* U+260E */
+  { "pi", "π" }, /* U+03C0 */
+  { "pitchfork", "⋔" }, /* U+22D4 */
+  { "piv", "ϖ" }, /* U+03D6 */
+  { "planck", "ℏ" }, /* U+210F */
+  { "planckh", "ℎ" }, /* U+210E */
+  { "plankv", "ℏ" }, /* U+210F */
+  { "plus", "+" }, /* U+002B */
+  { "plusacir", "⨣" }, /* U+2A23 */
+  { "plusb", "⊞" }, /* U+229E */
+  { "pluscir", "⨢" }, /* U+2A22 */
+  { "plusdo", "∔" }, /* U+2214 */
+  { "plusdu", "⨥" }, /* U+2A25 */
+  { "pluse", "⩲" }, /* U+2A72 */
+  { "plusmn", "±" }, /* U+00B1 */
+  { "plussim", "⨦" }, /* U+2A26 */
+  { "plustwo", "⨧" }, /* U+2A27 */
+  { "pm", "±" }, /* U+00B1 */
+  { "pointint", "⨕" }, /* U+2A15 */
+  { "popf", "𝕡" }, /* U+1D561 */
+  { "pound", "£" }, /* U+00A3 */
+  { "pr", "≺" }, /* U+227A */
+  { "prE", "⪳" }, /* U+2AB3 */
+  { "prap", "⪷" }, /* U+2AB7 */
+  { "prcue", "≼" }, /* U+227C */
+  { "pre", "⪯" }, /* U+2AAF */
+  { "prec", "≺" }, /* U+227A */
+  { "precapprox", "⪷" }, /* U+2AB7 */
+  { "preccurlyeq", "≼" }, /* U+227C */
+  { "preceq", "⪯" }, /* U+2AAF */
+  { "precnapprox", "⪹" }, /* U+2AB9 */
+  { "precneqq", "⪵" }, /* U+2AB5 */
+  { "precnsim", "⋨" }, /* U+22E8 */
+  { "precsim", "≾" }, /* U+227E */
+  { "prime", "′" }, /* U+2032 */
+  { "primes", "ℙ" }, /* U+2119 */
+  { "prnE", "⪵" }, /* U+2AB5 */
+  { "prnap", "⪹" }, /* U+2AB9 */
+  { "prnsim", "⋨" }, /* U+22E8 */
+  { "prod", "∏" }, /* U+220F */
+  { "profalar", "⌮" }, /* U+232E */
+  { "profline", "⌒" }, /* U+2312 */
+  { "profsurf", "⌓" }, /* U+2313 */
+  { "prop", "∝" }, /* U+221D */
+  { "propto", "∝" }, /* U+221D */
+  { "prsim", "≾" }, /* U+227E */
+  { "prurel", "⊰" }, /* U+22B0 */
+  { "pscr", "𝓅" }, /* U+1D4C5 */
+  { "psi", "ψ" }, /* U+03C8 */
+  { "puncsp", " " }, /* U+2008 */
+  { "qfr", "𝔮" }, /* U+1D52E */
+  { "qint", "⨌" }, /* U+2A0C */
+  { "qopf", "𝕢" }, /* U+1D562 */
+  { "qprime", "⁗" }, /* U+2057 */
+  { "qscr", "𝓆" }, /* U+1D4C6 */
+  { "quaternions", "ℍ" }, /* U+210D */
+  { "quatint", "⨖" }, /* U+2A16 */
+  { "quest", "?" }, /* U+003F */
+  { "questeq", "≟" }, /* U+225F */
+  { "quot", "\"" }, /* U+0022 */
+  { "rAarr", "⇛" }, /* U+21DB */
+  { "rArr", "⇒" }, /* U+21D2 */
+  { "rAtail", "⤜" }, /* U+291C */
+  { "rBarr", "⤏" }, /* U+290F */
+  { "rHar", "⥤" }, /* U+2964 */
+  { "race", "∽̱" }, /* U+223D U+0331 */
+  { "racute", "ŕ" }, /* U+0155 */
+  { "radic", "√" }, /* U+221A */
+  { "raemptyv", "⦳" }, /* U+29B3 */
+  { "rang", "⟩" }, /* U+27E9 */
+  { "rangd", "⦒" }, /* U+2992 */
+  { "range", "⦥" }, /* U+29A5 */
+  { "rangle", "⟩" }, /* U+27E9 */
+  { "raquo", "»" }, /* U+00BB */
+  { "rarr", "→" }, /* U+2192 */
+  { "rarrap", "⥵" }, /* U+2975 */
+  { "rarrb", "⇥" }, /* U+21E5 */
+  { "rarrbfs", "⤠" }, /* U+2920 */
+  { "rarrc", "⤳" }, /* U+2933 */
+  { "rarrfs", "⤞" }, /* U+291E */
+  { "rarrhk", "↪" }, /* U+21AA */
+  { "rarrlp", "↬" }, /* U+21AC */
+  { "rarrpl", "⥅" }, /* U+2945 */
+  { "rarrsim", "⥴" }, /* U+2974 */
+  { "rarrtl", "↣" }, /* U+21A3 */
+  { "rarrw", "↝" }, /* U+219D */
+  { "ratail", "⤚" }, /* U+291A */
+  { "ratio", "∶" }, /* U+2236 */
+  { "rationals", "ℚ" }, /* U+211A */
+  { "rbarr", "⤍" }, /* U+290D */
+  { "rbbrk", "❳" }, /* U+2773 */
+  { "rbrace", "}" }, /* U+007D */
+  { "rbrack", "]" }, /* U+005D */
+  { "rbrke", "⦌" }, /* U+298C */
+  { "rbrksld", "⦎" }, /* U+298E */
+  { "rbrkslu", "⦐" }, /* U+2990 */
+  { "rcaron", "ř" }, /* U+0159 */
+  { "rcedil", "ŗ" }, /* U+0157 */
+  { "rceil", "⌉" }, /* U+2309 */
+  { "rcub", "}" }, /* U+007D */
+  { "rcy", "р" }, /* U+0440 */
+  { "rdca", "⤷" }, /* U+2937 */
+  { "rdldhar", "⥩" }, /* U+2969 */
+  { "rdquo", "”" }, /* U+201D */
+  { "rdquor", "”" }, /* U+201D */
+  { "rdsh", "↳" }, /* U+21B3 */
+  { "real", "ℜ" }, /* U+211C */
+  { "realine", "ℛ" }, /* U+211B */
+  { "realpart", "ℜ" }, /* U+211C */
+  { "reals", "ℝ" }, /* U+211D */
+  { "rect", "▭" }, /* U+25AD */
+  { "reg", "®" }, /* U+00AE */
+  { "rfisht", "⥽" }, /* U+297D */
+  { "rfloor", "⌋" }, /* U+230B */
+  { "rfr", "𝔯" }, /* U+1D52F */
+  { "rhard", "⇁" }, /* U+21C1 */
+  { "rharu", "⇀" }, /* U+21C0 */
+  { "rharul", "⥬" }, /* U+296C */
+  { "rho", "ρ" }, /* U+03C1 */
+  { "rhov", "ϱ" }, /* U+03F1 */
+  { "rightarrow", "→" }, /* U+2192 */
+  { "rightarrowtail", "↣" }, /* U+21A3 */
+  { "rightharpoonup", "⇀" }, /* U+21C0 */
+  { "rightleftarrows", "⇄" }, /* U+21C4 */
+  { "rightsquigarrow", "↝" }, /* U+219D */
+  { "rightthreetimes", "⋌" }, /* U+22CC */
+  { "ring", "˚" }, /* U+02DA */
+  { "risingdotseq", "≓" }, /* U+2253 */
+  { "rlarr", "⇄" }, /* U+21C4 */
+  { "rlhar", "⇌" }, /* U+21CC */
+  { "rlm", "\342\200\217" }, /* U+200F */
+  { "rmoust", "⎱" }, /* U+23B1 */
+  { "rmoustache", "⎱" }, /* U+23B1 */
+  { "rnmid", "⫮" }, /* U+2AEE */
+  { "roang", "⟭" }, /* U+27ED */
+  { "roarr", "⇾" }, /* U+21FE */
+  { "robrk", "⟧" }, /* U+27E7 */
+  { "ropar", "⦆" }, /* U+2986 */
+  { "ropf", "𝕣" }, /* U+1D563 */
+  { "roplus", "⨮" }, /* U+2A2E */
+  { "rotimes", "⨵" }, /* U+2A35 */
+  { "rpar", ")" }, /* U+0029 */
+  { "rpargt", "⦔" }, /* U+2994 */
+  { "rppolint", "⨒" }, /* U+2A12 */
+  { "rrarr", "⇉" }, /* U+21C9 */
+  { "rsaquo", "›" }, /* U+203A */
+  { "rscr", "𝓇" }, /* U+1D4C7 */
+  { "rsh", "↱" }, /* U+21B1 */
+  { "rsqb", "]" }, /* U+005D */
+  { "rsquo", "’" }, /* U+2019 */
+  { "rsquor", "’" }, /* U+2019 */
+  { "rthree", "⋌" }, /* U+22CC */
+  { "rtimes", "⋊" }, /* U+22CA */
+  { "rtri", "▹" }, /* U+25B9 */
+  { "rtrie", "⊵" }, /* U+22B5 */
+  { "rtrif", "▸" }, /* U+25B8 */
+  { "rtriltri", "⧎" }, /* U+29CE */
+  { "ruluhar", "⥨" }, /* U+2968 */
+  { "rx", "℞" }, /* U+211E */
+  { "sacute", "ś" }, /* U+015B */
+  { "sbquo", "‚" }, /* U+201A */
+  { "sc", "≻" }, /* U+227B */
+  { "scE", "⪴" }, /* U+2AB4 */
+  { "scap", "⪸" }, /* U+2AB8 */
+  { "scaron", "š" }, /* U+0161 */
+  { "sccue", "≽" }, /* U+227D */
+  { "sce", "⪰" }, /* U+2AB0 */
+  { "scedil", "ş" }, /* U+015F */
+  { "scirc", "ŝ" }, /* U+015D */
+  { "scnE", "⪶" }, /* U+2AB6 */
+  { "scnap", "⪺" }, /* U+2ABA */
+  { "scnsim", "⋩" }, /* U+22E9 */
+  { "scpolint", "⨓" }, /* U+2A13 */
+  { "scsim", "≿" }, /* U+227F */
+  { "scy", "с" }, /* U+0441 */
+  { "sdot", "⋅" }, /* U+22C5 */
+  { "sdotb", "⊡" }, /* U+22A1 */
+  { "sdote", "⩦" }, /* U+2A66 */
+  { "seArr", "⇘" }, /* U+21D8 */
+  { "searhk", "⤥" }, /* U+2925 */
+  { "searr", "↘" }, /* U+2198 */
+  { "searrow", "↘" }, /* U+2198 */
+  { "sect", "§" }, /* U+00A7 */
+  { "semi", ";" }, /* U+003B */
+  { "seswar", "⤩" }, /* U+2929 */
+  { "setminus", "∖" }, /* U+2216 */
+  { "setmn", "∖" }, /* U+2216 */
+  { "sext", "✶" }, /* U+2736 */
+  { "sfr", "𝔰" }, /* U+1D530 */
+  { "sfrown", "⌢" }, /* U+2322 */
+  { "sharp", "♯" }, /* U+266F */
+  { "shchcy", "щ" }, /* U+0449 */
+  { "shcy", "ш" }, /* U+0448 */
+  { "shortmid", "∣" }, /* U+2223 */
+  { "shortparallel", "∥" }, /* U+2225 */
+  { "shy", "" }, /* U+00AD */
+  { "sigma", "σ" }, /* U+03C3 */
+  { "sigmaf", "ς" }, /* U+03C2 */
+  { "sigmav", "ς" }, /* U+03C2 */
+  { "sim", "∼" }, /* U+223C */
+  { "simdot", "⩪" }, /* U+2A6A */
+  { "sime", "≃" }, /* U+2243 */
+  { "simeq", "≃" }, /* U+2243 */
+  { "simg", "⪞" }, /* U+2A9E */
+  { "simgE", "⪠" }, /* U+2AA0 */
+  { "siml", "⪝" }, /* U+2A9D */
+  { "simlE", "⪟" }, /* U+2A9F */
+  { "simne", "≆" }, /* U+2246 */
+  { "simplus", "⨤" }, /* U+2A24 */
+  { "simrarr", "⥲" }, /* U+2972 */
+  { "slarr", "←" }, /* U+2190 */
+  { "smallsetminus", "∖" }, /* U+2216 */
+  { "smashp", "⨳" }, /* U+2A33 */
+  { "smeparsl", "⧤" }, /* U+29E4 */
+  { "smid", "∣" }, /* U+2223 */
+  { "smile", "⌣" }, /* U+2323 */
+  { "smt", "⪪" }, /* U+2AAA */
+  { "smte", "⪬" }, /* U+2AAC */
+  { "smtes", "⪬︀" }, /* U+2AAC U+FE00 */
+  { "softcy", "ь" }, /* U+044C */
+  { "sol", "/" }, /* U+002F */
+  { "solb", "⧄" }, /* U+29C4 */
+  { "solbar", "⌿" }, /* U+233F */
+  { "sopf", "𝕤" }, /* U+1D564 */
+  { "spades", "♠" }, /* U+2660 */
+  { "spadesuit", "♠" }, /* U+2660 */
+  { "spar", "∥" }, /* U+2225 */
+  { "sqcap", "⊓" }, /* U+2293 */
+  { "sqcaps", "⊓︀" }, /* U+2293 U+FE00 */
+  { "sqcup", "⊔" }, /* U+2294 */
+  { "sqcups", "⊔︀" }, /* U+2294 U+FE00 */
+  { "sqsub", "⊏" }, /* U+228F */
+  { "sqsube", "⊑" }, /* U+2291 */
+  { "sqsubset", "⊏" }, /* U+228F */
+  { "sqsubseteq", "⊑" }, /* U+2291 */
+  { "sqsup", "⊐" }, /* U+2290 */
+  { "sqsupe", "⊒" }, /* U+2292 */
+  { "sqsupset", "⊐" }, /* U+2290 */
+  { "sqsupseteq", "⊒" }, /* U+2292 */
+  { "squ", "□" }, /* U+25A1 */
+  { "square", "□" }, /* U+25A1 */
+  { "squarf", "▪" }, /* U+25AA */
+  { "squf", "▪" }, /* U+25AA */
+  { "srarr", "→" }, /* U+2192 */
+  { "sscr", "𝓈" }, /* U+1D4C8 */
+  { "ssetmn", "∖" }, /* U+2216 */
+  { "ssmile", "⌣" }, /* U+2323 */
+  { "sstarf", "⋆" }, /* U+22C6 */
+  { "star", "☆" }, /* U+2606 */
+  { "starf", "★" }, /* U+2605 */
+  { "straightepsilon", "ϵ" }, /* U+03F5 */
+  { "straightphi", "ϕ" }, /* U+03D5 */
+  { "strns", "¯" }, /* U+00AF */
+  { "sub", "⊂" }, /* U+2282 */
+  { "subE", "⫅" }, /* U+2AC5 */
+  { "subdot", "⪽" }, /* U+2ABD */
+  { "sube", "⊆" }, /* U+2286 */
+  { "subedot", "⫃" }, /* U+2AC3 */
+  { "submult", "⫁" }, /* U+2AC1 */
+  { "subnE", "⫋" }, /* U+2ACB */
+  { "subne", "⊊" }, /* U+228A */
+  { "subplus", "⪿" }, /* U+2ABF */
+  { "subrarr", "⥹" }, /* U+2979 */
+  { "subset", "⊂" }, /* U+2282 */
+  { "subseteq", "⊆" }, /* U+2286 */
+  { "subseteqq", "⫅" }, /* U+2AC5 */
+  { "subsetneq", "⊊" }, /* U+228A */
+  { "subsetneqq", "⫋" }, /* U+2ACB */
+  { "subsim", "⫇" }, /* U+2AC7 */
+  { "subsub", "⫕" }, /* U+2AD5 */
+  { "subsup", "⫓" }, /* U+2AD3 */
+  { "succ", "≻" }, /* U+227B */
+  { "succapprox", "⪸" }, /* U+2AB8 */
+  { "succcurlyeq", "≽" }, /* U+227D */
+  { "succeq", "⪰" }, /* U+2AB0 */
+  { "succnapprox", "⪺" }, /* U+2ABA */
+  { "succneqq", "⪶" }, /* U+2AB6 */
+  { "succnsim", "⋩" }, /* U+22E9 */
+  { "succsim", "≿" }, /* U+227F */
+  { "sum", "∑" }, /* U+2211 */
+  { "sung", "♪" }, /* U+266A */
+  { "sup", "⊃" }, /* U+2283 */
+  { "sup1", "¹" }, /* U+00B9 */
+  { "sup2", "²" }, /* U+00B2 */
+  { "sup3", "³" }, /* U+00B3 */
+  { "supE", "⫆" }, /* U+2AC6 */
+  { "supdot", "⪾" }, /* U+2ABE */
+  { "supdsub", "⫘" }, /* U+2AD8 */
+  { "supe", "⊇" }, /* U+2287 */
+  { "supedot", "⫄" }, /* U+2AC4 */
+  { "suphsol", "⟉" }, /* U+27C9 */
+  { "suphsub", "⫗" }, /* U+2AD7 */
+  { "suplarr", "⥻" }, /* U+297B */
+  { "supmult", "⫂" }, /* U+2AC2 */
+  { "supnE", "⫌" }, /* U+2ACC */
+  { "supne", "⊋" }, /* U+228B */
+  { "supplus", "⫀" }, /* U+2AC0 */
+  { "supset", "⊃" }, /* U+2283 */
+  { "supseteq", "⊇" }, /* U+2287 */
+  { "supseteqq", "⫆" }, /* U+2AC6 */
+  { "supsetneq", "⊋" }, /* U+228B */
+  { "supsetneqq", "⫌" }, /* U+2ACC */
+  { "supsim", "⫈" }, /* U+2AC8 */
+  { "supsub", "⫔" }, /* U+2AD4 */
+  { "supsup", "⫖" }, /* U+2AD6 */
+  { "swArr", "⇙" }, /* U+21D9 */
+  { "swarhk", "⤦" }, /* U+2926 */
+  { "swarr", "↙" }, /* U+2199 */
+  { "swarrow", "↙" }, /* U+2199 */
+  { "swnwar", "⤪" }, /* U+292A */
+  { "szlig", "ß" }, /* U+00DF */
+  { "target", "⌖" }, /* U+2316 */
+  { "tau", "τ" }, /* U+03C4 */
+  { "tbrk", "⎴" }, /* U+23B4 */
+  { "tcaron", "ť" }, /* U+0165 */
+  { "tcedil", "ţ" }, /* U+0163 */
+  { "tcy", "т" }, /* U+0442 */
+  { "tdot", "⃛" }, /* U+20DB */
+  { "telrec", "⌕" }, /* U+2315 */
+  { "tfr", "𝔱" }, /* U+1D531 */
+  { "there4", "∴" }, /* U+2234 */
+  { "therefore", "∴" }, /* U+2234 */
+  { "theta", "θ" }, /* U+03B8 */
+  { "thetasym", "ϑ" }, /* U+03D1 */
+  { "thetav", "ϑ" }, /* U+03D1 */
+  { "thickapprox", "≈" }, /* U+2248 */
+  { "thicksim", "∼" }, /* U+223C */
+  { "thinsp", " " }, /* U+2009 */
+  { "thkap", "≈" }, /* U+2248 */
+  { "thksim", "∼" }, /* U+223C */
+  { "thorn", "þ" }, /* U+00FE */
+  { "tilde", "˜" }, /* U+02DC */
+  { "times", "×" }, /* U+00D7 */
+  { "timesb", "⊠" }, /* U+22A0 */
+  { "timesbar", "⨱" }, /* U+2A31 */
+  { "timesd", "⨰" }, /* U+2A30 */
+  { "tint", "∭" }, /* U+222D */
+  { "toea", "⤨" }, /* U+2928 */
+  { "top", "⊤" }, /* U+22A4 */
+  { "topbot", "⌶" }, /* U+2336 */
+  { "topcir", "⫱" }, /* U+2AF1 */
+  { "topf", "𝕥" }, /* U+1D565 */
+  { "topfork", "⫚" }, /* U+2ADA */
+  { "tosa", "⤩" }, /* U+2929 */
+  { "tprime", "‴" }, /* U+2034 */
+  { "trade", "™" }, /* U+2122 */
+  { "triangle", "▵" }, /* U+25B5 */
+  { "triangledown", "▿" }, /* U+25BF */
+  { "triangleleft", "◃" }, /* U+25C3 */
+  { "trianglelefteq", "⊴" }, /* U+22B4 */
+  { "triangleq", "≜" }, /* U+225C */
+  { "triangleright", "▹" }, /* U+25B9 */
+  { "trianglerighteq", "⊵" }, /* U+22B5 */
+  { "tridot", "◬" }, /* U+25EC */
+  { "trie", "≜" }, /* U+225C */
+  { "triminus", "⨺" }, /* U+2A3A */
+  { "triplus", "⨹" }, /* U+2A39 */
+  { "trisb", "⧍" }, /* U+29CD */
+  { "tritime", "⨻" }, /* U+2A3B */
+  { "trpezium", "⏢" }, /* U+23E2 */
+  { "tscr", "𝓉" }, /* U+1D4C9 */
+  { "tscy", "ц" }, /* U+0446 */
+  { "tshcy", "ћ" }, /* U+045B */
+  { "tstrok", "ŧ" }, /* U+0167 */
+  { "twixt", "≬" }, /* U+226C */
+  { "uArr", "⇑" }, /* U+21D1 */
+  { "uHar", "⥣" }, /* U+2963 */
+  { "uacute", "ú" }, /* U+00FA */
+  { "uarr", "↑" }, /* U+2191 */
+  { "ubrcy", "ў" }, /* U+045E */
+  { "ubreve", "ŭ" }, /* U+016D */
+  { "ucirc", "û" }, /* U+00FB */
+  { "ucy", "у" }, /* U+0443 */
+  { "udarr", "⇅" }, /* U+21C5 */
+  { "udblac", "ű" }, /* U+0171 */
+  { "udhar", "⥮" }, /* U+296E */
+  { "ufisht", "⥾" }, /* U+297E */
+  { "ufr", "𝔲" }, /* U+1D532 */
+  { "ugrave", "ù" }, /* U+00F9 */
+  { "uharl", "↿" }, /* U+21BF */
+  { "uharr", "↾" }, /* U+21BE */
+  { "uhblk", "▀" }, /* U+2580 */
+  { "ulcorn", "⌜" }, /* U+231C */
+  { "ulcorner", "⌜" }, /* U+231C */
+  { "ulcrop", "⌏" }, /* U+230F */
+  { "ultri", "◸" }, /* U+25F8 */
+  { "umacr", "ū" }, /* U+016B */
+  { "uml", "¨" }, /* U+00A8 */
+  { "uogon", "ų" }, /* U+0173 */
+  { "uopf", "𝕦" }, /* U+1D566 */
+  { "uparrow", "↑" }, /* U+2191 */
+  { "updownarrow", "↕" }, /* U+2195 */
+  { "upharpoonleft", "↿" }, /* U+21BF */
+  { "upharpoonright", "↾" }, /* U+21BE */
+  { "uplus", "⊎" }, /* U+228E */
+  { "upsi", "υ" }, /* U+03C5 */
+  { "upsih", "ϒ" }, /* U+03D2 */
+  { "upsilon", "υ" }, /* U+03C5 */
+  { "upuparrows", "⇈" }, /* U+21C8 */
+  { "urcorn", "⌝" }, /* U+231D */
+  { "urcorner", "⌝" }, /* U+231D */
+  { "urcrop", "⌎" }, /* U+230E */
+  { "uring", "ů" }, /* U+016F */
+  { "urtri", "◹" }, /* U+25F9 */
+  { "uscr", "𝓊" }, /* U+1D4CA */
+  { "utdot", "⋰" }, /* U+22F0 */
+  { "utilde", "ũ" }, /* U+0169 */
+  { "utri", "▵" }, /* U+25B5 */
+  { "utrif", "▴" }, /* U+25B4 */
+  { "uuarr", "⇈" }, /* U+21C8 */
+  { "uuml", "ü" }, /* U+00FC */
+  { "uwangle", "⦧" }, /* U+29A7 */
+  { "vArr", "⇕" }, /* U+21D5 */
+  { "vBar", "⫨" }, /* U+2AE8 */
+  { "vBarv", "⫩" }, /* U+2AE9 */
+  { "vDash", "⊨" }, /* U+22A8 */
+  { "vangrt", "⦜" }, /* U+299C */
+  { "varepsilon", "ϵ" }, /* U+03F5 */
+  { "varkappa", "ϰ" }, /* U+03F0 */
+  { "varnothing", "∅" }, /* U+2205 */
+  { "varphi", "ϕ" }, /* U+03D5 */
+  { "varpi", "ϖ" }, /* U+03D6 */
+  { "varpropto", "∝" }, /* U+221D */
+  { "varr", "↕" }, /* U+2195 */
+  { "varrho", "ϱ" }, /* U+03F1 */
+  { "varsigma", "ς" }, /* U+03C2 */
+  { "varsubsetneq", "⊊︀" }, /* U+228A U+FE00 */
+  { "varsubsetneqq", "⫋︀" }, /* U+2ACB U+FE00 */
+  { "varsupsetneq", "⊋︀" }, /* U+228B U+FE00 */
+  { "varsupsetneqq", "⫌︀" }, /* U+2ACC U+FE00 */
+  { "vartheta", "ϑ" }, /* U+03D1 */
+  { "vartriangleleft", "⊲" }, /* U+22B2 */
+  { "vcy", "в" }, /* U+0432 */
+  { "vdash", "⊢" }, /* U+22A2 */
+  { "vee", "∨" }, /* U+2228 */
+  { "veebar", "⊻" }, /* U+22BB */
+  { "veeeq", "≚" }, /* U+225A */
+  { "vellip", "⋮" }, /* U+22EE */
+  { "verbar", "|" }, /* U+007C */
+  { "vert", "|" }, /* U+007C */
+  { "vfr", "𝔳" }, /* U+1D533 */
+  { "vltri", "⊲" }, /* U+22B2 */
+  { "vnsub", "⊂⃒" }, /* U+2282 U+20D2 */
+  { "vnsup", "⊃⃒" }, /* U+2283 U+20D2 */
+  { "vopf", "𝕧" }, /* U+1D567 */
+  { "vprop", "∝" }, /* U+221D */
+  { "vrtri", "⊳" }, /* U+22B3 */
+  { "vscr", "𝓋" }, /* U+1D4CB */
+  { "vsubnE", "⫋︀" }, /* U+2ACB U+FE00 */
+  { "vsubne", "⊊︀" }, /* U+228A U+FE00 */
+  { "vsupnE", "⫌︀" }, /* U+2ACC U+FE00 */
+  { "vsupne", "⊋︀" }, /* U+228B U+FE00 */
+  { "vzigzag", "⦚" }, /* U+299A */
+  { "wcirc", "ŵ" }, /* U+0175 */
+  { "wedbar", "⩟" }, /* U+2A5F */
+  { "wedge", "∧" }, /* U+2227 */
+  { "wedgeq", "≙" }, /* U+2259 */
+  { "weierp", "℘" }, /* U+2118 */
+  { "wfr", "𝔴" }, /* U+1D534 */
+  { "wopf", "𝕨" }, /* U+1D568 */
+  { "wp", "℘" }, /* U+2118 */
+  { "wr", "≀" }, /* U+2240 */
+  { "wreath", "≀" }, /* U+2240 */
+  { "wscr", "𝓌" }, /* U+1D4CC */
+  { "xcap", "⋂" }, /* U+22C2 */
+  { "xcirc", "◯" }, /* U+25EF */
+  { "xcup", "⋃" }, /* U+22C3 */
+  { "xdtri", "▽" }, /* U+25BD */
+  { "xfr", "𝔵" }, /* U+1D535 */
+  { "xhArr", "⟺" }, /* U+27FA */
+  { "xharr", "⟷" }, /* U+27F7 */
+  { "xi", "ξ" }, /* U+03BE */
+  { "xlArr", "⟸" }, /* U+27F8 */
+  { "xlarr", "⟵" }, /* U+27F5 */
+  { "xmap", "⟼" }, /* U+27FC */
+  { "xnis", "⋻" }, /* U+22FB */
+  { "xodot", "⨀" }, /* U+2A00 */
+  { "xopf", "𝕩" }, /* U+1D569 */
+  { "xoplus", "⨁" }, /* U+2A01 */
+  { "xotime", "⨂" }, /* U+2A02 */
+  { "xrArr", "⟹" }, /* U+27F9 */
+  { "xrarr", "⟶" }, /* U+27F6 */
+  { "xscr", "𝓍" }, /* U+1D4CD */
+  { "xsqcup", "⨆" }, /* U+2A06 */
+  { "xuplus", "⨄" }, /* U+2A04 */
+  { "xutri", "△" }, /* U+25B3 */
+  { "xvee", "⋁" }, /* U+22C1 */
+  { "xwedge", "⋀" }, /* U+22C0 */
+  { "yacute", "ý" }, /* U+00FD */
+  { "yacy", "я" }, /* U+044F */
+  { "ycirc", "ŷ" }, /* U+0177 */
+  { "ycy", "ы" }, /* U+044B */
+  { "yen", "¥" }, /* U+00A5 */
+  { "yfr", "𝔶" }, /* U+1D536 */
+  { "yicy", "ї" }, /* U+0457 */
+  { "yopf", "𝕪" }, /* U+1D56A */
+  { "yscr", "𝓎" }, /* U+1D4CE */
+  { "yucy", "ю" }, /* U+044E */
+  { "yuml", "ÿ" }, /* U+00FF */
+  { "zacute", "ź" }, /* U+017A */
+  { "zcaron", "ž" }, /* U+017E */
+  { "zcy", "з" }, /* U+0437 */
+  { "zdot", "ż" }, /* U+017C */
+  { "zeetrf", "ℨ" }, /* U+2128 */
+  { "zeta", "ζ" }, /* U+03B6 */
+  { "zfr", "𝔷" }, /* U+1D537 */
+  { "zhcy", "ж" }, /* U+0436 */
+  { "zigrarr", "⇝" }, /* U+21DD */
+  { "zopf", "𝕫" }, /* U+1D56B */
+  { "zscr", "𝓏" }, /* U+1D4CF */
+  { "zwj", "‍" }, /* U+200D */
+  { "zwnj", "‌" } /* U+200C */
+};
+/* HTML5 named character entities whose names are longer than 15 bytes,
+   each with its replacement text.  The entries are sorted by name, as
+   required by the binary search in html5_lookup.
+   Invisible characters (U+200B ZERO WIDTH SPACE, U+00A0 NO-BREAK SPACE) are
+   spelled as UTF-8 hex escapes, so that they cannot get lost or be mistaken
+   for an empty string or an ASCII space when this file is viewed, copied,
+   or processed by tools.  */
+static const struct { const char name[31 + 1]; const char value[6 + 1]; } html5long[] =
+{
+  { "CapitalDifferentialD", "ⅅ" }, /* U+2145 */
+  { "ClockwiseContourIntegral", "∲" }, /* U+2232 */
+  { "CloseCurlyDoubleQuote", "”" }, /* U+201D */
+  { "CounterClockwiseContourIntegral", "∳" }, /* U+2233 */
+  { "DiacriticalAcute", "´" }, /* U+00B4 */
+  { "DiacriticalDoubleAcute", "˝" }, /* U+02DD */
+  { "DiacriticalGrave", "`" }, /* U+0060 */
+  { "DiacriticalTilde", "˜" }, /* U+02DC */
+  { "DoubleContourIntegral", "∯" }, /* U+222F */
+  { "DoubleLeftRightArrow", "⇔" }, /* U+21D4 */
+  { "DoubleLongLeftArrow", "⟸" }, /* U+27F8 */
+  { "DoubleLongLeftRightArrow", "⟺" }, /* U+27FA */
+  { "DoubleLongRightArrow", "⟹" }, /* U+27F9 */
+  { "DoubleRightArrow", "⇒" }, /* U+21D2 */
+  { "DoubleUpDownArrow", "⇕" }, /* U+21D5 */
+  { "DoubleVerticalBar", "∥" }, /* U+2225 */
+  { "DownArrowUpArrow", "⇵" }, /* U+21F5 */
+  { "DownLeftRightVector", "⥐" }, /* U+2950 */
+  { "DownLeftTeeVector", "⥞" }, /* U+295E */
+  { "DownLeftVectorBar", "⥖" }, /* U+2956 */
+  { "DownRightTeeVector", "⥟" }, /* U+295F */
+  { "DownRightVectorBar", "⥗" }, /* U+2957 */
+  { "EmptySmallSquare", "◻" }, /* U+25FB */
+  { "EmptyVerySmallSquare", "▫" }, /* U+25AB */
+  { "FilledSmallSquare", "◼" }, /* U+25FC */
+  { "FilledVerySmallSquare", "▪" }, /* U+25AA */
+  { "GreaterEqualLess", "⋛" }, /* U+22DB */
+  { "GreaterFullEqual", "≧" }, /* U+2267 */
+  { "GreaterSlantEqual", "⩾" }, /* U+2A7E */
+  { "LeftAngleBracket", "⟨" }, /* U+27E8 */
+  { "LeftArrowRightArrow", "⇆" }, /* U+21C6 */
+  { "LeftDoubleBracket", "⟦" }, /* U+27E6 */
+  { "LeftDownTeeVector", "⥡" }, /* U+2961 */
+  { "LeftDownVectorBar", "⥙" }, /* U+2959 */
+  { "LeftTriangleEqual", "⊴" }, /* U+22B4 */
+  { "LeftUpDownVector", "⥑" }, /* U+2951 */
+  { "LessEqualGreater", "⋚" }, /* U+22DA */
+  { "LongLeftRightArrow", "⟷" }, /* U+27F7 */
+  { "Longleftrightarrow", "⟺" }, /* U+27FA */
+  { "NegativeMediumSpace", "\xE2\x80\x8B" }, /* U+200B */
+  { "NegativeThickSpace", "\xE2\x80\x8B" }, /* U+200B */
+  { "NegativeThinSpace", "\xE2\x80\x8B" }, /* U+200B */
+  { "NegativeVeryThinSpace", "\xE2\x80\x8B" }, /* U+200B */
+  { "NestedGreaterGreater", "≫" }, /* U+226B */
+  { "NonBreakingSpace", "\xC2\xA0" }, /* U+00A0 */
+  { "NotDoubleVerticalBar", "∦" }, /* U+2226 */
+  { "NotGreaterFullEqual", "≧̸" }, /* U+2267 U+0338 */
+  { "NotGreaterGreater", "≫̸" }, /* U+226B U+0338 */
+  { "NotGreaterSlantEqual", "⩾̸" }, /* U+2A7E U+0338 */
+  { "NotLeftTriangleBar", "⧏̸" }, /* U+29CF U+0338 */
+  { "NotLeftTriangleEqual", "⋬" }, /* U+22EC */
+  { "NotLessSlantEqual", "⩽̸" }, /* U+2A7D U+0338 */
+  { "NotNestedGreaterGreater", "⪢̸" }, /* U+2AA2 U+0338 */
+  { "NotNestedLessLess", "⪡̸" }, /* U+2AA1 U+0338 */
+  { "NotPrecedesEqual", "⪯̸" }, /* U+2AAF U+0338 */
+  { "NotPrecedesSlantEqual", "⋠" }, /* U+22E0 */
+  { "NotReverseElement", "∌" }, /* U+220C */
+  { "NotRightTriangle", "⋫" }, /* U+22EB */
+  { "NotRightTriangleBar", "⧐̸" }, /* U+29D0 U+0338 */
+  { "NotRightTriangleEqual", "⋭" }, /* U+22ED */
+  { "NotSquareSubsetEqual", "⋢" }, /* U+22E2 */
+  { "NotSquareSuperset", "⊐̸" }, /* U+2290 U+0338 */
+  { "NotSquareSupersetEqual", "⋣" }, /* U+22E3 */
+  { "NotSucceedsEqual", "⪰̸" }, /* U+2AB0 U+0338 */
+  { "NotSucceedsSlantEqual", "⋡" }, /* U+22E1 */
+  { "NotSucceedsTilde", "≿̸" }, /* U+227F U+0338 */
+  { "NotSupersetEqual", "⊉" }, /* U+2289 */
+  { "NotTildeFullEqual", "≇" }, /* U+2247 */
+  { "OpenCurlyDoubleQuote", "“" }, /* U+201C */
+  { "PrecedesSlantEqual", "≼" }, /* U+227C */
+  { "ReverseEquilibrium", "⇋" }, /* U+21CB */
+  { "ReverseUpEquilibrium", "⥯" }, /* U+296F */
+  { "RightAngleBracket", "⟩" }, /* U+27E9 */
+  { "RightArrowLeftArrow", "⇄" }, /* U+21C4 */
+  { "RightDoubleBracket", "⟧" }, /* U+27E7 */
+  { "RightDownTeeVector", "⥝" }, /* U+295D */
+  { "RightDownVectorBar", "⥕" }, /* U+2955 */
+  { "RightTriangleBar", "⧐" }, /* U+29D0 */
+  { "RightTriangleEqual", "⊵" }, /* U+22B5 */
+  { "RightUpDownVector", "⥏" }, /* U+294F */
+  { "RightUpTeeVector", "⥜" }, /* U+295C */
+  { "RightUpVectorBar", "⥔" }, /* U+2954 */
+  { "SquareIntersection", "⊓" }, /* U+2293 */
+  { "SquareSubsetEqual", "⊑" }, /* U+2291 */
+  { "SquareSupersetEqual", "⊒" }, /* U+2292 */
+  { "SucceedsSlantEqual", "≽" }, /* U+227D */
+  { "UnderParenthesis", "⏝" }, /* U+23DD */
+  { "UpArrowDownArrow", "⇅" }, /* U+21C5 */
+  { "VerticalSeparator", "❘" }, /* U+2758 */
+  { "blacktriangledown", "▾" }, /* U+25BE */
+  { "blacktriangleleft", "◂" }, /* U+25C2 */
+  { "blacktriangleright", "▸" }, /* U+25B8 */
+  { "circlearrowright", "↻" }, /* U+21BB */
+  { "downharpoonright", "⇂" }, /* U+21C2 */
+  { "leftrightharpoons", "⇋" }, /* U+21CB */
+  { "leftrightsquigarrow", "↭" }, /* U+21AD */
+  { "longleftrightarrow", "⟷" }, /* U+27F7 */
+  { "ntrianglerighteq", "⋭" }, /* U+22ED */
+  { "rightharpoondown", "⇁" }, /* U+21C1 */
+  { "rightleftharpoons", "⇌" }, /* U+21CC */
+  { "rightrightarrows", "⇉" }, /* U+21C9 */
+  { "twoheadleftarrow", "↞" }, /* U+219E */
+  { "twoheadrightarrow", "↠" }, /* U+21A0 */
+  { "vartriangleright", "⊳" } /* U+22B3 */
+};
+/* Looks up NAME among the HTML5 named character entities.
+   Names longer than 15 bytes are searched in html5long[], all others in
+   html5short[].  Both searches are binary searches and therefore rely on
+   the tables being sorted in the order defined by sd_cmp.
+   Returns the replacement text of the entity, or NULL if NAME is not found.  */
+static const char *
+html5_lookup (string_desc_t name)
+{
+  if (sd_length (name) > 15)
+    {
+      /* Binary search among the long names.  */
+      size_t low = 0;
+      size_t high = sizeof (html5long) / sizeof (html5long[0]);
+      while (low < high)
+        {
+          size_t middle = (low + high) / 2;
+          int order = sd_cmp (name, sd_from_c (html5long[middle].name));
+          if (order < 0)
+            high = middle;
+          else if (order > 0)
+            low = middle + 1;
+          else
+            return html5long[middle].value;
+        }
+    }
+  else
+    {
+      /* Binary search among the short names.  */
+      size_t low = 0;
+      size_t high = sizeof (html5short) / sizeof (html5short[0]);
+      while (low < high)
+        {
+          size_t middle = (low + high) / 2;
+          int order = sd_cmp (name, sd_from_c (html5short[middle].name));
+          if (order < 0)
+            high = middle;
+          else if (order > 0)
+            low = middle + 1;
+          else
+            return html5short[middle].value;
+        }
+    }
+  /* Not found.  */
+  return NULL;
+}
diff --git a/gettext-tools/src/message.c b/gettext-tools/src/message.c

index 2c6d94f5bb3dfecf830d5201ebcd8f192b5ca5fc..9b61936852d0a837b32c928e20913fef78fe87dc 100644 (file)
--- a/gettext-tools/src/message.c
+++ b/gettext-tools/src/message.c
@@ -54,6 +54,7 @@ const char *const format_language[NFORMATS] =
    /* format_awk */              "awk",
    /* format_lua */              "lua",
    /* format_pascal */           "object-pascal",
+  /* format_d */                "d",
    /* format_smalltalk */        "smalltalk",
    /* format_qt */               "qt",
    /* format_qt_plursl */        "qt-plural",
@@ -91,6 +92,7 @@ const char *const format_language_pretty[NFORMATS] =
    /* format_awk */              "awk",
    /* format_lua */              "Lua",
    /* format_pascal */           "Object Pascal",
+  /* format_d */                "D",
    /* format_smalltalk */        "Smalltalk",
    /* format_qt */               "Qt",
    /* format_qt_plural */        "Qt plural",
diff --git a/gettext-tools/src/message.h b/gettext-tools/src/message.h

index dd9447d6447a9193e934bcd2a151748b137871b8..72b3d8602bb6ebf4bf83535bbefdd9655874f282 100644 (file)
--- a/gettext-tools/src/message.h
+++ b/gettext-tools/src/message.h
@@ -63,6 +63,7 @@ enum format_type
    format_awk,
    format_lua,
    format_pascal,
+  format_d,
    format_smalltalk,
    format_qt,
    format_qt_plural,
@@ -77,7 +78,7 @@ enum format_type
    format_gfc_internal,
    format_ycp
  };
-#define NFORMATS 33     /* Number of format_type enum values.  */
+#define NFORMATS 34     /* Number of format_type enum values.  */
  extern DLL_VARIABLE const char *const format_language[NFORMATS];
  extern DLL_VARIABLE const char *const format_language_pretty[NFORMATS];
  
diff --git a/gettext-tools/src/x-d.c b/gettext-tools/src/x-d.c

new file mode 100644 (file)

index 0000000..844d632
--- /dev/null
+++ b/gettext-tools/src/x-d.c
@@ -0,0 +1,1805 @@
+/* xgettext D backend.
+   Copyright (C) 2001-2025 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025.  */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* Specification.  */
+#include "x-d.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <error.h>
+#include "message.h"
+#include "string-desc.h"
+#include "xstring-desc.h"
+#include "string-buffer-reversed.h"
+#include "c-ctype.h"
+#include "html5-entities.h"
+#include "xgettext.h"
+#include "xg-pos.h"
+#include "xg-mixed-string.h"
+#include "xg-arglist-context.h"
+#include "xg-arglist-callshape.h"
+#include "xg-arglist-parser.h"
+#include "xg-message.h"
+#include "if-error.h"
+#include "xalloc.h"
+#include "read-file.h"
+#include "unistr.h"
+#include "byteswap.h"
+#include "po-charset.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+/* Use tree-sitter.
+   Documentation: <https://tree-sitter.github.io/tree-sitter/using-parsers>  */
+#include <tree_sitter/api.h>
+extern const TSLanguage *tree_sitter_d (void);
+
+
+/* The D syntax is defined in <https://dlang.org/spec/spec.html>.
+   The design principle of this language appears to be: "If there are two ways
+   to get a certain feature, find three more equivalent ways, and support all
+   five in the language."
+   Examples:
+     - There are 5 supported encodings for the source code.
+     - There are 3 supported syntaxes for comments.
+     - There are 10 supported syntaxes for string literals (not even counting
+       the interpolation expression sequences).
+     - There are 4 supported ways of including a Unicode character in a
+       double-quoted string.
+   This guarantees
+     - a steep learning curve for the junior programmers,
+     - that even senior programmers never fully master the language,
+     - that teams of developers will eternally fight over code style and
+       irrelevant details,
+     - and a high implementation complexity for the language and its runtime.
+ */
+
+#define DEBUG_D 0
+
+
+/* ====================== Keyword set customization.  ====================== */
+
+/* If true extract all strings.  */
+static bool extract_all = false;
+
+/* Keywords that were specified without a trailing '!' (see x_d_keyword).  */
+static hash_table function_keywords;
+/* Keywords that were specified with a trailing '!' (see x_d_keyword).
+   The '!' itself is not part of the stored name.  */
+static hash_table template_keywords;
+/* True as long as the default keywords still need to be installed by
+   init_keywords.  Cleared by x_d_keyword (NULL) and by init_keywords.  */
+static bool default_keywords = true;
+
+
+/* Requests that all strings be extracted, by setting the extract_all
+   flag.  */
+void
+x_d_extract_all ()
+{
+  extract_all = true;
+}
+
+
+/* Registers the keyword specification NAME.
+   NAME == NULL means that the default keywords shall not be installed.
+   Otherwise NAME is split by split_keywordspec; an identifier with a
+   trailing '!' is recorded (without the '!') in template_keywords, any
+   other identifier in function_keywords.  */
+void
+x_d_keyword (const char *name)
+{
+  if (name == NULL)
+    {
+      default_keywords = false;
+      return;
+    }
+
+  /* Create the hash tables on first use.  */
+  if (function_keywords.table == NULL)
+    hash_init (&function_keywords, 100);
+  if (template_keywords.table == NULL)
+    hash_init (&template_keywords, 100);
+
+  const char *end;
+  struct callshape shape;
+  split_keywordspec (name, &end, &shape);
+
+  /* The characters between name and end should form a valid identifier,
+     possibly with a trailing '!'.
+     A colon means an invalid parse in split_keywordspec().  */
+  const char *colon = strchr (name, ':');
+  if (colon != NULL && colon < end)
+    return;
+
+  bool is_template = (end > name && end[-1] == '!');
+  if (is_template)
+    insert_keyword_callshape (&template_keywords, name, end - 1 - name,
+                              &shape);
+  else
+    insert_keyword_callshape (&function_keywords, name, end - name,
+                              &shape);
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.
+   Installs the default keywords, unless this has already been done or
+   has been disabled through x_d_keyword (NULL).  */
+static void
+init_keywords ()
+{
+  /* When adding new keywords here, also update the documentation in
+     xgettext.texi!  */
+  static const char *const default_specs[] =
+    {
+      "gettext",
+      "dgettext:2",
+      "dcgettext:2",
+      "ngettext:1,2",
+      "dngettext:2,3",
+      "dcngettext:2,3",
+      "pgettext:1c,2",
+      "dpgettext:2c,3",
+      "dcpgettext:2c,3",
+      "npgettext:1c,2,3",
+      "dnpgettext:2c,3,4",
+      "dcnpgettext:2c,3,4"
+    };
+  size_t i;
+
+  if (!default_keywords)
+    return;
+
+  for (i = 0; i < sizeof (default_specs) / sizeof (default_specs[0]); i++)
+    x_d_keyword (default_specs[i]);
+  default_keywords = false;
+}
+
+/* Registers the format string flags of the D backend through
+   xgettext_record_flag: the 'pass-c-format' and 'pass-d-format' flags of
+   the gettext family of functions, the 'c-format' flags of the printf
+   family, and the 'd-format' flags of the std.format functions.  */
+void
+init_flag_table_d ()
+{
+  /* The flag specifications, in the order in which they get registered.  */
+  static const char *const flag_specs[] =
+    {
+      "gettext:1:pass-c-format",
+      "dgettext:2:pass-c-format",
+      "dcgettext:2:pass-c-format",
+      "ngettext:1:pass-c-format",
+      "ngettext:2:pass-c-format",
+      "dngettext:2:pass-c-format",
+      "dngettext:3:pass-c-format",
+      "dcngettext:2:pass-c-format",
+      "dcngettext:3:pass-c-format",
+      "pgettext:2:pass-c-format",
+      "dpgettext:3:pass-c-format",
+      "dcpgettext:3:pass-c-format",
+      "npgettext:2:pass-c-format",
+      "npgettext:3:pass-c-format",
+      "dnpgettext:3:pass-c-format",
+      "dnpgettext:4:pass-c-format",
+      "dcnpgettext:3:pass-c-format",
+      "dcnpgettext:4:pass-c-format",
+      "gettext:1:pass-d-format",
+      "dgettext:2:pass-d-format",
+      "dcgettext:2:pass-d-format",
+      "ngettext:1:pass-d-format",
+      "ngettext:2:pass-d-format",
+      "dngettext:2:pass-d-format",
+      "dngettext:3:pass-d-format",
+      "dcngettext:2:pass-d-format",
+      "dcngettext:3:pass-d-format",
+      "pgettext:2:pass-d-format",
+      "dpgettext:3:pass-d-format",
+      "dcpgettext:3:pass-d-format",
+      "npgettext:2:pass-d-format",
+      "npgettext:3:pass-d-format",
+      "dnpgettext:3:pass-d-format",
+      "dnpgettext:4:pass-d-format",
+      "dcnpgettext:3:pass-d-format",
+      "dcnpgettext:4:pass-d-format",
+
+      /* Module core.stdc.stdio
+         <https://dlang.org/library/core/stdc/stdio.html>  */
+      "fprintf:2:c-format",
+      "vfprintf:2:c-format",
+      "printf:1:c-format",
+      "vprintf:1:c-format",
+      "sprintf:2:c-format",
+      "vsprintf:2:c-format",
+      "snprintf:3:c-format",
+      "vsnprintf:3:c-format",
+
+      /* Module std.format
+         <https://dlang.org/library/std/format.html>  */
+      "format:1:d-format",
+      "sformat:2:d-format"
+    };
+  size_t i;
+
+  for (i = 0; i < sizeof (flag_specs) / sizeof (flag_specs[0]); i++)
+    xgettext_record_flag (flag_specs[i]);
+}
+
+
+/* ======================== Parsing via tree-sitter. ======================== */
+/* To understand this code, look at
+     tree-sitter-d/src/node-types.json
+   and
+     tree-sitter-d/src/grammar.json
+ */
+
+/* The tree-sitter's language object.  */
+static const TSLanguage *ts_language;
+
+/* ------------------------- Node types and symbols ------------------------- */
+
+/* Returns the symbol that ts_language associates with NAME.
+   IS_NAMED is passed on to ts_language_symbol_for_name.
+   Aborts if the lookup yields 0.  */
+static TSSymbol
+ts_language_symbol (const char *name, bool is_named)
+{
+  size_t name_length = strlen (name);
+  TSSymbol symbol =
+    ts_language_symbol_for_name (ts_language, name, name_length, is_named);
+  /* A lookup result of 0 means that the grammar has evolved in an
+     incompatible way.  */
+  if (symbol == 0)
+    abort ();
+  return symbol;
+}
+
+/* Returns the field id that ts_language associates with NAME.
+   Aborts if the lookup yields 0.  */
+MAYBE_UNUSED static TSFieldId
+ts_language_field (const char *name)
+{
+  size_t name_length = strlen (name);
+  TSFieldId field =
+    ts_language_field_id_for_name (ts_language, name, name_length);
+  /* A lookup result of 0 means that the grammar has evolved in an
+     incompatible way.  */
+  if (field == 0)
+    abort ();
+  return field;
+}
+
+/* Optimization:
+   Instead of
+     strcmp (ts_node_type (node), "string_literal") == 0
+   it is faster to do
+     ts_node_symbol (node) == ts_symbol_string_literal
+   Each variable below holds the symbol of the node type it is named after.
+   NOTE(review): presumably they are filled in through ts_language_symbol()
+   once ts_language is set; the initialization is not in this part of the
+   file - confirm.
+ */
+static TSSymbol ts_symbol_comment;
+static TSSymbol ts_symbol_string_literal;
+static TSSymbol ts_symbol_quoted_string;
+static TSSymbol ts_symbol_escape_sequence;
+static TSSymbol ts_symbol_htmlentity;
+static TSSymbol ts_symbol_raw_string;
+static TSSymbol ts_symbol_hex_string;
+static TSSymbol ts_symbol_binary_expression;
+static TSSymbol ts_symbol_add_expression;
+static TSSymbol ts_symbol_expression;
+static TSSymbol ts_symbol_identifier;
+static TSSymbol ts_symbol_property_expression;
+static TSSymbol ts_symbol_call_expression;
+static TSSymbol ts_symbol_named_arguments;
+static TSSymbol ts_symbol_named_argument;
+static TSSymbol ts_symbol_template_instance;
+static TSSymbol ts_symbol_template_arguments;
+static TSSymbol ts_symbol_template_argument;
+static TSSymbol ts_symbol_unittest_declaration;
+static TSSymbol ts_symbol_tilde; /* ~ */
+
+/* Returns the line number of the start of NODE, that is, the row of its
+   start point plus 1.  */
+static inline size_t
+ts_node_line_number (TSNode node)
+{
+  TSPoint start = ts_node_start_point (node);
+  return start.row + 1;
+}
+
+/* -------------------------------- The file -------------------------------- */
+
+/* The entire contents of the file being analyzed.  */
+static const char *contents;
+
+/* -------------------------------- Comments -------------------------------- */
+
+/* These are for tracking whether comments count as immediately before
+   keyword.  */
+static int last_comment_line;
+static int last_non_comment_line;
+
+/* Saves a comment line.
+   GIST is the text of one line of a comment, without the comment markers.
+   Leading and trailing spaces and tabs are removed, then the result is
+   added to the savable comment.  */
+static void save_comment_line (string_desc_t gist)
+{
+  /* Remove leading whitespace.  */
+  while (sd_length (gist) > 0
+         && (sd_char_at (gist, 0) == ' '
+             || sd_char_at (gist, 0) == '\t'))
+    gist = sd_substring (gist, 1, sd_length (gist));
+  /* Remove trailing whitespace.  */
+  size_t len = sd_length (gist);
+  while (len > 0
+         && (sd_char_at (gist, len - 1) == ' '
+             || sd_char_at (gist, len - 1) == '\t'))
+    len--;
+  gist = sd_substring (gist, 0, len);
+  /* Convert to a NUL-terminated C string.  This is a freshly allocated
+     copy.  Use xsd_c rather than sd_c, because sd_c returns NULL upon
+     memory allocation failure.
+     savable_comment_add is expected to store its own copy of the string
+     (it takes a 'const char *'), therefore free our copy afterwards instead
+     of leaking one allocation per comment line.  */
+  char *line = xsd_c (gist);
+  savable_comment_add (line);
+  free (line);
+}
+
+/* Does the comment handling for NODE.
+   Updates savable_comment, last_comment_line, last_non_comment_line.
+   It is important that this function gets called
+     - for each node (not only the named nodes!),
+     - in depth-first traversal order.
+   A comment node must be either a '//' line comment or a block comment
+   delimited by the C comment markers or by the D nested comment markers;
+   anything else makes this function abort.  */
+static void handle_comments (TSNode node)
+{
+  #if DEBUG_D && 0
+  fprintf (stderr, "LCL=%d LNCL=%d node=[%s]|%s|\n", last_comment_line, last_non_comment_line, ts_node_type (node), ts_node_string (node));
+  #endif
+  if (last_comment_line < last_non_comment_line
+      && last_non_comment_line < ts_node_line_number (node))
+    /* We have skipped over a newline.  This newline terminated a line
+       with non-comment tokens, after the last comment line.  */
+    savable_comment_reset ();
+
+  if (ts_node_symbol (node) != ts_symbol_comment)
+    {
+      /* Not a comment: just remember the line it starts on.  */
+      last_non_comment_line = ts_node_line_number (node);
+      return;
+    }
+
+  /* The text of the comment, including its markers.  */
+  string_desc_t entire =
+    sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+                 (char *) contents + ts_node_start_byte (node));
+  size_t entire_len = sd_length (entire);
+
+  if (entire_len >= 2
+      && sd_char_at (entire, 0) == '/'
+      && sd_char_at (entire, 1) == '/')
+    {
+      /* A line comment: everything after the two slashes is one line.  */
+      save_comment_line (sd_substring (entire, 2, entire_len));
+    }
+  else if (entire_len >= 4
+           && sd_char_at (entire, 0) == '/'
+           && ((sd_char_at (entire, 1) == '*'
+                && sd_char_at (entire, entire_len - 2) == '*')
+               || (sd_char_at (entire, 1) == '+'
+                   && sd_char_at (entire, entire_len - 2) == '+'))
+           && sd_char_at (entire, entire_len - 1) == '/')
+    {
+      /* A block comment: strip the markers, then split into lines.
+         save_comment_line removes the leading and trailing whitespace
+         from each line.  */
+      string_desc_t rest = sd_substring (entire, 2, entire_len - 2);
+      ptrdiff_t nl_index;
+      while ((nl_index = sd_index (rest, '\n')) >= 0)
+        {
+          save_comment_line (sd_substring (rest, 0, nl_index));
+          rest = sd_substring (rest, nl_index + 1, sd_length (rest));
+        }
+      /* The part after the last newline.  */
+      save_comment_line (rest);
+    }
+  else
+    abort ();
+
+  last_comment_line = ts_node_end_point (node).row + 1;
+}
+
+/* ---------------------------- String literals ---------------------------- */
+
+/* Determines whether NODE represents a string literal or the concatenation
+   of string literals (via the '+' operator).  */
+static bool
+is_string_literal (TSNode node)
+{
+ start:
+  if (ts_node_symbol (node) == ts_symbol_string_literal)
+    {
+      string_desc_t node_contents =
+        sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+                     (char *) contents + ts_node_start_byte (node));
+      #if DEBUG_D && 0
+      fprintf (stderr, "[%s]|%s|%.*s|\n", ts_node_type (node), ts_node_string (node), (int) sd_length (node_contents), sd_data (node_contents));
+      #if 0
+      uint32_t count = ts_node_child_count (node);
+      uint32_t i;
+      for (i = 0; i < count; i++)
+        {
+          TSNode subnode = ts_node_named_child (node, i);
+          string_desc_t subnode_contents =
+            sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                         (char *) contents + ts_node_start_byte (subnode));
+          fprintf (stderr, "%u -> [%s]|%s|%.*s|\n", i, ts_node_type (subnode), ts_node_string (subnode), (int) sd_length (subnode_contents), sd_data (subnode_contents));
+          uint32_t count2 = ts_node_child_count (subnode);
+          uint32_t j;
+          for (j = 0; j < count2; j++)
+            {
+              fprintf (stderr, "%u %u -> [%s]|%s|\n", i, j, ts_node_type (ts_node_child (subnode, j)), ts_node_string (ts_node_child (subnode, j)));
+            }
+        }
+      #endif
+      #endif
+      /* tree-sitter-d does not do a good job of dissecting the string literal
+         into its constituents.  Therefore we have to look at the node's entire
+         contents and dissect ourselves.  */
+      /* Interpolation expression sequences look like string literals but are
+         not, since they need a '.text' call to convert to string.  */
+      if (sd_char_at (node_contents, 0) == 'i')
+        return false;
+      /* We only want string literals with 'char' elements, not 'wchar' or
+         'dchar'.  */
+      if (sd_char_at (node_contents, sd_length (node_contents) - 1) == 'w'
+          || sd_char_at (node_contents, sd_length (node_contents) - 1) == 'd')
+        return false;
+      return true;
+    }
+  if (ts_node_symbol (node) == ts_symbol_binary_expression
+      && ts_node_child_count (node) == 1)
+    {
+      TSNode subnode = ts_node_child (node, 0);
+      if (ts_node_symbol (subnode) == ts_symbol_add_expression
+          && ts_node_child_count (subnode) == 3
+          && ts_node_symbol (ts_node_child (subnode, 1)) == ts_symbol_tilde
+          /* Recurse into the left and right subnodes.  */
+          && is_string_literal (ts_node_child (subnode, 2)))
+        {
+          /*return is_string_literal (ts_node_child (subnode, 0));*/
+          node = ts_node_child (subnode, 0);
+          goto start;
+        }
+    }
+  if (ts_node_symbol (node) == ts_symbol_expression
+      && ts_node_named_child_count (node) == 1)
+    {
+      TSNode subnode = ts_node_named_child (node, 0);
+      /* Recurse.  */
+      /*return is_string_literal (subnode);*/
+      node = subnode;
+      goto start;
+    }
+  return false;
+}
+
+/* Prepends the string literal pieces from NODE to BUFFER.  */
+static void
+string_literal_accumulate_pieces (TSNode node,
+                                  struct string_buffer_reversed *buffer)
+{
+ start:
+  if (ts_node_symbol (node) == ts_symbol_string_literal)
+    {
+      /* tree-sitter-d does not do a good job of dissecting the string literal
+         into its constituents.  Therefore we have to look at the node's entire
+         contents and dissect ourselves.  The only help we get is the list of
+         escape sequences in a double-quoted string literal:
+         (string_literal (quoted_string (escape_sequence) ... (escape_sequence)))
+       */
+      string_desc_t node_contents =
+        sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+                     (char *) contents + ts_node_start_byte (node));
+      #if DEBUG_D && 0
+      fprintf (stderr, "[%s]|%s|%.*s|\n", ts_node_type (node), ts_node_string (node), (int) sd_length (node_contents), sd_data (node_contents));
+      #endif
+      /* Drop StringPostfix.  */
+      if (sd_length (node_contents) >= 1
+          && sd_char_at (node_contents, sd_length (node_contents) - 1) == 'c')
+        node_contents = sd_substring (node_contents, 0, sd_length (node_contents) - 1);
+      /* Distinguish the various cases.  */
+      if (sd_length (node_contents) >= 2
+          && sd_char_at (node_contents, 0) == '"'
+          && sd_char_at (node_contents, sd_length (node_contents) - 1) == '"')
+        {
+          /* A double-quoted string.  */
+          if (ts_node_child_count (node) != 1)
+            abort ();
+          TSNode subnode = ts_node_child (node, 0);
+          if (ts_node_symbol (subnode) != ts_symbol_quoted_string)
+            abort ();
+          node_contents = sd_substring (node_contents, 1, sd_length (node_contents) - 1);
+          const char *ptr = sd_data (node_contents) + sd_length (node_contents);
+          /* Iterate through the nodes of type escape_sequence under the subnode.  */
+          uint32_t count = ts_node_named_child_count (subnode);
+          uint32_t i;
+          for (i = count; i > 0; )
+            {
+              i--;
+              TSNode escnode = ts_node_named_child (subnode, i);
+              if (ts_node_symbol (escnode) == ts_symbol_escape_sequence
+                  || ts_node_symbol (escnode) == ts_symbol_htmlentity)
+                {
+                  const char *escape_start = contents + ts_node_start_byte (escnode);
+                  const char *escape_end = contents + ts_node_end_byte (escnode);
+                  if (escape_end < ptr)
+                    sbr_xprepend_desc (buffer, sd_new_addr (ptr - escape_end, (char *) escape_end));
+
+                  /* The escape sequence must start with a backslash.  */
+                  if (!(escape_end - escape_start >= 2 && escape_start[0] == '\\'))
+                    abort ();
+                  /* tree-sitter's grammar.js allows more escape sequences than the
+                     specification.  Give a warning for the invalid cases.  */
+                  bool invalid = false;
+                  if (escape_end - escape_start == 2)
+                    {
+                      switch (escape_start[1])
+                        {
+                        case '\'':
+                        case '"':
+                        case '?':
+                        case '\\':
+                          sbr_xprepend1 (buffer, escape_start[1]);
+                          break;
+                        case '0': case '1': case '2': case '3':
+                        case '4': case '5': case '6': case '7':
+                          sbr_xprepend1 (buffer, escape_start[1] - '0');
+                          break;
+                        case 'a':
+                          sbr_xprepend1 (buffer, 0x07);
+                          break;
+                        case 'b':
+                          sbr_xprepend1 (buffer, 0x08);
+                          break;
+                        case 'f':
+                          sbr_xprepend1 (buffer, 0x0C);
+                          break;
+                        case 'n':
+                          sbr_xprepend1 (buffer, '\n');
+                          break;
+                        case 'r':
+                          sbr_xprepend1 (buffer, '\r');
+                          break;
+                        case 't':
+                          sbr_xprepend1 (buffer, '\t');
+                          break;
+                        case 'v':
+                          sbr_xprepend1 (buffer, 0x0B);
+                          break;
+                        default:
+                          invalid = true;
+                          break;
+                        }
+                    }
+                  else if (escape_start[1] >= '0' && escape_start[1] <= '7')
+                    {
+                      unsigned int value = 0;
+                      /* Only up to 3 octal digits are accepted.  */
+                      if (escape_end - escape_start <= 1 + 3)
+                        {
+                          const char *p;
+                          for (p = escape_start + 1; p < escape_end; p++)
+                            {
+                              /* No overflow is possible.  */
+                              char c = *p;
+                              if (c >= '0' && c <= '7')
+                                value = (value << 3) + (c - '0');
+                              else
+                                invalid = true;
+                            }
+                          if (value > 0xFF)
+                            invalid = true;
+                        }
+                      if (!invalid)
+                        sbr_xprepend1 (buffer, (unsigned char) value);
+                    }
+                  else if ((escape_start[1] == 'x' && escape_end - escape_start == 2 + 2)
+                           || (escape_start[1] == 'u' && escape_end - escape_start == 2 + 4)
+                           || (escape_start[1] == 'U' && escape_end - escape_start == 2 + 8))
+                    {
+                      unsigned int value = 0;
+                      const char *p;
+                      for (p = escape_start + 2; p < escape_end; p++)
+                        {
+                          /* No overflow is possible.  */
+                          char c = *p;
+                          if (c >= '0' && c <= '9')
+                            value = (value << 4) + (c - '0');
+                          else if (c >= 'A' && c <= 'Z')
+                            value = (value << 4) + (c - 'A' + 10);
+                          else if (c >= 'a' && c <= 'z')
+                            value = (value << 4) + (c - 'a' + 10);
+                          else
+                            invalid = true;
+                        }
+                      if (escape_start[1] == 'x')
+                        {
+                          if (!invalid)
+                            sbr_xprepend1 (buffer, (unsigned char) value);
+                        }
+                      else if (value < 0x110000 && !(value >= 0xD800 && value < 0xE000))
+                        {
+                          uint8_t buf[6];
+                          int n = u8_uctomb (buf, value, sizeof (buf));
+                          if (!(n > 0))
+                            abort ();
+                          sbr_xprepend_desc (buffer, sd_new_addr (n, (char *) buf));
+                        }
+                      else
+                        invalid = true;
+                    }
+                  else if (escape_start[1] == '&' && escape_end[-1] == ';')
+                    {
+                      /* A named character entity.  */
+                      string_desc_t entity =
+                        sd_new_addr (escape_end - escape_start - 3, (char *) escape_start + 2);
+                      const char *value = html5_lookup (entity);
+                      if (value != NULL)
+                        sbr_xprepend_c (buffer, value);
+                      else
+                        invalid = true;
+                    }
+                  else
+                    invalid = true;
+                  if (invalid)
+                    {
+                      size_t line_number = ts_node_line_number (escnode);
+                      if_error (IF_SEVERITY_WARNING,
+                                logical_file_name, line_number, (size_t)(-1), false,
+                                _("invalid escape sequence in string"));
+                    }
+
+                  ptr = escape_start;
+                }
+              else
+                abort ();
+            }
+          sbr_xprepend_desc (buffer, sd_substring (node_contents, 0, ptr - sd_data (node_contents)));
+        }
+      else if (sd_length (node_contents) >= 3
+               && sd_char_at (node_contents, 0) == 'x'
+               && sd_char_at (node_contents, 1) == '"'
+               && sd_char_at (node_contents, sd_length (node_contents) - 1) == '"')
+        {
+          /* A hex string.  */
+          if (ts_node_child_count (node) != 1)
+            abort ();
+          TSNode subnode = ts_node_child (node, 0);
+          if (ts_node_symbol (subnode) != ts_symbol_hex_string)
+            abort ();
+          node_contents = sd_substring (node_contents, 2, sd_length (node_contents) - 1);
+          int shift = 0;
+          int value = 0;
+          ptrdiff_t i;
+          for (i = sd_length (node_contents) - 1; i >= 0; i--)
+            {
+              char c = sd_char_at (node_contents, i);
+              if (c >= '0' && c <= '9')
+                {
+                  value += (c - '0') << shift;
+                  shift += 4;
+                }
+              else if (c >= 'A' && c <= 'F')
+                {
+                  value += (c - 'A' + 10) << shift;
+                  shift += 4;
+                }
+              else if (c >= 'a' && c <= 'f')
+                {
+                  value += (c - 'a' + 10) << shift;
+                  shift += 4;
+                }
+              if (shift == 8)
+                {
+                  sbr_xprepend1 (buffer, value);
+                  value = 0;
+                  shift = 0;
+                }
+            }
+          /* If shift == 4 here, there was an odd number of hex digits.  */
+        }
+      else
+        {
+          /* A raw string, delimited string, or token string.  */
+          if (sd_char_at (node_contents, 0) == 'q')
+            {
+              if (sd_length (node_contents) >= 3
+                  && sd_char_at (node_contents, 1) == '{'
+                  && sd_char_at (node_contents, sd_length (node_contents) - 1) == '}')
+                /* A token string.  */
+                node_contents = sd_substring (node_contents, 2, sd_length (node_contents) - 1);
+              else if (sd_length (node_contents) >= 3
+                       && sd_char_at (node_contents, 1) == '"'
+                       && sd_char_at (node_contents, sd_length (node_contents) - 1) == '"')
+                {
+                  /* A delimited string.  */
+                  node_contents = sd_substring (node_contents, 2, sd_length (node_contents) - 1);
+                  if (sd_length (node_contents) >= 2
+                      && ((sd_char_at (node_contents, 0) == '('
+                           && sd_char_at (node_contents, sd_length (node_contents) - 1) == ')')
+                          || (sd_char_at (node_contents, 0) == '['
+                              && sd_char_at (node_contents, sd_length (node_contents) - 1) == ']')
+                          || (sd_char_at (node_contents, 0) == '{'
+                              && sd_char_at (node_contents, sd_length (node_contents) - 1) == '}')
+                          || (sd_char_at (node_contents, 0) == '<'
+                              && sd_char_at (node_contents, sd_length (node_contents) - 1) == '>')
+                          || (sd_char_at (node_contents, 0) == sd_char_at (node_contents, sd_length (node_contents) - 1)
+                              && !(c_isalpha (sd_char_at (node_contents, 0)) || sd_char_at (node_contents, 0) == '_'))))
+                    node_contents = sd_substring (node_contents, 1, sd_length (node_contents) - 1);
+                  else
+                    {
+                      ptrdiff_t first_newline = sd_index (node_contents, '\n');
+                      if (first_newline < 0)
+                        abort ();
+                      ptrdiff_t last_newline = sd_last_index (node_contents, '\n');
+                      if (last_newline < 0)
+                        abort ();
+                      string_desc_t delimiter = sd_substring (node_contents, last_newline + 1, sd_length (node_contents));
+                      size_t delimiter_length = sd_length (delimiter);
+                      if (delimiter_length == 0)
+                        abort ();
+                      if (!((first_newline == delimiter_length
+                             || (first_newline == delimiter_length + 1
+                                 && sd_char_at (node_contents, delimiter_length) == '\r'))
+                            && sd_equals (sd_substring (node_contents, 0, delimiter_length), delimiter)))
+                        abort ();
+                      node_contents = sd_substring (node_contents, first_newline + 1, last_newline + 1);
+                    }
+                }
+              else
+                abort ();
+            }
+          else if (sd_length (node_contents) >= 3
+                   && sd_char_at (node_contents, 0) == 'r'
+                   && sd_char_at (node_contents, 1) == '"'
+                   && sd_char_at (node_contents, sd_length (node_contents) - 1) == '"')
+            /* A raw string.  */
+            node_contents = sd_substring (node_contents, 2, sd_length (node_contents) - 1);
+          else if (sd_length (node_contents) >= 2
+                   && sd_char_at (node_contents, 0) == '`'
+                   && sd_char_at (node_contents, sd_length (node_contents) - 1) == '`')
+            /* A raw string.  */
+            node_contents = sd_substring (node_contents, 1, sd_length (node_contents) - 1);
+          else
+            abort ();
+
+          sbr_xprepend_desc (buffer, node_contents);
+        }
+    }
+  else if (ts_node_symbol (node) == ts_symbol_binary_expression
+           && ts_node_child_count (node) == 1)
+    {
+      TSNode subnode = ts_node_child (node, 0);
+      if (ts_node_symbol (subnode) == ts_symbol_add_expression
+          && ts_node_child_count (subnode) == 3
+          && ts_node_symbol (ts_node_child (subnode, 1)) == ts_symbol_tilde)
+        {
+          /* Recurse into the left and right subnodes.  */
+          string_literal_accumulate_pieces (ts_node_child (subnode, 2), buffer);
+          /*string_literal_accumulate_pieces (ts_node_child (subnode, 0), buffer);*/
+          node = ts_node_child (subnode, 0);
+          goto start;
+        }
+      else
+        abort ();
+    }
+  else if (ts_node_symbol (node) == ts_symbol_expression
+           && ts_node_named_child_count (node) == 1)
+    {
+      TSNode subnode = ts_node_named_child (node, 0);
+      /* Recurse.  */
+      /*string_literal_accumulate_pieces (subnode, buffer);*/
+      node = subnode;
+      goto start;
+    }
+  else
+    abort ();
+}
+
+/* Returns the value of the string literal, or concatenation of string
+   literals, represented by NODE.
+   The result is freshly allocated and mostly UTF-8 encoded ('\x' escapes,
+   octal escapes, and hex strings can contribute arbitrary bytes).  */
+static char *
+string_literal_value (TSNode node)
+{
+  struct string_buffer_reversed pieces;
+  char *value;
+
+  sbr_init (&pieces);
+  string_literal_accumulate_pieces (node, &pieces);
+  value = sbr_xdupfree_c (&pieces);
+  return value;
+}
+
+/* --------------------- Parsing and string extraction --------------------- */
+
+/* Context lookup table.
+   It is looked up by function name, in order to obtain the flag contexts
+   for the arguments of a function call.  */
+static flag_context_list_table_ty *flag_context_list_table;
+
+/* Maximum supported nesting depth.
+   The nesting_depth counter below is incremented before and decremented
+   after recursive extract_from_node calls; exceeding this limit is reported
+   as a fatal error.  */
+#define MAX_NESTING_DEPTH 1000
+
+static int nesting_depth;
+
+/* The file is parsed into an abstract syntax tree.  Scan the syntax tree,
+   looking for a keyword in identifier position of a call_expression or
+   macro_invocation, followed by a string among the arguments.
+   When we see this pattern, we have something to remember.
+
+     Normal handling: Look for
+       keyword ( ... msgid ... )
+     Plural handling: Look for
+       keyword ( ... msgid ... msgid_plural ... )
+
+   We handle macro_invocation separately from call_expression, because in
+   a macro_invocation spaces are allowed between the identifier and the '!'
+   (i.e. 'println !' is as valid as 'println!').  Looking for 'println!'
+   would make the code more complicated.
+
+   We use recursion because the arguments before msgid or between msgid
+   and msgid_plural can contain subexpressions of the same form.  */
+
+/* Forward declarations.
+   extract_from_node is called by the functions below, ahead of its
+   definition, in order to recurse into the nodes of the arguments.  */
+static void extract_from_node (TSNode node,
+                               bool ignore,
+                               bool callee_in_call_expression,
+                               flag_region_ty *outer_region,
+                               message_list_ty *mlp);
+
+/* Extracts messages from the function call NODE consisting of
+     - CALLEE_NODE: a tree node of type 'identifier' or 'property_expression',
+     - ARGS_NODE: a tree node of type 'named_arguments'.
+   Extracted messages are added to MLP.
+   OUTER_REGION is the flag region from which the regions of the individual
+   arguments inherit.
+   The function name is CALLEE_NODE itself or, for a 'property_expression',
+   its last child.  If it is an identifier that is found in function_keywords,
+   string literal arguments are passed to an arglist_parser for the call
+   shapes of that keyword (or, if extract_all is set, remembered directly).
+   Arguments that were not already extracted are scanned recursively through
+   extract_from_node.
+   Aborts if CALLEE_NODE has neither of the two types mentioned above.  */
+static void
+extract_from_function_call (TSNode node,
+                            TSNode callee_node,
+                            TSNode args_node,
+                            flag_region_ty *outer_region,
+                            message_list_ty *mlp)
+{
+  uint32_t args_count = ts_node_child_count (args_node);
+
+  TSNode function_node;
+  if (ts_node_symbol (callee_node) == ts_symbol_identifier)
+    function_node = callee_node;
+  else if (ts_node_symbol (callee_node) == ts_symbol_property_expression)
+    function_node = ts_node_child (callee_node, ts_node_child_count (callee_node) - 1);
+  else
+    abort ();
+
+  flag_context_list_iterator_ty next_context_iter;
+
+  if (ts_node_symbol (function_node) == ts_symbol_identifier)
+    {
+      string_desc_t function_name =
+        sd_new_addr (ts_node_end_byte (function_node) - ts_node_start_byte (function_node),
+                     (char *) contents + ts_node_start_byte (function_node));
+
+      /* Context iterator.  It is advanced once per argument.  */
+      next_context_iter =
+        flag_context_list_iterator (
+          flag_context_list_table_lookup (
+            flag_context_list_table,
+            sd_data (function_name), sd_length (function_name)));
+
+      void *keyword_value;
+      if (hash_find_entry (&function_keywords,
+                           sd_data (function_name), sd_length (function_name),
+                           &keyword_value)
+          == 0)
+        {
+          /* The callee has some information associated with it.  */
+          const struct callshapes *next_shapes = keyword_value;
+
+          /* We have a function, named by a relevant identifier, with an argument
+             list.  */
+
+          struct arglist_parser *argparser =
+            arglist_parser_alloc (mlp, next_shapes);
+
+          /* Current argument number.  */
+          uint32_t arg = 0;
+
+          /* The first part of the 'property_expression' is treated as the first
+             argument.  Cf. <https://dlang.org/spec/function.html#pseudo-member>  */
+          if (ts_node_symbol (callee_node) == ts_symbol_property_expression)
+            {
+              arg++;
+              flag_region_ty *arg_region =
+                inheriting_region (outer_region,
+                                   flag_context_list_iterator_advance (
+                                     &next_context_iter));
+
+              bool already_extracted = false;
+              TSNode arg_expr_node = ts_node_child (callee_node, 0);
+              if (is_string_literal (arg_expr_node))
+                {
+                  lex_pos_ty pos;
+                  pos.file_name = logical_file_name;
+                  pos.line_number = ts_node_line_number (arg_expr_node);
+
+                  char *string = string_literal_value (arg_expr_node);
+
+                  if (extract_all)
+                    {
+                      remember_a_message (mlp, NULL, string, true, false,
+                                          arg_region, &pos,
+                                          NULL, savable_comment, true);
+                      already_extracted = true;
+                    }
+                  else
+                    {
+                      mixed_string_ty *mixed_string =
+                        mixed_string_alloc_utf8 (string, lc_string,
+                                                 pos.file_name, pos.line_number);
+                      arglist_parser_remember (argparser, arg, mixed_string,
+                                               arg_region,
+                                               pos.file_name, pos.line_number,
+                                               savable_comment, true);
+                    }
+                }
+
+              if (!already_extracted)
+                {
+                  if (++nesting_depth > MAX_NESTING_DEPTH)
+                    if_error (IF_SEVERITY_FATAL_ERROR,
+                              logical_file_name, ts_node_line_number (arg_expr_node), (size_t)(-1), false,
+                              _("too many open parentheses, brackets, or braces"));
+                  extract_from_node (arg_expr_node,
+                                     false,
+                                     false,
+                                     arg_region,
+                                     mlp);
+                  nesting_depth--;
+                }
+
+              {
+                /* Handle the potential comments in the callee_node, between
+                   arg_expr_node and function_node.  */
+                uint32_t count = ts_node_child_count (callee_node);
+                uint32_t i;
+                for (i = 1; i < count; i++)
+                  {
+                    TSNode subnode = ts_node_child (callee_node, i);
+                    if (ts_node_eq (subnode, function_node))
+                      break;
+                    handle_comments (subnode);
+                  }
+              }
+
+              unref_region (arg_region);
+            }
+
+          /* Handle the potential comments in node, between
+             callee_node and args_node.  */
+          {
+            uint32_t count = ts_node_child_count (node);
+            uint32_t i;
+            for (i = 1; i < count; i++)
+              {
+                TSNode subnode = ts_node_child (node, i);
+                if (ts_node_eq (subnode, args_node))
+                  break;
+                handle_comments (subnode);
+              }
+          }
+
+          uint32_t i;
+          for (i = 0; i < args_count; i++)
+            {
+              TSNode arg_node = ts_node_child (args_node, i);
+              handle_comments (arg_node);
+              if (ts_node_is_named (arg_node)
+                  && ts_node_symbol (arg_node) != ts_symbol_comment)
+                {
+                  if (ts_node_symbol (arg_node) != ts_symbol_named_argument)
+                    abort ();
+                  arg++;
+                  flag_region_ty *arg_region =
+                    inheriting_region (outer_region,
+                                       flag_context_list_iterator_advance (
+                                         &next_context_iter));
+
+                  bool already_extracted = false;
+                  if (ts_node_child_count (arg_node) == 1)
+                    {
+                      TSNode arg_expr_node = ts_node_child (arg_node, 0);
+                      if (is_string_literal (arg_expr_node))
+                        {
+                          lex_pos_ty pos;
+                          pos.file_name = logical_file_name;
+                          pos.line_number = ts_node_line_number (arg_expr_node);
+
+                          char *string = string_literal_value (arg_expr_node);
+
+                          if (extract_all)
+                            {
+                              remember_a_message (mlp, NULL, string, true, false,
+                                                  arg_region, &pos,
+                                                  NULL, savable_comment, true);
+                              already_extracted = true;
+                            }
+                          else
+                            {
+                              mixed_string_ty *mixed_string =
+                                mixed_string_alloc_utf8 (string, lc_string,
+                                                         pos.file_name, pos.line_number);
+                              arglist_parser_remember (argparser, arg, mixed_string,
+                                                       arg_region,
+                                                       pos.file_name, pos.line_number,
+                                                       savable_comment, true);
+                            }
+                        }
+                    }
+
+                  if (!already_extracted)
+                    {
+                      if (++nesting_depth > MAX_NESTING_DEPTH)
+                        if_error (IF_SEVERITY_FATAL_ERROR,
+                                  logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                                  _("too many open parentheses, brackets, or braces"));
+                      extract_from_node (arg_node,
+                                         false,
+                                         false,
+                                         arg_region,
+                                         mlp);
+                      nesting_depth--;
+                    }
+
+                  unref_region (arg_region);
+                }
+            }
+          arglist_parser_done (argparser, arg);
+          return;
+        }
+    }
+  else
+    next_context_iter = null_context_list_iterator;
+
+  /* Recurse.  This point is reached when the function name is not an
+     identifier or is not found in function_keywords.  */
+
+  /* Current argument number.  */
+  MAYBE_UNUSED uint32_t arg = 0;
+
+  /* The first part of the 'property_expression' is treated as the first
+     argument.  Cf. <https://dlang.org/spec/function.html#pseudo-member>  */
+  if (ts_node_symbol (callee_node) == ts_symbol_property_expression)
+    {
+      arg++;
+      flag_region_ty *arg_region =
+        inheriting_region (outer_region,
+                           flag_context_list_iterator_advance (
+                             &next_context_iter));
+      TSNode arg_expr_node = ts_node_child (callee_node, 0);
+
+      if (++nesting_depth > MAX_NESTING_DEPTH)
+        if_error (IF_SEVERITY_FATAL_ERROR,
+                  logical_file_name, ts_node_line_number (arg_expr_node), (size_t)(-1), false,
+                  _("too many open parentheses, brackets, or braces"));
+      extract_from_node (arg_expr_node,
+                         false,
+                         false,
+                         arg_region,
+                         mlp);
+      nesting_depth--;
+
+      {
+        /* Handle the potential comments in the callee_node, between
+           arg_expr_node and function_node.  */
+        uint32_t count = ts_node_child_count (callee_node);
+        uint32_t i;
+        for (i = 1; i < count; i++)
+          {
+            TSNode subnode = ts_node_child (callee_node, i);
+            if (ts_node_eq (subnode, function_node))
+              break;
+            handle_comments (subnode);
+          }
+      }
+
+      unref_region (arg_region);
+    }
+
+  /* Handle the potential comments in node, between
+     callee_node and args_node.  */
+  {
+    uint32_t count = ts_node_child_count (node);
+    uint32_t i;
+    for (i = 1; i < count; i++)
+      {
+        TSNode subnode = ts_node_child (node, i);
+        if (ts_node_eq (subnode, args_node))
+          break;
+        handle_comments (subnode);
+      }
+  }
+
+  uint32_t i;
+  for (i = 0; i < args_count; i++)
+    {
+      TSNode arg_node = ts_node_child (args_node, i);
+      handle_comments (arg_node);
+      if (ts_node_is_named (arg_node)
+          && ts_node_symbol (arg_node) != ts_symbol_comment)
+        {
+          arg++;
+          flag_region_ty *arg_region =
+            inheriting_region (outer_region,
+                               flag_context_list_iterator_advance (
+                                 &next_context_iter));
+
+          if (++nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                      _("too many open parentheses, brackets, or braces"));
+          extract_from_node (arg_node,
+                             false,
+                             false,
+                             arg_region,
+                             mlp);
+          nesting_depth--;
+
+          unref_region (arg_region);
+        }
+    }
+}
+
+/* Extracts messages from a function call that has no explicit argument list,
+   consisting of
+     - CALLEE_NODE: a tree node of type 'property_expression'.
+   The first child of CALLEE_NODE is treated as the first (and only) argument
+   of the function designated by the last child of CALLEE_NODE.
+   Cf. <https://dlang.org/spec/function.html#pseudo-member>
+   OUTER_REGION is the flag region from which the argument's region inherits.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_function_call_without_args (TSNode callee_node,
+                                         flag_region_ty *outer_region,
+                                         message_list_ty *mlp)
+{
+  uint32_t callee_child_count = ts_node_child_count (callee_node);
+  TSNode function_node = ts_node_child (callee_node, callee_child_count - 1);
+
+  /* Whether the function is designated by an identifier that is registered
+     in function_keywords, and if so, the parser collecting its arguments.  */
+  bool is_keyword = false;
+  struct arglist_parser *argparser = NULL;
+
+  /* Context iterator.  */
+  flag_context_list_iterator_ty next_context_iter;
+
+  if (ts_node_symbol (function_node) != ts_symbol_identifier)
+    next_context_iter = null_context_list_iterator;
+  else
+    {
+      uint32_t name_start = ts_node_start_byte (function_node);
+      uint32_t name_end = ts_node_end_byte (function_node);
+      string_desc_t function_name =
+        sd_new_addr (name_end - name_start, (char *) contents + name_start);
+
+      next_context_iter =
+        flag_context_list_iterator (
+          flag_context_list_table_lookup (
+            flag_context_list_table,
+            sd_data (function_name), sd_length (function_name)));
+
+      void *keyword_value;
+      if (hash_find_entry (&function_keywords,
+                           sd_data (function_name), sd_length (function_name),
+                           &keyword_value)
+          == 0)
+        {
+          /* We have a function, named by a relevant identifier, with an
+             implicit argument list.  The callee has some information
+             associated with it.  */
+          const struct callshapes *shapes = keyword_value;
+
+          is_keyword = true;
+          argparser = arglist_parser_alloc (mlp, shapes);
+        }
+    }
+
+  /* Current argument number: the first part of the 'property_expression' is
+     argument 1, and there is no other argument.  */
+  const uint32_t arg = 1;
+
+  flag_region_ty *arg_region =
+    inheriting_region (outer_region,
+                       flag_context_list_iterator_advance (
+                         &next_context_iter));
+  TSNode arg_expr_node = ts_node_child (callee_node, 0);
+
+  /* Only for a keyword is a string literal argument looked at directly.  */
+  bool already_extracted = false;
+  if (is_keyword && is_string_literal (arg_expr_node))
+    {
+      lex_pos_ty pos;
+      pos.file_name = logical_file_name;
+      pos.line_number = ts_node_line_number (arg_expr_node);
+
+      char *string = string_literal_value (arg_expr_node);
+
+      if (!extract_all)
+        {
+          /* Hand the string to the argument list parser; the recursion
+             below still visits the literal's node.  */
+          mixed_string_ty *mixed_string =
+            mixed_string_alloc_utf8 (string, lc_string,
+                                     pos.file_name, pos.line_number);
+          arglist_parser_remember (argparser, arg, mixed_string,
+                                   arg_region,
+                                   pos.file_name, pos.line_number,
+                                   savable_comment, true);
+        }
+      else
+        {
+          remember_a_message (mlp, NULL, string, true, false,
+                              arg_region, &pos,
+                              NULL, savable_comment, true);
+          already_extracted = true;
+        }
+    }
+
+  /* Recurse into the argument, unless it was already stored as a message.  */
+  if (!already_extracted)
+    {
+      if (++nesting_depth > MAX_NESTING_DEPTH)
+        if_error (IF_SEVERITY_FATAL_ERROR,
+                  logical_file_name, ts_node_line_number (arg_expr_node), (size_t)(-1), false,
+                  _("too many open parentheses, brackets, or braces"));
+      extract_from_node (arg_expr_node,
+                         false,
+                         false,
+                         arg_region,
+                         mlp);
+      nesting_depth--;
+    }
+
+  /* Handle the potential comments in the callee_node, between
+     arg_expr_node and function_node.  */
+  for (uint32_t k = 1; k < callee_child_count; k++)
+    {
+      TSNode between_node = ts_node_child (callee_node, k);
+      if (ts_node_eq (between_node, function_node))
+        break;
+      handle_comments (between_node);
+    }
+
+  unref_region (arg_region);
+
+  if (is_keyword)
+    arglist_parser_done (argparser, arg);
+}
+
+/* Extracts messages from the template instantiation NODE consisting of
+     - IDENTIFIER_NODE: a tree node of type 'identifier',
+     - ARGS_NODE: a tree node of type 'template_arguments'.
+   If the identifier is registered in template_keywords, string literal
+   arguments are passed to an argument list parser (or, when extract_all is
+   set, stored directly as messages).  Every argument is additionally
+   traversed recursively, unless it was already stored as a message.
+   OUTER_REGION is the flag region from which the arguments' regions inherit.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_template_instantation (TSNode node,
+                                    TSNode identifier_node,
+                                    TSNode args_node,
+                                    flag_region_ty *outer_region,
+                                    message_list_ty *mlp)
+{
+  uint32_t args_count = ts_node_child_count (args_node);
+
+  uint32_t name_start = ts_node_start_byte (identifier_node);
+  uint32_t name_end = ts_node_end_byte (identifier_node);
+  string_desc_t template_name =
+    sd_new_addr (name_end - name_start, (char *) contents + name_start);
+
+  /* Handle the potential comments in node, between
+     identifier_node and args_node.  */
+  {
+    uint32_t node_child_count = ts_node_child_count (node);
+    for (uint32_t k = 1; k < node_child_count; k++)
+      {
+        TSNode between_node = ts_node_child (node, k);
+        if (ts_node_eq (between_node, args_node))
+          break;
+        handle_comments (between_node);
+      }
+  }
+
+  /* Context iterator.  */
+  flag_context_list_iterator_ty next_context_iter =
+    flag_context_list_iterator (
+      flag_context_list_table_lookup (
+        flag_context_list_table,
+        sd_data (template_name), sd_length (template_name)));
+
+  /* Whether we have a template instantiation, named by a relevant identifier,
+     with either a single argument or an argument list; and if so, the parser
+     collecting its arguments.  */
+  bool is_keyword = false;
+  struct arglist_parser *argparser = NULL;
+  {
+    void *keyword_value;
+    if (hash_find_entry (&template_keywords,
+                         sd_data (template_name), sd_length (template_name),
+                         &keyword_value)
+        == 0)
+      {
+        /* The identifier has some information associated with it.  */
+        const struct callshapes *shapes = keyword_value;
+
+        is_keyword = true;
+        argparser = arglist_parser_alloc (mlp, shapes);
+      }
+  }
+
+  /* Current argument number.  */
+  uint32_t arg = 0;
+
+  for (uint32_t k = 0; k < args_count; k++)
+    {
+      TSNode arg_node = ts_node_child (args_node, k);
+      handle_comments (arg_node);
+
+      /* Anonymous nodes and comments are not arguments.  */
+      if (!ts_node_is_named (arg_node)
+          || ts_node_symbol (arg_node) == ts_symbol_comment)
+        continue;
+
+      arg++;
+      flag_region_ty *arg_region =
+        inheriting_region (outer_region,
+                           flag_context_list_iterator_advance (
+                             &next_context_iter));
+
+      bool already_extracted = false;
+      if (is_keyword)
+        {
+          /* Determine which node, if any, to test for being a string literal:
+             the only child of a 'template_argument' node, or otherwise the
+             node itself (assumed to be a single template argument).  */
+          TSNode literal_node = arg_node;
+          bool inspect_literal = true;
+          if (ts_node_symbol (arg_node) == ts_symbol_template_argument)
+            {
+              if (ts_node_child_count (arg_node) == 1)
+                literal_node = ts_node_child (arg_node, 0);
+              else
+                inspect_literal = false;
+            }
+
+          if (inspect_literal && is_string_literal (literal_node))
+            {
+              lex_pos_ty pos;
+              pos.file_name = logical_file_name;
+              pos.line_number = ts_node_line_number (literal_node);
+
+              char *string = string_literal_value (literal_node);
+
+              if (!extract_all)
+                {
+                  mixed_string_ty *mixed_string =
+                    mixed_string_alloc_utf8 (string, lc_string,
+                                             pos.file_name, pos.line_number);
+                  arglist_parser_remember (argparser, arg, mixed_string,
+                                           arg_region,
+                                           pos.file_name, pos.line_number,
+                                           savable_comment, true);
+                }
+              else
+                {
+                  remember_a_message (mlp, NULL, string, true, false,
+                                      arg_region, &pos,
+                                      NULL, savable_comment, true);
+                  already_extracted = true;
+                }
+            }
+        }
+
+      /* Recurse into the argument, unless it was already stored as a
+         message.  */
+      if (!already_extracted)
+        {
+          if (++nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name, ts_node_line_number (arg_node), (size_t)(-1), false,
+                      _("too many open parentheses, brackets, or braces"));
+          extract_from_node (arg_node,
+                             false,
+                             false,
+                             arg_region,
+                             mlp);
+          nesting_depth--;
+        }
+
+      unref_region (arg_region);
+    }
+
+  if (is_keyword)
+    arglist_parser_done (argparser, arg);
+}
+
+/* Extracts messages in the syntax tree NODE.
+   IGNORE being true suppresses the direct extraction of string literals
+   (it is set while traversing the inside of a string literal).
+   CALLEE_IN_CALL_EXPRESSION being true means that NODE is the first child of
+   a 'call_expression'; a 'property_expression' in that position is not
+   treated as an argument-less function call.
+   OUTER_REGION is the flag region that applies to NODE.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_node (TSNode node,
+                   bool ignore,
+                   bool callee_in_call_expression,
+                   flag_region_ty *outer_region,
+                   message_list_ty *mlp)
+{
+  const bool node_is_call_expression =
+    (ts_node_symbol (node) == ts_symbol_call_expression);
+
+  /* With extract_all, every string literal is a message by itself.  */
+  if (extract_all && !ignore && is_string_literal (node))
+    {
+      lex_pos_ty pos;
+      pos.file_name = logical_file_name;
+      pos.line_number = ts_node_line_number (node);
+
+      remember_a_message (mlp, NULL, string_literal_value (node), true, false,
+                          outer_region, &pos,
+                          NULL, savable_comment, true);
+    }
+
+  /* A function call with an explicit argument list.  */
+  if (node_is_call_expression && ts_node_named_child_count (node) >= 2)
+    {
+      TSNode callee_node = ts_node_named_child (node, 0);
+      bool callee_is_supported =
+        (ts_node_symbol (callee_node) == ts_symbol_identifier
+         || ts_node_symbol (callee_node) == ts_symbol_property_expression);
+      if (callee_is_supported)
+        {
+          /* Search for the first 'named_arguments' child after the callee.  */
+          uint32_t named_count = ts_node_named_child_count (node);
+          uint32_t idx = 1;
+          while (idx < named_count
+                 && ts_node_symbol (ts_node_named_child (node, idx))
+                    != ts_symbol_named_arguments)
+            idx++;
+          if (idx < named_count)
+            {
+              TSNode args_node = ts_node_named_child (node, idx);
+              if (ts_node_symbol (args_node) != ts_symbol_named_arguments)
+                abort ();
+              extract_from_function_call (node, callee_node, args_node,
+                                          outer_region,
+                                          mlp);
+              return;
+            }
+        }
+    }
+
+  /* A 'property_expression' that is not in the position of the callee in a
+     call_expression is treated like a call_expression with 0 arguments.  */
+  if (!callee_in_call_expression
+      && ts_node_symbol (node) == ts_symbol_property_expression)
+    {
+      extract_from_function_call_without_args (node,
+                                               outer_region,
+                                               mlp);
+      return;
+    }
+
+  /* A template instantiation.  */
+  if (ts_node_symbol (node) == ts_symbol_template_instance
+      && ts_node_named_child_count (node) >= 2)
+    {
+      TSNode identifier_node = ts_node_named_child (node, 0);
+      if (ts_node_symbol (identifier_node) == ts_symbol_identifier)
+        {
+          /* Search for the first 'template_arguments' child after the
+             identifier.  */
+          uint32_t named_count = ts_node_named_child_count (node);
+          uint32_t idx = 1;
+          while (idx < named_count
+                 && ts_node_symbol (ts_node_named_child (node, idx))
+                    != ts_symbol_template_arguments)
+            idx++;
+          if (idx < named_count)
+            {
+              TSNode args_node = ts_node_named_child (node, idx);
+              if (ts_node_symbol (args_node) != ts_symbol_template_arguments)
+                abort ();
+              extract_from_template_instantation (node,
+                                                  identifier_node, args_node,
+                                                  outer_region,
+                                                  mlp);
+              return;
+            }
+        }
+    }
+
+  #if DEBUG_D && 0
+  if (ts_node_symbol (node) == ts_symbol_call_expression)
+    {
+      TSNode subnode = ts_node_named_child (node, 0);
+      fprintf (stderr, "-> %s\n", ts_node_string (subnode));
+      if (ts_node_symbol (subnode) == ts_symbol_identifier)
+        {
+          string_desc_t subnode_string =
+            sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                         (char *) contents + ts_node_start_byte (subnode));
+          if (sd_equals (subnode_string, sd_from_c ("gettext")))
+            {
+              TSNode argsnode = ts_node_named_child (node, 1);
+              fprintf (stderr, "gettext arguments: %s\n", ts_node_string (argsnode));
+              fprintf (stderr, "gettext children:\n");
+              uint32_t count = ts_node_named_child_count (node);
+              uint32_t i;
+              for (i = 0; i < count; i++)
+                fprintf (stderr, "%u -> %s\n", i, ts_node_string (ts_node_named_child (node, i)));
+            }
+        }
+    }
+  #endif
+
+  /* Do not descend into comments.
+     Also ignore the code in unit tests.  Translators are not supposed to
+     localize unit tests, only production code.  */
+  if (ts_node_symbol (node) == ts_symbol_comment
+      || ts_node_symbol (node) == ts_symbol_unittest_declaration)
+    return;
+
+  /* Recurse.  The parts of a string literal are not extracted on their
+     own.  */
+  if (!ignore && is_string_literal (node))
+    ignore = true;
+
+  uint32_t child_count = ts_node_child_count (node);
+  for (uint32_t k = 0; k < child_count; k++)
+    {
+      TSNode child_node = ts_node_child (node, k);
+      handle_comments (child_node);
+      if (++nesting_depth > MAX_NESTING_DEPTH)
+        if_error (IF_SEVERITY_FATAL_ERROR,
+                  logical_file_name, ts_node_line_number (child_node), (size_t)(-1), false,
+                  _("too many open parentheses, brackets, or braces"));
+      extract_from_node (child_node,
+                         ignore,
+                         k == 0 && node_is_call_expression,
+                         outer_region,
+                         mlp);
+      nesting_depth--;
+    }
+}
+
+/* Scans the D source file REAL_FILENAME and adds its translatable strings to
+   the message list of the first domain of MDLP.
+   F is not used: the contents are read from REAL_FILENAME.
+   LOGICAL_FILENAME is the file name recorded in the positions of the
+   extracted messages and used in error messages.
+   FLAG_TABLE is the table of flag contexts, looked up by function or template
+   name.
+   The contents must be UTF-8, UTF-16 or UTF-32 encoded; UTF-16 and UTF-32
+   contents are converted to UTF-8 before being parsed.  A read failure, an
+   unsupported encoding, or contents that do not fit in 32 bits of length are
+   reported through error (EXIT_FAILURE, ...).  */
+void
+extract_d (FILE *f,
+           const char *real_filename, const char *logical_filename,
+           flag_context_list_table_ty *flag_table,
+           msgdomain_list_ty *mdlp)
+{
+  message_list_ty *mlp = mdlp->item[0]->messages;
+
+  logical_file_name = xstrdup (logical_filename);
+
+  last_comment_line = -1;
+  last_non_comment_line = -1;
+
+  flag_context_list_table = flag_table;
+  nesting_depth = 0;
+
+  init_keywords ();
+
+  /* Look up the language and its symbols only once.  */
+  if (ts_language == NULL)
+    {
+      ts_language = tree_sitter_d ();
+      ts_symbol_comment              = ts_language_symbol ("comment", true);
+      ts_symbol_string_literal       = ts_language_symbol ("string_literal", true);
+      ts_symbol_quoted_string        = ts_language_symbol ("quoted_string", true);
+      ts_symbol_escape_sequence      = ts_language_symbol ("escape_sequence", true);
+      ts_symbol_htmlentity           = ts_language_symbol ("htmlentity", true);
+      ts_symbol_raw_string           = ts_language_symbol ("raw_string", true);
+      ts_symbol_hex_string           = ts_language_symbol ("hex_string", true);
+      ts_symbol_binary_expression    = ts_language_symbol ("binary_expression", true);
+      ts_symbol_add_expression       = ts_language_symbol ("add_expression", true);
+      ts_symbol_expression           = ts_language_symbol ("expression", true);
+      ts_symbol_identifier           = ts_language_symbol ("identifier", true);
+      ts_symbol_property_expression  = ts_language_symbol ("property_expression", true);
+      ts_symbol_call_expression      = ts_language_symbol ("call_expression", true);
+      ts_symbol_named_arguments      = ts_language_symbol ("named_arguments", true);
+      ts_symbol_named_argument       = ts_language_symbol ("named_argument", true);
+      ts_symbol_template_instance    = ts_language_symbol ("template_instance", true);
+      ts_symbol_template_arguments   = ts_language_symbol ("template_arguments", true);
+      ts_symbol_template_argument    = ts_language_symbol ("template_argument", true);
+      ts_symbol_unittest_declaration = ts_language_symbol ("unittest_declaration", true);
+      ts_symbol_tilde                = ts_language_symbol ("~", false);
+    }
+
+  /* Read the file into memory.  */
+  char *contents_data;
+  size_t contents_length;
+  contents_data = read_file (real_filename, 0, &contents_length);
+  if (contents_data == NULL)
+    error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+           real_filename);
+
+  /* tree-sitter works only on files whose size fits in an uint32_t.
+     This early test avoids converting contents that are too large anyway;
+     the size is tested again after the conversion to UTF-8 below.  */
+  if (contents_length > 0xFFFFFFFFUL)
+    error (EXIT_FAILURE, 0, _("file \"%s\" is unsupported because too large"),
+           real_filename);
+
+  /* D source files are UTF-8 or UTF-16 or UTF-32 encoded.
+     See <https://dlang.org/spec/lex.html#source_text>.
+     But tree-sitter supports only the UTF-8 case, and we want the 'contents'
+     variable above to be in an ASCII-compatible encoding as well.  */
+  if (u8_check ((uint8_t *) contents_data, contents_length) != NULL)
+    {
+      /* The file is not UTF-8 encoded.
+         Note: contents_data is malloc()ed and therefore suitably aligned.  */
+      /* Test whether it is UTF-32 encoded.
+         The disambiguation is automatic, because the file is supposed to
+         contain at least one U+000A, and U+0A000000 is invalid.  */
+      if ((contents_length % 4) == 0)
+        {
+          /* Round 0 tests the native byte order, round 1 the opposite one.
+             After both rounds, the original byte order is restored.  */
+          int round;
+          for (round = 0; round < 2; round++)
+            {
+              if (u32_check ((uint32_t *) contents_data, contents_length / 4) == NULL)
+                {
+                  /* Convert from UTF-32 to UTF-8.  */
+                  size_t u8_contents_length;
+                  uint8_t *u8_contents_data =
+                    u32_to_u8 ((uint32_t *) contents_data, contents_length / 4,
+                               NULL, &u8_contents_length);
+                  if (u8_contents_data != NULL)
+                    {
+                      free (contents_data);
+                      contents_length = u8_contents_length;
+                      contents_data = (char *) u8_contents_data;
+                      goto converted;
+                    }
+                }
+              for (size_t i = 0; i < contents_length / 4; i++)
+                ((uint32_t *) contents_data)[i] = bswap_32 (((uint32_t *) contents_data)[i]);
+            }
+        }
+      /* Test whether it is UTF-16 encoded.
+         Disambiguate between UTF-16BE and UTF-16LE 1. by looking at the BOM, if present,
+         2. by looking at the number of characters U+000A vs. U+0A00 (a heuristic).  */
+      if ((contents_length % 2) == 0)
+        {
+          bool swap;
+          if (((uint16_t *) contents_data)[0] == 0xFEFF)
+            swap = false;
+          else if (((uint16_t *) contents_data)[0] == 0xFFFE)
+            swap = true;
+          else
+            {
+              size_t count_000A = 0;
+              size_t count_0A00 = 0;
+              for (size_t i = 0; i < contents_length / 2; i++)
+                {
+                  uint16_t uc = ((uint16_t *) contents_data)[i];
+                  count_000A += (uc == 0x000A);
+                  count_0A00 += (uc == 0x0A00);
+                }
+              swap = (count_0A00 > count_000A);
+            }
+          if (swap)
+            {
+              for (size_t i = 0; i < contents_length / 2; i++)
+                ((uint16_t *) contents_data)[i] = bswap_16 (((uint16_t *) contents_data)[i]);
+            }
+          if (u16_check ((uint16_t *) contents_data, contents_length / 2) == NULL)
+            {
+              /* Convert from UTF-16 to UTF-8.  */
+              size_t u8_contents_length;
+              uint8_t *u8_contents_data =
+                u16_to_u8 ((uint16_t *) contents_data, contents_length / 2,
+                           NULL, &u8_contents_length);
+              if (u8_contents_data != NULL)
+                {
+                  free (contents_data);
+                  contents_length = u8_contents_length;
+                  contents_data = (char *) u8_contents_data;
+                  goto converted;
+                }
+            }
+        }
+      error (EXIT_FAILURE, 0,
+             _("file \"%s\" is unsupported because not UTF-8 or UTF-16 or UTF-32 encoded"),
+             real_filename);
+    }
+ converted:
+  if (u8_check ((uint8_t *) contents_data, contents_length) != NULL)
+    abort ();
+  /* The conversion from UTF-16 to UTF-8 can enlarge the contents: a 2-byte
+     UTF-16 unit in the range U+0800..U+FFFF becomes 3 bytes in UTF-8.
+     Therefore test the size limit of tree-sitter again; otherwise the length
+     would be silently truncated to an uint32_t when passed to the parser.  */
+  if (contents_length > 0xFFFFFFFFUL)
+    error (EXIT_FAILURE, 0, _("file \"%s\" is unsupported because too large"),
+           real_filename);
+  xgettext_current_source_encoding = po_charset_utf8;
+
+  /* Create a parser.  */
+  TSParser *parser = ts_parser_new ();
+
+  /* Set the parser's language.  */
+  ts_parser_set_language (parser, ts_language);
+
+  /* Parse the file, producing a syntax tree.  */
+  TSTree *tree = ts_parser_parse_string (parser, NULL, contents_data, contents_length);
+
+  #if DEBUG_D
+  /* For debugging: Print the tree.  */
+  {
+    char *tree_as_string = ts_node_string (ts_tree_root_node (tree));
+    fprintf (stderr, "Syntax tree: %s\n", tree_as_string);
+    free (tree_as_string);
+  }
+  #endif
+
+  /* Make the contents accessible to the functions that look at the text of
+     tree nodes.  */
+  contents = contents_data;
+
+  extract_from_node (ts_tree_root_node (tree),
+                     false,
+                     false,
+                     null_context_region (),
+                     mlp);
+
+  ts_tree_delete (tree);
+  ts_parser_delete (parser);
+  free (contents_data);
+
+  logical_file_name = NULL;
+}
diff --git a/gettext-tools/src/x-d.h b/gettext-tools/src/x-d.h

new file mode 100644 (file)

index 0000000..e150505
--- /dev/null
+++ b/gettext-tools/src/x-d.h
@@ -0,0 +1,57 @@
+/* xgettext D backend.
+   Copyright (C) 2002-2025 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025.  */
+
+
+#include <stdio.h>
+
+#include "message.h"
+#include "xg-arglist-context.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Table entry pairing the file name extension "d" with the language name "D".
+   NOTE(review): presumably spliced into xgettext's table of known extensions;
+   confirm at the place of use.  */
+#define EXTENSIONS_D \
+  { "d",      "D"     },                                                \
+
+/* Table entry for the language "D": its extractor function extract_d, its
+   flag table, and the two format string parsers (C and D) associated with
+   it.  */
+#define SCANNERS_D \
+  { "D",                extract_d, NULL,                                \
+                        &flag_table_d,                                  \
+                        &formatstring_c, &formatstring_d },             \
+
+/* Scan a D file and add its translatable strings to mdlp.
+   The contents are read from real_filename; logical_filename is the name
+   recorded in the positions of the extracted messages; flag_table is the
+   table of flag contexts consulted during extraction.  */
+extern void extract_d (FILE *fp, const char *real_filename,
+                       const char *logical_filename,
+                       flag_context_list_table_ty *flag_table,
+                       msgdomain_list_ty *mdlp);
+
+
+/* Handling of options specific to this language.  */
+
+/* NOTE(review): presumably requests that all strings be extracted, not only
+   those that are arguments of keywords; confirm against the definition.  */
+extern void x_d_extract_all (void);
+
+/* NOTE(review): presumably registers NAME as a keyword specification;
+   confirm against the definition.  */
+extern void x_d_keyword (const char *name);
+
+/* NOTE(review): presumably fills in flag_table_d, which SCANNERS_D refers to;
+   confirm against the definition.  */
+extern void init_flag_table_d (void);
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/gettext-tools/src/xg-message.c b/gettext-tools/src/xg-message.c

index 2cd7af1669b037303c77b4893f9e737998596c5d..bd22137501b3287b3dabdb8a84a90dc5c7fc79fc 100644 (file)
--- a/gettext-tools/src/xg-message.c
+++ b/gettext-tools/src/xg-message.c
@@ -1,5 +1,5 @@
  /* Extracting a message.  Accumulating the message list.
-   Copyright (C) 2001-2024 Free Software Foundation, Inc.
+   Copyright (C) 2001-2025 Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -136,6 +136,15 @@ decide_is_format (message_ty *mp)
  {
    size_t i;
  
+  bool already_c_format = possible_format_p (mp->is_format[format_c]);
+  bool already_qt_or_kde_or_boost_format =
+    (possible_format_p (mp->is_format[format_qt])
+     || possible_format_p (mp->is_format[format_qt_plural])
+     || possible_format_p (mp->is_format[format_kde])
+     || possible_format_p (mp->is_format[format_kde_kuit])
+     || possible_format_p (mp->is_format[format_boost]));
+  bool already_d_format = possible_format_p (mp->is_format[format_d]);
+
    /* If it is not already decided, through programmer comments, whether the
       msgid is a format string, examine the msgid.  This is a heuristic.  */
    for (i = 0; i < NFORMATS; i++)
@@ -148,12 +157,7 @@ decide_is_format (message_ty *mp)
            /* Avoid flagging a string as c-format when it's known to be a
               qt-format or qt-plural-format or kde-format or boost-format
               string.  */
-          && !(i == format_c
-               && (possible_format_p (mp->is_format[format_qt])
-                   || possible_format_p (mp->is_format[format_qt_plural])
-                   || possible_format_p (mp->is_format[format_kde])
-                   || possible_format_p (mp->is_format[format_kde_kuit])
-                   || possible_format_p (mp->is_format[format_boost])))
+          && !(i == format_c && already_qt_or_kde_or_boost_format)
            /* Avoid flagging a string as kde-format when it's known to
               be a kde-kuit-format string.  */
            && !(i == format_kde
@@ -164,7 +168,12 @@ decide_is_format (message_ty *mp)
               string will be marked as kde-format if both are
               undecided.  */
            && !(i == format_kde_kuit
-               && possible_format_p (mp->is_format[format_kde])))
+               && possible_format_p (mp->is_format[format_kde]))
+          /* Avoid flagging a string as c-format when it's known to be a
+             d-format, and vice versa.  So a string will be marked as both
+             c-format and d-format if both are undecided.  */
+          && !(i == format_d && already_c_format)
+          && !(i == format_c && already_d_format))
          {
            struct formatstring_parser *parser = formatstring_parsers[i];
            char *invalid_reason = NULL;
@@ -588,6 +597,15 @@ remember_a_message_plural (message_ty *mp, char *string, bool is_utf8,
           format string.  */
        set_format_flags_from_context (mp, true, pos, region);
  
+      bool already_c_format = possible_format_p (mp->is_format[format_c]);
+      bool already_qt_or_kde_or_boost_format =
+        (possible_format_p (mp->is_format[format_qt])
+         || possible_format_p (mp->is_format[format_qt_plural])
+         || possible_format_p (mp->is_format[format_kde])
+         || possible_format_p (mp->is_format[format_kde_kuit])
+         || possible_format_p (mp->is_format[format_boost]));
+      bool already_d_format = possible_format_p (mp->is_format[format_d]);
+
        /* If it is not already decided, through programmer comments or
           the msgid, whether the msgid is a format string, examine the
           msgid_plural.  This is a heuristic.  */
@@ -601,12 +619,7 @@ remember_a_message_plural (message_ty *mp, char *string, bool is_utf8,
                   && possible_format_p (mp->is_format[format_c]))
              /* Avoid flagging a string as c-format when it's known to be a
                 qt-format or qt-plural-format or boost-format string.  */
-            && !(i == format_c
-                 && (possible_format_p (mp->is_format[format_qt])
-                     || possible_format_p (mp->is_format[format_qt_plural])
-                     || possible_format_p (mp->is_format[format_kde])
-                     || possible_format_p (mp->is_format[format_kde_kuit])
-                     || possible_format_p (mp->is_format[format_boost])))
+            && !(i == format_c && already_qt_or_kde_or_boost_format)
              /* Avoid flagging a string as kde-format when it's known
                 to be a kde-kuit-format string.  */
              && !(i == format_kde
@@ -617,7 +630,12 @@ remember_a_message_plural (message_ty *mp, char *string, bool is_utf8,
                 string will be marked as kde-format if both are
                 undecided.  */
              && !(i == format_kde_kuit
-                 && possible_format_p (mp->is_format[format_kde])))
+                 && possible_format_p (mp->is_format[format_kde]))
+            /* Avoid flagging a string as c-format when it's known to be a
+               d-format, and vice versa.  So a string will be marked as both
+               c-format and d-format if both are undecided.  */
+            && !(i == format_d && already_c_format)
+            && !(i == format_c && already_d_format))
            {
              struct formatstring_parser *parser = formatstring_parsers[i];
              char *invalid_reason = NULL;
diff --git a/gettext-tools/src/xgettext.c b/gettext-tools/src/xgettext.c

index a762f5fd6d3fc629d8000bd9800440a07ac65dca..6851f114ff25cc6cdd3cabe80a31eecafdb27221 100644 (file)
--- a/gettext-tools/src/xgettext.c
+++ b/gettext-tools/src/xgettext.c
@@ -119,6 +119,7 @@
  #include "x-sh.h"
  #include "x-awk.h"
  #include "x-lua.h"
+#include "x-d.h"
  #include "x-smalltalk.h"
  #include "x-vala.h"
  #include "x-tcl.h"
@@ -211,6 +212,7 @@ static flag_context_list_table_ty flag_table_ruby;
  static flag_context_list_table_ty flag_table_sh;
  static flag_context_list_table_ty flag_table_awk;
  static flag_context_list_table_ty flag_table_lua;
+static flag_context_list_table_ty flag_table_d;
  static flag_context_list_table_ty flag_table_vala;
  static flag_context_list_table_ty flag_table_tcl;
  static flag_context_list_table_ty flag_table_perl;
@@ -408,6 +410,7 @@ main (int argc, char *argv[])
    init_flag_table_sh ();
    init_flag_table_awk ();
    init_flag_table_lua ();
+  init_flag_table_d ();
    init_flag_table_vala ();
    init_flag_table_tcl ();
    init_flag_table_perl ();
@@ -425,26 +428,27 @@ main (int argc, char *argv[])
  
        case 'a':
          x_c_extract_all ();
-        x_sh_extract_all ();
          x_python_extract_all ();
+        x_java_extract_all ();
+        x_csharp_extract_all ();
+        x_javascript_extract_all ();
+        x_typescript_extract_all ();
+        x_typescriptx_extract_all ();
+        x_scheme_extract_all ();
          x_lisp_extract_all ();
          x_elisp_extract_all ();
          x_librep_extract_all ();
-        x_scheme_extract_all ();
-        x_java_extract_all ();
-        x_csharp_extract_all ();
-        x_awk_extract_all ();
-        x_tcl_extract_all ();
-        x_perl_extract_all ();
-        x_php_extract_all ();
          x_rust_extract_all ();
          x_go_extract_all ();
          x_ruby_extract_all ();
+        x_sh_extract_all ();
+        x_awk_extract_all ();
          x_lua_extract_all ();
-        x_javascript_extract_all ();
-        x_typescript_extract_all ();
-        x_typescriptx_extract_all ();
+        x_d_extract_all ();
          x_vala_extract_all ();
+        x_tcl_extract_all ();
+        x_perl_extract_all ();
+        x_php_extract_all ();
          break;
  
        case 'c':
@@ -509,26 +513,27 @@ main (int argc, char *argv[])
            optarg = NULL;
          x_c_keyword (optarg);
          x_objc_keyword (optarg);
-        x_sh_keyword (optarg);
          x_python_keyword (optarg);
+        x_java_keyword (optarg);
+        x_csharp_keyword (optarg);
+        x_javascript_keyword (optarg);
+        x_typescript_keyword (optarg);
+        x_typescriptx_keyword (optarg);
+        x_scheme_keyword (optarg);
          x_lisp_keyword (optarg);
          x_elisp_keyword (optarg);
          x_librep_keyword (optarg);
-        x_scheme_keyword (optarg);
-        x_java_keyword (optarg);
-        x_csharp_keyword (optarg);
-        x_awk_keyword (optarg);
-        x_tcl_keyword (optarg);
-        x_perl_keyword (optarg);
-        x_php_keyword (optarg);
          x_rust_keyword (optarg);
          x_go_keyword (optarg);
          x_ruby_keyword (optarg);
+        x_sh_keyword (optarg);
+        x_awk_keyword (optarg);
          x_lua_keyword (optarg);
-        x_javascript_keyword (optarg);
-        x_typescript_keyword (optarg);
-        x_typescriptx_keyword (optarg);
+        x_d_keyword (optarg);
          x_vala_keyword (optarg);
+        x_tcl_keyword (optarg);
+        x_perl_keyword (optarg);
+        x_php_keyword (optarg);
          x_desktop_keyword (optarg);
          if (optarg == NULL)
            no_default_keywords = true;
@@ -1149,9 +1154,9 @@ Choice of input file language:\n"));
                                  (C, C++, ObjectiveC, PO, Python, Java,\n\
                                  JavaProperties, C#, JavaScript, TypeScript, TSX,\n\
                                  Scheme, Guile, Lisp, EmacsLisp, librep, Rust,\n\
-                                Go, Ruby, Shell, awk, Lua, Smalltalk, Vala, Tcl,\n\
-                                Perl, PHP, GCC-source, YCP, NXStringTable, RST,\n\
-                                RSJ, Glade, GSettings, Desktop)\n"));
+                                Go, Ruby, Shell, awk, Lua, D, Smalltalk, Vala,\n\
+                                Tcl, Perl, PHP, GCC-source, YCP, NXStringTable,\n\
+                                RST, RSJ, Glade, GSettings, Desktop)\n"));
        printf (_("\
    -C, --c++                   shorthand for --language=C++\n"));
        printf (_("\
@@ -1193,7 +1198,7 @@ Language specific options:\n"));
                                  (only languages C, C++, ObjectiveC, Python,\n\
                                  Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
                                  Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
-                                awk, Lua, Vala, Tcl, Perl, PHP, GCC-source,\n\
+                                awk, Lua, D, Vala, Tcl, Perl, PHP, GCC-source,\n\
                                  Glade, GSettings)\n"));
        printf (_("\
    -kWORD, --keyword=WORD      look for WORD as an additional keyword\n\
@@ -1202,7 +1207,7 @@ Language specific options:\n"));
                                  (only languages C, C++, ObjectiveC, Python,\n\
                                  Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
                                  Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
-                                awk, Lua, Vala, Tcl, Perl, PHP, GCC-source,\n\
+                                awk, Lua, D, Vala, Tcl, Perl, PHP, GCC-source,\n\
                                  Glade, GSettings, Desktop)\n"));
        printf (_("\
        --flag=WORD:ARG:FLAG    additional flag for strings inside the argument\n\
@@ -1211,7 +1216,8 @@ Language specific options:\n"));
                                  (only languages C, C++, ObjectiveC, Python,\n\
                                  Java, C#, JavaScript, TypeScript, TSX, Scheme,\n\
                                  Guile, Lisp, EmacsLisp, librep, Rust, Go, Shell,\n\
-                                awk, Lua, Vala, Tcl, Perl, PHP, GCC-source, YCP)\n"));
+                                awk, Lua, D, Vala, Tcl, Perl, PHP, GCC-source,\n\
+                                YCP)\n"));
        printf (_("\
        --tag=WORD:FORMAT       defines the behaviour of tagged template literals\n\
                                with tag WORD\n"));
@@ -1619,6 +1625,12 @@ xgettext_record_flag (const char *optionstring)
                                                          name_start, name_end,
                                                          argnum, value, pass);
                        }
+                    if (backend == NULL || strcmp (backend, "D") == 0)
+                      {
+                        flag_context_list_table_insert (&flag_table_d, XFORMAT_PRIMARY,
+                                                        name_start, name_end,
+                                                        argnum, value, pass);
+                      }
                      if (backend == NULL || strcmp (backend, "Vala") == 0)
                        {
                          flag_context_list_table_insert (&flag_table_vala, XFORMAT_PRIMARY,
@@ -1738,6 +1750,11 @@ xgettext_record_flag (const char *optionstring)
                      break;
                    case format_pascal:
                      break;
+                  case format_d:
+                    flag_context_list_table_insert (&flag_table_d, XFORMAT_SECONDARY,
+                                                    name_start, name_end,
+                                                    argnum, value, pass);
+                    break;
                    case format_smalltalk:
                      break;
                    case format_qt:
@@ -2347,6 +2364,7 @@ language_to_extractor (const char *name)
      SCANNERS_SH
      SCANNERS_AWK
      SCANNERS_LUA
+    SCANNERS_D
      SCANNERS_SMALLTALK
      SCANNERS_VALA
      SCANNERS_TCL
@@ -2444,6 +2462,7 @@ extension_to_language (const char *extension)
      EXTENSIONS_SH
      EXTENSIONS_AWK
      EXTENSIONS_LUA
+    EXTENSIONS_D
      EXTENSIONS_SMALLTALK
      EXTENSIONS_VALA
      EXTENSIONS_TCL
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am

index 086e3175419ec7233b5f54e72aa8cfd5f39c3d3b..543eadde926f60cc108498760939703132118504 100644 (file)
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am
@@ -105,6 +105,8 @@ TESTS = gettext-1 gettext-2 \
         xgettext-csharp-7 xgettext-csharp-8 xgettext-csharp-9 \
         xgettext-csharp-stackovfl-1 xgettext-csharp-stackovfl-2 \
         xgettext-csharp-stackovfl-3 xgettext-csharp-stackovfl-4 \
+       xgettext-d-1 xgettext-d-2 xgettext-d-3 xgettext-d-4 xgettext-d-5 \
+       xgettext-d-6 \
         xgettext-desktop-1 xgettext-desktop-2 xgettext-desktop-3 \
         xgettext-docbook-1 \
         xgettext-elisp-1 xgettext-elisp-2 xgettext-elisp-3 \
@@ -196,6 +198,7 @@ TESTS = gettext-1 gettext-2 \
         format-c-1 format-c-2 format-c-3 format-c-4 format-c-5 \
         format-c++-brace-1 format-c++-brace-2 \
         format-csharp-1 format-csharp-2 \
+       format-d-1 format-d-2 \
         format-elisp-1 format-elisp-2 \
         format-gcc-internal-1 format-gcc-internal-2 \
         format-gfc-internal-1 format-gfc-internal-2 \
@@ -278,6 +281,9 @@ EXTRA_DIST += init.sh init.cfg $(TESTS) \
         xgettext-c-1 xg-c-comment-6.c xg-c-escape-3.c xg-vala-2.vala \
         common/supplemental/plurals.xml \
         testdata/crlf.pot \
+       testdata/dprog.utf-8.d testdata/dprog.utf-8+bom.d \
+       testdata/dprog.utf-16be.d testdata/dprog.utf-16le.d \
+       testdata/dprog.utf-32be.d testdata/dprog.utf-32le.d \
         testdata/nonascii.pot \
         testdata/tcltest_pl.po testdata/tcltest_pl.msg \
         testdata/tcltest_cs.po testdata/tcltest_cs.msg \
diff --git a/gettext-tools/tests/format-d-1 b/gettext-tools/tests/format-d-1

new file mode 100755 (executable)

index 0000000..2691f39
--- /dev/null
+++ b/gettext-tools/tests/format-d-1
@@ -0,0 +1,249 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test recognition of D format strings.
+
+cat <<\EOF > f-d-1.data
+# Valid: no argument
+"abc%%"
+# Valid: one any-type argument
+"abc%s"
+# Valid: one char argument
+"abc%c"
+# Valid: one bool or integer or char argument
+"abc%d"
+# Valid: one bool or integer or char argument
+"abc%u"
+# Valid: one bool or integer or char argument
+"abc%b"
+# Valid: one bool or integer or char argument
+"abc%o"
+# Valid: one bool or integer or char or pointer argument
+"abc%x"
+# Valid: one bool or integer or char or pointer argument
+"abc%X"
+# Valid: one integer or float argument
+"abc%e"
+# Valid: one integer or float argument
+"abc%E"
+# Valid: one integer or float argument
+"abc%f"
+# Valid: one integer or float argument
+"abc%F"
+# Valid: one integer or float argument
+"abc%g"
+# Valid: one integer or float argument
+"abc%G"
+# Valid: one integer or float argument
+"abc%a"
+# Valid: one integer or float argument
+"abc%A"
+# Valid: one bool or integer or float or char or array or irange argument
+"abc%r"
+# Valid: one array or irange argument
+"abc%(%s%)"
+# Valid: one associative argument
+"abc%(%s%s%)"
+# Valid: one argument with flags
+"abc%0#g"
+# Valid: one argument with width
+"abc%2g"
+# Valid: one argument with width
+"abc%*g"
+# Valid: one argument with precision
+"abc%.4g"
+# Valid: one argument with precision
+"abc%.*g"
+# Valid: one argument with width and precision
+"abc%14.4g"
+# Valid: one argument with width and precision
+"abc%14.*g"
+# Valid: one argument with width and precision
+"abc%*.4g"
+# Valid: one argument with width and precision
+"abc%*.*g"
+# Valid: one argument with separator
+"abc%,3g"
+# Valid: one argument with separator
+"abc%,*g"
+# Valid: one argument with separator
+"abc%,3?g"
+# Valid: one argument with separator
+"abc%,*?g"
+# Valid: one argument with width and separator
+"abc%14,3g"
+# Valid: one argument with width and separator
+"abc%14,*g"
+# Valid: one argument with width and separator
+"abc%14,3?g"
+# Valid: one argument with width and separator
+"abc%14,*?g"
+# Valid: one argument with width and separator
+"abc%*,3g"
+# Valid: one argument with width and separator
+"abc%*,*g"
+# Valid: one argument with width and separator
+"abc%*,3?g"
+# Valid: one argument with width and separator
+"abc%*,*?g"
+# Valid: one argument with precision and separator
+"abc%.4,3g"
+# Valid: one argument with precision and separator
+"abc%.4,*g"
+# Valid: one argument with precision and separator
+"abc%.4,3?g"
+# Valid: one argument with precision and separator
+"abc%.4,*?g"
+# Valid: one argument with precision and separator
+"abc%.*,3g"
+# Valid: one argument with precision and separator
+"abc%.*,*g"
+# Valid: one argument with precision and separator
+"abc%.*,3?g"
+# Valid: one argument with precision and separator
+"abc%.*,*?g"
+# Valid: one argument with width and precision and separator
+"abc%14.4,3g"
+# Valid: one argument with width and precision and separator
+"abc%14.4,*g"
+# Valid: one argument with width and precision and separator
+"abc%14.4,3?g"
+# Valid: one argument with width and precision and separator
+"abc%14.4,*?g"
+# Valid: one argument with width and precision and separator
+"abc%14.*,3g"
+# Valid: one argument with width and precision and separator
+"abc%14.*,*g"
+# Valid: one argument with width and precision and separator
+"abc%14.*,3?g"
+# Valid: one argument with width and precision and separator
+"abc%14.*,*?g"
+# Valid: one argument with width and precision and separator
+"abc%*.4,3g"
+# Valid: one argument with width and precision and separator
+"abc%*.4,*g"
+# Valid: one argument with width and precision and separator
+"abc%*.4,3?g"
+# Valid: one argument with width and precision and separator
+"abc%*.4,*?g"
+# Valid: one argument with width and precision and separator
+"abc%*.*,3g"
+# Valid: one argument with width and precision and separator
+"abc%*.*,*g"
+# Valid: one argument with width and precision and separator
+"abc%*.*,3?g"
+# Valid: one argument with width and precision and separator
+"abc%*.*,*?g"
+# Invalid: unterminated
+"abc%"
+# Invalid: unknown format specifier
+"abc%y"
+# Invalid: flags after width
+"abc%*0g"
+# Invalid: flags after precision
+"abc%.*0g"
+# Invalid: flags after separator
+"abc%,*0g"
+# Invalid: precision after separator
+"abc%,4.2g"
+# Invalid: twice precision
+"abc%.4.2g"
+# Invalid: twice separator
+"abc%,4,2g"
+# Valid: three arguments
+"abc%d%u%u"
+# Valid: a numbered argument
+"abc%1$d"
+# Invalid: zero
+"abc%0$d"
+# Valid: two-digit numbered arguments
+"abc%11$def%10$dgh%9$dij%8$dkl%7$dmn%6$dop%5$dqr%4$dst%3$duv%2$dwx%1$dyz"
+# Invalid: unterminated number
+"abc%1"
+# Invalid: flags before number
+"abc%+1$d"
+# Valid: three arguments, two with same number, no conflict
+"abc%1$4x,%2$c,%1$u"
+# Valid: three arguments, two with same number, no conflict
+"abc%1$4c,%2$c,%1$u"
+# Invalid: argument with conflicting types
+"abc%1$4c,%2$c,%1$e"
+# Valid: mixing of numbered and unnumbered arguments
+"abc%d%2$x"
+# Valid: mixing of numbered and unnumbered arguments
+"abc%2$d%x"
+# Valid: numbered argument with constant precision
+"abc%1$.9x"
+# Valid: mixing of numbered and unnumbered arguments
+"abc%1$.*x"
+# Valid: missing non-final argument
+"abc%2$x%3$s"
+# Valid: permutation
+"abc%2$ddef%1$d"
+# Valid: multiple uses of same argument
+"abc%2$xdef%1$Fghi%2$x"
+# Valid: one argument with width
+"abc%2$#*1$g"
+# Valid: one argument with width and precision
+"abc%3$*2$.*1$g"
+# Valid: one argument with width and precision and separator
+"abc%4$*3$.*2$,*g"
+# Invalid: zero
+"abc%2$*0$.*1$g"
+# Valid: compound specifier for array
+"abc%(def%sghi%)"
+# Valid: compound specifier for associative
+"abc%(def%sghi%sjkl%)"
+# Invalid: compound specifier that consumes no arguments
+"abc%(def%)"
+# Invalid: compound specifier that consumes too many arguments
+"abc%(def%sghi%sjkl%smno%)"
+# Valid: nested compound specifier
+"abc%(def%(%s%)ghi%)"
+# Valid: nested compound specifier
+"abc%(def%(%s%s%)ghi%)"
+# Valid: nested compound specifier
+"abc%(def%(%s%)ghi%(%u%)jkl%)"
+# Valid: nested compound specifier
+"abc%(def%(%s%)ghi%(%u%u%)jkl%)"
+# Valid: nested compound specifier
+"abc%(def%(%s%s%)ghi%(%u%)jkl%)"
+# Valid: nested compound specifier
+"abc%(def%(%s%s%)ghi%(%u%u%)jkl%)"
+EOF
+
+: ${XGETTEXT=xgettext}
+n=0
+while read comment; do
+  read string
+  n=`expr $n + 1`
+  cat <<EOF > f-d-1-$n.in
+void func() { gettext(${string}); }
+EOF
+  ${XGETTEXT} -L D -o f-d-1-$n.po f-d-1-$n.in || Exit 1
+  test -f f-d-1-$n.po || Exit 1
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    if grep d-format f-d-1-$n.po > /dev/null; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    if grep d-format f-d-1-$n.po > /dev/null; then
+      fail=yes
+    else
+      :
+    fi
+  fi
+  if test -n "$fail"; then
+    echo "Format string recognition error:" 1>&2
+    cat f-d-1-$n.in 1>&2
+    echo "Got:" 1>&2
+    cat f-d-1-$n.po 1>&2
+    Exit 1
+  fi
+  rm -f f-d-1-$n.in f-d-1-$n.po
+done < f-d-1.data
+
+Exit 0
diff --git a/gettext-tools/tests/format-d-2 b/gettext-tools/tests/format-d-2

new file mode 100755 (executable)

index 0000000..24d05e9
--- /dev/null
+++ b/gettext-tools/tests/format-d-2
@@ -0,0 +1,231 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test checking of D format strings.
+
+cat <<\EOF > f-d-2.data
+# Valid: %% doesn't count
+msgid  "abc%%def"
+msgstr "xyz"
+# Invalid: invalid msgstr
+msgid  "abc%%def"
+msgstr "xyz%"
+# Valid: same arguments
+msgid  "abc%s%gdef"
+msgstr "xyz%s%g"
+# Valid: same arguments, with different widths
+msgid  "abc%2sdef"
+msgstr "xyz%3s"
+# Valid: same arguments but in numbered syntax
+msgid  "abc%s%gdef"
+msgstr "xyz%1$s%2$g"
+# Valid: permutation
+msgid  "abc%s%g%cdef"
+msgstr "xyz%3$c%2$g%1$s"
+# Invalid: too few arguments
+msgid  "abc%2$udef%1$s"
+msgstr "xyz%1$s"
+# Invalid: too few arguments
+msgid  "abc%sdef%u"
+msgstr "xyz%s"
+# Invalid: too many arguments
+msgid  "abc%udef"
+msgstr "xyz%uvw%c"
+# Valid: same numbered arguments, with different widths
+msgid  "abc%2$5s%1$4s"
+msgstr "xyz%2$4s%1$5s"
+# Invalid: missing final argument
+msgid  "abc%2$sdef%1$u"
+msgstr "xyz%1$u"
+# Valid: missing non-final argument
+msgid  "abc%1$sdef%2$u"
+msgstr "xyz%2$u"
+# Invalid: added argument at the end
+msgid  "abc%1$udef"
+msgstr "xyz%1$uvw%2$c"
+# Invalid: added argument not at the end (see the comment in format-d.c:format_check)
+msgid  "abc%2$udef"
+msgstr "xyz%2$uvw%1$c"
+
+# Valid: type compatibility
+msgid  "abc%d"
+msgstr "xyz%u"
+# Valid: type compatibility
+msgid  "abc%u"
+msgstr "xyz%b"
+# Valid: type compatibility
+msgid  "abc%u"
+msgstr "xyz%o"
+
+# Valid: type compatibility
+msgid  "abc%x"
+msgstr "xyz%X"
+
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%E"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%f"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%F"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%g"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%G"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%a"
+# Valid: type compatibility
+msgid  "abc%e"
+msgstr "xyz%A"
+
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%c"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%d"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%x"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%r"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%s"
+msgstr "xyz%(%s%s%)"
+
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%d"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%x"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%r"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%c"
+msgstr "xyz%(%s%s%)"
+
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%x"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%r"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%d"
+msgstr "xyz%(%s%s%)"
+
+# Invalid: type incompatibility
+msgid  "abc%x"
+msgstr "xyz%e"
+# Invalid: type incompatibility
+msgid  "abc%x"
+msgstr "xyz%r"
+# Invalid: type incompatibility
+msgid  "abc%x"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%x"
+msgstr "xyz%(%s%s%)"
+
+# Invalid: type incompatibility
+msgid  "abc%e"
+msgstr "xyz%r"
+# Invalid: type incompatibility
+msgid  "abc%e"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%e"
+msgstr "xyz%(%s%s%)"
+
+# Invalid: type incompatibility
+msgid  "abc%r"
+msgstr "xyz%(%s%)"
+# Invalid: type incompatibility
+msgid  "abc%r"
+msgstr "xyz%(%s%s%)"
+
+# Invalid: type incompatibility
+msgid  "abc%(%s%)"
+msgstr "xyz%(%s%s%)"
+
+# Invalid: type incompatibility for width
+msgid  "abc%g%*g"
+msgstr "xyz%*g%g"
+
+# Invalid: type incompatibility for precision
+msgid  "abc%g%.*g"
+msgstr "xyz%.*g%g"
+
+# Invalid: type incompatibility for separator digits
+msgid  "abc%g%,*g"
+msgstr "xyz%,*g%g"
+
+# Invalid: type incompatibility for separator character
+msgid  "abc%,*g%,*?g"
+msgstr "xyz%,*?g%,*g"
+EOF
+
+: ${MSGFMT=msgfmt}
+n=0
+while read comment; do
+  if test -z "$comment"; then
+    read comment
+  fi
+  read msgid_line
+  read msgstr_line
+  n=`expr $n + 1`
+  cat <<EOF > f-d-2-$n.po
+#, d-format
+${msgid_line}
+${msgstr_line}
+EOF
+  fail=
+  if echo "$comment" | grep 'Valid:' > /dev/null; then
+    if ${MSGFMT} --check-format -o f-d-2-$n.mo f-d-2-$n.po; then
+      :
+    else
+      fail=yes
+    fi
+  else
+    ${MSGFMT} --check-format -o f-d-2-$n.mo f-d-2-$n.po 2> /dev/null
+    if test $? = 1; then
+      :
+    else
+      fail=yes
+    fi
+  fi
+  if test -n "$fail"; then
+    echo "Format string checking error:" 1>&2
+    cat f-d-2-$n.po 1>&2
+    Exit 1
+  fi
+  rm -f f-d-2-$n.po f-d-2-$n.mo
+done < f-d-2.data
+
+Exit 0
diff --git a/gettext-tools/tests/testdata/dprog.utf-16be.d b/gettext-tools/tests/testdata/dprog.utf-16be.d

new file mode 100644 (file)

index 0000000..dc6471e

Binary files /dev/null and b/gettext-tools/tests/testdata/dprog.utf-16be.d differ
diff --git a/gettext-tools/tests/testdata/dprog.utf-16le.d b/gettext-tools/tests/testdata/dprog.utf-16le.d

new file mode 100644 (file)

index 0000000..9fd2b08

Binary files /dev/null and b/gettext-tools/tests/testdata/dprog.utf-16le.d differ
diff --git a/gettext-tools/tests/testdata/dprog.utf-32be.d b/gettext-tools/tests/testdata/dprog.utf-32be.d

new file mode 100644 (file)

index 0000000..4a20084

Binary files /dev/null and b/gettext-tools/tests/testdata/dprog.utf-32be.d differ
diff --git a/gettext-tools/tests/testdata/dprog.utf-32le.d b/gettext-tools/tests/testdata/dprog.utf-32le.d

new file mode 100644 (file)

index 0000000..792f63b

Binary files /dev/null and b/gettext-tools/tests/testdata/dprog.utf-32le.d differ
diff --git a/gettext-tools/tests/testdata/dprog.utf-8+bom.d b/gettext-tools/tests/testdata/dprog.utf-8+bom.d

new file mode 100644 (file)

index 0000000..a65ba65
--- /dev/null
+++ b/gettext-tools/tests/testdata/dprog.utf-8+bom.d
@@ -0,0 +1,8 @@
+import std.stdio;
+
+void main ()
+{
+  writeln ("Hello world!");
+  writeln ("It costs €20.");
+  writeln ("Stay healthy 😷.");
+}
diff --git a/gettext-tools/tests/testdata/dprog.utf-8.d b/gettext-tools/tests/testdata/dprog.utf-8.d

new file mode 100644 (file)

index 0000000..de7dd69
--- /dev/null
+++ b/gettext-tools/tests/testdata/dprog.utf-8.d
@@ -0,0 +1,8 @@
+import std.stdio;
+
+void main ()
+{
+  writeln ("Hello world!");
+  writeln ("It costs €20.");
+  writeln ("Stay healthy 😷.");
+}
diff --git a/gettext-tools/tests/xgettext-d-1 b/gettext-tools/tests/xgettext-d-1

new file mode 100755 (executable)

index 0000000..c4c768e
--- /dev/null
+++ b/gettext-tools/tests/xgettext-d-1
@@ -0,0 +1,67 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test D support: Source file encodings. Each encoding of the same program must yield the same POT file.
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --no-location -a -d xg-d-1a.tmp "$wabs_srcdir"/testdata/dprog.utf-8.d || Exit 1
+func_filter_POT_Creation_Date xg-d-1a.tmp.po xg-d-1a.pot
+
+cat <<\EOF > xg-d-1.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "Hello world!"
+msgstr ""
+
+msgid "It costs €20."
+msgstr ""
+
+msgid "Stay healthy 😷."
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-d-1.ok xg-d-1a.pot || Exit 1  # variant a: dprog.utf-8.d
+
+${XGETTEXT} --no-location -a -d xg-d-1b.tmp "$wabs_srcdir"/testdata/dprog.utf-8+bom.d || Exit 1
+func_filter_POT_Creation_Date xg-d-1b.tmp.po xg-d-1b.pot
+
+${DIFF} xg-d-1.ok xg-d-1b.pot || Exit 1  # variant b: dprog.utf-8+bom.d
+
+${XGETTEXT} --no-location -a -d xg-d-1c.tmp "$wabs_srcdir"/testdata/dprog.utf-16le.d || Exit 1
+func_filter_POT_Creation_Date xg-d-1c.tmp.po xg-d-1c.pot
+
+${DIFF} xg-d-1.ok xg-d-1c.pot || Exit 1  # variant c: dprog.utf-16le.d
+
+${XGETTEXT} --no-location -a -d xg-d-1d.tmp "$wabs_srcdir"/testdata/dprog.utf-16be.d || Exit 1
+func_filter_POT_Creation_Date xg-d-1d.tmp.po xg-d-1d.pot
+
+${DIFF} xg-d-1.ok xg-d-1d.pot || Exit 1  # variant d: dprog.utf-16be.d
+
+${XGETTEXT} --no-location -a -d xg-d-1e.tmp "$wabs_srcdir"/testdata/dprog.utf-32le.d || Exit 1
+func_filter_POT_Creation_Date xg-d-1e.tmp.po xg-d-1e.pot
+
+${DIFF} xg-d-1.ok xg-d-1e.pot || Exit 1  # variant e: dprog.utf-32le.d
+
+${XGETTEXT} --no-location -a -d xg-d-1f.tmp "$wabs_srcdir"/testdata/dprog.utf-32be.d || Exit 1
+func_filter_POT_Creation_Date xg-d-1f.tmp.po xg-d-1f.pot
+
+${DIFF} xg-d-1.ok xg-d-1f.pot || Exit 1  # variant f: dprog.utf-32be.d
+
+Exit 0
diff --git a/gettext-tools/tests/xgettext-d-2 b/gettext-tools/tests/xgettext-d-2

new file mode 100755 (executable)

index 0000000..3b09bf9
--- /dev/null
+++ b/gettext-tools/tests/xgettext-d-2
@@ -0,0 +1,173 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test D support: comments, string concatenation, format string flags, call syntaxes, plural forms.
+
+cat <<\EOF > xg-d-2.d
+void main ()
+{
+  // A C++ syntax one-line comment.
+  string test1 = gettext("Test String 1");
+  /* A C syntax one-line comment. */
+  string test2 = gettext("Test String 2");
+  // A C++ syntax multi-line
+  // comment.
+  string test3 = gettext("Test String 3");
+  /* A C syntax multi-line
+     comment. */
+  string test4 = gettext("Test String 4");
+  /+ A D syntax /* nesting */ /+ one-line +/ comment. +/
+  string test5 = gettext("Test String 5");
+  /+ A D syntax /*
+     nesting
+     */
+     /+
+     multi-line +/
+     comment. +/
+  string test6 = gettext("Test String 6");
+  // D has string literal concatenation.
+  string test7 = gettext("Test " ~
+  "String "
+  ~ "7");
+  // Empty string.
+  string test8 = gettext("");
+
+  // printf expects a C format string.
+  printf(gettext("depth %f"), 3.4);
+  // format expects a D format string.
+  format(gettext("weight %f"), 5.6);
+  gettext("height %f").format(5.6);
+  // This can be used as a C or D format string.
+  string test9 = gettext("length %f");
+  // This can be used as a C or D format string, but is not a D format string.
+  string test10 = gettext("%lu");
+  // This can be used as a C or D format string, but is not a C format string.
+  string test11 = gettext("%(%s%)");
+
+  // Plain call syntax.
+  writeln(gettext("Test string 12"));
+  // Method-like call syntax.
+  "Test string 13"/*A*/./*B*/gettext/*C*/(/*D*/)/*E*/./*F*/to!string.writeln;
+  "Test string 14".gettext().to!string.writeln;
+  "Test string 15".gettext.to!string.writeln;
+
+  // Plain call syntax for plural.
+  writeln(ngettext("%.0sa piece of cake", "%s pieces of cake", n).format(text(s)));
+  // Method-like call syntax for plural.
+  writeln("%.0sa piece of wood".ngettext("%s pieces of wood", n).format(text(s)));
+}
+
+@safe pure unittest
+{
+    gettext("Not extracted");
+}
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -c -d xg-d-2.tmp xg-d-2.d || Exit 1
+LC_ALL=C tr -d '\r' < xg-d-2.tmp.po > xg-d-2.po || Exit 1
+
+cat <<\EOF > xg-d-2.ok
+#. A C++ syntax one-line comment.
+msgid "Test String 1"
+msgstr ""
+
+#. A C syntax one-line comment.
+msgid "Test String 2"
+msgstr ""
+
+#. A C++ syntax multi-line
+#. comment.
+msgid "Test String 3"
+msgstr ""
+
+#. A C syntax multi-line
+#. comment.
+msgid "Test String 4"
+msgstr ""
+
+#. A D syntax /* nesting */ /+ one-line +/ comment.
+msgid "Test String 5"
+msgstr ""
+
+#. A D syntax /*
+#. nesting
+#. */
+#. /+
+#. multi-line +/
+#. comment.
+msgid "Test String 6"
+msgstr ""
+
+#. D has string literal concatenation.
+msgid "Test String 7"
+msgstr ""
+
+#. Empty string.
+msgid ""
+msgstr ""
+
+#. printf expects a C format string.
+#, c-format
+msgid "depth %f"
+msgstr ""
+
+#. format expects a D format string.
+#, d-format
+msgid "weight %f"
+msgstr ""
+
+#, d-format
+msgid "height %f"
+msgstr ""
+
+#. This can be used as a C or D format string.
+#, c-format, d-format
+msgid "length %f"
+msgstr ""
+
+#. This can be used as a C or D format string, but is not a D format string.
+#, c-format
+msgid "%lu"
+msgstr ""
+
+#. This can be used as a C or D format string, but is not a C format string.
+#, d-format
+msgid "%(%s%)"
+msgstr ""
+
+#. Plain call syntax.
+msgid "Test string 12"
+msgstr ""
+
+#. Method-like call syntax.
+msgid "Test string 13"
+msgstr ""
+
+#. E
+#. F
+msgid "Test string 14"
+msgstr ""
+
+msgid "Test string 15"
+msgstr ""
+
+#. Plain call syntax for plural.
+#, d-format
+msgid "%.0sa piece of cake"
+msgid_plural "%s pieces of cake"
+msgstr[0] ""
+msgstr[1] ""
+
+#. Method-like call syntax for plural.
+#, d-format
+msgid "%.0sa piece of wood"
+msgid_plural "%s pieces of wood"
+msgstr[0] ""
+msgstr[1] ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-d-2.ok xg-d-2.po || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-d-3 b/gettext-tools/tests/xgettext-d-3

new file mode 100755 (executable)

index 0000000..6d1e3fc
--- /dev/null
+++ b/gettext-tools/tests/xgettext-d-3
@@ -0,0 +1,115 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test D support: String literals.
+
+cat <<\EOF > xg-d-3.d
+import std.stdio;
+import std.conv : text;
+
+string a = r"a: abc\tdef";
+string b = `b: abc\tdef`;
+string c1 = "c1: abc\tdef\u20acghi\U0001f603jklmno\&euro;pqr";
+// Unicode surrogate codepoints are invalid. xgettext eliminates them.
+string c2 = "c2: abc\tdef\u20acghi\U0001f603jkl\ud83d\ude03mno\&euro;pqr"c;
+string d1 = q"foo
+d1: abc
+def
+foo";
+string d2 = q"(d2: abc)";
+string d3 = q"[d3: abc]";
+string d4 = q"{d4: abc}";
+string d5 = q"<d5: abc>";
+string e = q{e: g >= 0 };
+string f = x"66 E282AC"; // Note: Hex strings are broken in gdc version 12 and 13.
+// Not string literals.
+string g = i`abc\tdef`.text;
+string h = i"abc\tdef".text;
+string i = iq{g >= 0 }.text;
+
+void main ()
+{
+  writeln ("abc" ~ "Böse" ~ "20€");
+  writeln (a);
+  writeln (b);
+  writeln (c1);
+  writeln (c2);
+  writeln (d1);
+  writeln (d2);
+  writeln (d3);
+  writeln (d4);
+  writeln (d5);
+  writeln (e);
+  writeln (f);
+  writeln (g);
+  writeln (h);
+  writeln (i);
+}
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --no-location -a -d xg-d-3.tmp xg-d-3.d || Exit 1
+func_filter_POT_Creation_Date xg-d-3.tmp.po xg-d-3.pot
+
+cat <<\EOF > xg-d-3.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "a: abc\\tdef"
+msgstr ""
+
+msgid "b: abc\\tdef"
+msgstr ""
+
+msgid "c1: abc\tdef€ghi😃jklmno€pqr"
+msgstr ""
+
+msgid "c2: abc\tdef€ghi😃jklmno€pqr"
+msgstr ""
+
+msgid ""
+"d1: abc\n"
+"def\n"
+msgstr ""
+
+msgid "d2: abc"
+msgstr ""
+
+msgid "d3: abc"
+msgstr ""
+
+msgid "d4: abc"
+msgstr ""
+
+msgid "d5: abc"
+msgstr ""
+
+msgid "e: g >= 0 "
+msgstr ""
+
+msgid "f€"
+msgstr ""
+
+msgid "abcBöse20€"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-d-3.ok xg-d-3.pot || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-d-4 b/gettext-tools/tests/xgettext-d-4

new file mode 100755 (executable)

index 0000000..ee3079d
--- /dev/null
+++ b/gettext-tools/tests/xgettext-d-4
@@ -0,0 +1,40 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test D support: function call like syntax in templates.
+
+cat <<\EOF > xg-d-4.d
+void main ()
+{
+  tr!"Test string 1";
+  tr!("Test string 2");
+  tr !("Test string 3");
+  tr!("%.0sa piece of cake", "%s pieces of cake");
+}
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -k'tr!' -k'tr!:1,2' -d xg-d-4.tmp xg-d-4.d || Exit 1
+LC_ALL=C tr -d '\r' < xg-d-4.tmp.po > xg-d-4.po || Exit 1
+
+cat <<\EOF > xg-d-4.ok
+msgid "Test string 1"
+msgstr ""
+
+msgid "Test string 2"
+msgstr ""
+
+msgid "Test string 3"
+msgstr ""
+
+#, c-format, d-format
+msgid "%.0sa piece of cake"
+msgid_plural "%s pieces of cake"
+msgstr[0] ""
+msgstr[1] ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-d-4.ok xg-d-4.po || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-d-5 b/gettext-tools/tests/xgettext-d-5

new file mode 100755 (executable)

index 0000000..80fe531
--- /dev/null
+++ b/gettext-tools/tests/xgettext-d-5
@@ -0,0 +1,32 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test D support: extraction of contexts.
+
+cat <<\EOF > xg-d-5.d
+void main ()
+{
+  writeln(gettext("help"));
+  writeln(pgettext("Help", "about"));
+}
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header -d xg-d-5.tmp xg-d-5.d || Exit 1
+LC_ALL=C tr -d '\r' < xg-d-5.tmp.po > xg-d-5.po || Exit 1
+
+cat <<\EOF > xg-d-5.ok
+#: xg-d-5.d:3
+msgid "help"
+msgstr ""
+
+#: xg-d-5.d:4
+msgctxt "Help"
+msgid "about"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-d-5.ok xg-d-5.po || Exit 1
+
+exit 0
diff --git a/gettext-tools/tests/xgettext-d-6 b/gettext-tools/tests/xgettext-d-6

new file mode 100755 (executable)

index 0000000..7552348
--- /dev/null
+++ b/gettext-tools/tests/xgettext-d-6
@@ -0,0 +1,139 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test D support: propagation of 'd-format'.
+
+cat <<\EOF > xg-d-6.d
+void main ()
+{
+  gettext ("Hello10");
+  (gettext ("Hello11"));
+  ((gettext ("Hello12")));
+  gettext (gettext ("Hello13"));
+  (gettext (gettext ("Hello14")));
+  ((gettext (gettext ("Hello15"))));
+  gettext ((gettext ("Hello16")));
+  gettext (((gettext ("Hello17"))));
+  gettext (foo(), gettext ("Hello18"));
+
+  format (gettext ("Hello20"));
+  format ((gettext ("Hello21")));
+  format (((gettext ("Hello22"))));
+  format (gettext (gettext ("Hello23")));
+  format ((gettext (gettext ("Hello24"))));
+  format (((gettext (gettext ("Hello25")))));
+  format (gettext ((gettext ("Hello26"))));
+  format (gettext (((gettext ("Hello27")))));
+  format (gettext (foo(), gettext ("Hello28")));
+
+  printf ("%s", gettext ("Hello30"));
+  printf ("%s", (gettext ("Hello31")));
+  printf ("%s", ((gettext ("Hello32"))));
+  printf ("%s", gettext (gettext ("Hello33")));
+  printf ("%s", (gettext (gettext ("Hello34"))));
+  printf ("%s", ((gettext (gettext ("Hello35")))));
+  printf ("%s", gettext ((gettext ("Hello36"))));
+  printf ("%s", gettext (((gettext ("Hello37")))));
+  printf ("%s", gettext (foo(), gettext ("Hello38")));
+}
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --omit-header --no-location -c -d xg-d-6.tmp xg-d-6.d || Exit 1
+LC_ALL=C tr -d '\r' < xg-d-6.tmp.po > xg-d-6.po || Exit 1
+
+cat <<\EOF > xg-d-6.ok
+msgid "Hello10"
+msgstr ""
+
+msgid "Hello11"
+msgstr ""
+
+msgid "Hello12"
+msgstr ""
+
+msgid "Hello13"
+msgstr ""
+
+msgid "Hello14"
+msgstr ""
+
+msgid "Hello15"
+msgstr ""
+
+msgid "Hello16"
+msgstr ""
+
+msgid "Hello17"
+msgstr ""
+
+msgid "Hello18"
+msgstr ""
+
+#, d-format
+msgid "Hello20"
+msgstr ""
+
+#, d-format
+msgid "Hello21"
+msgstr ""
+
+#, d-format
+msgid "Hello22"
+msgstr ""
+
+#, d-format
+msgid "Hello23"
+msgstr ""
+
+#, d-format
+msgid "Hello24"
+msgstr ""
+
+#, d-format
+msgid "Hello25"
+msgstr ""
+
+#, d-format
+msgid "Hello26"
+msgstr ""
+
+#, d-format
+msgid "Hello27"
+msgstr ""
+
+msgid "Hello28"
+msgstr ""
+
+msgid "Hello30"
+msgstr ""
+
+msgid "Hello31"
+msgstr ""
+
+msgid "Hello32"
+msgstr ""
+
+msgid "Hello33"
+msgstr ""
+
+msgid "Hello34"
+msgstr ""
+
+msgid "Hello35"
+msgstr ""
+
+msgid "Hello36"
+msgstr ""
+
+msgid "Hello37"
+msgstr ""
+
+msgid "Hello38"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-d-6.ok xg-d-6.po || Exit 1
+
+exit 0
diff --git a/gettext-tools/woe32dll/gettextsrc-exports.c b/gettext-tools/woe32dll/gettextsrc-exports.c

index 1c3db7560fc8e5dc4109e1d986b2377392fcc53e..319e164683287b2b3f688d198abd11a1511b6d6e 100644 (file)
--- a/gettext-tools/woe32dll/gettextsrc-exports.c
+++ b/gettext-tools/woe32dll/gettextsrc-exports.c
@@ -29,6 +29,7 @@ VARIABLE(formatstring_boost)
  VARIABLE(formatstring_c)
  VARIABLE(formatstring_cplusplus_brace)
  VARIABLE(formatstring_csharp)
+VARIABLE(formatstring_d)
  VARIABLE(formatstring_elisp)
  VARIABLE(formatstring_gcc_internal)
  VARIABLE(formatstring_gfc_internal)
author	Bruno Haible <bruno@clisp.org>
	Tue, 1 Apr 2025 09:51:08 +0000 (11:51 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Tue, 1 Apr 2025 11:01:05 +0000 (13:01 +0200)
NEWS		patch \| blob \| blame \| history
autogen.sh		patch \| blob \| blame \| history
autopull.sh		patch \| blob \| blame \| history
gettext-tools/Makefile.am		patch \| blob \| blame \| history
gettext-tools/build-aux/tree-sitter-d-portability.diff	[new file with mode: 0644]	patch \| blob
gettext-tools/configure.ac		patch \| blob \| blame \| history
gettext-tools/doc/Makefile.am		patch \| blob \| blame \| history
gettext-tools/doc/gettext.texi		patch \| blob \| blame \| history
gettext-tools/doc/lang-d.texi	[new file with mode: 0644]	patch \| blob
gettext-tools/doc/xgettext.texi		patch \| blob \| blame \| history
gettext-tools/libgettextpo/Makefile.am		patch \| blob \| blame \| history
gettext-tools/po/POTFILES.in		patch \| blob \| blame \| history
gettext-tools/src/FILES		patch \| blob \| blame \| history
gettext-tools/src/Makefile.am		patch \| blob \| blame \| history
gettext-tools/src/format-d.c	[new file with mode: 0644]	patch \| blob
gettext-tools/src/format.c		patch \| blob \| blame \| history
gettext-tools/src/format.h		patch \| blob \| blame \| history
gettext-tools/src/html5-entities.h	[new file with mode: 0644]	patch \| blob
gettext-tools/src/message.c		patch \| blob \| blame \| history
gettext-tools/src/message.h		patch \| blob \| blame \| history
gettext-tools/src/x-d.c	[new file with mode: 0644]	patch \| blob
gettext-tools/src/x-d.h	[new file with mode: 0644]	patch \| blob
gettext-tools/src/xg-message.c		patch \| blob \| blame \| history
gettext-tools/src/xgettext.c		patch \| blob \| blame \| history
gettext-tools/tests/Makefile.am		patch \| blob \| blame \| history
gettext-tools/tests/format-d-1	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/format-d-2	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/testdata/dprog.utf-16be.d	[new file with mode: 0644]	patch \| blob
gettext-tools/tests/testdata/dprog.utf-16le.d	[new file with mode: 0644]	patch \| blob
gettext-tools/tests/testdata/dprog.utf-32be.d	[new file with mode: 0644]	patch \| blob
gettext-tools/tests/testdata/dprog.utf-32le.d	[new file with mode: 0644]	patch \| blob
gettext-tools/tests/testdata/dprog.utf-8+bom.d	[new file with mode: 0644]	patch \| blob
gettext-tools/tests/testdata/dprog.utf-8.d	[new file with mode: 0644]	patch \| blob
gettext-tools/tests/xgettext-d-1	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-d-2	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-d-3	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-d-4	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-d-5	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-d-6	[new file with mode: 0755]	patch \| blob
gettext-tools/woe32dll/gettextsrc-exports.c		patch \| blob \| blame \| history