--- /dev/null
+/* D format strings.
+ Copyright (C) 2001-2025 Free Software Foundation, Inc.
+ Written by Bruno Haible <bruno@clisp.org>, 2025.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#include "format.h"
+#include "attribute.h"
+#include "c-ctype.h"
+#include "gcd.h"
+#include "xalloc.h"
+#include "xvasprintf.h"
+#include "format-invalid.h"
+#include "minmax.h"
+#include "gettext.h"
+
+#define _(str) gettext (str)
+
+
+/* Assertion macro. Could be defined to empty for speed. */
+#define ASSERT(expr) if (!(expr)) abort ();
+
+
+/* D format strings are described in the description of the std.format module
+ <https://dlang.org/library/std/format.html> and implemented in
+ gcc-14.2.0/libphobos/src/std/format/spec.d
+ gcc-14.2.0/libphobos/src/std/format/write.d
+ gcc-14.2.0/libphobos/src/std/format/internal/write.d .
+
+ A format string consists of literal text (that is output verbatim), doubled
+ percent-signs ('%%', that lead to a single percent-sign when output), and
+ directives.
+ A directive
+ - starts with '%',
+ - is optionally followed by
+ a positive integer m, then '$', or
+ a positive integer m, then ':', then a positive integer m₂ ≥ m, then '$', or
+ a positive integer m, then ':', then '$',
+ - is optionally followed by a sequence of flags, each being one of
+ '+', '-', ' ', '0', '#', '=',
+ - is optionally followed by a width specification:
+ a positive integer, or
+ '*', or
+ '*', then a positive integer, then '$',
+ - is optionally followed by a precision specification:
+ '.' then optionally:
+ a positive integer, or
+ '*', or
+ '*', then a positive integer, then '$',
+ - is optionally followed by a separator specification:
+ ',' then optionally:
+ a positive integer, or
+ '*',
+ then optionally a '?',
+ - is followed by
+ either a format specifier
+ or a compound specifier:
+ - a '(',
+ - a format string that eats 1 or 2 arguments,
+ - optionally '%|' then literal text, possibly with doubled
+ percent-signs,
+ - '%)'.
+ */
+
+/* Data structure describing format string derived constraints for an
+ argument list. It is a recursive list structure. Structure sharing
+ is not allowed. */
+
+enum format_cdr_type /* Presence constraint stored in struct format_arg. */
+{
+ FCT_REQUIRED, /* The format argument list cannot end before this argument. */
+ FCT_OPTIONAL /* The format argument list may end before this argument. */
+};
+
+enum format_arg_type /* Bit set of acceptable argument kinds, plus one flag. */
+{
+ FAT_NONE = 0, /* Empty set; make_intersected_element reports a contradiction. */
+ FAT_BOOL = 1 << 0,
+ FAT_INTEGER = 1 << 1,
+ FAT_FLOATINGPOINT = 1 << 2,
+ FAT_CHAR = 1 << 3,
+ FAT_ARRAY = 1 << 4, /* string or array */
+ FAT_ASSOCIATIVE = 1 << 5,
+ FAT_IRANGE = 1 << 6, /* irange or simd */
+ FAT_STRUCT = 1 << 7, /* struct or class or union */
+ FAT_POINTER = 1 << 8, /* pointer or null */
+ /* Note: enums are not listed here, since enum values can be formatted with
+ any specifier available for their base type. */
+ FAT_ANY_TYPE = (FAT_BOOL | FAT_INTEGER | FAT_FLOATINGPOINT | FAT_CHAR
+ | FAT_ARRAY | FAT_ASSOCIATIVE | FAT_IRANGE | FAT_STRUCT
+ | FAT_POINTER), /* All type bits above: no restriction on the type. */
+ /* A flag: when set, the 'list' member of struct format_arg is in use.
+ It is not part of FAT_ANY_TYPE (bit 10; bit 9 is not assigned here). */
+ FAT_ELEMENTWISE = 1 << 10,
+ /* Combination of allowed types and flag:
+ (presumably _1 and _2 correspond to compound specifiers whose inner format
+ string eats 1 resp. 2 arguments - TODO confirm against the parser) */
+ FAT_ELEMENTWISE_1 = FAT_ELEMENTWISE | FAT_ARRAY | FAT_IRANGE,
+ FAT_ELEMENTWISE_2 = FAT_ELEMENTWISE | FAT_ASSOCIATIVE
+};
+
+struct format_arg /* Constraint for a run of repcount consecutive arguments. */
+{
+ unsigned int repcount; /* Number of consecutive arguments this constraint
+ applies to. Normally 1, but unconstrained
+ arguments are often repeated.
+ verify_element requires it to be > 0. */
+ enum format_cdr_type presence; /* Can the argument list end right before
+ this argument? */
+ enum format_arg_type type; /* Possible values for this argument. */
+ struct format_arg_list *list; /* For FAT_ELEMENTWISE. Only accessed when
+ that flag is set in type; then it is
+ released by free_element and deep-copied
+ by copy_element. */
+};
+
+struct segment /* A growable array of argument constraints. */
+{
+ unsigned int count; /* Number of format_arg records used. */
+ unsigned int allocated; /* Number of format_arg records that element has
+ room for. verify_list checks count <= allocated. */
+ struct format_arg *element; /* Argument constraints.
+ NULL in the lists built by make_empty_list. */
+ unsigned int length; /* Number of arguments represented by this segment.
+ This is the sum of all repcounts in the segment. */
+};
+
+struct format_arg_list /* Constraints for a whole argument list. */
+{
+ /* The constraints for the potentially infinite argument list are assumed
+ to become ultimately periodic. Such a periodic sequence can be split into
+ an initial segment and an endlessly repeated loop segment.
+ A finite sequence is represented entirely in the initial segment; the
+ loop segment is empty.
+ In this file, the loop segment is always either empty or has length 1.
+ But it is not worth exploiting this property: The code is more future-proof
+ in the general form, shared with format-lisp.c and format-scheme.c.
+ The per-segment invariants (count <= allocated, length == sum of the
+ repcounts) are checked by verify_list. */
+
+ struct segment initial; /* Initial arguments segment. */
+ struct segment repeated; /* Endlessly repeated segment. */
+};
+
+struct spec /* NOTE(review): not used in the part of the file visible here. */
+{
+ unsigned int directives; /* presumably the number of directives found in the
+ format string - TODO confirm against the parser */
+ struct format_arg_list *list; /* presumably the constraints derived for the
+ argument list - TODO confirm */
+};
+
+
+/* Forward declaration of local functions.
+ They are needed because the per-element helpers (verify_element,
+ free_element, copy_element, equal_element, make_intersected_element) are
+ defined before, and call, the corresponding list functions for the sublist
+ of a FAT_ELEMENTWISE element. */
+static void verify_list (const struct format_arg_list *list);
+static void free_list (struct format_arg_list *list);
+static struct format_arg_list * copy_list (const struct format_arg_list *list);
+static bool equal_list (const struct format_arg_list *list1,
+ const struct format_arg_list *list2);
+static struct format_arg_list * make_intersected_list
+ (struct format_arg_list *list1,
+ struct format_arg_list *list2);
+
+
+/* ======================= Verify a format_arg_list ======================= */
+
+/* Check the invariants of a single argument constraint: its repcount must
+   be positive, and if it carries a sublist (FAT_ELEMENTWISE), that sublist
+   must be valid as well.  Aborts on violation.  */
+static void
+verify_element (const struct format_arg * e)
+{
+  const bool has_sublist = (e->type & FAT_ELEMENTWISE) != 0;
+
+  ASSERT (e->repcount > 0);
+  if (has_sublist)
+    verify_list (e->list);
+}
+
+/* Check the invariants of an argument list.  For the initial segment and
+   then for the repeated segment: count must not exceed allocated, every
+   element must pass verify_element, and the repcounts must add up to the
+   segment's length.  Aborts on violation.  */
+/* Memory effects: none.  */
+static void
+verify_list (const struct format_arg_list *list)
+{
+  const struct segment *segs[2];
+  unsigned int k;
+
+  segs[0] = &list->initial;
+  segs[1] = &list->repeated;
+
+  for (k = 0; k < 2; k++)
+    {
+      const struct segment *seg = segs[k];
+      unsigned int sum = 0;
+      unsigned int idx;
+
+      ASSERT (seg->count <= seg->allocated);
+      for (idx = 0; idx < seg->count; idx++)
+        {
+          const struct format_arg *arg = &seg->element[idx];
+
+          verify_element (arg);
+          sum += arg->repcount;
+        }
+      ASSERT (sum == seg->length);
+    }
+}
+
+/* Assertion macro. Could be defined to empty for speed.
+ Expands to a call of verify_list, which aborts if LIST violates one of
+ the invariants checked there. */
+#define VERIFY_LIST(list) verify_list (list)
+
+
+/* ======================== Free a format_arg_list ======================== */
+
+/* Release what an argument list element owns, namely its sublist when the
+   FAT_ELEMENTWISE flag is set.  The element record itself is not freed.  */
+static inline void
+free_element (struct format_arg *element)
+{
+  if ((element->type & FAT_ELEMENTWISE) == 0)
+    return;
+
+  free_list (element->list);
+}
+
+/* Free an argument list: the sublists owned by its elements, both element
+   arrays, and the list structure itself.
+   LIST must be heap-allocated, as the results of copy_list, make_empty_list,
+   make_unconstrained_list and make_intersected_list are, and must not be
+   used after this call.  */
+/* Memory effects: Frees list.  */
+static void
+free_list (struct format_arg_list *list)
+{
+  unsigned int i;
+
+  for (i = 0; i < list->initial.count; i++)
+    free_element (&list->initial.element[i]);
+  /* free (NULL) is a no-op, so an empty segment needs no special case.  */
+  free (list->initial.element);
+
+  for (i = 0; i < list->repeated.count; i++)
+    free_element (&list->repeated.element[i]);
+  free (list->repeated.element);
+
+  /* Release the structure as well; without this, every freed list leaks
+     its struct format_arg_list.  */
+  free (list);
+}
+
+
+/* ======================== Copy a format_arg_list ======================== */
+
+/* Fill *NEWELEMENT with a deep copy of *OLDELEMENT: the scalar members are
+   copied, and the sublist is duplicated with copy_list when the
+   FAT_ELEMENTWISE flag is set.  Otherwise newelement->list is left
+   untouched.  */
+static inline void
+copy_element (struct format_arg *newelement,
+              const struct format_arg *oldelement)
+{
+  const enum format_arg_type kind = oldelement->type;
+
+  newelement->type = kind;
+  newelement->presence = oldelement->presence;
+  newelement->repcount = oldelement->repcount;
+  if ((kind & FAT_ELEMENTWISE) != 0)
+    newelement->list = copy_list (oldelement->list);
+}
+
+/* Return a deep copy of an argument list.  Each segment of the copy is
+   allocated with exactly as many records as the original uses
+   (allocated == count); an empty segment gets a NULL element array.  */
+/* Memory effects: Freshly allocated result.  */
+static struct format_arg_list *
+copy_list (const struct format_arg_list *list)
+{
+  struct format_arg_list *newlist;
+  const struct segment *src[2];
+  struct segment *dst[2];
+  unsigned int k;
+
+  VERIFY_LIST (list);
+
+  newlist = XMALLOC (struct format_arg_list);
+
+  src[0] = &list->initial;
+  dst[0] = &newlist->initial;
+  src[1] = &list->repeated;
+  dst[1] = &newlist->repeated;
+
+  /* First the initial segment, then the repeated one.  */
+  for (k = 0; k < 2; k++)
+    {
+      const struct segment *from = src[k];
+      struct segment *to = dst[k];
+      unsigned int total = 0;
+      unsigned int idx;
+
+      to->count = to->allocated = from->count;
+      to->element =
+        (from->count > 0
+         ? XNMALLOC (to->allocated, struct format_arg)
+         : NULL);
+      for (idx = 0; idx < from->count; idx++)
+        {
+          copy_element (&to->element[idx], &from->element[idx]);
+          total += from->element[idx].repcount;
+        }
+      /* The recomputed length must agree with the stored one.  */
+      ASSERT (total == from->length);
+      to->length = total;
+    }
+
+  VERIFY_LIST (newlist);
+
+  return newlist;
+}
+
+
+/* ===================== Compare two format_arg_lists ===================== */
+
+/* Tell whether two normalized argument constraints are equivalent.  The
+   repcount is deliberately not compared: only presence, type and, for
+   FAT_ELEMENTWISE elements, the sublists are.  */
+static bool
+equal_element (const struct format_arg * e1, const struct format_arg * e2)
+{
+  if (e1->presence != e2->presence)
+    return false;
+  if (e1->type != e2->type)
+    return false;
+  if (e1->type & FAT_ELEMENTWISE)
+    return equal_list (e1->list, e2->list);
+  return true;
+}
+
+/* Tell whether two normalized argument list constraints are equivalent:
+   in both segments they must have the same number of elements, and
+   corresponding elements must agree in repcount and be equivalent
+   according to equal_element.  */
+/* Memory effects: none.  */
+static bool
+equal_list (const struct format_arg_list *list1,
+            const struct format_arg_list *list2)
+{
+  const struct segment *lhs[2];
+  const struct segment *rhs[2];
+  unsigned int k;
+
+  VERIFY_LIST (list1);
+  VERIFY_LIST (list2);
+
+  lhs[0] = &list1->initial;
+  rhs[0] = &list2->initial;
+  lhs[1] = &list1->repeated;
+  rhs[1] = &list2->repeated;
+
+  for (k = 0; k < 2; k++)
+    {
+      const struct segment *a = lhs[k];
+      const struct segment *b = rhs[k];
+      unsigned int idx;
+
+      if (a->count != b->count)
+        return false;
+      for (idx = 0; idx < a->count; idx++)
+        {
+          const struct format_arg *x = &a->element[idx];
+          const struct format_arg *y = &b->element[idx];
+
+          if (x->repcount != y->repcount || !equal_element (x, y))
+            return false;
+        }
+    }
+
+  return true;
+}
+
+
+/* ===================== Incremental memory allocation ===================== */
+
+/* Make room for at least NEWCOUNT records in the initial segment, i.e.
+   ensure list->initial.allocated >= newcount.  When growing, the capacity
+   becomes the larger of 2 * old + 1 and NEWCOUNT.  */
+static inline void
+ensure_initial_alloc (struct format_arg_list *list, unsigned int newcount)
+{
+  struct segment *seg = &list->initial;
+  unsigned int wanted;
+
+  if (newcount <= seg->allocated)
+    return;
+
+  wanted = 2 * seg->allocated + 1;
+  if (wanted < newcount)
+    wanted = newcount;
+  seg->allocated = wanted;
+  seg->element =
+    (struct format_arg *)
+    xrealloc (seg->element, wanted * sizeof (struct format_arg));
+}
+
+/* Make room for one more record in the initial segment, i.e. ensure
+   list->initial.allocated > list->initial.count.  When growing, the
+   capacity becomes the larger of 2 * old + 1 and count + 1.  */
+static inline void
+grow_initial_alloc (struct format_arg_list *list)
+{
+  struct segment *seg = &list->initial;
+  unsigned int wanted;
+
+  if (seg->count < seg->allocated)
+    return;
+
+  wanted = 2 * seg->allocated + 1;
+  if (wanted < seg->count + 1)
+    wanted = seg->count + 1;
+  seg->allocated = wanted;
+  seg->element =
+    (struct format_arg *)
+    xrealloc (seg->element, wanted * sizeof (struct format_arg));
+}
+
+/* Make room for at least NEWCOUNT records in the repeated segment, i.e.
+   ensure list->repeated.allocated >= newcount.  When growing, the capacity
+   becomes the larger of 2 * old + 1 and NEWCOUNT.  */
+static inline void
+ensure_repeated_alloc (struct format_arg_list *list, unsigned int newcount)
+{
+  struct segment *seg = &list->repeated;
+  unsigned int wanted;
+
+  if (newcount <= seg->allocated)
+    return;
+
+  wanted = 2 * seg->allocated + 1;
+  if (wanted < newcount)
+    wanted = newcount;
+  seg->allocated = wanted;
+  seg->element =
+    (struct format_arg *)
+    xrealloc (seg->element, wanted * sizeof (struct format_arg));
+}
+
+/* Make room for one more record in the repeated segment, i.e. ensure
+   list->repeated.allocated > list->repeated.count.  When growing, the
+   capacity becomes the larger of 2 * old + 1 and count + 1.  */
+static inline void
+grow_repeated_alloc (struct format_arg_list *list)
+{
+  struct segment *seg = &list->repeated;
+  unsigned int wanted;
+
+  if (seg->count < seg->allocated)
+    return;
+
+  wanted = 2 * seg->allocated + 1;
+  if (wanted < seg->count + 1)
+    wanted = seg->count + 1;
+  seg->allocated = wanted;
+  seg->element =
+    (struct format_arg *)
+    xrealloc (seg->element, wanted * sizeof (struct format_arg));
+}
+
+
+/* ====================== Normalize a format_arg_list ====================== */
+
+/* Normalize an argument list constraint, assuming all sublists are already
+ normalized.
+ Three steps: (1) merge adjacent elements that equal_element reports as
+ equivalent, (2) try to shorten the period of the repeated segment,
+ (3) remove from the end of the initial segment what the repeated segment
+ already expresses. */
+/* Memory effects: Destructively modifies list. */
+static void
+normalize_outermost_list (struct format_arg_list *list)
+{
+ unsigned int n, i, j;
+
+ /* Step 1: Combine adjacent elements.
+ Copy from i to j, keeping 0 <= j <= i.
+ The segment lengths are not touched: repcounts are only added up. */
+
+ n = list->initial.count;
+ for (i = j = 0; i < n; i++)
+ if (j > 0
+ && equal_element (&list->initial.element[i],
+ &list->initial.element[j-1]))
+ {
+ list->initial.element[j-1].repcount +=
+ list->initial.element[i].repcount;
+ free_element (&list->initial.element[i]); /* element i is dropped */
+ }
+ else
+ {
+ if (j < i)
+ list->initial.element[j] = list->initial.element[i];
+ j++;
+ }
+ list->initial.count = j;
+
+ n = list->repeated.count; /* the same merge, now for the loop segment */
+ for (i = j = 0; i < n; i++)
+ if (j > 0
+ && equal_element (&list->repeated.element[i],
+ &list->repeated.element[j-1]))
+ {
+ list->repeated.element[j-1].repcount +=
+ list->repeated.element[i].repcount;
+ free_element (&list->repeated.element[i]); /* element i is dropped */
+ }
+ else
+ {
+ if (j < i)
+ list->repeated.element[j] = list->repeated.element[i];
+ j++;
+ }
+ list->repeated.count = j;
+
+ /* Nothing more to be done if the loop segment is empty. */
+ if (list->repeated.count > 0)
+ {
+ unsigned int m, repcount0_extra;
+
+ /* Step 2: Reduce the loop period.
+ If the last loop element is equivalent to the first one, it is treated
+ as the wrapped-around part of the first one: its repcount is remembered
+ in repcount0_extra and it is excluded from the period search. */
+ n = list->repeated.count;
+ repcount0_extra = 0;
+ if (n > 1
+ && equal_element (&list->repeated.element[0],
+ &list->repeated.element[n-1]))
+ {
+ repcount0_extra = list->repeated.element[n-1].repcount;
+ n--;
+ }
+ /* Proceed as if the loop period were n, with
+ list->repeated.element[0].repcount incremented by repcount0_extra. */
+ for (m = 2; m <= n / 2; m++)
+ if ((n % m) == 0)
+ {
+ /* m is a divisor of n. Try to reduce the loop period to m. */
+ bool ok = true;
+
+ for (i = 0; i < n - m; i++)
+ if (!((list->repeated.element[i].repcount
+ + (i == 0 ? repcount0_extra : 0)
+ == list->repeated.element[i+m].repcount)
+ && equal_element (&list->repeated.element[i],
+ &list->repeated.element[i+m])))
+ {
+ ok = false;
+ break;
+ }
+ if (ok)
+ {
+ for (i = m; i < n; i++)
+ free_element (&list->repeated.element[i]);
+ if (n < list->repeated.count) /* true iff n was decremented above */
+ list->repeated.element[m] = list->repeated.element[n];
+ list->repeated.count = list->repeated.count - n + m;
+ list->repeated.length /= n / m;
+ break;
+ }
+ }
+ if (list->repeated.count == 1)
+ {
+ /* The loop has period 1. Normalize the repcount. */
+ list->repeated.element[0].repcount = 1;
+ list->repeated.length = 1;
+ }
+
+ /* Step 3: Roll as much as possible of the initial segment's tail
+ into the loop. */
+ if (list->repeated.count == 1)
+ {
+ if (list->initial.count > 0
+ && equal_element (&list->initial.element[list->initial.count-1],
+ &list->repeated.element[0]))
+ {
+ /* Roll the last element of the initial segment into the loop.
+ Its repcount is irrelevant. The second-to-last element is
+ certainly different and doesn't need to be considered. */
+ list->initial.length -=
+ list->initial.element[list->initial.count-1].repcount;
+ free_element (&list->initial.element[list->initial.count-1]);
+ list->initial.count--;
+ }
+ }
+ else
+ {
+ /* Each iteration moves moved_repcount arguments from the end of the
+ initial segment into the loop, by rotating the loop backwards by
+ that many arguments. The loop length stays the same. */
+ while (list->initial.count > 0
+ && equal_element (&list->initial.element[list->initial.count-1],
+ &list->repeated.element[list->repeated.count-1]))
+ {
+ unsigned int moved_repcount =
+ MIN (list->initial.element[list->initial.count-1].repcount,
+ list->repeated.element[list->repeated.count-1].repcount);
+
+ /* Add the element at the start of list->repeated. */
+ if (equal_element (&list->repeated.element[0],
+ &list->repeated.element[list->repeated.count-1]))
+ list->repeated.element[0].repcount += moved_repcount;
+ else
+ {
+ unsigned int newcount = list->repeated.count + 1;
+ ensure_repeated_alloc (list, newcount);
+ for (i = newcount - 1; i > 0; i--)
+ list->repeated.element[i] = list->repeated.element[i-1];
+ list->repeated.count = newcount;
+ copy_element (&list->repeated.element[0],
+ &list->repeated.element[list->repeated.count-1]);
+ list->repeated.element[0].repcount = moved_repcount;
+ }
+
+ /* Remove the element from the end of list->repeated. */
+ list->repeated.element[list->repeated.count-1].repcount -=
+ moved_repcount;
+ if (list->repeated.element[list->repeated.count-1].repcount == 0)
+ {
+ free_element (&list->repeated.element[list->repeated.count-1]);
+ list->repeated.count--;
+ }
+
+ /* Remove the element from the end of list->initial. */
+ list->initial.element[list->initial.count-1].repcount -=
+ moved_repcount;
+ if (list->initial.element[list->initial.count-1].repcount == 0)
+ {
+ free_element (&list->initial.element[list->initial.count-1]);
+ list->initial.count--;
+ }
+ list->initial.length -= moved_repcount;
+ }
+ }
+ }
+}
+
+/* Normalize an argument list constraint, bottom-up: first the sublists of
+   all FAT_ELEMENTWISE elements (initial segment, then repeated segment),
+   then the list itself through normalize_outermost_list.  */
+/* Memory effects: Destructively modifies list.  */
+static void
+normalize_list (struct format_arg_list *list)
+{
+  struct segment *segs[2];
+  unsigned int k;
+
+  VERIFY_LIST (list);
+
+  segs[0] = &list->initial;
+  segs[1] = &list->repeated;
+
+  /* Recurse into the sublists.  */
+  for (k = 0; k < 2; k++)
+    {
+      unsigned int cnt = segs[k]->count;
+      unsigned int idx;
+
+      for (idx = 0; idx < cnt; idx++)
+        {
+          struct format_arg *arg = &segs[k]->element[idx];
+
+          if (arg->type & FAT_ELEMENTWISE)
+            normalize_list (arg->list);
+        }
+    }
+
+  /* All sublists are normalized now; finish with the top level.  */
+  normalize_outermost_list (list);
+
+  VERIFY_LIST (list);
+}
+
+
+/* ===================== Unconstrained and empty lists ===================== */
+
+/* It's easier to allocate these on demand, than to be careful not to
+ accidentally modify statically allocated lists. */
+
+
+/* Create an unconstrained argument list: an empty initial segment, followed
+   by a loop consisting of a single optional argument of any type.  */
+/* Memory effects: Freshly allocated result.  */
+static struct format_arg_list *
+make_unconstrained_list (void)
+{
+  struct format_arg_list *list;
+
+  list = XMALLOC (struct format_arg_list);
+  list->initial.count = 0;
+  list->initial.allocated = 0;
+  list->initial.element = NULL;
+  list->initial.length = 0;
+  list->repeated.count = 1;
+  list->repeated.allocated = 1;
+  list->repeated.element = XNMALLOC (1, struct format_arg);
+  list->repeated.element[0].repcount = 1;
+  list->repeated.element[0].presence = FCT_OPTIONAL;
+  list->repeated.element[0].type = FAT_ANY_TYPE;
+  /* Not FAT_ELEMENTWISE, so the sublist is never looked at; still, give
+     it a defined value, since whole elements get copied by assignment.  */
+  list->repeated.element[0].list = NULL;
+  list->repeated.length = 1;
+
+  VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* Create an empty argument list: both the initial and the repeated segment
+   are empty, so the list allows no arguments at all.  */
+/* Memory effects: Freshly allocated result.  */
+static struct format_arg_list *
+make_empty_list (void)
+{
+  struct format_arg_list *list;
+
+  list = XMALLOC (struct format_arg_list);
+  list->initial.count = 0;
+  list->initial.allocated = 0;
+  list->initial.element = NULL;
+  list->initial.length = 0;
+  list->repeated.count = 0;
+  list->repeated.allocated = 0;
+  list->repeated.element = NULL;
+  list->repeated.length = 0;
+
+  VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* Tell whether LIST is empty, i.e. has no element in its initial segment
+   and none in its repeated segment.  */
+/* Memory effects: none.  */
+MAYBE_UNUSED static bool
+is_empty_list (const struct format_arg_list *list)
+{
+  if (list->initial.count != 0)
+    return false;
+  return list->repeated.count == 0;
+}
+
+
+/* ======================== format_arg_list surgery ======================== */
+
+/* Replace the loop segment by M consecutive copies of itself, where M >= 1.
+   For M == 1 nothing changes.
+   Assumes list->repeated.count > 0.  */
+/* Memory effects: list is destructively modified.  */
+static void
+unfold_loop (struct format_arg_list *list, unsigned int m)
+{
+  unsigned int period;
+  unsigned int total;
+  unsigned int dst;
+
+  if (m <= 1)
+    return;
+
+  period = list->repeated.count;
+  total = period * m;
+  ensure_repeated_alloc (list, total);
+
+  /* Slot DST receives a deep copy of the original element DST mod PERIOD.  */
+  for (dst = period; dst < total; dst++)
+    copy_element (&list->repeated.element[dst],
+                  &list->repeated.element[dst % period]);
+
+  list->repeated.count = total;
+  list->repeated.length *= m;
+}
+
+/* Ensure list->initial.length := m, where m >= list->initial.length.
+ Assumes list->repeated.count > 0.
+ The initial segment is extended with copies of loop elements, and the loop
+ is rotated so that it continues where the extended initial segment ends. */
+/* Memory effects: list is destructively modified. */
+static void
+rotate_loop (struct format_arg_list *list, unsigned int m)
+{
+ if (m == list->initial.length)
+ return;
+
+ if (list->repeated.count == 1)
+ {
+ /* Instead of multiple copies of list->repeated.element[0], a single
+ copy with higher repcount is appended to list->initial.
+ The loop itself is left unchanged in this case. */
+ unsigned int i, newcount;
+
+ newcount = list->initial.count + 1;
+ ensure_initial_alloc (list, newcount);
+ i = list->initial.count;
+ copy_element (&list->initial.element[i], &list->repeated.element[0]);
+ list->initial.element[i].repcount = m - list->initial.length;
+ list->initial.count = newcount;
+ list->initial.length = m;
+ }
+ else
+ {
+ unsigned int n = list->repeated.length;
+
+ /* Write m = list->initial.length + q * n + r with 0 <= r < n. */
+ unsigned int q = (m - list->initial.length) / n;
+ unsigned int r = (m - list->initial.length) % n;
+
+ /* Determine how many entries of list->repeated are needed for
+ length r. Afterwards, s is the number of whole entries and t the
+ number of arguments still needed from entry s. */
+ unsigned int s;
+ unsigned int t;
+
+ for (t = r, s = 0;
+ s < list->repeated.count && t >= list->repeated.element[s].repcount;
+ t -= list->repeated.element[s].repcount, s++)
+ ;
+
+ /* s must be < list->repeated.count, otherwise r would have been >= n. */
+ ASSERT (s < list->repeated.count);
+
+ /* So we need to add to list->initial:
+ q full copies of list->repeated,
+ plus the s first elements of list->repeated,
+ plus, if t > 0, a splitoff of list->repeated.element[s]. */
+ {
+ unsigned int i, j, k, newcount;
+
+ i = list->initial.count;
+ newcount = i + q * list->repeated.count + s + (t > 0 ? 1 : 0);
+ ensure_initial_alloc (list, newcount);
+ for (k = 0; k < q; k++)
+ for (j = 0; j < list->repeated.count; j++, i++)
+ copy_element (&list->initial.element[i],
+ &list->repeated.element[j]);
+ for (j = 0; j < s; j++, i++)
+ copy_element (&list->initial.element[i], &list->repeated.element[j]);
+ if (t > 0)
+ {
+ copy_element (&list->initial.element[i],
+ &list->repeated.element[j]); /* here j == s */
+ list->initial.element[i].repcount = t;
+ i++;
+ }
+ ASSERT (i == newcount);
+ list->initial.count = newcount;
+ /* The new length of the initial segment is
+ = list->initial.length
+ + q * list->repeated.length
+ + list->repeated[0..s-1].repcount + t
+ = list->initial.length + q * n + r
+ = m.
+ */
+ list->initial.length = m;
+ }
+
+ /* And rotate list->repeated.
+ The new loop starts with old entry s; the old entries 0..s-1 follow
+ after the old last entry. If entry s was split (t > 0), its first t
+ arguments go to the very end of the loop as an additional entry.
+ list->repeated.length does not change. */
+ if (r > 0)
+ {
+ unsigned int i, j, oldcount, newcount;
+ struct format_arg *newelement;
+
+ oldcount = list->repeated.count;
+ newcount = list->repeated.count + (t > 0 ? 1 : 0);
+ newelement = XNMALLOC (newcount, struct format_arg);
+ i = 0;
+ for (j = s; j < oldcount; j++, i++)
+ newelement[i] = list->repeated.element[j]; /* moved, not deep-copied */
+ for (j = 0; j < s; j++, i++)
+ newelement[i] = list->repeated.element[j];
+ if (t > 0)
+ {
+ copy_element (&newelement[oldcount], &newelement[0]);
+ newelement[0].repcount -= t;
+ newelement[oldcount].repcount = t;
+ }
+ free (list->repeated.element); /* only the old array; entries were moved */
+ list->repeated.element = newelement;
+ list->repeated.count = newcount;
+ }
+ }
+}
+
+
+/* Ensure index n in the initial segment falls on a split between elements,
+   i.e. if 0 < n < list->initial.length, then n-1 and n are covered by two
+   different adjacent elements.
+   If n lies beyond the initial segment, the segment is first extended to
+   length n with rotate_loop; this requires a non-empty loop.
+   Returns the index of the element that starts at argument index n (which
+   is list->initial.count if n is the end of the initial segment).  */
+/* Memory effects: list is destructively modified.  */
+static unsigned int
+initial_splitelement (struct format_arg_list *list, unsigned int n)
+{
+  unsigned int pos;
+  unsigned int rest;
+
+  VERIFY_LIST (list);
+
+  if (n > list->initial.length)
+    {
+      ASSERT (list->repeated.count > 0);
+      rotate_loop (list, n);
+      ASSERT (n <= list->initial.length);
+    }
+
+  /* Skip the entries that lie entirely before argument index n.  */
+  pos = 0;
+  rest = n;
+  while (pos < list->initial.count
+         && rest >= list->initial.element[pos].repcount)
+    {
+      rest -= list->initial.element[pos].repcount;
+      pos++;
+    }
+
+  /* Already on an element boundary?  */
+  if (rest == 0)
+    return pos;
+
+  ASSERT (pos < list->initial.count);
+
+  /* Entry POS straddles index n: split it into a part of REST arguments
+     and a part with the remaining arguments.  */
+  {
+    unsigned int whole = list->initial.element[pos].repcount;
+    unsigned int grown = list->initial.count + 1;
+    struct format_arg *arr;
+    unsigned int k;
+
+    ensure_initial_alloc (list, grown);
+    arr = list->initial.element;
+    /* Shift the entries after POS up by one slot.  */
+    for (k = list->initial.count; k > pos + 1; k--)
+      arr[k] = arr[k - 1];
+    copy_element (&arr[pos + 1], &arr[pos]);
+    arr[pos].repcount = rest;
+    arr[pos + 1].repcount = whole - rest;
+    list->initial.count = grown;
+  }
+
+  VERIFY_LIST (list);
+
+  return pos + 1;
+}
+
+
+/* Ensure index n in the initial segment is not shared. Return its index.
+ "Not shared" means that the element covering argument index n has
+ repcount 1. If n lies at or beyond the end of the initial segment, the
+ segment is first extended to length n + 1 with rotate_loop; this requires
+ a non-empty loop. */
+/* Memory effects: list is destructively modified. */
+MAYBE_UNUSED static unsigned int
+initial_unshare (struct format_arg_list *list, unsigned int n)
+{
+ /* This does the same side effects as
+ initial_splitelement (list, n);
+ initial_splitelement (list, n + 1);
+ */
+ unsigned int s;
+ unsigned int t;
+
+ VERIFY_LIST (list);
+
+ if (n >= list->initial.length)
+ {
+ ASSERT (list->repeated.count > 0);
+ rotate_loop (list, n + 1);
+ ASSERT (n < list->initial.length);
+ }
+
+ /* Determine how many entries of list->initial need to be skipped.
+ Afterwards, entry s covers index n, and t is the offset of n within
+ that entry. */
+ for (t = n, s = 0;
+ s < list->initial.count && t >= list->initial.element[s].repcount;
+ t -= list->initial.element[s].repcount, s++)
+ ;
+
+ /* s must be < list->initial.count. */
+ ASSERT (s < list->initial.count);
+
+ if (list->initial.element[s].repcount > 1)
+ {
+ /* Split the entry into at most three entries: for indices < n,
+ for index n, and for indices > n.
+ Only two entries are needed when n is the first (t == 0) or the last
+ (t == oldrepcount - 1) index covered by the entry. */
+ unsigned int oldrepcount = list->initial.element[s].repcount;
+ unsigned int newcount =
+ list->initial.count + (t == 0 || t == oldrepcount - 1 ? 1 : 2);
+ ensure_initial_alloc (list, newcount);
+ if (t == 0 || t == oldrepcount - 1)
+ {
+ unsigned int i;
+
+ for (i = list->initial.count - 1; i > s; i--)
+ list->initial.element[i+1] = list->initial.element[i];
+ copy_element (&list->initial.element[s+1], &list->initial.element[s]);
+ if (t == 0)
+ {
+ list->initial.element[s].repcount = 1;
+ list->initial.element[s+1].repcount = oldrepcount - 1;
+ }
+ else
+ {
+ list->initial.element[s].repcount = oldrepcount - 1;
+ list->initial.element[s+1].repcount = 1;
+ }
+ }
+ else
+ {
+ unsigned int i;
+
+ for (i = list->initial.count - 1; i > s; i--)
+ list->initial.element[i+2] = list->initial.element[i];
+ copy_element (&list->initial.element[s+2], &list->initial.element[s]);
+ copy_element (&list->initial.element[s+1], &list->initial.element[s]);
+ list->initial.element[s].repcount = t;
+ list->initial.element[s+1].repcount = 1;
+ list->initial.element[s+2].repcount = oldrepcount - 1 - t;
+ }
+ list->initial.count = newcount;
+ if (t > 0)
+ s++; /* the entry for index n is the one after the "< n" part */
+ }
+
+ /* Now the entry for index n has repcount 1. */
+ ASSERT (list->initial.element[s].repcount == 1);
+
+ VERIFY_LIST (list);
+
+ return s;
+}
+
+
+/* ================= Intersection of two format_arg_lists ================= */
+
+/* Compute in *RE the intersection (i.e. the combined constraints) of the
+   two argument constraints *E1 and *E2.  Only the presence, type and list
+   members of *RE are written; its repcount is left to the caller.
+   Return false if the intersection is empty, i.e. if the two constraints
+   give a contradiction; re->presence is valid even then.  */
+/* Memory effects: Freshly allocated element's sublist.  */
+static bool
+make_intersected_element (struct format_arg *re,
+                          const struct format_arg * e1,
+                          const struct format_arg * e2)
+{
+  const bool both_optional =
+    (e1->presence != FCT_REQUIRED && e2->presence != FCT_REQUIRED);
+
+  /* Intersect the cdr types: optional only if both sides are optional.  */
+  re->presence = (both_optional ? FCT_OPTIONAL : FCT_REQUIRED);
+
+  /* Intersect the arg types.  */
+
+  /* An unconstrained side contributes nothing: take over the other side.  */
+  if (e1->type == FAT_ANY_TYPE || e2->type == FAT_ANY_TYPE)
+    {
+      const struct format_arg *kept = (e1->type == FAT_ANY_TYPE ? e2 : e1);
+
+      re->type = kept->type;
+      if (kept->type & FAT_ELEMENTWISE)
+        re->list = copy_list (kept->list);
+      return true;
+    }
+
+  /* Both sides carry a sublist: they must be of the same elementwise kind,
+     and the sublists get intersected.  */
+  if (e1->type & e2->type & FAT_ELEMENTWISE)
+    {
+      if (e1->type != e2->type
+          || (e1->type != FAT_ELEMENTWISE_1
+              && e1->type != FAT_ELEMENTWISE_2))
+        return false;
+
+      re->type = e1->type;
+      re->list = make_intersected_list (copy_list (e1->list),
+                                        copy_list (e2->list));
+      return re->list != NULL;
+    }
+
+  /* At most one side carries a sublist: intersect the type bits, and keep
+     that sublist, if any.  */
+  re->type = e1->type & e2->type;
+  if (re->type == FAT_NONE)
+    return false;
+  {
+    const struct format_arg *owner =
+      (e1->type & FAT_ELEMENTWISE ? e1
+       : e2->type & FAT_ELEMENTWISE ? e2
+       : NULL);
+
+    if (owner != NULL)
+      {
+        re->type |= FAT_ELEMENTWISE;
+        re->list = copy_list (owner->list);
+      }
+  }
+
+  return true;
+}
+
+/* Turn the loop segment into a finite tail: move the elements of
+   list->repeated to the end of list->initial, and leave list->repeated
+   empty.  Does nothing if the loop segment is already empty.  */
+/* Memory effects: list is destructively modified.  */
+static void
+append_repeated_to_initial (struct format_arg_list *list)
+{
+  unsigned int base;
+  unsigned int extra;
+  unsigned int k;
+
+  if (list->repeated.count == 0)
+    return;
+
+  base = list->initial.count;
+  extra = list->repeated.count;
+  ensure_initial_alloc (list, base + extra);
+
+  /* The elements are moved, not deep-copied: their sublists change owner.  */
+  for (k = 0; k < extra; k++)
+    list->initial.element[base + k] = list->repeated.element[k];
+  list->initial.count = base + extra;
+  list->initial.length += list->repeated.length;
+
+  /* Only the array is released; its former contents live on in
+     list->initial.  */
+  free (list->repeated.element);
+  list->repeated.element = NULL;
+  list->repeated.allocated = 0;
+  list->repeated.count = 0;
+  list->repeated.length = 0;
+}
+
+/* Handle a contradiction during building of a format_arg_list.
+   The list consists only of an initial segment; the repeated segment is
+   empty.  Starting from the end, FCT_REQUIRED elements are thrown away
+   until an FCT_OPTIONAL element is found.  The list is then cut off right
+   before the last argument covered by that element and returned.  If no
+   FCT_OPTIONAL element exists, the list is freed and NULL is returned.  */
+/* Memory effects: list is destructively modified.  If NULL is returned,
+   list is freed.  */
+static struct format_arg_list *
+backtrack_in_initial (struct format_arg_list *list)
+{
+  ASSERT (list->repeated.count == 0);
+
+  for (;;)
+    {
+      struct format_arg *last;
+
+      if (list->initial.count == 0)
+        break;
+
+      last = &list->initial.element[list->initial.count - 1];
+
+      if (last->presence != FCT_REQUIRED)
+        {
+          /* FCT_OPTIONAL: the list must end here.  Drop one argument.  */
+          list->initial.length--;
+          if (last->repcount > 1)
+            last->repcount--;
+          else
+            {
+              free_element (last);
+              list->initial.count--;
+            }
+          VERIFY_LIST (list);
+          return list;
+        }
+
+      /* FCT_REQUIRED: throw away this element entirely.  */
+      list->initial.length -= last->repcount;
+      free_element (last);
+      list->initial.count--;
+    }
+
+  /* No place where the list could end was found.  */
+  free_list (list);
+  return NULL;
+}
+
+/* Create the intersection (i.e. combined constraints) of two argument list
+ constraints. Free both argument lists when done. Return NULL if the
+ intersection is empty, i.e. if the two constraints give a contradiction.
+ The work proceeds in five steps: bring the repeated segments to a common
+ length, bring the initial segments to a common length, allocate an empty
+ result, intersect the initial segments element by element, then intersect
+ the repeated segments element by element. */
+/* Memory effects: list1 and list2 are freed. The result, if non-NULL, is
+ freshly allocated. */
+static struct format_arg_list *
+make_intersected_list (struct format_arg_list *list1,
+ struct format_arg_list *list2)
+{
+ struct format_arg_list *result;
+
+ VERIFY_LIST (list1);
+ VERIFY_LIST (list2);
+
+ if (list1->repeated.length > 0 && list2->repeated.length > 0)
+ /* Step 1: Ensure list1->repeated.length == list2->repeated.length. */
+ {
+ unsigned int n1 = list1->repeated.length;
+ unsigned int n2 = list2->repeated.length;
+ unsigned int g = gcd (n1, n2);
+ unsigned int m1 = n2 / g; /* = lcm(n1,n2) / n1 */
+ unsigned int m2 = n1 / g; /* = lcm(n1,n2) / n2 */
+
+ unfold_loop (list1, m1);
+ unfold_loop (list2, m2);
+ /* Now list1->repeated.length = list2->repeated.length = lcm(n1,n2). */
+ }
+
+ if (list1->repeated.length > 0 || list2->repeated.length > 0)
+ /* Step 2: Ensure the initial segment of the result can be computed
+ from the initial segments of list1 and list2. If both have a
+ repeated segment, this means to ensure
+ list1->initial.length == list2->initial.length. */
+ {
+ unsigned int m = MAX (list1->initial.length, list2->initial.length);
+
+ /* Only a list that has a repeated segment is passed to rotate_loop. */
+ if (list1->repeated.length > 0)
+ rotate_loop (list1, m);
+ if (list2->repeated.length > 0)
+ rotate_loop (list2, m);
+ }
+
+ /* Sanity check of the postconditions of steps 1 and 2. */
+ if (list1->repeated.length > 0 && list2->repeated.length > 0)
+ {
+ ASSERT (list1->initial.length == list2->initial.length);
+ ASSERT (list1->repeated.length == list2->repeated.length);
+ }
+
+ /* Step 3: Allocate the result. */
+ result = XMALLOC (struct format_arg_list);
+ result->initial.count = 0;
+ result->initial.allocated = 0;
+ result->initial.element = NULL;
+ result->initial.length = 0;
+ result->repeated.count = 0;
+ result->repeated.allocated = 0;
+ result->repeated.element = NULL;
+ result->repeated.length = 0;
+
+ /* Step 4: Elementwise intersection of list1->initial, list2->initial. */
+ {
+ /* e1/e2 point to the current element of each input, c1/c2 count the
+ elements that are still unprocessed. */
+ struct format_arg *e1;
+ struct format_arg *e2;
+ unsigned int c1;
+ unsigned int c2;
+
+ e1 = list1->initial.element; c1 = list1->initial.count;
+ e2 = list2->initial.element; c2 = list2->initial.count;
+ while (c1 > 0 && c2 > 0)
+ {
+ struct format_arg *re;
+
+ /* Ensure room in result->initial. */
+ grow_initial_alloc (result);
+ re = &result->initial.element[result->initial.count];
+ /* The new element covers the overlap of the two current runs. */
+ re->repcount = MIN (e1->repcount, e2->repcount);
+
+ /* Intersect the argument types. */
+ if (!make_intersected_element (re, e1, e2))
+ {
+ /* If re->presence == FCT_OPTIONAL, the result list ends here. */
+ if (re->presence == FCT_REQUIRED)
+ /* Contradiction. Backtrack. */
+ result = backtrack_in_initial (result);
+ goto done;
+ }
+
+ result->initial.count++;
+ result->initial.length += re->repcount;
+
+ /* Consume the intersected run from both inputs (this modifies the
+ input lists, which are freed at 'done' anyway) and step past an
+ input element once it is used up. */
+ e1->repcount -= re->repcount;
+ if (e1->repcount == 0)
+ {
+ e1++;
+ c1--;
+ }
+ e2->repcount -= re->repcount;
+ if (e2->repcount == 0)
+ {
+ e2++;
+ c2--;
+ }
+ }
+
+ if (list1->repeated.count == 0 && list2->repeated.count == 0)
+ {
+ /* Intersecting two finite lists. */
+ if (c1 > 0)
+ {
+ /* list1 longer than list2. */
+ if (e1->presence == FCT_REQUIRED)
+ /* Contradiction. Backtrack. */
+ result = backtrack_in_initial (result);
+ }
+ else if (c2 > 0)
+ {
+ /* list2 longer than list1. */
+ if (e2->presence == FCT_REQUIRED)
+ /* Contradiction. Backtrack. */
+ result = backtrack_in_initial (result);
+ }
+ goto done;
+ }
+ else if (list1->repeated.count == 0)
+ {
+ /* Intersecting a finite and an infinite list. */
+ ASSERT (c1 == 0);
+ /* The next argument of list2 comes from the rest of its initial
+ segment or, if that is exhausted, from its repeated segment. */
+ if ((c2 > 0 ? e2->presence : list2->repeated.element[0].presence)
+ == FCT_REQUIRED)
+ /* Contradiction. Backtrack. */
+ result = backtrack_in_initial (result);
+ goto done;
+ }
+ else if (list2->repeated.count == 0)
+ {
+ /* Intersecting an infinite and a finite list. */
+ ASSERT (c2 == 0);
+ /* Mirror image of the previous case. */
+ if ((c1 > 0 ? e1->presence : list1->repeated.element[0].presence)
+ == FCT_REQUIRED)
+ /* Contradiction. Backtrack. */
+ result = backtrack_in_initial (result);
+ goto done;
+ }
+ /* Intersecting two infinite lists. */
+ ASSERT (c1 == 0 && c2 == 0);
+ }
+
+ /* Step 5: Elementwise intersection of list1->repeated, list2->repeated. */
+ {
+ struct format_arg *e1;
+ struct format_arg *e2;
+ unsigned int c1;
+ unsigned int c2;
+
+ e1 = list1->repeated.element; c1 = list1->repeated.count;
+ e2 = list2->repeated.element; c2 = list2->repeated.count;
+ while (c1 > 0 && c2 > 0)
+ {
+ struct format_arg *re;
+
+ /* Ensure room in result->repeated. */
+ grow_repeated_alloc (result);
+ re = &result->repeated.element[result->repeated.count];
+ re->repcount = MIN (e1->repcount, e2->repcount);
+
+ /* Intersect the argument types. */
+ if (!make_intersected_element (re, e1, e2))
+ {
+ /* Remember the presence now, before the helper below is called
+ on the result. */
+ bool re_is_required = re->presence == FCT_REQUIRED;
+
+ append_repeated_to_initial (result);
+
+ /* If re->presence == FCT_OPTIONAL, the result list ends here. */
+ if (re_is_required)
+ /* Contradiction. Backtrack. */
+ result = backtrack_in_initial (result);
+
+ goto done;
+ }
+
+ result->repeated.count++;
+ result->repeated.length += re->repcount;
+
+ /* Same consumption scheme as in step 4. */
+ e1->repcount -= re->repcount;
+ if (e1->repcount == 0)
+ {
+ e1++;
+ c1--;
+ }
+ e2->repcount -= re->repcount;
+ if (e2->repcount == 0)
+ {
+ e2++;
+ c2--;
+ }
+ }
+ ASSERT (c1 == 0 && c2 == 0);
+ }
+
+ done:
+ /* Common exit: the inputs are always released; result may be NULL here if
+ backtracking found no way to resolve a contradiction. */
+ free_list (list1);
+ free_list (list2);
+ if (result != NULL)
+ {
+ /* Undo the loop unfolding and unrolling done above. */
+ normalize_outermost_list (result);
+ VERIFY_LIST (result);
+ }
+ return result;
+}
+
+
+/* Intersect an argument list constraint with the empty list.
+   The intersection is empty (NULL is returned) exactly when the first
+   argument described by LIST is required; otherwise it is the empty list.  */
+/* Memory effects: LIST is left untouched.  The result, if non-NULL, is
+   freshly allocated.  */
+MAYBE_UNUSED static struct format_arg_list *
+make_intersection_with_empty_list (struct format_arg_list *list)
+{
+#if 0 /* equivalent but slower */
+  return make_intersected_list (copy_list (list), make_empty_list ());
+#else
+  /* The element describing argument 0, if LIST describes any argument.  */
+  const struct format_arg *first = NULL;
+
+  if (list->initial.count > 0)
+    first = &list->initial.element[0];
+  else if (list->repeated.count > 0)
+    first = &list->repeated.element[0];
+
+  if (first != NULL && first->presence == FCT_REQUIRED)
+    return NULL;
+
+  return make_empty_list ();
+#endif
+}
+
+
+/* Intersect two argument list constraints, either of which may be NULL.
+   NULL denotes an impossible situation (a contradiction), so the result is
+   NULL as soon as one of the operands is NULL.  */
+/* Memory effects: list1 and list2 are freed if non-NULL.  The result,
+   if non-NULL, is freshly allocated.  */
+MAYBE_UNUSED static struct format_arg_list *
+intersection (struct format_arg_list *list1, struct format_arg_list *list2)
+{
+  if (list1 != NULL && list2 != NULL)
+    return make_intersected_list (list1, list2);
+
+  /* At least one operand is a contradiction; release the other one.  */
+  if (list1 != NULL)
+    free_list (list1);
+  if (list2 != NULL)
+    free_list (list2);
+  return NULL;
+}
+
+
+/* ===================== Union of two format_arg_lists ===================== */
+
+/* Create the union of an argument list constraint and the empty list.
+   If the first argument described by LIST is required, it is turned into an
+   optional one; otherwise LIST already admits the empty list.  */
+/* Memory effects: LIST is modified in place and returned.  */
+MAYBE_UNUSED static struct format_arg_list *
+make_union_with_empty_list (struct format_arg_list *list)
+{
+  bool first_is_required;
+
+  VERIFY_LIST (list);
+
+  if (list->initial.count > 0)
+    first_is_required = (list->initial.element[0].presence == FCT_REQUIRED);
+  else
+    first_is_required = (list->repeated.count > 0
+                         && list->repeated.element[0].presence == FCT_REQUIRED);
+
+  if (first_is_required)
+    {
+      struct format_arg *head;
+
+      /* Give argument 0 an element of its own, then relax it.  */
+      initial_splitelement (list, 1);
+      ASSERT (list->initial.count > 0);
+      head = &list->initial.element[0];
+      ASSERT (head->repcount == 1);
+      ASSERT (head->presence == FCT_REQUIRED);
+      head->presence = FCT_OPTIONAL;
+
+      /* The first two elements of list->initial might now be mergeable.  */
+      normalize_outermost_list (list);
+    }
+
+  VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* =========== Adding specific constraints to a format_arg_list =========== */
+
+
+/* Return true if the arguments 0..n are all required arguments in LIST.  */
+MAYBE_UNUSED static bool
+is_required (const struct format_arg_list *list, unsigned int n)
+{
+  /* Number of leading presence flags that still have to be verified as
+     FCT_REQUIRED; initially n+1.  */
+  unsigned int todo = n + 1;
+  unsigned int idx;
+
+  /* Consume whole elements of the initial segment.  */
+  idx = 0;
+  while (idx < list->initial.count
+         && todo >= list->initial.element[idx].repcount)
+    {
+      if (list->initial.element[idx].presence != FCT_REQUIRED)
+        return false;
+      todo -= list->initial.element[idx].repcount;
+      idx++;
+    }
+
+  if (todo == 0)
+    return true;
+
+  /* The remaining arguments lie inside a single initial element.  */
+  if (idx < list->initial.count)
+    return list->initial.element[idx].presence == FCT_REQUIRED;
+
+  /* The initial segment is exhausted; without a repeated segment the
+     remaining arguments do not exist.  */
+  if (list->repeated.count == 0)
+    return false;
+
+  /* Consume whole elements of the repeated segment.  */
+  idx = 0;
+  while (idx < list->repeated.count
+         && todo >= list->repeated.element[idx].repcount)
+    {
+      if (list->repeated.element[idx].presence != FCT_REQUIRED)
+        return false;
+      todo -= list->repeated.element[idx].repcount;
+      idx++;
+    }
+
+  if (todo == 0)
+    return true;
+
+  /* The remaining arguments lie inside a single repeated element.  */
+  if (idx < list->repeated.count)
+    return list->repeated.element[idx].presence == FCT_REQUIRED;
+
+  /* Every element of the repeated segment is FCT_REQUIRED.  However many
+     further passes through it would be needed to bring the counter down
+     to 0, the answer is true.  */
+  return true;
+}
+
+
+/* Constrain an argument list so that the arguments 0...n are present.
+   NULL (as input or as result) stands for an impossible situation, i.e. a
+   contradiction.  */
+/* Memory effects: LIST is modified in place and returned, or freed when
+   NULL is returned.  */
+static struct format_arg_list *
+add_required_constraint (struct format_arg_list *list, unsigned int n)
+{
+  if (list == NULL)
+    return NULL;
+
+  VERIFY_LIST (list);
+
+  if (list->repeated.count == 0 && list->initial.length <= n)
+    {
+      /* LIST is finite and already limited to at most n arguments.
+         Contradiction.  */
+      free_list (list);
+      return NULL;
+    }
+
+  /* Make an element boundary fall right after argument n.  */
+  initial_splitelement (list, n + 1);
+
+  /* Mark the elements that cover the first n+1 arguments as required.  */
+  {
+    unsigned int remaining = n + 1;
+    struct format_arg *e = list->initial.element;
+
+    while (remaining > 0)
+      {
+        e->presence = FCT_REQUIRED;
+        remaining -= e->repcount;
+        e++;
+      }
+  }
+
+  VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* Constrain an argument list so that the argument n is never present, i.e.
+   the list ends before argument n.  NULL (as input or as result) stands for
+   an impossible situation, i.e. a contradiction.  */
+/* Memory effects: LIST is modified in place and returned, or freed when
+   NULL is returned.  */
+static struct format_arg_list *
+add_end_constraint (struct format_arg_list *list, unsigned int n)
+{
+  unsigned int cut;
+  unsigned int k;
+  enum format_cdr_type n_presence;
+
+  if (list == NULL)
+    return NULL;
+
+  VERIFY_LIST (list);
+
+  /* A finite list of at most n arguments already satisfies the constraint.  */
+  if (list->repeated.count == 0 && list->initial.length <= n)
+    return list;
+
+  /* Record whether argument n was required, before it gets removed.  */
+  cut = initial_splitelement (list, n);
+  if (cut < list->initial.count)
+    /* n < list->initial.length */
+    n_presence = list->initial.element[cut].presence;
+  else
+    /* n >= list->initial.length */
+    n_presence = list->repeated.element[0].presence;
+
+  /* Drop the initial elements from argument n on.  */
+  k = cut;
+  while (k < list->initial.count)
+    {
+      list->initial.length -= list->initial.element[k].repcount;
+      free_element (&list->initial.element[k]);
+      k++;
+    }
+  list->initial.count = cut;
+
+  /* Drop the entire repeated segment.  */
+  k = 0;
+  while (k < list->repeated.count)
+    {
+      free_element (&list->repeated.element[k]);
+      k++;
+    }
+  free (list->repeated.element);
+  list->repeated.element = NULL;
+  list->repeated.allocated = 0;
+  list->repeated.count = 0;
+  list->repeated.length = 0;
+
+  /* Removing a required argument is a contradiction that has to be resolved
+     by backtracking.  */
+  return (n_presence == FCT_REQUIRED ? backtrack_in_initial (list) : list);
+}
+
+
+/* Constrain an argument list so that the arguments n1..n2 (n1 <= n2) are of
+   a given list type or (if SUBLIST is NULL) of a given non-list type.
+   NULL (as input or as result) stands for an impossible situation, i.e. a
+   contradiction.  Assumes a preceding add_required_constraint (list, n2).  */
+/* Memory effects: LIST is modified in place and returned; it may be freed
+   (with NULL returned) when a contradiction is found.  */
+static struct format_arg_list *
+add_type_constraint (struct format_arg_list *list,
+                     unsigned int n1, unsigned int n2,
+                     enum format_arg_type type,
+                     struct format_arg_list *sublist)
+{
+  struct format_arg newconstraint;
+  unsigned int idx;
+  unsigned int pos;
+
+  if (list == NULL)
+    return NULL;
+
+  /* Thanks to the preceding add_required_constraint, we can assume
+     list->initial.length >= n2+1.  */
+
+  /* Put element boundaries right before n1 and right after n2.  */
+  idx = initial_splitelement (list, n1);
+  initial_splitelement (list, n2 + 1);
+
+  newconstraint.presence = FCT_OPTIONAL;
+  newconstraint.type = type;
+  newconstraint.list = sublist;
+
+  /* Narrow down each element that represents some of the indices n1..n2.
+     POS is the index of the first argument covered by element IDX.  */
+  for (pos = n1; pos <= n2; idx++)
+    {
+      struct format_arg *target = &list->initial.element[idx];
+      struct format_arg narrowed;
+
+      if (!make_intersected_element (&narrowed, target, &newconstraint))
+        {
+          /* Incompatible types: the list would have to end at POS.  */
+          list = add_end_constraint (list, pos);
+          break;
+        }
+      free_element (target);
+      target->type = narrowed.type;
+      target->list = narrowed.list;
+      pos += target->repcount;
+    }
+
+  if (list != NULL)
+    VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* Constrain an argument list so that all the arguments n, n+1, n+2, ..., as
+   far as they exist, are of a given list type or (if SUBLIST is NULL) of a
+   given non-list type.  NULL (as input or as result) stands for an
+   impossible situation, i.e. a contradiction.  */
+/* Memory effects: LIST is modified in place and returned; it may be freed
+   (with NULL returned) when a contradiction is found.  */
+static struct format_arg_list *
+add_repeated_opt_type_constraint (struct format_arg_list *list,
+                                  unsigned int n,
+                                  enum format_arg_type type,
+                                  struct format_arg_list *sublist)
+{
+  struct format_arg newconstraint;
+  unsigned int idx;
+
+  if (list == NULL)
+    return NULL;
+
+  idx = initial_splitelement (list, n);
+
+  newconstraint.presence = FCT_OPTIONAL;
+  newconstraint.type = type;
+  newconstraint.list = sublist;
+
+  /* Narrow down the initial elements that represent the indices >= n.
+     N is advanced along, so that it is always the index of the first
+     argument covered by the element being processed.  */
+  while (idx < list->initial.count)
+    {
+      struct format_arg *target = &list->initial.element[idx];
+      struct format_arg narrowed;
+
+      if (!make_intersected_element (&narrowed, target, &newconstraint))
+        {
+          list = add_end_constraint (list, n);
+          goto done;
+        }
+      free_element (target);
+      target->type = narrowed.type;
+      target->list = narrowed.list;
+      n += target->repcount;
+      idx++;
+    }
+
+  /* Narrow down the repeated elements in the same way.  */
+  idx = 0;
+  while (idx < list->repeated.count)
+    {
+      struct format_arg *target = &list->repeated.element[idx];
+      struct format_arg narrowed;
+
+      if (!make_intersected_element (&narrowed, target, &newconstraint))
+        {
+          list = add_end_constraint (list, n);
+          goto done;
+        }
+      free_element (target);
+      target->type = narrowed.type;
+      target->list = narrowed.list;
+      n += target->repcount;
+      idx++;
+    }
+
+ done:
+  if (list != NULL)
+    VERIFY_LIST (list);
+
+  return list;
+}
+
+
+/* ============= Subroutines used by the format string parser ============= */
+
+/* Require the arguments 0..position2 to be present and constrain the
+   arguments position1..position2 to the given TYPE.  SUBLIST must be
+   non-NULL exactly when TYPE has the FAT_ELEMENTWISE bit set.
+   *LISTP is replaced by the constrained list, or by NULL in case of a
+   contradiction.  */
+static void
+add_req_type_constraint (struct format_arg_list **listp,
+                         unsigned int position1, unsigned int position2,
+                         enum format_arg_type type,
+                         struct format_arg_list *sublist)
+{
+  struct format_arg_list *list = add_required_constraint (*listp, position2);
+
+  if (type & FAT_ELEMENTWISE)
+    {
+      ASSERT (sublist != NULL);
+    }
+  else
+    {
+      ASSERT (sublist == NULL);
+    }
+
+  *listp = add_type_constraint (list, position1, position2, type, sublist);
+}
+
+
+/* ======================= The format string parser ======================= */
+
+/* Constructors for the error messages reported by parse_upto. Each macro
+ expands to an xasprintf or xstrdup call that produces the translated
+ message; the test program's main() passes the stored result to free(). */
+
+/* A range 'm:m2$' was given with m > m2. */
+#define INVALID_ARGNO_ORDER(directive_number) \
+ xasprintf (_("In the directive number %u, the first argument number is greater than the second argument number."), directive_number)
+
+/* The inside of a compound specifier has a repeated (open-ended) argument
+ list. */
+#define INVALID_COMPOUND_VARARG(directive_number) \
+ xasprintf (_("In the directive number %u, the compound specifier consumes a variable number of arguments."), directive_number)
+
+/* The inside of a compound specifier consumes neither 1 nor 2 arguments. */
+#define INVALID_COMPOUND_ARGCOUNT(directive_number, num_arguments) \
+ xasprintf (_("In the directive number %u, the compound specifier consumes %u arguments."), directive_number, num_arguments)
+
+/* '%|' was seen while not parsing a compound specifier. */
+#define INVALID_BAR_OUTSIDE_COMPOUND() \
+ xstrdup (_("Found '%|' outside of '%(...%)'."))
+
+/* The string ended after '%|' without a closing '%)'. */
+#define INVALID_UNTERMINATED_COMPOUND() \
+ xstrdup (_("The string ends in the middle of a compound specifier."))
+
+/* After '%|', a '%' was followed by something other than '%' or ')'. */
+#define INVALID_COMPOUND_DELIMITER(directive_number) \
+ xasprintf (_("In the directive number %u, there is an invalid directive in the delimiter part of a compound specifier."), directive_number)
+
+/* Unbalanced '%(' / '%)'. */
+#define INVALID_NESTING(found_char, notfound_char) \
+ xasprintf (_("Found '%%%c' without matching '%%%c'."), found_char, notfound_char)
+
+/* An implicitly numbered argument follows a directive of the form 'm:$',
+ which already consumed all remaining arguments. */
+#define INVALID_ARG_PAST_LAST(directive_number) \
+ xasprintf (_("The directive number %u references an argument after the last argument."), directive_number)
+
+/* Replaces any earlier definition of this macro (presumably the one from
+ "format-invalid.h" - TODO confirm) by a variant without argument number. */
+#undef INVALID_INCOMPATIBLE_ARG_TYPES
+#define INVALID_INCOMPATIBLE_ARG_TYPES() \
+ xstrdup (_("The string refers to some argument in incompatible ways."))
+
+/* Parse a piece of format string, until the matching terminating format
+   directive is encountered.
+   spec is the struct spec to be filled: its directives counter is
+   incremented and its list receives the argument constraints.
+   *formatp is the remainder of the format string.
+   It is updated upon valid return; inside a compound specifier it then
+   points to the ')' of the terminating '%)'.
+   compound is true inside a compound specifier.
+   fdi is an array to be filled with format directive indicators, or NULL.
+   If the format string is invalid, false is returned and *invalid_reason is
+   set to an error message explaining why.  This includes the case that the
+   directives refer to some argument in incompatible ways: true is returned
+   only with spec->list != NULL.  */
+static bool
+parse_upto (struct spec *spec,
+            const char **formatp, bool compound,
+            char *fdi, char **invalid_reason)
+{
+  const char *format = *formatp;
+  const char *const format_start = format;
+  /* Number of arguments consumed so far by this piece of format string, or
+     UINT_MAX once a directive of the form 'm:$' has consumed all remaining
+     arguments.  */
+  unsigned int arg_count = 0;
+
+  for (; *format != '\0'; )
+    {
+      char c = *format++;
+
+      if (c == '%')
+        {
+          FDI_SET (format - 1, FMTDIR_START);
+
+          /* Count number of directives.  */
+          spec->directives++;
+
+          if (*format == '\0')
+            {
+              *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+              FDI_SET (format - 1, FMTDIR_ERROR);
+              return false;
+            }
+          if (*format == '%')
+            /* A doubled percent-sign.  */
+            format++;
+          else
+            {
+              /* A directive.  */
+              unsigned int first_number = 0;
+              unsigned int second_number = 0;
+              bool second_is_last = false;
+              unsigned int width_number = 0;
+              bool width_from_arg = false;
+              unsigned int precision_number = 0;
+              bool precision_from_arg = false;
+              bool separator_digits_from_arg = false;
+              bool separator_char_from_arg = false;
+
+              /* Parse position.  The digits are only consumed if they turn
+                 out to be followed by '$', ':m2$' or ':$'; otherwise they
+                 are left to be parsed as flags / width.  */
+              if (c_isdigit (*format))
+                {
+                  const char *f = format;
+                  unsigned int m = 0;
+
+                  do
+                    {
+                      m = 10 * m + (*f - '0');
+                      f++;
+                    }
+                  while (c_isdigit (*f));
+
+                  if (*f == '$')
+                    {
+                      if (m == 0)
+                        {
+                          *invalid_reason = INVALID_ARGNO_0 (spec->directives);
+                          FDI_SET (f, FMTDIR_ERROR);
+                          return false;
+                        }
+                      first_number = m;
+                      format = ++f;
+                    }
+                  else if (*f == ':')
+                    {
+                      f++;
+                      if (c_isdigit (*f))
+                        {
+                          unsigned int m2 = 0;
+
+                          do
+                            {
+                              m2 = 10 * m2 + (*f - '0');
+                              f++;
+                            }
+                          while (c_isdigit (*f));
+
+                          if (*f == '$')
+                            {
+                              if (m2 == 0)
+                                {
+                                  *invalid_reason = INVALID_ARGNO_0 (spec->directives);
+                                  FDI_SET (f, FMTDIR_ERROR);
+                                  return false;
+                                }
+                              if (m > m2)
+                                {
+                                  *invalid_reason = INVALID_ARGNO_ORDER (spec->directives);
+                                  FDI_SET (f, FMTDIR_ERROR);
+                                  return false;
+                                }
+                              first_number = m;
+                              second_number = m2;
+                              format = ++f;
+                            }
+                        }
+                      else if (*f == '$')
+                        {
+                          first_number = m;
+                          second_is_last = true;
+                          format = ++f;
+                        }
+                    }
+                }
+
+              /* Parse flags.  */
+              while (*format == ' ' || *format == '+' || *format == '-'
+                     || *format == '#' || *format == '0' || *format == '=')
+                format++;
+
+              /* Parse width.  */
+              if (c_isdigit (*format))
+                {
+                  do format++; while (c_isdigit (*format));
+                }
+              else if (*format == '*')
+                {
+                  format++;
+                  if (c_isdigit (*format))
+                    {
+                      const char *f = format;
+                      unsigned int m = 0;
+
+                      do
+                        {
+                          m = 10 * m + (*f - '0');
+                          f++;
+                        }
+                      while (c_isdigit (*f));
+
+                      if (*f == '$')
+                        {
+                          if (m == 0)
+                            {
+                              *invalid_reason = INVALID_WIDTH_ARGNO_0 (spec->directives);
+                              FDI_SET (f, FMTDIR_ERROR);
+                              return false;
+                            }
+                          width_number = m;
+                          format = ++f;
+                        }
+                    }
+                  if (width_number == 0)
+                    width_from_arg = true;
+                }
+
+              /* Parse precision.  */
+              if (*format == '.')
+                {
+                  format++;
+
+                  if (c_isdigit (*format))
+                    {
+                      do format++; while (c_isdigit (*format));
+                    }
+                  else if (*format == '*')
+                    {
+                      format++;
+                      if (c_isdigit (*format))
+                        {
+                          const char *f = format;
+                          unsigned int m = 0;
+
+                          do
+                            {
+                              m = 10 * m + (*f - '0');
+                              f++;
+                            }
+                          while (c_isdigit (*f));
+
+                          if (*f == '$')
+                            {
+                              if (m == 0)
+                                {
+                                  /* Report the precision, not the width, as
+                                     the culprit.  */
+                                  *invalid_reason = INVALID_PRECISION_ARGNO_0 (spec->directives);
+                                  FDI_SET (f, FMTDIR_ERROR);
+                                  return false;
+                                }
+                              precision_number = m;
+                              format = ++f;
+                            }
+                        }
+                      if (precision_number == 0)
+                        precision_from_arg = true;
+                    }
+                }
+
+              /* Parse separator.  */
+              if (*format == ',')
+                {
+                  format++;
+
+                  if (c_isdigit (*format))
+                    {
+                      do format++; while (c_isdigit (*format));
+                    }
+                  else if (*format == '*')
+                    {
+                      format++;
+                      separator_digits_from_arg = true;
+                    }
+
+                  if (*format == '?')
+                    {
+                      format++;
+                      separator_char_from_arg = true;
+                    }
+                }
+
+              enum format_arg_type type;
+              struct format_arg_list *elementwise_list = NULL;
+
+              /* Parse specifier.  */
+              switch (*format)
+                {
+                case 's':
+                  type = FAT_BOOL | FAT_INTEGER | FAT_FLOATINGPOINT | FAT_CHAR | FAT_ARRAY | FAT_ASSOCIATIVE | FAT_IRANGE | FAT_STRUCT | FAT_POINTER;
+                  break;
+                case 'c':
+                  type = FAT_CHAR;
+                  break;
+                case 'd': case 'u': case 'b': case 'o':
+                  type = FAT_BOOL | FAT_INTEGER | FAT_CHAR;
+                  break;
+                case 'x': case 'X':
+                  type = FAT_BOOL | FAT_INTEGER | FAT_CHAR | FAT_POINTER;
+                  break;
+                case 'e': case 'E': case 'f': case 'F':
+                case 'g': case 'G': case 'a': case 'A':
+                  type = FAT_INTEGER | FAT_FLOATINGPOINT;
+                  break;
+                case 'r':
+                  type = FAT_BOOL | FAT_INTEGER | FAT_FLOATINGPOINT | FAT_CHAR | FAT_ARRAY | FAT_IRANGE;
+                  break;
+                case '(':
+                  /* A compound specifier.  */
+                  format++;
+                  {
+                    /* The inside of the compound specifier is parsed
+                       recursively, with an argument list of its own.  */
+                    struct spec sub_spec;
+                    sub_spec.directives = 0;
+                    sub_spec.list = make_unconstrained_list ();
+                    *formatp = format;
+                    if (!parse_upto (&sub_spec, formatp, true, fdi, invalid_reason))
+                      {
+                        FDI_SET (**formatp == '\0' ? *formatp - 1 : *formatp,
+                                 FMTDIR_ERROR);
+                        return false;
+                      }
+                    format = *formatp;
+                    /* Here sub_spec.list != NULL, since the recursive call
+                       returned true.  */
+                    elementwise_list = sub_spec.list;
+                    if (elementwise_list->repeated.count > 0)
+                      {
+                        /* Test case: "%(%1:$s%)"  */
+                        *invalid_reason = INVALID_COMPOUND_VARARG (spec->directives);
+                        FDI_SET (format - 1, FMTDIR_ERROR);
+                        return false;
+                      }
+                    if (elementwise_list->initial.length == 1)
+                      type = FAT_ELEMENTWISE_1;
+                    else if (elementwise_list->initial.length == 2)
+                      type = FAT_ELEMENTWISE_2;
+                    else
+                      {
+                        /* Test case: "%(%s %s %s%)"  */
+                        *invalid_reason = INVALID_COMPOUND_ARGCOUNT (spec->directives, elementwise_list->initial.length);
+                        FDI_SET (format - 1, FMTDIR_ERROR);
+                        return false;
+                      }
+                  }
+                  break;
+                case '|':
+                  if (!compound)
+                    {
+                      *invalid_reason = INVALID_BAR_OUTSIDE_COMPOUND ();
+                      FDI_SET (format, FMTDIR_ERROR);
+                      return false;
+                    }
+                  /* Parse the second part of a compound specifier.  It may
+                     contain literal text and '%%', and ends at '%)'.  */
+                  format++;
+                  for (;;)
+                    {
+                      if (*format == '\0')
+                        {
+                          *invalid_reason = INVALID_UNTERMINATED_COMPOUND ();
+                          FDI_SET (format - 1, FMTDIR_ERROR);
+                          return false;
+                        }
+                      if (*format == '%')
+                        {
+                          format++;
+                          if (*format == '%')
+                            format++;
+                          else if (*format == ')')
+                            break;
+                          else
+                            {
+                              *invalid_reason = INVALID_COMPOUND_DELIMITER (spec->directives);
+                              FDI_SET (format, FMTDIR_ERROR);
+                              return false;
+                            }
+                        }
+                      else
+                        format++;
+                    }
+                  /* Here (*format == ')').  */
+                  FALLTHROUGH;
+                case ')':
+                  if (!compound)
+                    {
+                      *invalid_reason = INVALID_NESTING (')', '(');
+                      FDI_SET (format, FMTDIR_ERROR);
+                      return false;
+                    }
+                  goto done;
+                default:
+                  if (*format == '\0')
+                    {
+                      *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE ();
+                      FDI_SET (format - 1, FMTDIR_ERROR);
+                    }
+                  else
+                    {
+                      *invalid_reason = INVALID_CONVERSION_SPECIFIER (spec->directives, *format);
+                      FDI_SET (format, FMTDIR_ERROR);
+                    }
+                  return false;
+                }
+
+              /* Register the arguments consumed by this directive, in this
+                 order: width, precision, separator digits, separator
+                 character, then the value(s) to be formatted.  */
+
+              if (width_number > 0)
+                {
+                  add_req_type_constraint (&spec->list, width_number - 1, width_number - 1,
+                                           FAT_INTEGER, NULL);
+                  if (arg_count < width_number)
+                    arg_count = width_number;
+                }
+              else if (width_from_arg)
+                {
+                  if (arg_count == UINT_MAX)
+                    {
+                      *invalid_reason = INVALID_ARG_PAST_LAST (spec->directives);
+                      FDI_SET (format, FMTDIR_ERROR);
+                      return false;
+                    }
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           FAT_INTEGER, NULL);
+                  arg_count++;
+                }
+
+              if (precision_number > 0)
+                {
+                  add_req_type_constraint (&spec->list, precision_number - 1, precision_number - 1,
+                                           FAT_INTEGER, NULL);
+                  if (arg_count < precision_number)
+                    arg_count = precision_number;
+                }
+              else if (precision_from_arg)
+                {
+                  if (arg_count == UINT_MAX)
+                    {
+                      *invalid_reason = INVALID_ARG_PAST_LAST (spec->directives);
+                      FDI_SET (format, FMTDIR_ERROR);
+                      return false;
+                    }
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           FAT_INTEGER, NULL);
+                  arg_count++;
+                }
+
+              if (separator_digits_from_arg)
+                {
+                  if (arg_count == UINT_MAX)
+                    {
+                      *invalid_reason = INVALID_ARG_PAST_LAST (spec->directives);
+                      FDI_SET (format, FMTDIR_ERROR);
+                      return false;
+                    }
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           FAT_INTEGER, NULL);
+                  arg_count++;
+                }
+
+              if (separator_char_from_arg)
+                {
+                  if (arg_count == UINT_MAX)
+                    {
+                      *invalid_reason = INVALID_ARG_PAST_LAST (spec->directives);
+                      FDI_SET (format, FMTDIR_ERROR);
+                      return false;
+                    }
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           FAT_CHAR, NULL);
+                  arg_count++;
+                }
+
+              if (first_number > 0)
+                {
+                  if (second_number > 0)
+                    {
+                      /* 'm:m2$': the arguments m..m2.  */
+                      add_req_type_constraint (&spec->list, first_number - 1, second_number - 1,
+                                               type, elementwise_list);
+                      if (arg_count < second_number)
+                        arg_count = second_number;
+                    }
+                  else if (second_is_last)
+                    {
+                      /* 'm:$': the argument m and all arguments after it.  */
+                      add_req_type_constraint (&spec->list, first_number - 1, first_number - 1,
+                                               type, elementwise_list);
+                      spec->list = add_repeated_opt_type_constraint (spec->list, first_number,
+                                                                     type, elementwise_list);
+                      arg_count = UINT_MAX;
+                    }
+                  else
+                    {
+                      /* 'm$': the argument m.  */
+                      add_req_type_constraint (&spec->list, first_number - 1, first_number - 1,
+                                               type, elementwise_list);
+                      if (arg_count < first_number)
+                        arg_count = first_number;
+                    }
+                }
+              else
+                {
+                  /* No position given: the next argument.  */
+                  if (arg_count == UINT_MAX)
+                    {
+                      *invalid_reason = INVALID_ARG_PAST_LAST (spec->directives);
+                      FDI_SET (format, FMTDIR_ERROR);
+                      return false;
+                    }
+                  add_req_type_constraint (&spec->list, arg_count, arg_count,
+                                           type, elementwise_list);
+                  arg_count++;
+                }
+
+              if (type & FAT_ELEMENTWISE)
+                free_list (elementwise_list);
+
+              FDI_SET (format, FMTDIR_END);
+
+              format++;
+            }
+        }
+    }
+
+  if (compound)
+    {
+      /* The string ended without the '%)' that terminates the compound
+         specifier.  */
+      *invalid_reason = INVALID_NESTING ('(', ')');
+      return false;
+    }
+
+ done:
+  *formatp = format;
+
+  /* Extra arguments at the end are not allowed.  */
+  if (arg_count != UINT_MAX)
+    spec->list = add_end_constraint (spec->list, arg_count);
+
+  /* A NULL list means that the constraints collected above contradict each
+     other.  Report this with an explanation in every case, so that callers
+     neither see a failure without *invalid_reason nor a success with a NULL
+     list (which the compound specifier code above would dereference).  */
+  if (spec->list == NULL)
+    {
+      *invalid_reason = INVALID_INCOMPATIBLE_ARG_TYPES ();
+      return false;
+    }
+
+  return true;
+}
+
+
+/* ============== Top level format string handling functions ============== */
+
+/* Parse the format string FORMAT.
+   TRANSLATED is not used by this parser.
+   FDI is an array to be filled with format directive indicators, or NULL.
+   Returns a freshly allocated 'struct spec' describing the directives, or
+   NULL if the format string is invalid; in the latter case *INVALID_REASON
+   is expected to hold an explanation.  */
+static void *
+format_parse (const char *format, bool translated, char *fdi,
+              char **invalid_reason)
+{
+  struct spec spec;
+  struct spec *result;
+
+  spec.directives = 0;
+  spec.list = make_unconstrained_list ();
+
+  if (!parse_upto (&spec, &format, false,
+                   fdi, invalid_reason))
+    {
+      /* Invalid format string.  Do not leak the argument list built so far;
+         it is NULL if a contradiction already disposed of it.  */
+      if (spec.list != NULL)
+        free_list (spec.list);
+      return NULL;
+    }
+
+  if (spec.list == NULL)
+    {
+      /* Contradictory argument type information.  */
+      *invalid_reason = INVALID_INCOMPATIBLE_ARG_TYPES ();
+      return NULL;
+    }
+
+  /* Normalize the result.  */
+  normalize_list (spec.list);
+
+  result = XMALLOC (struct spec);
+  *result = spec;
+  return result;
+}
+
+/* Release a descriptor returned by format_parse: both its argument list and
+   the 'struct spec' itself, which format_parse allocated with XMALLOC.  */
+static void
+format_free (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+
+  free_list (spec->list);
+  free (spec);
+}
+
+/* Return the number of directives counted while parsing the format string
+   that DESCR (a result of format_parse) describes.  */
+static int
+format_get_number_of_directives (void *descr)
+{
+  const struct spec *parsed = (struct spec *) descr;
+  return parsed->directives;
+}
+
+/* Compare the argument constraints of a msgid and of a msgstr, both given
+   as results of format_parse.  Returns true if they are incompatible; in
+   that case the problem is reported through ERROR_LOGGER, if non-NULL.
+   The EQUALITY argument is ignored: equivalence is always demanded, see
+   below.  */
+static bool
+format_check (void *msgid_descr, void *msgstr_descr, bool equality,
+              formatstring_error_logger_t error_logger, void *error_logger_data,
+              const char *pretty_msgid, const char *pretty_msgstr)
+{
+  struct spec *spec1 = (struct spec *) msgid_descr;
+  struct spec *spec2 = (struct spec *) msgstr_descr;
+
+  /* The formatting functions in the D module std.format treat an unused
+     argument at the end of the argument list as an error.  Therefore the
+     translator must not omit some of the arguments.
+     This could be mitigated in format strings with two or more directives.
+     Example:
+       "%2$s bought a piece." vs. "%2$s bought %1$d pieces."
+     Here the unused argument (argument 1) would not be at the end of the
+     argument list.  But this does not help with the more frequent case:
+       "a piece" vs. "%d pieces"
+     Therefore we recommend the zero-precision workaround in the
+     documentation:
+       "%.0sa piece" vs. "%s pieces"
+   */
+  equality = true;
+
+  if (equality)
+    {
+      if (equal_list (spec1->list, spec2->list))
+        return false;
+
+      if (error_logger)
+        error_logger (error_logger_data,
+                      _("format specifications in '%s' and '%s' are not equivalent"),
+                      pretty_msgid, pretty_msgstr);
+      return true;
+    }
+
+  /* Subset check: the msgstr constraints must be unchanged by intersecting
+     them with the msgid constraints.  */
+  struct format_arg_list *common =
+    make_intersected_list (copy_list (spec1->list),
+                           copy_list (spec2->list));
+
+  if (common != NULL)
+    {
+      normalize_list (common);
+      if (equal_list (common, spec2->list))
+        return false;
+    }
+
+  if (error_logger)
+    error_logger (error_logger_data,
+                  _("format specifications in '%s' are not a subset of those in '%s'"),
+                  pretty_msgstr, pretty_msgid);
+  return true;
+}
+
+
+/* The format string parser descriptor for D: bundles the entry points
+ defined above. The meaning of each slot is given by the declaration of
+ struct formatstring_parser (not visible here, presumably in "format.h"). */
+struct formatstring_parser formatstring_d =
+{
+ format_parse,
+ format_free,
+ format_get_number_of_directives,
+ NULL, /* this slot is left unset for D */
+ format_check
+};
+
+
+/* ============================= Testing code ============================= */
+
+#ifdef TEST
+
+/* Test program: Print the argument list specification returned by
+ format_parse for strings read from standard input. */
+
+#include <stdio.h>
+
+static void print_list (struct format_arg_list *list);
+
+/* Print a single argument constraint to standard output: ". " if it is
+   optional, then either the arity of an elementwise type followed by its
+   sublist, or "*" for any type, or one letter per admitted type.  */
+static void
+print_element (struct format_arg *element)
+{
+  /* The letters of the non-list types, in output order.  */
+  static const struct { unsigned int mask; const char *letter; } letters[] =
+    {
+      { FAT_BOOL, "b" },
+      { FAT_INTEGER, "i" },
+      { FAT_FLOATINGPOINT, "f" },
+      { FAT_CHAR, "c" },
+      { FAT_ARRAY, "a" },
+      { FAT_ASSOCIATIVE, "@" },
+      { FAT_IRANGE, "r" },
+      { FAT_STRUCT, "s" },
+      { FAT_POINTER, "p" }
+    };
+
+  if (element->presence == FCT_OPTIONAL)
+    printf (". ");
+  else if (element->presence != FCT_REQUIRED)
+    abort ();
+
+  if (element->type == FAT_NONE)
+    abort ();
+
+  if (element->type & FAT_ELEMENTWISE)
+    {
+      if (element->type == FAT_ELEMENTWISE_1)
+        printf ("1");
+      else if (element->type == FAT_ELEMENTWISE_2)
+        printf ("2");
+      else
+        abort ();
+      print_list (element->list);
+    }
+  else if (element->type == FAT_ANY_TYPE)
+    printf ("*");
+  else
+    {
+      size_t k;
+
+      for (k = 0; k < sizeof (letters) / sizeof (letters[0]); k++)
+        if (element->type & letters[k].mask)
+          printf ("%s", letters[k].letter);
+    }
+}
+
+/* Print an argument list constraint to standard output, in parentheses:
+   the initial arguments separated by spaces, then, if there is a repeated
+   segment, " |" followed by the repeated arguments.  An element with a
+   repcount is printed that many times.  */
+static void
+print_list (struct format_arg_list *list)
+{
+  bool at_start = true;
+  unsigned int e;
+  unsigned int rep;
+
+  printf ("(");
+
+  for (e = 0; e < list->initial.count; e++)
+    {
+      struct format_arg *element = &list->initial.element[e];
+
+      for (rep = 0; rep < element->repcount; rep++)
+        {
+          /* Separator between, but not before, the initial arguments.  */
+          if (!at_start)
+            printf (" ");
+          at_start = false;
+          print_element (element);
+        }
+    }
+
+  if (list->repeated.count > 0)
+    {
+      printf (" |");
+      for (e = 0; e < list->repeated.count; e++)
+        {
+          struct format_arg *element = &list->repeated.element[e];
+
+          for (rep = 0; rep < element->repcount; rep++)
+            {
+              printf (" ");
+              print_element (element);
+            }
+        }
+    }
+
+  printf (")");
+}
+
+/* Print the result of format_parse to standard output: the argument list
+   constraint, or "INVALID" if DESCR is NULL.  */
+static void
+format_print (void *descr)
+{
+  struct spec *spec = (struct spec *) descr;
+
+  if (spec != NULL)
+    print_list (spec->list);
+  else
+    printf ("INVALID");
+}
+
+/* Test driver: read format strings from standard input, one per line, and
+   for each print the argument list specification computed by format_parse,
+   or "INVALID" followed by the reason if the string is invalid.  */
+int
+main (void)
+{
+  for (;;)
+    {
+      char *line = NULL;
+      size_t line_size = 0;
+      /* getline returns an ssize_t; an int could truncate it.  */
+      ssize_t line_len;
+      char *invalid_reason;
+      void *descr;
+
+      line_len = getline (&line, &line_size, stdin);
+      if (line_len < 0)
+        {
+          /* getline may have allocated the buffer even though it failed.  */
+          free (line);
+          break;
+        }
+      /* Strip the trailing newline.  */
+      if (line_len > 0 && line[line_len - 1] == '\n')
+        line[--line_len] = '\0';
+
+      invalid_reason = NULL;
+      descr = format_parse (line, false, NULL, &invalid_reason);
+
+      format_print (descr);
+      printf ("\n");
+      if (descr == NULL)
+        printf ("%s\n", invalid_reason);
+
+      free (invalid_reason);
+      free (line);
+    }
+
+  return 0;
+}
+
+/*
+ * For Emacs M-x compile
+ * Local Variables:
+ * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-d.c ../gnulib-lib/libgettextlib.la"
+ * End:
+ */
+
+#endif /* TEST */
--- /dev/null
+/* List of HTML 5 entities.
+ Copyright (C) 2025 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025. */
+/* Data extracted from
+ <https://html.spec.whatwg.org/multipage/named-characters.html> */
+
+/* Table of HTML5 named character entities.
+ Split into two tables, one for name lengths <= 15, one for name lengths > 15,
+ in order to reduce the binary size of the tables.
+ Each of the tables is sorted by name in ascending order. */
+static const struct { const char name[15 + 1]; const char value[6 + 1]; } html5short[] =
+{
+ { "AElig", "Æ" }, /* U+00C6 */
+ { "AMP", "&" }, /* U+0026 */
+ { "Aacute", "Á" }, /* U+00C1 */
+ { "Abreve", "Ă" }, /* U+0102 */
+ { "Acirc", "Â" }, /* U+00C2 */
+ { "Acy", "А" }, /* U+0410 */
+ { "Afr", "𝔄" }, /* U+1D504 */
+ { "Agrave", "À" }, /* U+00C0 */
+ { "Alpha", "Α" }, /* U+0391 */
+ { "Amacr", "Ā" }, /* U+0100 */
+ { "And", "⩓" }, /* U+2A53 */
+ { "Aogon", "Ą" }, /* U+0104 */
+ { "Aopf", "𝔸" }, /* U+1D538 */
+ { "ApplyFunction", "" }, /* U+2061 */
+ { "Aring", "Å" }, /* U+00C5 */
+ { "Ascr", "𝒜" }, /* U+1D49C */
+ { "Assign", "≔" }, /* U+2254 */
+ { "Atilde", "Ã" }, /* U+00C3 */
+ { "Auml", "Ä" }, /* U+00C4 */
+ { "Backslash", "∖" }, /* U+2216 */
+ { "Barv", "⫧" }, /* U+2AE7 */
+ { "Barwed", "⌆" }, /* U+2306 */
+ { "Bcy", "Б" }, /* U+0411 */
+ { "Because", "∵" }, /* U+2235 */
+ { "Bernoullis", "ℬ" }, /* U+212C */
+ { "Beta", "Β" }, /* U+0392 */
+ { "Bfr", "𝔅" }, /* U+1D505 */
+ { "Bopf", "𝔹" }, /* U+1D539 */
+ { "Breve", "˘" }, /* U+02D8 */
+ { "Bscr", "ℬ" }, /* U+212C */
+ { "Bumpeq", "≎" }, /* U+224E */
+ { "CHcy", "Ч" }, /* U+0427 */
+ { "COPY", "©" }, /* U+00A9 */
+ { "Cacute", "Ć" }, /* U+0106 */
+ { "Cap", "⋒" }, /* U+22D2 */
+ { "Cayleys", "ℭ" }, /* U+212D */
+ { "Ccaron", "Č" }, /* U+010C */
+ { "Ccedil", "Ç" }, /* U+00C7 */
+ { "Ccirc", "Ĉ" }, /* U+0108 */
+ { "Cconint", "∰" }, /* U+2230 */
+ { "Cdot", "Ċ" }, /* U+010A */
+ { "Cedilla", "¸" }, /* U+00B8 */
+ { "CenterDot", "·" }, /* U+00B7 */
+ { "Cfr", "ℭ" }, /* U+212D */
+ { "Chi", "Χ" }, /* U+03A7 */
+ { "CircleDot", "⊙" }, /* U+2299 */
+ { "CircleMinus", "⊖" }, /* U+2296 */
+ { "CirclePlus", "⊕" }, /* U+2295 */
+ { "CircleTimes", "⊗" }, /* U+2297 */
+ { "CloseCurlyQuote", "’" }, /* U+2019 */
+ { "Colon", "∷" }, /* U+2237 */
+ { "Colone", "⩴" }, /* U+2A74 */
+ { "Congruent", "≡" }, /* U+2261 */
+ { "Conint", "∯" }, /* U+222F */
+ { "ContourIntegral", "∮" }, /* U+222E */
+ { "Copf", "ℂ" }, /* U+2102 */
+ { "Coproduct", "∐" }, /* U+2210 */
+ { "Cross", "⨯" }, /* U+2A2F */
+ { "Cscr", "𝒞" }, /* U+1D49E */
+ { "Cup", "⋓" }, /* U+22D3 */
+ { "CupCap", "≍" }, /* U+224D */
+ { "DD", "ⅅ" }, /* U+2145 */
+ { "DDotrahd", "⤑" }, /* U+2911 */
+ { "DJcy", "Ђ" }, /* U+0402 */
+ { "DScy", "Ѕ" }, /* U+0405 */
+ { "DZcy", "Џ" }, /* U+040F */
+ { "Dagger", "‡" }, /* U+2021 */
+ { "Darr", "↡" }, /* U+21A1 */
+ { "Dashv", "⫤" }, /* U+2AE4 */
+ { "Dcaron", "Ď" }, /* U+010E */
+ { "Dcy", "Д" }, /* U+0414 */
+ { "Del", "∇" }, /* U+2207 */
+ { "Delta", "Δ" }, /* U+0394 */
+ { "Dfr", "𝔇" }, /* U+1D507 */
+ { "DiacriticalDot", "˙" }, /* U+02D9 */
+ { "Diamond", "⋄" }, /* U+22C4 */
+ { "DifferentialD", "ⅆ" }, /* U+2146 */
+ { "Dopf", "𝔻" }, /* U+1D53B */
+ { "Dot", "¨" }, /* U+00A8 */
+ { "DotDot", "⃜" }, /* U+20DC */
+ { "DotEqual", "≐" }, /* U+2250 */
+ { "DoubleDot", "¨" }, /* U+00A8 */
+ { "DoubleDownArrow", "⇓" }, /* U+21D3 */
+ { "DoubleLeftArrow", "⇐" }, /* U+21D0 */
+ { "DoubleLeftTee", "⫤" }, /* U+2AE4 */
+ { "DoubleRightTee", "⊨" }, /* U+22A8 */
+ { "DoubleUpArrow", "⇑" }, /* U+21D1 */
+ { "DownArrow", "↓" }, /* U+2193 */
+ { "DownArrowBar", "⤓" }, /* U+2913 */
+ { "DownBreve", "̑" }, /* U+0311 */
+ { "DownLeftVector", "↽" }, /* U+21BD */
+ { "DownRightVector", "⇁" }, /* U+21C1 */
+ { "DownTee", "⊤" }, /* U+22A4 */
+ { "DownTeeArrow", "↧" }, /* U+21A7 */
+ { "Downarrow", "⇓" }, /* U+21D3 */
+ { "Dscr", "𝒟" }, /* U+1D49F */
+ { "Dstrok", "Đ" }, /* U+0110 */
+ { "ENG", "Ŋ" }, /* U+014A */
+ { "ETH", "Ð" }, /* U+00D0 */
+ { "Eacute", "É" }, /* U+00C9 */
+ { "Ecaron", "Ě" }, /* U+011A */
+ { "Ecirc", "Ê" }, /* U+00CA */
+ { "Ecy", "Э" }, /* U+042D */
+ { "Edot", "Ė" }, /* U+0116 */
+ { "Efr", "𝔈" }, /* U+1D508 */
+ { "Egrave", "È" }, /* U+00C8 */
+ { "Element", "∈" }, /* U+2208 */
+ { "Emacr", "Ē" }, /* U+0112 */
+ { "Eogon", "Ę" }, /* U+0118 */
+ { "Eopf", "𝔼" }, /* U+1D53C */
+ { "Epsilon", "Ε" }, /* U+0395 */
+ { "Equal", "⩵" }, /* U+2A75 */
+ { "EqualTilde", "≂" }, /* U+2242 */
+ { "Equilibrium", "⇌" }, /* U+21CC */
+ { "Escr", "ℰ" }, /* U+2130 */
+ { "Esim", "⩳" }, /* U+2A73 */
+ { "Eta", "Η" }, /* U+0397 */
+ { "Euml", "Ë" }, /* U+00CB */
+ { "Exists", "∃" }, /* U+2203 */
+ { "ExponentialE", "ⅇ" }, /* U+2147 */
+ { "Fcy", "Ф" }, /* U+0424 */
+ { "Ffr", "𝔉" }, /* U+1D509 */
+ { "Fopf", "𝔽" }, /* U+1D53D */
+ { "ForAll", "∀" }, /* U+2200 */
+ { "Fouriertrf", "ℱ" }, /* U+2131 */
+ { "Fscr", "ℱ" }, /* U+2131 */
+ { "GJcy", "Ѓ" }, /* U+0403 */
+ { "GT", ">" }, /* U+003E */
+ { "Gamma", "Γ" }, /* U+0393 */
+ { "Gammad", "Ϝ" }, /* U+03DC */
+ { "Gbreve", "Ğ" }, /* U+011E */
+ { "Gcedil", "Ģ" }, /* U+0122 */
+ { "Gcirc", "Ĝ" }, /* U+011C */
+ { "Gcy", "Г" }, /* U+0413 */
+ { "Gdot", "Ġ" }, /* U+0120 */
+ { "Gfr", "𝔊" }, /* U+1D50A */
+ { "Gg", "⋙" }, /* U+22D9 */
+ { "Gopf", "𝔾" }, /* U+1D53E */
+ { "GreaterEqual", "≥" }, /* U+2265 */
+ { "GreaterGreater", "⪢" }, /* U+2AA2 */
+ { "GreaterLess", "≷" }, /* U+2277 */
+ { "GreaterTilde", "≳" }, /* U+2273 */
+ { "Gscr", "𝒢" }, /* U+1D4A2 */
+ { "Gt", "≫" }, /* U+226B */
+ { "HARDcy", "Ъ" }, /* U+042A */
+ { "Hacek", "ˇ" }, /* U+02C7 */
+ { "Hat", "^" }, /* U+005E */
+ { "Hcirc", "Ĥ" }, /* U+0124 */
+ { "Hfr", "ℌ" }, /* U+210C */
+ { "HilbertSpace", "ℋ" }, /* U+210B */
+ { "Hopf", "ℍ" }, /* U+210D */
+ { "HorizontalLine", "─" }, /* U+2500 */
+ { "Hscr", "ℋ" }, /* U+210B */
+ { "Hstrok", "Ħ" }, /* U+0126 */
+ { "HumpDownHump", "≎" }, /* U+224E */
+ { "HumpEqual", "≏" }, /* U+224F */
+ { "IEcy", "Е" }, /* U+0415 */
+ { "IJlig", "IJ" }, /* U+0132 */
+ { "IOcy", "Ё" }, /* U+0401 */
+ { "Iacute", "Í" }, /* U+00CD */
+ { "Icirc", "Î" }, /* U+00CE */
+ { "Icy", "И" }, /* U+0418 */
+ { "Idot", "İ" }, /* U+0130 */
+ { "Ifr", "ℑ" }, /* U+2111 */
+ { "Igrave", "Ì" }, /* U+00CC */
+ { "Im", "ℑ" }, /* U+2111 */
+ { "Imacr", "Ī" }, /* U+012A */
+ { "ImaginaryI", "ⅈ" }, /* U+2148 */
+ { "Implies", "⇒" }, /* U+21D2 */
+ { "Int", "∬" }, /* U+222C */
+ { "Integral", "∫" }, /* U+222B */
+ { "Intersection", "⋂" }, /* U+22C2 */
+ { "InvisibleComma", "" }, /* U+2063 */
+ { "InvisibleTimes", "" }, /* U+2062 */
+ { "Iogon", "Į" }, /* U+012E */
+ { "Iopf", "𝕀" }, /* U+1D540 */
+ { "Iota", "Ι" }, /* U+0399 */
+ { "Iscr", "ℐ" }, /* U+2110 */
+ { "Itilde", "Ĩ" }, /* U+0128 */
+ { "Iukcy", "І" }, /* U+0406 */
+ { "Iuml", "Ï" }, /* U+00CF */
+ { "Jcirc", "Ĵ" }, /* U+0134 */
+ { "Jcy", "Й" }, /* U+0419 */
+ { "Jfr", "𝔍" }, /* U+1D50D */
+ { "Jopf", "𝕁" }, /* U+1D541 */
+ { "Jscr", "𝒥" }, /* U+1D4A5 */
+ { "Jsercy", "Ј" }, /* U+0408 */
+ { "Jukcy", "Є" }, /* U+0404 */
+ { "KHcy", "Х" }, /* U+0425 */
+ { "KJcy", "Ќ" }, /* U+040C */
+ { "Kappa", "Κ" }, /* U+039A */
+ { "Kcedil", "Ķ" }, /* U+0136 */
+ { "Kcy", "К" }, /* U+041A */
+ { "Kfr", "𝔎" }, /* U+1D50E */
+ { "Kopf", "𝕂" }, /* U+1D542 */
+ { "Kscr", "𝒦" }, /* U+1D4A6 */
+ { "LJcy", "Љ" }, /* U+0409 */
+ { "LT", "<" }, /* U+003C */
+ { "Lacute", "Ĺ" }, /* U+0139 */
+ { "Lambda", "Λ" }, /* U+039B */
+ { "Lang", "⟪" }, /* U+27EA */
+ { "Laplacetrf", "ℒ" }, /* U+2112 */
+ { "Larr", "↞" }, /* U+219E */
+ { "Lcaron", "Ľ" }, /* U+013D */
+ { "Lcedil", "Ļ" }, /* U+013B */
+ { "Lcy", "Л" }, /* U+041B */
+ { "LeftArrow", "←" }, /* U+2190 */
+ { "LeftArrowBar", "⇤" }, /* U+21E4 */
+ { "LeftCeiling", "⌈" }, /* U+2308 */
+ { "LeftDownVector", "⇃" }, /* U+21C3 */
+ { "LeftFloor", "⌊" }, /* U+230A */
+ { "LeftRightArrow", "↔" }, /* U+2194 */
+ { "LeftRightVector", "⥎" }, /* U+294E */
+ { "LeftTee", "⊣" }, /* U+22A3 */
+ { "LeftTeeArrow", "↤" }, /* U+21A4 */
+ { "LeftTeeVector", "⥚" }, /* U+295A */
+ { "LeftTriangle", "⊲" }, /* U+22B2 */
+ { "LeftTriangleBar", "⧏" }, /* U+29CF */
+ { "LeftUpTeeVector", "⥠" }, /* U+2960 */
+ { "LeftUpVector", "↿" }, /* U+21BF */
+ { "LeftUpVectorBar", "⥘" }, /* U+2958 */
+ { "LeftVector", "↼" }, /* U+21BC */
+ { "LeftVectorBar", "⥒" }, /* U+2952 */
+ { "Leftarrow", "⇐" }, /* U+21D0 */
+ { "Leftrightarrow", "⇔" }, /* U+21D4 */
+ { "LessFullEqual", "≦" }, /* U+2266 */
+ { "LessGreater", "≶" }, /* U+2276 */
+ { "LessLess", "⪡" }, /* U+2AA1 */
+ { "LessSlantEqual", "⩽" }, /* U+2A7D */
+ { "LessTilde", "≲" }, /* U+2272 */
+ { "Lfr", "𝔏" }, /* U+1D50F */
+ { "Ll", "⋘" }, /* U+22D8 */
+ { "Lleftarrow", "⇚" }, /* U+21DA */
+ { "Lmidot", "Ŀ" }, /* U+013F */
+ { "LongLeftArrow", "⟵" }, /* U+27F5 */
+ { "LongRightArrow", "⟶" }, /* U+27F6 */
+ { "Longleftarrow", "⟸" }, /* U+27F8 */
+ { "Longrightarrow", "⟹" }, /* U+27F9 */
+ { "Lopf", "𝕃" }, /* U+1D543 */
+ { "LowerLeftArrow", "↙" }, /* U+2199 */
+ { "LowerRightArrow", "↘" }, /* U+2198 */
+ { "Lscr", "ℒ" }, /* U+2112 */
+ { "Lsh", "↰" }, /* U+21B0 */
+ { "Lstrok", "Ł" }, /* U+0141 */
+ { "Lt", "≪" }, /* U+226A */
+ { "Map", "⤅" }, /* U+2905 */
+ { "Mcy", "М" }, /* U+041C */
+ { "MediumSpace", " " }, /* U+205F */
+ { "Mellintrf", "ℳ" }, /* U+2133 */
+ { "Mfr", "𝔐" }, /* U+1D510 */
+ { "MinusPlus", "∓" }, /* U+2213 */
+ { "Mopf", "𝕄" }, /* U+1D544 */
+ { "Mscr", "ℳ" }, /* U+2133 */
+ { "Mu", "Μ" }, /* U+039C */
+ { "NJcy", "Њ" }, /* U+040A */
+ { "Nacute", "Ń" }, /* U+0143 */
+ { "Ncaron", "Ň" }, /* U+0147 */
+ { "Ncedil", "Ņ" }, /* U+0145 */
+ { "Ncy", "Н" }, /* U+041D */
+ { "NestedLessLess", "≪" }, /* U+226A */
+ { "NewLine", "\n" }, /* U+000A */
+ { "Nfr", "𝔑" }, /* U+1D511 */
+ { "NoBreak", "" }, /* U+2060 */
+ { "Nopf", "ℕ" }, /* U+2115 */
+ { "Not", "⫬" }, /* U+2AEC */
+ { "NotCongruent", "≢" }, /* U+2262 */
+ { "NotCupCap", "≭" }, /* U+226D */
+ { "NotElement", "∉" }, /* U+2209 */
+ { "NotEqual", "≠" }, /* U+2260 */
+ { "NotEqualTilde", "≂̸" }, /* U+2242 U+0338 */
+ { "NotExists", "∄" }, /* U+2204 */
+ { "NotGreater", "≯" }, /* U+226F */
+ { "NotGreaterEqual", "≱" }, /* U+2271 */
+ { "NotGreaterLess", "≹" }, /* U+2279 */
+ { "NotGreaterTilde", "≵" }, /* U+2275 */
+ { "NotHumpDownHump", "≎̸" }, /* U+224E U+0338 */
+ { "NotHumpEqual", "≏̸" }, /* U+224F U+0338 */
+ { "NotLeftTriangle", "⋪" }, /* U+22EA */
+ { "NotLess", "≮" }, /* U+226E */
+ { "NotLessEqual", "≰" }, /* U+2270 */
+ { "NotLessGreater", "≸" }, /* U+2278 */
+ { "NotLessLess", "≪̸" }, /* U+226A U+0338 */
+ { "NotLessTilde", "≴" }, /* U+2274 */
+ { "NotPrecedes", "⊀" }, /* U+2280 */
+ { "NotSquareSubset", "⊏̸" }, /* U+228F U+0338 */
+ { "NotSubset", "⊂⃒" }, /* U+2282 U+20D2 */
+ { "NotSubsetEqual", "⊈" }, /* U+2288 */
+ { "NotSucceeds", "⊁" }, /* U+2281 */
+ { "NotSuperset", "⊃⃒" }, /* U+2283 U+20D2 */
+ { "NotTilde", "≁" }, /* U+2241 */
+ { "NotTildeEqual", "≄" }, /* U+2244 */
+ { "NotTildeTilde", "≉" }, /* U+2249 */
+ { "NotVerticalBar", "∤" }, /* U+2224 */
+ { "Nscr", "𝒩" }, /* U+1D4A9 */
+ { "Ntilde", "Ñ" }, /* U+00D1 */
+ { "Nu", "Ν" }, /* U+039D */
+ { "OElig", "Œ" }, /* U+0152 */
+ { "Oacute", "Ó" }, /* U+00D3 */
+ { "Ocirc", "Ô" }, /* U+00D4 */
+ { "Ocy", "О" }, /* U+041E */
+ { "Odblac", "Ő" }, /* U+0150 */
+ { "Ofr", "𝔒" }, /* U+1D512 */
+ { "Ograve", "Ò" }, /* U+00D2 */
+ { "Omacr", "Ō" }, /* U+014C */
+ { "Omega", "Ω" }, /* U+03A9 */
+ { "Omicron", "Ο" }, /* U+039F */
+ { "Oopf", "𝕆" }, /* U+1D546 */
+ { "OpenCurlyQuote", "‘" }, /* U+2018 */
+ { "Or", "⩔" }, /* U+2A54 */
+ { "Oscr", "𝒪" }, /* U+1D4AA */
+ { "Oslash", "Ø" }, /* U+00D8 */
+ { "Otilde", "Õ" }, /* U+00D5 */
+ { "Otimes", "⨷" }, /* U+2A37 */
+ { "Ouml", "Ö" }, /* U+00D6 */
+ { "OverBar", "‾" }, /* U+203E */
+ { "OverBrace", "⏞" }, /* U+23DE */
+ { "OverBracket", "⎴" }, /* U+23B4 */
+ { "OverParenthesis", "⏜" }, /* U+23DC */
+ { "PartialD", "∂" }, /* U+2202 */
+ { "Pcy", "П" }, /* U+041F */
+ { "Pfr", "𝔓" }, /* U+1D513 */
+ { "Phi", "Φ" }, /* U+03A6 */
+ { "Pi", "Π" }, /* U+03A0 */
+ { "PlusMinus", "±" }, /* U+00B1 */
+ { "Poincareplane", "ℌ" }, /* U+210C */
+ { "Popf", "ℙ" }, /* U+2119 */
+ { "Pr", "⪻" }, /* U+2ABB */
+ { "Precedes", "≺" }, /* U+227A */
+ { "PrecedesEqual", "⪯" }, /* U+2AAF */
+ { "PrecedesTilde", "≾" }, /* U+227E */
+ { "Prime", "″" }, /* U+2033 */
+ { "Product", "∏" }, /* U+220F */
+ { "Proportion", "∷" }, /* U+2237 */
+ { "Proportional", "∝" }, /* U+221D */
+ { "Pscr", "𝒫" }, /* U+1D4AB */
+ { "Psi", "Ψ" }, /* U+03A8 */
+ { "QUOT", "\"" }, /* U+0022 */
+ { "Qfr", "𝔔" }, /* U+1D514 */
+ { "Qopf", "ℚ" }, /* U+211A */
+ { "Qscr", "𝒬" }, /* U+1D4AC */
+ { "RBarr", "⤐" }, /* U+2910 */
+ { "REG", "®" }, /* U+00AE */
+ { "Racute", "Ŕ" }, /* U+0154 */
+ { "Rang", "⟫" }, /* U+27EB */
+ { "Rarr", "↠" }, /* U+21A0 */
+ { "Rarrtl", "⤖" }, /* U+2916 */
+ { "Rcaron", "Ř" }, /* U+0158 */
+ { "Rcedil", "Ŗ" }, /* U+0156 */
+ { "Rcy", "Р" }, /* U+0420 */
+ { "Re", "ℜ" }, /* U+211C */
+ { "ReverseElement", "∋" }, /* U+220B */
+ { "Rfr", "ℜ" }, /* U+211C */
+ { "Rho", "Ρ" }, /* U+03A1 */
+ { "RightArrow", "→" }, /* U+2192 */
+ { "RightArrowBar", "⇥" }, /* U+21E5 */
+ { "RightCeiling", "⌉" }, /* U+2309 */
+ { "RightDownVector", "⇂" }, /* U+21C2 */
+ { "RightFloor", "⌋" }, /* U+230B */
+ { "RightTee", "⊢" }, /* U+22A2 */
+ { "RightTeeArrow", "↦" }, /* U+21A6 */
+ { "RightTeeVector", "⥛" }, /* U+295B */
+ { "RightTriangle", "⊳" }, /* U+22B3 */
+ { "RightUpVector", "↾" }, /* U+21BE */
+ { "RightVector", "⇀" }, /* U+21C0 */
+ { "RightVectorBar", "⥓" }, /* U+2953 */
+ { "Rightarrow", "⇒" }, /* U+21D2 */
+ { "Ropf", "ℝ" }, /* U+211D */
+ { "RoundImplies", "⥰" }, /* U+2970 */
+ { "Rrightarrow", "⇛" }, /* U+21DB */
+ { "Rscr", "ℛ" }, /* U+211B */
+ { "Rsh", "↱" }, /* U+21B1 */
+ { "RuleDelayed", "⧴" }, /* U+29F4 */
+ { "SHCHcy", "Щ" }, /* U+0429 */
+ { "SHcy", "Ш" }, /* U+0428 */
+ { "SOFTcy", "Ь" }, /* U+042C */
+ { "Sacute", "Ś" }, /* U+015A */
+ { "Sc", "⪼" }, /* U+2ABC */
+ { "Scaron", "Š" }, /* U+0160 */
+ { "Scedil", "Ş" }, /* U+015E */
+ { "Scirc", "Ŝ" }, /* U+015C */
+ { "Scy", "С" }, /* U+0421 */
+ { "Sfr", "𝔖" }, /* U+1D516 */
+ { "ShortDownArrow", "↓" }, /* U+2193 */
+ { "ShortLeftArrow", "←" }, /* U+2190 */
+ { "ShortRightArrow", "→" }, /* U+2192 */
+ { "ShortUpArrow", "↑" }, /* U+2191 */
+ { "Sigma", "Σ" }, /* U+03A3 */
+ { "SmallCircle", "∘" }, /* U+2218 */
+ { "Sopf", "𝕊" }, /* U+1D54A */
+ { "Sqrt", "√" }, /* U+221A */
+ { "Square", "□" }, /* U+25A1 */
+ { "SquareSubset", "⊏" }, /* U+228F */
+ { "SquareSuperset", "⊐" }, /* U+2290 */
+ { "SquareUnion", "⊔" }, /* U+2294 */
+ { "Sscr", "𝒮" }, /* U+1D4AE */
+ { "Star", "⋆" }, /* U+22C6 */
+ { "Sub", "⋐" }, /* U+22D0 */
+ { "Subset", "⋐" }, /* U+22D0 */
+ { "SubsetEqual", "⊆" }, /* U+2286 */
+ { "Succeeds", "≻" }, /* U+227B */
+ { "SucceedsEqual", "⪰" }, /* U+2AB0 */
+ { "SucceedsTilde", "≿" }, /* U+227F */
+ { "SuchThat", "∋" }, /* U+220B */
+ { "Sum", "∑" }, /* U+2211 */
+ { "Sup", "⋑" }, /* U+22D1 */
+ { "Superset", "⊃" }, /* U+2283 */
+ { "SupersetEqual", "⊇" }, /* U+2287 */
+ { "Supset", "⋑" }, /* U+22D1 */
+ { "THORN", "Þ" }, /* U+00DE */
+ { "TRADE", "™" }, /* U+2122 */
+ { "TSHcy", "Ћ" }, /* U+040B */
+ { "TScy", "Ц" }, /* U+0426 */
+ { "Tab", " " }, /* U+0009 */
+ { "Tau", "Τ" }, /* U+03A4 */
+ { "Tcaron", "Ť" }, /* U+0164 */
+ { "Tcedil", "Ţ" }, /* U+0162 */
+ { "Tcy", "Т" }, /* U+0422 */
+ { "Tfr", "𝔗" }, /* U+1D517 */
+ { "Therefore", "∴" }, /* U+2234 */
+ { "Theta", "Θ" }, /* U+0398 */
+ { "ThickSpace", " " }, /* U+205F U+200A */
+ { "ThinSpace", " " }, /* U+2009 */
+ { "Tilde", "∼" }, /* U+223C */
+ { "TildeEqual", "≃" }, /* U+2243 */
+ { "TildeFullEqual", "≅" }, /* U+2245 */
+ { "TildeTilde", "≈" }, /* U+2248 */
+ { "Topf", "𝕋" }, /* U+1D54B */
+ { "TripleDot", "⃛" }, /* U+20DB */
+ { "Tscr", "𝒯" }, /* U+1D4AF */
+ { "Tstrok", "Ŧ" }, /* U+0166 */
+ { "Uacute", "Ú" }, /* U+00DA */
+ { "Uarr", "↟" }, /* U+219F */
+ { "Uarrocir", "⥉" }, /* U+2949 */
+ { "Ubrcy", "Ў" }, /* U+040E */
+ { "Ubreve", "Ŭ" }, /* U+016C */
+ { "Ucirc", "Û" }, /* U+00DB */
+ { "Ucy", "У" }, /* U+0423 */
+ { "Udblac", "Ű" }, /* U+0170 */
+ { "Ufr", "𝔘" }, /* U+1D518 */
+ { "Ugrave", "Ù" }, /* U+00D9 */
+ { "Umacr", "Ū" }, /* U+016A */
+ { "UnderBar", "_" }, /* U+005F */
+ { "UnderBrace", "⏟" }, /* U+23DF */
+ { "UnderBracket", "⎵" }, /* U+23B5 */
+ { "Union", "⋃" }, /* U+22C3 */
+ { "UnionPlus", "⊎" }, /* U+228E */
+ { "Uogon", "Ų" }, /* U+0172 */
+ { "Uopf", "𝕌" }, /* U+1D54C */
+ { "UpArrow", "↑" }, /* U+2191 */
+ { "UpArrowBar", "⤒" }, /* U+2912 */
+ { "UpDownArrow", "↕" }, /* U+2195 */
+ { "UpEquilibrium", "⥮" }, /* U+296E */
+ { "UpTee", "⊥" }, /* U+22A5 */
+ { "UpTeeArrow", "↥" }, /* U+21A5 */
+ { "Uparrow", "⇑" }, /* U+21D1 */
+ { "Updownarrow", "⇕" }, /* U+21D5 */
+ { "UpperLeftArrow", "↖" }, /* U+2196 */
+ { "UpperRightArrow", "↗" }, /* U+2197 */
+ { "Upsi", "ϒ" }, /* U+03D2 */
+ { "Upsilon", "Υ" }, /* U+03A5 */
+ { "Uring", "Ů" }, /* U+016E */
+ { "Uscr", "𝒰" }, /* U+1D4B0 */
+ { "Utilde", "Ũ" }, /* U+0168 */
+ { "Uuml", "Ü" }, /* U+00DC */
+ { "VDash", "⊫" }, /* U+22AB */
+ { "Vbar", "⫫" }, /* U+2AEB */
+ { "Vcy", "В" }, /* U+0412 */
+ { "Vdash", "⊩" }, /* U+22A9 */
+ { "Vdashl", "⫦" }, /* U+2AE6 */
+ { "Vee", "⋁" }, /* U+22C1 */
+ { "Verbar", "‖" }, /* U+2016 */
+ { "Vert", "‖" }, /* U+2016 */
+ { "VerticalBar", "∣" }, /* U+2223 */
+ { "VerticalLine", "|" }, /* U+007C */
+ { "VerticalTilde", "≀" }, /* U+2240 */
+ { "VeryThinSpace", " " }, /* U+200A */
+ { "Vfr", "𝔙" }, /* U+1D519 */
+ { "Vopf", "𝕍" }, /* U+1D54D */
+ { "Vscr", "𝒱" }, /* U+1D4B1 */
+ { "Vvdash", "⊪" }, /* U+22AA */
+ { "Wcirc", "Ŵ" }, /* U+0174 */
+ { "Wedge", "⋀" }, /* U+22C0 */
+ { "Wfr", "𝔚" }, /* U+1D51A */
+ { "Wopf", "𝕎" }, /* U+1D54E */
+ { "Wscr", "𝒲" }, /* U+1D4B2 */
+ { "Xfr", "𝔛" }, /* U+1D51B */
+ { "Xi", "Ξ" }, /* U+039E */
+ { "Xopf", "𝕏" }, /* U+1D54F */
+ { "Xscr", "𝒳" }, /* U+1D4B3 */
+ { "YAcy", "Я" }, /* U+042F */
+ { "YIcy", "Ї" }, /* U+0407 */
+ { "YUcy", "Ю" }, /* U+042E */
+ { "Yacute", "Ý" }, /* U+00DD */
+ { "Ycirc", "Ŷ" }, /* U+0176 */
+ { "Ycy", "Ы" }, /* U+042B */
+ { "Yfr", "𝔜" }, /* U+1D51C */
+ { "Yopf", "𝕐" }, /* U+1D550 */
+ { "Yscr", "𝒴" }, /* U+1D4B4 */
+ { "Yuml", "Ÿ" }, /* U+0178 */
+ { "ZHcy", "Ж" }, /* U+0416 */
+ { "Zacute", "Ź" }, /* U+0179 */
+ { "Zcaron", "Ž" }, /* U+017D */
+ { "Zcy", "З" }, /* U+0417 */
+ { "Zdot", "Ż" }, /* U+017B */
+ { "ZeroWidthSpace", "" }, /* U+200B */
+ { "Zeta", "Ζ" }, /* U+0396 */
+ { "Zfr", "ℨ" }, /* U+2128 */
+ { "Zopf", "ℤ" }, /* U+2124 */
+ { "Zscr", "𝒵" }, /* U+1D4B5 */
+ { "aacute", "á" }, /* U+00E1 */
+ { "abreve", "ă" }, /* U+0103 */
+ { "ac", "∾" }, /* U+223E */
+ { "acE", "∾̳" }, /* U+223E U+0333 */
+ { "acd", "∿" }, /* U+223F */
+ { "acirc", "â" }, /* U+00E2 */
+ { "acute", "´" }, /* U+00B4 */
+ { "acy", "а" }, /* U+0430 */
+ { "aelig", "æ" }, /* U+00E6 */
+ { "af", "" }, /* U+2061 */
+ { "afr", "𝔞" }, /* U+1D51E */
+ { "agrave", "à" }, /* U+00E0 */
+ { "alefsym", "ℵ" }, /* U+2135 */
+ { "aleph", "ℵ" }, /* U+2135 */
+ { "alpha", "α" }, /* U+03B1 */
+ { "amacr", "ā" }, /* U+0101 */
+ { "amalg", "⨿" }, /* U+2A3F */
+ { "amp", "&" }, /* U+0026 */
+ { "and", "∧" }, /* U+2227 */
+ { "andand", "⩕" }, /* U+2A55 */
+ { "andd", "⩜" }, /* U+2A5C */
+ { "andslope", "⩘" }, /* U+2A58 */
+ { "andv", "⩚" }, /* U+2A5A */
+ { "ang", "∠" }, /* U+2220 */
+ { "ange", "⦤" }, /* U+29A4 */
+ { "angle", "∠" }, /* U+2220 */
+ { "angmsd", "∡" }, /* U+2221 */
+ { "angmsdaa", "⦨" }, /* U+29A8 */
+ { "angmsdab", "⦩" }, /* U+29A9 */
+ { "angmsdac", "⦪" }, /* U+29AA */
+ { "angmsdad", "⦫" }, /* U+29AB */
+ { "angmsdae", "⦬" }, /* U+29AC */
+ { "angmsdaf", "⦭" }, /* U+29AD */
+ { "angmsdag", "⦮" }, /* U+29AE */
+ { "angmsdah", "⦯" }, /* U+29AF */
+ { "angrt", "∟" }, /* U+221F */
+ { "angrtvb", "⊾" }, /* U+22BE */
+ { "angrtvbd", "⦝" }, /* U+299D */
+ { "angsph", "∢" }, /* U+2222 */
+ { "angst", "Å" }, /* U+00C5 */
+ { "angzarr", "⍼" }, /* U+237C */
+ { "aogon", "ą" }, /* U+0105 */
+ { "aopf", "𝕒" }, /* U+1D552 */
+ { "ap", "≈" }, /* U+2248 */
+ { "apE", "⩰" }, /* U+2A70 */
+ { "apacir", "⩯" }, /* U+2A6F */
+ { "ape", "≊" }, /* U+224A */
+ { "apid", "≋" }, /* U+224B */
+ { "apos", "'" }, /* U+0027 */
+ { "approx", "≈" }, /* U+2248 */
+ { "approxeq", "≊" }, /* U+224A */
+ { "aring", "å" }, /* U+00E5 */
+ { "ascr", "𝒶" }, /* U+1D4B6 */
+ { "ast", "*" }, /* U+002A */
+ { "asymp", "≈" }, /* U+2248 */
+ { "asympeq", "≍" }, /* U+224D */
+ { "atilde", "ã" }, /* U+00E3 */
+ { "auml", "ä" }, /* U+00E4 */
+ { "awconint", "∳" }, /* U+2233 */
+ { "awint", "⨑" }, /* U+2A11 */
+ { "bNot", "⫭" }, /* U+2AED */
+ { "backcong", "≌" }, /* U+224C */
+ { "backepsilon", "϶" }, /* U+03F6 */
+ { "backprime", "‵" }, /* U+2035 */
+ { "backsim", "∽" }, /* U+223D */
+ { "backsimeq", "⋍" }, /* U+22CD */
+ { "barvee", "⊽" }, /* U+22BD */
+ { "barwed", "⌅" }, /* U+2305 */
+ { "barwedge", "⌅" }, /* U+2305 */
+ { "bbrk", "⎵" }, /* U+23B5 */
+ { "bbrktbrk", "⎶" }, /* U+23B6 */
+ { "bcong", "≌" }, /* U+224C */
+ { "bcy", "б" }, /* U+0431 */
+ { "bdquo", "„" }, /* U+201E */
+ { "becaus", "∵" }, /* U+2235 */
+ { "because", "∵" }, /* U+2235 */
+ { "bemptyv", "⦰" }, /* U+29B0 */
+ { "bepsi", "϶" }, /* U+03F6 */
+ { "bernou", "ℬ" }, /* U+212C */
+ { "beta", "β" }, /* U+03B2 */
+ { "beth", "ℶ" }, /* U+2136 */
+ { "between", "≬" }, /* U+226C */
+ { "bfr", "𝔟" }, /* U+1D51F */
+ { "bigcap", "⋂" }, /* U+22C2 */
+ { "bigcirc", "◯" }, /* U+25EF */
+ { "bigcup", "⋃" }, /* U+22C3 */
+ { "bigodot", "⨀" }, /* U+2A00 */
+ { "bigoplus", "⨁" }, /* U+2A01 */
+ { "bigotimes", "⨂" }, /* U+2A02 */
+ { "bigsqcup", "⨆" }, /* U+2A06 */
+ { "bigstar", "★" }, /* U+2605 */
+ { "bigtriangledown", "▽" }, /* U+25BD */
+ { "bigtriangleup", "△" }, /* U+25B3 */
+ { "biguplus", "⨄" }, /* U+2A04 */
+ { "bigvee", "⋁" }, /* U+22C1 */
+ { "bigwedge", "⋀" }, /* U+22C0 */
+ { "bkarow", "⤍" }, /* U+290D */
+ { "blacklozenge", "⧫" }, /* U+29EB */
+ { "blacksquare", "▪" }, /* U+25AA */
+ { "blacktriangle", "▴" }, /* U+25B4 */
+ { "blank", "␣" }, /* U+2423 */
+ { "blk12", "▒" }, /* U+2592 */
+ { "blk14", "░" }, /* U+2591 */
+ { "blk34", "▓" }, /* U+2593 */
+ { "block", "█" }, /* U+2588 */
+ { "bne", "=⃥" }, /* U+003D U+20E5 */
+ { "bnequiv", "≡⃥" }, /* U+2261 U+20E5 */
+ { "bnot", "⌐" }, /* U+2310 */
+ { "bopf", "𝕓" }, /* U+1D553 */
+ { "bot", "⊥" }, /* U+22A5 */
+ { "bottom", "⊥" }, /* U+22A5 */
+ { "bowtie", "⋈" }, /* U+22C8 */
+ { "boxDL", "╗" }, /* U+2557 */
+ { "boxDR", "╔" }, /* U+2554 */
+ { "boxDl", "╖" }, /* U+2556 */
+ { "boxDr", "╓" }, /* U+2553 */
+ { "boxH", "═" }, /* U+2550 */
+ { "boxHD", "╦" }, /* U+2566 */
+ { "boxHU", "╩" }, /* U+2569 */
+ { "boxHd", "╤" }, /* U+2564 */
+ { "boxHu", "╧" }, /* U+2567 */
+ { "boxUL", "╝" }, /* U+255D */
+ { "boxUR", "╚" }, /* U+255A */
+ { "boxUl", "╜" }, /* U+255C */
+ { "boxUr", "╙" }, /* U+2559 */
+ { "boxV", "║" }, /* U+2551 */
+ { "boxVH", "╬" }, /* U+256C */
+ { "boxVL", "╣" }, /* U+2563 */
+ { "boxVR", "╠" }, /* U+2560 */
+ { "boxVh", "╫" }, /* U+256B */
+ { "boxVl", "╢" }, /* U+2562 */
+ { "boxVr", "╟" }, /* U+255F */
+ { "boxbox", "⧉" }, /* U+29C9 */
+ { "boxdL", "╕" }, /* U+2555 */
+ { "boxdR", "╒" }, /* U+2552 */
+ { "boxdl", "┐" }, /* U+2510 */
+ { "boxdr", "┌" }, /* U+250C */
+ { "boxh", "─" }, /* U+2500 */
+ { "boxhD", "╥" }, /* U+2565 */
+ { "boxhU", "╨" }, /* U+2568 */
+ { "boxhd", "┬" }, /* U+252C */
+ { "boxhu", "┴" }, /* U+2534 */
+ { "boxminus", "⊟" }, /* U+229F */
+ { "boxplus", "⊞" }, /* U+229E */
+ { "boxtimes", "⊠" }, /* U+22A0 */
+ { "boxuL", "╛" }, /* U+255B */
+ { "boxuR", "╘" }, /* U+2558 */
+ { "boxul", "┘" }, /* U+2518 */
+ { "boxur", "└" }, /* U+2514 */
+ { "boxv", "│" }, /* U+2502 */
+ { "boxvH", "╪" }, /* U+256A */
+ { "boxvL", "╡" }, /* U+2561 */
+ { "boxvR", "╞" }, /* U+255E */
+ { "boxvh", "┼" }, /* U+253C */
+ { "boxvl", "┤" }, /* U+2524 */
+ { "boxvr", "├" }, /* U+251C */
+ { "bprime", "‵" }, /* U+2035 */
+ { "breve", "˘" }, /* U+02D8 */
+ { "brvbar", "¦" }, /* U+00A6 */
+ { "bscr", "𝒷" }, /* U+1D4B7 */
+ { "bsemi", "⁏" }, /* U+204F */
+ { "bsim", "∽" }, /* U+223D */
+ { "bsime", "⋍" }, /* U+22CD */
+ { "bsol", "\\" }, /* U+005C */
+ { "bsolb", "⧅" }, /* U+29C5 */
+ { "bsolhsub", "⟈" }, /* U+27C8 */
+ { "bull", "•" }, /* U+2022 */
+ { "bullet", "•" }, /* U+2022 */
+ { "bump", "≎" }, /* U+224E */
+ { "bumpE", "⪮" }, /* U+2AAE */
+ { "bumpe", "≏" }, /* U+224F */
+ { "bumpeq", "≏" }, /* U+224F */
+ { "cacute", "ć" }, /* U+0107 */
+ { "cap", "∩" }, /* U+2229 */
+ { "capand", "⩄" }, /* U+2A44 */
+ { "capbrcup", "⩉" }, /* U+2A49 */
+ { "capcap", "⩋" }, /* U+2A4B */
+ { "capcup", "⩇" }, /* U+2A47 */
+ { "capdot", "⩀" }, /* U+2A40 */
+ { "caps", "∩︀" }, /* U+2229 U+FE00 */
+ { "caret", "⁁" }, /* U+2041 */
+ { "caron", "ˇ" }, /* U+02C7 */
+ { "ccaps", "⩍" }, /* U+2A4D */
+ { "ccaron", "č" }, /* U+010D */
+ { "ccedil", "ç" }, /* U+00E7 */
+ { "ccirc", "ĉ" }, /* U+0109 */
+ { "ccups", "⩌" }, /* U+2A4C */
+ { "ccupssm", "⩐" }, /* U+2A50 */
+ { "cdot", "ċ" }, /* U+010B */
+ { "cedil", "¸" }, /* U+00B8 */
+ { "cemptyv", "⦲" }, /* U+29B2 */
+ { "cent", "¢" }, /* U+00A2 */
+ { "centerdot", "·" }, /* U+00B7 */
+ { "cfr", "𝔠" }, /* U+1D520 */
+ { "chcy", "ч" }, /* U+0447 */
+ { "check", "✓" }, /* U+2713 */
+ { "checkmark", "✓" }, /* U+2713 */
+ { "chi", "χ" }, /* U+03C7 */
+ { "cir", "○" }, /* U+25CB */
+ { "cirE", "⧃" }, /* U+29C3 */
+ { "circ", "ˆ" }, /* U+02C6 */
+ { "circeq", "≗" }, /* U+2257 */
+ { "circlearrowleft", "↺" }, /* U+21BA */
+ { "circledR", "®" }, /* U+00AE */
+ { "circledS", "Ⓢ" }, /* U+24C8 */
+ { "circledast", "⊛" }, /* U+229B */
+ { "circledcirc", "⊚" }, /* U+229A */
+ { "circleddash", "⊝" }, /* U+229D */
+ { "cire", "≗" }, /* U+2257 */
+ { "cirfnint", "⨐" }, /* U+2A10 */
+ { "cirmid", "⫯" }, /* U+2AEF */
+ { "cirscir", "⧂" }, /* U+29C2 */
+ { "clubs", "♣" }, /* U+2663 */
+ { "clubsuit", "♣" }, /* U+2663 */
+ { "colon", ":" }, /* U+003A */
+ { "colone", "≔" }, /* U+2254 */
+ { "coloneq", "≔" }, /* U+2254 */
+ { "comma", "," }, /* U+002C */
+ { "commat", "@" }, /* U+0040 */
+ { "comp", "∁" }, /* U+2201 */
+ { "compfn", "∘" }, /* U+2218 */
+ { "complement", "∁" }, /* U+2201 */
+ { "complexes", "ℂ" }, /* U+2102 */
+ { "cong", "≅" }, /* U+2245 */
+ { "congdot", "⩭" }, /* U+2A6D */
+ { "conint", "∮" }, /* U+222E */
+ { "copf", "𝕔" }, /* U+1D554 */
+ { "coprod", "∐" }, /* U+2210 */
+ { "copy", "©" }, /* U+00A9 */
+ { "copysr", "℗" }, /* U+2117 */
+ { "crarr", "↵" }, /* U+21B5 */
+ { "cross", "✗" }, /* U+2717 */
+ { "cscr", "𝒸" }, /* U+1D4B8 */
+ { "csub", "⫏" }, /* U+2ACF */
+ { "csube", "⫑" }, /* U+2AD1 */
+ { "csup", "⫐" }, /* U+2AD0 */
+ { "csupe", "⫒" }, /* U+2AD2 */
+ { "ctdot", "⋯" }, /* U+22EF */
+ { "cudarrl", "⤸" }, /* U+2938 */
+ { "cudarrr", "⤵" }, /* U+2935 */
+ { "cuepr", "⋞" }, /* U+22DE */
+ { "cuesc", "⋟" }, /* U+22DF */
+ { "cularr", "↶" }, /* U+21B6 */
+ { "cularrp", "⤽" }, /* U+293D */
+ { "cup", "∪" }, /* U+222A */
+ { "cupbrcap", "⩈" }, /* U+2A48 */
+ { "cupcap", "⩆" }, /* U+2A46 */
+ { "cupcup", "⩊" }, /* U+2A4A */
+ { "cupdot", "⊍" }, /* U+228D */
+ { "cupor", "⩅" }, /* U+2A45 */
+ { "cups", "∪︀" }, /* U+222A U+FE00 */
+ { "curarr", "↷" }, /* U+21B7 */
+ { "curarrm", "⤼" }, /* U+293C */
+ { "curlyeqprec", "⋞" }, /* U+22DE */
+ { "curlyeqsucc", "⋟" }, /* U+22DF */
+ { "curlyvee", "⋎" }, /* U+22CE */
+ { "curlywedge", "⋏" }, /* U+22CF */
+ { "curren", "¤" }, /* U+00A4 */
+ { "curvearrowleft", "↶" }, /* U+21B6 */
+ { "curvearrowright", "↷" }, /* U+21B7 */
+ { "cuvee", "⋎" }, /* U+22CE */
+ { "cuwed", "⋏" }, /* U+22CF */
+ { "cwconint", "∲" }, /* U+2232 */
+ { "cwint", "∱" }, /* U+2231 */
+ { "cylcty", "⌭" }, /* U+232D */
+ { "dArr", "⇓" }, /* U+21D3 */
+ { "dHar", "⥥" }, /* U+2965 */
+ { "dagger", "†" }, /* U+2020 */
+ { "daleth", "ℸ" }, /* U+2138 */
+ { "darr", "↓" }, /* U+2193 */
+ { "dash", "‐" }, /* U+2010 */
+ { "dashv", "⊣" }, /* U+22A3 */
+ { "dbkarow", "⤏" }, /* U+290F */
+ { "dblac", "˝" }, /* U+02DD */
+ { "dcaron", "ď" }, /* U+010F */
+ { "dcy", "д" }, /* U+0434 */
+ { "dd", "ⅆ" }, /* U+2146 */
+ { "ddagger", "‡" }, /* U+2021 */
+ { "ddarr", "⇊" }, /* U+21CA */
+ { "ddotseq", "⩷" }, /* U+2A77 */
+ { "deg", "°" }, /* U+00B0 */
+ { "delta", "δ" }, /* U+03B4 */
+ { "demptyv", "⦱" }, /* U+29B1 */
+ { "dfisht", "⥿" }, /* U+297F */
+ { "dfr", "𝔡" }, /* U+1D521 */
+ { "dharl", "⇃" }, /* U+21C3 */
+ { "dharr", "⇂" }, /* U+21C2 */
+ { "diam", "⋄" }, /* U+22C4 */
+ { "diamond", "⋄" }, /* U+22C4 */
+ { "diamondsuit", "♦" }, /* U+2666 */
+ { "diams", "♦" }, /* U+2666 */
+ { "die", "¨" }, /* U+00A8 */
+ { "digamma", "ϝ" }, /* U+03DD */
+ { "disin", "⋲" }, /* U+22F2 */
+ { "div", "÷" }, /* U+00F7 */
+ { "divide", "÷" }, /* U+00F7 */
+ { "divideontimes", "⋇" }, /* U+22C7 */
+ { "divonx", "⋇" }, /* U+22C7 */
+ { "djcy", "ђ" }, /* U+0452 */
+ { "dlcorn", "⌞" }, /* U+231E */
+ { "dlcrop", "⌍" }, /* U+230D */
+ { "dollar", "$" }, /* U+0024 */
+ { "dopf", "𝕕" }, /* U+1D555 */
+ { "dot", "˙" }, /* U+02D9 */
+ { "doteq", "≐" }, /* U+2250 */
+ { "doteqdot", "≑" }, /* U+2251 */
+ { "dotminus", "∸" }, /* U+2238 */
+ { "dotplus", "∔" }, /* U+2214 */
+ { "dotsquare", "⊡" }, /* U+22A1 */
+ { "doublebarwedge", "⌆" }, /* U+2306 */
+ { "downarrow", "↓" }, /* U+2193 */
+ { "downdownarrows", "⇊" }, /* U+21CA */
+ { "downharpoonleft", "⇃" }, /* U+21C3 */
+ { "drbkarow", "⤐" }, /* U+2910 */
+ { "drcorn", "⌟" }, /* U+231F */
+ { "drcrop", "⌌" }, /* U+230C */
+ { "dscr", "𝒹" }, /* U+1D4B9 */
+ { "dscy", "ѕ" }, /* U+0455 */
+ { "dsol", "⧶" }, /* U+29F6 */
+ { "dstrok", "đ" }, /* U+0111 */
+ { "dtdot", "⋱" }, /* U+22F1 */
+ { "dtri", "▿" }, /* U+25BF */
+ { "dtrif", "▾" }, /* U+25BE */
+ { "duarr", "⇵" }, /* U+21F5 */
+ { "duhar", "⥯" }, /* U+296F */
+ { "dwangle", "⦦" }, /* U+29A6 */
+ { "dzcy", "џ" }, /* U+045F */
+ { "dzigrarr", "⟿" }, /* U+27FF */
+ { "eDDot", "⩷" }, /* U+2A77 */
+ { "eDot", "≑" }, /* U+2251 */
+ { "eacute", "é" }, /* U+00E9 */
+ { "easter", "⩮" }, /* U+2A6E */
+ { "ecaron", "ě" }, /* U+011B */
+ { "ecir", "≖" }, /* U+2256 */
+ { "ecirc", "ê" }, /* U+00EA */
+ { "ecolon", "≕" }, /* U+2255 */
+ { "ecy", "э" }, /* U+044D */
+ { "edot", "ė" }, /* U+0117 */
+ { "ee", "ⅇ" }, /* U+2147 */
+ { "efDot", "≒" }, /* U+2252 */
+ { "efr", "𝔢" }, /* U+1D522 */
+ { "eg", "⪚" }, /* U+2A9A */
+ { "egrave", "è" }, /* U+00E8 */
+ { "egs", "⪖" }, /* U+2A96 */
+ { "egsdot", "⪘" }, /* U+2A98 */
+ { "el", "⪙" }, /* U+2A99 */
+ { "elinters", "⏧" }, /* U+23E7 */
+ { "ell", "ℓ" }, /* U+2113 */
+ { "els", "⪕" }, /* U+2A95 */
+ { "elsdot", "⪗" }, /* U+2A97 */
+ { "emacr", "ē" }, /* U+0113 */
+ { "empty", "∅" }, /* U+2205 */
+ { "emptyset", "∅" }, /* U+2205 */
+ { "emptyv", "∅" }, /* U+2205 */
+ { "emsp", " " }, /* U+2003 */
+ { "emsp13", " " }, /* U+2004 */
+ { "emsp14", " " }, /* U+2005 */
+ { "eng", "ŋ" }, /* U+014B */
+ { "ensp", " " }, /* U+2002 */
+ { "eogon", "ę" }, /* U+0119 */
+ { "eopf", "𝕖" }, /* U+1D556 */
+ { "epar", "⋕" }, /* U+22D5 */
+ { "eparsl", "⧣" }, /* U+29E3 */
+ { "eplus", "⩱" }, /* U+2A71 */
+ { "epsi", "ε" }, /* U+03B5 */
+ { "epsilon", "ε" }, /* U+03B5 */
+ { "epsiv", "ϵ" }, /* U+03F5 */
+ { "eqcirc", "≖" }, /* U+2256 */
+ { "eqcolon", "≕" }, /* U+2255 */
+ { "eqsim", "≂" }, /* U+2242 */
+ { "eqslantgtr", "⪖" }, /* U+2A96 */
+ { "eqslantless", "⪕" }, /* U+2A95 */
+ { "equals", "=" }, /* U+003D */
+ { "equest", "≟" }, /* U+225F */
+ { "equiv", "≡" }, /* U+2261 */
+ { "equivDD", "⩸" }, /* U+2A78 */
+ { "eqvparsl", "⧥" }, /* U+29E5 */
+ { "erDot", "≓" }, /* U+2253 */
+ { "erarr", "⥱" }, /* U+2971 */
+ { "escr", "ℯ" }, /* U+212F */
+ { "esdot", "≐" }, /* U+2250 */
+ { "esim", "≂" }, /* U+2242 */
+ { "eta", "η" }, /* U+03B7 */
+ { "eth", "ð" }, /* U+00F0 */
+ { "euml", "ë" }, /* U+00EB */
+ { "euro", "€" }, /* U+20AC */
+ { "excl", "!" }, /* U+0021 */
+ { "exist", "∃" }, /* U+2203 */
+ { "expectation", "ℰ" }, /* U+2130 */
+ { "exponentiale", "ⅇ" }, /* U+2147 */
+ { "fallingdotseq", "≒" }, /* U+2252 */
+ { "fcy", "ф" }, /* U+0444 */
+ { "female", "♀" }, /* U+2640 */
+ { "ffilig", "ffi" }, /* U+FB03 */
+ { "fflig", "ff" }, /* U+FB00 */
+ { "ffllig", "ffl" }, /* U+FB04 */
+ { "ffr", "𝔣" }, /* U+1D523 */
+ { "filig", "fi" }, /* U+FB01 */
+ { "fjlig", "fj" }, /* U+0066 U+006A */
+ { "flat", "♭" }, /* U+266D */
+ { "fllig", "fl" }, /* U+FB02 */
+ { "fltns", "▱" }, /* U+25B1 */
+ { "fnof", "ƒ" }, /* U+0192 */
+ { "fopf", "𝕗" }, /* U+1D557 */
+ { "forall", "∀" }, /* U+2200 */
+ { "fork", "⋔" }, /* U+22D4 */
+ { "forkv", "⫙" }, /* U+2AD9 */
+ { "fpartint", "⨍" }, /* U+2A0D */
+ { "frac12", "½" }, /* U+00BD */
+ { "frac13", "⅓" }, /* U+2153 */
+ { "frac14", "¼" }, /* U+00BC */
+ { "frac15", "⅕" }, /* U+2155 */
+ { "frac16", "⅙" }, /* U+2159 */
+ { "frac18", "⅛" }, /* U+215B */
+ { "frac23", "⅔" }, /* U+2154 */
+ { "frac25", "⅖" }, /* U+2156 */
+ { "frac34", "¾" }, /* U+00BE */
+ { "frac35", "⅗" }, /* U+2157 */
+ { "frac38", "⅜" }, /* U+215C */
+ { "frac45", "⅘" }, /* U+2158 */
+ { "frac56", "⅚" }, /* U+215A */
+ { "frac58", "⅝" }, /* U+215D */
+ { "frac78", "⅞" }, /* U+215E */
+ { "frasl", "⁄" }, /* U+2044 */
+ { "frown", "⌢" }, /* U+2322 */
+ { "fscr", "𝒻" }, /* U+1D4BB */
+ { "gE", "≧" }, /* U+2267 */
+ { "gEl", "⪌" }, /* U+2A8C */
+ { "gacute", "ǵ" }, /* U+01F5 */
+ { "gamma", "γ" }, /* U+03B3 */
+ { "gammad", "ϝ" }, /* U+03DD */
+ { "gap", "⪆" }, /* U+2A86 */
+ { "gbreve", "ğ" }, /* U+011F */
+ { "gcirc", "ĝ" }, /* U+011D */
+ { "gcy", "г" }, /* U+0433 */
+ { "gdot", "ġ" }, /* U+0121 */
+ { "ge", "≥" }, /* U+2265 */
+ { "gel", "⋛" }, /* U+22DB */
+ { "geq", "≥" }, /* U+2265 */
+ { "geqq", "≧" }, /* U+2267 */
+ { "geqslant", "⩾" }, /* U+2A7E */
+ { "ges", "⩾" }, /* U+2A7E */
+ { "gescc", "⪩" }, /* U+2AA9 */
+ { "gesdot", "⪀" }, /* U+2A80 */
+ { "gesdoto", "⪂" }, /* U+2A82 */
+ { "gesdotol", "⪄" }, /* U+2A84 */
+ { "gesl", "⋛︀" }, /* U+22DB U+FE00 */
+ { "gesles", "⪔" }, /* U+2A94 */
+ { "gfr", "𝔤" }, /* U+1D524 */
+ { "gg", "≫" }, /* U+226B */
+ { "ggg", "⋙" }, /* U+22D9 */
+ { "gimel", "ℷ" }, /* U+2137 */
+ { "gjcy", "ѓ" }, /* U+0453 */
+ { "gl", "≷" }, /* U+2277 */
+ { "glE", "⪒" }, /* U+2A92 */
+ { "gla", "⪥" }, /* U+2AA5 */
+ { "glj", "⪤" }, /* U+2AA4 */
+ { "gnE", "≩" }, /* U+2269 */
+ { "gnap", "⪊" }, /* U+2A8A */
+ { "gnapprox", "⪊" }, /* U+2A8A */
+ { "gne", "⪈" }, /* U+2A88 */
+ { "gneq", "⪈" }, /* U+2A88 */
+ { "gneqq", "≩" }, /* U+2269 */
+ { "gnsim", "⋧" }, /* U+22E7 */
+ { "gopf", "𝕘" }, /* U+1D558 */
+ { "grave", "`" }, /* U+0060 */
+ { "gscr", "ℊ" }, /* U+210A */
+ { "gsim", "≳" }, /* U+2273 */
+ { "gsime", "⪎" }, /* U+2A8E */
+ { "gsiml", "⪐" }, /* U+2A90 */
+ { "gt", ">" }, /* U+003E */
+ { "gtcc", "⪧" }, /* U+2AA7 */
+ { "gtcir", "⩺" }, /* U+2A7A */
+ { "gtdot", "⋗" }, /* U+22D7 */
+ { "gtlPar", "⦕" }, /* U+2995 */
+ { "gtquest", "⩼" }, /* U+2A7C */
+ { "gtrapprox", "⪆" }, /* U+2A86 */
+ { "gtrarr", "⥸" }, /* U+2978 */
+ { "gtrdot", "⋗" }, /* U+22D7 */
+ { "gtreqless", "⋛" }, /* U+22DB */
+ { "gtreqqless", "⪌" }, /* U+2A8C */
+ { "gtrless", "≷" }, /* U+2277 */
+ { "gtrsim", "≳" }, /* U+2273 */
+ { "gvertneqq", "≩︀" }, /* U+2269 U+FE00 */
+ { "gvnE", "≩︀" }, /* U+2269 U+FE00 */
+ { "hArr", "⇔" }, /* U+21D4 */
+ { "hairsp", " " }, /* U+200A */
+ { "half", "½" }, /* U+00BD */
+ { "hamilt", "ℋ" }, /* U+210B */
+ { "hardcy", "ъ" }, /* U+044A */
+ { "harr", "↔" }, /* U+2194 */
+ { "harrcir", "⥈" }, /* U+2948 */
+ { "harrw", "↭" }, /* U+21AD */
+ { "hbar", "ℏ" }, /* U+210F */
+ { "hcirc", "ĥ" }, /* U+0125 */
+ { "hearts", "♥" }, /* U+2665 */
+ { "heartsuit", "♥" }, /* U+2665 */
+ { "hellip", "…" }, /* U+2026 */
+ { "hercon", "⊹" }, /* U+22B9 */
+ { "hfr", "𝔥" }, /* U+1D525 */
+ { "hksearow", "⤥" }, /* U+2925 */
+ { "hkswarow", "⤦" }, /* U+2926 */
+ { "hoarr", "⇿" }, /* U+21FF */
+ { "homtht", "∻" }, /* U+223B */
+ { "hookleftarrow", "↩" }, /* U+21A9 */
+ { "hookrightarrow", "↪" }, /* U+21AA */
+ { "hopf", "𝕙" }, /* U+1D559 */
+ { "horbar", "―" }, /* U+2015 */
+ { "hscr", "𝒽" }, /* U+1D4BD */
+ { "hslash", "ℏ" }, /* U+210F */
+ { "hstrok", "ħ" }, /* U+0127 */
+ { "hybull", "⁃" }, /* U+2043 */
+ { "hyphen", "‐" }, /* U+2010 */
+ { "iacute", "í" }, /* U+00ED */
+ { "ic", "" }, /* U+2063 */
+ { "icirc", "î" }, /* U+00EE */
+ { "icy", "и" }, /* U+0438 */
+ { "iecy", "е" }, /* U+0435 */
+ { "iexcl", "¡" }, /* U+00A1 */
+ { "iff", "⇔" }, /* U+21D4 */
+ { "ifr", "𝔦" }, /* U+1D526 */
+ { "igrave", "ì" }, /* U+00EC */
+ { "ii", "ⅈ" }, /* U+2148 */
+ { "iiiint", "⨌" }, /* U+2A0C */
+ { "iiint", "∭" }, /* U+222D */
+ { "iinfin", "⧜" }, /* U+29DC */
+ { "iiota", "℩" }, /* U+2129 */
+ { "ijlig", "ij" }, /* U+0133 */
+ { "imacr", "ī" }, /* U+012B */
+ { "image", "ℑ" }, /* U+2111 */
+ { "imagline", "ℐ" }, /* U+2110 */
+ { "imagpart", "ℑ" }, /* U+2111 */
+ { "imath", "ı" }, /* U+0131 */
+ { "imof", "⊷" }, /* U+22B7 */
+ { "imped", "Ƶ" }, /* U+01B5 */
+ { "in", "∈" }, /* U+2208 */
+ { "incare", "℅" }, /* U+2105 */
+ { "infin", "∞" }, /* U+221E */
+ { "infintie", "⧝" }, /* U+29DD */
+ { "inodot", "ı" }, /* U+0131 */
+ { "int", "∫" }, /* U+222B */
+ { "intcal", "⊺" }, /* U+22BA */
+ { "integers", "ℤ" }, /* U+2124 */
+ { "intercal", "⊺" }, /* U+22BA */
+ { "intlarhk", "⨗" }, /* U+2A17 */
+ { "intprod", "⨼" }, /* U+2A3C */
+ { "iocy", "ё" }, /* U+0451 */
+ { "iogon", "į" }, /* U+012F */
+ { "iopf", "𝕚" }, /* U+1D55A */
+ { "iota", "ι" }, /* U+03B9 */
+ { "iprod", "⨼" }, /* U+2A3C */
+ { "iquest", "¿" }, /* U+00BF */
+ { "iscr", "𝒾" }, /* U+1D4BE */
+ { "isin", "∈" }, /* U+2208 */
+ { "isinE", "⋹" }, /* U+22F9 */
+ { "isindot", "⋵" }, /* U+22F5 */
+ { "isins", "⋴" }, /* U+22F4 */
+ { "isinsv", "⋳" }, /* U+22F3 */
+ { "isinv", "∈" }, /* U+2208 */
+ { "it", "" }, /* U+2062 */
+ { "itilde", "ĩ" }, /* U+0129 */
+ { "iukcy", "і" }, /* U+0456 */
+ { "iuml", "ï" }, /* U+00EF */
+ { "jcirc", "ĵ" }, /* U+0135 */
+ { "jcy", "й" }, /* U+0439 */
+ { "jfr", "𝔧" }, /* U+1D527 */
+ { "jmath", "ȷ" }, /* U+0237 */
+ { "jopf", "𝕛" }, /* U+1D55B */
+ { "jscr", "𝒿" }, /* U+1D4BF */
+ { "jsercy", "ј" }, /* U+0458 */
+ { "jukcy", "є" }, /* U+0454 */
+ { "kappa", "κ" }, /* U+03BA */
+ { "kappav", "ϰ" }, /* U+03F0 */
+ { "kcedil", "ķ" }, /* U+0137 */
+ { "kcy", "к" }, /* U+043A */
+ { "kfr", "𝔨" }, /* U+1D528 */
+ { "kgreen", "ĸ" }, /* U+0138 */
+ { "khcy", "х" }, /* U+0445 */
+ { "kjcy", "ќ" }, /* U+045C */
+ { "kopf", "𝕜" }, /* U+1D55C */
+ { "kscr", "𝓀" }, /* U+1D4C0 */
+ { "lAarr", "⇚" }, /* U+21DA */
+ { "lArr", "⇐" }, /* U+21D0 */
+ { "lAtail", "⤛" }, /* U+291B */
+ { "lBarr", "⤎" }, /* U+290E */
+ { "lE", "≦" }, /* U+2266 */
+ { "lEg", "⪋" }, /* U+2A8B */
+ { "lHar", "⥢" }, /* U+2962 */
+ { "lacute", "ĺ" }, /* U+013A */
+ { "laemptyv", "⦴" }, /* U+29B4 */
+ { "lagran", "ℒ" }, /* U+2112 */
+ { "lambda", "λ" }, /* U+03BB */
+ { "lang", "⟨" }, /* U+27E8 */
+ { "langd", "⦑" }, /* U+2991 */
+ { "langle", "⟨" }, /* U+27E8 */
+ { "lap", "⪅" }, /* U+2A85 */
+ { "laquo", "«" }, /* U+00AB */
+ { "larr", "←" }, /* U+2190 */
+ { "larrb", "⇤" }, /* U+21E4 */
+ { "larrbfs", "⤟" }, /* U+291F */
+ { "larrfs", "⤝" }, /* U+291D */
+ { "larrhk", "↩" }, /* U+21A9 */
+ { "larrlp", "↫" }, /* U+21AB */
+ { "larrpl", "⤹" }, /* U+2939 */
+ { "larrsim", "⥳" }, /* U+2973 */
+ { "larrtl", "↢" }, /* U+21A2 */
+ { "lat", "⪫" }, /* U+2AAB */
+ { "latail", "⤙" }, /* U+2919 */
+ { "late", "⪭" }, /* U+2AAD */
+ { "lates", "⪭︀" }, /* U+2AAD U+FE00 */
+ { "lbarr", "⤌" }, /* U+290C */
+ { "lbbrk", "❲" }, /* U+2772 */
+ { "lbrace", "{" }, /* U+007B */
+ { "lbrack", "[" }, /* U+005B */
+ { "lbrke", "⦋" }, /* U+298B */
+ { "lbrksld", "⦏" }, /* U+298F */
+ { "lbrkslu", "⦍" }, /* U+298D */
+ { "lcaron", "ľ" }, /* U+013E */
+ { "lcedil", "ļ" }, /* U+013C */
+ { "lceil", "⌈" }, /* U+2308 */
+ { "lcub", "{" }, /* U+007B */
+ { "lcy", "л" }, /* U+043B */
+ { "ldca", "⤶" }, /* U+2936 */
+ { "ldquo", "“" }, /* U+201C */
+ { "ldquor", "„" }, /* U+201E */
+ { "ldrdhar", "⥧" }, /* U+2967 */
+ { "ldrushar", "⥋" }, /* U+294B */
+ { "ldsh", "↲" }, /* U+21B2 */
+ { "le", "≤" }, /* U+2264 */
+ { "leftarrow", "←" }, /* U+2190 */
+ { "leftarrowtail", "↢" }, /* U+21A2 */
+ { "leftharpoondown", "↽" }, /* U+21BD */
+ { "leftharpoonup", "↼" }, /* U+21BC */
+ { "leftleftarrows", "⇇" }, /* U+21C7 */
+ { "leftrightarrow", "↔" }, /* U+2194 */
+ { "leftrightarrows", "⇆" }, /* U+21C6 */
+ { "leftthreetimes", "⋋" }, /* U+22CB */
+ { "leg", "⋚" }, /* U+22DA */
+ { "leq", "≤" }, /* U+2264 */
+ { "leqq", "≦" }, /* U+2266 */
+ { "leqslant", "⩽" }, /* U+2A7D */
+ { "les", "⩽" }, /* U+2A7D */
+ { "lescc", "⪨" }, /* U+2AA8 */
+ { "lesdot", "⩿" }, /* U+2A7F */
+ { "lesdoto", "⪁" }, /* U+2A81 */
+ { "lesdotor", "⪃" }, /* U+2A83 */
+ { "lesg", "⋚︀" }, /* U+22DA U+FE00 */
+ { "lesges", "⪓" }, /* U+2A93 */
+ { "lessapprox", "⪅" }, /* U+2A85 */
+ { "lessdot", "⋖" }, /* U+22D6 */
+ { "lesseqgtr", "⋚" }, /* U+22DA */
+ { "lesseqqgtr", "⪋" }, /* U+2A8B */
+ { "lessgtr", "≶" }, /* U+2276 */
+ { "lesssim", "≲" }, /* U+2272 */
+ { "lfisht", "⥼" }, /* U+297C */
+ { "lfloor", "⌊" }, /* U+230A */
+ { "lfr", "𝔩" }, /* U+1D529 */
+ { "lg", "≶" }, /* U+2276 */
+ { "lgE", "⪑" }, /* U+2A91 */
+ { "lhard", "↽" }, /* U+21BD */
+ { "lharu", "↼" }, /* U+21BC */
+ { "lharul", "⥪" }, /* U+296A */
+ { "lhblk", "▄" }, /* U+2584 */
+ { "ljcy", "љ" }, /* U+0459 */
+ { "ll", "≪" }, /* U+226A */
+ { "llarr", "⇇" }, /* U+21C7 */
+ { "llcorner", "⌞" }, /* U+231E */
+ { "llhard", "⥫" }, /* U+296B */
+ { "lltri", "◺" }, /* U+25FA */
+ { "lmidot", "ŀ" }, /* U+0140 */
+ { "lmoust", "⎰" }, /* U+23B0 */
+ { "lmoustache", "⎰" }, /* U+23B0 */
+ { "lnE", "≨" }, /* U+2268 */
+ { "lnap", "⪉" }, /* U+2A89 */
+ { "lnapprox", "⪉" }, /* U+2A89 */
+ { "lne", "⪇" }, /* U+2A87 */
+ { "lneq", "⪇" }, /* U+2A87 */
+ { "lneqq", "≨" }, /* U+2268 */
+ { "lnsim", "⋦" }, /* U+22E6 */
+ { "loang", "⟬" }, /* U+27EC */
+ { "loarr", "⇽" }, /* U+21FD */
+ { "lobrk", "⟦" }, /* U+27E6 */
+ { "longleftarrow", "⟵" }, /* U+27F5 */
+ { "longmapsto", "⟼" }, /* U+27FC */
+ { "longrightarrow", "⟶" }, /* U+27F6 */
+ { "looparrowleft", "↫" }, /* U+21AB */
+ { "looparrowright", "↬" }, /* U+21AC */
+ { "lopar", "⦅" }, /* U+2985 */
+ { "lopf", "𝕝" }, /* U+1D55D */
+ { "loplus", "⨭" }, /* U+2A2D */
+ { "lotimes", "⨴" }, /* U+2A34 */
+ { "lowast", "∗" }, /* U+2217 */
+ { "lowbar", "_" }, /* U+005F */
+ { "loz", "◊" }, /* U+25CA */
+ { "lozenge", "◊" }, /* U+25CA */
+ { "lozf", "⧫" }, /* U+29EB */
+ { "lpar", "(" }, /* U+0028 */
+ { "lparlt", "⦓" }, /* U+2993 */
+ { "lrarr", "⇆" }, /* U+21C6 */
+ { "lrcorner", "⌟" }, /* U+231F */
+ { "lrhar", "⇋" }, /* U+21CB */
+ { "lrhard", "⥭" }, /* U+296D */
+ { "lrm", "\342\200\216" }, /* U+200E */
+ { "lrtri", "⊿" }, /* U+22BF */
+ { "lsaquo", "‹" }, /* U+2039 */
+ { "lscr", "𝓁" }, /* U+1D4C1 */
+ { "lsh", "↰" }, /* U+21B0 */
+ { "lsim", "≲" }, /* U+2272 */
+ { "lsime", "⪍" }, /* U+2A8D */
+ { "lsimg", "⪏" }, /* U+2A8F */
+ { "lsqb", "[" }, /* U+005B */
+ { "lsquo", "‘" }, /* U+2018 */
+ { "lsquor", "‚" }, /* U+201A */
+ { "lstrok", "ł" }, /* U+0142 */
+ { "lt", "<" }, /* U+003C */
+ { "ltcc", "⪦" }, /* U+2AA6 */
+ { "ltcir", "⩹" }, /* U+2A79 */
+ { "ltdot", "⋖" }, /* U+22D6 */
+ { "lthree", "⋋" }, /* U+22CB */
+ { "ltimes", "⋉" }, /* U+22C9 */
+ { "ltlarr", "⥶" }, /* U+2976 */
+ { "ltquest", "⩻" }, /* U+2A7B */
+ { "ltrPar", "⦖" }, /* U+2996 */
+ { "ltri", "◃" }, /* U+25C3 */
+ { "ltrie", "⊴" }, /* U+22B4 */
+ { "ltrif", "◂" }, /* U+25C2 */
+ { "lurdshar", "⥊" }, /* U+294A */
+ { "luruhar", "⥦" }, /* U+2966 */
+ { "lvertneqq", "≨︀" }, /* U+2268 U+FE00 */
+ { "lvnE", "≨︀" }, /* U+2268 U+FE00 */
+ { "mDDot", "∺" }, /* U+223A */
+ { "macr", "¯" }, /* U+00AF */
+ { "male", "♂" }, /* U+2642 */
+ { "malt", "✠" }, /* U+2720 */
+ { "maltese", "✠" }, /* U+2720 */
+ { "map", "↦" }, /* U+21A6 */
+ { "mapsto", "↦" }, /* U+21A6 */
+ { "mapstodown", "↧" }, /* U+21A7 */
+ { "mapstoleft", "↤" }, /* U+21A4 */
+ { "mapstoup", "↥" }, /* U+21A5 */
+ { "marker", "▮" }, /* U+25AE */
+ { "mcomma", "⨩" }, /* U+2A29 */
+ { "mcy", "м" }, /* U+043C */
+ { "mdash", "—" }, /* U+2014 */
+ { "measuredangle", "∡" }, /* U+2221 */
+ { "mfr", "𝔪" }, /* U+1D52A */
+ { "mho", "℧" }, /* U+2127 */
+ { "micro", "µ" }, /* U+00B5 */
+ { "mid", "∣" }, /* U+2223 */
+ { "midast", "*" }, /* U+002A */
+ { "midcir", "⫰" }, /* U+2AF0 */
+ { "middot", "·" }, /* U+00B7 */
+ { "minus", "−" }, /* U+2212 */
+ { "minusb", "⊟" }, /* U+229F */
+ { "minusd", "∸" }, /* U+2238 */
+ { "minusdu", "⨪" }, /* U+2A2A */
+ { "mlcp", "⫛" }, /* U+2ADB */
+ { "mldr", "…" }, /* U+2026 */
+ { "mnplus", "∓" }, /* U+2213 */
+ { "models", "⊧" }, /* U+22A7 */
+ { "mopf", "𝕞" }, /* U+1D55E */
+ { "mp", "∓" }, /* U+2213 */
+ { "mscr", "𝓂" }, /* U+1D4C2 */
+ { "mstpos", "∾" }, /* U+223E */
+ { "mu", "μ" }, /* U+03BC */
+ { "multimap", "⊸" }, /* U+22B8 */
+ { "mumap", "⊸" }, /* U+22B8 */
+ { "nGg", "⋙̸" }, /* U+22D9 U+0338 */
+ { "nGt", "≫⃒" }, /* U+226B U+20D2 */
+ { "nGtv", "≫̸" }, /* U+226B U+0338 */
+ { "nLeftarrow", "⇍" }, /* U+21CD */
+ { "nLeftrightarrow", "⇎" }, /* U+21CE */
+ { "nLl", "⋘̸" }, /* U+22D8 U+0338 */
+ { "nLt", "≪⃒" }, /* U+226A U+20D2 */
+ { "nLtv", "≪̸" }, /* U+226A U+0338 */
+ { "nRightarrow", "⇏" }, /* U+21CF */
+ { "nVDash", "⊯" }, /* U+22AF */
+ { "nVdash", "⊮" }, /* U+22AE */
+ { "nabla", "∇" }, /* U+2207 */
+ { "nacute", "ń" }, /* U+0144 */
+ { "nang", "∠⃒" }, /* U+2220 U+20D2 */
+ { "nap", "≉" }, /* U+2249 */
+ { "napE", "⩰̸" }, /* U+2A70 U+0338 */
+ { "napid", "≋̸" }, /* U+224B U+0338 */
+ { "napos", "ʼn" }, /* U+0149 */
+ { "napprox", "≉" }, /* U+2249 */
+ { "natur", "♮" }, /* U+266E */
+ { "natural", "♮" }, /* U+266E */
+ { "naturals", "ℕ" }, /* U+2115 */
+ { "nbsp", " " }, /* U+00A0 */
+ { "nbump", "≎̸" }, /* U+224E U+0338 */
+ { "nbumpe", "≏̸" }, /* U+224F U+0338 */
+ { "ncap", "⩃" }, /* U+2A43 */
+ { "ncaron", "ň" }, /* U+0148 */
+ { "ncedil", "ņ" }, /* U+0146 */
+ { "ncong", "≇" }, /* U+2247 */
+ { "ncongdot", "⩭̸" }, /* U+2A6D U+0338 */
+ { "ncup", "⩂" }, /* U+2A42 */
+ { "ncy", "н" }, /* U+043D */
+ { "ndash", "–" }, /* U+2013 */
+ { "ne", "≠" }, /* U+2260 */
+ { "neArr", "⇗" }, /* U+21D7 */
+ { "nearhk", "⤤" }, /* U+2924 */
+ { "nearr", "↗" }, /* U+2197 */
+ { "nearrow", "↗" }, /* U+2197 */
+ { "nedot", "≐̸" }, /* U+2250 U+0338 */
+ { "nequiv", "≢" }, /* U+2262 */
+ { "nesear", "⤨" }, /* U+2928 */
+ { "nesim", "≂̸" }, /* U+2242 U+0338 */
+ { "nexist", "∄" }, /* U+2204 */
+ { "nexists", "∄" }, /* U+2204 */
+ { "nfr", "𝔫" }, /* U+1D52B */
+ { "ngE", "≧̸" }, /* U+2267 U+0338 */
+ { "nge", "≱" }, /* U+2271 */
+ { "ngeq", "≱" }, /* U+2271 */
+ { "ngeqq", "≧̸" }, /* U+2267 U+0338 */
+ { "ngeqslant", "⩾̸" }, /* U+2A7E U+0338 */
+ { "nges", "⩾̸" }, /* U+2A7E U+0338 */
+ { "ngsim", "≵" }, /* U+2275 */
+ { "ngt", "≯" }, /* U+226F */
+ { "ngtr", "≯" }, /* U+226F */
+ { "nhArr", "⇎" }, /* U+21CE */
+ { "nharr", "↮" }, /* U+21AE */
+ { "nhpar", "⫲" }, /* U+2AF2 */
+ { "ni", "∋" }, /* U+220B */
+ { "nis", "⋼" }, /* U+22FC */
+ { "nisd", "⋺" }, /* U+22FA */
+ { "niv", "∋" }, /* U+220B */
+ { "njcy", "њ" }, /* U+045A */
+ { "nlArr", "⇍" }, /* U+21CD */
+ { "nlE", "≦̸" }, /* U+2266 U+0338 */
+ { "nlarr", "↚" }, /* U+219A */
+ { "nldr", "‥" }, /* U+2025 */
+ { "nle", "≰" }, /* U+2270 */
+ { "nleftarrow", "↚" }, /* U+219A */
+ { "nleftrightarrow", "↮" }, /* U+21AE */
+ { "nleq", "≰" }, /* U+2270 */
+ { "nleqq", "≦̸" }, /* U+2266 U+0338 */
+ { "nleqslant", "⩽̸" }, /* U+2A7D U+0338 */
+ { "nles", "⩽̸" }, /* U+2A7D U+0338 */
+ { "nless", "≮" }, /* U+226E */
+ { "nlsim", "≴" }, /* U+2274 */
+ { "nlt", "≮" }, /* U+226E */
+ { "nltri", "⋪" }, /* U+22EA */
+ { "nltrie", "⋬" }, /* U+22EC */
+ { "nmid", "∤" }, /* U+2224 */
+ { "nopf", "𝕟" }, /* U+1D55F */
+ { "not", "¬" }, /* U+00AC */
+ { "notin", "∉" }, /* U+2209 */
+ { "notinE", "⋹̸" }, /* U+22F9 U+0338 */
+ { "notindot", "⋵̸" }, /* U+22F5 U+0338 */
+ { "notinva", "∉" }, /* U+2209 */
+ { "notinvb", "⋷" }, /* U+22F7 */
+ { "notinvc", "⋶" }, /* U+22F6 */
+ { "notni", "∌" }, /* U+220C */
+ { "notniva", "∌" }, /* U+220C */
+ { "notnivb", "⋾" }, /* U+22FE */
+ { "notnivc", "⋽" }, /* U+22FD */
+ { "npar", "∦" }, /* U+2226 */
+ { "nparallel", "∦" }, /* U+2226 */
+ { "nparsl", "⫽⃥" }, /* U+2AFD U+20E5 */
+ { "npart", "∂̸" }, /* U+2202 U+0338 */
+ { "npolint", "⨔" }, /* U+2A14 */
+ { "npr", "⊀" }, /* U+2280 */
+ { "nprcue", "⋠" }, /* U+22E0 */
+ { "npre", "⪯̸" }, /* U+2AAF U+0338 */
+ { "nprec", "⊀" }, /* U+2280 */
+ { "npreceq", "⪯̸" }, /* U+2AAF U+0338 */
+ { "nrArr", "⇏" }, /* U+21CF */
+ { "nrarr", "↛" }, /* U+219B */
+ { "nrarrc", "⤳̸" }, /* U+2933 U+0338 */
+ { "nrarrw", "↝̸" }, /* U+219D U+0338 */
+ { "nrightarrow", "↛" }, /* U+219B */
+ { "nrtri", "⋫" }, /* U+22EB */
+ { "nrtrie", "⋭" }, /* U+22ED */
+ { "nsc", "⊁" }, /* U+2281 */
+ { "nsccue", "⋡" }, /* U+22E1 */
+ { "nsce", "⪰̸" }, /* U+2AB0 U+0338 */
+ { "nscr", "𝓃" }, /* U+1D4C3 */
+ { "nshortmid", "∤" }, /* U+2224 */
+ { "nshortparallel", "∦" }, /* U+2226 */
+ { "nsim", "≁" }, /* U+2241 */
+ { "nsime", "≄" }, /* U+2244 */
+ { "nsimeq", "≄" }, /* U+2244 */
+ { "nsmid", "∤" }, /* U+2224 */
+ { "nspar", "∦" }, /* U+2226 */
+ { "nsqsube", "⋢" }, /* U+22E2 */
+ { "nsqsupe", "⋣" }, /* U+22E3 */
+ { "nsub", "⊄" }, /* U+2284 */
+ { "nsubE", "⫅̸" }, /* U+2AC5 U+0338 */
+ { "nsube", "⊈" }, /* U+2288 */
+ { "nsubset", "⊂⃒" }, /* U+2282 U+20D2 */
+ { "nsubseteq", "⊈" }, /* U+2288 */
+ { "nsubseteqq", "⫅̸" }, /* U+2AC5 U+0338 */
+ { "nsucc", "⊁" }, /* U+2281 */
+ { "nsucceq", "⪰̸" }, /* U+2AB0 U+0338 */
+ { "nsup", "⊅" }, /* U+2285 */
+ { "nsupE", "⫆̸" }, /* U+2AC6 U+0338 */
+ { "nsupe", "⊉" }, /* U+2289 */
+ { "nsupset", "⊃⃒" }, /* U+2283 U+20D2 */
+ { "nsupseteq", "⊉" }, /* U+2289 */
+ { "nsupseteqq", "⫆̸" }, /* U+2AC6 U+0338 */
+ { "ntgl", "≹" }, /* U+2279 */
+ { "ntilde", "ñ" }, /* U+00F1 */
+ { "ntlg", "≸" }, /* U+2278 */
+ { "ntriangleleft", "⋪" }, /* U+22EA */
+ { "ntrianglelefteq", "⋬" }, /* U+22EC */
+ { "ntriangleright", "⋫" }, /* U+22EB */
+ { "nu", "ν" }, /* U+03BD */
+ { "num", "#" }, /* U+0023 */
+ { "numero", "№" }, /* U+2116 */
+ { "numsp", " " }, /* U+2007 */
+ { "nvDash", "⊭" }, /* U+22AD */
+ { "nvHarr", "⤄" }, /* U+2904 */
+ { "nvap", "≍⃒" }, /* U+224D U+20D2 */
+ { "nvdash", "⊬" }, /* U+22AC */
+ { "nvge", "≥⃒" }, /* U+2265 U+20D2 */
+ { "nvgt", ">⃒" }, /* U+003E U+20D2 */
+ { "nvinfin", "⧞" }, /* U+29DE */
+ { "nvlArr", "⤂" }, /* U+2902 */
+ { "nvle", "≤⃒" }, /* U+2264 U+20D2 */
+ { "nvlt", "<⃒" }, /* U+003C U+20D2 */
+ { "nvltrie", "⊴⃒" }, /* U+22B4 U+20D2 */
+ { "nvrArr", "⤃" }, /* U+2903 */
+ { "nvrtrie", "⊵⃒" }, /* U+22B5 U+20D2 */
+ { "nvsim", "∼⃒" }, /* U+223C U+20D2 */
+ { "nwArr", "⇖" }, /* U+21D6 */
+ { "nwarhk", "⤣" }, /* U+2923 */
+ { "nwarr", "↖" }, /* U+2196 */
+ { "nwarrow", "↖" }, /* U+2196 */
+ { "nwnear", "⤧" }, /* U+2927 */
+ { "oS", "Ⓢ" }, /* U+24C8 */
+ { "oacute", "ó" }, /* U+00F3 */
+ { "oast", "⊛" }, /* U+229B */
+ { "ocir", "⊚" }, /* U+229A */
+ { "ocirc", "ô" }, /* U+00F4 */
+ { "ocy", "о" }, /* U+043E */
+ { "odash", "⊝" }, /* U+229D */
+ { "odblac", "ő" }, /* U+0151 */
+ { "odiv", "⨸" }, /* U+2A38 */
+ { "odot", "⊙" }, /* U+2299 */
+ { "odsold", "⦼" }, /* U+29BC */
+ { "oelig", "œ" }, /* U+0153 */
+ { "ofcir", "⦿" }, /* U+29BF */
+ { "ofr", "𝔬" }, /* U+1D52C */
+ { "ogon", "˛" }, /* U+02DB */
+ { "ograve", "ò" }, /* U+00F2 */
+ { "ogt", "⧁" }, /* U+29C1 */
+ { "ohbar", "⦵" }, /* U+29B5 */
+ { "ohm", "Ω" }, /* U+03A9 */
+ { "oint", "∮" }, /* U+222E */
+ { "olarr", "↺" }, /* U+21BA */
+ { "olcir", "⦾" }, /* U+29BE */
+ { "olcross", "⦻" }, /* U+29BB */
+ { "oline", "‾" }, /* U+203E */
+ { "olt", "⧀" }, /* U+29C0 */
+ { "omacr", "ō" }, /* U+014D */
+ { "omega", "ω" }, /* U+03C9 */
+ { "omicron", "ο" }, /* U+03BF */
+ { "omid", "⦶" }, /* U+29B6 */
+ { "ominus", "⊖" }, /* U+2296 */
+ { "oopf", "𝕠" }, /* U+1D560 */
+ { "opar", "⦷" }, /* U+29B7 */
+ { "operp", "⦹" }, /* U+29B9 */
+ { "oplus", "⊕" }, /* U+2295 */
+ { "or", "∨" }, /* U+2228 */
+ { "orarr", "↻" }, /* U+21BB */
+ { "ord", "⩝" }, /* U+2A5D */
+ { "order", "ℴ" }, /* U+2134 */
+ { "orderof", "ℴ" }, /* U+2134 */
+ { "ordf", "ª" }, /* U+00AA */
+ { "ordm", "º" }, /* U+00BA */
+ { "origof", "⊶" }, /* U+22B6 */
+ { "oror", "⩖" }, /* U+2A56 */
+ { "orslope", "⩗" }, /* U+2A57 */
+ { "orv", "⩛" }, /* U+2A5B */
+ { "oscr", "ℴ" }, /* U+2134 */
+ { "oslash", "ø" }, /* U+00F8 */
+ { "osol", "⊘" }, /* U+2298 */
+ { "otilde", "õ" }, /* U+00F5 */
+ { "otimes", "⊗" }, /* U+2297 */
+ { "otimesas", "⨶" }, /* U+2A36 */
+ { "ouml", "ö" }, /* U+00F6 */
+ { "ovbar", "⌽" }, /* U+233D */
+ { "par", "∥" }, /* U+2225 */
+ { "para", "¶" }, /* U+00B6 */
+ { "parallel", "∥" }, /* U+2225 */
+ { "parsim", "⫳" }, /* U+2AF3 */
+ { "parsl", "⫽" }, /* U+2AFD */
+ { "part", "∂" }, /* U+2202 */
+ { "pcy", "п" }, /* U+043F */
+ { "percnt", "%" }, /* U+0025 */
+ { "period", "." }, /* U+002E */
+ { "permil", "‰" }, /* U+2030 */
+ { "perp", "⊥" }, /* U+22A5 */
+ { "pertenk", "‱" }, /* U+2031 */
+ { "pfr", "𝔭" }, /* U+1D52D */
+ { "phi", "φ" }, /* U+03C6 */
+ { "phiv", "ϕ" }, /* U+03D5 */
+ { "phmmat", "ℳ" }, /* U+2133 */
+ { "phone", "☎" }, /* U+260E */
+ { "pi", "π" }, /* U+03C0 */
+ { "pitchfork", "⋔" }, /* U+22D4 */
+ { "piv", "ϖ" }, /* U+03D6 */
+ { "planck", "ℏ" }, /* U+210F */
+ { "planckh", "ℎ" }, /* U+210E */
+ { "plankv", "ℏ" }, /* U+210F */
+ { "plus", "+" }, /* U+002B */
+ { "plusacir", "⨣" }, /* U+2A23 */
+ { "plusb", "⊞" }, /* U+229E */
+ { "pluscir", "⨢" }, /* U+2A22 */
+ { "plusdo", "∔" }, /* U+2214 */
+ { "plusdu", "⨥" }, /* U+2A25 */
+ { "pluse", "⩲" }, /* U+2A72 */
+ { "plusmn", "±" }, /* U+00B1 */
+ { "plussim", "⨦" }, /* U+2A26 */
+ { "plustwo", "⨧" }, /* U+2A27 */
+ { "pm", "±" }, /* U+00B1 */
+ { "pointint", "⨕" }, /* U+2A15 */
+ { "popf", "𝕡" }, /* U+1D561 */
+ { "pound", "£" }, /* U+00A3 */
+ { "pr", "≺" }, /* U+227A */
+ { "prE", "⪳" }, /* U+2AB3 */
+ { "prap", "⪷" }, /* U+2AB7 */
+ { "prcue", "≼" }, /* U+227C */
+ { "pre", "⪯" }, /* U+2AAF */
+ { "prec", "≺" }, /* U+227A */
+ { "precapprox", "⪷" }, /* U+2AB7 */
+ { "preccurlyeq", "≼" }, /* U+227C */
+ { "preceq", "⪯" }, /* U+2AAF */
+ { "precnapprox", "⪹" }, /* U+2AB9 */
+ { "precneqq", "⪵" }, /* U+2AB5 */
+ { "precnsim", "⋨" }, /* U+22E8 */
+ { "precsim", "≾" }, /* U+227E */
+ { "prime", "′" }, /* U+2032 */
+ { "primes", "ℙ" }, /* U+2119 */
+ { "prnE", "⪵" }, /* U+2AB5 */
+ { "prnap", "⪹" }, /* U+2AB9 */
+ { "prnsim", "⋨" }, /* U+22E8 */
+ { "prod", "∏" }, /* U+220F */
+ { "profalar", "⌮" }, /* U+232E */
+ { "profline", "⌒" }, /* U+2312 */
+ { "profsurf", "⌓" }, /* U+2313 */
+ { "prop", "∝" }, /* U+221D */
+ { "propto", "∝" }, /* U+221D */
+ { "prsim", "≾" }, /* U+227E */
+ { "prurel", "⊰" }, /* U+22B0 */
+ { "pscr", "𝓅" }, /* U+1D4C5 */
+ { "psi", "ψ" }, /* U+03C8 */
+ { "puncsp", " " }, /* U+2008 */
+ { "qfr", "𝔮" }, /* U+1D52E */
+ { "qint", "⨌" }, /* U+2A0C */
+ { "qopf", "𝕢" }, /* U+1D562 */
+ { "qprime", "⁗" }, /* U+2057 */
+ { "qscr", "𝓆" }, /* U+1D4C6 */
+ { "quaternions", "ℍ" }, /* U+210D */
+ { "quatint", "⨖" }, /* U+2A16 */
+ { "quest", "?" }, /* U+003F */
+ { "questeq", "≟" }, /* U+225F */
+ { "quot", "\"" }, /* U+0022 */
+ { "rAarr", "⇛" }, /* U+21DB */
+ { "rArr", "⇒" }, /* U+21D2 */
+ { "rAtail", "⤜" }, /* U+291C */
+ { "rBarr", "⤏" }, /* U+290F */
+ { "rHar", "⥤" }, /* U+2964 */
+ { "race", "∽̱" }, /* U+223D U+0331 */
+ { "racute", "ŕ" }, /* U+0155 */
+ { "radic", "√" }, /* U+221A */
+ { "raemptyv", "⦳" }, /* U+29B3 */
+ { "rang", "⟩" }, /* U+27E9 */
+ { "rangd", "⦒" }, /* U+2992 */
+ { "range", "⦥" }, /* U+29A5 */
+ { "rangle", "⟩" }, /* U+27E9 */
+ { "raquo", "»" }, /* U+00BB */
+ { "rarr", "→" }, /* U+2192 */
+ { "rarrap", "⥵" }, /* U+2975 */
+ { "rarrb", "⇥" }, /* U+21E5 */
+ { "rarrbfs", "⤠" }, /* U+2920 */
+ { "rarrc", "⤳" }, /* U+2933 */
+ { "rarrfs", "⤞" }, /* U+291E */
+ { "rarrhk", "↪" }, /* U+21AA */
+ { "rarrlp", "↬" }, /* U+21AC */
+ { "rarrpl", "⥅" }, /* U+2945 */
+ { "rarrsim", "⥴" }, /* U+2974 */
+ { "rarrtl", "↣" }, /* U+21A3 */
+ { "rarrw", "↝" }, /* U+219D */
+ { "ratail", "⤚" }, /* U+291A */
+ { "ratio", "∶" }, /* U+2236 */
+ { "rationals", "ℚ" }, /* U+211A */
+ { "rbarr", "⤍" }, /* U+290D */
+ { "rbbrk", "❳" }, /* U+2773 */
+ { "rbrace", "}" }, /* U+007D */
+ { "rbrack", "]" }, /* U+005D */
+ { "rbrke", "⦌" }, /* U+298C */
+ { "rbrksld", "⦎" }, /* U+298E */
+ { "rbrkslu", "⦐" }, /* U+2990 */
+ { "rcaron", "ř" }, /* U+0159 */
+ { "rcedil", "ŗ" }, /* U+0157 */
+ { "rceil", "⌉" }, /* U+2309 */
+ { "rcub", "}" }, /* U+007D */
+ { "rcy", "р" }, /* U+0440 */
+ { "rdca", "⤷" }, /* U+2937 */
+ { "rdldhar", "⥩" }, /* U+2969 */
+ { "rdquo", "”" }, /* U+201D */
+ { "rdquor", "”" }, /* U+201D */
+ { "rdsh", "↳" }, /* U+21B3 */
+ { "real", "ℜ" }, /* U+211C */
+ { "realine", "ℛ" }, /* U+211B */
+ { "realpart", "ℜ" }, /* U+211C */
+ { "reals", "ℝ" }, /* U+211D */
+ { "rect", "▭" }, /* U+25AD */
+ { "reg", "®" }, /* U+00AE */
+ { "rfisht", "⥽" }, /* U+297D */
+ { "rfloor", "⌋" }, /* U+230B */
+ { "rfr", "𝔯" }, /* U+1D52F */
+ { "rhard", "⇁" }, /* U+21C1 */
+ { "rharu", "⇀" }, /* U+21C0 */
+ { "rharul", "⥬" }, /* U+296C */
+ { "rho", "ρ" }, /* U+03C1 */
+ { "rhov", "ϱ" }, /* U+03F1 */
+ { "rightarrow", "→" }, /* U+2192 */
+ { "rightarrowtail", "↣" }, /* U+21A3 */
+ { "rightharpoonup", "⇀" }, /* U+21C0 */
+ { "rightleftarrows", "⇄" }, /* U+21C4 */
+ { "rightsquigarrow", "↝" }, /* U+219D */
+ { "rightthreetimes", "⋌" }, /* U+22CC */
+ { "ring", "˚" }, /* U+02DA */
+ { "risingdotseq", "≓" }, /* U+2253 */
+ { "rlarr", "⇄" }, /* U+21C4 */
+ { "rlhar", "⇌" }, /* U+21CC */
+ { "rlm", "\342\200\217" }, /* U+200F */
+ { "rmoust", "⎱" }, /* U+23B1 */
+ { "rmoustache", "⎱" }, /* U+23B1 */
+ { "rnmid", "⫮" }, /* U+2AEE */
+ { "roang", "⟭" }, /* U+27ED */
+ { "roarr", "⇾" }, /* U+21FE */
+ { "robrk", "⟧" }, /* U+27E7 */
+ { "ropar", "⦆" }, /* U+2986 */
+ { "ropf", "𝕣" }, /* U+1D563 */
+ { "roplus", "⨮" }, /* U+2A2E */
+ { "rotimes", "⨵" }, /* U+2A35 */
+ { "rpar", ")" }, /* U+0029 */
+ { "rpargt", "⦔" }, /* U+2994 */
+ { "rppolint", "⨒" }, /* U+2A12 */
+ { "rrarr", "⇉" }, /* U+21C9 */
+ { "rsaquo", "›" }, /* U+203A */
+ { "rscr", "𝓇" }, /* U+1D4C7 */
+ { "rsh", "↱" }, /* U+21B1 */
+ { "rsqb", "]" }, /* U+005D */
+ { "rsquo", "’" }, /* U+2019 */
+ { "rsquor", "’" }, /* U+2019 */
+ { "rthree", "⋌" }, /* U+22CC */
+ { "rtimes", "⋊" }, /* U+22CA */
+ { "rtri", "▹" }, /* U+25B9 */
+ { "rtrie", "⊵" }, /* U+22B5 */
+ { "rtrif", "▸" }, /* U+25B8 */
+ { "rtriltri", "⧎" }, /* U+29CE */
+ { "ruluhar", "⥨" }, /* U+2968 */
+ { "rx", "℞" }, /* U+211E */
+ { "sacute", "ś" }, /* U+015B */
+ { "sbquo", "‚" }, /* U+201A */
+ { "sc", "≻" }, /* U+227B */
+ { "scE", "⪴" }, /* U+2AB4 */
+ { "scap", "⪸" }, /* U+2AB8 */
+ { "scaron", "š" }, /* U+0161 */
+ { "sccue", "≽" }, /* U+227D */
+ { "sce", "⪰" }, /* U+2AB0 */
+ { "scedil", "ş" }, /* U+015F */
+ { "scirc", "ŝ" }, /* U+015D */
+ { "scnE", "⪶" }, /* U+2AB6 */
+ { "scnap", "⪺" }, /* U+2ABA */
+ { "scnsim", "⋩" }, /* U+22E9 */
+ { "scpolint", "⨓" }, /* U+2A13 */
+ { "scsim", "≿" }, /* U+227F */
+ { "scy", "с" }, /* U+0441 */
+ { "sdot", "⋅" }, /* U+22C5 */
+ { "sdotb", "⊡" }, /* U+22A1 */
+ { "sdote", "⩦" }, /* U+2A66 */
+ { "seArr", "⇘" }, /* U+21D8 */
+ { "searhk", "⤥" }, /* U+2925 */
+ { "searr", "↘" }, /* U+2198 */
+ { "searrow", "↘" }, /* U+2198 */
+ { "sect", "§" }, /* U+00A7 */
+ { "semi", ";" }, /* U+003B */
+ { "seswar", "⤩" }, /* U+2929 */
+ { "setminus", "∖" }, /* U+2216 */
+ { "setmn", "∖" }, /* U+2216 */
+ { "sext", "✶" }, /* U+2736 */
+ { "sfr", "𝔰" }, /* U+1D530 */
+ { "sfrown", "⌢" }, /* U+2322 */
+ { "sharp", "♯" }, /* U+266F */
+ { "shchcy", "щ" }, /* U+0449 */
+ { "shcy", "ш" }, /* U+0448 */
+ { "shortmid", "∣" }, /* U+2223 */
+ { "shortparallel", "∥" }, /* U+2225 */
+ { "shy", "" }, /* U+00AD */
+ { "sigma", "σ" }, /* U+03C3 */
+ { "sigmaf", "ς" }, /* U+03C2 */
+ { "sigmav", "ς" }, /* U+03C2 */
+ { "sim", "∼" }, /* U+223C */
+ { "simdot", "⩪" }, /* U+2A6A */
+ { "sime", "≃" }, /* U+2243 */
+ { "simeq", "≃" }, /* U+2243 */
+ { "simg", "⪞" }, /* U+2A9E */
+ { "simgE", "⪠" }, /* U+2AA0 */
+ { "siml", "⪝" }, /* U+2A9D */
+ { "simlE", "⪟" }, /* U+2A9F */
+ { "simne", "≆" }, /* U+2246 */
+ { "simplus", "⨤" }, /* U+2A24 */
+ { "simrarr", "⥲" }, /* U+2972 */
+ { "slarr", "←" }, /* U+2190 */
+ { "smallsetminus", "∖" }, /* U+2216 */
+ { "smashp", "⨳" }, /* U+2A33 */
+ { "smeparsl", "⧤" }, /* U+29E4 */
+ { "smid", "∣" }, /* U+2223 */
+ { "smile", "⌣" }, /* U+2323 */
+ { "smt", "⪪" }, /* U+2AAA */
+ { "smte", "⪬" }, /* U+2AAC */
+ { "smtes", "⪬︀" }, /* U+2AAC U+FE00 */
+ { "softcy", "ь" }, /* U+044C */
+ { "sol", "/" }, /* U+002F */
+ { "solb", "⧄" }, /* U+29C4 */
+ { "solbar", "⌿" }, /* U+233F */
+ { "sopf", "𝕤" }, /* U+1D564 */
+ { "spades", "♠" }, /* U+2660 */
+ { "spadesuit", "♠" }, /* U+2660 */
+ { "spar", "∥" }, /* U+2225 */
+ { "sqcap", "⊓" }, /* U+2293 */
+ { "sqcaps", "⊓︀" }, /* U+2293 U+FE00 */
+ { "sqcup", "⊔" }, /* U+2294 */
+ { "sqcups", "⊔︀" }, /* U+2294 U+FE00 */
+ { "sqsub", "⊏" }, /* U+228F */
+ { "sqsube", "⊑" }, /* U+2291 */
+ { "sqsubset", "⊏" }, /* U+228F */
+ { "sqsubseteq", "⊑" }, /* U+2291 */
+ { "sqsup", "⊐" }, /* U+2290 */
+ { "sqsupe", "⊒" }, /* U+2292 */
+ { "sqsupset", "⊐" }, /* U+2290 */
+ { "sqsupseteq", "⊒" }, /* U+2292 */
+ { "squ", "□" }, /* U+25A1 */
+ { "square", "□" }, /* U+25A1 */
+ { "squarf", "▪" }, /* U+25AA */
+ { "squf", "▪" }, /* U+25AA */
+ { "srarr", "→" }, /* U+2192 */
+ { "sscr", "𝓈" }, /* U+1D4C8 */
+ { "ssetmn", "∖" }, /* U+2216 */
+ { "ssmile", "⌣" }, /* U+2323 */
+ { "sstarf", "⋆" }, /* U+22C6 */
+ { "star", "☆" }, /* U+2606 */
+ { "starf", "★" }, /* U+2605 */
+ { "straightepsilon", "ϵ" }, /* U+03F5 */
+ { "straightphi", "ϕ" }, /* U+03D5 */
+ { "strns", "¯" }, /* U+00AF */
+ { "sub", "⊂" }, /* U+2282 */
+ { "subE", "⫅" }, /* U+2AC5 */
+ { "subdot", "⪽" }, /* U+2ABD */
+ { "sube", "⊆" }, /* U+2286 */
+ { "subedot", "⫃" }, /* U+2AC3 */
+ { "submult", "⫁" }, /* U+2AC1 */
+ { "subnE", "⫋" }, /* U+2ACB */
+ { "subne", "⊊" }, /* U+228A */
+ { "subplus", "⪿" }, /* U+2ABF */
+ { "subrarr", "⥹" }, /* U+2979 */
+ { "subset", "⊂" }, /* U+2282 */
+ { "subseteq", "⊆" }, /* U+2286 */
+ { "subseteqq", "⫅" }, /* U+2AC5 */
+ { "subsetneq", "⊊" }, /* U+228A */
+ { "subsetneqq", "⫋" }, /* U+2ACB */
+ { "subsim", "⫇" }, /* U+2AC7 */
+ { "subsub", "⫕" }, /* U+2AD5 */
+ { "subsup", "⫓" }, /* U+2AD3 */
+ { "succ", "≻" }, /* U+227B */
+ { "succapprox", "⪸" }, /* U+2AB8 */
+ { "succcurlyeq", "≽" }, /* U+227D */
+ { "succeq", "⪰" }, /* U+2AB0 */
+ { "succnapprox", "⪺" }, /* U+2ABA */
+ { "succneqq", "⪶" }, /* U+2AB6 */
+ { "succnsim", "⋩" }, /* U+22E9 */
+ { "succsim", "≿" }, /* U+227F */
+ { "sum", "∑" }, /* U+2211 */
+ { "sung", "♪" }, /* U+266A */
+ { "sup", "⊃" }, /* U+2283 */
+ { "sup1", "¹" }, /* U+00B9 */
+ { "sup2", "²" }, /* U+00B2 */
+ { "sup3", "³" }, /* U+00B3 */
+ { "supE", "⫆" }, /* U+2AC6 */
+ { "supdot", "⪾" }, /* U+2ABE */
+ { "supdsub", "⫘" }, /* U+2AD8 */
+ { "supe", "⊇" }, /* U+2287 */
+ { "supedot", "⫄" }, /* U+2AC4 */
+ { "suphsol", "⟉" }, /* U+27C9 */
+ { "suphsub", "⫗" }, /* U+2AD7 */
+ { "suplarr", "⥻" }, /* U+297B */
+ { "supmult", "⫂" }, /* U+2AC2 */
+ { "supnE", "⫌" }, /* U+2ACC */
+ { "supne", "⊋" }, /* U+228B */
+ { "supplus", "⫀" }, /* U+2AC0 */
+ { "supset", "⊃" }, /* U+2283 */
+ { "supseteq", "⊇" }, /* U+2287 */
+ { "supseteqq", "⫆" }, /* U+2AC6 */
+ { "supsetneq", "⊋" }, /* U+228B */
+ { "supsetneqq", "⫌" }, /* U+2ACC */
+ { "supsim", "⫈" }, /* U+2AC8 */
+ { "supsub", "⫔" }, /* U+2AD4 */
+ { "supsup", "⫖" }, /* U+2AD6 */
+ { "swArr", "⇙" }, /* U+21D9 */
+ { "swarhk", "⤦" }, /* U+2926 */
+ { "swarr", "↙" }, /* U+2199 */
+ { "swarrow", "↙" }, /* U+2199 */
+ { "swnwar", "⤪" }, /* U+292A */
+ { "szlig", "ß" }, /* U+00DF */
+ { "target", "⌖" }, /* U+2316 */
+ { "tau", "τ" }, /* U+03C4 */
+ { "tbrk", "⎴" }, /* U+23B4 */
+ { "tcaron", "ť" }, /* U+0165 */
+ { "tcedil", "ţ" }, /* U+0163 */
+ { "tcy", "т" }, /* U+0442 */
+ { "tdot", "⃛" }, /* U+20DB */
+ { "telrec", "⌕" }, /* U+2315 */
+ { "tfr", "𝔱" }, /* U+1D531 */
+ { "there4", "∴" }, /* U+2234 */
+ { "therefore", "∴" }, /* U+2234 */
+ { "theta", "θ" }, /* U+03B8 */
+ { "thetasym", "ϑ" }, /* U+03D1 */
+ { "thetav", "ϑ" }, /* U+03D1 */
+ { "thickapprox", "≈" }, /* U+2248 */
+ { "thicksim", "∼" }, /* U+223C */
+ { "thinsp", " " }, /* U+2009 */
+ { "thkap", "≈" }, /* U+2248 */
+ { "thksim", "∼" }, /* U+223C */
+ { "thorn", "þ" }, /* U+00FE */
+ { "tilde", "˜" }, /* U+02DC */
+ { "times", "×" }, /* U+00D7 */
+ { "timesb", "⊠" }, /* U+22A0 */
+ { "timesbar", "⨱" }, /* U+2A31 */
+ { "timesd", "⨰" }, /* U+2A30 */
+ { "tint", "∭" }, /* U+222D */
+ { "toea", "⤨" }, /* U+2928 */
+ { "top", "⊤" }, /* U+22A4 */
+ { "topbot", "⌶" }, /* U+2336 */
+ { "topcir", "⫱" }, /* U+2AF1 */
+ { "topf", "𝕥" }, /* U+1D565 */
+ { "topfork", "⫚" }, /* U+2ADA */
+ { "tosa", "⤩" }, /* U+2929 */
+ { "tprime", "‴" }, /* U+2034 */
+ { "trade", "™" }, /* U+2122 */
+ { "triangle", "▵" }, /* U+25B5 */
+ { "triangledown", "▿" }, /* U+25BF */
+ { "triangleleft", "◃" }, /* U+25C3 */
+ { "trianglelefteq", "⊴" }, /* U+22B4 */
+ { "triangleq", "≜" }, /* U+225C */
+ { "triangleright", "▹" }, /* U+25B9 */
+ { "trianglerighteq", "⊵" }, /* U+22B5 */
+ { "tridot", "◬" }, /* U+25EC */
+ { "trie", "≜" }, /* U+225C */
+ { "triminus", "⨺" }, /* U+2A3A */
+ { "triplus", "⨹" }, /* U+2A39 */
+ { "trisb", "⧍" }, /* U+29CD */
+ { "tritime", "⨻" }, /* U+2A3B */
+ { "trpezium", "⏢" }, /* U+23E2 */
+ { "tscr", "𝓉" }, /* U+1D4C9 */
+ { "tscy", "ц" }, /* U+0446 */
+ { "tshcy", "ћ" }, /* U+045B */
+ { "tstrok", "ŧ" }, /* U+0167 */
+ { "twixt", "≬" }, /* U+226C */
+ { "uArr", "⇑" }, /* U+21D1 */
+ { "uHar", "⥣" }, /* U+2963 */
+ { "uacute", "ú" }, /* U+00FA */
+ { "uarr", "↑" }, /* U+2191 */
+ { "ubrcy", "ў" }, /* U+045E */
+ { "ubreve", "ŭ" }, /* U+016D */
+ { "ucirc", "û" }, /* U+00FB */
+ { "ucy", "у" }, /* U+0443 */
+ { "udarr", "⇅" }, /* U+21C5 */
+ { "udblac", "ű" }, /* U+0171 */
+ { "udhar", "⥮" }, /* U+296E */
+ { "ufisht", "⥾" }, /* U+297E */
+ { "ufr", "𝔲" }, /* U+1D532 */
+ { "ugrave", "ù" }, /* U+00F9 */
+ { "uharl", "↿" }, /* U+21BF */
+ { "uharr", "↾" }, /* U+21BE */
+ { "uhblk", "▀" }, /* U+2580 */
+ { "ulcorn", "⌜" }, /* U+231C */
+ { "ulcorner", "⌜" }, /* U+231C */
+ { "ulcrop", "⌏" }, /* U+230F */
+ { "ultri", "◸" }, /* U+25F8 */
+ { "umacr", "ū" }, /* U+016B */
+ { "uml", "¨" }, /* U+00A8 */
+ { "uogon", "ų" }, /* U+0173 */
+ { "uopf", "𝕦" }, /* U+1D566 */
+ { "uparrow", "↑" }, /* U+2191 */
+ { "updownarrow", "↕" }, /* U+2195 */
+ { "upharpoonleft", "↿" }, /* U+21BF */
+ { "upharpoonright", "↾" }, /* U+21BE */
+ { "uplus", "⊎" }, /* U+228E */
+ { "upsi", "υ" }, /* U+03C5 */
+ { "upsih", "ϒ" }, /* U+03D2 */
+ { "upsilon", "υ" }, /* U+03C5 */
+ { "upuparrows", "⇈" }, /* U+21C8 */
+ { "urcorn", "⌝" }, /* U+231D */
+ { "urcorner", "⌝" }, /* U+231D */
+ { "urcrop", "⌎" }, /* U+230E */
+ { "uring", "ů" }, /* U+016F */
+ { "urtri", "◹" }, /* U+25F9 */
+ { "uscr", "𝓊" }, /* U+1D4CA */
+ { "utdot", "⋰" }, /* U+22F0 */
+ { "utilde", "ũ" }, /* U+0169 */
+ { "utri", "▵" }, /* U+25B5 */
+ { "utrif", "▴" }, /* U+25B4 */
+ { "uuarr", "⇈" }, /* U+21C8 */
+ { "uuml", "ü" }, /* U+00FC */
+ { "uwangle", "⦧" }, /* U+29A7 */
+ { "vArr", "⇕" }, /* U+21D5 */
+ { "vBar", "⫨" }, /* U+2AE8 */
+ { "vBarv", "⫩" }, /* U+2AE9 */
+ { "vDash", "⊨" }, /* U+22A8 */
+ { "vangrt", "⦜" }, /* U+299C */
+ { "varepsilon", "ϵ" }, /* U+03F5 */
+ { "varkappa", "ϰ" }, /* U+03F0 */
+ { "varnothing", "∅" }, /* U+2205 */
+ { "varphi", "ϕ" }, /* U+03D5 */
+ { "varpi", "ϖ" }, /* U+03D6 */
+ { "varpropto", "∝" }, /* U+221D */
+ { "varr", "↕" }, /* U+2195 */
+ { "varrho", "ϱ" }, /* U+03F1 */
+ { "varsigma", "ς" }, /* U+03C2 */
+ { "varsubsetneq", "⊊︀" }, /* U+228A U+FE00 */
+ { "varsubsetneqq", "⫋︀" }, /* U+2ACB U+FE00 */
+ { "varsupsetneq", "⊋︀" }, /* U+228B U+FE00 */
+ { "varsupsetneqq", "⫌︀" }, /* U+2ACC U+FE00 */
+ { "vartheta", "ϑ" }, /* U+03D1 */
+ { "vartriangleleft", "⊲" }, /* U+22B2 */
+ { "vcy", "в" }, /* U+0432 */
+ { "vdash", "⊢" }, /* U+22A2 */
+ { "vee", "∨" }, /* U+2228 */
+ { "veebar", "⊻" }, /* U+22BB */
+ { "veeeq", "≚" }, /* U+225A */
+ { "vellip", "⋮" }, /* U+22EE */
+ { "verbar", "|" }, /* U+007C */
+ { "vert", "|" }, /* U+007C */
+ { "vfr", "𝔳" }, /* U+1D533 */
+ { "vltri", "⊲" }, /* U+22B2 */
+ { "vnsub", "⊂⃒" }, /* U+2282 U+20D2 */
+ { "vnsup", "⊃⃒" }, /* U+2283 U+20D2 */
+ { "vopf", "𝕧" }, /* U+1D567 */
+ { "vprop", "∝" }, /* U+221D */
+ { "vrtri", "⊳" }, /* U+22B3 */
+ { "vscr", "𝓋" }, /* U+1D4CB */
+ { "vsubnE", "⫋︀" }, /* U+2ACB U+FE00 */
+ { "vsubne", "⊊︀" }, /* U+228A U+FE00 */
+ { "vsupnE", "⫌︀" }, /* U+2ACC U+FE00 */
+ { "vsupne", "⊋︀" }, /* U+228B U+FE00 */
+ { "vzigzag", "⦚" }, /* U+299A */
+ { "wcirc", "ŵ" }, /* U+0175 */
+ { "wedbar", "⩟" }, /* U+2A5F */
+ { "wedge", "∧" }, /* U+2227 */
+ { "wedgeq", "≙" }, /* U+2259 */
+ { "weierp", "℘" }, /* U+2118 */
+ { "wfr", "𝔴" }, /* U+1D534 */
+ { "wopf", "𝕨" }, /* U+1D568 */
+ { "wp", "℘" }, /* U+2118 */
+ { "wr", "≀" }, /* U+2240 */
+ { "wreath", "≀" }, /* U+2240 */
+ { "wscr", "𝓌" }, /* U+1D4CC */
+ { "xcap", "⋂" }, /* U+22C2 */
+ { "xcirc", "◯" }, /* U+25EF */
+ { "xcup", "⋃" }, /* U+22C3 */
+ { "xdtri", "▽" }, /* U+25BD */
+ { "xfr", "𝔵" }, /* U+1D535 */
+ { "xhArr", "⟺" }, /* U+27FA */
+ { "xharr", "⟷" }, /* U+27F7 */
+ { "xi", "ξ" }, /* U+03BE */
+ { "xlArr", "⟸" }, /* U+27F8 */
+ { "xlarr", "⟵" }, /* U+27F5 */
+ { "xmap", "⟼" }, /* U+27FC */
+ { "xnis", "⋻" }, /* U+22FB */
+ { "xodot", "⨀" }, /* U+2A00 */
+ { "xopf", "𝕩" }, /* U+1D569 */
+ { "xoplus", "⨁" }, /* U+2A01 */
+ { "xotime", "⨂" }, /* U+2A02 */
+ { "xrArr", "⟹" }, /* U+27F9 */
+ { "xrarr", "⟶" }, /* U+27F6 */
+ { "xscr", "𝓍" }, /* U+1D4CD */
+ { "xsqcup", "⨆" }, /* U+2A06 */
+ { "xuplus", "⨄" }, /* U+2A04 */
+ { "xutri", "△" }, /* U+25B3 */
+ { "xvee", "⋁" }, /* U+22C1 */
+ { "xwedge", "⋀" }, /* U+22C0 */
+ { "yacute", "ý" }, /* U+00FD */
+ { "yacy", "я" }, /* U+044F */
+ { "ycirc", "ŷ" }, /* U+0177 */
+ { "ycy", "ы" }, /* U+044B */
+ { "yen", "¥" }, /* U+00A5 */
+ { "yfr", "𝔶" }, /* U+1D536 */
+ { "yicy", "ї" }, /* U+0457 */
+ { "yopf", "𝕪" }, /* U+1D56A */
+ { "yscr", "𝓎" }, /* U+1D4CE */
+ { "yucy", "ю" }, /* U+044E */
+ { "yuml", "ÿ" }, /* U+00FF */
+ { "zacute", "ź" }, /* U+017A */
+ { "zcaron", "ž" }, /* U+017E */
+ { "zcy", "з" }, /* U+0437 */
+ { "zdot", "ż" }, /* U+017C */
+ { "zeetrf", "ℨ" }, /* U+2128 */
+ { "zeta", "ζ" }, /* U+03B6 */
+ { "zfr", "𝔷" }, /* U+1D537 */
+ { "zhcy", "ж" }, /* U+0436 */
+ { "zigrarr", "⇝" }, /* U+21DD */
+ { "zopf", "𝕫" }, /* U+1D56B */
+ { "zscr", "𝓏" }, /* U+1D4CF */
+ { "zwj", "" }, /* U+200D */
+ { "zwnj", "" } /* U+200C */
+};
+/* HTML5 named character entities whose name is longer than 15 bytes
+   (html5_lookup searches this table only for such names).
+   Each entry maps the entity name to its replacement text in UTF-8.
+   The entries must stay sorted by name in byte order (uppercase letters
+   before lowercase letters), because html5_lookup does a binary search.
+   Invisible characters (U+200B ZERO WIDTH SPACE, U+00A0 NO-BREAK SPACE) are
+   written as UTF-8 byte escapes: in raw form they cannot be told apart from
+   an empty string or an ASCII space, and they are easily lost when the file
+   is edited or transmitted.  */
+static const struct { const char name[31 + 1]; const char value[6 + 1]; } html5long[] =
+{
+  { "CapitalDifferentialD", "ⅅ" }, /* U+2145 */
+  { "ClockwiseContourIntegral", "∲" }, /* U+2232 */
+  { "CloseCurlyDoubleQuote", "”" }, /* U+201D */
+  { "CounterClockwiseContourIntegral", "∳" }, /* U+2233 */
+  { "DiacriticalAcute", "´" }, /* U+00B4 */
+  { "DiacriticalDoubleAcute", "˝" }, /* U+02DD */
+  { "DiacriticalGrave", "`" }, /* U+0060 */
+  { "DiacriticalTilde", "˜" }, /* U+02DC */
+  { "DoubleContourIntegral", "∯" }, /* U+222F */
+  { "DoubleLeftRightArrow", "⇔" }, /* U+21D4 */
+  { "DoubleLongLeftArrow", "⟸" }, /* U+27F8 */
+  { "DoubleLongLeftRightArrow", "⟺" }, /* U+27FA */
+  { "DoubleLongRightArrow", "⟹" }, /* U+27F9 */
+  { "DoubleRightArrow", "⇒" }, /* U+21D2 */
+  { "DoubleUpDownArrow", "⇕" }, /* U+21D5 */
+  { "DoubleVerticalBar", "∥" }, /* U+2225 */
+  { "DownArrowUpArrow", "⇵" }, /* U+21F5 */
+  { "DownLeftRightVector", "⥐" }, /* U+2950 */
+  { "DownLeftTeeVector", "⥞" }, /* U+295E */
+  { "DownLeftVectorBar", "⥖" }, /* U+2956 */
+  { "DownRightTeeVector", "⥟" }, /* U+295F */
+  { "DownRightVectorBar", "⥗" }, /* U+2957 */
+  { "EmptySmallSquare", "◻" }, /* U+25FB */
+  { "EmptyVerySmallSquare", "▫" }, /* U+25AB */
+  { "FilledSmallSquare", "◼" }, /* U+25FC */
+  { "FilledVerySmallSquare", "▪" }, /* U+25AA */
+  { "GreaterEqualLess", "⋛" }, /* U+22DB */
+  { "GreaterFullEqual", "≧" }, /* U+2267 */
+  { "GreaterSlantEqual", "⩾" }, /* U+2A7E */
+  { "LeftAngleBracket", "⟨" }, /* U+27E8 */
+  { "LeftArrowRightArrow", "⇆" }, /* U+21C6 */
+  { "LeftDoubleBracket", "⟦" }, /* U+27E6 */
+  { "LeftDownTeeVector", "⥡" }, /* U+2961 */
+  { "LeftDownVectorBar", "⥙" }, /* U+2959 */
+  { "LeftTriangleEqual", "⊴" }, /* U+22B4 */
+  { "LeftUpDownVector", "⥑" }, /* U+2951 */
+  { "LessEqualGreater", "⋚" }, /* U+22DA */
+  { "LongLeftRightArrow", "⟷" }, /* U+27F7 */
+  { "Longleftrightarrow", "⟺" }, /* U+27FA */
+  { "NegativeMediumSpace", "\xE2\x80\x8B" }, /* U+200B */
+  { "NegativeThickSpace", "\xE2\x80\x8B" }, /* U+200B */
+  { "NegativeThinSpace", "\xE2\x80\x8B" }, /* U+200B */
+  { "NegativeVeryThinSpace", "\xE2\x80\x8B" }, /* U+200B */
+  { "NestedGreaterGreater", "≫" }, /* U+226B */
+  { "NonBreakingSpace", "\xC2\xA0" }, /* U+00A0 */
+  { "NotDoubleVerticalBar", "∦" }, /* U+2226 */
+  { "NotGreaterFullEqual", "≧̸" }, /* U+2267 U+0338 */
+  { "NotGreaterGreater", "≫̸" }, /* U+226B U+0338 */
+  { "NotGreaterSlantEqual", "⩾̸" }, /* U+2A7E U+0338 */
+  { "NotLeftTriangleBar", "⧏̸" }, /* U+29CF U+0338 */
+  { "NotLeftTriangleEqual", "⋬" }, /* U+22EC */
+  { "NotLessSlantEqual", "⩽̸" }, /* U+2A7D U+0338 */
+  { "NotNestedGreaterGreater", "⪢̸" }, /* U+2AA2 U+0338 */
+  { "NotNestedLessLess", "⪡̸" }, /* U+2AA1 U+0338 */
+  { "NotPrecedesEqual", "⪯̸" }, /* U+2AAF U+0338 */
+  { "NotPrecedesSlantEqual", "⋠" }, /* U+22E0 */
+  { "NotReverseElement", "∌" }, /* U+220C */
+  { "NotRightTriangle", "⋫" }, /* U+22EB */
+  { "NotRightTriangleBar", "⧐̸" }, /* U+29D0 U+0338 */
+  { "NotRightTriangleEqual", "⋭" }, /* U+22ED */
+  { "NotSquareSubsetEqual", "⋢" }, /* U+22E2 */
+  { "NotSquareSuperset", "⊐̸" }, /* U+2290 U+0338 */
+  { "NotSquareSupersetEqual", "⋣" }, /* U+22E3 */
+  { "NotSucceedsEqual", "⪰̸" }, /* U+2AB0 U+0338 */
+  { "NotSucceedsSlantEqual", "⋡" }, /* U+22E1 */
+  { "NotSucceedsTilde", "≿̸" }, /* U+227F U+0338 */
+  { "NotSupersetEqual", "⊉" }, /* U+2289 */
+  { "NotTildeFullEqual", "≇" }, /* U+2247 */
+  { "OpenCurlyDoubleQuote", "“" }, /* U+201C */
+  { "PrecedesSlantEqual", "≼" }, /* U+227C */
+  { "ReverseEquilibrium", "⇋" }, /* U+21CB */
+  { "ReverseUpEquilibrium", "⥯" }, /* U+296F */
+  { "RightAngleBracket", "⟩" }, /* U+27E9 */
+  { "RightArrowLeftArrow", "⇄" }, /* U+21C4 */
+  { "RightDoubleBracket", "⟧" }, /* U+27E7 */
+  { "RightDownTeeVector", "⥝" }, /* U+295D */
+  { "RightDownVectorBar", "⥕" }, /* U+2955 */
+  { "RightTriangleBar", "⧐" }, /* U+29D0 */
+  { "RightTriangleEqual", "⊵" }, /* U+22B5 */
+  { "RightUpDownVector", "⥏" }, /* U+294F */
+  { "RightUpTeeVector", "⥜" }, /* U+295C */
+  { "RightUpVectorBar", "⥔" }, /* U+2954 */
+  { "SquareIntersection", "⊓" }, /* U+2293 */
+  { "SquareSubsetEqual", "⊑" }, /* U+2291 */
+  { "SquareSupersetEqual", "⊒" }, /* U+2292 */
+  { "SucceedsSlantEqual", "≽" }, /* U+227D */
+  { "UnderParenthesis", "⏝" }, /* U+23DD */
+  { "UpArrowDownArrow", "⇅" }, /* U+21C5 */
+  { "VerticalSeparator", "❘" }, /* U+2758 */
+  { "blacktriangledown", "▾" }, /* U+25BE */
+  { "blacktriangleleft", "◂" }, /* U+25C2 */
+  { "blacktriangleright", "▸" }, /* U+25B8 */
+  { "circlearrowright", "↻" }, /* U+21BB */
+  { "downharpoonright", "⇂" }, /* U+21C2 */
+  { "leftrightharpoons", "⇋" }, /* U+21CB */
+  { "leftrightsquigarrow", "↭" }, /* U+21AD */
+  { "longleftrightarrow", "⟷" }, /* U+27F7 */
+  { "ntrianglerighteq", "⋭" }, /* U+22ED */
+  { "rightharpoondown", "⇁" }, /* U+21C1 */
+  { "rightleftharpoons", "⇌" }, /* U+21CC */
+  { "rightrightarrows", "⇉" }, /* U+21C9 */
+  { "twoheadleftarrow", "↞" }, /* U+219E */
+  { "twoheadrightarrow", "↠" }, /* U+21A0 */
+  { "vartriangleright", "⊳" } /* U+22B3 */
+};
+/* Looks up the HTML5 named character entity NAME.
+   Names of at most 15 bytes are searched in html5short, longer names in
+   html5long; both tables are sorted, so a binary search is used.
+   Returns the replacement text stored in the table, or NULL if NAME is not
+   a known entity name.  */
+static const char *
+html5_lookup (string_desc_t name)
+{
+  size_t first;
+  size_t limit;
+
+  if (sd_length (name) > 15)
+    {
+      /* Search the half-open index range [first, limit) of html5long.  */
+      first = 0;
+      limit = sizeof (html5long) / sizeof (html5long[0]);
+      while (first < limit)
+        {
+          size_t probe = (first + limit) / 2;
+          int order = sd_cmp (name, sd_from_c (html5long[probe].name));
+          if (order < 0)
+            limit = probe;
+          else if (order > 0)
+            first = probe + 1;
+          else
+            return html5long[probe].value;
+        }
+    }
+  else
+    {
+      /* Search the half-open index range [first, limit) of html5short.  */
+      first = 0;
+      limit = sizeof (html5short) / sizeof (html5short[0]);
+      while (first < limit)
+        {
+          size_t probe = (first + limit) / 2;
+          int order = sd_cmp (name, sd_from_c (html5short[probe].name));
+          if (order < 0)
+            limit = probe;
+          else if (order > 0)
+            first = probe + 1;
+          else
+            return html5short[probe].value;
+        }
+    }
+
+  /* Not found in the table that matches NAME's length.  */
+  return NULL;
+}
--- /dev/null
+/* xgettext D backend.
+ Copyright (C) 2001-2025 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2025. */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+/* Specification. */
+#include "x-d.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <error.h>
+#include "message.h"
+#include "string-desc.h"
+#include "xstring-desc.h"
+#include "string-buffer-reversed.h"
+#include "c-ctype.h"
+#include "html5-entities.h"
+#include "xgettext.h"
+#include "xg-pos.h"
+#include "xg-mixed-string.h"
+#include "xg-arglist-context.h"
+#include "xg-arglist-callshape.h"
+#include "xg-arglist-parser.h"
+#include "xg-message.h"
+#include "if-error.h"
+#include "xalloc.h"
+#include "read-file.h"
+#include "unistr.h"
+#include "byteswap.h"
+#include "po-charset.h"
+#include "gettext.h"
+
+#define _(s) gettext(s)
+
+/* Use tree-sitter.
+ Documentation: <https://tree-sitter.github.io/tree-sitter/using-parsers> */
+#include <tree_sitter/api.h>
+extern const TSLanguage *tree_sitter_d (void);
+
+
+/* The D syntax is defined in <https://dlang.org/spec/spec.html>.
+ The design principle of this language appears to be: "If there are two ways
+ to get a certain feature, find three more equivalent ways, and support all
+ five in the language."
+ Examples:
+ - There are 5 supported encodings for the source code.
+ - There are 3 supported syntaxes for comments.
+ - There are 10 supported syntaxes for string literals (not even counting
+ the interpolation expression sequences).
+ - There are 4 supported ways of including a Unicode character in a
+ double-quoted string.
+ This guarantees
+ - a steep learning curve for the junior programmers,
+ - that even senior programmers never fully master the language,
+ - that teams of developers will eternally fight over code style and
+ irrelevant details,
+ - and a high implementation complexity for the language and its runtime.
+ */
+
+#define DEBUG_D 0
+
+
+/* ====================== Keyword set customization. ====================== */
+
+/* If true extract all strings. Set by x_d_extract_all. */
+static bool extract_all = false;
+
+static hash_table function_keywords; /* Keywords given without a trailing '!'; filled by x_d_keyword. */
+static hash_table template_keywords; /* Keywords given with a trailing '!' (stored without it); filled by x_d_keyword. */
+static bool default_keywords = true; /* Whether init_keywords still has to add the default keywords; cleared by x_d_keyword (NULL). */
+
+
+/* Sets the extract_all flag, i.e. requests that all strings be extracted.
+   Takes no arguments; declared with a (void) prototype so that a call with
+   arguments is diagnosed by the compiler.  */
+void
+x_d_extract_all (void)
+{
+  extract_all = true;
+}
+
+
+/* Registers the keyword specification NAME.
+   A NULL NAME only disables the default keywords.
+   Otherwise NAME is split by split_keywordspec into an identifier and a call
+   shape.  An identifier with a trailing '!' is stored (without the '!') in
+   template_keywords, any other identifier in function_keywords.  */
+void
+x_d_keyword (const char *name)
+{
+  if (name == NULL)
+    {
+      default_keywords = false;
+      return;
+    }
+
+  /* Create both hash tables lazily, on first use.  */
+  if (function_keywords.table == NULL)
+    hash_init (&function_keywords, 100);
+  if (template_keywords.table == NULL)
+    hash_init (&template_keywords, 100);
+
+  const char *end;
+  struct callshape shape;
+  split_keywordspec (name, &end, &shape);
+
+  /* The characters between name and end should form a valid identifier,
+     possibly with a trailing '!'.
+     A colon before end means an invalid parse in split_keywordspec();
+     such a specification is ignored.  */
+  const char *colon = strchr (name, ':');
+  if (colon != NULL && colon < end)
+    return;
+
+  bool is_template = (end > name && end[-1] == '!');
+  if (is_template)
+    /* Do not store the trailing '!' as part of the keyword.  */
+    insert_keyword_callshape (&template_keywords, name, end - 1 - name,
+                              &shape);
+  else
+    insert_keyword_callshape (&function_keywords, name, end - name,
+                              &shape);
+}
+
+/* Finish initializing the keywords hash table.
+   Called after argument processing, before each file is processed.
+   If the default keywords are still enabled, registers each of them through
+   x_d_keyword and then clears default_keywords, so that later calls do
+   nothing.  */
+static void
+init_keywords (void)
+{
+  /* When adding new keywords here, also update the documentation in
+     xgettext.texi! */
+  static const char *const default_specs[] =
+    {
+      "gettext",
+      "dgettext:2",
+      "dcgettext:2",
+      "ngettext:1,2",
+      "dngettext:2,3",
+      "dcngettext:2,3",
+      "pgettext:1c,2",
+      "dpgettext:2c,3",
+      "dcpgettext:2c,3",
+      "npgettext:1c,2,3",
+      "dnpgettext:2c,3,4",
+      "dcnpgettext:2c,3,4"
+    };
+  size_t i;
+
+  if (!default_keywords)
+    return;
+
+  for (i = 0; i < sizeof (default_specs) / sizeof (default_specs[0]); i++)
+    x_d_keyword (default_specs[i]);
+  default_keywords = false;
+}
+
+/* Records the format string flags of the D backend by passing each
+   specification to xgettext_record_flag, in the order listed.
+   Each specification has the form FUNCTION:ARGNUM:FLAG.  */
+void
+init_flag_table_d (void)
+{
+  static const char *const flag_specs[] =
+    {
+      /* The gettext family passes the c-format property through...  */
+      "gettext:1:pass-c-format",
+      "dgettext:2:pass-c-format",
+      "dcgettext:2:pass-c-format",
+      "ngettext:1:pass-c-format",
+      "ngettext:2:pass-c-format",
+      "dngettext:2:pass-c-format",
+      "dngettext:3:pass-c-format",
+      "dcngettext:2:pass-c-format",
+      "dcngettext:3:pass-c-format",
+      "pgettext:2:pass-c-format",
+      "dpgettext:3:pass-c-format",
+      "dcpgettext:3:pass-c-format",
+      "npgettext:2:pass-c-format",
+      "npgettext:3:pass-c-format",
+      "dnpgettext:3:pass-c-format",
+      "dnpgettext:4:pass-c-format",
+      "dcnpgettext:3:pass-c-format",
+      "dcnpgettext:4:pass-c-format",
+      /* ... and likewise the d-format property.  */
+      "gettext:1:pass-d-format",
+      "dgettext:2:pass-d-format",
+      "dcgettext:2:pass-d-format",
+      "ngettext:1:pass-d-format",
+      "ngettext:2:pass-d-format",
+      "dngettext:2:pass-d-format",
+      "dngettext:3:pass-d-format",
+      "dcngettext:2:pass-d-format",
+      "dcngettext:3:pass-d-format",
+      "pgettext:2:pass-d-format",
+      "dpgettext:3:pass-d-format",
+      "dcpgettext:3:pass-d-format",
+      "npgettext:2:pass-d-format",
+      "npgettext:3:pass-d-format",
+      "dnpgettext:3:pass-d-format",
+      "dnpgettext:4:pass-d-format",
+      "dcnpgettext:3:pass-d-format",
+      "dcnpgettext:4:pass-d-format",
+
+      /* Module core.stdc.stdio
+         <https://dlang.org/library/core/stdc/stdio.html> */
+      "fprintf:2:c-format",
+      "vfprintf:2:c-format",
+      "printf:1:c-format",
+      "vprintf:1:c-format",
+      "sprintf:2:c-format",
+      "vsprintf:2:c-format",
+      "snprintf:3:c-format",
+      "vsnprintf:3:c-format",
+
+      /* Module std.format
+         <https://dlang.org/library/std/format.html> */
+      "format:1:d-format",
+      "sformat:2:d-format"
+    };
+  size_t i;
+
+  for (i = 0; i < sizeof (flag_specs) / sizeof (flag_specs[0]); i++)
+    xgettext_record_flag (flag_specs[i]);
+}
+
+
+/* ======================== Parsing via tree-sitter. ======================== */
+/* To understand this code, look at
+ tree-sitter-d/src/node-types.json
+ and
+ tree-sitter-d/src/grammar.json
+ */
+
+/* The tree-sitter's language object.
+ It is passed to the ts_language_*_for_name lookups in ts_language_symbol
+ and ts_language_field. Its initialization is not in this part of the
+ file. */
+static const TSLanguage *ts_language;
+
+/* ------------------------- Node types and symbols ------------------------- */
+
+/* Returns the symbol that the grammar in ts_language assigns to the node
+   type NAME.  IS_NAMED is passed through to ts_language_symbol_for_name.
+   Aborts if the lookup yields 0.  */
+static TSSymbol
+ts_language_symbol (const char *name, bool is_named)
+{
+  size_t name_length = strlen (name);
+  TSSymbol symbol =
+    ts_language_symbol_for_name (ts_language, name, name_length, is_named);
+
+  /* A zero symbol means that the grammar has evolved in an incompatible
+     way.  */
+  if (symbol == 0)
+    abort ();
+  return symbol;
+}
+
+/* Returns the field id that the grammar in ts_language assigns to the field
+   NAME.  Aborts if the lookup yields 0.  */
+MAYBE_UNUSED static TSFieldId
+ts_language_field (const char *name)
+{
+  size_t name_length = strlen (name);
+  TSFieldId field_id =
+    ts_language_field_id_for_name (ts_language, name, name_length);
+
+  /* A zero field id means that the grammar has evolved in an incompatible
+     way.  */
+  if (field_id == 0)
+    abort ();
+  return field_id;
+}
+
+/* Optimization:
+ Instead of
+ strcmp (ts_node_type (node), "string_literal") == 0
+ it is faster to do
+ ts_node_symbol (node) == ts_symbol_string_literal
+ Each variable below caches the symbol of the node type named after the
+ ts_symbol_ prefix. They are presumably filled in through
+ ts_language_symbol during initialization, which is outside this part of
+ the file -- TODO confirm.
+ */
+static TSSymbol ts_symbol_comment;
+static TSSymbol ts_symbol_string_literal;
+static TSSymbol ts_symbol_quoted_string;
+static TSSymbol ts_symbol_escape_sequence;
+static TSSymbol ts_symbol_htmlentity;
+static TSSymbol ts_symbol_raw_string;
+static TSSymbol ts_symbol_hex_string;
+static TSSymbol ts_symbol_binary_expression;
+static TSSymbol ts_symbol_add_expression;
+static TSSymbol ts_symbol_expression;
+static TSSymbol ts_symbol_identifier;
+static TSSymbol ts_symbol_property_expression;
+static TSSymbol ts_symbol_call_expression;
+static TSSymbol ts_symbol_named_arguments;
+static TSSymbol ts_symbol_named_argument;
+static TSSymbol ts_symbol_template_instance;
+static TSSymbol ts_symbol_template_arguments;
+static TSSymbol ts_symbol_template_argument;
+static TSSymbol ts_symbol_unittest_declaration;
+static TSSymbol ts_symbol_tilde; /* ~ (is_string_literal compares it with the middle child of an add_expression) */
+
+/* Returns the row of NODE's start point, plus 1.  */
+static inline size_t
+ts_node_line_number (TSNode node)
+{
+  TSPoint start = ts_node_start_point (node);
+  return start.row + 1;
+}
+
+/* -------------------------------- The file -------------------------------- */
+
+/* The entire contents of the file being analyzed.
+ The byte offsets of a node (ts_node_start_byte, ts_node_end_byte) are used
+ as indices into this buffer. */
+static const char *contents;
+
+/* -------------------------------- Comments -------------------------------- */
+
+/* These are for tracking whether comments count as immediately before
+ keyword. Both are updated by handle_comments. */
+static int last_comment_line; /* End row + 1 of the most recent comment node. */
+static int last_non_comment_line; /* ts_node_line_number of the most recent non-comment node. */
+
+/* Saves a comment line.
+   Strips leading and trailing spaces and tabs from GIST and passes the
+   result, as a NUL-terminated string, to savable_comment_add.  */
+static void save_comment_line (string_desc_t gist)
+{
+  size_t start = 0;
+  size_t end = sd_length (gist);
+
+  /* Remove leading whitespace.  */
+  while (start < end
+         && (sd_char_at (gist, start) == ' '
+             || sd_char_at (gist, start) == '\t'))
+    start++;
+  /* Remove trailing whitespace.  */
+  while (end > start
+         && (sd_char_at (gist, end - 1) == ' '
+             || sd_char_at (gist, end - 1) == '\t'))
+    end--;
+  gist = sd_substring (gist, start, end);
+
+  /* The NUL-terminated copy is freshly allocated.  Use xsd_c rather than
+     sd_c, so that an allocation failure is handled instead of passing NULL
+     on, and free the copy afterwards instead of leaking it.
+     NOTE(review): this relies on savable_comment_add making its own copy
+     of the string (it takes a 'const char *'); confirm against its
+     implementation.  */
+  char *line = xsd_c (gist);
+  savable_comment_add (line);
+  free (line);
+}
+
+/* Does the comment handling for NODE.
+   Updates savable_comment, last_comment_line, last_non_comment_line.
+   It is important that this function gets called
+     - for each node (not only the named nodes!),
+     - in depth-first traversal order.
+   A comment node must be either a '//' comment or a comment enclosed in the
+   C comment markers or in the D nested comment markers; anything else makes
+   this function abort.  */
+static void handle_comments (TSNode node)
+{
+  #if DEBUG_D && 0
+  fprintf (stderr, "LCL=%d LNCL=%d node=[%s]|%s|\n", last_comment_line, last_non_comment_line, ts_node_type (node), ts_node_string (node));
+  #endif
+  size_t node_line = ts_node_line_number (node);
+
+  /* Have we skipped over a newline that terminated a line with non-comment
+     tokens, after the last comment line?  Then the comments collected so
+     far do not belong to what follows.  */
+  if (last_comment_line < last_non_comment_line
+      && last_non_comment_line < node_line)
+    savable_comment_reset ();
+
+  if (ts_node_symbol (node) != ts_symbol_comment)
+    {
+      last_non_comment_line = node_line;
+      return;
+    }
+
+  /* The text of the comment, including its markers.  */
+  string_desc_t entire =
+    sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+                 (char *) contents + ts_node_start_byte (node));
+  size_t entire_len = sd_length (entire);
+
+  if (entire_len >= 2
+      && sd_char_at (entire, 0) == '/'
+      && sd_char_at (entire, 1) == '/')
+    {
+      /* A comment that starts with two slashes: a single line.  */
+      save_comment_line (sd_substring (entire, 2, entire_len));
+    }
+  else if (entire_len >= 4
+           && sd_char_at (entire, 0) == '/'
+           && (sd_char_at (entire, 1) == '*'
+               || sd_char_at (entire, 1) == '+')
+           && sd_char_at (entire, entire_len - 2) == sd_char_at (entire, 1)
+           && sd_char_at (entire, entire_len - 1) == '/')
+    {
+      /* A comment between matching markers.  Drop the markers, split the
+         rest into lines, and save each line; save_comment_line removes the
+         leading and trailing whitespace of each line.  */
+      string_desc_t rest = sd_substring (entire, 2, entire_len - 2);
+      ptrdiff_t nl_index;
+      while ((nl_index = sd_index (rest, '\n')) >= 0)
+        {
+          save_comment_line (sd_substring (rest, 0, nl_index));
+          rest = sd_substring (rest, nl_index + 1, sd_length (rest));
+        }
+      /* What remains after the last newline.  */
+      save_comment_line (rest);
+    }
+  else
+    abort ();
+
+  last_comment_line = ts_node_end_point (node).row + 1;
+}
+
+/* ---------------------------- String literals ---------------------------- */
+
+/* Determines whether NODE represents a string literal or the concatenation
+   of string literals (via the '~' operator).
+   Interpolation expression sequences (leading 'i') and literals whose last
+   character is 'w' or 'd' are not accepted.  */
+static bool
+is_string_literal (TSNode node)
+{
+  /* Each iteration either decides, or descends into a subnode of NODE.  */
+  for (;;)
+    {
+      TSSymbol symbol = ts_node_symbol (node);
+
+      if (symbol == ts_symbol_string_literal)
+        {
+          string_desc_t node_contents =
+            sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+                         (char *) contents + ts_node_start_byte (node));
+          #if DEBUG_D && 0
+          fprintf (stderr, "[%s]|%s|%.*s|\n", ts_node_type (node), ts_node_string (node), (int) sd_length (node_contents), sd_data (node_contents));
+          #if 0
+          uint32_t count = ts_node_child_count (node);
+          uint32_t i;
+          for (i = 0; i < count; i++)
+            {
+              TSNode subnode = ts_node_named_child (node, i);
+              string_desc_t subnode_contents =
+                sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                             (char *) contents + ts_node_start_byte (subnode));
+              fprintf (stderr, "%u -> [%s]|%s|%.*s|\n", i, ts_node_type (subnode), ts_node_string (subnode), (int) sd_length (subnode_contents), sd_data (subnode_contents));
+              uint32_t count2 = ts_node_child_count (subnode);
+              uint32_t j;
+              for (j = 0; j < count2; j++)
+                {
+                  fprintf (stderr, "%u %u -> [%s]|%s|\n", i, j, ts_node_type (ts_node_child (subnode, j)), ts_node_string (ts_node_child (subnode, j)));
+                }
+            }
+          #endif
+          #endif
+          /* tree-sitter-d does not do a good job of dissecting the string
+             literal into its constituents.  Therefore we look at the node's
+             entire text.  */
+          /* Interpolation expression sequences look like string literals
+             but are not, since they need a '.text' call to convert to
+             string.  */
+          if (sd_char_at (node_contents, 0) == 'i')
+            return false;
+          /* We only want string literals with 'char' elements, not 'wchar'
+             or 'dchar'.  */
+          char postfix =
+            sd_char_at (node_contents, sd_length (node_contents) - 1);
+          return !(postfix == 'w' || postfix == 'd');
+        }
+
+      if (symbol == ts_symbol_binary_expression
+          && ts_node_child_count (node) == 1)
+        {
+          TSNode concat = ts_node_child (node, 0);
+          if (ts_node_symbol (concat) == ts_symbol_add_expression
+              && ts_node_child_count (concat) == 3
+              && ts_node_symbol (ts_node_child (concat, 1)) == ts_symbol_tilde
+              /* The right operand is checked recursively...  */
+              && is_string_literal (ts_node_child (concat, 2)))
+            {
+              /* ... and the left operand by continuing the loop.  */
+              node = ts_node_child (concat, 0);
+              continue;
+            }
+        }
+
+      if (symbol == ts_symbol_expression
+          && ts_node_named_child_count (node) == 1)
+        {
+          /* An expression with a single named child: look at that child.  */
+          node = ts_node_named_child (node, 0);
+          continue;
+        }
+
+      return false;
+    }
+}
+
+/* Prepends the string literal pieces from NODE to BUFFER. */
+static void
+string_literal_accumulate_pieces (TSNode node,
+ struct string_buffer_reversed *buffer)
+{
+ start:
+ if (ts_node_symbol (node) == ts_symbol_string_literal)
+ {
+ /* tree-sitter-d does not do a good job of dissecting the string literal
+ into its constituents. Therefore we have to look at the node's entire
+ contents and dissect ourselves. The only help we get is the list of
+ escape sequences in a double-quoted string literal:
+ (string_literal (quoted_string (escape_sequence) ... (escape_sequence)))
+ */
+ string_desc_t node_contents =
+ sd_new_addr (ts_node_end_byte (node) - ts_node_start_byte (node),
+ (char *) contents + ts_node_start_byte (node));
+ #if DEBUG_D && 0
+ fprintf (stderr, "[%s]|%s|%.*s|\n", ts_node_type (node), ts_node_string (node), (int) sd_length (node_contents), sd_data (node_contents));
+ #endif
+ /* Drop StringPostfix. */
+ if (sd_length (node_contents) >= 1
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == 'c')
+ node_contents = sd_substring (node_contents, 0, sd_length (node_contents) - 1);
+ /* Distinguish the various cases. */
+ if (sd_length (node_contents) >= 2
+ && sd_char_at (node_contents, 0) == '"'
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == '"')
+ {
+ /* A double-quoted string. */
+ if (ts_node_child_count (node) != 1)
+ abort ();
+ TSNode subnode = ts_node_child (node, 0);
+ if (ts_node_symbol (subnode) != ts_symbol_quoted_string)
+ abort ();
+ node_contents = sd_substring (node_contents, 1, sd_length (node_contents) - 1);
+ const char *ptr = sd_data (node_contents) + sd_length (node_contents);
+ /* Iterate through the nodes of type escape_sequence under the subnode. */
+ uint32_t count = ts_node_named_child_count (subnode);
+ uint32_t i;
+ for (i = count; i > 0; )
+ {
+ i--;
+ TSNode escnode = ts_node_named_child (subnode, i);
+ if (ts_node_symbol (escnode) == ts_symbol_escape_sequence
+ || ts_node_symbol (escnode) == ts_symbol_htmlentity)
+ {
+ const char *escape_start = contents + ts_node_start_byte (escnode);
+ const char *escape_end = contents + ts_node_end_byte (escnode);
+ if (escape_end < ptr)
+ sbr_xprepend_desc (buffer, sd_new_addr (ptr - escape_end, (char *) escape_end));
+
+ /* The escape sequence must start with a backslash. */
+ if (!(escape_end - escape_start >= 2 && escape_start[0] == '\\'))
+ abort ();
+ /* tree-sitter's grammar.js allows more escape sequences than the
+ specification. Give a warning for the invalid cases. */
+ bool invalid = false;
+ if (escape_end - escape_start == 2)
+ {
+ switch (escape_start[1])
+ {
+ case '\'':
+ case '"':
+ case '?':
+ case '\\':
+ sbr_xprepend1 (buffer, escape_start[1]);
+ break;
+ case '0': case '1': case '2': case '3':
+ case '4': case '5': case '6': case '7':
+ sbr_xprepend1 (buffer, escape_start[1] - '0');
+ break;
+ case 'a':
+ sbr_xprepend1 (buffer, 0x07);
+ break;
+ case 'b':
+ sbr_xprepend1 (buffer, 0x08);
+ break;
+ case 'f':
+ sbr_xprepend1 (buffer, 0x0C);
+ break;
+ case 'n':
+ sbr_xprepend1 (buffer, '\n');
+ break;
+ case 'r':
+ sbr_xprepend1 (buffer, '\r');
+ break;
+ case 't':
+ sbr_xprepend1 (buffer, '\t');
+ break;
+ case 'v':
+ sbr_xprepend1 (buffer, 0x0B);
+ break;
+ default:
+ invalid = true;
+ break;
+ }
+ }
+ else if (escape_start[1] >= '0' && escape_start[1] <= '7')
+ {
+ unsigned int value = 0;
+ /* Only up to 3 octal digits are accepted. */
+ if (escape_end - escape_start <= 1 + 3)
+ {
+ const char *p;
+ for (p = escape_start + 1; p < escape_end; p++)
+ {
+ /* No overflow is possible. */
+ char c = *p;
+ if (c >= '0' && c <= '7')
+ value = (value << 3) + (c - '0');
+ else
+ invalid = true;
+ }
+ if (value > 0xFF)
+ invalid = true;
+ }
+ if (!invalid)
+ sbr_xprepend1 (buffer, (unsigned char) value);
+ }
+ else if ((escape_start[1] == 'x' && escape_end - escape_start == 2 + 2)
+ || (escape_start[1] == 'u' && escape_end - escape_start == 2 + 4)
+ || (escape_start[1] == 'U' && escape_end - escape_start == 2 + 8))
+ {
+ unsigned int value = 0;
+ const char *p;
+ for (p = escape_start + 2; p < escape_end; p++)
+ {
+ /* No overflow is possible. */
+ char c = *p;
+ if (c >= '0' && c <= '9')
+ value = (value << 4) + (c - '0');
+ else if (c >= 'A' && c <= 'Z')
+ value = (value << 4) + (c - 'A' + 10);
+ else if (c >= 'a' && c <= 'z')
+ value = (value << 4) + (c - 'a' + 10);
+ else
+ invalid = true;
+ }
+ if (escape_start[1] == 'x')
+ {
+ if (!invalid)
+ sbr_xprepend1 (buffer, (unsigned char) value);
+ }
+ else if (value < 0x110000 && !(value >= 0xD800 && value < 0xE000))
+ {
+ uint8_t buf[6];
+ int n = u8_uctomb (buf, value, sizeof (buf));
+ if (!(n > 0))
+ abort ();
+ sbr_xprepend_desc (buffer, sd_new_addr (n, (char *) buf));
+ }
+ else
+ invalid = true;
+ }
+ else if (escape_start[1] == '&' && escape_end[-1] == ';')
+ {
+ /* A named character entity. */
+ string_desc_t entity =
+ sd_new_addr (escape_end - escape_start - 3, (char *) escape_start + 2);
+ const char *value = html5_lookup (entity);
+ if (value != NULL)
+ sbr_xprepend_c (buffer, value);
+ else
+ invalid = true;
+ }
+ else
+ invalid = true;
+ if (invalid)
+ {
+ size_t line_number = ts_node_line_number (escnode);
+ if_error (IF_SEVERITY_WARNING,
+ logical_file_name, line_number, (size_t)(-1), false,
+ _("invalid escape sequence in string"));
+ }
+
+ ptr = escape_start;
+ }
+ else
+ abort ();
+ }
+ sbr_xprepend_desc (buffer, sd_substring (node_contents, 0, ptr - sd_data (node_contents)));
+ }
+ else if (sd_length (node_contents) >= 3
+ && sd_char_at (node_contents, 0) == 'x'
+ && sd_char_at (node_contents, 1) == '"'
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == '"')
+ {
+ /* A hex string. */
+ if (ts_node_child_count (node) != 1)
+ abort ();
+ TSNode subnode = ts_node_child (node, 0);
+ if (ts_node_symbol (subnode) != ts_symbol_hex_string)
+ abort ();
+ node_contents = sd_substring (node_contents, 2, sd_length (node_contents) - 1);
+ int shift = 0;
+ int value = 0;
+ ptrdiff_t i;
+ for (i = sd_length (node_contents) - 1; i >= 0; i--)
+ {
+ char c = sd_char_at (node_contents, i);
+ if (c >= '0' && c <= '9')
+ {
+ value += (c - '0') << shift;
+ shift += 4;
+ }
+ else if (c >= 'A' && c <= 'F')
+ {
+ value += (c - 'A' + 10) << shift;
+ shift += 4;
+ }
+ else if (c >= 'a' && c <= 'f')
+ {
+ value += (c - 'a' + 10) << shift;
+ shift += 4;
+ }
+ if (shift == 8)
+ {
+ sbr_xprepend1 (buffer, value);
+ value = 0;
+ shift = 0;
+ }
+ }
+ /* If shift == 4 here, there was an odd number of hex digits. */
+ }
+ else
+ {
+ /* A raw string, delimited string, or token string. */
+ if (sd_char_at (node_contents, 0) == 'q')
+ {
+ if (sd_length (node_contents) >= 3
+ && sd_char_at (node_contents, 1) == '{'
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == '}')
+ /* A token string. */
+ node_contents = sd_substring (node_contents, 2, sd_length (node_contents) - 1);
+ else if (sd_length (node_contents) >= 3
+ && sd_char_at (node_contents, 1) == '"'
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == '"')
+ {
+ /* A delimited string. */
+ node_contents = sd_substring (node_contents, 2, sd_length (node_contents) - 1);
+ if (sd_length (node_contents) >= 2
+ && ((sd_char_at (node_contents, 0) == '('
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == ')')
+ || (sd_char_at (node_contents, 0) == '['
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == ']')
+ || (sd_char_at (node_contents, 0) == '{'
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == '}')
+ || (sd_char_at (node_contents, 0) == '<'
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == '>')
+ || (sd_char_at (node_contents, 0) == sd_char_at (node_contents, sd_length (node_contents) - 1)
+ && !(c_isalpha (sd_char_at (node_contents, 0)) || sd_char_at (node_contents, 0) == '_'))))
+ node_contents = sd_substring (node_contents, 1, sd_length (node_contents) - 1);
+ else
+ {
+ ptrdiff_t first_newline = sd_index (node_contents, '\n');
+ if (first_newline < 0)
+ abort ();
+ ptrdiff_t last_newline = sd_last_index (node_contents, '\n');
+ if (last_newline < 0)
+ abort ();
+ string_desc_t delimiter = sd_substring (node_contents, last_newline + 1, sd_length (node_contents));
+ size_t delimiter_length = sd_length (delimiter);
+ if (delimiter_length == 0)
+ abort ();
+ if (!((first_newline == delimiter_length
+ || (first_newline == delimiter_length + 1
+ && sd_char_at (node_contents, delimiter_length) == '\r'))
+ && sd_equals (sd_substring (node_contents, 0, delimiter_length), delimiter)))
+ abort ();
+ node_contents = sd_substring (node_contents, first_newline + 1, last_newline + 1);
+ }
+ }
+ else
+ abort ();
+ }
+ else if (sd_length (node_contents) >= 3
+ && sd_char_at (node_contents, 0) == 'r'
+ && sd_char_at (node_contents, 1) == '"'
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == '"')
+ /* A raw string. */
+ node_contents = sd_substring (node_contents, 2, sd_length (node_contents) - 1);
+ else if (sd_length (node_contents) >= 2
+ && sd_char_at (node_contents, 0) == '`'
+ && sd_char_at (node_contents, sd_length (node_contents) - 1) == '`')
+ /* A raw string. */
+ node_contents = sd_substring (node_contents, 1, sd_length (node_contents) - 1);
+ else
+ abort ();
+
+ sbr_xprepend_desc (buffer, node_contents);
+ }
+ }
+ else if (ts_node_symbol (node) == ts_symbol_binary_expression
+ && ts_node_child_count (node) == 1)
+ {
+ TSNode subnode = ts_node_child (node, 0);
+ if (ts_node_symbol (subnode) == ts_symbol_add_expression
+ && ts_node_child_count (subnode) == 3
+ && ts_node_symbol (ts_node_child (subnode, 1)) == ts_symbol_tilde)
+ {
+ /* Recurse into the left and right subnodes. */
+ string_literal_accumulate_pieces (ts_node_child (subnode, 2), buffer);
+ /*string_literal_accumulate_pieces (ts_node_child (subnode, 0), buffer);*/
+ node = ts_node_child (subnode, 0);
+ goto start;
+ }
+ else
+ abort ();
+ }
+ else if (ts_node_symbol (node) == ts_symbol_expression
+ && ts_node_named_child_count (node) == 1)
+ {
+ TSNode subnode = ts_node_named_child (node, 0);
+ /* Recurse. */
+ /*string_literal_accumulate_pieces (subnode, buffer);*/
+ node = subnode;
+ goto start;
+ }
+ else
+ abort ();
+}
+
+/* Returns the value of a string or template_string or concatenated string
+   literal NODE, obtained by joining all of its pieces.
+   The result is freshly allocated and mostly UTF-8 encoded.  */
+static char *
+string_literal_value (TSNode node)
+{
+  struct string_buffer_reversed pieces;
+  char *joined;
+
+  sbr_init (&pieces);
+  /* The pieces get prepended to the buffer one by one.  */
+  string_literal_accumulate_pieces (node, &pieces);
+  joined = sbr_xdupfree_c (&pieces);
+
+  return joined;
+}
+
+/* --------------------- Parsing and string extraction --------------------- */
+
+/* Context lookup table.
+ It is set by extract_d from its flag_table argument and is looked up with
+ the name of each called function or instantiated template. */
+static flag_context_list_table_ty *flag_context_list_table;
+
+/* Maximum supported nesting depth.
+ When nesting_depth exceeds it, a fatal error is reported. */
+#define MAX_NESTING_DEPTH 1000
+
+static int nesting_depth; /* Incremented before and decremented after each recursive extract_from_node call; reset to 0 by extract_d. */
+
+/* The file is parsed into an abstract syntax tree. Scan the syntax tree,
+ looking for a keyword in identifier position of a call_expression or
+ macro_invocation, followed by a string among the arguments.
+ When we see this pattern, we have something to remember.
+
+ Normal handling: Look for
+ keyword ( ... msgid ... )
+ Plural handling: Look for
+ keyword ( ... msgid ... msgid_plural ... )
+
+ We handle macro_invocation separately from call_expression, because in
+ a macro_invocation spaces are allowed between the identifier and the '!'
+ (i.e. 'println !' is as valid as 'println!'). Looking for 'println!'
+ would make the code more complicated.
+
+ We use recursion because the arguments before msgid or between msgid
+ and msgid_plural can contain subexpressions of the same form. */
+
+/* Forward declarations.
+ extract_from_node is declared here because the extract_from_* functions
+ below call it for the arguments they scan, and it calls them in turn. */
+static void extract_from_node (TSNode node,
+ bool ignore,
+ bool callee_in_call_expression,
+ flag_region_ty *outer_region,
+ message_list_ty *mlp);
+
+/* Extracts messages from the function call NODE consisting of
+     - CALLEE_NODE: a tree node of type 'identifier' or 'property_expression',
+     - ARGS_NODE: a tree node of type 'named_arguments'.
+   The flag regions of the arguments inherit from OUTER_REGION.
+   If the function name is a keyword, string literal arguments are passed to
+   an argument list parser.  In all cases the arguments are scanned
+   recursively, unless they were already extracted because of extract_all.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_function_call (TSNode node,
+                            TSNode callee_node,
+                            TSNode args_node,
+                            flag_region_ty *outer_region,
+                            message_list_ty *mlp)
+{
+  uint32_t args_count = ts_node_child_count (args_node);
+
+  /* Determine the node that names the function.  */
+  TSNode function_node;
+  if (ts_node_symbol (callee_node) == ts_symbol_identifier)
+    function_node = callee_node;
+  else if (ts_node_symbol (callee_node) == ts_symbol_property_expression)
+    {
+      uint32_t callee_parts = ts_node_child_count (callee_node);
+      function_node = ts_node_child (callee_node, callee_parts - 1);
+    }
+  else
+    abort ();
+
+  /* Look up the flag contexts and, if the function name is a keyword,
+     prepare an argument list parser.  */
+  flag_context_list_iterator_ty next_context_iter;
+  struct arglist_parser *argparser = NULL;
+  bool is_keyword = false;
+  if (ts_node_symbol (function_node) == ts_symbol_identifier)
+    {
+      uint32_t name_start = ts_node_start_byte (function_node);
+      uint32_t name_end = ts_node_end_byte (function_node);
+      string_desc_t function_name =
+        sd_new_addr (name_end - name_start, (char *) contents + name_start);
+
+      /* Context iterator.  */
+      next_context_iter =
+        flag_context_list_iterator (
+          flag_context_list_table_lookup (
+            flag_context_list_table,
+            sd_data (function_name), sd_length (function_name)));
+
+      void *keyword_value;
+      if (hash_find_entry (&function_keywords,
+                           sd_data (function_name), sd_length (function_name),
+                           &keyword_value)
+          == 0)
+        {
+          /* The callee has some information associated with it.  */
+          const struct callshapes *shapes = keyword_value;
+          argparser = arglist_parser_alloc (mlp, shapes);
+          is_keyword = true;
+        }
+    }
+  else
+    next_context_iter = null_context_list_iterator;
+
+  /* Current argument number.  */
+  uint32_t arg = 0;
+
+  /* The first part of the 'property_expression' is treated as the first
+     argument.  Cf. <https://dlang.org/spec/function.html#pseudo-member>  */
+  if (ts_node_symbol (callee_node) == ts_symbol_property_expression)
+    {
+      arg++;
+      flag_region_ty *arg_region =
+        inheriting_region (outer_region,
+                           flag_context_list_iterator_advance (
+                             &next_context_iter));
+      TSNode arg_expr_node = ts_node_child (callee_node, 0);
+      bool already_extracted = false;
+
+      /* String literals are only looked at for keywords.  */
+      if (is_keyword && is_string_literal (arg_expr_node))
+        {
+          lex_pos_ty literal_pos;
+          literal_pos.file_name = logical_file_name;
+          literal_pos.line_number = ts_node_line_number (arg_expr_node);
+
+          char *literal_value = string_literal_value (arg_expr_node);
+
+          if (!extract_all)
+            {
+              mixed_string_ty *mixed =
+                mixed_string_alloc_utf8 (literal_value, lc_string,
+                                         literal_pos.file_name,
+                                         literal_pos.line_number);
+              arglist_parser_remember (argparser, arg, mixed,
+                                       arg_region,
+                                       literal_pos.file_name,
+                                       literal_pos.line_number,
+                                       savable_comment, true);
+            }
+          else
+            {
+              remember_a_message (mlp, NULL, literal_value, true, false,
+                                  arg_region, &literal_pos,
+                                  NULL, savable_comment, true);
+              already_extracted = true;
+            }
+        }
+
+      if (!already_extracted)
+        {
+          nesting_depth++;
+          if (nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name,
+                      ts_node_line_number (arg_expr_node),
+                      (size_t)(-1), false,
+                      _("too many open parentheses, brackets, or braces"));
+          extract_from_node (arg_expr_node, false, false, arg_region, mlp);
+          nesting_depth--;
+        }
+
+      /* Handle the potential comments in the callee_node, between
+         arg_expr_node and function_node.  */
+      {
+        uint32_t callee_count = ts_node_child_count (callee_node);
+        for (uint32_t k = 1; k < callee_count; k++)
+          {
+            TSNode part = ts_node_child (callee_node, k);
+            if (ts_node_eq (part, function_node))
+              break;
+            handle_comments (part);
+          }
+      }
+
+      unref_region (arg_region);
+    }
+
+  /* Handle the potential comments in node, between callee_node and
+     args_node.  */
+  {
+    uint32_t node_count = ts_node_child_count (node);
+    for (uint32_t k = 1; k < node_count; k++)
+      {
+        TSNode part = ts_node_child (node, k);
+        if (ts_node_eq (part, args_node))
+          break;
+        handle_comments (part);
+      }
+  }
+
+  /* Walk through the explicit arguments.  */
+  for (uint32_t k = 0; k < args_count; k++)
+    {
+      TSNode arg_node = ts_node_child (args_node, k);
+      handle_comments (arg_node);
+      if (!ts_node_is_named (arg_node)
+          || ts_node_symbol (arg_node) == ts_symbol_comment)
+        continue;
+
+      if (is_keyword
+          && ts_node_symbol (arg_node) != ts_symbol_named_argument)
+        abort ();
+
+      arg++;
+      flag_region_ty *arg_region =
+        inheriting_region (outer_region,
+                           flag_context_list_iterator_advance (
+                             &next_context_iter));
+      bool already_extracted = false;
+
+      if (is_keyword && ts_node_child_count (arg_node) == 1)
+        {
+          TSNode arg_expr_node = ts_node_child (arg_node, 0);
+          if (is_string_literal (arg_expr_node))
+            {
+              lex_pos_ty literal_pos;
+              literal_pos.file_name = logical_file_name;
+              literal_pos.line_number = ts_node_line_number (arg_expr_node);
+
+              char *literal_value = string_literal_value (arg_expr_node);
+
+              if (!extract_all)
+                {
+                  mixed_string_ty *mixed =
+                    mixed_string_alloc_utf8 (literal_value, lc_string,
+                                             literal_pos.file_name,
+                                             literal_pos.line_number);
+                  arglist_parser_remember (argparser, arg, mixed,
+                                           arg_region,
+                                           literal_pos.file_name,
+                                           literal_pos.line_number,
+                                           savable_comment, true);
+                }
+              else
+                {
+                  remember_a_message (mlp, NULL, literal_value, true, false,
+                                      arg_region, &literal_pos,
+                                      NULL, savable_comment, true);
+                  already_extracted = true;
+                }
+            }
+        }
+
+      if (!already_extracted)
+        {
+          nesting_depth++;
+          if (nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name,
+                      ts_node_line_number (arg_node),
+                      (size_t)(-1), false,
+                      _("too many open parentheses, brackets, or braces"));
+          extract_from_node (arg_node, false, false, arg_region, mlp);
+          nesting_depth--;
+        }
+
+      unref_region (arg_region);
+    }
+
+  if (is_keyword)
+    arglist_parser_done (argparser, arg);
+}
+
+/* Extracts messages from the function call consisting of
+     - CALLEE_NODE: a tree node of type 'property_expression'.
+   The last child of CALLEE_NODE names the function; the first child is
+   treated as the first and only argument.
+   The flag region of that argument inherits from OUTER_REGION.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_function_call_without_args (TSNode callee_node,
+                                         flag_region_ty *outer_region,
+                                         message_list_ty *mlp)
+{
+  uint32_t callee_count = ts_node_child_count (callee_node);
+  TSNode function_node = ts_node_child (callee_node, callee_count - 1);
+
+  /* Look up the flag contexts and, if the function name is a keyword,
+     prepare an argument list parser.  */
+  flag_context_list_iterator_ty next_context_iter;
+  struct arglist_parser *argparser = NULL;
+  bool is_keyword = false;
+  if (ts_node_symbol (function_node) == ts_symbol_identifier)
+    {
+      uint32_t name_start = ts_node_start_byte (function_node);
+      uint32_t name_end = ts_node_end_byte (function_node);
+      string_desc_t function_name =
+        sd_new_addr (name_end - name_start, (char *) contents + name_start);
+
+      /* Context iterator.  */
+      next_context_iter =
+        flag_context_list_iterator (
+          flag_context_list_table_lookup (
+            flag_context_list_table,
+            sd_data (function_name), sd_length (function_name)));
+
+      void *keyword_value;
+      if (hash_find_entry (&function_keywords,
+                           sd_data (function_name), sd_length (function_name),
+                           &keyword_value)
+          == 0)
+        {
+          /* The callee has some information associated with it.  We have a
+             function, named by a relevant identifier, with an implicit
+             argument list.  */
+          const struct callshapes *shapes = keyword_value;
+          argparser = arglist_parser_alloc (mlp, shapes);
+          is_keyword = true;
+        }
+    }
+  else
+    next_context_iter = null_context_list_iterator;
+
+  /* The first part of the 'property_expression' is treated as the first
+     argument.  Cf. <https://dlang.org/spec/function.html#pseudo-member>  */
+  uint32_t arg = 1;
+  flag_region_ty *arg_region =
+    inheriting_region (outer_region,
+                       flag_context_list_iterator_advance (
+                         &next_context_iter));
+  TSNode arg_expr_node = ts_node_child (callee_node, 0);
+  bool already_extracted = false;
+
+  /* String literals are only looked at for keywords.  */
+  if (is_keyword && is_string_literal (arg_expr_node))
+    {
+      lex_pos_ty literal_pos;
+      literal_pos.file_name = logical_file_name;
+      literal_pos.line_number = ts_node_line_number (arg_expr_node);
+
+      char *literal_value = string_literal_value (arg_expr_node);
+
+      if (!extract_all)
+        {
+          mixed_string_ty *mixed =
+            mixed_string_alloc_utf8 (literal_value, lc_string,
+                                     literal_pos.file_name,
+                                     literal_pos.line_number);
+          arglist_parser_remember (argparser, arg, mixed,
+                                   arg_region,
+                                   literal_pos.file_name,
+                                   literal_pos.line_number,
+                                   savable_comment, true);
+        }
+      else
+        {
+          remember_a_message (mlp, NULL, literal_value, true, false,
+                              arg_region, &literal_pos,
+                              NULL, savable_comment, true);
+          already_extracted = true;
+        }
+    }
+
+  if (!already_extracted)
+    {
+      nesting_depth++;
+      if (nesting_depth > MAX_NESTING_DEPTH)
+        if_error (IF_SEVERITY_FATAL_ERROR,
+                  logical_file_name,
+                  ts_node_line_number (arg_expr_node),
+                  (size_t)(-1), false,
+                  _("too many open parentheses, brackets, or braces"));
+      extract_from_node (arg_expr_node, false, false, arg_region, mlp);
+      nesting_depth--;
+    }
+
+  /* Handle the potential comments in the callee_node, between
+     arg_expr_node and function_node.  */
+  {
+    uint32_t count = ts_node_child_count (callee_node);
+    for (uint32_t k = 1; k < count; k++)
+      {
+        TSNode part = ts_node_child (callee_node, k);
+        if (ts_node_eq (part, function_node))
+          break;
+        handle_comments (part);
+      }
+  }
+
+  unref_region (arg_region);
+
+  if (is_keyword)
+    arglist_parser_done (argparser, arg);
+}
+
+/* Extracts messages from the template instantiation NODE consisting of
+     - IDENTIFIER_NODE: a tree node of type 'identifier',
+     - ARGS_NODE: a tree node of type 'template_arguments'.
+   The flag regions of the arguments inherit from OUTER_REGION.
+   If the template name is a keyword, string literal arguments are passed to
+   an argument list parser.  In all cases the arguments are scanned
+   recursively, unless they were already extracted because of extract_all.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_template_instantation (TSNode node,
+                                    TSNode identifier_node,
+                                    TSNode args_node,
+                                    flag_region_ty *outer_region,
+                                    message_list_ty *mlp)
+{
+  uint32_t args_count = ts_node_child_count (args_node);
+
+  uint32_t name_start = ts_node_start_byte (identifier_node);
+  uint32_t name_end = ts_node_end_byte (identifier_node);
+  string_desc_t template_name =
+    sd_new_addr (name_end - name_start, (char *) contents + name_start);
+
+  /* Handle the potential comments in node, between identifier_node and
+     args_node.  */
+  {
+    uint32_t node_count = ts_node_child_count (node);
+    for (uint32_t k = 1; k < node_count; k++)
+      {
+        TSNode part = ts_node_child (node, k);
+        if (ts_node_eq (part, args_node))
+          break;
+        handle_comments (part);
+      }
+  }
+
+  /* Context iterator.  */
+  flag_context_list_iterator_ty next_context_iter =
+    flag_context_list_iterator (
+      flag_context_list_table_lookup (
+        flag_context_list_table,
+        sd_data (template_name), sd_length (template_name)));
+
+  /* If the identifier is a keyword, we have a template instantiation, named
+     by a relevant identifier, with either a single argument or an argument
+     list.  */
+  struct arglist_parser *argparser = NULL;
+  bool is_keyword = false;
+  void *keyword_value;
+  if (hash_find_entry (&template_keywords,
+                       sd_data (template_name), sd_length (template_name),
+                       &keyword_value)
+      == 0)
+    {
+      const struct callshapes *shapes = keyword_value;
+      argparser = arglist_parser_alloc (mlp, shapes);
+      is_keyword = true;
+    }
+
+  /* Current argument number.  */
+  uint32_t arg = 0;
+
+  for (uint32_t k = 0; k < args_count; k++)
+    {
+      TSNode arg_node = ts_node_child (args_node, k);
+      handle_comments (arg_node);
+      if (!ts_node_is_named (arg_node)
+          || ts_node_symbol (arg_node) == ts_symbol_comment)
+        continue;
+
+      arg++;
+      flag_region_ty *arg_region =
+        inheriting_region (outer_region,
+                           flag_context_list_iterator_advance (
+                             &next_context_iter));
+      bool already_extracted = false;
+
+      if (is_keyword)
+        {
+          /* Find the node that may be a string literal: the only child of
+             a 'template_argument', or otherwise the node itself, assuming
+             a single template argument.  */
+          TSNode literal_node = arg_node;
+          bool have_candidate = true;
+          if (ts_node_symbol (arg_node) == ts_symbol_template_argument)
+            {
+              if (ts_node_child_count (arg_node) == 1)
+                literal_node = ts_node_child (arg_node, 0);
+              else
+                have_candidate = false;
+            }
+
+          if (have_candidate && is_string_literal (literal_node))
+            {
+              lex_pos_ty literal_pos;
+              literal_pos.file_name = logical_file_name;
+              literal_pos.line_number = ts_node_line_number (literal_node);
+
+              char *literal_value = string_literal_value (literal_node);
+
+              if (!extract_all)
+                {
+                  mixed_string_ty *mixed =
+                    mixed_string_alloc_utf8 (literal_value, lc_string,
+                                             literal_pos.file_name,
+                                             literal_pos.line_number);
+                  arglist_parser_remember (argparser, arg, mixed,
+                                           arg_region,
+                                           literal_pos.file_name,
+                                           literal_pos.line_number,
+                                           savable_comment, true);
+                }
+              else
+                {
+                  remember_a_message (mlp, NULL, literal_value, true, false,
+                                      arg_region, &literal_pos,
+                                      NULL, savable_comment, true);
+                  already_extracted = true;
+                }
+            }
+        }
+
+      if (!already_extracted)
+        {
+          nesting_depth++;
+          if (nesting_depth > MAX_NESTING_DEPTH)
+            if_error (IF_SEVERITY_FATAL_ERROR,
+                      logical_file_name,
+                      ts_node_line_number (arg_node),
+                      (size_t)(-1), false,
+                      _("too many open parentheses, brackets, or braces"));
+          extract_from_node (arg_node, false, false, arg_region, mlp);
+          nesting_depth--;
+        }
+
+      unref_region (arg_region);
+    }
+
+  if (is_keyword)
+    arglist_parser_done (argparser, arg);
+}
+
+/* Extracts messages in the syntax tree NODE.
+   If IGNORE is true, NODE is not remembered as a message of its own even if
+   extract_all is set and NODE is a string literal.
+   CALLEE_IN_CALL_EXPRESSION is true when NODE is the first child of a
+   'call_expression'; then a 'property_expression' NODE is not treated as a
+   function call without arguments.
+   OUTER_REGION is passed on to the functions that handle function calls and
+   template instantiations, and to the recursive calls.
+   Extracted messages are added to MLP.  */
+static void
+extract_from_node (TSNode node,
+                   bool ignore,
+                   bool callee_in_call_expression,
+                   flag_region_ty *outer_region,
+                   message_list_ty *mlp)
+{
+  /* With extract_all, a string literal is a message by itself.  */
+  if (extract_all && !ignore && is_string_literal (node))
+    {
+      lex_pos_ty literal_pos;
+      literal_pos.file_name = logical_file_name;
+      literal_pos.line_number = ts_node_line_number (node);
+
+      char *literal_value = string_literal_value (node);
+
+      remember_a_message (mlp, NULL, literal_value, true, false,
+                          outer_region, &literal_pos,
+                          NULL, savable_comment, true);
+    }
+
+  /* A function call with an argument list.  */
+  if (ts_node_symbol (node) == ts_symbol_call_expression
+      && ts_node_named_child_count (node) >= 2)
+    {
+      TSNode callee_node = ts_node_named_child (node, 0);
+      if (ts_node_symbol (callee_node) == ts_symbol_identifier
+          || ts_node_symbol (callee_node) == ts_symbol_property_expression)
+        {
+          uint32_t named_count = ts_node_named_child_count (node);
+          for (uint32_t k = 1; k < named_count; k++)
+            {
+              TSNode candidate = ts_node_named_child (node, k);
+              if (ts_node_symbol (candidate) == ts_symbol_named_arguments)
+                {
+                  extract_from_function_call (node, callee_node, candidate,
+                                              outer_region,
+                                              mlp);
+                  return;
+                }
+            }
+        }
+    }
+
+  /* A 'property_expression' that is not in the position of the callee in a
+     call_expression is treated like a call_expression with 0 arguments.  */
+  if (ts_node_symbol (node) == ts_symbol_property_expression
+      && !callee_in_call_expression)
+    {
+      extract_from_function_call_without_args (node, outer_region, mlp);
+      return;
+    }
+
+  /* A template instantiation with template arguments.  */
+  if (ts_node_symbol (node) == ts_symbol_template_instance
+      && ts_node_named_child_count (node) >= 2)
+    {
+      TSNode identifier_node = ts_node_named_child (node, 0);
+      if (ts_node_symbol (identifier_node) == ts_symbol_identifier)
+        {
+          uint32_t named_count = ts_node_named_child_count (node);
+          for (uint32_t k = 1; k < named_count; k++)
+            {
+              TSNode candidate = ts_node_named_child (node, k);
+              if (ts_node_symbol (candidate) == ts_symbol_template_arguments)
+                {
+                  extract_from_template_instantation (node,
+                                                      identifier_node,
+                                                      candidate,
+                                                      outer_region,
+                                                      mlp);
+                  return;
+                }
+            }
+        }
+    }
+
+  #if DEBUG_D && 0
+  if (ts_node_symbol (node) == ts_symbol_call_expression)
+    {
+      TSNode subnode = ts_node_named_child (node, 0);
+      fprintf (stderr, "-> %s\n", ts_node_string (subnode));
+      if (ts_node_symbol (subnode) == ts_symbol_identifier)
+        {
+          string_desc_t subnode_string =
+            sd_new_addr (ts_node_end_byte (subnode) - ts_node_start_byte (subnode),
+                         (char *) contents + ts_node_start_byte (subnode));
+          if (sd_equals (subnode_string, sd_from_c ("gettext")))
+            {
+              TSNode argsnode = ts_node_named_child (node, 1);
+              fprintf (stderr, "gettext arguments: %s\n", ts_node_string (argsnode));
+              fprintf (stderr, "gettext children:\n");
+              uint32_t count = ts_node_named_child_count (node);
+              uint32_t i;
+              for (i = 0; i < count; i++)
+                fprintf (stderr, "%u -> %s\n", i, ts_node_string (ts_node_named_child (node, i)));
+            }
+        }
+    }
+  #endif
+
+  /* Do not descend into comments.  Also ignore the code in unit tests:
+     Translators are not supposed to localize unit tests, only production
+     code.  */
+  if (ts_node_symbol (node) == ts_symbol_comment
+      || ts_node_symbol (node) == ts_symbol_unittest_declaration)
+    return;
+
+  /* Recurse into all children.  The pieces of a string literal are not
+     messages of their own.  */
+  bool ignore_children = ignore || is_string_literal (node);
+  bool node_is_call = (ts_node_symbol (node) == ts_symbol_call_expression);
+  uint32_t child_count = ts_node_child_count (node);
+  for (uint32_t k = 0; k < child_count; k++)
+    {
+      TSNode child = ts_node_child (node, k);
+      handle_comments (child);
+      nesting_depth++;
+      if (nesting_depth > MAX_NESTING_DEPTH)
+        if_error (IF_SEVERITY_FATAL_ERROR,
+                  logical_file_name, ts_node_line_number (child),
+                  (size_t)(-1), false,
+                  _("too many open parentheses, brackets, or braces"));
+      extract_from_node (child,
+                         ignore_children,
+                         k == 0 && node_is_call,
+                         outer_region,
+                         mlp);
+      nesting_depth--;
+    }
+}
+
+void
+extract_d (FILE *f,
+ const char *real_filename, const char *logical_filename,
+ flag_context_list_table_ty *flag_table,
+ msgdomain_list_ty *mdlp)
+{
+ message_list_ty *mlp = mdlp->item[0]->messages;
+
+ logical_file_name = xstrdup (logical_filename);
+
+ last_comment_line = -1;
+ last_non_comment_line = -1;
+
+ flag_context_list_table = flag_table;
+ nesting_depth = 0;
+
+ init_keywords ();
+
+ if (ts_language == NULL)
+ {
+ ts_language = tree_sitter_d ();
+ ts_symbol_comment = ts_language_symbol ("comment", true);
+ ts_symbol_string_literal = ts_language_symbol ("string_literal", true);
+ ts_symbol_quoted_string = ts_language_symbol ("quoted_string", true);
+ ts_symbol_escape_sequence = ts_language_symbol ("escape_sequence", true);
+ ts_symbol_htmlentity = ts_language_symbol ("htmlentity", true);
+ ts_symbol_raw_string = ts_language_symbol ("raw_string", true);
+ ts_symbol_hex_string = ts_language_symbol ("hex_string", true);
+ ts_symbol_binary_expression = ts_language_symbol ("binary_expression", true);
+ ts_symbol_add_expression = ts_language_symbol ("add_expression", true);
+ ts_symbol_expression = ts_language_symbol ("expression", true);
+ ts_symbol_identifier = ts_language_symbol ("identifier", true);
+ ts_symbol_property_expression = ts_language_symbol ("property_expression", true);
+ ts_symbol_call_expression = ts_language_symbol ("call_expression", true);
+ ts_symbol_named_arguments = ts_language_symbol ("named_arguments", true);
+ ts_symbol_named_argument = ts_language_symbol ("named_argument", true);
+ ts_symbol_template_instance = ts_language_symbol ("template_instance", true);
+ ts_symbol_template_arguments = ts_language_symbol ("template_arguments", true);
+ ts_symbol_template_argument = ts_language_symbol ("template_argument", true);
+ ts_symbol_unittest_declaration = ts_language_symbol ("unittest_declaration", true);
+ ts_symbol_tilde = ts_language_symbol ("~", false);
+ }
+
+ /* Read the file into memory. */
+ char *contents_data;
+ size_t contents_length;
+ contents_data = read_file (real_filename, 0, &contents_length);
+ if (contents_data == NULL)
+ error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
+ real_filename);
+
+ /* tree-sitter works only on files whose size fits in an uint32_t. */
+ if (contents_length > 0xFFFFFFFFUL)
+ error (EXIT_FAILURE, 0, _("file \"%s\" is unsupported because too large"),
+ real_filename);
+
+ /* D source files are UTF-8 or UTF-16 or UTF-32 encoded.
+ See <https://dlang.org/spec/lex.html#source_text>.
+ But tree-sitter supports only the UTF-8 case, and we want the 'contents'
+ variable above to be in an ASCII-compatible encoding as well. */
+ if (u8_check ((uint8_t *) contents_data, contents_length) != NULL)
+ {
+ /* The file is not UTF-8 encoded.
+ Note: contents_data is malloc()ed and therefore suitably aligned. */
+ /* Test whether it is UTF-32 encoded.
+ The disambiguation is automatic, because the file is supposed to
+ contain at least one U+000A, and U+0A000000 is invalid. */
+ if ((contents_length % 4) == 0)
+ {
+ int round;
+ for (round = 0; round < 2; round++)
+ {
+ if (u32_check ((uint32_t *) contents_data, contents_length / 4) == NULL)
+ {
+ /* Convert from UTF-32 to UTF-8. */
+ size_t u8_contents_length;
+ uint8_t *u8_contents_data =
+ u32_to_u8 ((uint32_t *) contents_data, contents_length / 4,
+ NULL, &u8_contents_length);
+ if (u8_contents_data != NULL)
+ {
+ free (contents_data);
+ contents_length = u8_contents_length;
+ contents_data = (char *) u8_contents_data;
+ goto converted;
+ }
+ }
+ for (size_t i = 0; i < contents_length / 4; i++)
+ ((uint32_t *) contents_data)[i] = bswap_32 (((uint32_t *) contents_data)[i]);
+ }
+ }
+ /* Test whether it is UTF-16 encoded.
+ Disambiguate between UTF-16BE and UTF-16LE
+ 1. by looking at the BOM, if present,
+ 2. otherwise by comparing the number of U+000A vs. U+0A00 code units
+ (a heuristic). */
+ if ((contents_length % 2) == 0)
+ {
+ bool swap;
+ if (((uint16_t *) contents_data)[0] == 0xFEFF)
+ swap = false;
+ else if (((uint16_t *) contents_data)[0] == 0xFFFE)
+ swap = true;
+ else
+ {
+ size_t count_000A = 0;
+ size_t count_0A00 = 0;
+ for (size_t i = 0; i < contents_length / 2; i++)
+ {
+ uint16_t uc = ((uint16_t *) contents_data)[i];
+ count_000A += (uc == 0x000A);
+ count_0A00 += (uc == 0x0A00);
+ }
+ swap = (count_0A00 > count_000A);
+ }
+ if (swap)
+ {
+ for (size_t i = 0; i < contents_length / 2; i++)
+ ((uint16_t *) contents_data)[i] = bswap_16 (((uint16_t *) contents_data)[i]);
+ }
+ if (u16_check ((uint16_t *) contents_data, contents_length / 2) == NULL)
+ {
+ /* Convert from UTF-16 to UTF-8. */
+ size_t u8_contents_length;
+ uint8_t *u8_contents_data =
+ u16_to_u8 ((uint16_t *) contents_data, contents_length / 2,
+ NULL, &u8_contents_length);
+ if (u8_contents_data != NULL)
+ {
+ free (contents_data);
+ contents_length = u8_contents_length;
+ contents_data = (char *) u8_contents_data;
+ goto converted;
+ }
+ }
+ }
+ error (EXIT_FAILURE, 0,
+ _("file \"%s\" is unsupported because not UTF-8 or UTF-16 or UTF-32 encoded"),
+ real_filename);
+ }
+ converted:
+ if (u8_check ((uint8_t *) contents_data, contents_length) != NULL)
+ abort ();
+ xgettext_current_source_encoding = po_charset_utf8;
+
+ /* Create a parser. */
+ TSParser *parser = ts_parser_new ();
+
+ /* Set the parser's language. */
+ ts_parser_set_language (parser, ts_language);
+
+ /* Parse the file, producing a syntax tree. */
+ TSTree *tree = ts_parser_parse_string (parser, NULL, contents_data, contents_length);
+
+ #if DEBUG_D
+ /* For debugging: Print the tree. */
+ {
+ char *tree_as_string = ts_node_string (ts_tree_root_node (tree));
+ fprintf (stderr, "Syntax tree: %s\n", tree_as_string);
+ free (tree_as_string);
+ }
+ #endif
+
+ contents = contents_data;
+
+ extract_from_node (ts_tree_root_node (tree),
+ false,
+ false,
+ null_context_region (),
+ mlp);
+
+ ts_tree_delete (tree);
+ ts_parser_delete (parser);
+ free (contents_data);
+
+ logical_file_name = NULL;
+}