its: Support DocBook XML, part 2.

author Bruno Haible <bruno@clisp.org>

Sun, 13 Oct 2024 10:49:40 +0000 (12:49 +0200)

committer Bruno Haible <bruno@clisp.org>

Sun, 13 Oct 2024 10:49:40 +0000 (12:49 +0200)
author Bruno Haible <bruno@clisp.org>
Sun, 13 Oct 2024 10:49:40 +0000 (12:49 +0200)
committer Bruno Haible <bruno@clisp.org>
Sun, 13 Oct 2024 10:49:40 +0000 (12:49 +0200)
diff --git a/NEWS b/NEWS

index dc19a852ec704782d333a4838644713220b5e12f..2689fce005d9ccd609d5a0b2aac9ee75c40f9a4d 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,7 @@ Version 0.23 - October 2024
        to W3C standards.
      o 'msgfmt --xml' accept an option --replace-text, that causes the output
        to be a mono-lingual XML file instead of a multi-lingual XML file.
+    o xgettext and 'msgfmt --xml' now support DocBook XML files.
    - Python:
      o xgettext now assumes source code for Python 3 rather than Python 2.
        This affects the interpretation of escape sequences in string literals.
diff --git a/autogen.sh b/autogen.sh

index cdcb53a909adc124d2f1343b74c387347b69b2d9..1641d74ef2dc6771213435f080bf149e4781cdb5 100755 (executable)
--- a/autogen.sh
+++ b/autogen.sh
@@ -282,6 +282,7 @@ if ! $skip_gnulib; then
      xstriconv
      xstriconveh
      xstring-buffer
+    xstring-desc
      xvasprintf
    '
    # Common dependencies of GNULIB_MODULES_TOOLS_FOR_SRC and GNULIB_MODULES_TOOLS_FOR_LIBGREP.
diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi

index 5de6bb7dd71c20311df508a767106db8ef19df58..c62071bd9f9a5e97bf88808af472a7e76dbd018f 100644 (file)
--- a/gettext-tools/doc/gettext.texi
+++ b/gettext-tools/doc/gettext.texi
@@ -10812,14 +10812,36 @@ that selects the nodes to which this rule applies.
  
  @item
  A required @code{escape} attribute with the value @code{yes} or @code{no}.
+
+@item
+An optional @code{unescape-if} attribute with the value
+@code{xml}, @code{xhtml}, @code{html}, or @code{no}.
  @end itemize
  
  @noindent
-The default value, @code{no}, should be good for most XML file types.
+The default values, @code{escape="no"} and @code{unescape-if="no"},
+should be good for most XML file types.
  A rule with @code{escape="no"},
  that was necessary with GNU gettext versions before 0.23,
  is now redundant.
  
+The @code{unescape-if} attribute is useful for XML file types
+which present messages with embedded XML elements to the translator.
+Such file types are for example DocBook or XHTML.
+If @code{unescape-if="xml"} is specified and the translation
+of a message looks like valid XML, the usual escaping of @code{<},
+@code{>}, and character references is omitted.
+The resulting XML document is then likely what the translator intended.
+However, if the translator did not merely copy the XML markup from the
+message to the translation, but added or removed markup,
+the resulting XML document may be invalid.
+It is therefore useful if, after invoking @code{msgfmt}, you check
+the resulting XML document against the appropriate XML schema or DTD.
+
+Similarly, if @code{unescape-if="xhtml"} is specified and the translation
+looks like valid XHTML, the usual escaping is omitted.
+And likewise for @code{unescape-if="html"}.
+
  @end table
  
  All those extended data categories can only be expressed with global
diff --git a/gettext-tools/its/docbook4.its b/gettext-tools/its/docbook4.its

index bdc3e9557e6d69c977ae102f794aa16ca171086d..1c39ce21e7a18fa4951b11078845eabb88225f0c 100644 (file)
--- a/gettext-tools/its/docbook4.its
+++ b/gettext-tools/its/docbook4.its
@@ -1,6 +1,7 @@
  <?xml version="1.0"?>
  <!--
    Copyright (C) 2010-2018 Shaun McCance
+  Copyright (C) 2024 Free Software Foundation, Inc.
    This file was written by Shaun McCance <shaunm@gnome.org>, 2010-2018.
  
    This program is free software: you can redistribute it and/or modify
@@ -19,6 +20,7 @@
  <its:rules
      xmlns:its="http://www.w3.org/2005/11/its"
      xmlns:itst="http://itstool.org/extensions/"
+    xmlns:gt="https://www.gnu.org/s/gettext/ns/its/extensions/1.0"
      version="2.0">
  
    <itst:match selector="/book"/>
@@ -230,4 +232,5 @@
  
    <!-- Some hacks -->
    <its:translateRule translate="no" selector="//releaseinfo[@role = 'CVS' and normalize-space(.) = '$Id$']"/>
+  <gt:escapeRule selector="//*" escape="no" unescape-if="xml"/>
  </its:rules>
diff --git a/gettext-tools/its/docbook5.its b/gettext-tools/its/docbook5.its

index 3b2568ea3d40a5686cec2d2cdf98f00bdbd37087..2d448928c3b5d5914bf767d15767f35c538cd242 100644 (file)
--- a/gettext-tools/its/docbook5.its
+++ b/gettext-tools/its/docbook5.its
@@ -1,6 +1,7 @@
  <?xml version="1.0"?>
  <!--
    Copyright (C) 2010-2018 Shaun McCance
+  Copyright (C) 2024 Free Software Foundation, Inc.
    This file was written by Shaun McCance <shaunm@gnome.org>, 2010-2018.
  
    This program is free software: you can redistribute it and/or modify
@@ -20,6 +21,7 @@
      xmlns:its="http://www.w3.org/2005/11/its"
      xmlns:itst="http://itstool.org/extensions/"
      xmlns:db="http://docbook.org/ns/docbook"
+    xmlns:gt="https://www.gnu.org/s/gettext/ns/its/extensions/1.0"
      version="2.0">
  
    <itst:match selector="/db:*"/>
@@ -218,4 +220,5 @@
  
    <!-- Some hacks -->
    <its:translateRule translate="no" selector="//db:releaseinfo[@role = 'CVS' and normalize-space(.) = '$Id$']"/>
+  <gt:escapeRule selector="//db:*" escape="no" unescape-if="xml"/>
  </its:rules>
diff --git a/gettext-tools/src/its-extensions.xsd b/gettext-tools/src/its-extensions.xsd

index 4cb19e552a4f6c80c67c43582d0e8315b9eac601..97cc703f3e6fbcf04fa6717de9cbf73cf97b04fc 100644 (file)
--- a/gettext-tools/src/its-extensions.xsd
+++ b/gettext-tools/src/its-extensions.xsd
@@ -49,8 +49,8 @@ Written by Bruno Haible &lt;bruno@clisp.org&gt;, 2024.
      <attribute name="textPointer" type="string" use="optional"></attribute>
    </complexType>
  
-  <!-- If no <gt:escapeRule> is present, the default 'escape' property
-       is "no".  -->
+  <!-- If no <gt:escapeRule> is present, the default 'escape' and 'unescape-if'
+       properties are "no".  -->
    <complexType name="EscapeRuleType">
      <attribute name="selector" type="string" use="required"></attribute>
      <attribute name="escape" use="required">
@@ -61,5 +61,15 @@ Written by Bruno Haible &lt;bruno@clisp.org&gt;, 2024.
          </restriction>
        </simpleType>
      </attribute>
+    <attribute name="unescape-if" use="optional" default="no">
+      <simpleType>
+        <restriction base="string">
+          <enumeration value="xml"></enumeration>
+          <enumeration value="xhtml"></enumeration>
+          <enumeration value="html"></enumeration>
+          <enumeration value="no"></enumeration>
+        </restriction>
+      </simpleType>
+    </attribute>
    </complexType>
  </schema>
diff --git a/gettext-tools/src/its.c b/gettext-tools/src/its.c

index 7b676542d119c50a4c3390a803a0cb5482b5677f..8ee7ab40f3384fe07174deffd0bda4164f3ddcd2 100644 (file)
--- a/gettext-tools/src/its.c
+++ b/gettext-tools/src/its.c
@@ -41,11 +41,17 @@
  #include "xalloc.h"
  #include "xvasprintf.h"
  #include "string-buffer.h"
+#include "xstring-desc.h"
+#include "c-ctype.h"
+#include "unistr.h"
  #include "bcp47.h"
  #include "gettext.h"
  
  #define _(str) gettext (str)
  
+#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
+
+
  /* The Internationalization Tag Set (ITS) 2.0 standard is available at:
     https://www.w3.org/TR/its20/
  
@@ -1299,6 +1305,13 @@ its_extension_escape_rule_constructor (struct its_rule_ty *rule, xmlNode *node)
    prop = _its_get_attribute (node, "escape", NULL);
    its_value_list_append (&rule->values, "escape", prop);
    free (prop);
+
+  if (xmlHasProp (node, BAD_CAST "unescape-if"))
+    {
+      prop = _its_get_attribute (node, "unescape-if", NULL);
+      its_value_list_append (&rule->values, "unescape-if", prop);
+      free (prop);
+    }
  }
  
  static struct its_value_list_ty *
@@ -1309,7 +1322,7 @@ its_extension_escape_rule_eval (struct its_rule_ty *rule,
    /* Evaluation rules:
       - Local usage: Yes
       - Global, rule-based selection: Yes
-     - Default values: escape="no" (handled in the caller)
+     - Default values: escape="no" unescape-if="no" (handled in the caller)
       - Inheritance for element nodes: Textual content of element,
         including content of child elements, but excluding attributes.  */
    struct its_value_list_ty *result;
@@ -1337,21 +1350,48 @@ its_extension_escape_rule_eval (struct its_rule_ty *rule,
          const char *value;
  
          /* A local attribute overrides the global rule.  */
-        if (xmlHasNsProp (node, BAD_CAST "escape", BAD_CAST GT_NS))
+        if (xmlHasNsProp (node, BAD_CAST "escape", BAD_CAST GT_NS)
+            || xmlHasNsProp (node, BAD_CAST "unescape-if", BAD_CAST GT_NS))
            {
-            char *prop;
+            if (xmlHasNsProp (node, BAD_CAST "escape", BAD_CAST GT_NS))
+              {
+                char *prop = _its_get_attribute (node, "escape", GT_NS);
+                if (strcmp (prop, "yes") == 0 || strcmp (prop, "no") == 0)
+                  {
+                    its_value_list_append (result, "escape", prop);
+                    if (strcmp (prop, "no") != 0)
+                      {
+                        free (prop);
+                        return result;
+                      }
+                  }
+                free (prop);
+              }
  
-            prop = _its_get_attribute (node, "escape", GT_NS);
-            if (strcmp (prop, "yes") == 0 || strcmp (prop, "no") == 0)
+            if (xmlHasNsProp (node, BAD_CAST "unescape-if", BAD_CAST GT_NS))
                {
-                its_value_list_append (result, "escape", prop);
+                char *prop = _its_get_attribute (node, "unescape-if", GT_NS);
+                if (strcmp (prop, "xml") == 0
+                    || strcmp (prop, "xhtml") == 0
+                    || strcmp (prop, "html") == 0
+                    || strcmp (prop, "no") == 0)
+                  {
+                    its_value_list_append (result, "unescape-if", prop);
+                    if (strcmp (prop, "no") != 0)
+                      {
+                        free (prop);
+                        return result;
+                      }
+                  }
                  free (prop);
-                return result;
                }
-            free (prop);
            }
  
          /* Check value for the current node.  */
+        value = its_pool_get_value_for_node (pool, node, "unescape-if");
+        if (value != NULL)
+          its_value_list_set_value (result, "unescape-if", value);
+
          value = its_pool_get_value_for_node (pool, node, "escape");
          if (value != NULL)
            {
@@ -2071,9 +2111,11 @@ _its_copy_node_with_attributes (xmlNode *node)
    return copy;
  }
  
-/* Returns true if S starts with a character reference.  */
+/* Returns true if S starts with a character reference.
+   If so, and if UCS_P is non-NULL, it stores the Unicode code point in *UCS_P
+   (U+FFFD if the value is out of range or denotes a surrogate).  */
  static bool
-starts_with_character_reference (const char *s)
+starts_with_character_reference (const char *s, unsigned int *ucs_p)
  {
    /* <https://www.w3.org/TR/xml/#NT-CharRef> defines
       CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'  */
@@ -2085,10 +2127,26 @@ starts_with_character_reference (const char *s)
            s++;
            if (*s >= '0' && *s <= '9')
              {
+              bool overflow = false;
+              unsigned int value = 0;
                do
-                s++;
+                {
+                  value = 10 * value + (*s - '0');
+                  if (value >= 0x110000)
+                    overflow = true;
+                  s++;
+                }
                while (*s >= '0' && *s <= '9');
-              return *s == ';';
+              if (*s == ';')
+                {
+                  if (ucs_p != NULL)
+                    *ucs_p = (overflow || (value >= 0xD800 && value <= 0xDFFF)
+                              ? 0xFFFD
+                              : value);
+                  return true;
+                }
+              else
+                return false;
              }
            if (*s == 'x')
              {
@@ -2097,12 +2155,32 @@ starts_with_character_reference (const char *s)
                    || (*s >= 'A' && *s <= 'F')
                    || (*s >= 'a' && *s <= 'f'))
                  {
+                  bool overflow = false;
+                  unsigned int value = 0;
                    do
-                    s++;
+                    {
+                      value = 16 * value
+                              + (*s >= '0' && *s <= '9' ? *s - '0' :
+                                 *s >= 'A' && *s <= 'F' ? *s - 'A' + 10 :
+                                 *s >= 'a' && *s <= 'f' ? *s - 'a' + 10 :
+                                 0);
+                      if (value >= 0x110000)
+                        overflow = true;
+                      s++;
+                    }
                    while ((*s >= '0' && *s <= '9')
                           || (*s >= 'A' && *s <= 'F')
                           || (*s >= 'a' && *s <= 'f'));
-                  return *s == ';';
+                  if (*s == ';')
+                    {
+                      if (ucs_p != NULL)
+                        *ucs_p = (overflow || (value >= 0xD800 && value <= 0xDFFF)
+                                  ? 0xFFFD
+                                  : value);
+                      return true;
+                    }
+                  else
+                    return false;
                  }
              }
          }
@@ -2119,7 +2197,7 @@ _its_encode_special_chars_for_merge (const char *content)
  
    for (str = content; *str != '\0'; str++)
      {
-      if (*str == '&' && starts_with_character_reference (str))
+      if (*str == '&' && starts_with_character_reference (str, NULL))
          amount += sizeof ("&amp;");
        else if (*str == '<')
          amount += sizeof ("&lt;");
@@ -2134,7 +2212,7 @@ _its_encode_special_chars_for_merge (const char *content)
    p = result;
    for (str = content; *str != '\0'; str++)
      {
-      if (*str == '&' && starts_with_character_reference (str))
+      if (*str == '&' && starts_with_character_reference (str, NULL))
          p = stpcpy (p, "&amp;");
        else if (*str == '<')
          p = stpcpy (p, "&lt;");
@@ -2147,6 +2225,610 @@ _its_encode_special_chars_for_merge (const char *content)
    return result;
  }
  
+/* Attempts to set the document's encoding to UTF-8.
+   If DOC declares no encoding, its encoding is set to "UTF-8".
+   If DOC already declares "UTF-8" or "UTF8" (as compared by
+   string_desc_c_casecmp), DOC is left unchanged.
+   In all other cases DOC is left unchanged as well, and the attempt fails.
+   NOTE(review): The new encoding string is allocated with xstrdup;
+   presumably libxml2 releases it with its own deallocator when DOC is
+   freed - confirm that both use the same allocator.
+   Returns true if successful, or false if it failed.  */
+static bool
+set_doc_encoding_utf8 (xmlDoc *doc)
+{
+  if (doc->encoding == NULL)
+    {
+      doc->encoding = BAD_CAST xstrdup ("UTF-8");
+      return true;
+    }
+  string_desc_t enc = string_desc_from_c ((char *) doc->encoding);
+  if (string_desc_c_casecmp (enc, string_desc_from_c ("UTF-8")) == 0
+      || string_desc_c_casecmp (enc, string_desc_from_c ("UTF8")) == 0)
+    return true;
+  /* The document's encoding is not UTF-8.  Conversion would be expensive.  */
+  return false;
+}
+
+/* Parses CONTENTS as a piece of simple well-formed generalized XML
+   ("simple" meaning without comments, CDATA, and other gobbledygook),
+   with markup being limited to ASCII tags only.
+   Element and attribute names must start with an ASCII letter, '_' or ':'
+   and may continue with ASCII letters, digits, '_', ':', '-' or '.'.
+   Every attribute needs a value, enclosed in double or single quotes.
+   At most 100 elements may be open at the same time; deeper nesting is
+   rejected.
+   A '>' outside of a tag is rejected.  A '&' that does not start a
+   character reference is accepted as ordinary text.
+   IGNORE_CASE means to ignore the case of tags (like in HTML).
+   VALID_ELEMENT is a test whether to accept a given element name,
+   or NULL to accept any element name.
+   NO_END_ELEMENT is a test whether a given element name is one that is an
+   empty element without needing an end tag (like e.g. <br> in HTML), or NULL
+   for none.
+   ADD_TO_NODE is the node (of type XML_ELEMENT_NODE) to which to add the
+   contents in form of XML_TEXT_NODE and XML_ELEMENT_NODE nodes, or NULL
+   for parsing without constructing the tree.
+   Returns true if the parsing succeeded.
+   Returns false with partially allocated children nodes (under ADD_TO_NODE,
+   to be freed by the caller) if the parsing failed.  */
+static bool
+_its_is_valid_simple_gen_xml (const char *contents,
+                              bool ignore_case,
+                              bool (*valid_element) (string_desc_t tag),
+                              bool (*no_end_element) (string_desc_t tag),
+                              xmlNode *add_to_node)
+{
+  /* Specification:
+     https://www.w3.org/TR/xml/  */
+
+  xmlNode *parent_node = add_to_node;
+
+  /* Stack of open elements.
+     The descriptors point into CONTENTS; the tag names are not copied.  */
+  string_desc_t open_elements[100];
+  size_t open_elements_count = 0;
+  const size_t open_elements_max = SIZEOF (open_elements);
+
+  const char *p = contents;
+  const char *curr_text_segment_start = p;
+
+  for (;;)
+    {
+      char c;
+
+      c = *p;
+      if (c == '\0')
+        {
+          if (open_elements_count > 0)
+            return false;
+          break;
+        }
+      if (c == '<')
+        {
+          if (add_to_node != NULL && curr_text_segment_start < p)
+            {
+              xmlNode *text_node = xmlNewDocTextLen (add_to_node->doc, NULL, 0);
+              xmlNodeSetContentLen (text_node,
+                                    BAD_CAST curr_text_segment_start,
+                                    p - curr_text_segment_start);
+              xmlAddChild (parent_node, text_node);
+            }
+
+          bool slash_before_tag = false;
+          bool slash_after_tag = false;
+
+          c = *++p;
+          if (c == '\0')
+            return false;
+          if (c == '/')
+            {
+              slash_before_tag = true;
+              c = *++p;
+              if (c == '\0')
+                return false;
+            }
+          /* Parse a name.
+             <https://www.w3.org/TR/xml/#NT-Name>  */
+          if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+                || c == '_' || c == ':'))
+            return false;
+          const char *name_start = p;
+          do
+            {
+              c = *++p;
+              if (c == '\0')
+                return false;
+            }
+          while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+                 || c == '_' || c == ':'
+                 || (c >= '0' && c <= '9') || c == '-' || c == '.');
+          const char *name_end = p;
+          xmlNode *current_node = NULL;
+          if (add_to_node != NULL && !slash_before_tag)
+            {
+              string_desc_t name =
+                string_desc_new_addr (name_end - name_start,
+                                      (char *) name_start);
+              char *name_c = xstring_desc_c (name);
+              if (ignore_case)
+                {
+                  /* Convert the name to lower case.  */
+                  char *np;
+                  for (np = name_c; *np != '\0'; np++)
+                    *np = c_tolower (*np);
+                }
+              current_node =
+                xmlNewDocNodeEatName (add_to_node->doc, NULL, BAD_CAST name_c,
+                                      NULL);
+              xmlAddChild (parent_node, current_node);
+            }
+          /* Skip over whitespace.
+             <https://www.w3.org/TR/xml/#sec-common-syn>  */
+          while (c == ' ' || c == '\t' || c == '\n' || c == '\r')
+            {
+              c = *++p;
+              if (c == '\0')
+                return false;
+            }
+          if (!slash_before_tag)
+            {
+              /* Parse a sequence of attributes.
+                 <https://www.w3.org/TR/xml/#NT-Attribute>  */
+              for (;;)
+                {
+                  if (!((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+                        || c == '_' || c == ':'))
+                    break;
+                  const char *attr_name_start = p;
+                  do
+                    {
+                      c = *++p;
+                      if (c == '\0')
+                        return false;
+                    }
+                  while ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+                         || c == '_' || c == ':'
+                         || (c >= '0' && c <= '9') || c == '-' || c == '.');
+                  const char *attr_name_end = p;
+                  /* Skip over whitespace before '='.  */
+                  while (c == ' ' || c == '\t' || c == '\n' || c == '\r')
+                    {
+                      c = *++p;
+                      if (c == '\0')
+                        return false;
+                    }
+                  /* Expect '='.  */
+                  if (c != '=')
+                    return false;
+                  /* Skip over whitespace after '='.  */
+                  do
+                    {
+                      c = *++p;
+                      if (c == '\0')
+                        return false;
+                    }
+                  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
+                  /* Skip over an attribute value.  */
+                  const char *attr_value_start = NULL;
+                  const char *attr_value_end = NULL;
+                  if (c == '"')
+                    {
+                      attr_value_start = p + 1;
+                      do
+                        {
+                          c = *++p;
+                          if (c == '\0')
+                            return false;
+                        }
+                      while (c != '"');
+                      attr_value_end = p;
+                    }
+                  else if (c == '\'')
+                    {
+                      attr_value_start = p + 1;
+                      do
+                        {
+                          c = *++p;
+                          if (c == '\0')
+                            return false;
+                        }
+                      while (c != '\'');
+                      attr_value_end = p;
+                    }
+                  else
+                    return false;
+                  if (add_to_node != NULL)
+                    {
+                      string_desc_t attr_name =
+                        string_desc_new_addr (attr_name_end - attr_name_start,
+                                              (char *) attr_name_start);
+                      string_desc_t attr_value =
+                        string_desc_new_addr (attr_value_end - attr_value_start,
+                                              (char *) attr_value_start);
+                      char *attr_name_c = xstring_desc_c (attr_name);
+                      char *attr_value_c = xstring_desc_c (attr_value);
+                      xmlAttr *attr =
+                        xmlNewProp (current_node, BAD_CAST attr_name_c,
+                                    BAD_CAST attr_value_c);
+                      if (attr == NULL)
+                        xalloc_die ();
+                      free (attr_value_c);
+                      free (attr_name_c);
+                    }
+                  /* Skip over whitespace after the attribute value.  */
+                  c = *++p;
+                  if (c == '\0')
+                    return false;
+                  if (!(c == ' ' || c == '\t' || c == '\n' || c == '\r'))
+                    break;
+                  do
+                    {
+                      c = *++p;
+                      if (c == '\0')
+                        return false;
+                    }
+                  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
+                }
+              if (c == '/')
+                {
+                  slash_after_tag = true;
+                  c = *++p;
+                  if (c == '\0')
+                    return false;
+                }
+            }
+          if (c != '>')
+            return false;
+          /* Seen a complete <...> element start/end.  */
+          /* Verify that the tag is allowed.  */
+          string_desc_t tag =
+            string_desc_new_addr (name_end - name_start, (char *) name_start);
+          if (!(valid_element == NULL || valid_element (tag)))
+            return false;
+          if (slash_after_tag || (no_end_element != NULL && no_end_element (tag)))
+            {
+              /* Seen an empty element.
+                 NOTE(review): This branch is also taken for an end tag </X>
+                 when NO_END_ELEMENT accepts X, because slash_before_tag is
+                 not tested here; such an end tag is silently ignored.  */
+            }
+          else if (!slash_before_tag)
+            {
+              /* Seen the start of an element.  */
+              if (open_elements_count == open_elements_max)
+                /* Nesting depth too high.  */
+                return false;
+              open_elements[open_elements_count++] = tag;
+              if (add_to_node != NULL)
+                parent_node = current_node;
+            }
+          else
+            {
+              /* Seen the end of an element.
+                 Verify that the tag matches the one of the start.  */
+              if (open_elements_count == 0)
+                /* The end of an element without a corresponding start.  */
+                return false;
+              if ((ignore_case ? string_desc_c_casecmp : string_desc_cmp)
+                  (open_elements[open_elements_count - 1], tag)
+                  != 0)
+                return false;
+              open_elements_count--;
+              if (add_to_node != NULL)
+                parent_node = parent_node->parent;
+            }
+          curr_text_segment_start = p + 1;
+        }
+      else if (c == '>')
+        {
+          /* Stray '>'.
+             We could allow it, but better not.  */
+          return false;
+        }
+      else if (c == '&')
+        {
+          /* Allow a character reference as a whole.
+             Also allow a single '&', as it does not do much harm.  */
+          unsigned int ucs;
+          if (starts_with_character_reference (p, &ucs))
+            {
+              const char *semicolon = strchr (p, ';');
+              if (add_to_node != NULL)
+                {
+                  if (curr_text_segment_start < p)
+                    {
+                      xmlNode *text_node =
+                        xmlNewDocTextLen (add_to_node->doc, NULL, 0);
+                      xmlNodeSetContentLen (text_node,
+                                            BAD_CAST curr_text_segment_start,
+                                            p - curr_text_segment_start);
+                      xmlAddChild (parent_node, text_node);
+                    }
+                  xmlNode *text_node =
+                    xmlNewDocTextLen (add_to_node->doc, NULL, 0);
+                  if (set_doc_encoding_utf8 (add_to_node->doc))
+                    {
+                      uint8_t buf[6];
+                      int nbytes = u8_uctomb (buf, ucs, SIZEOF (buf));
+                      if (nbytes <= 0)
+                        abort ();
+                      xmlNodeSetContentLen (text_node, BAD_CAST buf, nbytes);
+                    }
+                  else
+                    xmlNodeSetContentLen (text_node, BAD_CAST p,
+                                          semicolon + 1 - p);
+                  /* Here it is useful that xmlAddChild merges adjacent text
+                     nodes.  */
+                  xmlAddChild (parent_node, text_node);
+                }
+              curr_text_segment_start = semicolon + 1;
+              p = semicolon;
+            }
+        }
+      p++;
+    }
+
+  if (add_to_node != NULL && curr_text_segment_start < p)
+    {
+      xmlNode *text_node = xmlNewDocTextLen (add_to_node->doc, NULL, 0);
+      xmlNodeSetContentLen (text_node,
+                            BAD_CAST curr_text_segment_start,
+                            p - curr_text_segment_start);
+      xmlAddChild (parent_node, text_node);
+    }
+  return true;
+}
+
+/* Returns true if CONTENTS is a piece of simple well-formed XML
+   ("simple" meaning without comments, CDATA, and other gobbledygook),
+   with markup being limited to ASCII tags only.
+   This is a pure check: tag names are compared case-sensitively, every
+   element name is accepted, every element needs an end tag or the
+   empty-element syntax, and no node tree is constructed.  */
+static bool
+_its_is_valid_simple_xml (const char *contents)
+{
+  return _its_is_valid_simple_gen_xml (contents, false, NULL, NULL, NULL);
+}
+
+static bool
+is_valid_xhtml_element (string_desc_t tag)
+{
+  /* Returns true if TAG compares equal (per string_desc_cmp) to one of the
+     XHTML element names in the table below, false otherwise.
+     Specification:
+     https://www.w3.org/TR/xhtml1/
+     https://www.w3.org/TR/xhtml1/dtds.html  */
+  /* Sorted list of allowed tags.
+     The binary search below relies on the entries being in ascending
+     string_desc_cmp order.
+     NOTE(review): The disabled entries at the end are not separated from
+     "var" by a comma and are not merged into the sorted order; enabling
+     them requires fixing both and adjusting the array dimensions.  */
+  static const char allowed[41][12] =
+    {
+      "a", /* anchor */
+      "abbr", /* abbreviation */
+      "acronym", /* acronym */
+      "address", /* address */
+      "b", /* bold font style */
+      "bdo", /* bidi override */
+      "big", /* bigger font */
+      "blockquote", /* block-like quote */
+      "br", /* forced line break */
+      "cite", /* citation */
+      "code", /* program code */
+      "dd", /* definition list item */
+      "del", /* deleted text */
+      "dfn", /* definitional */
+      "dl", /* definition list */
+      "dt", /* definition list item */
+      "em", /* emphasis */
+      "h1", /* heading */
+      "h2", /* heading */
+      "h3", /* heading */
+      "h4", /* heading */
+      "h5", /* heading */
+      "h6", /* heading */
+      "hr", /* horizontal rule */
+      "i", /* italic font style */
+      "ins", /* inserted text */
+      "kbd", /* user typed */
+      "li", /* list item */
+      "ol", /* list */
+      "p", /* paragraph */
+      "pre", /* preformatted text */
+      "q", /* inlined quote */
+      "samp", /* sample */
+      "small", /* smaller font */
+      "span", /* generic container */
+      "strong", /* strong emphasis */
+      "sub", /* subscript */
+      "sup", /* superscript */
+      "tt", /* fixed-width font */
+      "ul", /* list */
+      "var" /* variable */
+#if 0 /* I don't think it is appropriate for a translator to use these.  */
+      "div", /* generic container */
+      "script", /* only used in head */
+      "object", /* embedded object */
+      "param", /* parameter for object */
+      "img", /* image */
+      "map", /* image map */
+      "area", /* image map */
+      "form", /* form */
+      "label", /* form element */
+      "input", /* form control */
+      "select", /* form control */
+      "optgroup", /* form element */
+      "option", /* form element */
+      "textarea", /* user input */
+      "fieldset", /* form element */
+      "legend", /* form element */
+      "button", /* form element */
+      "table", /* table */
+      "caption", /* table */
+      "thead", /* table */
+      "tfoot", /* table */
+      "tbody", /* table */
+      "colgroup", /* table */
+      "col", /* table */
+      "tr", /* table */
+      "th", /* table */
+      "td", /* table */
+#endif
+    };
+  /* Use binary search.  */
+  size_t lo = 0;
+  size_t hi = SIZEOF (allowed);
+  while (lo < hi)
+    {
+      /* Invariant:
+         If tag occurs in the table, it is at an index >= lo, < hi.  */
+      size_t i = (lo + hi) / 2; /* >= lo, < hi */
+      int cmp = string_desc_cmp (tag, string_desc_from_c (allowed[i]));
+      if (cmp == 0)
+        return true;
+      if (cmp < 0)
+        hi = i;
+      else
+        lo = i + 1;
+    }
+  return false;
+}
+
+/* Returns true if the argument is a piece of simple well-formed XHTML
+   ("simple" meaning without comments, CDATA, and other gobbledygook),
+   with markup being limited to ASCII tags only.
+   This is a pure check: tag names are compared case-sensitively, only
+   elements accepted by is_valid_xhtml_element are allowed, every element
+   needs an end tag or the empty-element syntax, and no node tree is
+   constructed.  */
+static bool
+_its_is_valid_simple_xhtml (const char *contents)
+{
+  return _its_is_valid_simple_gen_xml (contents, false,
+                                       is_valid_xhtml_element, NULL, NULL);
+}
+
+static bool
+is_valid_html_element (string_desc_t tag)
+{
+  /* Tells whether TAG is among the HTML elements accepted in a translation.
+     Specification: https://html.spec.whatwg.org/
+     Relevant sections:
+     4.3 Sections
+     4.4 Grouping content
+     4.5 Text-level semantics
+     4.6 Links
+     4.7 Edits
+     The elements from the other sections of chapter 4 are left out on
+     purpose: they are deemed inappropriate for use by a translator.  */
+  /* The allowed tags.  Must stay sorted, since the lookup below bisects.  */
+  static const char allowed[52][12] =
+    {
+      "a", /* anchor */
+      "abbr", /* abbreviation */
+      "acronym", /* acronym (removed in HTML 5) */
+      "address", /* address */
+      "b", /* bold font style */
+      "bdi", /* bidi isolation */
+      "bdo", /* bidi override */
+      "big", /* bigger font (removed in HTML 5) */
+      "blockquote", /* block-like quote */
+      "br", /* forced line break */
+      "cite", /* citation */
+      "code", /* program code */
+      "dd", /* definition list item */
+      "del", /* deleted text */
+      "dfn", /* definitional */
+      "dl", /* definition list */
+      "dt", /* definition list item */
+      "em", /* emphasis */
+      "figcaption",
+      "figure",
+      "h1", /* heading */
+      "h2", /* heading */
+      "h3", /* heading */
+      "h4", /* heading */
+      "h5", /* heading */
+      "h6", /* heading */
+      "hr", /* horizontal rule */
+      "i", /* italic font style */
+      "ins", /* inserted text */
+      "kbd", /* user typed */
+      "li", /* list item */
+      "mark", /* marked */
+      "menu", /* toolbar */
+      "ol", /* list */
+      "p", /* paragraph */
+      "pre", /* preformatted text */
+      "q", /* inlined quote */
+      "rp", /* ruby */
+      "rt", /* ruby */
+      "ruby", /* ruby annotations */
+      "s", /* strikethrough */
+      "samp", /* sample */
+      "small", /* smaller font */
+      "span", /* generic container */
+      "strong", /* strong emphasis */
+      "sub", /* subscript */
+      "sup", /* superscript */
+      "tt", /* fixed-width font (removed in HTML 5) */
+      "u", /* unarticulated */
+      "ul", /* list */
+      "var", /* variable */
+      "wbr" /* possible line break */
+    };
+  /* Bisect the sorted table.  */
+  size_t low = 0;
+  size_t high = SIZEOF (allowed);
+  while (low < high)
+    {
+      /* Loop invariant: if TAG is in the table at all, its index is
+         in the range low <= index < high.  */
+      size_t mid = (low + high) / 2; /* low <= mid < high */
+      int order = string_desc_cmp (tag, string_desc_from_c (allowed[mid]));
+      if (order < 0)
+        high = mid;
+      else if (order > 0)
+        low = mid + 1;
+      else
+        return true;
+    }
+  return false;
+}
+
+static bool
+is_no_end_html_element (string_desc_t tag)
+{
+  /* True for allowed elements that, per https://html.spec.whatwg.org/, have
+     "Tag omission in text/html: No end tag.": <br>, <hr>, <wbr>.  */
+  return string_desc_cmp (tag, string_desc_from_c ("br")) == 0
+         || string_desc_cmp (tag, string_desc_from_c ("hr")) == 0
+         || string_desc_cmp (tag, string_desc_from_c ("wbr")) == 0;
+}
+
+/* Tells whether CONTENTS is simple well-formed HTML: no comments, no CDATA,
+   no other gobbledygook, and markup limited to ASCII tags only.
+   Specification: https://html.spec.whatwg.org/  */
+static bool
+_its_is_valid_simple_html (const char *contents)
+{
+  /* Unlike for XHTML, a predicate for elements with no end tag is passed.  */
+  bool valid =
+    _its_is_valid_simple_gen_xml (contents, true,
+                                  is_valid_html_element,
+                                  is_no_end_html_element, NULL);
+  return valid;
+}
+
+static bool
+_its_set_simple_xml_content (xmlNode *node, const char *contents)
+{
+  /* Parses CONTENTS in the context of NODE and appends the resulting nodes
+     as children of NODE.  Returns true if the parsing succeeded.
+     This works fine for "xml" and "xhtml", but not for "html", due to
+     elements with no end, such as <br>: xmlParseInNodeContext returns error
+     XML_ERR_NOT_WELL_BALANCED in this situation.  */
+  int const parse_options =
+    XML_PARSE_NONET | XML_PARSE_NOWARNING
+    | XML_PARSE_NOBLANKS | XML_PARSE_NOERROR;
+  xmlNode *parsed = NULL;
+  xmlParserErrors status =
+    xmlParseInNodeContext (node, contents, strlen (contents),
+                           parse_options, &parsed);
+  if (status != XML_ERR_OK)
+    return false;
+  if (parsed != NULL)
+    xmlAddChildList (node, parsed);
+  return true;
+}
+
+static bool
+_its_set_simple_html_content (xmlNode *node, const char *contents)
+{
+  /* Run the simple HTML check on CONTENTS, this time handing NODE to it.
+     If the check fails, reset the content of NODE (presumably because
+     children may already have been attached to it - TODO confirm).  */
+  bool ok = _its_is_valid_simple_gen_xml (contents, true,
+                                          is_valid_html_element,
+                                          is_no_end_html_element,
+                                          node);
+  if (!ok)
+    xmlNodeSetContent (node, NULL);
+  return ok;
+}
+
  static void
  its_merge_context_merge_node (struct its_merge_context_ty *context,
                                xmlNode *node,
@@ -2162,6 +2844,7 @@ its_merge_context_merge_node (struct its_merge_context_ty *context,
        bool do_escape;
        bool do_escape_during_extract;
        bool do_escape_during_merge;
+      const char *do_unescape_if;
        enum its_whitespace_type_ty whitespace;
  
        values = its_rule_list_eval (context->rules, node);
@@ -2175,6 +2858,8 @@ its_merge_context_merge_node (struct its_merge_context_ty *context,
  
        do_escape_during_merge = do_escape;
  
+      do_unescape_if = its_value_list_get_value (values, "unescape-if");
+
        value = its_value_list_get_value (values, "space");
        if (value && strcmp (value, "preserve") == 0)
          whitespace = ITS_WHITESPACE_PRESERVE;
@@ -2196,8 +2881,6 @@ its_merge_context_merge_node (struct its_merge_context_ty *context,
          msgid = _its_get_content (context->rules, node, value,
                                    ITS_WHITESPACE_PRESERVE,
                                    do_escape_during_extract);
-      its_value_list_destroy (values);
-      free (values);
  
        if (msgid == NULL)
          msgid = _its_collect_text_content (node, whitespace,
@@ -2233,6 +2916,7 @@ its_merge_context_merge_node (struct its_merge_context_ty *context,
                xpg_to_bcp47 (language_bcp47, language);
                xmlSetProp (translated, BAD_CAST "xml:lang", BAD_CAST language_bcp47);
  
+              const char *msgstr = mp->msgstr;
                /* libxml2 offers two functions for setting the content of an
                   element: xmlNodeSetContent and xmlNodeAddContent.  They differ
                   in the amount of escaping they do:
@@ -2265,25 +2949,68 @@ its_merge_context_merge_node (struct its_merge_context_ty *context,
                if (do_escape_during_merge)
                  {
                    /* These three are equivalent:
-                     xmlNodeAddContent (translated, BAD_CAST mp->msgstr);
-                     xmlNodeSetContent (translated, xmlEncodeEntitiesReentrant (context->doc, BAD_CAST mp->msgstr));
-                     xmlNodeSetContent (translated, xmlEncodeSpecialChars (context->doc, BAD_CAST mp->msgstr));  */
-                  xmlNodeAddContent (translated, BAD_CAST mp->msgstr);
+                     xmlNodeAddContent (translated, BAD_CAST msgstr);
+                     xmlNodeSetContent (translated, xmlEncodeEntitiesReentrant (context->doc, BAD_CAST msgstr));
+                     xmlNodeSetContent (translated, xmlEncodeSpecialChars (context->doc, BAD_CAST msgstr));  */
+                  xmlNodeAddContent (translated, BAD_CAST msgstr);
                  }
                else
                  {
-                  char *middle_ground = _its_encode_special_chars_for_merge (mp->msgstr);
-                  xmlNodeSetContent (translated, BAD_CAST middle_ground);
-                  free (middle_ground);
+                  bool done_unescape = false;
+
+                  if (do_unescape_if != NULL
+                       && ((strcmp (do_unescape_if, "xml") == 0
+                           && _its_is_valid_simple_xml (msgstr))
+                          || (strcmp (do_unescape_if, "xhtml") == 0
+                              && _its_is_valid_simple_xhtml (msgstr))
+                          || (strcmp (do_unescape_if, "html") == 0
+                              && _its_is_valid_simple_html (msgstr))))
+                    {
+                      /* It looks like the translator has provided a syntactically
+                         valid XML or HTML markup.
+                         Note: This is only a simple test; we don't check the XML
+                         or XHTML schema or HTML DTD here.  Therefore in theory the
+                         result may be invalid.  But this should be rare, since
+                         translators most often only preserve the markup that was
+                         present in the msgid; if they do this, the result will be
+                         valid.  */
+                      if (strcmp (do_unescape_if, "xml") == 0
+                          || strcmp (do_unescape_if, "xhtml") == 0)
+                        {
+                          if (_its_set_simple_xml_content (translated, msgstr))
+                            done_unescape = true;
+                        }
+                      else
+                        {
+                          /* For "html", we create the children nodes ourselves,
+                             in order to deal with elements with no end, such as
+                             <br>.  For "xml" and "xhtml", on the other hand,
+                             this code would not work well, due to insufficient
+                             handling of namespaces.  */
+                          if (_its_set_simple_html_content (translated, msgstr))
+                            done_unescape = true;
+                        }
+                    }
+                  if (!done_unescape)
+                    {
+                      char *middle_ground = _its_encode_special_chars_for_merge (msgstr);
+                      xmlNodeSetContent (translated, BAD_CAST middle_ground);
+                      free (middle_ground);
+                    }
                  }
  
                if (!replace_text)
                  xmlAddNextSibling (node, translated);
              }
          }
-      free (msgctxt);
        free (msgid);
+      free (msgctxt);
+      its_value_list_destroy (values);
+      free (values);
      }
+  /* FIXME: If replace_text, we should handle nodes of type XML_ATTRIBUTE_NODE,
+     because at least the "translatable" and "escape" properties are applicable
+     to them.  */
  }
  
  void
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am

index b0afd45ce77e9858e4cd9b70453b367d51e0a676..727bebedd0e9cccd8582d4d33416f0fac6155eea 100644 (file)
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am
@@ -57,7 +57,7 @@ TESTS = gettext-1 gettext-2 \
         msgfmt-tcl-1 msgfmt-tcl-2 \
         msgfmt-qt-1 msgfmt-qt-2 \
         msgfmt-desktop-1 msgfmt-desktop-2 msgfmt-desktop-3 \
-       msgfmt-xml-1 msgfmt-xml-2 msgfmt-xml-3 msgfmt-xml-4 \
+       msgfmt-xml-1 msgfmt-xml-2 msgfmt-xml-3 msgfmt-xml-4 msgfmt-xml-5 \
         msggrep-1 msggrep-2 msggrep-3 msggrep-4 msggrep-5 msggrep-6 msggrep-7 \
         msggrep-8 msggrep-9 msggrep-10 msggrep-11 \
         msginit-1 msginit-2 msginit-3 msginit-4 \
@@ -103,6 +103,7 @@ TESTS = gettext-1 gettext-2 \
         xgettext-csharp-stackovfl-1 xgettext-csharp-stackovfl-2 \
         xgettext-csharp-stackovfl-3 xgettext-csharp-stackovfl-4 \
         xgettext-desktop-1 xgettext-desktop-2 \
+       xgettext-docbook-1 \
         xgettext-elisp-1 xgettext-elisp-2 xgettext-elisp-3 \
         xgettext-elisp-stackovfl-1 xgettext-elisp-stackovfl-2 \
         xgettext-elisp-stackovfl-3 xgettext-elisp-stackovfl-4 \
diff --git a/gettext-tools/tests/msgfmt-xml-5 b/gettext-tools/tests/msgfmt-xml-5

new file mode 100755 (executable)

index 0000000..156c573
--- /dev/null
+++ b/gettext-tools/tests/msgfmt-xml-5
@@ -0,0 +1,137 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test msgfmt --xml: DocBook support with --replace-text option
+
+# Example file taken from
+# https://sources.debian.org/src/python-activipy/0.1-9/debian/activipy_tester.dbk/
+cat <<\EOF > activipy_tester.xml
+<?xml version='1.0' encoding='utf-8'?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+
+<!-- (c) 2015 W. Martin Borgert <debacle@debian.org>
+     Date: 2015-11-22
+     License of this manual: GPL-3+ -->
+
+<refentry>
+  <refmeta>
+    <refentrytitle>activipy_tester</refentrytitle>
+    <manvolnum>1</manvolnum>
+    <refmiscinfo class="source">activipy_tester</refmiscinfo>
+    <refmiscinfo class="manual">User Commands</refmiscinfo>
+  </refmeta>
+  <refnamediv>
+    <refname>activipy_tester</refname>
+
+    <refpurpose>Test for activitystreams correctness</refpurpose>
+  </refnamediv>
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>activipy_tester</command>
+      <arg><option>-h</option></arg>
+      <arg><option>--help</option></arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      Run <command>activipy_tester --help</command> for more options.
+    </para>
+  </refsect1>
+</refentry>
+EOF
+
+cat <<\EOF > de.po
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2024-10-11 23:47+0200\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: de\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: activipy_tester.xml:11 activipy_tester.xml:13 activipy_tester.xml:17
+msgid "activipy_tester"
+msgstr ""
+
+#: activipy_tester.xml:12
+msgid "1"
+msgstr ""
+
+#: activipy_tester.xml:14
+msgid "User Commands"
+msgstr "Benutzer-Befehle"
+
+#: activipy_tester.xml:19
+msgid "Test for activitystreams correctness"
+msgstr "Tests von activitystreams"
+
+#: activipy_tester.xml:22
+msgid ""
+"<command>activipy_tester</command><arg><option>-h</option></arg><arg><option>--help</option></arg>"
+msgstr ""
+
+#: activipy_tester.xml:30
+msgid "Description"
+msgstr "Beschreibung"
+
+#: activipy_tester.xml:32
+msgid "Run <command>activipy_tester --help</command> for more options."
+msgstr "Für weitere Optionen führen Sie <command>activipy_tester --help</command> aus."
+EOF
+
+cat <<\EOF > activipy_tester.de.xml.ok
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+<!-- (c) 2015 W. Martin Borgert <debacle@debian.org>
+     Date: 2015-11-22
+     License of this manual: GPL-3+ -->
+<refentry>
+  <refmeta>
+    <refentrytitle>activipy_tester</refentrytitle>
+    <manvolnum>1</manvolnum>
+    <refmiscinfo class="source">activipy_tester</refmiscinfo>
+    <refmiscinfo class="manual" xml:lang="de">Benutzer-Befehle</refmiscinfo>
+  </refmeta>
+  <refnamediv>
+    <refname>activipy_tester</refname>
+    <refpurpose xml:lang="de">Tests von activitystreams</refpurpose>
+  </refnamediv>
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>activipy_tester</command>
+      <arg>
+        <option>-h</option>
+      </arg>
+      <arg>
+        <option>--help</option>
+      </arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+  <refsect1>
+    <title xml:lang="de">Beschreibung</title>
+    <para xml:lang="de">Für weitere Optionen führen Sie <command>activipy_tester --help</command> aus.</para>
+  </refsect1>
+</refentry>
+EOF
+# Use the tools from PATH unless the environment names specific ones.
+: ${MSGFMT=msgfmt} ${DIFF=diff}
+${MSGFMT} --xml --template=activipy_tester.xml --replace-text -l de de.po -o activipy_tester.de.xml \
+  || Exit 1
+# The merged document must be identical to the expected one.
+${DIFF} activipy_tester.de.xml.ok activipy_tester.de.xml
+test $? = 0 || Exit 1
diff --git a/gettext-tools/tests/xgettext-docbook-1 b/gettext-tools/tests/xgettext-docbook-1

new file mode 100755 (executable)

index 0000000..30cc302
--- /dev/null
+++ b/gettext-tools/tests/xgettext-docbook-1
@@ -0,0 +1,105 @@
+#!/bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test of DocBook support.
+
+# Example file taken from
+# https://sources.debian.org/src/python-activipy/0.1-9/debian/activipy_tester.dbk/
+cat <<\EOF > activipy_tester.xml
+<?xml version='1.0' encoding='utf-8'?>
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN"
+"http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd">
+
+<!-- (c) 2015 W. Martin Borgert <debacle@debian.org>
+     Date: 2015-11-22
+     License of this manual: GPL-3+ -->
+
+<refentry>
+  <refmeta>
+    <refentrytitle>activipy_tester</refentrytitle>
+    <manvolnum>1</manvolnum>
+    <refmiscinfo class="source">activipy_tester</refmiscinfo>
+    <refmiscinfo class="manual">User Commands</refmiscinfo>
+  </refmeta>
+  <refnamediv>
+    <refname>activipy_tester</refname>
+
+    <refpurpose>Test for activitystreams correctness</refpurpose>
+  </refnamediv>
+  <refsynopsisdiv>
+    <cmdsynopsis>
+      <command>activipy_tester</command>
+      <arg><option>-h</option></arg>
+      <arg><option>--help</option></arg>
+    </cmdsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>Description</title>
+
+    <para>
+      Run <command>activipy_tester --help</command> for more options.
+    </para>
+  </refsect1>
+</refentry>
+EOF
+
+: ${XGETTEXT=xgettext}
+${XGETTEXT} -o xg-db-1.tmp activipy_tester.xml || Exit 1
+func_filter_POT_Creation_Date xg-db-1.tmp xg-db-1.pot
+
+cat <<\EOF > xg-db-1.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: activipy_tester.xml:11 activipy_tester.xml:13 activipy_tester.xml:17
+msgid "activipy_tester"
+msgstr ""
+
+#: activipy_tester.xml:12
+msgid "1"
+msgstr ""
+
+#: activipy_tester.xml:14
+msgid "User Commands"
+msgstr ""
+
+#: activipy_tester.xml:19
+msgid "Test for activitystreams correctness"
+msgstr ""
+
+#: activipy_tester.xml:22
+msgid ""
+"<command>activipy_tester</command><arg><option>-h</option></"
+"arg><arg><option>--help</option></arg>"
+msgstr ""
+
+#: activipy_tester.xml:30
+msgid "Description"
+msgstr ""
+
+#: activipy_tester.xml:32
+msgid "Run <command>activipy_tester --help</command> for more options."
+msgstr ""
+EOF
+
+# Compare the generated POT file with the expected one; the exit status of
+# the comparison is the result of this test.
+: ${DIFF=diff}
+${DIFF} xg-db-1.ok xg-db-1.pot
+exit $?
author	Bruno Haible <bruno@clisp.org>
	Sun, 13 Oct 2024 10:49:40 +0000 (12:49 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 13 Oct 2024 10:49:40 +0000 (12:49 +0200)
NEWS		patch \| blob \| blame \| history
autogen.sh		patch \| blob \| blame \| history
gettext-tools/doc/gettext.texi		patch \| blob \| blame \| history
gettext-tools/its/docbook4.its		patch \| blob \| blame \| history
gettext-tools/its/docbook5.its		patch \| blob \| blame \| history
gettext-tools/src/its-extensions.xsd		patch \| blob \| blame \| history
gettext-tools/src/its.c		patch \| blob \| blame \| history
gettext-tools/tests/Makefile.am		patch \| blob \| blame \| history
gettext-tools/tests/msgfmt-xml-5	[new file with mode: 0755]	patch \| blob
gettext-tools/tests/xgettext-docbook-1	[new file with mode: 0755]	patch \| blob