its: Add new preserveSpaceRule "paragraph"

author Daiki Ueno <ueno@gnu.org>

Mon, 11 Feb 2019 10:26:53 +0000 (11:26 +0100)

committer Daiki Ueno <ueno@gnu.org>

Fri, 15 Feb 2019 11:52:06 +0000 (12:52 +0100)
author Daiki Ueno <ueno@gnu.org>
Mon, 11 Feb 2019 10:26:53 +0000 (11:26 +0100)
committer Daiki Ueno <ueno@gnu.org>
Fri, 15 Feb 2019 11:52:06 +0000 (12:52 +0100)
diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi

index 892854c75f402fea242617b121a556a250f5ea04..3066058427de5492a7573e70325e2a9ac362e7e1 100644 (file)
--- a/gettext-tools/doc/gettext.texi
+++ b/gettext-tools/doc/gettext.texi
@@ -12354,10 +12354,12 @@ A required @code{escape} attribute with the value @code{yes} or @code{no}.
  @item Extended Preserve Space
  
  This data category extends the standard @samp{Preserve Space} data
-category with the additional value @samp{trim}.  The value means to
-remove the leading and trailing whitespaces of the content, but not to
-normalize whitespaces in the middle.  In the global rule, the
-@code{preserveSpaceRule} element contains the following:
+category with the additional values @samp{trim} and @samp{paragraph}.
+@samp{trim} means to remove the leading and trailing whitespaces of the
+content, but not to normalize whitespaces in the middle.
+@samp{paragraph} means to normalize whitespaces within each paragraph
+but keep the paragraph boundaries (blank lines).  In the global
+rule, the @code{preserveSpaceRule} element contains the following:
  
  @itemize
  @item
@@ -12366,7 +12368,7 @@ that selects the nodes to which this rule applies.
  
  @item
  A required @code{space} attribute with the value @code{default},
-@code{preserve}, or @code{trim}.
+@code{preserve}, @code{trim}, or @code{paragraph}.
  @end itemize
  
  @end table
diff --git a/gettext-tools/src/its.c b/gettext-tools/src/its.c

index 8b31e0e1e51ac6e1c9a4dfb9cfb9b8e976c376f8..9964c2d1f30cf2bde4c0b335484c87f23f013f7f 100644 (file)
--- a/gettext-tools/src/its.c
+++ b/gettext-tools/src/its.c
@@ -399,6 +399,69 @@ normalize_whitespace (const char *text, enum its_whitespace_type_ty whitespace)
      case ITS_WHITESPACE_TRIM:
        return trim (text);
  
+    case ITS_WHITESPACE_NORMALIZE_PARAGRAPH:
+      /* Normalize whitespaces within the text, keeping paragraph
+         boundaries.  */
+      {
+        char *result, *p, *out;
+
+        result = trim (text);
+        for (p = out = result; *p != '\0';)
+          {
+            char *pp, *pend = NULL, *next = NULL;
+            bool last_ws = false;
+
+            /* Find a paragraph boundary: '\n', optional blanks, then '\n'.  */
+            for (pp = p; *pp != '\0';)
+              {
+                char *nl = strchrnul (pp, '\n');
+                if (*nl == '\0')
+                  {
+                    pend = nl;
+                    next = pend;
+                    break;
+                  }
+                pp = nl + 1;
+                pp += strspn (pp, " \t");  /* Blanks only, not '\n'.  */
+                if (*pp == '\n')
+                  {
+                    pend = nl;
+                    next = pp + 1;
+                    break;
+                  }
+              }
+
+            /* Normalize whitespaces in the paragraph.  */
+            assert (pend != NULL);
+            for (pp = p; pp < pend; pp++)
+              if (!(*pp == ' ' || *pp == '\t' || *pp == '\n'))
+                break;
+            for (; pp < pend; pp++)
+              {
+                if (*pp == ' ' || *pp == '\t' || *pp == '\n')
+                  {
+                    if (!last_ws)
+                      {
+                        *out++ = ' ';
+                        last_ws = true;
+                      }
+                  }
+                else
+                  {
+                    *out++ = *pp;
+                    last_ws = false;
+                  }
+              }
+            if (*pend != '\0')
+              {
+                memcpy (out, "\n\n", 2);
+                out += 2;
+              }
+            p = next;
+          }
+        *out = '\0';
+        return result;
+      }
      default:
        /* Normalize whitespaces within the text, but not at the beginning
           nor the end of the text.  */
@@ -1000,7 +1063,11 @@ its_preserve_space_rule_constructor (struct its_rule_ty *pop,
             || strcmp (prop, "default") == 0
             /* gettext extension: remove leading/trailing whitespaces only.  */
             || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS)
-               && strcmp (prop, "trim") == 0)))
+               && strcmp (prop, "trim") == 0)
+           /* gettext extension: same as default except keeping
+              paragraph boundaries.  */
+           || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS)
+               && strcmp (prop, "paragraph") == 0)))
      {
        error (0, 0, _("invalid attribute value \"%s\" for \"%s\""),
               prop, "space");
@@ -1719,6 +1786,8 @@ its_rule_list_extract_text (its_rule_list_ty *rules,
          whitespace = ITS_WHITESPACE_PRESERVE;
        else if (value && strcmp (value, "trim") == 0)
          whitespace = ITS_WHITESPACE_TRIM;
+      else if (value && strcmp (value, "paragraph") == 0)
+        whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH;
        else
          whitespace = ITS_WHITESPACE_NORMALIZE;
  
@@ -1846,6 +1915,8 @@ its_merge_context_merge_node (struct its_merge_context_ty *context,
          whitespace = ITS_WHITESPACE_PRESERVE;
        else if (value && strcmp (value, "trim") == 0)
          whitespace = ITS_WHITESPACE_TRIM;
+      else if (value && strcmp (value, "paragraph") == 0)
+        whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH;
        else
          whitespace = ITS_WHITESPACE_NORMALIZE;
  
diff --git a/gettext-tools/src/its.h b/gettext-tools/src/its.h

index 72c30c9925becce2719996d7c50166f7c37d4657..49af5cec5abc8ca89e3b8ec4ff134809e0bb42c3 100644 (file)
--- a/gettext-tools/src/its.h
+++ b/gettext-tools/src/its.h
@@ -33,6 +33,7 @@ enum its_whitespace_type_ty
  {
    ITS_WHITESPACE_PRESERVE,
    ITS_WHITESPACE_NORMALIZE,
+  ITS_WHITESPACE_NORMALIZE_PARAGRAPH,
    ITS_WHITESPACE_TRIM
  };
  
diff --git a/gettext-tools/tests/xgettext-its-1 b/gettext-tools/tests/xgettext-its-1

index 125f3e68204f397d959b8aa962ab5ddf485bee23..975a547cdc86c5b018988fe88d51667e68144da1 100755 (executable)
--- a/gettext-tools/tests/xgettext-its-1
+++ b/gettext-tools/tests/xgettext-its-1
@@ -171,6 +171,30 @@ cat <<\EOF >messages.xml
    <message unescaped="This is an unescaped attribute &lt;&gt;&amp;&quot;">
      <p></p>
    </message>
+  <message>
+    <p xml:space="paragraph">
+    This is the first paragraph with
+a newline.
+  
+    This is  the  second paragprah with spaces.
+
+
+    This is the last paragraph.</p>
+  </message>
+  <message>
+    <p xml:space="paragraph">This is the only one paragraph</p>
+  </message>
+  <message>
+    <p xml:space="paragraph">This is the only one paragraph with a boundary
+
+</p>
+  </message>
+  <message>
+    <p xml:space="paragraph"></p>
+  </message>
+  <message>
+    <p xml:space="paragraph"> </p>
+  </message>
  </messages>
  EOF
  
@@ -247,6 +271,26 @@ msgstr ""
  #: messages.xml:61
  msgid "This is an unescaped attribute <>&\""
  msgstr ""
+
+#. (itstool) path: message/p
+#: messages.xml:65
+msgid ""
+"This is the first paragraph with a newline.\n"
+"\n"
+"This is the second paragprah with spaces.\n"
+"\n"
+"This is the last paragraph."
+msgstr ""
+
+#. (itstool) path: message/p
+#: messages.xml:75
+msgid "This is the only one paragraph"
+msgstr ""
+
+#. (itstool) path: message/p
+#: messages.xml:78
+msgid "This is the only one paragraph with a boundary"
+msgstr ""
  EOF
  
  : ${DIFF=diff}
author	Daiki Ueno <ueno@gnu.org>
	Mon, 11 Feb 2019 10:26:53 +0000 (11:26 +0100)
committer	Daiki Ueno <ueno@gnu.org>
	Fri, 15 Feb 2019 11:52:06 +0000 (12:52 +0100)
gettext-tools/doc/gettext.texi		patch \| blob \| blame \| history
gettext-tools/src/its.c		patch \| blob \| blame \| history
gettext-tools/src/its.h		patch \| blob \| blame \| history
gettext-tools/tests/xgettext-its-1		patch \| blob \| blame \| history