From: Daiki Ueno Date: Mon, 11 Feb 2019 10:26:53 +0000 (+0100) Subject: its: Add new preserveSpaceRule "paragraph" X-Git-Tag: v0.20~158 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d52a1c88950ba5aa14bd4a4914aac704b0ce3297;p=thirdparty%2Fgettext.git its: Add new preserveSpaceRule "paragraph" This implements a new text extraction rule preserving paragraph boundaries, as mentioned in: https://gitlab.gnome.org/GNOME/glib/issues/1350 * gettext-tools/doc/gettext.texi (Preparing ITS Rules): Mention "paragraph". * gettext-tools/src/its.c (its_rule_list_extract_text): Accept "paragraph". (its_merge_context_merge_node): Likewise. (normalize_whitespace): Handle "paragraph" rule. * gettext-tools/src/its.h (ITS_WHITESPACE_NORMALIZE_PARAGRAPH): New enum value. * gettext-tools/tests/xgettext-its-1: Add test for "paragraph" rule. --- diff --git a/gettext-tools/doc/gettext.texi b/gettext-tools/doc/gettext.texi index 892854c75..306605842 100644 --- a/gettext-tools/doc/gettext.texi +++ b/gettext-tools/doc/gettext.texi @@ -12354,10 +12354,12 @@ A required @code{escape} attribute with the value @code{yes} or @code{no}. @item Extended Preserve Space This data category extends the standard @samp{Preserve Space} data -category with the additional value @samp{trim}. The value means to -remove the leading and trailing whitespaces of the content, but not to -normalize whitespaces in the middle. In the global rule, the -@code{preserveSpaceRule} element contains the following: +category with the additional values @samp{trim} and @samp{paragraph}. +@samp{trim} means to remove the leading and trailing whitespaces of the +content, but not to normalize whitespaces in the middle. +@samp{paragraph} means to normalize the content but keep the paragraph +boundaries. In the global +rule, the @code{preserveSpaceRule} element contains the following: @itemize @item @@ -12366,7 +12368,7 @@ that selects the nodes to which this rule applies. 
@item A required @code{space} attribute with the value @code{default}, -@code{preserve}, or @code{trim}. +@code{preserve}, @code{trim}, or @code{paragraph}. @end itemize @end table diff --git a/gettext-tools/src/its.c b/gettext-tools/src/its.c index 8b31e0e1e..9964c2d1f 100644 --- a/gettext-tools/src/its.c +++ b/gettext-tools/src/its.c @@ -399,6 +399,69 @@ normalize_whitespace (const char *text, enum its_whitespace_type_ty whitespace) case ITS_WHITESPACE_TRIM: return trim (text); + case ITS_WHITESPACE_NORMALIZE_PARAGRAPH: + /* Normalize whitespaces within the text, keeping paragraph + boundaries. */ + { + char *result, *p, *out; + + result = trim (text); + for (p = out = result; *p != '\0';) + { + char *pp, *pend = NULL, *next = NULL; + bool last_ws = false; + + /* Find a paragraph boundary (an empty or blank-only line). */ + for (pp = p; *pp != '\0';) + { + char *nl = strchrnul (pp, '\n'); + if (*nl == '\0') + { + pend = nl; + next = pend; + break; + } + pp = nl + 1; + pp += strspn (pp, " \t"); + if (*pp == '\n') + { + pend = nl; + next = pp + 1; + break; + } + } + + /* Normalize whitespaces in the paragraph. */ + assert (pend != NULL); + for (pp = p; pp < pend; pp++) + if (!(*pp == ' ' || *pp == '\t' || *pp == '\n')) + break; + for (; pp < pend; pp++) + { + if (*pp == ' ' || *pp == '\t' || *pp == '\n') + { + if (!last_ws) + { + *out++ = ' '; + last_ws = true; + } + } + else + { + *out++ = *pp; + last_ws = false; + } + } + if (*pend != '\0') + { + memcpy (out, "\n\n", 2); + out += 2; + } + p = next; + } + *out = '\0'; + return result; + } default: /* Normalize whitespaces within the text, but not at the beginning nor the end of the text. */ @@ -1000,7 +1063,11 @@ its_preserve_space_rule_constructor (struct its_rule_ty *pop, || strcmp (prop, "default") == 0 /* gettext extension: remove leading/trailing whitespaces only.
*/ || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS) - && strcmp (prop, "trim") == 0))) + && strcmp (prop, "trim") == 0) + /* gettext extension: same as default except keeping + paragraph boundaries. */ + || (node->ns && xmlStrEqual (node->ns->href, BAD_CAST GT_NS) + && strcmp (prop, "paragraph") == 0))) { error (0, 0, _("invalid attribute value \"%s\" for \"%s\""), prop, "space"); @@ -1719,6 +1786,8 @@ its_rule_list_extract_text (its_rule_list_ty *rules, whitespace = ITS_WHITESPACE_PRESERVE; else if (value && strcmp (value, "trim") == 0) whitespace = ITS_WHITESPACE_TRIM; + else if (value && strcmp (value, "paragraph") == 0) + whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH; else whitespace = ITS_WHITESPACE_NORMALIZE; @@ -1846,6 +1915,8 @@ its_merge_context_merge_node (struct its_merge_context_ty *context, whitespace = ITS_WHITESPACE_PRESERVE; else if (value && strcmp (value, "trim") == 0) whitespace = ITS_WHITESPACE_TRIM; + else if (value && strcmp (value, "paragraph") == 0) + whitespace = ITS_WHITESPACE_NORMALIZE_PARAGRAPH; else whitespace = ITS_WHITESPACE_NORMALIZE; diff --git a/gettext-tools/src/its.h b/gettext-tools/src/its.h index 72c30c992..49af5cec5 100644 --- a/gettext-tools/src/its.h +++ b/gettext-tools/src/its.h @@ -33,6 +33,7 @@ enum its_whitespace_type_ty { ITS_WHITESPACE_PRESERVE, ITS_WHITESPACE_NORMALIZE, + ITS_WHITESPACE_NORMALIZE_PARAGRAPH, ITS_WHITESPACE_TRIM }; diff --git a/gettext-tools/tests/xgettext-its-1 b/gettext-tools/tests/xgettext-its-1 index 125f3e682..975a547cd 100755 --- a/gettext-tools/tests/xgettext-its-1 +++ b/gettext-tools/tests/xgettext-its-1 @@ -171,6 +171,30 @@ cat <<\EOF >messages.xml

+ +

+ This is the first paragraph with +a newline. + + This is the second paragprah with spaces. + + + This is the last paragraph.

+
+ +

This is the only one paragraph

+
+ +

This is the only one paragraph with a boundary + +

+
+ +

+
+ +

+
EOF @@ -247,6 +271,26 @@ msgstr "" #: messages.xml:61 msgid "This is an unescaped attribute <>&\"" msgstr "" + +#. (itstool) path: message/p +#: messages.xml:65 +msgid "" +"This is the first paragraph with a newline.\n" +"\n" +"This is the second paragprah with spaces.\n" +"\n" +"This is the last paragraph." +msgstr "" + +#. (itstool) path: message/p +#: messages.xml:75 +msgid "This is the only one paragraph" +msgstr "" + +#. (itstool) path: message/p +#: messages.xml:78 +msgid "This is the only one paragraph with a boundary" +msgstr "" EOF : ${DIFF=diff}