git.ipfire.org Git - thirdparty/gettext.git/commitdiff
its: Fix test failure.
author Bruno Haible <bruno@clisp.org>
Tue, 26 Mar 2019 22:05:09 +0000 (23:05 +0100)
committer Bruno Haible <bruno@clisp.org>
Tue, 26 Mar 2019 22:11:14 +0000 (23:11 +0100)
* gettext-tools/src/its.c (normalize_whitespace): Fix detection of paragraph
boundary. Get rid of initial trim() call.
* gettext-tools/tests/xgettext-its-1: Fix typo. Verify removal of whitespace
at the end of the last paragraph.
* autogen.sh (GNULIB_MODULES_TOOLS_FOR_SRC): Add memchr.

autogen.sh
gettext-tools/src/its.c
gettext-tools/tests/xgettext-its-1

index 7f6a1c0272082e805fa4b3928f7ab14e60177c78..f67c05ed84c5322c20e792600a4a8a9a53db100b 100755 (executable)
@@ -226,6 +226,7 @@ if ! $skip_gnulib; then
       locale
       localename
       lock
+      memchr
       memmove
       memset
       minmax
index c542db47672338647830f089114f7f76aff04b3e..8b72e1e19ba75ca734239ec6aeef647a75c43c09 100644 (file)
@@ -403,61 +403,78 @@ normalize_whitespace (const char *text, enum its_whitespace_type_ty whitespace)
       /* Normalize whitespaces within the text, keeping paragraph
          boundaries.  */
       {
-        char *result, *p, *out;
-
-        result = trim (text);
-        for (p = out = result; *p != '\0';)
+        char *result = xstrdup (text);
+        /* Go through the string, shrinking it, reading from *p++
+           and writing to *out++.  (result <= out <= p.)  */
+        const char *start_of_paragraph;
+        char *out;
+
+        out = result;
+        for (start_of_paragraph = result; *start_of_paragraph != '\0';)
           {
-            char *pp, *pend = NULL, *next = NULL;
-            bool last_ws = false;
+            const char *end_of_paragraph;
+            const char *next_paragraph;
 
-            /* Find a paragraph boundary.  */
-            for (pp = p; *pp != '\0';)
-              {
-                char *nl = strchrnul (pp, '\n');
-                if (*nl == '\0')
-                  {
-                    pend = nl;
-                    next = pend;
-                    break;
-                  }
-                pp = nl + 1;
-                pp += strspn (pp, " \t\n");
-                if (*pp == '\n')
-                  {
-                    pend = nl;
-                    next = pp + 1;
-                    break;
-                  }
-              }
+            /* Find the next paragraph boundary.  */
+            {
+              const char *p;
 
-            /* Normalize whitespaces in the paragraph.  */
-            assert (pend != NULL);
-            for (pp = p; pp < pend; pp++)
-              if (!(*pp == ' ' || *pp == '\t' || *pp == '\n'))
-                break;
-            for (; pp < pend; pp++)
-              {
-                if (*pp == ' ' || *pp == '\t' || *pp == '\n')
+              for (p = start_of_paragraph;;)
+                {
+                  const char *nl = strchrnul (p, '\n');
+                  if (*nl == '\0')
+                    {
+                      end_of_paragraph = nl;
+                      next_paragraph = end_of_paragraph;
+                      break;
+                    }
+                  p = nl + 1;
                   {
-                    if (!last_ws)
+                    const char *past_whitespace = p + strspn (p, " \t\n");
+                    if (memchr (p, '\n', past_whitespace - p) != NULL)
                       {
-                        *out++ = ' ';
-                        last_ws = true;
+                        end_of_paragraph = nl;
+                        next_paragraph = past_whitespace;
+                        break;
                       }
+                    p = past_whitespace;
                   }
-                else
-                  {
-                    *out++ = *pp;
-                    last_ws = false;
-                  }
-              }
-            if (*pend != '\0')
+                }
+            }
+
+            /* Normalize whitespaces in the paragraph.  */
+            {
+              const char *p;
+
+              /* Remove whitespace at the beginning of the paragraph.  */
+              for (p = start_of_paragraph; p < end_of_paragraph; p++)
+                if (!(*p == ' ' || *p == '\t' || *p == '\n'))
+                  break;
+
+              for (; p < end_of_paragraph;)
+                {
+                  if (*p == ' ' || *p == '\t' || *p == '\n')
+                    {
+                      /* Normalize whitespace inside the paragraph, and
+                         remove whitespace at the end of the paragraph.  */
+                      do
+                        p++;
+                      while (p < end_of_paragraph
+                             && (*p == ' ' || *p == '\t' || *p == '\n'));
+                      if (p < end_of_paragraph)
+                        *out++ = ' ';
+                    }
+                  else
+                    *out++ = *p++;
+                }
+            }
+
+            if (*next_paragraph != '\0')
               {
                 memcpy (out, "\n\n", 2);
                 out += 2;
               }
-            p = next;
+            start_of_paragraph = next_paragraph;
           }
         *out = '\0';
         return result;
index 975a547cdc86c5b018988fe88d51667e68144da1..22e9163ecbdae87e3fa39f54784d7a639e3842af 100755 (executable)
@@ -176,10 +176,10 @@ cat <<\EOF >messages.xml
     This is the first paragraph with
 a newline.
   
-    This is  the  second paragprah with spaces.
+    This is  the  second paragraph with spaces.
 
 
-    This is the last paragraph.</p>
+    This is the last paragraph.     </p>
   </message>
   <message>
     <p xml:space="paragraph">This is the only one paragraph</p>
@@ -277,7 +277,7 @@ msgstr ""
 msgid ""
 "This is the first paragraph with a newline.\n"
 "\n"
-"This is the second paragprah with spaces.\n"
+"This is the second paragraph with spaces.\n"
 "\n"
 "This is the last paragraph."
 msgstr ""