From: Bruno Haible <bruno@clisp.org>
Date: Wed, 4 Apr 2001 13:41:11 +0000 (+0000)
Subject: Fix a bug: line breaks could occur inside escape sequences, leading to invalid PO files.
X-Git-Tag: v0.10.37~52
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2c2850b6f0e73a4f2be39a20de1fcb6f49678115;p=thirdparty%2Fgettext.git

Fix a bug: line breaks could occur inside escape sequences, leading to
invalid PO files.
---

diff --git a/lib/ChangeLog b/lib/ChangeLog
index 261dc7443..842f3b2d8 100644
--- a/lib/ChangeLog
+++ b/lib/ChangeLog
@@ -1,3 +1,11 @@
+2001-04-04  Bruno Haible  <haible@clisp.cons.org>
+
+	* linebreak.h (UC_BREAK_UNDEFINED): New enum value.
+	(u8_width_linebreaks, u16_width_linebreaks, u32_width_linebreaks,
+	mbs_width_linebreaks): Add overrides argument.
+	* linebreak.c (u8_width_linebreaks, u16_width_linebreaks,
+	u32_width_linebreaks, mbs_width_linebreaks): Add overrides argument.
+
 2001-03-29  Bruno Haible  <haible@clisp.cons.org>
 
 	* gettext-0.10.36 released.
diff --git a/lib/linebreak.c b/lib/linebreak.c
index 5452d1c17..599e9ecbb 100644
--- a/lib/linebreak.c
+++ b/lib/linebreak.c
@@ -1151,16 +1151,17 @@ u32_possible_linebreaks (s, n, encoding, p)
 #endif
 
 
-/* Choose the best line breaks, assuming the uc_width function.  Return the
-   column after the end of the string.  */
+/* Choose the best line breaks, assuming the uc_width function.
+   Return the column after the end of the string.  */
 
 int
-u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
+u8_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p)
      const unsigned char *s;
      size_t n;
      int width;
      int start_column;
      int at_end_columns;
+     const char *o;
      const char *encoding;
      char *p;
 {
@@ -1180,6 +1181,10 @@ u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
       unsigned int uc;
       int count = u8_mbtouc (&uc, s, s_end - s);
 
+      /* Respect the override.  */
+      if (o != NULL && *o != UC_BREAK_UNDEFINED)
+        *p = *o;
+
       if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
         {
           /* An atomic piece of text ends here.  */
@@ -1223,6 +1228,8 @@ u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
 
       s += count;
       p += count;
+      if (o != NULL)
+        o += count;
     }
 
   /* The last atomic piece of text ends here.  */
@@ -1239,12 +1246,13 @@ u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
 #ifdef unused
 
 int
-u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
+u16_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p)
      const unsigned short *s;
      size_t n;
      int width;
      int start_column;
      int at_end_columns;
+     const char *o;
      const char *encoding;
      char *p;
 {
@@ -1264,6 +1272,10 @@ u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
       unsigned int uc;
       int count = u16_mbtouc (&uc, s, s_end - s);
 
+      /* Respect the override.  */
+      if (o != NULL && *o != UC_BREAK_UNDEFINED)
+        *p = *o;
+
       if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
         {
           /* An atomic piece of text ends here.  */
@@ -1307,6 +1319,8 @@ u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
 
       s += count;
       p += count;
+      if (o != NULL)
+        o += count;
     }
 
   /* The last atomic piece of text ends here.  */
@@ -1321,12 +1335,13 @@ u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
 }
 
 int
-u32_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
+u32_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p)
      const unsigned int *s;
      size_t n;
      int width;
      int start_column;
      int at_end_columns;
+     const char *o;
      const char *encoding;
      char *p;
 {
@@ -1345,6 +1360,10 @@ u32_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
     {
       unsigned int uc = *s;
 
+      /* Respect the override.  */
+      if (o != NULL && *o != UC_BREAK_UNDEFINED)
+        *p = *o;
+
       if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
         {
           /* An atomic piece of text ends here.  */
@@ -1388,6 +1407,8 @@ u32_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
 
       s++;
       p++;
+      if (o != NULL)
+        o++;
     }
 
   /* The last atomic piece of text ends here.  */
@@ -1505,7 +1526,7 @@ main (argc, argv)
       char *breaks = malloc (length);
       int i;
 
-      u8_width_linebreaks ((unsigned char *) input, length, width, 0, 0, "UTF-8", breaks);
+      u8_width_linebreaks ((unsigned char *) input, length, width, 0, 0, NULL, "UTF-8", breaks);
 
       for (i = 0; i < length; i++)
         {
@@ -1670,7 +1691,7 @@ is_all_ascii (s, n)
 
 #endif /* C_CTYPE_ASCII */
 
-#ifdef unused
+#if defined unused || defined TEST2
 
 void
 mbs_possible_linebreaks (s, n, encoding, p)
@@ -1748,17 +1769,18 @@ mbs_possible_linebreaks (s, n, encoding, p)
 #endif
 
 int
-mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
+mbs_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p)
      const char *s;
      size_t n;
      int width;
      int start_column;
      int at_end_columns;
+     const char *o;
      const char *encoding;
      char *p;
 {
   if (is_utf8_encoding (encoding))
-    return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, encoding, p);
+    return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, o, encoding, p);
   else
     {
 #if HAVE_ICONV
@@ -1771,20 +1793,30 @@ mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
             {
               /* Convert the string to UTF-8 and build a translation table
                  from offsets into s to offsets into the translated string.  */
-              char *memory = malloc (n * sizeof (size_t) + m + m);
+              char *memory = malloc (n * sizeof (size_t) + m + m + (o != NULL ? m : 0));
               if (memory != NULL)
                 {
                   size_t *offtable = (size_t *) memory;
                   char *t = (char *) (offtable + n);
                   char *q = (char *) (t + m);
+                  char *o8 = (o != NULL ? (char *) (q + m) : NULL);
                   int res_column;
                   size_t i;
 
                   iconv_string_keeping_offsets (to_utf8, s, n, offtable, t, m);
 
+                  /* Translate the overrides to the UTF-8 string.  */
+                  if (o != NULL)
+                    {
+                      memset (o8, UC_BREAK_UNDEFINED, m);
+                      for (i = 0; i < n; i++)
+                        if (offtable[i] != (size_t)(-1))
+                          o8[offtable[i]] = o[i];
+                    }
+
                   /* Determine the line breaks of the UTF-8 string.  */
                   res_column =
-                    u8_width_linebreaks ((const unsigned char *) t, m, width, start_column, at_end_columns, encoding, q);
+                    u8_width_linebreaks ((const unsigned char *) t, m, width, start_column, at_end_columns, o8, encoding, q);
 
                   /* Translate the result back to the original string.  */
                   memset (p, UC_BREAK_PROHIBITED, n);
@@ -1805,7 +1837,7 @@ mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
       if (is_all_ascii (s, n))
 	{
 	  /* ASCII is a subset of UTF-8.  */
-	  return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, encoding, p);
+	  return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, o, encoding, p);
 	}
 #endif
       /* We have a non-ASCII string and cannot convert it.
@@ -1816,9 +1848,13 @@ mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
         const char *s_end = s + n;
         while (s < s_end)
           {
-            *p = (*s == '\n' ? UC_BREAK_MANDATORY : UC_BREAK_PROHIBITED);
+            *p = ((o != NULL && *o == UC_BREAK_MANDATORY) || *s == '\n'
+                  ? UC_BREAK_MANDATORY
+                  : UC_BREAK_PROHIBITED);
             s++;
             p++;
+            if (o != NULL)
+              o++;
           }
         /* We cannot compute widths in this case.  */
         return start_column;
@@ -1927,7 +1963,7 @@ main (argc, argv)
       char *breaks = malloc (length);
       int i;
 
-      mbs_width_linebreaks (input, length, width, 0, 0, locale_charset (), breaks);
+      mbs_width_linebreaks (input, length, width, 0, 0, NULL, locale_charset (), breaks);
 
       for (i = 0; i < length; i++)
         {
diff --git a/lib/linebreak.h b/lib/linebreak.h
index d0367eff1..5c7b7e732 100644
--- a/lib/linebreak.h
+++ b/lib/linebreak.h
@@ -51,6 +51,7 @@ extern int u32_width PARAMS ((const unsigned int *s, size_t n, const char *encod
 /* Line breaking.  */
 
 enum {
+  UC_BREAK_UNDEFINED,
   UC_BREAK_PROHIBITED,
   UC_BREAK_POSSIBLE,
   UC_BREAK_MANDATORY,
@@ -71,12 +72,15 @@ extern void u16_possible_linebreaks PARAMS ((const unsigned short *s, size_t n,
 extern void u32_possible_linebreaks PARAMS ((const unsigned int *s, size_t n, const char *encoding, char *p));
 extern void mbs_possible_linebreaks PARAMS ((const char *s, size_t n, const char *encoding, char *p));
 
-/* Choose the best line breaks, assuming the uc_width function.  Return the
-   column after the end of the string.  */
-extern int u8_width_linebreaks PARAMS ((const unsigned char *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p));
-extern int u16_width_linebreaks PARAMS ((const unsigned short *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p));
-extern int u32_width_linebreaks PARAMS ((const unsigned int *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p));
-extern int mbs_width_linebreaks PARAMS ((const char *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p));
+/* Choose the best line breaks, assuming the uc_width function.
+   Return the column after the end of the string.
+   o is an optional array of overrides (may be NULL); if o[i] !=
+   UC_BREAK_UNDEFINED, o[i] takes precedence over p[i] as returned by the
+   *_possible_linebreaks function.  */
+extern int u8_width_linebreaks PARAMS ((const unsigned char *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p));
+extern int u16_width_linebreaks PARAMS ((const unsigned short *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p));
+extern int u32_width_linebreaks PARAMS ((const unsigned int *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p));
+extern int mbs_width_linebreaks PARAMS ((const char *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p));
 
 
 #endif /* _LINEBREAK_H */
diff --git a/src/ChangeLog b/src/ChangeLog
index 8b1396977..ab228d00a 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2001-04-04  Bruno Haible  <haible@clisp.cons.org>
+
+	* write-po.c (wrap): Prohibit line breaks inside backslash escape
+	sequences.
+
 2001-03-29  Bruno Haible  <haible@clisp.cons.org>
 
 	* gettext-0.10.36 released.
diff --git a/src/write-po.c b/src/write-po.c
index 960ff4811..5e1459b16 100644
--- a/src/write-po.c
+++ b/src/write-po.c
@@ -214,9 +214,11 @@ wrap (fp, line_prefix, name, value, do_wrap, charset)
       const char *ep;
       size_t portion_len;
       char *portion;
+      char *overrides;
       char *linebreaks;
       char *pp;
-      int startcol, startcol_after_break, width, endcols;
+      char *op;
+      int startcol, startcol_after_break, width;
       size_t i;
 
       for (es = s; *es != '\0'; )
@@ -279,7 +281,9 @@ wrap (fp, line_prefix, name, value, do_wrap, charset)
 	    }
 	}
       portion = (char *) xmalloc (portion_len);
-      for (ep = s, pp = portion; ep < es; ep++)
+      overrides = (char *) xmalloc (portion_len);
+      memset (overrides, UC_BREAK_UNDEFINED, portion_len);
+      for (ep = s, pp = portion, op = overrides; ep < es; ep++)
 	{
 	  char c = *ep;
 	  const char *esc = strchr (escapes, c);
@@ -287,6 +291,8 @@ wrap (fp, line_prefix, name, value, do_wrap, charset)
 	    {
 	      *pp++ = '\\';
 	      *pp++ = c = escape_names[esc - escapes];
+	      op++;
+	      *op++ = UC_BREAK_PROHIBITED;
 	      /* We warn about any use of escape sequences beside
 		 '\n' and '\t'.  */
 	      if (c != 'n' && c != 't')
@@ -300,11 +306,17 @@ internationalized messages should not contain the `\\%c' escape sequence"),
 	      *pp++ = '0' + (((unsigned char) c >> 6) & 7);
 	      *pp++ = '0' + (((unsigned char) c >> 3) & 7);
 	      *pp++ = '0' + ((unsigned char) c & 7);
+	      op++;
+	      *op++ = UC_BREAK_PROHIBITED;
+	      *op++ = UC_BREAK_PROHIBITED;
+	      *op++ = UC_BREAK_PROHIBITED;
 	    }
 	  else if (c == '\\' || c == '"')
 	    {
 	      *pp++ = '\\';
 	      *pp++ = c;
+	      op++;
+	      *op++ = UC_BREAK_PROHIBITED;
 	    }
 	  else
 	    {
@@ -344,14 +356,22 @@ internationalized messages should not contain the `\\%c' escape sequence"),
 		  insize = inptr - ep;
 		  memcpy (pp, ep, insize);
 		  pp += insize;
+		  op += insize;
 		  ep += insize - 1;
 		}
 	      else
 #endif
-		*pp++ = c;
+		{
+		  *pp++ = c;
+		  op++;
+		}
 	    }
 	}
 
+      /* Don't break immediately before the "\n" at the end.  */
+      if (es > s && es[-1] == '\n')
+	overrides[portion_len - 2] = UC_BREAK_PROHIBITED;
+
       linebreaks = (char *) xmalloc (portion_len);
 
       /* Subsequent lines after a break are all indented.
@@ -389,17 +409,9 @@ internationalized messages should not contain the `\\%c' escape sequence"),
       /* Adjust for indentation of subsequent lines.  */
       startcol -= startcol_after_break;
 
-      /* Do line breaking on the portion.
-	 But don't break immediately before the "\n" at the end.  */
-      endcols = 0;
-      if (es > s && es[-1] == '\n')
-	{
-	  endcols = 2;
-	  linebreaks[portion_len - 2] = UC_BREAK_PROHIBITED;
-	  linebreaks[portion_len - 1] = UC_BREAK_PROHIBITED;
-	}
-      mbs_width_linebreaks (portion, portion_len - endcols, width,
-			    startcol, endcols, charset, linebreaks);
+      /* Do line breaking on the portion.  */
+      mbs_width_linebreaks (portion, portion_len, width, startcol, 0,
+			    overrides, charset, linebreaks);
 
       /* If this is the first line, and we are not using the indented
 	 style, and the line would wrap, then use an empty first line
@@ -455,6 +467,7 @@ internationalized messages should not contain the `\\%c' escape sequence"),
       fputs ("\"\n", fp);
 
       free (linebreaks);
+      free (overrides);
       free (portion);
 
       s = es;