Fix a bug: line breaks could occur inside escape sequences, leading to [...]

author Bruno Haible <bruno@clisp.org>

Wed, 4 Apr 2001 13:41:11 +0000 (13:41 +0000)

committer Bruno Haible <bruno@clisp.org>

Wed, 4 Apr 2001 13:41:11 +0000 (13:41 +0000)
author Bruno Haible <bruno@clisp.org>
Wed, 4 Apr 2001 13:41:11 +0000 (13:41 +0000)
committer Bruno Haible <bruno@clisp.org>
Wed, 4 Apr 2001 13:41:11 +0000 (13:41 +0000)
diff --git a/lib/ChangeLog b/lib/ChangeLog

index 261dc74436529fb8c54c150019a40002d503076c..842f3b2d87ac0ee8fb3b07dbe86b473892de151b 100644 (file)
--- a/lib/ChangeLog
+++ b/lib/ChangeLog
@@ -1,3 +1,11 @@
+2001-04-04  Bruno Haible  <haible@clisp.cons.org>
+
+       * linebreak.h (UC_BREAK_UNDEFINED): New enum value.
+       (u8_width_linebreaks, u16_width_linebreaks, u32_width_linebreaks,
+       mbs_width_linebreaks): Add overrides argument.
+       * linebreak.c (u8_width_linebreaks, u16_width_linebreaks,
+       u32_width_linebreaks, mbs_width_linebreaks): Add overrides argument.
+
  2001-03-29  Bruno Haible  <haible@clisp.cons.org>
  
         * gettext-0.10.36 released.
diff --git a/lib/linebreak.c b/lib/linebreak.c

index 5452d1c17f7445d9315659e3e2e0f8a86b0a7660..599e9ecbbcdcf257af916c8c40926a1c4a8fc37d 100644 (file)
--- a/lib/linebreak.c
+++ b/lib/linebreak.c
@@ -1151,16 +1151,17 @@ u32_possible_linebreaks (s, n, encoding, p)
  #endif
  
  
-/* Choose the best line breaks, assuming the uc_width function.  Return the
-   column after the end of the string.  */
+/* Choose the best line breaks, assuming the uc_width function.
+   Return the column after the end of the string.  */
  
  int
-u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
+u8_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p)
       const unsigned char *s;
       size_t n;
       int width;
       int start_column;
       int at_end_columns;
+     const char *o;
       const char *encoding;
       char *p;
  {
@@ -1180,6 +1181,10 @@ u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
        unsigned int uc;
        int count = u8_mbtouc (&uc, s, s_end - s);
  
+      /* Respect the override.  */
+      if (o != NULL && *o != UC_BREAK_UNDEFINED)
+        *p = *o;
+
        if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
          {
            /* An atomic piece of text ends here.  */
@@ -1223,6 +1228,8 @@ u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
  
        s += count;
        p += count;
+      if (o != NULL)
+        o += count;
      }
  
    /* The last atomic piece of text ends here.  */
@@ -1239,12 +1246,13 @@ u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
  #ifdef unused
  
  int
-u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
+u16_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p)
       const unsigned short *s;
       size_t n;
       int width;
       int start_column;
       int at_end_columns;
+     const char *o;
       const char *encoding;
       char *p;
  {
@@ -1264,6 +1272,10 @@ u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
        unsigned int uc;
        int count = u16_mbtouc (&uc, s, s_end - s);
  
+      /* Respect the override.  */
+      if (o != NULL && *o != UC_BREAK_UNDEFINED)
+        *p = *o;
+
        if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
          {
            /* An atomic piece of text ends here.  */
@@ -1307,6 +1319,8 @@ u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
  
        s += count;
        p += count;
+      if (o != NULL)
+        o += count;
      }
  
    /* The last atomic piece of text ends here.  */
@@ -1321,12 +1335,13 @@ u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
  }
  
  int
-u32_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
+u32_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p)
       const unsigned int *s;
       size_t n;
       int width;
       int start_column;
       int at_end_columns;
+     const char *o;
       const char *encoding;
       char *p;
  {
@@ -1345,6 +1360,10 @@ u32_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
      {
        unsigned int uc = *s;
  
+      /* Respect the override.  */
+      if (o != NULL && *o != UC_BREAK_UNDEFINED)
+        *p = *o;
+
        if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY)
          {
            /* An atomic piece of text ends here.  */
@@ -1388,6 +1407,8 @@ u32_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
  
        s++;
        p++;
+      if (o != NULL)
+        o++;
      }
  
    /* The last atomic piece of text ends here.  */
@@ -1505,7 +1526,7 @@ main (argc, argv)
        char *breaks = malloc (length);
        int i;
  
-      u8_width_linebreaks ((unsigned char *) input, length, width, 0, 0, "UTF-8", breaks);
+      u8_width_linebreaks ((unsigned char *) input, length, width, 0, 0, NULL, "UTF-8", breaks);
  
        for (i = 0; i < length; i++)
          {
@@ -1670,7 +1691,7 @@ is_all_ascii (s, n)
  
  #endif /* C_CTYPE_ASCII */
  
-#ifdef unused
+#if defined unused || defined TEST2
  
  void
  mbs_possible_linebreaks (s, n, encoding, p)
@@ -1748,17 +1769,18 @@ mbs_possible_linebreaks (s, n, encoding, p)
  #endif
  
  int
-mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
+mbs_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p)
       const char *s;
       size_t n;
       int width;
       int start_column;
       int at_end_columns;
+     const char *o;
       const char *encoding;
       char *p;
  {
    if (is_utf8_encoding (encoding))
-    return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, encoding, p);
+    return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, o, encoding, p);
    else
      {
  #if HAVE_ICONV
@@ -1771,20 +1793,30 @@ mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
              {
                /* Convert the string to UTF-8 and build a translation table
                   from offsets into s to offsets into the translated string.  */
-              char *memory = malloc (n * sizeof (size_t) + m + m);
+              char *memory = malloc (n * sizeof (size_t) + m + m + (o != NULL ? m : 0));
                if (memory != NULL)
                  {
                    size_t *offtable = (size_t *) memory;
                    char *t = (char *) (offtable + n);
                    char *q = (char *) (t + m);
+                  char *o8 = (o != NULL ? (char *) (q + m) : NULL);
                    int res_column;
                    size_t i;
  
                    iconv_string_keeping_offsets (to_utf8, s, n, offtable, t, m);
  
+                  /* Translate the overrides to the UTF-8 string.  */
+                  if (o != NULL)
+                    {
+                      memset (o8, UC_BREAK_UNDEFINED, m);
+                      for (i = 0; i < n; i++)
+                        if (offtable[i] != (size_t)(-1))
+                          o8[offtable[i]] = o[i];
+                    }
+
                    /* Determine the line breaks of the UTF-8 string.  */
                    res_column =
-                    u8_width_linebreaks ((const unsigned char *) t, m, width, start_column, at_end_columns, encoding, q);
+                    u8_width_linebreaks ((const unsigned char *) t, m, width, start_column, at_end_columns, o8, encoding, q);
  
                    /* Translate the result back to the original string.  */
                    memset (p, UC_BREAK_PROHIBITED, n);
@@ -1805,7 +1837,7 @@ mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
        if (is_all_ascii (s, n))
         {
           /* ASCII is a subset of UTF-8.  */
-         return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, encoding, p);
+         return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, o, encoding, p);
         }
  #endif
        /* We have a non-ASCII string and cannot convert it.
@@ -1816,9 +1848,13 @@ mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p)
          const char *s_end = s + n;
          while (s < s_end)
            {
-            *p = (*s == '\n' ? UC_BREAK_MANDATORY : UC_BREAK_PROHIBITED);
+            *p = ((o != NULL && *o == UC_BREAK_MANDATORY) || *s == '\n'
+                  ? UC_BREAK_MANDATORY
+                  : UC_BREAK_PROHIBITED);
              s++;
              p++;
+            if (o != NULL)
+              o++;
            }
          /* We cannot compute widths in this case.  */
          return start_column;
@@ -1927,7 +1963,7 @@ main (argc, argv)
        char *breaks = malloc (length);
        int i;
  
-      mbs_width_linebreaks (input, length, width, 0, 0, locale_charset (), breaks);
+      mbs_width_linebreaks (input, length, width, 0, 0, NULL, locale_charset (), breaks);
  
        for (i = 0; i < length; i++)
          {
diff --git a/lib/linebreak.h b/lib/linebreak.h

index d0367eff1bdb0b9ad70f25be3a801faa1679a1dd..5c7b7e73267d87f590f9b2adb0067df3d212b96a 100644 (file)
--- a/lib/linebreak.h
+++ b/lib/linebreak.h
@@ -51,6 +51,7 @@ extern int u32_width PARAMS ((const unsigned int *s, size_t n, const char *encod
  /* Line breaking.  */
  
  enum {
+  UC_BREAK_UNDEFINED,
    UC_BREAK_PROHIBITED,
    UC_BREAK_POSSIBLE,
    UC_BREAK_MANDATORY,
@@ -71,12 +72,15 @@ extern void u16_possible_linebreaks PARAMS ((const unsigned short *s, size_t n,
  extern void u32_possible_linebreaks PARAMS ((const unsigned int *s, size_t n, const char *encoding, char *p));
  extern void mbs_possible_linebreaks PARAMS ((const char *s, size_t n, const char *encoding, char *p));
  
-/* Choose the best line breaks, assuming the uc_width function.  Return the
-   column after the end of the string.  */
-extern int u8_width_linebreaks PARAMS ((const unsigned char *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p));
-extern int u16_width_linebreaks PARAMS ((const unsigned short *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p));
-extern int u32_width_linebreaks PARAMS ((const unsigned int *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p));
-extern int mbs_width_linebreaks PARAMS ((const char *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p));
+/* Choose the best line breaks, assuming the uc_width function.
+   Return the column after the end of the string.
+   o is an optional override; if o[i] != UC_BREAK_UNDEFINED, o[i] takes
+   precedence over p[i] as returned by the *_possible_linebreaks function.
+ */
+extern int u8_width_linebreaks PARAMS ((const unsigned char *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p));
+extern int u16_width_linebreaks PARAMS ((const unsigned short *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p));
+extern int u32_width_linebreaks PARAMS ((const unsigned int *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p));
+extern int mbs_width_linebreaks PARAMS ((const char *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p));
  
  
  #endif /* _LINEBREAK_H */
diff --git a/src/ChangeLog b/src/ChangeLog

index 8b13969770acd5147d40773ce27baa46fc45b180..ab228d00aa20098d254a055ad1387e562685f0da 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,8 @@
+2001-04-04  Bruno Haible  <haible@clisp.cons.org>
+
+       * write-po.c (wrap): Prohibit line breaks inside backslash escape
+       sequences.
+
  2001-03-29  Bruno Haible  <haible@clisp.cons.org>
  
         * gettext-0.10.36 released.
diff --git a/src/write-po.c b/src/write-po.c

index 960ff48111e53d9ef08e28cc0bf0938e56af7020..5e1459b164df99bd238aa64d13fa739088851fde 100644 (file)
--- a/src/write-po.c
+++ b/src/write-po.c
@@ -214,9 +214,11 @@ wrap (fp, line_prefix, name, value, do_wrap, charset)
        const char *ep;
        size_t portion_len;
        char *portion;
+      char *overrides;
        char *linebreaks;
        char *pp;
-      int startcol, startcol_after_break, width, endcols;
+      char *op;
+      int startcol, startcol_after_break, width;
        size_t i;
  
        for (es = s; *es != '\0'; )
@@ -279,7 +281,9 @@ wrap (fp, line_prefix, name, value, do_wrap, charset)
             }
         }
        portion = (char *) xmalloc (portion_len);
-      for (ep = s, pp = portion; ep < es; ep++)
+      overrides = (char *) xmalloc (portion_len);
+      memset (overrides, UC_BREAK_UNDEFINED, portion_len);
+      for (ep = s, pp = portion, op = overrides; ep < es; ep++)
         {
           char c = *ep;
           const char *esc = strchr (escapes, c);
@@ -287,6 +291,8 @@ wrap (fp, line_prefix, name, value, do_wrap, charset)
             {
               *pp++ = '\\';
               *pp++ = c = escape_names[esc - escapes];
+             op++;
+             *op++ = UC_BREAK_PROHIBITED;
               /* We warn about any use of escape sequences beside
                  '\n' and '\t'.  */
               if (c != 'n' && c != 't')
@@ -300,11 +306,17 @@ internationalized messages should not contain the `\\%c' escape sequence"),
               *pp++ = '0' + (((unsigned char) c >> 6) & 7);
               *pp++ = '0' + (((unsigned char) c >> 3) & 7);
               *pp++ = '0' + ((unsigned char) c & 7);
+             op++;
+             *op++ = UC_BREAK_PROHIBITED;
+             *op++ = UC_BREAK_PROHIBITED;
+             *op++ = UC_BREAK_PROHIBITED;
             }
           else if (c == '\\' || c == '"')
             {
               *pp++ = '\\';
               *pp++ = c;
+             op++;
+             *op++ = UC_BREAK_PROHIBITED;
             }
           else
             {
@@ -344,14 +356,22 @@ internationalized messages should not contain the `\\%c' escape sequence"),
                   insize = inptr - ep;
                   memcpy (pp, ep, insize);
                   pp += insize;
+                 op += insize;
                   ep += insize - 1;
                 }
               else
  #endif
-               *pp++ = c;
+               {
+                 *pp++ = c;
+                 op++;
+               }
             }
         }
  
+      /* Don't break immediately before the "\n" at the end.  */
+      if (es > s && es[-1] == '\n')
+       overrides[portion_len - 2] = UC_BREAK_PROHIBITED;
+
        linebreaks = (char *) xmalloc (portion_len);
  
        /* Subsequent lines after a break are all indented.
@@ -389,17 +409,9 @@ internationalized messages should not contain the `\\%c' escape sequence"),
        /* Adjust for indentation of subsequent lines.  */
        startcol -= startcol_after_break;
  
-      /* Do line breaking on the portion.
-        But don't break immediately before the "\n" at the end.  */
-      endcols = 0;
-      if (es > s && es[-1] == '\n')
-       {
-         endcols = 2;
-         linebreaks[portion_len - 2] = UC_BREAK_PROHIBITED;
-         linebreaks[portion_len - 1] = UC_BREAK_PROHIBITED;
-       }
-      mbs_width_linebreaks (portion, portion_len - endcols, width,
-                           startcol, endcols, charset, linebreaks);
+      /* Do line breaking on the portion.  */
+      mbs_width_linebreaks (portion, portion_len, width, startcol, 0,
+                           overrides, charset, linebreaks);
  
        /* If this is the first line, and we are not using the indented
          style, and the line would wrap, then use an empty first line
@@ -455,6 +467,7 @@ internationalized messages should not contain the `\\%c' escape sequence"),
        fputs ("\"\n", fp);
  
        free (linebreaks);
+      free (overrides);
        free (portion);
  
        s = es;
author	Bruno Haible <bruno@clisp.org>
	Wed, 4 Apr 2001 13:41:11 +0000 (13:41 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Wed, 4 Apr 2001 13:41:11 +0000 (13:41 +0000)
lib/ChangeLog		patch | blob | blame | history
lib/linebreak.c		patch | blob | blame | history
lib/linebreak.h		patch | blob | blame | history
src/ChangeLog		patch | blob | blame | history
src/write-po.c		patch | blob | blame | history