From: Bruno Haible Date: Wed, 4 Apr 2001 13:41:11 +0000 (+0000) Subject: Fix a bug: line breaks could occur inside escape sequences, leading to X-Git-Tag: v0.10.37~52 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2c2850b6f0e73a4f2be39a20de1fcb6f49678115;p=thirdparty%2Fgettext.git Fix a bug: line breaks could occur inside escape sequences, leading to invalid PO files. --- diff --git a/lib/ChangeLog b/lib/ChangeLog index 261dc7443..842f3b2d8 100644 --- a/lib/ChangeLog +++ b/lib/ChangeLog @@ -1,3 +1,11 @@ +2001-04-04 Bruno Haible + + * linebreak.h (UC_BREAK_UNDEFINED): New enum value. + (u8_width_linebreaks, u16_width_linebreaks, u32_width_linebreaks, + mbs_width_linebreaks): Add overrides argument. + * linebreak.c (u8_width_linebreaks, u16_width_linebreaks, + u32_width_linebreaks, mbs_width_linebreaks): Add overrides argument. + 2001-03-29 Bruno Haible * gettext-0.10.36 released. diff --git a/lib/linebreak.c b/lib/linebreak.c index 5452d1c17..599e9ecbb 100644 --- a/lib/linebreak.c +++ b/lib/linebreak.c @@ -1151,16 +1151,17 @@ u32_possible_linebreaks (s, n, encoding, p) #endif -/* Choose the best line breaks, assuming the uc_width function. Return the - column after the end of the string. */ +/* Choose the best line breaks, assuming the uc_width function. + Return the column after the end of the string. */ int -u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) +u8_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p) const unsigned char *s; size_t n; int width; int start_column; int at_end_columns; + const char *o; const char *encoding; char *p; { @@ -1180,6 +1181,10 @@ u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) unsigned int uc; int count = u8_mbtouc (&uc, s, s_end - s); + /* Respect the override. */ + if (o != NULL && *o != UC_BREAK_UNDEFINED) + *p = *o; + if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY) { /* An atomic piece of text ends here. */ @@ -1223,6 +1228,8 @@ u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) s += count; p += count; + if (o != NULL) + o += count; } /* The last atomic piece of text ends here. */ @@ -1239,12 +1246,13 @@ u8_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) #ifdef unused int -u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) +u16_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p) const unsigned short *s; size_t n; int width; int start_column; int at_end_columns; + const char *o; const char *encoding; char *p; { @@ -1264,6 +1272,10 @@ u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) unsigned int uc; int count = u16_mbtouc (&uc, s, s_end - s); + /* Respect the override. */ + if (o != NULL && *o != UC_BREAK_UNDEFINED) + *p = *o; + if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY) { /* An atomic piece of text ends here. */ @@ -1307,6 +1319,8 @@ u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) s += count; p += count; + if (o != NULL) + o += count; } /* The last atomic piece of text ends here. */ @@ -1321,12 +1335,13 @@ u16_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) } int -u32_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) +u32_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p) const unsigned int *s; size_t n; int width; int start_column; int at_end_columns; + const char *o; const char *encoding; char *p; { @@ -1345,6 +1360,10 @@ u32_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) { unsigned int uc = *s; + /* Respect the override. */ + if (o != NULL && *o != UC_BREAK_UNDEFINED) + *p = *o; + if (*p == UC_BREAK_POSSIBLE || *p == UC_BREAK_MANDATORY) { /* An atomic piece of text ends here. */ @@ -1388,6 +1407,8 @@ u32_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) s++; p++; + if (o != NULL) + o++; } /* The last atomic piece of text ends here. */ @@ -1505,7 +1526,7 @@ main (argc, argv) char *breaks = malloc (length); int i; - u8_width_linebreaks ((unsigned char *) input, length, width, 0, 0, "UTF-8", breaks); + u8_width_linebreaks ((unsigned char *) input, length, width, 0, 0, NULL, "UTF-8", breaks); for (i = 0; i < length; i++) { @@ -1670,7 +1691,7 @@ is_all_ascii (s, n) #endif /* C_CTYPE_ASCII */ -#ifdef unused +#if defined unused || defined TEST2 void mbs_possible_linebreaks (s, n, encoding, p) @@ -1748,17 +1769,18 @@ mbs_possible_linebreaks (s, n, encoding, p) #endif int -mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) +mbs_width_linebreaks (s, n, width, start_column, at_end_columns, o, encoding, p) const char *s; size_t n; int width; int start_column; int at_end_columns; + const char *o; const char *encoding; char *p; { if (is_utf8_encoding (encoding)) - return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, encoding, p); + return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, o, encoding, p); else { #if HAVE_ICONV @@ -1771,20 +1793,30 @@ mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) { /* Convert the string to UTF-8 and build a translation table from offsets into s to offsets into the translated string. */ - char *memory = malloc (n * sizeof (size_t) + m + m); + char *memory = malloc (n * sizeof (size_t) + m + m + (o != NULL ? m : 0)); if (memory != NULL) { size_t *offtable = (size_t *) memory; char *t = (char *) (offtable + n); char *q = (char *) (t + m); + char *o8 = (o != NULL ? (char *) (q + m) : NULL); int res_column; size_t i; iconv_string_keeping_offsets (to_utf8, s, n, offtable, t, m); + /* Translate the overrides to the UTF-8 string. */ + if (o != NULL) + { + memset (o8, UC_BREAK_UNDEFINED, m); + for (i = 0; i < n; i++) + if (offtable[i] != (size_t)(-1)) + o8[offtable[i]] = o[i]; + } + /* Determine the line breaks of the UTF-8 string. */ res_column = - u8_width_linebreaks ((const unsigned char *) t, m, width, start_column, at_end_columns, encoding, q); + u8_width_linebreaks ((const unsigned char *) t, m, width, start_column, at_end_columns, o8, encoding, q); /* Translate the result back to the original string. */ memset (p, UC_BREAK_PROHIBITED, n); @@ -1805,7 +1837,7 @@ mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) if (is_all_ascii (s, n)) { /* ASCII is a subset of UTF-8. */ - return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, encoding, p); + return u8_width_linebreaks ((const unsigned char *) s, n, width, start_column, at_end_columns, o, encoding, p); } #endif /* We have a non-ASCII string and cannot convert it. @@ -1816,9 +1848,13 @@ mbs_width_linebreaks (s, n, width, start_column, at_end_columns, encoding, p) const char *s_end = s + n; while (s < s_end) { - *p = (*s == '\n' ? UC_BREAK_MANDATORY : UC_BREAK_PROHIBITED); + *p = ((o != NULL && *o == UC_BREAK_MANDATORY) || *s == '\n' + ? UC_BREAK_MANDATORY + : UC_BREAK_PROHIBITED); s++; p++; + if (o != NULL) + o++; } /* We cannot compute widths in this case. */ return start_column; @@ -1927,7 +1963,7 @@ main (argc, argv) char *breaks = malloc (length); int i; - mbs_width_linebreaks (input, length, width, 0, 0, locale_charset (), breaks); + mbs_width_linebreaks (input, length, width, 0, 0, NULL, locale_charset (), breaks); for (i = 0; i < length; i++) { diff --git a/lib/linebreak.h b/lib/linebreak.h index d0367eff1..5c7b7e732 100644 --- a/lib/linebreak.h +++ b/lib/linebreak.h @@ -51,6 +51,7 @@ extern int u32_width PARAMS ((const unsigned int *s, size_t n, const char *encod /* Line breaking. */ enum { + UC_BREAK_UNDEFINED, UC_BREAK_PROHIBITED, UC_BREAK_POSSIBLE, UC_BREAK_MANDATORY, @@ -71,12 +72,15 @@ extern void u16_possible_linebreaks PARAMS ((const unsigned short *s, size_t n, extern void u32_possible_linebreaks PARAMS ((const unsigned int *s, size_t n, const char *encoding, char *p)); extern void mbs_possible_linebreaks PARAMS ((const char *s, size_t n, const char *encoding, char *p)); -/* Choose the best line breaks, assuming the uc_width function. Return the - column after the end of the string. */ -extern int u8_width_linebreaks PARAMS ((const unsigned char *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p)); -extern int u16_width_linebreaks PARAMS ((const unsigned short *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p)); -extern int u32_width_linebreaks PARAMS ((const unsigned int *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p)); -extern int mbs_width_linebreaks PARAMS ((const char *s, size_t n, int width, int start_column, int at_end_columns, const char *encoding, char *p)); +/* Choose the best line breaks, assuming the uc_width function. + Return the column after the end of the string. + o is an optional override; if o[i] != UC_BREAK_UNDEFINED, o[i] takes + precedence over p[i] as returned by the *_possible_linebreaks function. + */ +extern int u8_width_linebreaks PARAMS ((const unsigned char *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p)); +extern int u16_width_linebreaks PARAMS ((const unsigned short *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p)); +extern int u32_width_linebreaks PARAMS ((const unsigned int *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p)); +extern int mbs_width_linebreaks PARAMS ((const char *s, size_t n, int width, int start_column, int at_end_columns, const char *o, const char *encoding, char *p)); #endif /* _LINEBREAK_H */ diff --git a/src/ChangeLog b/src/ChangeLog index 8b1396977..ab228d00a 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,8 @@ +2001-04-04 Bruno Haible + + * write-po.c (wrap): Prohibit line breaks inside backslash escape + sequences. + 2001-03-29 Bruno Haible * gettext-0.10.36 released. diff --git a/src/write-po.c b/src/write-po.c index 960ff4811..5e1459b16 100644 --- a/src/write-po.c +++ b/src/write-po.c @@ -214,9 +214,11 @@ wrap (fp, line_prefix, name, value, do_wrap, charset) const char *ep; size_t portion_len; char *portion; + char *overrides; char *linebreaks; char *pp; - int startcol, startcol_after_break, width, endcols; + char *op; + int startcol, startcol_after_break, width; size_t i; for (es = s; *es != '\0'; ) @@ -279,7 +281,9 @@ wrap (fp, line_prefix, name, value, do_wrap, charset) } } portion = (char *) xmalloc (portion_len); - for (ep = s, pp = portion; ep < es; ep++) + overrides = (char *) xmalloc (portion_len); + memset (overrides, UC_BREAK_UNDEFINED, portion_len); + for (ep = s, pp = portion, op = overrides; ep < es; ep++) { char c = *ep; const char *esc = strchr (escapes, c); @@ -287,6 +291,8 @@ wrap (fp, line_prefix, name, value, do_wrap, charset) { *pp++ = '\\'; *pp++ = c = escape_names[esc - escapes]; + op++; + *op++ = UC_BREAK_PROHIBITED; /* We warn about any use of escape sequences beside '\n' and '\t'. */ if (c != 'n' && c != 't') @@ -300,11 +306,17 @@ internationalized messages should not contain the `\\%c' escape sequence"), *pp++ = '0' + (((unsigned char) c >> 6) & 7); *pp++ = '0' + (((unsigned char) c >> 3) & 7); *pp++ = '0' + ((unsigned char) c & 7); + op++; + *op++ = UC_BREAK_PROHIBITED; + *op++ = UC_BREAK_PROHIBITED; + *op++ = UC_BREAK_PROHIBITED; } else if (c == '\\' || c == '"') { *pp++ = '\\'; *pp++ = c; + op++; + *op++ = UC_BREAK_PROHIBITED; } else { @@ -344,14 +356,22 @@ internationalized messages should not contain the `\\%c' escape sequence"), insize = inptr - ep; memcpy (pp, ep, insize); pp += insize; + op += insize; ep += insize - 1; } else #endif - *pp++ = c; + { + *pp++ = c; + op++; + } } } + /* Don't break immediately before the "\n" at the end. */ + if (es > s && es[-1] == '\n') + overrides[portion_len - 2] = UC_BREAK_PROHIBITED; + linebreaks = (char *) xmalloc (portion_len); /* Subsequent lines after a break are all indented. @@ -389,17 +409,9 @@ internationalized messages should not contain the `\\%c' escape sequence"), /* Adjust for indentation of subsequent lines. */ startcol -= startcol_after_break; - /* Do line breaking on the portion. - But don't break immediately before the "\n" at the end. */ - endcols = 0; - if (es > s && es[-1] == '\n') - { - endcols = 2; - linebreaks[portion_len - 2] = UC_BREAK_PROHIBITED; - linebreaks[portion_len - 1] = UC_BREAK_PROHIBITED; - } - mbs_width_linebreaks (portion, portion_len - endcols, width, - startcol, endcols, charset, linebreaks); + /* Do line breaking on the portion. */ + mbs_width_linebreaks (portion, portion_len, width, startcol, 0, + overrides, charset, linebreaks); /* If this is the first line, and we are not using the indented style, and the line would wrap, then use an empty first line @@ -455,6 +467,7 @@ internationalized messages should not contain the `\\%c' escape sequence"), fputs ("\"\n", fp); free (linebreaks); + free (overrides); free (portion); s = es;