From: Bruno Haible Date: Fri, 9 Mar 2001 16:54:01 +0000 (+0000) Subject: Fix output of strings in CJK encodings. X-Git-Tag: v0.10.36~110 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e44d2fe3a57c11e1087b6ea3ccf212d43f8d6b12;p=thirdparty%2Fgettext.git Fix output of strings in CJK encodings. --- diff --git a/src/ChangeLog b/src/ChangeLog index df62f1d36..06e84439e 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,10 @@ +2001-03-09 Bruno Haible + + Fix output of strings in CJK encodings. + * write-po.c: Include iconv.h. + (wrap): While preparing a line, use iconv to avoid treating the second + byte of a multi-byte character as an ASCII character. + 2001-03-04 Bruno Haible * msgcomm.c (main): Use IS_ABSOLUTE_PATH and concatenated_pathname. diff --git a/src/write-po.c b/src/write-po.c index 28eb1fbec..960ff4811 100644 --- a/src/write-po.c +++ b/src/write-po.c @@ -30,6 +30,10 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ # include #endif +#if HAVE_ICONV +#include +#endif + #include "write-po.h" #include "c-ctype.h" #include "linebreak.h" @@ -190,6 +194,9 @@ wrap (fp, line_prefix, name, value, do_wrap, charset) { const char *s; int first_line; +#if HAVE_ICONV + iconv_t conv = iconv_open ("UTF-8", charset); +#endif /* Loop over the '\n' delimited portions of value. */ s = value; @@ -225,8 +232,51 @@ wrap (fp, line_prefix, name, value, do_wrap, charset) portion_len += 2; else if (escape && !c_isprint ((unsigned char) c)) portion_len += 4; + else if (c == '\\' || c == '"') + portion_len += 2; else - portion_len += 1 + (c == '\\' || c == '"'); + { +#if HAVE_ICONV + if (conv != (iconv_t)(-1)) + { + /* Skip over a complete multi-byte character. Don't + interpret the second byte of a multi-byte character as + ASCII. This is needed for the BIG5, BIG5HKSCS, GBK, + GB18030, SJIS, JOHAB encodings. */ + char scratchbuf[64]; + const char *inptr = ep; + size_t insize; + char *outptr = &scratchbuf[0]; + size_t outsize = sizeof (scratchbuf); + size_t res; + + res = (size_t)(-1); + for (insize = 1; inptr + insize <= es; insize++) + { + res = iconv (conv, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + if (!(res == (size_t)(-1) && errno == EINVAL)) + break; + } + if (res == (size_t)(-1)) + { + if (errno == EILSEQ) + { + error (0, 0, _("invalid multibyte sequence")); + continue; + } + else + abort (); + } + insize = inptr - ep; + portion_len += insize; + ep += insize - 1; + } + else +#endif + portion_len += 1; + } } portion = (char *) xmalloc (portion_len); for (ep = s, pp = portion; ep < es; ep++) @@ -251,12 +301,55 @@ internationalized messages should not contain the `\\%c' escape sequence"), *pp++ = '0' + (((unsigned char) c >> 3) & 7); *pp++ = '0' + ((unsigned char) c & 7); } - else + else if (c == '\\' || c == '"') { - if (c == '\\' || c == '"') - *pp++ = '\\'; + *pp++ = '\\'; *pp++ = c; } + else + { +#if HAVE_ICONV + if (conv != (iconv_t)(-1)) + { + /* Copy a complete multi-byte character. Don't + interpret the second byte of a multi-byte character as + ASCII. This is needed for the BIG5, BIG5HKSCS, GBK, + GB18030, SJIS, JOHAB encodings. */ + char scratchbuf[64]; + const char *inptr = ep; + size_t insize; + char *outptr = &scratchbuf[0]; + size_t outsize = sizeof (scratchbuf); + size_t res; + + res = (size_t)(-1); + for (insize = 1; inptr + insize <= es; insize++) + { + res = iconv (conv, + (ICONV_CONST char **) &inptr, &insize, + &outptr, &outsize); + if (!(res == (size_t)(-1) && errno == EINVAL)) + break; + } + if (res == (size_t)(-1)) + { + if (errno == EILSEQ) + { + error (0, 0, _("invalid multibyte sequence")); + continue; + } + else + abort (); + } + insize = inptr - ep; + memcpy (pp, ep, insize); + pp += insize; + ep += insize - 1; + } + else +#endif + *pp++ = c; + } } linebreaks = (char *) xmalloc (portion_len); @@ -367,6 +460,11 @@ internationalized messages should not contain the `\\%c' escape sequence"), s = es; } while (*s); + +#if HAVE_ICONV + if (conv != (iconv_t)(-1)) + iconv_close (conv); +#endif }