git.ipfire.org Git - thirdparty/gettext.git/commitdiff
Fix output of strings in CJK encodings.
author: Bruno Haible <bruno@clisp.org>
Fri, 9 Mar 2001 16:54:01 +0000 (16:54 +0000)
committer: Bruno Haible <bruno@clisp.org>
Fri, 9 Mar 2001 16:54:01 +0000 (16:54 +0000)
src/ChangeLog
src/write-po.c

index df62f1d3632b188f05f3f1ebcb5c1fedd4de17f2..06e84439e63c152cda2915397d4c47a485dabb32 100644 (file)
@@ -1,3 +1,10 @@
+2001-03-09  Bruno Haible  <haible@clisp.cons.org>
+
+       Fix output of strings in CJK encodings.
+       * write-po.c: Include iconv.h.
+       (wrap): While preparing a line, use iconv to avoid treating the second
+       byte of a multi-byte character as an ASCII character.
+
 2001-03-04  Bruno Haible  <haible@clisp.cons.org>
 
        * msgcomm.c (main): Use IS_ABSOLUTE_PATH and concatenated_pathname.
index 28eb1fbec3a6f3eed8a2162343220f1f81ebde40..960ff48111e53d9ef08e28cc0bf0938e56af7020 100644 (file)
@@ -30,6 +30,10 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
 # include <limits.h>
 #endif
 
+#if HAVE_ICONV
+#include <iconv.h>
+#endif
+
 #include "write-po.h"
 #include "c-ctype.h"
 #include "linebreak.h"
@@ -190,6 +194,9 @@ wrap (fp, line_prefix, name, value, do_wrap, charset)
 {
   const char *s;
   int first_line;
+#if HAVE_ICONV
+  iconv_t conv = iconv_open ("UTF-8", charset);
+#endif
 
   /* Loop over the '\n' delimited portions of value.  */
   s = value;
@@ -225,8 +232,51 @@ wrap (fp, line_prefix, name, value, do_wrap, charset)
            portion_len += 2;
          else if (escape && !c_isprint ((unsigned char) c))
            portion_len += 4;
+         else if (c == '\\' || c == '"')
+           portion_len += 2;
          else
-           portion_len += 1 + (c == '\\' || c == '"');
+           {
+#if HAVE_ICONV
+             if (conv != (iconv_t)(-1))
+               {
+                 /* Skip over a complete multi-byte character.  Don't
+                    interpret the second byte of a multi-byte character as
+                    ASCII.  This is needed for the BIG5, BIG5HKSCS, GBK,
+                    GB18030, SJIS, JOHAB encodings.  */
+                 char scratchbuf[64];
+                 const char *inptr = ep;
+                 size_t insize;
+                 char *outptr = &scratchbuf[0];
+                 size_t outsize = sizeof (scratchbuf);
+                 size_t res;
+
+                 res = (size_t)(-1);
+                 for (insize = 1; inptr + insize <= es; insize++)
+                   {
+                     res = iconv (conv,
+                                  (ICONV_CONST char **) &inptr, &insize,
+                                  &outptr, &outsize);
+                     if (!(res == (size_t)(-1) && errno == EINVAL))
+                       break;
+                   }
+                 if (res == (size_t)(-1))
+                   {
+                     if (errno == EILSEQ)
+                       {
+                         error (0, 0, _("invalid multibyte sequence"));
+                         continue;
+                       }
+                     else
+                       abort ();
+                   }
+                 insize = inptr - ep;
+                 portion_len += insize;
+                 ep += insize - 1;
+               }
+             else
+#endif
+               portion_len += 1;
+           }
        }
       portion = (char *) xmalloc (portion_len);
       for (ep = s, pp = portion; ep < es; ep++)
@@ -251,12 +301,55 @@ internationalized messages should not contain the `\\%c' escape sequence"),
              *pp++ = '0' + (((unsigned char) c >> 3) & 7);
              *pp++ = '0' + ((unsigned char) c & 7);
            }
-         else
+         else if (c == '\\' || c == '"')
            {
-             if (c == '\\' || c == '"')
-               *pp++ = '\\';
+             *pp++ = '\\';
              *pp++ = c;
            }
+         else
+           {
+#if HAVE_ICONV
+             if (conv != (iconv_t)(-1))
+               {
+                 /* Copy a complete multi-byte character.  Don't
+                    interpret the second byte of a multi-byte character as
+                    ASCII.  This is needed for the BIG5, BIG5HKSCS, GBK,
+                    GB18030, SJIS, JOHAB encodings.  */
+                 char scratchbuf[64];
+                 const char *inptr = ep;
+                 size_t insize;
+                 char *outptr = &scratchbuf[0];
+                 size_t outsize = sizeof (scratchbuf);
+                 size_t res;
+
+                 res = (size_t)(-1);
+                 for (insize = 1; inptr + insize <= es; insize++)
+                   {
+                     res = iconv (conv,
+                                  (ICONV_CONST char **) &inptr, &insize,
+                                  &outptr, &outsize);
+                     if (!(res == (size_t)(-1) && errno == EINVAL))
+                       break;
+                   }
+                 if (res == (size_t)(-1))
+                   {
+                     if (errno == EILSEQ)
+                       {
+                         error (0, 0, _("invalid multibyte sequence"));
+                         continue;
+                       }
+                     else
+                       abort ();
+                   }
+                 insize = inptr - ep;
+                 memcpy (pp, ep, insize);
+                 pp += insize;
+                 ep += insize - 1;
+               }
+             else
+#endif
+               *pp++ = c;
+           }
        }
 
       linebreaks = (char *) xmalloc (portion_len);
@@ -367,6 +460,11 @@ internationalized messages should not contain the `\\%c' escape sequence"),
       s = es;
     }
   while (*s);
+
+#if HAVE_ICONV
+  if (conv != (iconv_t)(-1))
+    iconv_close (conv);
+#endif
 }