Move PO charset handling to a separate file.

author Bruno Haible <bruno@clisp.org>

Mon, 30 Apr 2001 13:53:15 +0000 (13:53 +0000)

committer Bruno Haible <bruno@clisp.org>

Mon, 30 Apr 2001 13:53:15 +0000 (13:53 +0000)
author Bruno Haible <bruno@clisp.org>
Mon, 30 Apr 2001 13:53:15 +0000 (13:53 +0000)
committer Bruno Haible <bruno@clisp.org>
Mon, 30 Apr 2001 13:53:15 +0000 (13:53 +0000)
diff --git a/src/ChangeLog b/src/ChangeLog

index 989d517df9cf30790fc07c7a2a559d905992a0cd..1ee21d92d490600acee4af89b1fcb5d4252e1028 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,26 @@
+2001-04-28  Bruno Haible  <haible@clisp.cons.org>
+
+       * po-charset.h: New file.
+       * po-charset.c: New file, extracted from po.c. Don't warn about wrong
+       or missing charset if it's a POT file.
+       * po-lex.h (iconv.h, po_lex_iconv): Move to po-charset.h.
+       (po_lex_charset): Remove declaration.
+       * po-lex.c: Include po-charset.h.
+       (po_lex_charset, po_lex_iconv): Move to po-charset.c.
+       (lex_open): Call po_lex_charset_init.
+       (lex_close): Call po_lex_charset_close.
+       * po.c: Don't include ctype.h, stdio.h, mbswidth.h, libgettext.h.
+       Include po-charset.h instead.
+       (program_name, _, SIZEOF, multiline_warning): Move to po-charset.c.
+       (po_callback_message): Move charset handling to po-charset.c.
+       * Makefile.am (noinst_HEADERS): Add po-charset.h.
+       (msgcmp_SOURCES, msgfmt_SOURCES, msgmerge_SOURCES, xgettext_SOURCES,
+       msgcomm_SOURCES): Add po-charset.c.
+       * FILES: Update.
+
+       * msgfmt.c: Don't include ctype.h.
+       * msgunfmt.c: Likewise.
+
  2001-04-28  Bruno Haible  <haible@clisp.cons.org>
  
         * po.h (PO_BASE_TY): Remove next_is_fuzzy field.
diff --git a/src/FILES b/src/FILES

index 0dd327f032d93476a5534da849466bd7dadd82cd..318e03e2112e990f9cc5d23cf96d2454da5636bd 100644 (file)
--- a/src/FILES
+++ b/src/FILES
@@ -36,6 +36,10 @@ open-po.h
  open-po.c
                  Opening PO files for reading.
  
+po-charset.h
+po-charset.c
+                Charset handling while reading PO files.
+
  po-lex.h
  po-lex.c
                  Lexical analysis of PO files.
diff --git a/src/Makefile.am b/src/Makefile.am

index 1b8e8ec325f539358d918deae05b182b39a23b64..ac8dff3397575b7700851fd9dfe8b7f82528673a 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,8 +21,9 @@ AUTOMAKE_OPTIONS = 1.2 gnits
  
  bin_PROGRAMS = gettext ngettext msgcmp msgfmt msgmerge msgunfmt xgettext msgcomm
  
-noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-lex.h po.h open-po.h \
-str-list.h write-po.h xget-lex.h dir-list.h po-gram-gen.h po-hash-gen.h
+noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \
+po.h open-po.h str-list.h write-po.h xget-lex.h dir-list.h po-gram-gen.h \
+po-hash-gen.h
  
  EXTRA_DIST = FILES
  
@@ -41,16 +42,17 @@ YACC = @YACC@ -d
  gettext_SOURCES = gettext.c
  ngettext_SOURCES = ngettext.c
  msgcmp_SOURCES = message.c msgcmp.c open-po.c po-gram-gen.y po-hash-gen.y \
-po-lex.c po.c str-list.c dir-list.c
-msgfmt_SOURCES = msgfmt.c open-po.c po-gram-gen.y po-hash-gen.y po-lex.c po.c \
-str-list.c message.c dir-list.c
+po-charset.c po-lex.c po.c str-list.c dir-list.c
+msgfmt_SOURCES = msgfmt.c open-po.c po-gram-gen.y po-hash-gen.y po-charset.c \
+po-lex.c po.c str-list.c message.c dir-list.c
  msgmerge_SOURCES = message.c msgmerge.c open-po.c po-gram-gen.y po-hash-gen.y \
-po-lex.c po.c str-list.c dir-list.c write-po.c
+po-charset.c po-lex.c po.c str-list.c dir-list.c write-po.c
  msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c
-xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y po-lex.c \
-po.c str-list.c xget-lex.c xgettext.c dir-list.c write-po.c
-msgcomm_SOURCES = msgcomm.c message.c po-gram-gen.y po-hash-gen.y po-lex.c \
-open-po.c po.c str-list.c dir-list.c write-po.c
+xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \
+po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \
+write-po.c
+msgcomm_SOURCES = msgcomm.c message.c po-gram-gen.y po-hash-gen.y \
+po-charset.c po-lex.c open-po.c po.c str-list.c dir-list.c write-po.c
  
  # Link dependencies.
  # po-lex.c and po.c may need -liconv.
diff --git a/src/msgfmt.c b/src/msgfmt.c

index fd5c9d123f94f665ade3c8307e40c6c486aa20bc..e0f3138151805aae2f61a023656bca2296d5d53e 100644 (file)
--- a/src/msgfmt.c
+++ b/src/msgfmt.c
@@ -20,7 +20,6 @@
  # include <config.h>
  #endif
  
-#include <ctype.h>
  #include <errno.h>
  #include <getopt.h>
  #include <stdio.h>
diff --git a/src/msgunfmt.c b/src/msgunfmt.c

index 17700934e519d0ae964a27ea8961956442b9bd3c..3cfd7f0e2bc7ee44e4ea533babebdeda4f341abb 100644 (file)
--- a/src/msgunfmt.c
+++ b/src/msgunfmt.c
@@ -20,7 +20,6 @@
  # include <config.h>
  #endif
  
-#include <ctype.h>
  #include <errno.h>
  #include <getopt.h>
  #include <stdio.h>
diff --git a/src/po-charset.c b/src/po-charset.c

new file mode 100644 (file)

index 0000000..7f2263a
--- /dev/null
+++ b/src/po-charset.c
@@ -0,0 +1,369 @@
+/* Charset handling while reading PO files.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "po-charset.h"
+#include "error.h"
+#include "system.h"
+#include "mbswidth.h"
+#include "libgettext.h"
+
+extern const char *program_name;
+
+#define _(str) gettext (str)
+
+#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
+
+/* Compare S1 and S2, ignoring case for the ASCII letters A-Z only.
+   Return zero if they are equal, otherwise the difference of the first
+   pair of bytes that differ (after folding).  Unlike strcasecmp(), the
+   result never depends on the current locale: encoding names are plain
+   ASCII, and locale-specific case folding (for example of 'I' and 'i' in
+   a Turkish locale) could make a valid name fail to match.  */
+static int
+ascii_strcasecmp (s1, s2)
+     const char *s1;
+     const char *s2;
+{
+  for (;; s1++, s2++)
+    {
+      unsigned char c1 = (unsigned char) *s1;
+      unsigned char c2 = (unsigned char) *s2;
+
+      if (c1 >= 'A' && c1 <= 'Z')
+       c1 += 'a' - 'A';
+      if (c2 >= 'A' && c2 <= 'Z')
+       c2 += 'a' - 'A';
+      if (c1 != c2)
+       return (int) c1 - (int) c2;
+      if (c1 == '\0')
+       return 0;
+    }
+}
+
+/* Canonicalize an encoding name.
+   Return the canonical spelling of CHARSET if it matches, ignoring ASCII
+   case, one of the names in standard_charsets; return NULL otherwise.
+   The returned string is an element of a static table, so it must not be
+   freed and two results can be compared with ==.  */
+const char *
+po_charset_canonicalize (charset)
+     const char *charset;
+{
+  /* The list of charsets supported by glibc's iconv() and by the portable
+     iconv() across platforms.  Taken from intl/config.charset.  */
+  static const char *standard_charsets[] =
+  {
+    "ASCII", "ANSI_X3.4-1968", "US-ASCII",     /* i = 0..2 */
+    "ISO-8859-1", "ISO_8859-1",                        /* i = 3, 4 */
+    "ISO-8859-2", "ISO_8859-2",
+    "ISO-8859-3", "ISO_8859-3",
+    "ISO-8859-4", "ISO_8859-4",
+    "ISO-8859-5", "ISO_8859-5",
+    "ISO-8859-6", "ISO_8859-6",
+    "ISO-8859-7", "ISO_8859-7",
+    "ISO-8859-8", "ISO_8859-8",
+    "ISO-8859-9", "ISO_8859-9",
+    "ISO-8859-13", "ISO_8859-13",
+    "ISO-8859-15", "ISO_8859-15",              /* i = 23, 24 */
+    "KOI8-R",
+    "KOI8-U",
+    "CP850",
+    "CP866",
+    "CP874",
+    "CP932",
+    "CP949",
+    "CP950",
+    "CP1250",
+    "CP1251",
+    "CP1252",
+    "CP1253",
+    "CP1254",
+    "CP1255",
+    "CP1256",
+    "CP1257",
+    "GB2312",
+    "EUC-JP",
+    "EUC-KR",
+    "EUC-TW",
+    "BIG5",
+    "BIG5HKSCS",
+    "GBK",
+    "GB18030",
+    "SJIS",
+    "JOHAB",
+    "TIS-620",
+    "VISCII",
+    "UTF-8"
+  };
+  size_t i;
+
+  for (i = 0; i < SIZEOF (standard_charsets); i++)
+    if (ascii_strcasecmp (charset, standard_charsets[i]) == 0)
+      {
+       /* Entries 0..2 are all aliases of "ASCII".  */
+       if (i < 3)
+         return standard_charsets[0];
+       /* Entries 3..24 come in pairs ("ISO-8859-n", "ISO_8859-n"); the
+          first element of each pair is the canonical one, so round the
+          offset within this range down to an even number.  */
+       if (i < 25)
+         return standard_charsets[((i - 3) & ~(size_t) 1) + 3];
+       /* Every remaining entry is its own canonical name.  */
+       return standard_charsets[i];
+      }
+  return NULL;
+}
+
+/* The PO file's encoding, as specified in the header entry.  */
+static const char *po_lex_charset;
+
+#if HAVE_ICONV
+/* Converter from the PO file's encoding to UTF-8.  */
+iconv_t po_lex_iconv;
+#endif
+
+/* Put the charset state into its "nothing known yet" condition: no
+   converter is open and no encoding name has been recorded.  */
+void
+po_lex_charset_init ()
+{
+#if HAVE_ICONV
+  /* (iconv_t)(-1) is the marker for "no converter open".  */
+  po_lex_iconv = (iconv_t)(-1);
+#endif
+  po_lex_charset = NULL;
+}
+
+/* Write MESSAGE, which may span several lines, to stderr as a warning.
+   When PREFIX is non-NULL it is printed in front of the first line and
+   its display width is remembered; every following line is indented by
+   that many spaces.  When PREFIX is NULL the first line is indented too,
+   using the width remembered from the previous call.  Both PREFIX and
+   MESSAGE are freed before returning.  */
+static void
+multiline_warning (prefix, message)
+     char *prefix;
+     char *message;
+{
+  static int width;
+  const char *line = message;
+  int need_indent;
+
+  fflush (stdout);
+
+  if (prefix == NULL)
+    need_indent = 1;
+  else
+    {
+      fputs (prefix, stderr);
+      width = mbswidth (prefix, 0);
+      free (prefix);
+      /* The prefix already occupies the indentation of the first line.  */
+      need_indent = 0;
+    }
+
+  for (;;)
+    {
+      const char *eol;
+      const char *next;
+
+      if (need_indent)
+       {
+         int pad = width;
+
+         while (pad-- > 0)
+           putc (' ', stderr);
+       }
+      need_indent = 1;
+
+      eol = strchr (line, '\n');
+
+      /* The last line, with or without its trailing newline, is written
+        in one piece and ends the loop.  */
+      if (eol == NULL || eol[1] == '\0')
+       {
+         fputs (line, stderr);
+         break;
+       }
+
+      next = eol + 1;
+      fwrite (line, 1, next - line, stderr);
+      line = next;
+    }
+
+  free (message);
+}
+
+void
+po_lex_charset_set (header_entry, filename)
+     const char *header_entry;
+     const char *filename;
+{
+  /* Verify the validity of CHARSET.  It is necessary
+     1. for the correct treatment of multibyte characters containing
+       0x5C bytes in the PO lexer,
+     2. so that at run time, gettext() can call iconv() to convert
+       msgstr.  */
+  const char *charsetstr = strstr (header_entry, "charset=");
+
+  if (charsetstr != NULL)
+    {
+      size_t len;
+      char *charset;
+      const char *canon_charset;
+
+      charsetstr += strlen ("charset=");
+      len = strcspn (charsetstr, " \t\n");
+      charset = (char *) alloca (len + 1);
+      memcpy (charset, charsetstr, len);
+      charset[len] = '\0';
+
+      canon_charset = po_charset_canonicalize (charset);
+      if (canon_charset == NULL)
+       {
+         /* Don't warn for POT files, because POT files usually contain
+            only ASCII msgids.  */
+         size_t filenamelen = strlen (filename);
+
+         if (!(filenamelen >= 4
+               && memcmp (filename + filenamelen - 4, ".pot", 4) == 0
+               && strcmp (charset, "CHARSET") == 0))
+           {
+             char *prefix;
+             char *msg;
+
+             asprintf (&prefix, _("%s: warning: "), filename);
+             asprintf (&msg, _("\
+Charset \"%s\" is not a portable encoding name.\n\
+Message conversion to user's charset might not work.\n"),
+                       charset);
+             if (prefix == NULL || msg == NULL)
+               error (EXIT_FAILURE, 0, _("memory exhausted"));
+             multiline_warning (prefix, msg);
+           }
+       }
+      else
+       {
+         /* The list of encodings in standard_charsets which have
+            double-byte characters ending in 0x5C.  For these encodings,
+            the string parser is likely to be confused if it can't see
+            the character boundaries.  */
+         static const char *weird_charsets[] =
+         {
+           "BIG5",
+           "BIG5HKSCS",
+           "GBK",
+           "GB18030",
+           "SJIS",
+           "JOHAB"
+         };
+         const char *envval;
+
+         po_lex_charset = canon_charset;
+#if HAVE_ICONV
+         if (po_lex_iconv != (iconv_t)(-1))
+           iconv_close (po_lex_iconv);
+#endif
+
+         /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35
+            don't know about multibyte encodings, and require a spurious
+            backslash after every multibyte character whose last byte is
+            0x5C.  Some programs, like vim, distribute PO files in this
+            broken format.  GNU msgfmt must continue to support this old
+            PO file format when the Makefile requests it.  */
+         envval = getenv ("OLD_PO_FILE_INPUT");
+         if (envval != NULL && *envval != '\0')
+           {
+             /* Assume the PO file is in old format, with extraneous
+                backslashes.  */
+#if HAVE_ICONV
+             po_lex_iconv = (iconv_t)(-1);
+#endif
+           }
+         else
+           {
+             /* Use iconv() to parse multibyte characters.  */
+#if HAVE_ICONV
+             /* Avoid glibc-2.1 bug with EUC-KR.  */
+# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
+             if (strcmp (po_lex_charset, "EUC-KR") == 0)
+               po_lex_iconv = (iconv_t)(-1);
+             else
+# endif
+             po_lex_iconv = iconv_open ("UTF-8", po_lex_charset);
+             if (po_lex_iconv == (iconv_t)(-1))
+               {
+                 size_t i;
+                 const char *note;
+                 char *prefix;
+                 char *msg;
+
+                 for (i = 0; i < SIZEOF (weird_charsets); i++)
+                   if (strcmp (po_lex_charset, weird_charsets[i]) == 0)
+                     break;
+                 if (i < SIZEOF (weird_charsets))
+                   note = _("Continuing anyway, expect parse errors.");
+                 else
+                   note = _("Continuing anyway.");
+
+                 asprintf (&prefix, _("%s: warning: "), filename);
+                 asprintf (&msg, _("\
+Charset \"%s\" is not supported. %s relies on iconv(),\n\
+and iconv() does not support \"%s\".\n"),
+                           po_lex_charset, basename (program_name),
+                           po_lex_charset);
+                 if (prefix == NULL || msg == NULL)
+                   error (EXIT_FAILURE, 0, _("memory exhausted"));
+                 multiline_warning (prefix, msg);
+
+# if !defined _LIBICONV_VERSION
+                 asprintf (&msg, _("\
+Installing GNU libiconv and then reinstalling GNU gettext\n\
+would fix this problem.\n"));
+                 if (msg == NULL)
+                   error (EXIT_FAILURE, 0, _("memory exhausted"));
+                 multiline_warning (NULL, msg);
+# endif
+
+                 asprintf (&msg, _("%s\n"), note);
+                 if (msg == NULL)
+                   error (EXIT_FAILURE, 0, _("memory exhausted"));
+                 multiline_warning (NULL, msg);
+               }
+#else
+             for (i = 0; i < SIZEOF (weird_charsets); i++)
+               if (strcmp (po_lex_charset, weird_charsets[i]) == 0)
+                 break;
+             if (i < SIZEOF (weird_charsets))
+               {
+                 const char *note =
+                   _("Continuing anyway, expect parse errors.");
+                 char *prefix;
+                 char *msg;
+
+                 asprintf (&prefix, _("%s: warning: "), filename);
+                 asprintf (&msg, _("\
+Charset \"%s\" is not supported. %s relies on iconv().\n\
+This version was built without iconv().\n"),
+                           po_lex_charset, basename (program_name));
+                 if (prefix == NULL || msg == NULL)
+                   error (EXIT_FAILURE, 0, _("memory exhausted"));
+                 multiline_warning (prefix, msg);
+
+                 asprintf (&msg, _("\
+Installing GNU libiconv and then reinstalling GNU gettext\n\
+would fix this problem.\n"));
+                 if (msg == NULL)
+                   error (EXIT_FAILURE, 0, _("memory exhausted"));
+                 multiline_warning (NULL, msg);
+
+                 asprintf (&msg, _("%s\n"), note);
+                 if (msg == NULL)
+                   error (EXIT_FAILURE, 0, _("memory exhausted"));
+                 multiline_warning (NULL, msg);
+               }
+#endif
+           }
+       }
+    }
+  else
+    {
+      /* Don't warn for POT files, because POT files usually contain
+        only ASCII msgids.  */
+      size_t filenamelen = strlen (filename);
+
+      if (!(filenamelen >= 4
+           && memcmp (filename + filenamelen - 4, ".pot", 4) == 0))
+       {
+         char *prefix;
+         char *msg;
+
+         asprintf (&prefix, _("%s: warning: "), filename);
+         asprintf (&msg, _("\
+Charset missing in header.\n\
+Message conversion to user's charset will not work.\n"));
+         if (prefix == NULL || msg == NULL)
+           error (EXIT_FAILURE, 0, _("memory exhausted"));
+         multiline_warning (prefix, msg);
+       }
+    }
+}
+
+/* Release the charset state: close the converter if one is open, and
+   forget the recorded encoding name.  */
+void
+po_lex_charset_close ()
+{
+#if HAVE_ICONV
+  if (po_lex_iconv != (iconv_t)(-1))
+    iconv_close (po_lex_iconv);
+  /* Mark the converter as not open, whether or not one was just closed.  */
+  po_lex_iconv = (iconv_t)(-1);
+#endif
+  po_lex_charset = NULL;
+}
diff --git a/src/po-charset.h b/src/po-charset.h

new file mode 100644 (file)

index 0000000..68a0e82
--- /dev/null
+++ b/src/po-charset.h
@@ -0,0 +1,44 @@
+/* Charset handling while reading PO files.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
+
+#ifndef _PO_CHARSET_H
+#define _PO_CHARSET_H
+
+#if HAVE_ICONV
+#include <iconv.h>
+#endif
+
+/* Canonicalize an encoding name.
+   Return the canonical spelling of CHARSET if it matches, ignoring case,
+   one of the portable encoding names listed in po-charset.c, or NULL
+   otherwise.  The result points into a static table.  */
+extern const char *po_charset_canonicalize PARAMS ((const char *charset));
+
+#if HAVE_ICONV
+/* Converter from the PO file's encoding to UTF-8.
+   Equal to (iconv_t)(-1) while no converter is open.  */
+extern iconv_t po_lex_iconv;
+#endif
+
+/* Initialize the PO file's encoding.
+   Marks the encoding as unknown and the converter as not open.  */
+extern void po_lex_charset_init PARAMS ((void));
+
+/* Set the PO file's encoding from the header entry.
+   HEADER_ENTRY is searched for a "charset=" field.  FILENAME is used as
+   the prefix of warning messages and to recognize ".pot" files.  */
+extern void po_lex_charset_set PARAMS ((const char *header_entry,
+                                       const char *filename));
+
+/* Finish up with the PO file's encoding.
+   Closes the converter if one is open and forgets the encoding.  */
+extern void po_lex_charset_close PARAMS ((void));
+
+#endif /* _PO_CHARSET_H */
diff --git a/src/po-lex.c b/src/po-lex.c

index 637422295f33900f6eb6bcd0897b518c4a6176e1..e1a2dab2475ffaefc124673a52587ac9073e64e1 100644 (file)
--- a/src/po-lex.c
+++ b/src/po-lex.c
@@ -43,6 +43,7 @@
  # define va_dcl char *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8;
  #endif
  
+#include "po-charset.h"
  #include "po-lex.h"
  #include "system.h"
  #include "error.h"
@@ -60,10 +61,6 @@ static FILE *fp;
  lex_pos_ty gram_pos;
  unsigned int gram_max_allowed_errors = 20;
  static int po_lex_obsolete;
-const char *po_lex_charset;
-#if HAVE_ICONV
-iconv_t po_lex_iconv;
-#endif
  static int pass_comments = 0;
  int pass_obsolete_entries = 0;
  
@@ -87,10 +84,7 @@ lex_open (fname)
  
    gram_pos.line_number = 1;
    po_lex_obsolete = 0;
-  po_lex_charset = NULL;
-#if HAVE_ICONV
-  po_lex_iconv = (iconv_t)(-1);
-#endif
+  po_lex_charset_init ();
  }
  
  
@@ -111,14 +105,7 @@ lex_close ()
    gram_pos.line_number = 0;
    error_message_count = 0;
    po_lex_obsolete = 0;
-  po_lex_charset = NULL;
-#if HAVE_ICONV
-  if (po_lex_iconv != (iconv_t)(-1))
-    {
-      iconv_close (po_lex_iconv);
-      po_lex_iconv = (iconv_t)(-1);
-    }
-#endif
+  po_lex_charset_close ();
  }
  
  
diff --git a/src/po-lex.h b/src/po-lex.h

index 279382ad464b52a7c4ce392d3d9ac51dd9e2d4ce..11a1b1c096f8f0b82b965bc014aac03a95d75604 100644 (file)
--- a/src/po-lex.h
+++ b/src/po-lex.h
@@ -21,9 +21,6 @@
  #define _PO_LEX_H
  
  #include <sys/types.h>
-#if HAVE_ICONV
-#include <iconv.h>
-#endif
  #include "error.h"
  #include "pos.h"
  
@@ -39,14 +36,6 @@ extern lex_pos_ty gram_pos;
     terminate.  Cf. error_message_count, declared in <error.h>.  */
  extern unsigned int gram_max_allowed_errors;
  
-/* The PO file's encoding, as specified in the header entry.  */
-extern const char *po_lex_charset;
-
-#if HAVE_ICONV
-/* Converter from the PO file's encoding to UTF-8.  */
-extern iconv_t po_lex_iconv;
-#endif
-
  /* Nonzero if obsolete entries shall be considered as valid.  */
  extern int pass_obsolete_entries;
  
diff --git a/src/po.c b/src/po.c

index 2167d581f5e39b11b2e00ec9d9d8ff6e6d6d9384..0bf95ffcd454f9fea9052557949876a89474bd67 100644 (file)
--- a/src/po.c
+++ b/src/po.c
@@ -22,21 +22,12 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  # include "config.h"
  #endif
  
-#include <ctype.h>
-#include <stdio.h>
  #include <stdlib.h>
  
  #include "po.h"
+#include "po-charset.h"
  #include "po-hash.h"
  #include "system.h"
-#include "mbswidth.h"
-#include "libgettext.h"
-
-extern const char *program_name;
-
-#define _(str) gettext (str)
-
-#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
  
  /* Prototypes for local functions.  */
  static void po_parse_brief PARAMS ((po_ty *__pop));
@@ -160,55 +151,6 @@ po_directive_message (pop, msgid, msgid_pos, msgid_plural,
  }
  
  
-/* Emit a multiline warning to stderr, consisting of MESSAGE, with the
-   first line prefixed with PREFIX and the remaining lines prefixed with
-   the same amount of spaces.  Reuse the spaces of the previous call if
-   PREFIX is NULL.  Free the PREFIX and MESSAGE when done.  */
-static void
-multiline_warning (prefix, message)
-     char *prefix;
-     char *message;
-{
-  static int width;
-  const char *cp;
-  int i;
-
-  fflush (stdout);
-
-  cp = message;
-
-  if (prefix != NULL)
-    {
-      fputs (prefix, stderr);
-      width = mbswidth (prefix, 0);
-      free (prefix);
-      goto after_indent;
-    }
-
-  while (1)
-    {
-      const char *np;
-
-      for (i = width; i > 0; i--)
-       putc (' ', stderr);
-
-    after_indent:
-      np = strchr (cp, '\n');
-
-      if (np == NULL || np[1] == '\0')
-       {
-         fputs (cp, stderr);
-         break;
-       }
-
-      np++;
-      fwrite (cp, 1, np - cp, stderr);
-      cp = np;
-    }
-
-  free (message);
-}
-
  void
  po_callback_message (msgid, msgid_pos, msgid_plural,
                      msgstr, msgstr_len, msgstr_pos)
@@ -223,227 +165,7 @@ po_callback_message (msgid, msgid_pos, msgid_plural,
  
    /* Test for header entry.  Ignore fuzziness of the header entry.  */
    if (msgid[0] == '\0')
-    {
-      /* Verify the validity of CHARSET.  It is necessary
-        1. for the correct treatment of multibyte characters containing
-           0x5C bytes in the PO lexer,
-        2. so that at run time, gettext() can call iconv() to convert
-           msgstr.  */
-      const char *charsetstr = strstr (msgstr, "charset=");
-
-      if (charsetstr != NULL)
-       {
-         /* The list of charsets supported by glibc's iconv() and by
-            the portable iconv() across platforms.  Taken from
-            intl/config.charset.  */
-         static const char *standard_charsets[] =
-         {
-           "ASCII", "ANSI_X3.4-1968", "US-ASCII",
-           "ISO-8859-1", "ISO_8859-1",
-           "ISO-8859-2", "ISO_8859-2",
-           "ISO-8859-3", "ISO_8859-3",
-           "ISO-8859-4", "ISO_8859-4",
-           "ISO-8859-5", "ISO_8859-5",
-           "ISO-8859-6", "ISO_8859-6",
-           "ISO-8859-7", "ISO_8859-7",
-           "ISO-8859-8", "ISO_8859-8",
-           "ISO-8859-9", "ISO_8859-9",
-           "ISO-8859-13", "ISO_8859-13",
-           "ISO-8859-15", "ISO_8859-15",
-           "KOI8-R",
-           "KOI8-U",
-           "CP850",
-           "CP866",
-           "CP874",
-           "CP932",
-           "CP949",
-           "CP950",
-           "CP1250",
-           "CP1251",
-           "CP1252",
-           "CP1253",
-           "CP1254",
-           "CP1255",
-           "CP1256",
-           "CP1257",
-           "GB2312",
-           "EUC-JP",
-           "EUC-KR",
-           "EUC-TW",
-           "BIG5",
-           "BIG5HKSCS",
-           "GBK",
-           "GB18030",
-           "SJIS",
-           "JOHAB",
-           "TIS-620",
-           "VISCII",
-           "UTF-8"
-         };
-         size_t len;
-         char *charset;
-         size_t i;
-
-         charsetstr += strlen ("charset=");
-         len = strcspn (charsetstr, " \t\n");
-         charset = (char *) alloca (len + 1);
-         memcpy (charset, charsetstr, len);
-         charset[len] = '\0';
-
-         for (i = 0; i < SIZEOF (standard_charsets); i++)
-           if (strcasecmp (charset, standard_charsets[i]) == 0)
-             break;
-         if (i == SIZEOF (standard_charsets))
-           {
-             char *prefix;
-             char *msg;
-
-             asprintf (&prefix, _("%s: warning: "), gram_pos.file_name);
-             asprintf (&msg, _("\
-Charset \"%s\" is not a portable encoding name.\n\
-Message conversion to user's charset might not work.\n"),
-                       charset);
-             if (prefix == NULL || msg == NULL)
-               error (EXIT_FAILURE, 0, _("memory exhausted"));
-             multiline_warning (prefix, msg);
-           }
-         else
-           {
-             /* The list of encodings in standard_charsets which have
-                double-byte characters ending in 0x5C.  For these encodings,
-                the string parser is likely to be confused if it can't see
-                the character boundaries.  */
-             static const char *weird_charsets[] =
-             {
-               "BIG5",
-               "BIG5HKSCS",
-               "GBK",
-               "GB18030",
-               "SJIS",
-               "JOHAB"
-             };
-             const char *envval;
-
-             po_lex_charset = standard_charsets[i];
-#if HAVE_ICONV
-             if (po_lex_iconv != (iconv_t)(-1))
-               iconv_close (po_lex_iconv);
-#endif
-
-             /* The old Solaris/openwin msgfmt and GNU msgfmt <= 0.10.35
-                don't know about multibyte encodings, and require a spurious
-                backslash after every multibyte character whose last byte is
-                0x5C.  Some programs, like vim, distribute PO files in this
-                broken format.  GNU msgfmt must continue to support this old
-                PO file format when the Makefile requests it.  */
-             envval = getenv ("OLD_PO_FILE_INPUT");
-             if (envval != NULL && *envval != '\0')
-               {
-                 /* Assume the PO file is in old format, with extraneous
-                    backslashes.  */
-#if HAVE_ICONV
-                 po_lex_iconv = (iconv_t)(-1);
-#endif
-               }
-             else
-               {
-                 /* Use iconv() to parse multibyte characters.  */
-#if HAVE_ICONV
-                 /* Avoid glibc-2.1 bug with EUC-KR.  */
-# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
-                 if (strcmp (po_lex_charset, "EUC-KR") == 0)
-                   po_lex_iconv = (iconv_t)(-1);
-                 else
-# endif
-                 po_lex_iconv = iconv_open ("UTF-8", po_lex_charset);
-                 if (po_lex_iconv == (iconv_t)(-1))
-                   {
-                     const char *note;
-                     char *prefix;
-                     char *msg;
-
-                     for (i = 0; i < SIZEOF (weird_charsets); i++)
-                       if (strcmp (po_lex_charset, weird_charsets[i]) == 0)
-                         break;
-                     if (i < SIZEOF (weird_charsets))
-                       note = _("Continuing anyway, expect parse errors.");
-                     else
-                       note = _("Continuing anyway.");
-
-                     asprintf (&prefix, _("%s: warning: "), gram_pos.file_name);
-                     asprintf (&msg, _("\
-Charset \"%s\" is not supported. %s relies on iconv(),\n\
-and iconv() does not support \"%s\".\n"),
-                               po_lex_charset, basename (program_name),
-                               po_lex_charset);
-                     if (prefix == NULL || msg == NULL)
-                       error (EXIT_FAILURE, 0, _("memory exhausted"));
-                     multiline_warning (prefix, msg);
-
-# if !defined _LIBICONV_VERSION
-                     asprintf (&msg, _("\
-Installing GNU libiconv and then reinstalling GNU gettext\n\
-would fix this problem.\n"));
-                     if (msg == NULL)
-                       error (EXIT_FAILURE, 0, _("memory exhausted"));
-                     multiline_warning (NULL, msg);
-# endif
-
-                     asprintf (&msg, _("%s\n"), note);
-                     if (msg == NULL)
-                       error (EXIT_FAILURE, 0, _("memory exhausted"));
-                     multiline_warning (NULL, msg);
-                   }
-#else
-                 for (i = 0; i < SIZEOF (weird_charsets); i++)
-                   if (strcmp (po_lex_charset, weird_charsets[i]) == 0)
-                     break;
-                 if (i < SIZEOF (weird_charsets))
-                   {
-                     const char *note =
-                       _("Continuing anyway, expect parse errors.");
-                     char *prefix;
-                     char *msg;
-
-                     asprintf (&prefix, _("%s: warning: "), gram_pos.file_name);
-                     asprintf (&msg, _("\
-Charset \"%s\" is not supported. %s relies on iconv().\n\
-This version was built without iconv().\n"),
-                               po_lex_charset, basename (program_name));
-                     if (prefix == NULL || msg == NULL)
-                       error (EXIT_FAILURE, 0, _("memory exhausted"));
-                     multiline_warning (prefix, msg);
-
-                     asprintf (&msg, _("\
-Installing GNU libiconv and then reinstalling GNU gettext\n\
-would fix this problem.\n"));
-                     if (msg == NULL)
-                       error (EXIT_FAILURE, 0, _("memory exhausted"));
-                     multiline_warning (NULL, msg);
-
-                     asprintf (&msg, _("%s\n"), note);
-                     if (msg == NULL)
-                       error (EXIT_FAILURE, 0, _("memory exhausted"));
-                     multiline_warning (NULL, msg);
-                   }
-#endif
-               }
-           }
-       }
-      else
-       {
-         char *prefix;
-         char *msg;
-
-         asprintf (&prefix, _("%s: warning: "), gram_pos.file_name);
-         asprintf (&msg, _("\
-Charset missing in header.\n\
-Message conversion to user's charset will not work.\n"));
-         if (prefix == NULL || msg == NULL)
-           error (EXIT_FAILURE, 0, _("memory exhausted"));
-         multiline_warning (prefix, msg);
-       }
-    }
+    po_lex_charset_set (msgstr, gram_pos.file_name);
  
    po_directive_message (callback_arg, msgid, msgid_pos, msgid_plural,
                         msgstr, msgstr_len, msgstr_pos);
author	Bruno Haible <bruno@clisp.org>
	Mon, 30 Apr 2001 13:53:15 +0000 (13:53 +0000)
committer	Bruno Haible <bruno@clisp.org>
	Mon, 30 Apr 2001 13:53:15 +0000 (13:53 +0000)
src/ChangeLog		patch \| blob \| blame \| history
src/FILES		patch \| blob \| blame \| history
src/Makefile.am		patch \| blob \| blame \| history
src/msgfmt.c		patch \| blob \| blame \| history
src/msgunfmt.c		patch \| blob \| blame \| history
src/po-charset.c	[new file with mode: 0644]	patch \| blob
src/po-charset.h	[new file with mode: 0644]	patch \| blob
src/po-lex.c		patch \| blob \| blame \| history
src/po-lex.h		patch \| blob \| blame \| history
src/po.c		patch \| blob \| blame \| history