+2001-07-22 Bruno Haible <haible@clisp.cons.org>
+
+ * msgl-ascii.h: New file.
+ * msgl-ascii.c: New file.
+ * po-charset.h (po_charset_ascii, po_charset_ascii_compatible): New
+ declarations.
+ * po-charset.c (po_charset_ascii): New variable.
+ (po_charset_ascii_compatible): New function.
+ * write-po.c (has_nonascii): Remove function.
+ (message_print, message_print_obsolete): Use is_ascii_string instead.
+ * msgl-iconv.c (iconv_message_list): Don't complain about missing
+ header entry with charset if all messages are ASCII.
+ * msgl-cat.c (catenate_msgdomain_list): Don't complain about missing
+ header entry with charset if all messages are ASCII. Better choice of
+ canon_to_code: when combining ASCII and an ASCII compatible encoding,
+ choose the latter, not UTF-8. Avoid performing trivial conversions.
+ * Makefile.am (noinst_HEADERS): Add msgl-ascii.h.
+ (msgmerge_SOURCES): Add msgl-ascii.c.
+ (msgunfmt_SOURCES): Likewise.
+ (xgettext_SOURCES): Likewise.
+ (msgcat_SOURCES): Likewise.
+ (msgcomm_SOURCES): Likewise.
+ (msgconv_SOURCES): Likewise.
+ (msgen_SOURCES): Likewise.
+ (msgexec_SOURCES): Likewise.
+ (msggrep_SOURCES): Likewise.
+ (msguniq_SOURCES): Likewise.
+
2001-07-22 Bruno Haible <haible@clisp.cons.org>
* msgl-cat.h: Include <stdbool.h>.
noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \
po.h open-po.h read-po.h str-list.h write-po.h xget-lex.h dir-list.h \
-po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-cat.h
+po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-ascii.h msgl-cat.h
EXTRA_DIST = FILES
msgfmt_SOURCES = msgfmt.c open-po.c po-gram-gen.y po-hash-gen.y po-charset.c \
po-lex.c po.c str-list.c message.c dir-list.c
msgmerge_SOURCES = message.c msgmerge.c open-po.c po-gram-gen.y po-hash-gen.y \
-po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c
-msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c
+po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
+msgl-ascii.c
+msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c msgl-ascii.c
xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \
-write-po.c
+write-po.c msgl-ascii.c
msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
-msgl-iconv.c msgl-cat.c
+msgl-ascii.c msgl-iconv.c msgl-cat.c
msgcomm_SOURCES = msgcomm.c message.c po-gram-gen.y po-hash-gen.y \
-po-charset.c po-lex.c open-po.c po.c str-list.c dir-list.c write-po.c
+po-charset.c po-lex.c open-po.c po.c str-list.c dir-list.c write-po.c \
+msgl-ascii.c
msgconv_SOURCES = msgconv.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
-msgl-iconv.c
+msgl-ascii.c msgl-iconv.c
msgen_SOURCES = msgen.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
-po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c
+po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
+msgl-ascii.c
msgexec_SOURCES = msgexec.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
-msgl-charset.c
+msgl-ascii.c msgl-charset.c
msggrep_SOURCES = msggrep.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
-msgl-charset.c
+msgl-ascii.c msgl-charset.c
msguniq_SOURCES = msguniq.c message.c open-po.c po-gram-gen.y po-hash-gen.y \
po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \
-msgl-iconv.c msgl-cat.c
+msgl-ascii.c msgl-iconv.c msgl-cat.c
# Link dependencies.
# po-lex.c and po.c may need -liconv.
--- /dev/null
+/* Message list test for ASCII character set.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "msgl-ascii.h"
+#include "c-ctype.h"
+
+
+/* This file's structure parallels msgl-iconv.c. */
+
+
+/* Return true if every character of the NUL-terminated STRING is
+   accepted by c_isascii, and false as soon as one character is not.
+   An empty string yields true.  STRING must not be NULL, since it is
+   dereferenced unconditionally.  */
+bool
+is_ascii_string (string)
+ const char *string;
+{
+ /* Cast to unsigned char so that bytes with the high bit set are not
+    passed to c_isascii as negative values.  */
+ for (; *string; string++)
+ if (!c_isascii ((unsigned char) *string))
+ return false;
+ return true;
+}
+
+/* Return true if every item of the string list SLP passes
+   is_ascii_string.  A NULL list and a list with zero items both yield
+   true.  */
+bool
+is_ascii_string_list (slp)
+ string_list_ty *slp;
+{
+ size_t i;
+
+ /* A missing list contains nothing that could be non-ASCII.  */
+ if (slp != NULL)
+ for (i = 0; i < slp->nitems; i++)
+ if (!is_ascii_string (slp->item[i]))
+ return false;
+ return true;
+}
+
+/* Return true if the translation and the comments of the message MP
+   are entirely ASCII.  Three fields are examined: msgstr, comment and
+   comment_dot.  MP must not be NULL.
+   NOTE(review): mp->msgid is not examined here; confirm that callers
+   do not rely on this function to vouch for the msgid as well.  */
+bool
+is_ascii_message (mp)
+ message_ty *mp;
+{
+ /* msgstr is scanned over its stored length (msgstr_len) rather than
+    up to the first NUL byte; presumably it may hold several
+    NUL-separated strings -- TODO confirm against message.h.  */
+ const char *p = mp->msgstr;
+ const char *p_end = p + mp->msgstr_len;
+
+ for (; p < p_end; p++)
+ if (!c_isascii ((unsigned char) *p))
+ return false;
+
+ /* Either comment list may be NULL; is_ascii_string_list accepts that.  */
+ if (!is_ascii_string_list (mp->comment))
+ return false;
+ if (!is_ascii_string_list (mp->comment_dot))
+ return false;
+
+ return true;
+}
+
+/* Return true if every message of the list MLP passes
+   is_ascii_message; a list with zero items yields true.  Unlike
+   is_ascii_string_list, MLP is dereferenced without a NULL check, so
+   it must not be NULL.  */
+bool
+is_ascii_message_list (mlp)
+ message_list_ty *mlp;
+{
+ size_t j;
+
+ for (j = 0; j < mlp->nitems; j++)
+ if (!is_ascii_message (mlp->item[j]))
+ return false;
+
+ return true;
+}
--- /dev/null
+/* Message list test for ASCII character set.
+ Copyright (C) 2001 Free Software Foundation, Inc.
+ Written by Bruno Haible <haible@clisp.cons.org>, 2001.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+#ifndef _MSGL_ASCII_H
+#define _MSGL_ASCII_H
+
+#include "message.h"
+
+#include <stdbool.h>
+
+/* Return true if the NUL-terminated STRING contains only ASCII
+   characters.  */
+extern bool
+ is_ascii_string PARAMS ((const char *string));
+/* Return true if every string in SLP is ASCII.  SLP may be NULL, which
+   counts as ASCII.  */
+extern bool
+ is_ascii_string_list PARAMS ((string_list_ty *slp));
+/* Return true if the msgstr, comment and comment_dot fields of MP are
+   all ASCII.  */
+extern bool
+ is_ascii_message PARAMS ((message_ty *mp));
+/* Return true if every message in MLP passes is_ascii_message.  MLP
+   must not be NULL.  */
+extern bool
+ is_ascii_message_list PARAMS ((message_list_ty *mlp));
+
+#endif /* _MSGL_ASCII_H */
#include "message.h"
#include "read-po.h"
#include "po-charset.h"
+#include "msgl-ascii.h"
#include "msgl-iconv.h"
#include "system.h"
#include "libgettext.h"
}
if (canon_from_code == NULL)
{
- if (k == 0)
- error (EXIT_FAILURE, 0, _("\
-input file `%s' doesn't contain a header entry with a charset specification"),
- files[n]);
+ if (is_ascii_message_list (mlp))
+ canon_from_code = po_charset_ascii;
else
- error (EXIT_FAILURE, 0, _("\
+ {
+ if (k == 0)
+ error (EXIT_FAILURE, 0, _("\
+input file `%s' doesn't contain a header entry with a charset specification"),
+ files[n]);
+ else
+ error (EXIT_FAILURE, 0, _("\
domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"),
- mdlp->item[k]->domain, files[n]);
+ mdlp->item[k]->domain, files[n]);
+ }
}
}
canon_charsets[n][k] = canon_from_code;
all in a single encoding. If so, conversion is not needed. */
const char *first = NULL;
const char *second = NULL;
+ bool with_ASCII = false;
bool with_UTF8 = false;
+ bool all_ASCII_compatible = true;
for (n = 0; n < nfiles; n++)
{
for (k = 0; k < mdlp->nitems; k++)
if (canon_charsets[n][k] != NULL)
{
- if (first == NULL)
- first = canon_charsets[n][k];
- else if (canon_charsets[n][k] != first && second == NULL)
- second = canon_charsets[n][k];
+ if (canon_charsets[n][k] == po_charset_ascii)
+ with_ASCII = true;
+ else
+ {
+ if (first == NULL)
+ first = canon_charsets[n][k];
+ else if (canon_charsets[n][k] != first && second == NULL)
+ second = canon_charsets[n][k];
+
+ if (strcmp (canon_charsets[n][k], "UTF-8") == 0)
+ with_UTF8 = true;
- if (strcmp (canon_charsets[n][k], "UTF-8") == 0)
- with_UTF8 = true;
+ if (!po_charset_ascii_compatible (canon_charsets[n][k]))
+ all_ASCII_compatible = false;
+ }
}
}
+ if (with_ASCII && !all_ASCII_compatible)
+ {
+ /* assert (first != NULL); */
+ if (second == NULL)
+ second = po_charset_ascii;
+ }
+
if (second != NULL)
{
/* A conversion is needed. Warn the user since he hasn't asked
"), first, second));
canon_to_code = po_charset_canonicalize ("UTF-8");
}
+ else if (first != NULL && with_ASCII && all_ASCII_compatible)
+ {
+ /* The conversion is a no-op conversion. Don't warn the user,
+ but still perform the conversion, in order to check that the
+ input was really ASCII. */
+ canon_to_code = first;
+ }
else
{
/* No conversion needed. */
for (k = 0; k < mdlp->nitems; k++)
if (canon_charsets[n][k] != NULL)
- iconv_message_list (mdlp->item[k]->messages, canon_to_code);
+ /* If the user hasn't given a to_code, don't bother doing a noop
+ conversion that would only replace the charset name in the
+ header entry with its canonical equivalent. */
+ if (!(to_code == NULL && canon_charsets[n][k] == canon_to_code))
+ iconv_message_list (mdlp->item[k]->messages, canon_to_code);
}
/* Fill the resulting messages. */
#include "progname.h"
#include "message.h"
#include "po-charset.h"
+#include "msgl-ascii.h"
#include "system.h"
#include "libgettext.h"
}
}
if (canon_from_code == NULL)
- error (EXIT_FAILURE, 0, _("\
+ {
+ if (is_ascii_message_list (mlp))
+ canon_from_code = po_charset_ascii;
+ else
+ error (EXIT_FAILURE, 0, _("\
input file doesn't contain a header entry with a charset specification"));
+ }
/* If the two encodings are the same, nothing to do. */
if (canon_from_code != canon_to_code)
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
+/* Single storage for the name "ASCII".  The same array is used as the
+   first entry of standard_charsets[] in po_charset_canonicalize, so
+   that (presumably -- confirm against that function's return paths) a
+   canonicalized ASCII name and po_charset_ascii are the same pointer
+   and can be compared with ==.  */
+static const char ascii[] = "ASCII";
+
+/* The canonicalized encoding name for ASCII. */
+const char *po_charset_ascii = ascii;
+
+/* Canonicalize an encoding name. */
const char *
po_charset_canonicalize (charset)
const char *charset;
iconv() across platforms. Taken from intl/config.charset. */
static const char *standard_charsets[] =
{
- "ASCII", "ANSI_X3.4-1968", "US-ASCII", /* i = 0..2 */
+ ascii, "ANSI_X3.4-1968", "US-ASCII", /* i = 0..2 */
"ISO-8859-1", "ISO_8859-1", /* i = 3, 4 */
"ISO-8859-2", "ISO_8859-2",
"ISO-8859-3", "ISO_8859-3",
return NULL;
}
+/* Test for ASCII compatibility.
+   Return false if CANON_CHARSET is one of the three names listed
+   below (SHIFT_JIS, JOHAB, VISCII) and true for every other name.
+   The comparison is by string content (strcmp), not by pointer, so
+   CANON_CHARSET must be a non-NULL string.  The parameter name
+   suggests a name already passed through po_charset_canonicalize;
+   a non-canonical alias of a listed encoding would not match and
+   would be reported as compatible.  */
+bool
+po_charset_ascii_compatible (canon_charset)
+ const char *canon_charset;
+{
+ /* There are only a few exceptions to ASCII compatibility. */
+ if (strcmp (canon_charset, "SHIFT_JIS") == 0
+ || strcmp (canon_charset, "JOHAB") == 0
+ || strcmp (canon_charset, "VISCII") == 0)
+ return false;
+ else
+ return true;
+}
+
+
/* The PO file's encoding, as specified in the header entry. */
const char *po_lex_charset;
#ifndef _PO_CHARSET_H
#define _PO_CHARSET_H
+#include <stdbool.h>
+
#if HAVE_ICONV
#include <iconv.h>
#endif
compared using ==. */
extern const char *po_charset_canonicalize PARAMS ((const char *charset));
+/* The canonicalized encoding name for ASCII.  Callers compare other
+   canonical names against this pointer with ==. */
+extern const char *po_charset_ascii;
+
+/* Test for ASCII compatibility.  Return false only for the encodings
+   SHIFT_JIS, JOHAB and VISCII, true for any other name.  CANON_CHARSET
+   must not be NULL. */
+extern bool po_charset_ascii_compatible PARAMS ((const char *canon_charset));
+
+
/* The PO file's encoding, as specified in the header entry. */
extern const char *po_lex_charset;
#include "write-po.h"
#include "c-ctype.h"
#include "linebreak.h"
+#include "msgl-ascii.h"
#include "system.h"
#include "error.h"
#include "xerror.h"
const char *value, enum is_wrap do_wrap,
const char *charset));
static void print_blank_line PARAMS ((FILE *fp));
-static bool has_nonascii PARAMS ((const char *str));
static void message_print PARAMS ((const message_ty *mp, FILE *fp,
const char *charset, bool blank_line,
bool debug));
putc ('\n', fp);
}
-static bool
-has_nonascii (str)
- const char *str;
-{
- for (; *str; str++)
- if (!c_isascii ((unsigned char) *str))
- return true;
- return false;
-}
-
static void
message_print (mp, fp, charset, blank_line, debug)
const message_ty *mp;
/* Print each of the message components. Wrap them nicely so they
are as readable as possible. If there is no recorded msgstr for
this domain, emit an empty string. */
- if (has_nonascii (mp->msgid))
+ if (!is_ascii_string (mp->msgid))
multiline_warning (xasprintf (_("warning: ")),
xasprintf (_("\
The following msgid contains non-ASCII characters.\n\
/* Print each of the message components. Wrap them nicely so they
are as readable as possible. */
- if (has_nonascii (mp->msgid))
+ if (!is_ascii_string (mp->msgid))
multiline_warning (xasprintf (_("warning: ")),
xasprintf (_("\
The following msgid contains non-ASCII characters.\n\