From: Bruno Haible Date: Tue, 31 Jul 2001 12:48:20 +0000 (+0000) Subject: Improved handling of ASCII charset: avoid redundant warnings. X-Git-Tag: v0.11~579 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2f4339549b3d7726855c377fe6620b357faac7d0;p=thirdparty%2Fgettext.git Improved handling of ASCII charset: avoid redundant warnings. --- diff --git a/src/ChangeLog b/src/ChangeLog index 476b829ce..835658639 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,30 @@ +2001-07-22 Bruno Haible + + * msgl-ascii.h: New file. + * msgl-ascii.c: New file. + * po-charset.h (po_charset_ascii, po_charset_ascii_compatible): New + declarations. + * po-charset.c (po_charset_ascii): New variable. + (po_charset_ascii_compatible): New function. + * write-po.c (has_nonascii): Remove function. + (message_print, message_print_obsolete): Use is_ascii_string instead. + * msgl-iconv.c (iconv_message_list): Don't complain about missing + header entry with charset if all messages are ASCII. + * msgl-cat.c (catenate_msgdomain_list): Don't complain about missing + header entry with charset if all messages are ASCII. Better choice of + canon_to_code: when combining ASCII and an ASCII compatible encoding, + choose the latter, not UTF-8. Avoid performing trivial conversions. + * Makefile.am (noinst_HEADERS): Add msgl-ascii.h. + (msgmerge_SOURCES): Add msgl-ascii.c. + (msgunfmt_SOURCES): Likewise. + (msgcat_SOURCES): Likewise. + (msgcomm_SOURCES): Likewise. + (msgconv_SOURCES): Likewise. + (msgen_SOURCES): Likewise. + (msgexec_SOURCES): Likewise. + (msggrep_SOURCES): Likewise. + (msguniq_SOURCES): Likewise. + 2001-07-22 Bruno Haible * msgl-cat.h: Include . 
diff --git a/src/Makefile.am b/src/Makefile.am index 2b02ef9a5..5537ca2ee 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -25,7 +25,7 @@ msgcat msgcomm msgconv msgen msgexec msggrep msguniq noinst_HEADERS = pos.h message.h po-gram.h po-hash.h po-charset.h po-lex.h \ po.h open-po.h read-po.h str-list.h write-po.h xget-lex.h dir-list.h \ -po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-cat.h +po-gram-gen.h po-hash-gen.h msgl-charset.h msgl-iconv.h msgl-ascii.h msgl-cat.h EXTRA_DIST = FILES @@ -47,30 +47,33 @@ po-charset.c po-lex.c po.c str-list.c dir-list.c msgfmt_SOURCES = msgfmt.c open-po.c po-gram-gen.y po-hash-gen.y po-charset.c \ po-lex.c po.c str-list.c message.c dir-list.c msgmerge_SOURCES = message.c msgmerge.c open-po.c po-gram-gen.y po-hash-gen.y \ -po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c -msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c +po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ +msgl-ascii.c +msgunfmt_SOURCES = message.c msgunfmt.c str-list.c write-po.c msgl-ascii.c xgettext_SOURCES = message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c str-list.c xget-lex.c xgettext.c dir-list.c \ -write-po.c +write-po.c msgl-ascii.c msgcat_SOURCES = msgcat.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ -msgl-iconv.c msgl-cat.c +msgl-ascii.c msgl-iconv.c msgl-cat.c msgcomm_SOURCES = msgcomm.c message.c po-gram-gen.y po-hash-gen.y \ -po-charset.c po-lex.c open-po.c po.c str-list.c dir-list.c write-po.c +po-charset.c po-lex.c open-po.c po.c str-list.c dir-list.c write-po.c \ +msgl-ascii.c msgconv_SOURCES = msgconv.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ -msgl-iconv.c +msgl-ascii.c msgl-iconv.c msgen_SOURCES = msgen.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ -po-charset.c po-lex.c po.c 
read-po.c str-list.c dir-list.c write-po.c +po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ +msgl-ascii.c msgexec_SOURCES = msgexec.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ -msgl-charset.c +msgl-ascii.c msgl-charset.c msggrep_SOURCES = msggrep.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ -msgl-charset.c +msgl-ascii.c msgl-charset.c msguniq_SOURCES = msguniq.c message.c open-po.c po-gram-gen.y po-hash-gen.y \ po-charset.c po-lex.c po.c read-po.c str-list.c dir-list.c write-po.c \ -msgl-iconv.c msgl-cat.c +msgl-ascii.c msgl-iconv.c msgl-cat.c # Link dependencies. # po-lex.c and po.c may need -liconv. diff --git a/src/msgl-ascii.c b/src/msgl-ascii.c new file mode 100644 index 000000000..3b528e7d4 --- /dev/null +++ b/src/msgl-ascii.c @@ -0,0 +1,84 @@ +/* Message list test for ASCII character set. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "msgl-ascii.h" +#include "c-ctype.h" + + +/* This file's structure parallels msgl-iconv.c. 
*/ + + +bool +is_ascii_string (string) + const char *string; +{ + for (; *string; string++) + if (!c_isascii ((unsigned char) *string)) + return false; + return true; +} + +bool +is_ascii_string_list (slp) + string_list_ty *slp; +{ + size_t i; + + if (slp != NULL) + for (i = 0; i < slp->nitems; i++) + if (!is_ascii_string (slp->item[i])) + return false; + return true; +} + +bool +is_ascii_message (mp) + message_ty *mp; +{ + const char *p = mp->msgstr; + const char *p_end = p + mp->msgstr_len; + + for (; p < p_end; p++) + if (!c_isascii ((unsigned char) *p)) + return false; + + if (!is_ascii_string_list (mp->comment)) + return false; + if (!is_ascii_string_list (mp->comment_dot)) + return false; + + return true; +} + +bool +is_ascii_message_list (mlp) + message_list_ty *mlp; +{ + size_t j; + + for (j = 0; j < mlp->nitems; j++) + if (!is_ascii_message (mlp->item[j])) + return false; + + return true; +} diff --git a/src/msgl-ascii.h b/src/msgl-ascii.h new file mode 100644 index 000000000..fcb47b2b1 --- /dev/null +++ b/src/msgl-ascii.h @@ -0,0 +1,35 @@ +/* Message list test for ASCII character set. + Copyright (C) 2001 Free Software Foundation, Inc. + Written by Bruno Haible , 2001. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ + +#ifndef _MSGL_ASCII_H +#define _MSGL_ASCII_H + +#include "message.h" + +#include <stdbool.h> + +extern bool + is_ascii_string PARAMS ((const char *string)); +extern bool + is_ascii_string_list PARAMS ((string_list_ty *slp)); +extern bool + is_ascii_message PARAMS ((message_ty *mp)); +extern bool + is_ascii_message_list PARAMS ((message_list_ty *mlp)); + +#endif /* _MSGL_ASCII_H */ diff --git a/src/msgl-cat.c b/src/msgl-cat.c index 7324e4331..f08b15515 100644 --- a/src/msgl-cat.c +++ b/src/msgl-cat.c @@ -29,6 +29,7 @@ #include "message.h" #include "read-po.h" #include "po-charset.h" +#include "msgl-ascii.h" #include "msgl-iconv.h" #include "system.h" #include "libgettext.h" @@ -166,14 +167,19 @@ two different charsets \"%s\" and \"%s\" in input file"), } if (canon_from_code == NULL) { - if (k == 0) - error (EXIT_FAILURE, 0, _("\ -input file `%s' doesn't contain a header entry with a charset specification"), - files[n]); + if (is_ascii_message_list (mlp)) + canon_from_code = po_charset_ascii; else - error (EXIT_FAILURE, 0, _("\ + { + if (k == 0) + error (EXIT_FAILURE, 0, _("\ +input file `%s' doesn't contain a header entry with a charset specification"), + files[n]); + else + error (EXIT_FAILURE, 0, _("\ domain \"%s\" in input file `%s' doesn't contain a header entry with a charset specification"), - mdlp->item[k]->domain, files[n]); + mdlp->item[k]->domain, files[n]); + } } } canon_charsets[n][k] = canon_from_code; @@ -337,7 +343,9 @@ domain \"%s\" in input file `%s' doesn't contain a header entry with a charset s all in a single encoding. If so, conversion is not needed. 
*/ const char *first = NULL; const char *second = NULL; + bool with_ASCII = false; bool with_UTF8 = false; + bool all_ASCII_compatible = true; for (n = 0; n < nfiles; n++) { @@ -346,16 +354,31 @@ domain \"%s\" in input file `%s' doesn't contain a header entry with a charset s for (k = 0; k < mdlp->nitems; k++) if (canon_charsets[n][k] != NULL) { - if (first == NULL) - first = canon_charsets[n][k]; - else if (canon_charsets[n][k] != first && second == NULL) - second = canon_charsets[n][k]; + if (canon_charsets[n][k] == po_charset_ascii) + with_ASCII = true; + else + { + if (first == NULL) + first = canon_charsets[n][k]; + else if (canon_charsets[n][k] != first && second == NULL) + second = canon_charsets[n][k]; + + if (strcmp (canon_charsets[n][k], "UTF-8") == 0) + with_UTF8 = true; - if (strcmp (canon_charsets[n][k], "UTF-8") == 0) - with_UTF8 = true; + if (!po_charset_ascii_compatible (canon_charsets[n][k])) + all_ASCII_compatible = false; + } } } + if (with_ASCII && !all_ASCII_compatible) + { + /* assert (first != NULL); */ + if (second == NULL) + second = po_charset_ascii; + } + if (second != NULL) { /* A conversion is needed. Warn the user since he hasn't asked @@ -375,6 +398,13 @@ To select a different output encoding, use the --to-code option.\n\ "), first, second)); canon_to_code = po_charset_canonicalize ("UTF-8"); } + else if (first != NULL && with_ASCII && all_ASCII_compatible) + { + /* The conversion is a no-op conversion. Don't warn the user, + but still perform the conversion, in order to check that the + input was really ASCII. */ + canon_to_code = first; + } else { /* No conversion needed. 
*/ @@ -390,7 +420,11 @@ To select a different output encoding, use the --to-code option.\n\ for (k = 0; k < mdlp->nitems; k++) if (canon_charsets[n][k] != NULL) - iconv_message_list (mdlp->item[k]->messages, canon_to_code); + /* If the user hasn't given a to_code, don't bother doing a noop + conversion that would only replace the charset name in the + header entry with its canonical equivalent. */ + if (!(to_code == NULL && canon_charsets[n][k] == canon_to_code)) + iconv_message_list (mdlp->item[k]->messages, canon_to_code); } /* Fill the resulting messages. */ diff --git a/src/msgl-iconv.c b/src/msgl-iconv.c index 1d1ec462f..54b65ca5f 100644 --- a/src/msgl-iconv.c +++ b/src/msgl-iconv.c @@ -33,6 +33,7 @@ #include "progname.h" #include "message.h" #include "po-charset.h" +#include "msgl-ascii.h" #include "system.h" #include "libgettext.h" @@ -314,8 +315,13 @@ two different charsets \"%s\" and \"%s\" in input file"), } } if (canon_from_code == NULL) - error (EXIT_FAILURE, 0, _("\ + { + if (is_ascii_message_list (mlp)) + canon_from_code = po_charset_ascii; + else + error (EXIT_FAILURE, 0, _("\ input file doesn't contain a header entry with a charset specification")); + } /* If the two encodings are the same, nothing to do. */ if (canon_from_code != canon_to_code) diff --git a/src/po-charset.c b/src/po-charset.c index 14f8cfee7..44652263b 100644 --- a/src/po-charset.c +++ b/src/po-charset.c @@ -33,6 +33,12 @@ extern const char *program_name; #define SIZEOF(a) (sizeof(a) / sizeof(a[0])) +static const char ascii[] = "ASCII"; + +/* The canonicalized encoding name for ASCII. */ +const char *po_charset_ascii = ascii; + +/* Canonicalize an encoding name. */ const char * po_charset_canonicalize (charset) const char *charset; @@ -41,7 +47,7 @@ po_charset_canonicalize (charset) iconv() across platforms. Taken from intl/config.charset. 
*/ static const char *standard_charsets[] = { - "ASCII", "ANSI_X3.4-1968", "US-ASCII", /* i = 0..2 */ + ascii, "ANSI_X3.4-1968", "US-ASCII", /* i = 0..2 */ "ISO-8859-1", "ISO_8859-1", /* i = 3, 4 */ "ISO-8859-2", "ISO_8859-2", "ISO-8859-3", "ISO_8859-3", @@ -91,6 +97,21 @@ po_charset_canonicalize (charset) return NULL; } +/* Test for ASCII compatibility. */ +bool +po_charset_ascii_compatible (canon_charset) + const char *canon_charset; +{ + /* There are only a few exceptions to ASCII compatibility. */ + if (strcmp (canon_charset, "SHIFT_JIS") == 0 + || strcmp (canon_charset, "JOHAB") == 0 + || strcmp (canon_charset, "VISCII") == 0) + return false; + else + return true; +} + + /* The PO file's encoding, as specified in the header entry. */ const char *po_lex_charset; diff --git a/src/po-charset.h b/src/po-charset.h index cf3481ed2..5922c9439 100644 --- a/src/po-charset.h +++ b/src/po-charset.h @@ -19,6 +19,8 @@ #ifndef _PO_CHARSET_H #define _PO_CHARSET_H +#include <stdbool.h> + #if HAVE_ICONV #include <iconv.h> #endif @@ -28,6 +30,13 @@ compared using ==. */ extern const char *po_charset_canonicalize PARAMS ((const char *charset)); +/* The canonicalized encoding name for ASCII. */ +extern const char *po_charset_ascii; + +/* Test for ASCII compatibility. */ +extern bool po_charset_ascii_compatible PARAMS ((const char *canon_charset)); + + /* The PO file's encoding, as specified in the header entry. */ extern const char *po_lex_charset; diff --git a/src/write-po.c b/src/write-po.c index f1c9e3801..2d1220c03 100644 --- a/src/write-po.c +++ b/src/write-po.c @@ -37,6 +37,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ #include "write-po.h" #include "c-ctype.h" #include "linebreak.h" +#include "msgl-ascii.h" #include "system.h" #include "error.h" #include "xerror.h" @@ -57,7 +58,6 @@ static void wrap PARAMS ((FILE *fp, const char *line_prefix, const char *name, const char *value, enum is_wrap do_wrap, const char *charset)); static void print_blank_line PARAMS ((FILE *fp)); -static bool has_nonascii PARAMS ((const char *str)); static void message_print PARAMS ((const message_ty *mp, FILE *fp, const char *charset, bool blank_line, bool debug)); @@ -518,16 +518,6 @@ print_blank_line (fp) putc ('\n', fp); } -static bool -has_nonascii (str) - const char *str; -{ - for (; *str; str++) - if (!c_isascii ((unsigned char) *str)) - return true; - return false; -} - static void message_print (mp, fp, charset, blank_line, debug) const message_ty *mp; @@ -677,7 +667,7 @@ message_print (mp, fp, charset, blank_line, debug) /* Print each of the message components. Wrap them nicely so they are as readable as possible. If there is no recorded msgstr for this domain, emit an empty string. */ - if (has_nonascii (mp->msgid)) + if (!is_ascii_string (mp->msgid)) multiline_warning (xasprintf (_("warning: ")), xasprintf (_("\ The following msgid contains non-ASCII characters.\n\ @@ -771,7 +761,7 @@ message_print_obsolete (mp, fp, charset, blank_line) /* Print each of the message components. Wrap them nicely so they are as readable as possible. */ - if (has_nonascii (mp->msgid)) + if (!is_ascii_string (mp->msgid)) multiline_warning (xasprintf (_("warning: ")), xasprintf (_("\ The following msgid contains non-ASCII characters.\n\