msgcmp.c Main source for the 'msgcmp' program.
+msgl-charset.h
+msgl-charset.c
+ Checking the encoding of a list-of-messages.
+
+-------------- The 'msgmerge' program
| msgl-equal.h
| msgl-equal.c
msgconv.c Main source for the 'msgconv' program.
msguniq.c Main source for the 'msguniq' program.
-msgl-charset.h
-msgl-charset.c
- Compare the encoding of a list-of-messages with the locale
- encoding.
-
msgexec.c Main source for the 'msgexec' program.
msgfilter.c Main source for the 'msgfilter' program.
msggrep.c Main source for the 'msggrep' program.
#include "write-po.h"
#include "write-properties.h"
#include "write-stringtable.h"
+#include "msgl-charset.h"
#include "xerror-handler.h"
#include "po-charset.h"
#include "localcharset.h"
/* Read input file. */
result = read_catalog_file (input_file, input_syntax);
+ check_pot_charset (result, input_file);
#if defined _WIN32 || defined __CYGWIN__
/* The function fill_header invokes, directly or indirectly, some programs
#include <string.h>
#include <error.h>
+#include "msgl-ascii.h"
#include "po-charset.h"
#include "localcharset.h"
#include "progname.h"
#define _(str) gettext (str)
+/* Check whether the POT file's encoding is ASCII or UTF-8. Otherwise
+ report the problem through error (EXIT_FAILURE, ...), that is, as a
+ fatal error rather than as a mere warning.
+ MDLP is the list of message domains that was read from the POT file.
+ FILENAME is only used as a prefix in the error messages.
+ For every non-obsolete header entry whose msgstr contains "charset=",
+ the charset name is the text that follows "charset=", up to the first
+ space, tab or newline. Two conditions are rejected:
+ 1. po_charset_canonicalize returns NULL for that name, and the name
+ is not the literal placeholder "CHARSET".
+ 2. The message list is not pure ASCII (per is_ascii_message_list)
+ and the canonical charset is neither po_charset_ascii nor
+ po_charset_utf8.
+ Header entries without a msgstr or without "charset=" are skipped
+ silently.
+ Rationale: A POT file is routinely copied by a translator to a PO file.
+ If a POT file contains non-ASCII messages (or comments) in an encoding
+ other than UTF-8, the translator will most likely encounter trouble adding
+ her own translations in the same encoding. A translator should not have
+ to convert the POT file to UTF-8 first; instead, the POT file should
+ already be prepared ready-to-use. */
+void
+check_pot_charset (const msgdomain_list_ty *mdlp, const char *filename)
+{
+ size_t j, k;
+
+ for (k = 0; k < mdlp->nitems; k++)
+ {
+ const message_list_ty *mlp = mdlp->item[k]->messages;
+
+ for (j = 0; j < mlp->nitems; j++)
+ if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
+ {
+ const char *header = mlp->item[j]->msgstr;
+
+ if (header != NULL)
+ {
+ const char *charsetstr = c_strstr (header, "charset=");
+
+ if (charsetstr != NULL)
+ {
+ size_t len;
+ char *charset;
+
+ charsetstr += strlen ("charset=");
+ len = strcspn (charsetstr, " \t\n");
+ charset = (char *) xmalloca (len + 1);
+ memcpy (charset, charsetstr, len);
+ charset[len] = '\0';
+
+ const char *canon_charset = po_charset_canonicalize (charset);
+
+ /* "CHARSET" is often used as a placeholder, equivalent
+ to "any" or "ASCII". It is therefore exempt from the
+ portable-name check that follows. The non-ASCII check
+ further below has no such exemption. */
+ if (!(strcmp (charset, "CHARSET") == 0)
+ && canon_charset == NULL)
+ error (EXIT_FAILURE, 0,
+ _("%s: The present charset \"%s\" is not a portable encoding name."),
+ filename, charset);
+ if (!is_ascii_message_list (mlp)
+ && !(canon_charset == po_charset_ascii
+ || canon_charset == po_charset_utf8))
+ error (EXIT_FAILURE, 0,
+ _("%s: The file contains non-ASCII characters but the present charset \"%s\" is not %s or %s."),
+ filename, charset, "ASCII", "UTF-8");
+
+ freea (charset);
+ }
+ }
+ }
+ }
+}
+
void
compare_po_locale_charsets (const msgdomain_list_ty *mdlp)
{
/* Message list charset and locale charset handling.
- Copyright (C) 2001-2003 Free Software Foundation, Inc.
+ Copyright (C) 2001-2024 Free Software Foundation, Inc.
Written by Bruno Haible <haible@clisp.cons.org>, 2001.
This program is free software: you can redistribute it and/or modify
#endif
+extern void
+ check_pot_charset (const msgdomain_list_ty *mdlp, const char *filename);
+
extern void
compare_po_locale_charsets (const msgdomain_list_ty *mdlp);
#include "write-po.h"
#include "write-properties.h"
#include "write-stringtable.h"
+#include "msgl-charset.h"
#include "format.h"
#include "xalloc.h"
#include "xmalloca.h"
/* This is the references file, created by groping the sources with
the xgettext program. */
ref = read_catalog_file (fn2, input_syntax);
+ check_pot_charset (ref, fn2);
/* Add a dummy header entry, if the references file contains none. */
for (k = 0; k < ref->nitems; k++)
if (message_list_search (ref->item[k]->messages, NULL, "") == NULL)
msgfmt-xml-1 msgfmt-xml-2 msgfmt-xml-3 msgfmt-xml-4 msgfmt-xml-5 \
msggrep-1 msggrep-2 msggrep-3 msggrep-4 msggrep-5 msggrep-6 msggrep-7 \
msggrep-8 msggrep-9 msggrep-10 msggrep-11 \
- msginit-1 msginit-2 msginit-3 msginit-4 \
+ msginit-1 msginit-2 msginit-3 msginit-4 msginit-5 \
msgmerge-1 msgmerge-2 msgmerge-3 msgmerge-4 msgmerge-5 msgmerge-6 \
msgmerge-7 msgmerge-8 msgmerge-9 msgmerge-10 msgmerge-11 msgmerge-12 \
msgmerge-13 msgmerge-14 msgmerge-15 \
msgmerge-charset-1 msgmerge-charset-2 msgmerge-charset-3 \
+ msgmerge-charset-4 \
msgmerge-compendium-1 msgmerge-compendium-2 msgmerge-compendium-3 \
msgmerge-compendium-4 msgmerge-compendium-5 msgmerge-compendium-6 \
msgmerge-domain-1 msgmerge-domain-2 \
xgettext-1 \
xgettext-c-1 xg-c-comment-6.c xg-c-escape-3.c xg-vala-2.vala \
common/supplemental/plurals.xml \
+ testdata/nonascii.pot \
testdata/tcltest_pl.po testdata/tcltest_pl.msg \
testdata/tcltest_cs.po testdata/tcltest_cs.msg \
testdata/xg-el-so-3.el testdata/xg-el-so-4.el \
--- /dev/null
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test validation of POT file with non-ASCII messages: msginit must exit with status 1.
+
+: ${MSGINIT=msginit}
+${MSGINIT} -i "$wabs_srcdir"/testdata/nonascii.pot -l fr --no-translator -o mi-test5.tmp
+test $? = 1 || Exit 1
--- /dev/null
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test validation of POT file with non-ASCII messages: msgmerge must exit with status 1.
+
+cat <<\EOF > mm-ch-4.po
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# Bruno Haible <bruno@clisp.org>, 2024.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: 2024-10-13 18:36+0200\n"
+"Last-Translator: Bruno Haible <bruno@clisp.org>\n"
+"Language-Team: French <traduc@traduc.org>\n"
+"Language: fr\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n > 1);\n"
+
+msgid "You can find me résumé at <%s>."
+msgstr "Vous trouvez mon CV sous <%s>."
+EOF
+
+: ${MSGMERGE=msgmerge}
+${MSGMERGE} -q -o mm-ch-4.tmp.po mm-ch-4.po "$wabs_srcdir"/testdata/nonascii.pot
+test $? = 1 || Exit 1
--- /dev/null
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "You can find me résumé at <%s>."
+msgstr ""