- Tcl: xgettext now supports the \x, \u, and \U escapes as defined in
Tcl 8.6.
+* Portability:
+ - On systems with musl libc, the *gettext() functions in libc now work
+ with MO files generated from PO files with an encoding other than UTF-8.
+ To this end, the msgfmt program now converts the messages to UTF-8
+ encoding before storing them in a MO file. You can prevent this by
+ using the msgfmt --no-convert option.
+
* xgettext:
- The xgettext option '--sorted-output' is now deprecated.
- xgettext input files of type PO that are not all ASCII and not UTF-8
@key{NUL} byte. Here also, the length in the string descriptor
includes all of them.
+@cindex encoding in MO files
+The character encoding of the strings can be any standard ASCII-compatible
+encoding, such as UTF-8, ISO-8859-1, EUC-JP, etc., as long as the
+encoding's name is stated in the header entry (@pxref{Header Entry}).
+Starting with GNU @code{gettext} version 0.22, the MO files produced by
+@code{msgfmt} contain the strings in UTF-8 encoding, unless the @code{msgfmt}
+option @samp{--no-convert} is used.
+
Nothing prevents a MO file from having embedded @key{NUL}s in strings.
However, the program interface currently used already presumes
that strings are @key{NUL} terminated, so embedded @key{NUL}s are
@c This file is part of the GNU gettext manual.
-@c Copyright (C) 1995-2019 Free Software Foundation, Inc.
+@c Copyright (C) 1995-2023 Free Software Foundation, Inc.
@c See the file gettext.texi for copying conditions.
@pindex msgfmt
@subsection Output details
@table @samp
+@item --no-convert
+@opindex --no-convert@r{, @code{msgfmt} option}
+Don't convert the messages to UTF-8 encoding. By default, messages are
+converted to UTF-8 encoding before being stored in a MO file; this helps
+avoid conversions at run time, since nowadays most locales use the
+UTF-8 encoding.
+
@item -a @var{number}
@itemx --alignment=@var{number}
@opindex -a@r{, @code{msgfmt} option}
{ "keyword", optional_argument, NULL, 'k' },
{ "language", required_argument, NULL, 'L' },
{ "locale", required_argument, NULL, 'l' },
+ { "no-convert", no_argument, NULL, CHAR_MAX + 17 },
{ "no-hash", no_argument, NULL, CHAR_MAX + 6 },
{ "output-file", required_argument, NULL, 'o' },
{ "properties-input", no_argument, NULL, 'P' },
desktop_template_name = optarg;
xml_template_name = optarg;
break;
+ case CHAR_MAX + 17: /* --no-convert */
+ no_convert_to_utf8 = true;
+ break;
default:
usage (EXIT_FAILURE);
break;
}
}
+ /* Compose the input file name(s).
+ This is used for statistics and error messages. */
+ char *all_input_file_names;
+ {
+ string_list_ty input_file_names;
+
+ string_list_init (&input_file_names);;
+ for (arg_i = optind; arg_i < argc; arg_i++)
+ string_list_append (&input_file_names, argv[arg_i]);
+ all_input_file_names =
+ string_list_join (&input_file_names, ", ", '\0', false);
+ string_list_destroy (&input_file_names);
+ }
+
/* Now write out all domains. */
for (domain = domain_list; domain != NULL; domain = domain->next)
{
else
{
if (msgdomain_write_mo (domain->mlp, domain->domain_name,
- domain->file_name))
+ domain->file_name, all_input_file_names))
exit_status = EXIT_FAILURE;
}
if (do_statistics + verbose >= 2 && optind < argc)
{
/* Print the input file name(s) in front of the statistics line. */
- char *all_input_file_names;
-
- {
- string_list_ty input_file_names;
-
- string_list_init (&input_file_names);;
- for (arg_i = optind; arg_i < argc; arg_i++)
- string_list_append (&input_file_names, argv[arg_i]);
- all_input_file_names =
- string_list_join (&input_file_names, ", ", '\0', false);
- string_list_destroy (&input_file_names);
- }
-
/* TRANSLATORS: The prefix before a statistics message. The argument
is a file name or a comma separated list of file names. */
fprintf (stderr, _("%s: "), all_input_file_names);
- free (all_input_file_names);
}
fprintf (stderr,
ngettext ("%d translated message", "%d translated messages",
printf (_("\
Output details:\n"));
printf (_("\
+ --no-convert don't convert the messages to UTF-8 encoding\n"));
+ printf (_("\
-a, --alignment=NUMBER align strings to NUMBER bytes (default: %d)\n"), DEFAULT_OUTPUT_ALIGNMENT);
printf (_("\
--endianness=BYTEORDER write out 32-bit numbers in the given byte order\n\
#include "xsize.h"
#include "xalloc.h"
#include "xmalloca.h"
+#include "po-charset.h"
+#include "msgl-iconv.h"
#include "msgl-header.h"
#include "binary-io.h"
#include "supersede.h"
#endif /* roundup */
+/* True if no conversion to UTF-8 is desired. */
+bool no_convert_to_utf8;
+
/* Alignment of strings in resulting .mo file. */
size_t alignment;
int
msgdomain_write_mo (message_list_ty *mlp,
const char *domain_name,
- const char *file_name)
+ const char *file_name,
+ const char *input_file)
{
/* If no entry for this domain don't even create the file. */
if (mlp->nitems != 0)
{
+ if (!no_convert_to_utf8)
+ {
+ /* Convert the messages to UTF-8.
+ This is necessary because the *gettext functions in musl libc
+ assume that both the locale encoding and the .mo encoding are UTF-8.
+ It is also helpful for performance on glibc systems, since most
+ locales nowadays have UTF-8 as locale encoding, whereas some PO
+ files still are encoded in EUC-JP or so. */
+ iconv_message_list (mlp, NULL, po_charset_utf8, input_file);
+ }
+
/* Support for "reproducible builds": Delete information that may vary
between builds in the same conditions. */
message_list_delete_header_field (mlp, "POT-Creation-Date:");
/* Writing binary .mo files.
- Copyright (C) 1995-1998, 2000-2003, 2005-2006 Free Software
- Foundation, Inc.
+ Copyright (C) 1995-1998, 2000-2003, 2005-2006, 2023 Free Software Foundation, Inc.
Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
This program is free software: you can redistribute it and/or modify
#include "message.h"
+/* True if no conversion to UTF-8 is desired. */
+extern bool no_convert_to_utf8;
+
/* Alignment of strings in resulting .mo file. */
extern size_t alignment;
/* Write a GNU mo file. mlp is a list containing the messages to be output.
domain_name is the domain name, file_name is the desired file name.
+ input_file is the name of the input file (or a comma-separated list of
+ input file names); it is passed on to the conversion to UTF-8.
Return 0 if ok, nonzero on error. */
extern int
msgdomain_write_mo (message_list_ty *mlp,
const char *domain_name,
- const char *file_name);
+ const char *file_name,
+ const char *input_file);
#endif /* _WRITE_MO_H */
test -d fr/LC_MESSAGES || mkdir fr/LC_MESSAGES
: ${MSGFMT=msgfmt}
-${MSGFMT} -o fr/LC_MESSAGES/pascalprog.mo fr.po
+${MSGFMT} --no-convert -o fr/LC_MESSAGES/pascalprog.mo fr.po
: ${DIFF=diff}
cat <<\EOF > pascalprog.ok
EOF
: ${MSGFMT=msgfmt}
-${MSGFMT} -o mf-12.mo mf-12.po || Exit 1
+${MSGFMT} --no-convert -o mf-12.mo mf-12.po || Exit 1
: ${MSGUNFMT=msgunfmt}
${MSGUNFMT} -o mf-12.tmp mf-12.mo || Exit 1
EOF
: ${MSGFMT=msgfmt}
-${MSGFMT} foo.po || Exit 1
+${MSGFMT} --no-convert foo.po || Exit 1
: ${MSGUNFMT=msgunfmt}
${MSGUNFMT} -o foo-de.tmp foo-de.mo || Exit 1