* gettext-tools/src/po-charset.h (po_lex_isolate_start, po_lex_isolate_end): New
declarations.
* gettext-tools/src/po-charset.c (po_lex_isolate_start, po_lex_isolate_end): New
variables.
(po_lex_charset_init, po_lex_charset_set, po_lex_charset_close): Initialize,
set, and reset them, respectively.
* gettext-tools/woe32dll/gettextsrc-exports.c: Export also po_lex_isolate_start,
po_lex_isolate_end.
* gettext-tools/src/read-catalog-abstract.c: Include <stdbool.h>, po-charset.h.
(po_parse_comment_filepos): Parse the syntax of file names surrounded by control
characters.
* gettext-tools/src/msgl-iconv.c: Include msgl-ofn.h.
(iconv_msgdomain_list): Signal an error if the target encoding does not contain
the control characters needed for escaping file names with spaces.
* gettext-tools/tests/msgconv-8: New file.
* gettext-tools/tests/Makefile.am (TESTS): Add it.
/* Message list charset and locale charset handling.
- Copyright (C) 2001-2003, 2005-2009, 2019-2020 Free Software Foundation, Inc.
+ Copyright (C) 2001-2003, 2005-2009, 2019-2021 Free Software Foundation, Inc.
Written by Bruno Haible <haible@clisp.cons.org>, 2001.
This program is free software: you can redistribute it and/or modify
#include "xstriconv.h"
#include "xstriconveh.h"
#include "msgl-ascii.h"
+#include "msgl-ofn.h"
#include "xalloc.h"
#include "xmalloca.h"
#include "c-strstr.h"
xasprintf (_("target charset \"%s\" is not a portable encoding name."),
to_code));
+ /* Test whether the control characters required for escaping file names with
+ spaces are present in the target encoding. */
+ if (msgdomain_list_has_filenames_with_spaces (mdlp)
+ && !(canon_to_code == po_charset_utf8
+ || strcmp (canon_to_code, "GB18030") == 0))
+ po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
+ xasprintf (_("Cannot write the control characters that protect file names with spaces in the %s encoding"),
+ canon_to_code));
+
for (k = 0; k < mdlp->nitems; k++)
iconv_message_list_internal (mdlp->item[k]->messages,
mdlp->encoding, canon_to_code, update_header,
/* Charset handling while reading PO files.
- Copyright (C) 2001-2007, 2010, 2019-2020 Free Software Foundation, Inc.
+ Copyright (C) 2001-2007, 2010, 2019-2021 Free Software Foundation, Inc.
Written by Bruno Haible <haible@clisp.cons.org>, 2001.
This program is free software: you can redistribute it and/or modify
/* The PO file's encoding, as specified in the header entry. */
const char *po_lex_charset;
+/* Representation of U+2068 FIRST STRONG ISOLATE (FSI) in the PO file's
+ encoding, or NULL if not available. */
+const char *po_lex_isolate_start;
+/* Representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) in the PO file's
+ encoding, or NULL if not available. */
+const char *po_lex_isolate_end;
+
#if HAVE_ICONV
/* Converter from the PO file's encoding to UTF-8. */
iconv_t po_lex_iconv;
po_lex_charset_init ()
{
po_lex_charset = NULL;
+ po_lex_isolate_start = NULL;
+ po_lex_isolate_end = NULL;
#if HAVE_ICONV
po_lex_iconv = (iconv_t)(-1);
#endif
const char *envval;
po_lex_charset = canon_charset;
+
+ if (strcmp (canon_charset, "UTF-8") == 0)
+ {
+ po_lex_isolate_start = "\xE2\x81\xA8";
+ po_lex_isolate_end = "\xE2\x81\xA9";
+ }
+ else if (strcmp (canon_charset, "GB18030") == 0)
+ {
+ po_lex_isolate_start = "\x81\x36\xAC\x34";
+ po_lex_isolate_end = "\x81\x36\xAC\x35";
+ }
+ else
+ {
+ /* The other encodings don't contain U+2068, U+2069. */
+ po_lex_isolate_start = NULL;
+ po_lex_isolate_end = NULL;
+ }
+
#if HAVE_ICONV
if (po_lex_iconv != (iconv_t)(-1))
iconv_close (po_lex_iconv);
po_lex_charset_close ()
{
po_lex_charset = NULL;
+ po_lex_isolate_start = NULL;
+ po_lex_isolate_end = NULL;
#if HAVE_ICONV
if (po_lex_iconv != (iconv_t)(-1))
{
/* Charset handling while reading PO files.
- Copyright (C) 2001-2003, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2001-2003, 2006, 2021 Free Software Foundation, Inc.
Written by Bruno Haible <haible@clisp.cons.org>, 2001.
This program is free software: you can redistribute it and/or modify
/* Canonicalize an encoding name.
The results of this function are statically allocated and can be
- compared using ==. */
+ compared using ==.
+ Return NULL if CHARSET is not a valid encoding name. */
extern const char *po_charset_canonicalize (const char *charset);
/* The canonicalized encoding name for ASCII. */
/* The PO file's encoding, as specified in the header entry. */
extern DLL_VARIABLE const char *po_lex_charset;
+/* Representation of U+2068 FIRST STRONG ISOLATE (FSI) in the PO file's
+ encoding, or NULL if not available. */
+extern DLL_VARIABLE const char *po_lex_isolate_start;
+/* Representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) in the PO file's
+ encoding, or NULL if not available. */
+extern DLL_VARIABLE const char *po_lex_isolate_end;
+
#if HAVE_ICONV
/* Converter from the PO file's encoding to UTF-8. */
extern DLL_VARIABLE iconv_t po_lex_iconv;
/* Reading PO files, abstract class.
- Copyright (C) 1995-1996, 1998, 2000-2009, 2013, 2015 Free Software
+ Copyright (C) 1995-1996, 1998, 2000-2009, 2013, 2015, 2021 Free Software
Foundation, Inc.
This file was written by Peter Miller <millerp@canb.auug.org.au>
#include "read-catalog-abstract.h"
#include <limits.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
+#include "po-charset.h"
#include "xalloc.h"
#include "xvasprintf.h"
#include "po-xerror.h"
STRING
The latter style, without line number, occurs in PO files converted e.g.
from Pascal .rst files or from OpenOffice resource files.
+ The STRING is either
+ FILENAME
+ or
+ U+2068 FILENAME U+2069,
+ where the second form allows FILENAME to contain spaces.
Call po_callback_comment_filepos for each of them. */
static void
po_parse_comment_filepos (const char *s)
s++;
if (*s != '\0')
{
- const char *string_start = s;
+ bool isolated_filename =
+ (po_lex_isolate_start != NULL
+ && strncmp (s, po_lex_isolate_start,
+ strlen (po_lex_isolate_start)) == 0);
+ if (isolated_filename)
+ s += strlen (po_lex_isolate_start);
- do
- s++;
- while (!(*s == '\0' || *s == ' ' || *s == '\t' || *s == '\n'));
+ const char *filename_start = s;
+ const char *filename_end;
+
+ if (isolated_filename)
+ {
+ for (;; s++)
+ {
+ if (*s == '\0' || *s == '\n')
+ {
+ filename_end = s;
+ break;
+ }
+ if (strncmp (s, po_lex_isolate_end,
+ strlen (po_lex_isolate_end)) == 0)
+ {
+ filename_end = s;
+ s += strlen (po_lex_isolate_end);
+ break;
+ }
+ }
+ }
+ else
+ {
+ do
+ s++;
+ while (!(*s == '\0' || *s == ' ' || *s == '\t' || *s == '\n'));
+ filename_end = s;
+ }
/* See if there is a COLON and NUMBER after the STRING, separated
through optional spaces. */
if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
{
/* Parsed a GNU style file comment with spaces. */
- const char *string_end = s;
- size_t string_length = string_end - string_start;
- char *string = XNMALLOC (string_length + 1, char);
+ size_t filename_length = filename_end - filename_start;
+ char *filename = XNMALLOC (filename_length + 1, char);
- memcpy (string, string_start, string_length);
- string[string_length] = '\0';
+ memcpy (filename, filename_start, filename_length);
+ filename[filename_length] = '\0';
- po_callback_comment_filepos (string, n);
+ po_callback_comment_filepos (filename, n);
- free (string);
+ free (filename);
s = p;
continue;
if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
{
/* Parsed a GNU style file comment with spaces. */
- const char *string_end = s - 1;
- size_t string_length = string_end - string_start;
- char *string = XNMALLOC (string_length + 1, char);
+ filename_end = s - 1;
+ size_t filename_length = filename_end - filename_start;
+ char *filename = XNMALLOC (filename_length + 1, char);
- memcpy (string, string_start, string_length);
- string[string_length] = '\0';
+ memcpy (filename, filename_start, filename_length);
+ filename[filename_length] = '\0';
- po_callback_comment_filepos (string, n);
+ po_callback_comment_filepos (filename, n);
- free (string);
+ free (filename);
s = p;
continue;
{
const char *p = s;
- while (p > string_start)
+ while (p > filename_start)
{
p--;
if (!(*p >= '0' && *p <= '9'))
at the end of STRING. */
if (p < s
- && p > string_start + 1
+ && p > filename_start + 1
&& p[-1] == ':')
{
/* Parsed a GNU style file comment without spaces. */
while (p < s);
{
- size_t string_length = string_end - string_start;
- char *string = XNMALLOC (string_length + 1, char);
+ filename_end = string_end;
+ size_t filename_length = filename_end - filename_start;
+ char *filename = XNMALLOC (filename_length + 1, char);
- memcpy (string, string_start, string_length);
- string[string_length] = '\0';
+ memcpy (filename, filename_start, filename_length);
+ filename[filename_length] = '\0';
- po_callback_comment_filepos (string, n);
+ po_callback_comment_filepos (filename, n);
- free (string);
+ free (filename);
continue;
}
/* Parsed a file comment without line number. */
{
- const char *string_end = s;
- size_t string_length = string_end - string_start;
- char *string = XNMALLOC (string_length + 1, char);
+ size_t filename_length = filename_end - filename_start;
+ char *filename = XNMALLOC (filename_length + 1, char);
- memcpy (string, string_start, string_length);
- string[string_length] = '\0';
+ memcpy (filename, filename_start, filename_length);
+ filename[filename_length] = '\0';
- po_callback_comment_filepos (string, (size_t)(-1));
+ po_callback_comment_filepos (filename, (size_t)(-1));
- free (string);
+ free (filename);
}
}
}
msgcomm-20 msgcomm-21 msgcomm-22 msgcomm-23 msgcomm-24 msgcomm-25 \
msgcomm-26 msgcomm-27 msgcomm-28 \
msgconv-1 msgconv-2 msgconv-3 msgconv-4 msgconv-5 msgconv-6 msgconv-7 \
+ msgconv-8 \
msgen-1 msgen-2 msgen-3 msgen-4 \
msgexec-1 msgexec-2 msgexec-3 msgexec-4 msgexec-5 msgexec-6 \
msgfilter-1 msgfilter-2 msgfilter-3 msgfilter-4 msgfilter-5 \
--- /dev/null
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test file locations with file names that contain spaces.
+
+cat <<\EOF > mco-test8.po
+msgid ""
+msgstr ""
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: xg-test17 a.c:1 xg-test17 x y.c:1
+msgid "foo"
+msgstr ""
+
+#: xg-test17 x y.c:2 xg-test17z.c:1
+msgid "bar"
+msgstr ""
+EOF
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=UTF-8 \
+ -o mco-test8.out1 mco-test8.po || Exit 1
+
+: ${DIFF=diff}
+${DIFF} mco-test8.po mco-test8.out1 || Exit 1
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=GB18030 \
+ -o mco-test8.2.po mco-test8.po || Exit 1
+
+cat <<\EOF > mco-test8.ok
+msgid ""
+msgstr ""
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=GB18030\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: \816¬4xg-test17 a.c\816¬5:1 \816¬4xg-test17 x y.c\816¬5:1
+msgid "foo"
+msgstr ""
+
+#: \816¬4xg-test17 x y.c\816¬5:2 xg-test17z.c:1
+msgid "bar"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} mco-test8.ok mco-test8.2.po || Exit 1
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=GB18030 \
+ -o mco-test8.out2 mco-test8.2.po || Exit 1
+
+: ${DIFF=diff}
+${DIFF} mco-test8.2.po mco-test8.out2 || Exit 1
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=UTF-8 \
+ -o mco-test8.out3 mco-test8.2.po || Exit 1
+
+: ${DIFF=diff}
+${DIFF} mco-test8.po mco-test8.out3 || Exit 1
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=ISO-8859-1 \
+ -o mco-test8.out4 mco-test8.po 2>/dev/null
+test $? = 1 || Exit 1
+
+exit 0
/* List of exported symbols of libgettextsrc on Cygwin.
- Copyright (C) 2006-2007, 2009-2011, 2013-2015, 2019 Free Software Foundation,
+ Copyright (C) 2006-2007, 2009-2011, 2013-2015, 2019, 2021 Free Software Foundation,
Inc.
Written by Bruno Haible <bruno@clisp.org>, 2006.
VARIABLE(po_error_at_line)
VARIABLE(po_gram_lval)
VARIABLE(po_lex_charset)
+VARIABLE(po_lex_isolate_start)
+VARIABLE(po_lex_isolate_end)
#if HAVE_ICONV
VARIABLE(po_lex_iconv)
#endif