msgconv: Handle protected file names with spaces.

author Bruno Haible <bruno@clisp.org>
Sun, 4 Apr 2021 16:26:57 +0000 (18:26 +0200)

committer Bruno Haible <bruno@clisp.org>
Sun, 4 Apr 2021 18:48:47 +0000 (20:48 +0200)
diff --git a/gettext-tools/src/msgl-iconv.c b/gettext-tools/src/msgl-iconv.c

index 0b5ed107e831d53229963a5d55b5356922623c2d..6bafd950624b5ccbab5d6d198b2b7d5844004f8c 100644 (file)
--- a/gettext-tools/src/msgl-iconv.c
+++ b/gettext-tools/src/msgl-iconv.c
@@ -1,5 +1,5 @@
  /* Message list charset and locale charset handling.
-   Copyright (C) 2001-2003, 2005-2009, 2019-2020 Free Software Foundation, Inc.
+   Copyright (C) 2001-2003, 2005-2009, 2019-2021 Free Software Foundation, Inc.
     Written by Bruno Haible <haible@clisp.cons.org>, 2001.
  
     This program is free software: you can redistribute it and/or modify
@@ -40,6 +40,7 @@
  #include "xstriconv.h"
  #include "xstriconveh.h"
  #include "msgl-ascii.h"
+#include "msgl-ofn.h"
  #include "xalloc.h"
  #include "xmalloca.h"
  #include "c-strstr.h"
@@ -363,6 +364,15 @@ iconv_msgdomain_list (msgdomain_list_ty *mdlp,
                 xasprintf (_("target charset \"%s\" is not a portable encoding name."),
                            to_code));
  
+  /* Test whether the control characters required for escaping file names with
+     spaces are present in the target encoding.  */
+  if (msgdomain_list_has_filenames_with_spaces (mdlp)
+      && !(canon_to_code == po_charset_utf8
+           || strcmp (canon_to_code, "GB18030") == 0))
+    po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
+               xasprintf (_("Cannot write the control characters that protect file names with spaces in the %s encoding"),
+                          canon_to_code));
+
    for (k = 0; k < mdlp->nitems; k++)
      iconv_message_list_internal (mdlp->item[k]->messages,
                                   mdlp->encoding, canon_to_code, update_header,
diff --git a/gettext-tools/src/po-charset.c b/gettext-tools/src/po-charset.c

index 2e92c69e29ba0bc360e664463a089550f88b822d..db95cf040971d3f611937e4e09d2d09c021bdd23 100644 (file)
--- a/gettext-tools/src/po-charset.c
+++ b/gettext-tools/src/po-charset.c
@@ -1,5 +1,5 @@
  /* Charset handling while reading PO files.
-   Copyright (C) 2001-2007, 2010, 2019-2020 Free Software Foundation, Inc.
+   Copyright (C) 2001-2007, 2010, 2019-2021 Free Software Foundation, Inc.
     Written by Bruno Haible <haible@clisp.cons.org>, 2001.
  
     This program is free software: you can redistribute it and/or modify
@@ -436,6 +436,13 @@ po_charset_character_iterator (const char *canon_charset)
  /* The PO file's encoding, as specified in the header entry.  */
  const char *po_lex_charset;
  
+/* Representation of U+2068 FIRST STRONG ISOLATE (FSI) in the PO file's
+   encoding, or NULL if not available.  */
+const char *po_lex_isolate_start;
+/* Representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) in the PO file's
+   encoding, or NULL if not available.  */
+const char *po_lex_isolate_end;
+
  #if HAVE_ICONV
  /* Converter from the PO file's encoding to UTF-8.  */
  iconv_t po_lex_iconv;
@@ -448,6 +455,8 @@ void
  po_lex_charset_init ()
  {
    po_lex_charset = NULL;
+  po_lex_isolate_start = NULL;
+  po_lex_isolate_end = NULL;
  #if HAVE_ICONV
    po_lex_iconv = (iconv_t)(-1);
  #endif
@@ -503,6 +512,24 @@ Message conversion to user's charset might not work.\n"),
            const char *envval;
  
            po_lex_charset = canon_charset;
+
+          if (strcmp (canon_charset, "UTF-8") == 0)
+            {
+              po_lex_isolate_start = "\xE2\x81\xA8";
+              po_lex_isolate_end = "\xE2\x81\xA9";
+            }
+          else if (strcmp (canon_charset, "GB18030") == 0)
+            {
+              po_lex_isolate_start = "\x81\x36\xAC\x34";
+              po_lex_isolate_end = "\x81\x36\xAC\x35";
+            }
+          else
+            {
+              /* The other encodings don't contain U+2068, U+2069.  */
+              po_lex_isolate_start = NULL;
+              po_lex_isolate_end = NULL;
+            }
+
  #if HAVE_ICONV
            if (po_lex_iconv != (iconv_t)(-1))
              iconv_close (po_lex_iconv);
@@ -666,6 +693,8 @@ void
  po_lex_charset_close ()
  {
    po_lex_charset = NULL;
+  po_lex_isolate_start = NULL;
+  po_lex_isolate_end = NULL;
  #if HAVE_ICONV
    if (po_lex_iconv != (iconv_t)(-1))
      {
diff --git a/gettext-tools/src/po-charset.h b/gettext-tools/src/po-charset.h

index 75769fc92fed43239b68b09229e008cb84480869..0ab49bd6d5ddd2e456b261800bca85641fa85fc6 100644 (file)
--- a/gettext-tools/src/po-charset.h
+++ b/gettext-tools/src/po-charset.h
@@ -1,5 +1,5 @@
  /* Charset handling while reading PO files.
-   Copyright (C) 2001-2003, 2006 Free Software Foundation, Inc.
+   Copyright (C) 2001-2003, 2006, 2021 Free Software Foundation, Inc.
     Written by Bruno Haible <haible@clisp.cons.org>, 2001.
  
     This program is free software: you can redistribute it and/or modify
@@ -33,7 +33,8 @@ extern "C" {
  
  /* Canonicalize an encoding name.
     The results of this function are statically allocated and can be
-   compared using ==.  */
+   compared using ==.
+   Return NULL if CHARSET is not a valid encoding name.  */
  extern const char *po_charset_canonicalize (const char *charset);
  
  /* The canonicalized encoding name for ASCII.  */
@@ -66,6 +67,13 @@ extern character_iterator_t po_charset_character_iterator (const char *canon_cha
  /* The PO file's encoding, as specified in the header entry.  */
  extern DLL_VARIABLE const char *po_lex_charset;
  
+/* Representation of U+2068 FIRST STRONG ISOLATE (FSI) in the PO file's
+   encoding, or NULL if not available.  */
+extern DLL_VARIABLE const char *po_lex_isolate_start;
+/* Representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) in the PO file's
+   encoding, or NULL if not available.  */
+extern DLL_VARIABLE const char *po_lex_isolate_end;
+
  #if HAVE_ICONV
  /* Converter from the PO file's encoding to UTF-8.  */
  extern DLL_VARIABLE iconv_t po_lex_iconv;
diff --git a/gettext-tools/src/read-catalog-abstract.c b/gettext-tools/src/read-catalog-abstract.c

index 05436c6ba7714157104c238ab0b3d78be8ae8368..aa17eddf83df6891453fe933cc3a8516fc284086 100644 (file)
--- a/gettext-tools/src/read-catalog-abstract.c
+++ b/gettext-tools/src/read-catalog-abstract.c
@@ -1,5 +1,5 @@
  /* Reading PO files, abstract class.
-   Copyright (C) 1995-1996, 1998, 2000-2009, 2013, 2015 Free Software
+   Copyright (C) 1995-1996, 1998, 2000-2009, 2013, 2015, 2021 Free Software
     Foundation, Inc.
  
     This file was written by Peter Miller <millerp@canb.auug.org.au>
@@ -26,9 +26,11 @@
  #include "read-catalog-abstract.h"
  
  #include <limits.h>
+#include <stdbool.h>
  #include <stdlib.h>
  #include <string.h>
  
+#include "po-charset.h"
  #include "xalloc.h"
  #include "xvasprintf.h"
  #include "po-xerror.h"
@@ -453,6 +455,10 @@ po_parse_comment_special (const char *s,
               STRING
     The latter style, without line number, occurs in PO files converted e.g.
     from Pascal .rst files or from OpenOffice resource files.
+   The STRING is either
+             FILENAME
+           or
+             U+2068 FILENAME U+2069.
     Call po_callback_comment_filepos for each of them.  */
  static void
  po_parse_comment_filepos (const char *s)
@@ -463,11 +469,41 @@ po_parse_comment_filepos (const char *s)
          s++;
        if (*s != '\0')
          {
-          const char *string_start = s;
+          bool isolated_filename =
+            (po_lex_isolate_start != NULL
+             && strncmp (s, po_lex_isolate_start,
+                         strlen (po_lex_isolate_start)) == 0);
+          if (isolated_filename)
+            s += strlen (po_lex_isolate_start);
  
-          do
-            s++;
-          while (!(*s == '\0' || *s == ' ' || *s == '\t' || *s == '\n'));
+          const char *filename_start = s;
+          const char *filename_end;
+
+          if (isolated_filename)
+            {
+              for (;; s++)
+                {
+                  if (*s == '\0' || *s == '\n')
+                    {
+                      filename_end = s;
+                      break;
+                    }
+                  if (strncmp (s, po_lex_isolate_end,
+                               strlen (po_lex_isolate_end)) == 0)
+                    {
+                      filename_end = s;
+                      s += strlen (po_lex_isolate_end);
+                      break;
+                    }
+                }
+            }
+          else
+            {
+              do
+                s++;
+              while (!(*s == '\0' || *s == ' ' || *s == '\t' || *s == '\n'));
+              filename_end = s;
+            }
  
            /* See if there is a COLON and NUMBER after the STRING, separated
               through optional spaces.  */
@@ -499,16 +535,15 @@ po_parse_comment_filepos (const char *s)
                      if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
                        {
                          /* Parsed a GNU style file comment with spaces.  */
-                        const char *string_end = s;
-                        size_t string_length = string_end - string_start;
-                        char *string = XNMALLOC (string_length + 1, char);
+                        size_t filename_length = filename_end - filename_start;
+                        char *filename = XNMALLOC (filename_length + 1, char);
  
-                        memcpy (string, string_start, string_length);
-                        string[string_length] = '\0';
+                        memcpy (filename, filename_start, filename_length);
+                        filename[filename_length] = '\0';
  
-                        po_callback_comment_filepos (string, n);
+                        po_callback_comment_filepos (filename, n);
  
-                        free (string);
+                        free (filename);
  
                          s = p;
                          continue;
@@ -541,16 +576,16 @@ po_parse_comment_filepos (const char *s)
                    if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n')
                      {
                        /* Parsed a GNU style file comment with spaces.  */
-                      const char *string_end = s - 1;
-                      size_t string_length = string_end - string_start;
-                      char *string = XNMALLOC (string_length + 1, char);
+                      filename_end = s - 1;
+                      size_t filename_length = filename_end - filename_start;
+                      char *filename = XNMALLOC (filename_length + 1, char);
  
-                      memcpy (string, string_start, string_length);
-                      string[string_length] = '\0';
+                      memcpy (filename, filename_start, filename_length);
+                      filename[filename_length] = '\0';
  
-                      po_callback_comment_filepos (string, n);
+                      po_callback_comment_filepos (filename, n);
  
-                      free (string);
+                      free (filename);
  
                        s = p;
                        continue;
@@ -563,7 +598,7 @@ po_parse_comment_filepos (const char *s)
            {
              const char *p = s;
  
-            while (p > string_start)
+            while (p > filename_start)
                {
                  p--;
                  if (!(*p >= '0' && *p <= '9'))
@@ -577,7 +612,7 @@ po_parse_comment_filepos (const char *s)
                 at the end of STRING.  */
  
              if (p < s
-                && p > string_start + 1
+                && p > filename_start + 1
                  && p[-1] == ':')
                {
                  /* Parsed a GNU style file comment without spaces.  */
@@ -595,15 +630,16 @@ po_parse_comment_filepos (const char *s)
                    while (p < s);
  
                    {
-                    size_t string_length = string_end - string_start;
-                    char *string = XNMALLOC (string_length + 1, char);
+                    filename_end = string_end;
+                    size_t filename_length = filename_end - filename_start;
+                    char *filename = XNMALLOC (filename_length + 1, char);
  
-                    memcpy (string, string_start, string_length);
-                    string[string_length] = '\0';
+                    memcpy (filename, filename_start, filename_length);
+                    filename[filename_length] = '\0';
  
-                    po_callback_comment_filepos (string, n);
+                    po_callback_comment_filepos (filename, n);
  
-                    free (string);
+                    free (filename);
  
                      continue;
                    }
@@ -613,16 +649,15 @@ po_parse_comment_filepos (const char *s)
  
            /* Parsed a file comment without line number.  */
            {
-            const char *string_end = s;
-            size_t string_length = string_end - string_start;
-            char *string = XNMALLOC (string_length + 1, char);
+            size_t filename_length = filename_end - filename_start;
+            char *filename = XNMALLOC (filename_length + 1, char);
  
-            memcpy (string, string_start, string_length);
-            string[string_length] = '\0';
+            memcpy (filename, filename_start, filename_length);
+            filename[filename_length] = '\0';
  
-            po_callback_comment_filepos (string, (size_t)(-1));
+            po_callback_comment_filepos (filename, (size_t)(-1));
  
-            free (string);
+            free (filename);
            }
          }
      }
diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am

index 181288a0cd30c9a7a470f1634048634e4e8e7fdd..4cf3be2afbf7de8679f74b4b9e01cf862e6b2b6c 100644 (file)
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am
@@ -42,6 +42,7 @@ TESTS = gettext-1 gettext-2 \
         msgcomm-20 msgcomm-21 msgcomm-22 msgcomm-23 msgcomm-24 msgcomm-25 \
         msgcomm-26 msgcomm-27 msgcomm-28 \
         msgconv-1 msgconv-2 msgconv-3 msgconv-4 msgconv-5 msgconv-6 msgconv-7 \
+       msgconv-8 \
         msgen-1 msgen-2 msgen-3 msgen-4 \
         msgexec-1 msgexec-2 msgexec-3 msgexec-4 msgexec-5 msgexec-6 \
         msgfilter-1 msgfilter-2 msgfilter-3 msgfilter-4 msgfilter-5 \
diff --git a/gettext-tools/tests/msgconv-8 b/gettext-tools/tests/msgconv-8

new file mode 100755 (executable)

index 0000000..0d9837b
--- /dev/null
+++ b/gettext-tools/tests/msgconv-8
@@ -0,0 +1,71 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test file locations with file names that contain spaces.
+
+cat <<\EOF > mco-test8.po
+msgid ""
+msgstr ""
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: ⁨xg-test17 a.c⁩:1 ⁨xg-test17 x y.c⁩:1
+msgid "foo"
+msgstr ""
+
+#: ⁨xg-test17 x y.c⁩:2 xg-test17z.c:1
+msgid "bar"
+msgstr ""
+EOF
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=UTF-8 \
+           -o mco-test8.out1 mco-test8.po || Exit 1
+
+: ${DIFF=diff}
+${DIFF} mco-test8.po mco-test8.out1 || Exit 1
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=GB18030 \
+           -o mco-test8.2.po mco-test8.po || Exit 1
+
+cat <<\EOF > mco-test8.ok
+msgid ""
+msgstr ""
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=GB18030\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: \816¬4xg-test17 a.c\816¬5:1 \816¬4xg-test17 x y.c\816¬5:1
+msgid "foo"
+msgstr ""
+
+#: \816¬4xg-test17 x y.c\816¬5:2 xg-test17z.c:1
+msgid "bar"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} mco-test8.ok mco-test8.2.po || Exit 1
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=GB18030 \
+           -o mco-test8.out2 mco-test8.2.po || Exit 1
+
+: ${DIFF=diff}
+${DIFF} mco-test8.2.po mco-test8.out2 || Exit 1
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=UTF-8 \
+           -o mco-test8.out3 mco-test8.2.po || Exit 1
+
+: ${DIFF=diff}
+${DIFF} mco-test8.po mco-test8.out3 || Exit 1
+
+: ${MSGCONV=msgconv}
+${MSGCONV} --to-code=ISO-8859-1 \
+           -o mco-test8.out4 mco-test8.po 2>/dev/null
+test $? = 1 || Exit 1
+
+exit 0
diff --git a/gettext-tools/woe32dll/gettextsrc-exports.c b/gettext-tools/woe32dll/gettextsrc-exports.c

index 6d760895961db57e013552b6b3f248bfa8eb17c2..6e8ea9c0fa8116bdffb97260c28e6064676a2164 100644 (file)
--- a/gettext-tools/woe32dll/gettextsrc-exports.c
+++ b/gettext-tools/woe32dll/gettextsrc-exports.c
@@ -1,5 +1,5 @@
  /* List of exported symbols of libgettextsrc on Cygwin.
-   Copyright (C) 2006-2007, 2009-2011, 2013-2015, 2019 Free Software Foundation,
+   Copyright (C) 2006-2007, 2009-2011, 2013-2015, 2019, 2021 Free Software Foundation,
     Inc.
     Written by Bruno Haible <bruno@clisp.org>, 2006.
  
@@ -77,6 +77,8 @@ VARIABLE(po_error)
  VARIABLE(po_error_at_line)
  VARIABLE(po_gram_lval)
  VARIABLE(po_lex_charset)
+VARIABLE(po_lex_isolate_start)
+VARIABLE(po_lex_isolate_end)
  #if HAVE_ICONV
  VARIABLE(po_lex_iconv)
  #endif
author	Bruno Haible <bruno@clisp.org>
	Sun, 4 Apr 2021 16:26:57 +0000 (18:26 +0200)
committer	Bruno Haible <bruno@clisp.org>
	Sun, 4 Apr 2021 18:48:47 +0000 (20:48 +0200)
Files changed:
gettext-tools/src/msgl-iconv.c
gettext-tools/src/po-charset.c
gettext-tools/src/po-charset.h
gettext-tools/src/read-catalog-abstract.c
gettext-tools/tests/Makefile.am
gettext-tools/tests/msgconv-8 (new file, mode 0755)
gettext-tools/woe32dll/gettextsrc-exports.c