git.ipfire.org Git - thirdparty/gettext.git/commitdiff
xgettext: In language PO, handle non-ASCII non-UTF-8 input files correctly.
author Bruno Haible <bruno@clisp.org>
Fri, 10 Mar 2023 22:32:24 +0000 (23:32 +0100)
committer Bruno Haible <bruno@clisp.org>
Tue, 14 Mar 2023 01:57:22 +0000 (02:57 +0100)
* gettext-tools/src/x-po.c: Include msgl-iconv.h, msgl-ascii.h, po-charset.h.
(extract): If a header charset is present, convert the messages to UTF-8;
otherwise verify that they are all ASCII.
* gettext-tools/tests/xgettext-po-3: New file.
* gettext-tools/tests/xgettext-po-4: New file.
* gettext-tools/tests/testdata/xg-po-3.po: New file.
* gettext-tools/tests/testdata/xg-po-4.po: New file.
* gettext-tools/tests/Makefile.am (TESTS, EXTRA_DIST): Add them.
* NEWS: Mention the change.

NEWS
gettext-tools/src/x-po.c
gettext-tools/tests/Makefile.am
gettext-tools/tests/testdata/xg-po-3.po [new file with mode: 0644]
gettext-tools/tests/testdata/xg-po-4.po [new file with mode: 0644]
gettext-tools/tests/xgettext-po-3 [new file with mode: 0755]
gettext-tools/tests/xgettext-po-4 [new file with mode: 0755]

diff --git a/NEWS b/NEWS
index 1456de2e1d5a63873f1bd752a9aed631d9c04720..828853bc32b98f5cc70f346136bdfe92024a7f93 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -17,7 +17,9 @@ Version 0.21.2 - February 2023
     arguments.
 
 * xgettext:
-  The xgettext option '--sorted-output' is now deprecated.
+  - The xgettext option '--sorted-output' is now deprecated.
+  - xgettext input files of type PO that are not all ASCII and not UTF-8
+    encoded are now handled correctly.
 
 * Emacs PO mode:
   Fix an incompatibility with Emacs version 29 or newer.
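diff --git a/gettext-tools/src/x-po.c b/gettext-tools/src/x-po.c
index 9b5690f94744970a36faa34eea4cb0bcae11e20a..fd6b517a06d087186a7898dfff13e8f50d76f270 100644 (file)
--- a/gettext-tools/src/x-po.c
+++ b/gettext-tools/src/x-po.c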
@@ -1,5 +1,5 @@
 /* xgettext PO, JavaProperties, and NXStringTable backends.
-   Copyright (C) 1995-1998, 2000-2003, 2005-2006, 2008-2009, 2014, 2018, 2020 Free Software Foundation, Inc.
+   Copyright (C) 1995-1998, 2000-2003, 2005-2006, 2008-2009, 2014, 2018, 2020, 2023 Free Software Foundation, Inc.
 
    This file was written by Peter Miller <millerp@canb.auug.org.au>
 
@@ -37,6 +37,9 @@
 #include "read-po.h"
 #include "read-properties.h"
 #include "read-stringtable.h"
+#include "msgl-iconv.h"
+#include "msgl-ascii.h"
+#include "po-charset.h"
 #include "po-lex.h"
 #include "gettext.h"
 
@@ -201,10 +204,31 @@ extract (FILE *fp,
                     }
                 }
             }
+
+          if (!input_syntax->produces_utf8)
+            {
+              /* Convert the messages to UTF-8.
+                 finalize_header() expects this.  */
+              message_list_ty *mlp = mdlp->item[0]->messages;
+              iconv_message_list (mlp, NULL, po_charset_utf8, logical_filename);
+            }
         }
 
       free (header_charset);
     }
+  else
+    {
+      if (!xgettext_omit_header && !input_syntax->produces_utf8)
+        {
+          /* finalize_header() expects the messages to be in UTF-8 encoding.
+             We don't know the encoding here; therefore we have to reject the
+             input if it is not entirely ASCII.  */
+          if (!is_ascii_msgdomain_list (mdlp))
+            error (EXIT_FAILURE, 0,
+                   _("%s: input file doesn't contain a header entry with a charset specification"),
+                   logical_filename);
+        }
+    }
 }
 
 
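diff --git a/gettext-tools/tests/Makefile.am b/gettext-tools/tests/Makefile.am
index ba4ebc3a9d919fda5a4f9943e325557f523977b2..240a88a5185f6de10ca6ac1a77e9462f6432a170 100644 (file)
--- a/gettext-tools/tests/Makefile.am
+++ b/gettext-tools/tests/Makefile.am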
@@ -132,7 +132,7 @@ TESTS = gettext-1 gettext-2 \
        xgettext-php-1 xgettext-php-2 xgettext-php-3 xgettext-php-4 \
        xgettext-php-stackovfl-1 xgettext-php-stackovfl-2 \
        xgettext-php-stackovfl-3 xgettext-php-stackovfl-4 \
-       xgettext-po-1 xgettext-po-2 \
+       xgettext-po-1 xgettext-po-2 xgettext-po-3 xgettext-po-4 \
        xgettext-properties-1 xgettext-properties-2 xgettext-properties-3 \
        xgettext-properties-4 \
        xgettext-rst-1 xgettext-rst-2 \
@@ -227,7 +227,8 @@ EXTRA_DIST += init.sh init.cfg $(TESTS) \
        xgettext-1 \
        xgettext-c-1 xg-c-comment-6.c xg-c-escape-3.c xg-vala-2.vala \
        common/supplemental/plurals.xml \
-       testdata/xg-el-so-3.el testdata/xg-el-so-4.el
+       testdata/xg-el-so-3.el testdata/xg-el-so-4.el \
+       testdata/xg-po-3.po testdata/xg-po-4.po
 
 XGETTEXT = ../src/xgettext
 
diff --git a/gettext-tools/tests/testdata/xg-po-3.po b/gettext-tools/tests/testdata/xg-po-3.po
new file mode 100644 (file)
index 0000000..c2930fe
--- /dev/null
+++ b/gettext-tools/tests/testdata/xg-po-3.po
@@ -0,0 +1,7 @@
+msgid ""
+msgstr ""
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "ü"
+msgstr ""
diff --git a/gettext-tools/tests/testdata/xg-po-4.po b/gettext-tools/tests/testdata/xg-po-4.po
new file mode 100644 (file)
index 0000000..cc199ee
--- /dev/null
+++ b/gettext-tools/tests/testdata/xg-po-4.po
@@ -0,0 +1,2 @@
+msgid "ü"
+msgstr ""
diff --git a/gettext-tools/tests/xgettext-po-3 b/gettext-tools/tests/xgettext-po-3
new file mode 100755 (executable)
index 0000000..6742b1e
--- /dev/null
+++ b/gettext-tools/tests/xgettext-po-3
@@ -0,0 +1,36 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test PO extractor with non-ASCII input file with header entry.
+# The input declares charset=ISO-8859-1; the expected POT output is UTF-8.
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --no-location -d xg-po-3.tmp "$wabs_srcdir"/testdata/xg-po-3.po || Exit 1
+func_filter_POT_Creation_Date xg-po-3.tmp.po xg-po-3.pot
+
+cat <<\EOF > xg-po-3.ok
+# SOME DESCRIPTIVE TITLE.
+# Copyright (C) YEAR THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the PACKAGE package.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"Report-Msgid-Bugs-To: \n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+msgid "ü"
+msgstr ""
+EOF
+
+: ${DIFF=diff}
+${DIFF} xg-po-3.ok xg-po-3.pot || Exit 1
+
+Exit 0
diff --git a/gettext-tools/tests/xgettext-po-4 b/gettext-tools/tests/xgettext-po-4
new file mode 100755 (executable)
index 0000000..6ed4e88
--- /dev/null
+++ b/gettext-tools/tests/xgettext-po-4
@@ -0,0 +1,12 @@
+#! /bin/sh
+. "${srcdir=.}/init.sh"; path_prepend_ . ../src
+
+# Test PO extractor with non-ASCII input file without header entry.
+# Without a charset declaration xgettext must reject the input (exit status 1).
+: ${XGETTEXT=xgettext}
+${XGETTEXT} --no-location -d xg-po-4.tmp "$wabs_srcdir"/testdata/xg-po-4.po 2>xg-po-4.err
+result=$?
+cat xg-po-4.err
+test $result = 1 || Exit 1
+
+Exit 0