]> git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
zip: add support for Info-ZIP Unicode Path Extra Field 1180/head
author: Tomasz Mikolajewski <mtomasz@chromium.org>
Mon, 10 Nov 2014 07:29:53 +0000 (16:29 +0900)
committer: Mike Frysinger <vapier@gentoo.org>
Thu, 25 Apr 2019 23:34:42 +0000 (09:34 +1000)
The zip specification supports storing path names in UTF-8 encoding
via the Info-ZIP Unicode Path Extra Field (0x7075).

Makefile.am
libarchive/archive_read_support_format_zip.c
libarchive/test/CMakeLists.txt
libarchive/test/test_read_format_zip_7075_utf8_paths.c [new file with mode: 0644]
libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu [new file with mode: 0644]

index b7a2a6bcaa78029e8c335a56befee07ef095e468..bb887b7d2d9f5bc07675dc3b633386c7755c5a70 100644 (file)
@@ -512,6 +512,7 @@ libarchive_test_SOURCES= \
        libarchive/test/test_read_format_warc.c \
        libarchive/test/test_read_format_xar.c \
        libarchive/test/test_read_format_zip.c \
+       libarchive/test/test_read_format_zip_7075_utf8_paths.c \
        libarchive/test/test_read_format_zip_comment_stored.c \
        libarchive/test/test_read_format_zip_encryption_data.c \
        libarchive/test/test_read_format_zip_encryption_partially.c \
@@ -861,6 +862,7 @@ libarchive_test_EXTRA_DIST=\
        libarchive/test/test_read_format_ustar_filename_koi8r.tar.Z.uu \
        libarchive/test/test_read_format_warc.warc.uu \
        libarchive/test/test_read_format_zip.zip.uu \
+       libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu \
        libarchive/test/test_read_format_zip_bz2_hang.zip.uu \
        libarchive/test/test_read_format_zip_bzip2.zipx.uu \
        libarchive/test/test_read_format_zip_bzip2_multi.zipx.uu \
index 6937969c1626b1fee4bf54edd90d108935a4a69f..1e65968a6640a0910dee4cd43b33405faf689998 100644 (file)
@@ -472,9 +472,11 @@ zip_time(const char *p)
  *  triplets.  id and size are 2 bytes each.
  */
 static int
-process_extra(struct archive_read *a, const char *p, size_t extra_length, struct zip_entry* zip_entry)
+process_extra(struct archive_read *a, struct archive_entry *entry,
+               const char *p, size_t extra_length, struct zip_entry* zip_entry)
 {
        unsigned offset = 0;
+       struct zip *zip = (struct zip *)(a->format->data);
 
        if (extra_length == 0) {
                return ARCHIVE_OK;
@@ -732,6 +734,35 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct
                        }
                        break;
                }
+               case 0x7075:
+               {
+                       /* Info-ZIP Unicode Path Extra Field. */
+                       if (datasize < 5 || entry == NULL)
+                               break;
+                       offset += 5;
+                       datasize -= 5;
+
+                       /* The path name in this field is always encoded in UTF-8. */
+                       if (zip->sconv_utf8 == NULL) {
+                               zip->sconv_utf8 =
+                                       archive_string_conversion_from_charset(
+                                       &a->archive, "UTF-8", 1);
+                               // If the converter from UTF-8 is not available, then the
+                               // path name from the main field will more likely be correct.
+                               if (zip->sconv_utf8 == NULL)
+                                       break;
+                       }
+
+                       if (archive_entry_copy_pathname_l(entry,
+                           p + offset, datasize, zip->sconv_utf8) != 0) {
+                               // Ignore the error, and fallback to the path name from the main
+                               // field.
+#ifdef DEBUG
+                               fprintf(stderr, "Failed to read the ZIP 0x7075 extra field path.\n");
+#endif
+                       }
+                       break;
+               }
                case 0x7855:
                        /* Info-ZIP Unix Extra Field (type 2) "Ux". */
 #ifdef DEBUG
@@ -928,7 +959,7 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry,
                return (ARCHIVE_FATAL);
        }
 
-       if (ARCHIVE_OK != process_extra(a, h, extra_length, zip_entry)) {
+       if (ARCHIVE_OK != process_extra(a, entry, h, extra_length, zip_entry)) {
                return ARCHIVE_FATAL;
        }
        __archive_read_consume(a, extra_length);
@@ -3391,7 +3422,8 @@ expose_parent_dirs(struct zip *zip, const char *name, size_t name_length)
 }
 
 static int
-slurp_central_directory(struct archive_read *a, struct zip *zip)
+slurp_central_directory(struct archive_read *a, struct archive_entry* entry,
+    struct zip *zip)
 {
        ssize_t i;
        unsigned found;
@@ -3538,7 +3570,7 @@ slurp_central_directory(struct archive_read *a, struct zip *zip)
                            "Truncated ZIP file header");
                        return ARCHIVE_FATAL;
                }
-               if (ARCHIVE_OK != process_extra(a, p + filename_length, extra_length, zip_entry)) {
+               if (ARCHIVE_OK != process_extra(a, entry, p + filename_length, extra_length, zip_entry)) {
                        return ARCHIVE_FATAL;
                }
 
@@ -3797,7 +3829,7 @@ archive_read_format_zip_seekable_read_header(struct archive_read *a,
                a->archive.archive_format_name = "ZIP";
 
        if (zip->zip_entries == NULL) {
-               r = slurp_central_directory(a, zip);
+               r = slurp_central_directory(a, entry, zip);
                if (r != ARCHIVE_OK)
                        return r;
                /* Get first entry whose local header offset is lower than
index 690a83c5cd60c2d20bd0ce8b8cad8ed412051d79..c6c95aa563fbb7acb88edf89bfca186cddf4bd52 100644 (file)
@@ -165,6 +165,7 @@ IF(ENABLE_TEST)
     test_read_format_warc.c
     test_read_format_xar.c
     test_read_format_zip.c
+    test_read_format_zip_7075_utf8_paths.c
     test_read_format_zip_comment_stored.c
     test_read_format_zip_encryption_data.c
     test_read_format_zip_encryption_header.c
diff --git a/libarchive/test/test_read_format_zip_7075_utf8_paths.c b/libarchive/test/test_read_format_zip_7075_utf8_paths.c
new file mode 100644 (file)
index 0000000..bfc95b0
--- /dev/null
@@ -0,0 +1,88 @@
+/*-
+ * Copyright (c) 2003-2007 Tim Kientzle
+ * Copyright (c) 2011 Michihiro NAKAJIMA
+ * Copyright (c) 2019 Mike Frysinger
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "test.h"
+__FBSDID("$FreeBSD$");
+
+#include <locale.h>
+#include <stdlib.h>
+
+/*
+ * Walk the entries of an already-opened reference archive in order and
+ * check the wide-character pathname reported for each of the four entries,
+ * then check that the next header read reports end of archive.
+ *
+ * The non-ASCII letters in the expected names are written as universal
+ * character names rather than raw characters, so the expected strings do
+ * not depend on the encoding the compiler assumes for this source file.
+ */
+static void
+verify(struct archive *a) {
+       struct archive_entry *ae;
+       const wchar_t *wp;
+
+       /* Entry 1: plain ASCII name. */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+       assert((wp = archive_entry_pathname_w(ae)) != NULL);
+       assertEqualInt(0, wcscmp(wp, L"File 1.txt"));
+
+       /* Entry 2: U+00F6 LATIN SMALL LETTER O WITH DIAERESIS. */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+       assert((wp = archive_entry_pathname_w(ae)) != NULL);
+       assertEqualInt(0, wcscmp(wp, L"File 2 - \u00f6.txt"));
+
+       /* Entry 3: U+00E4 LATIN SMALL LETTER A WITH DIAERESIS. */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+       assert((wp = archive_entry_pathname_w(ae)) != NULL);
+       assertEqualInt(0, wcscmp(wp, L"File 3 - \u00e4.txt"));
+
+       /* Entry 4: U+00E5 LATIN SMALL LETTER A WITH RING ABOVE. */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+       assert((wp = archive_entry_pathname_w(ae)) != NULL);
+       assertEqualInt(0, wcscmp(wp, L"File 4 - \u00e5.txt"));
+
+       /* Nothing may follow the fourth entry. */
+       assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
+}
+
+/*
+ * Read the reference archive twice -- once opened by file name and once
+ * from an in-memory copy of the same file -- and run verify() on each
+ * reader.  The test is skipped when the en_US.UTF-8 locale cannot be
+ * selected.
+ *
+ * NOTE(review): the test name omits the "7075" that the source file and
+ * the reference archive carry in their names; confirm this is intended.
+ */
+DEFINE_TEST(test_read_format_zip_utf8_paths)
+{
+       const char *refname = "test_read_format_zip_7075_utf8_paths.zip";
+       struct archive *a;
+       char *p;
+       size_t s;
+
+       extract_reference_file(refname);
+
+       if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
+               skipping("en_US.UTF-8 locale not available on this system.");
+               return;
+       }
+
+       /* Verify with seeking reader. */
+       assert((a = archive_read_new()) != NULL);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 10240));
+       verify(a);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a));
+
+       /* Verify with streaming reader. */
+       p = slurpfile(&s, refname);
+       assert((a = archive_read_new()) != NULL);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, read_open_memory(a, p, s, 31));
+       verify(a);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
+       /* Destroy the reader the same way as the seeking case above. */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a));
+       /* Release the in-memory copy obtained from slurpfile(). */
+       free(p);
+}
diff --git a/libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu b/libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu
new file mode 100644 (file)
index 0000000..65c34f5
--- /dev/null
@@ -0,0 +1,20 @@
+begin 644 test_read_format_zip_7075_utf8_paths.zip
+M4$L#!!0````(`,$^9D5BZ95P"P````D````*````1FEL92`Q+G1X=`M)+2Y1
+M2,O,204`4$L#!!0````(`,$^9D5BZ95P"P````D````/`!@`1FEL92`R("T@
+M>'@N='AT=7`4``'W!4IU1FEL92`R("T@P[8N='AT"TDM+E%(R\Q)!0!02P,$
+M%`````@`P3YF16+IE7`+````"0````\`&`!&:6QE(#,@+2!X>"YT>'1U<!0`
+M`3:&T0)&:6QE(#,@+2##I"YT>'0+22TN44C+S$D%`%!+`P04````"`#!/F9%
+M8NF5<`L````)````#P`8`$9I;&4@-"`M('AX+G1X='5P%``!G[AP'$9I;&4@
+M-"`M(,.E+G1X=`M)+2Y12,O,204`4$L!`A\`%`````@`P3YF16+IE7`+````
+M"0````H`)``````````@`````````$9I;&4@,2YT>'0*`"````````$`&``Q
+M6UASCOG/`5^OQVV.^<\!7Z_';8[YSP%02P$"'P`4````"`#!/F9%8NF5<`L`
+M```)````#@`\`````````"`````S````1FEL92`R("T@E"YT>'0*`"``````
+M``$`&``Q6UASCOG/`2M.B72.^<\!*TZ)=([YSP%U<!0``?<%2G5&:6QE(#(@
+M+2##MBYT>'102P$"'P`4````"`#!/F9%8NF5<`L````)````#@`\````````
+M`"````"#````1FEL92`S("T@A"YT>'0*`"````````$`&``Q6UASCOG/`5<$
+M&W>.^<\!5P0;=X[YSP%U<!0``3:&T0)&:6QE(#,@+2##I"YT>'102P$"'P`4
+M````"`#!/F9%8NF5<`L````)````#@`\`````````"````#3````1FEL92`T
+M("T@ABYT>'0*`"````````$`&``Q6UASCOG/`6#)ZG:.^<\!8,GJ=H[YSP%U
+M<!0``9^X<!Q&:6QE(#0@+2##I2YT>'102P4&``````0`!`#$`0``(P$`````
+`
+end