From: Tomasz Mikolajewski Date: Mon, 10 Nov 2014 07:29:53 +0000 (+0900) Subject: zip: add support for Info-ZIP Unicode Path Extra Field X-Git-Tag: v3.4.0~44^2 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6c55c379b18cdb448a92f9509847f0820352f857;p=thirdparty%2Flibarchive.git zip: add support for Info-ZIP Unicode Path Extra Field The zip specification supports storing path names in UTF-8 encoding via the Info-ZIP Unicode Path Extra Field (0x7075). --- diff --git a/Makefile.am b/Makefile.am index b7a2a6bca..bb887b7d2 100644 --- a/Makefile.am +++ b/Makefile.am @@ -512,6 +512,7 @@ libarchive_test_SOURCES= \ libarchive/test/test_read_format_warc.c \ libarchive/test/test_read_format_xar.c \ libarchive/test/test_read_format_zip.c \ + libarchive/test/test_read_format_zip_7075_utf8_paths.c \ libarchive/test/test_read_format_zip_comment_stored.c \ libarchive/test/test_read_format_zip_encryption_data.c \ libarchive/test/test_read_format_zip_encryption_partially.c \ @@ -861,6 +862,7 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_read_format_ustar_filename_koi8r.tar.Z.uu \ libarchive/test/test_read_format_warc.warc.uu \ libarchive/test/test_read_format_zip.zip.uu \ + libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu \ libarchive/test/test_read_format_zip_bz2_hang.zip.uu \ libarchive/test/test_read_format_zip_bzip2.zipx.uu \ libarchive/test/test_read_format_zip_bzip2_multi.zipx.uu \ diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index 6937969c1..1e65968a6 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -472,9 +472,11 @@ zip_time(const char *p) * triplets. id and size are 2 bytes each. 
*/ static int -process_extra(struct archive_read *a, const char *p, size_t extra_length, struct zip_entry* zip_entry) +process_extra(struct archive_read *a, struct archive_entry *entry, + const char *p, size_t extra_length, struct zip_entry* zip_entry) { unsigned offset = 0; + struct zip *zip = (struct zip *)(a->format->data); if (extra_length == 0) { return ARCHIVE_OK; @@ -732,6 +734,35 @@ process_extra(struct archive_read *a, const char *p, size_t extra_length, struct } break; } + case 0x7075: + { + /* Info-ZIP Unicode Path Extra Field. */ + if (datasize < 5 || entry == NULL) + break; + offset += 5; + datasize -= 5; + + /* The path name in this field is always encoded in UTF-8. */ + if (zip->sconv_utf8 == NULL) { + zip->sconv_utf8 = + archive_string_conversion_from_charset( + &a->archive, "UTF-8", 1); + // If the converter from UTF-8 is not available, then the + // path name from the main field will more likely be correct. + if (zip->sconv_utf8 == NULL) + break; + } + + if (archive_entry_copy_pathname_l(entry, + p + offset, datasize, zip->sconv_utf8) != 0) { + // Ignore the error, and fallback to the path name from the main + // field. +#ifdef DEBUG + fprintf(stderr, "Failed to read the ZIP 0x7075 extra field path.\n"); +#endif + } + break; + } case 0x7855: /* Info-ZIP Unix Extra Field (type 2) "Ux". 
*/ #ifdef DEBUG @@ -928,7 +959,7 @@ zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry, return (ARCHIVE_FATAL); } - if (ARCHIVE_OK != process_extra(a, h, extra_length, zip_entry)) { + if (ARCHIVE_OK != process_extra(a, entry, h, extra_length, zip_entry)) { return ARCHIVE_FATAL; } __archive_read_consume(a, extra_length); @@ -3391,7 +3422,8 @@ expose_parent_dirs(struct zip *zip, const char *name, size_t name_length) } static int -slurp_central_directory(struct archive_read *a, struct zip *zip) +slurp_central_directory(struct archive_read *a, struct archive_entry* entry, + struct zip *zip) { ssize_t i; unsigned found; @@ -3538,7 +3570,7 @@ slurp_central_directory(struct archive_read *a, struct zip *zip) "Truncated ZIP file header"); return ARCHIVE_FATAL; } - if (ARCHIVE_OK != process_extra(a, p + filename_length, extra_length, zip_entry)) { + if (ARCHIVE_OK != process_extra(a, entry, p + filename_length, extra_length, zip_entry)) { return ARCHIVE_FATAL; } @@ -3797,7 +3829,7 @@ archive_read_format_zip_seekable_read_header(struct archive_read *a, a->archive.archive_format_name = "ZIP"; if (zip->zip_entries == NULL) { - r = slurp_central_directory(a, zip); + r = slurp_central_directory(a, entry, zip); if (r != ARCHIVE_OK) return r; /* Get first entry whose local header offset is lower than diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index 690a83c5c..c6c95aa56 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -165,6 +165,7 @@ IF(ENABLE_TEST) test_read_format_warc.c test_read_format_xar.c test_read_format_zip.c + test_read_format_zip_7075_utf8_paths.c test_read_format_zip_comment_stored.c test_read_format_zip_encryption_data.c test_read_format_zip_encryption_header.c diff --git a/libarchive/test/test_read_format_zip_7075_utf8_paths.c b/libarchive/test/test_read_format_zip_7075_utf8_paths.c new file mode 100644 index 000000000..bfc95b097 --- /dev/null +++ 
b/libarchive/test/test_read_format_zip_7075_utf8_paths.c @@ -0,0 +1,88 @@ +/*- + * Copyright (c) 2003-2007 Tim Kientzle + * Copyright (c) 2011 Michihiro NAKAJIMA + * Copyright (c) 2019 Mike Frysinger + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "test.h" +__FBSDID("$FreeBSD$"); + +#include <locale.h> + +static void +verify(struct archive *a) { + struct archive_entry *ae; + const wchar_t *wp; + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assert((wp = archive_entry_pathname_w(ae)) != NULL); + assertEqualInt(0, wcscmp(wp, L"File 1.txt")); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assert((wp = archive_entry_pathname_w(ae)) != NULL); + assertEqualInt(0, wcscmp(wp, L"File 2 - ö.txt")); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assert((wp = archive_entry_pathname_w(ae)) != NULL); + assertEqualInt(0, wcscmp(wp, L"File 3 - ä.txt")); + + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assert((wp = archive_entry_pathname_w(ae)) != NULL); + assertEqualInt(0, wcscmp(wp, L"File 4 - å.txt")); + + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); +} + +DEFINE_TEST(test_read_format_zip_utf8_paths) +{ + const char *refname = "test_read_format_zip_7075_utf8_paths.zip"; + struct archive *a; + char *p; + size_t s; + + extract_reference_file(refname); + + if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { + skipping("en_US.UTF-8 locale not available on this system."); + return; + } + + /* Verify with seeking reader. */ + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, refname, 10240)); + verify(a); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_free(a)); + + /* Verify with streaming reader. 
*/ + p = slurpfile(&s, refname); + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + assertEqualIntA(a, ARCHIVE_OK, read_open_memory(a, p, s, 31)); + verify(a); + assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_free(a)); +} diff --git a/libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu b/libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu new file mode 100644 index 000000000..65c34f517 --- /dev/null +++ b/libarchive/test/test_read_format_zip_7075_utf8_paths.zip.uu @@ -0,0 +1,20 @@ +begin 644 test_read_format_zip_7075_utf8_paths.zip +M4$L#!!0````(`,$^9D5BZ95P"P````D````*````1FEL92`Q+G1X=`M)+2Y1 +M2,O,204`4$L#!!0````(`,$^9D5BZ95P"P````D````/`!@`1FEL92`R("T@ +M>'@N='AT=7`4``'W!4IU1FEL92`R("T@P[8N='AT"TDM+E%(R\Q)!0!02P,$ +M%`````@`P3YF16+IE7`+````"0````\`&`!&:6QE(#,@+2!X>"YT>'1U'0+22TN44C+S$D%`%!+`P04````"`#!/F9% +M8NF5<`L````)````#P`8`$9I;&4@-"`M('AX+G1X='5P%``!G[AP'$9I;&4@ +M-"`M(,.E+G1X=`M)+2Y12,O,204`4$L!`A\`%`````@`P3YF16+IE7`+```` +M"0````H`)``````````@`````````$9I;&4@,2YT>'0*`"````````$`&``Q +M6UASCOG/`5^OQVV.^<\!7Z_';8[YSP%02P$"'P`4````"`#!/F9%8NF5<`L` +M```)````#@`\`````````"`````S````1FEL92`R("T@E"YT>'0*`"`````` +M``$`&``Q6UASCOG/`2M.B72.^<\!*TZ)=([YSP%U'102P$"'P`4````"`#!/F9%8NF5<`L````)````#@`\```````` +M`"````"#````1FEL92`S("T@A"YT>'0*`"````````$`&``Q6UASCOG/`5<$ +M&W>.^<\!5P0;=X[YSP%U'102P$"'P`4 +M````"`#!/F9%8NF5<`L````)````#@`\`````````"````#3````1FEL92`T +M("T@ABYT>'0*`"````````$`&``Q6UASCOG/`6#)ZG:.^<\!8,GJ=H[YSP%U +M'102P4&``````0`!`#$`0``(P$````` +` +end