From 52dfe43a8051f09d23d3636b20f55e5ffc4f045d Mon Sep 17 00:00:00 2001 From: yyyy Date: Fri, 13 Sep 2024 06:05:01 +0800 Subject: [PATCH] Fix Windows path when writing zip entries (#2309) Before writing a zip entry, its pathname might be modified for two reasons: 1. A path using Windows path separators will be converted to POSIX style. 2. A path using local encoding will be transcoded if a target charset is set. We must make sure these two mechanisms can coexist without overwriting each other. --- .gitignore | 2 + Makefile.am | 1 + libarchive/archive_write_set_format_zip.c | 6 +- libarchive/test/CMakeLists.txt | 1 + .../test/test_write_format_zip_windows_path.c | 100 ++++++++++++++++++ 5 files changed, 107 insertions(+), 3 deletions(-) create mode 100644 libarchive/test/test_write_format_zip_windows_path.c diff --git a/.gitignore b/.gitignore index 368828b8d..1e60b50dc 100644 --- a/.gitignore +++ b/.gitignore @@ -63,8 +63,10 @@ CMakeCache.txt CMakeFiles/ DartConfiguration.tcl cmake.tmp/ +cmake-*/ .vs/ .vscode/ +.idea/ doc/html/*.html doc/man/*.1 diff --git a/Makefile.am b/Makefile.am index 326280cde..c978508d0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -647,6 +647,7 @@ libarchive_test_SOURCES= \ libarchive/test/test_write_format_zip_file_zip64.c \ libarchive/test/test_write_format_zip_large.c \ libarchive/test/test_write_format_zip_stream.c \ + libarchive/test/test_write_format_zip_windows_path.c \ libarchive/test/test_write_format_zip_zip64.c \ libarchive/test/test_write_open_memory.c \ libarchive/test/test_write_read_format_zip.c \ diff --git a/libarchive/archive_write_set_format_zip.c b/libarchive/archive_write_set_format_zip.c index e01cde687..ad3e90759 100644 --- a/libarchive/archive_write_set_format_zip.c +++ b/libarchive/archive_write_set_format_zip.c @@ -609,7 +609,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) const char *p; size_t len; - if (archive_entry_pathname_l(entry, &p, &len, sconv) != 0) { + if
(archive_entry_pathname_l(zip->entry, &p, &len, sconv) != 0) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory for Pathname"); @@ -618,7 +618,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Can't translate Pathname '%s' to %s", - archive_entry_pathname(entry), + archive_entry_pathname(zip->entry), archive_string_conversion_charset_name(sconv)); ret2 = ARCHIVE_WARN; } @@ -631,7 +631,7 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry) * for filename. */ if (type == AE_IFLNK) { - if (archive_entry_symlink_l(entry, &p, &len, sconv)) { + if (archive_entry_symlink_l(zip->entry, &p, &len, sconv)) { if (errno == ENOMEM) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory " diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index d9aa273ed..4b7e21fc2 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -291,6 +291,7 @@ IF(ENABLE_TEST) test_write_format_zip_file_zip64.c test_write_format_zip_large.c test_write_format_zip_stream.c + test_write_format_zip_windows_path.c test_write_format_zip_zip64.c test_write_open_memory.c test_write_read_format_zip.c diff --git a/libarchive/test/test_write_format_zip_windows_path.c b/libarchive/test/test_write_format_zip_windows_path.c new file mode 100644 index 000000000..c8be48293 --- /dev/null +++ b/libarchive/test/test_write_format_zip_windows_path.c @@ -0,0 +1,100 @@ +/*- + * Copyright (c) 2024 Yang Zhou + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer + * in this position and unchanged. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "test.h" +/* Write one entry whose pathname mixes backslash and forward-slash separators into an in-memory zip, setting the zip hdrcharset option to charset unless it is NULL, then scan the raw archive bytes to see which spelling of the path was stored. */ +static void +test_with_hdrcharset(const char *charset) +{ + static const char *raw_path = "dir_stored\\dir1/file"; + static const char *replaced = "dir_stored/dir1/file"; + struct archive *a; + size_t used; + size_t buffsize = 1000000; + char *buff; + + buff = malloc(buffsize); + + /* Create a new archive in memory. NOTE(review): buff comes from the malloc() above without a NULL check, and no free(buff) is visible in this function - confirm that is acceptable for a test. 
*/ + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_zip(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_add_filter_none(a)); + if (charset != NULL) { + assertEqualIntA(a, ARCHIVE_OK, archive_write_set_format_option(a, "zip", "hdrcharset", charset)); + } + assertEqualIntA(a, ARCHIVE_OK, archive_write_open_memory(a, buff, buffsize, &used)); + + /* + * Write a zero-length regular file whose pathname mixes '/' and '\' separators. + */ + struct archive_entry *ae; + assert((ae = archive_entry_new()) != NULL); + archive_entry_set_mtime(ae, 1, 10); + archive_entry_copy_pathname(ae, raw_path); + archive_entry_set_mode(ae, AE_IFREG | 0755); + archive_entry_set_size(ae, 0); + assertEqualIntA(a, ARCHIVE_OK, archive_write_header(a, ae)); + archive_entry_free(ae); + + assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_write_free(a)); + dumpfile("constructed.zip", buff, used); + + /* + * Check that the generated archive contains the expected path, and only the expected path. + * Intentionally avoid the `archive_read_XXX` functions because they silently replace '\' with '/', + * making it difficult to get the exact path written in the archive. NOTE(review): the scan below stops before offset used - len, so a match occupying the final len bytes would be missed, and charset can be NULL when it is handed to the %s in failure() (presumably printf-style) - confirm both are acceptable. 
+ */ +#if defined(_WIN32) && !defined(__CYGWIN__) + const char *expected = replaced; + const char *unexpected = raw_path; +#else + const char *expected = raw_path; + const char *unexpected = replaced; +#endif + int expected_found = 0; + int unexpected_found = 0; + size_t len = strlen(raw_path); + for (char *ptr = buff; ptr < (buff + used - len); ptr++) { + if (memcmp(ptr, expected, len) == 0) + ++expected_found; + if (memcmp(ptr, unexpected, len) == 0) + ++unexpected_found; + } + failure("should find expected path in both local and central header (charset=%s)", charset); + assertEqualInt(2, expected_found); + failure("should not find unexpected path in anywhere (charset=%s)", charset); + assertEqualInt(0, unexpected_found); +} +/* Run the check with no hdrcharset option, then with ISO-8859-1 and UTF-8 when building for native Windows (not Cygwin) or when HAVE_ICONV is non-zero. */ +DEFINE_TEST(test_write_format_zip_windows_path) +{ + test_with_hdrcharset(NULL); +#if defined(_WIN32) && !defined(__CYGWIN__) || HAVE_ICONV + test_with_hdrcharset("ISO-8859-1"); + test_with_hdrcharset("UTF-8"); +#endif +} -- 2.47.2