git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
[7zip] Read/write symlink paths as UTF-8 (#2252)
author Duncan Horn <40036384+dunhor@users.noreply.github.com>
Fri, 11 Oct 2024 06:30:25 +0000 (23:30 -0700)
committer GitHub <noreply@github.com>
Fri, 11 Oct 2024 06:30:25 +0000 (08:30 +0200)
I previously tried to find documentation on how symlinks are expected to
be stored in 7zip files; however, the best reference I could find was
[here](https://py7zr.readthedocs.io/en/latest/archive_format.html). That
site suggests that symlink paths are stored as UTF-8 encoded strings.

Makefile.am
libarchive/archive_read_support_format_7zip.c
libarchive/archive_write_set_format_7zip.c
libarchive/test/CMakeLists.txt
libarchive/test/test_7zip_filename_encoding.c [new file with mode: 0644]

index e3dbdb1d016390a79762b2fdd0deac68cfba1222..a36126c471d6b66f764b238339e89d4b06834c64 100644 (file)
@@ -365,6 +365,7 @@ libarchive_test_SOURCES= \
        $(test_utils_SOURCES) \
        libarchive/test/read_open_memory.c \
        libarchive/test/test.h \
+       libarchive/test/test_7zip_filename_encoding.c \
        libarchive/test/test_acl_nfs4.c \
        libarchive/test/test_acl_pax.c \
        libarchive/test/test_acl_platform_nfs4.c \
index fd5792d1663ded42354ecdbb93a36ba4136ec8dc..b4e34d68dbba6255ab974775eca7a166ca4e5b4f 100644 (file)
@@ -833,9 +833,20 @@ archive_read_format_7zip_read_header(struct archive_read *a,
                        zip_entry->mode |= AE_IFREG;
                        archive_entry_set_mode(entry, zip_entry->mode);
                } else {
+                       struct archive_string_conv* utf8_conv;
+
                        symname[symsize] = '\0';
-                       archive_entry_copy_symlink(entry,
-                           (const char *)symname);
+
+                       /* Symbolic links are embedded as UTF-8 strings */
+                       utf8_conv = archive_string_conversion_from_charset(&a->archive,
+                           "UTF-8", 1);
+                       if (utf8_conv == NULL) {
+                               free(symname);
+                               return ARCHIVE_FATAL;
+                       }
+
+                       archive_entry_copy_symlink_l(entry, (const char*)symname, symsize,
+                           utf8_conv);
                }
                free(symname);
                archive_entry_set_size(entry, 0);
index c0ea9d6b15484fe85591ec755c149695c8f15d1c..b870338fc0256cd131afbb824cf4d545b5e1d000 100644 (file)
@@ -521,7 +521,7 @@ _7z_write_header(struct archive_write *a, struct archive_entry *entry)
         */
        if (archive_entry_filetype(entry) == AE_IFLNK) {
                ssize_t bytes;
-               const void *p = (const void *)archive_entry_symlink(entry);
+               const void *p = (const void *)archive_entry_symlink_utf8(entry);
                bytes = compress_out(a, p, (size_t)file->size, ARCHIVE_Z_RUN);
                if (bytes < 0)
                        return ((int)bytes);
@@ -1563,8 +1563,18 @@ file_new(struct archive_write *a, struct archive_entry *entry,
                archive_entry_set_size(entry, 0);
        if (archive_entry_filetype(entry) == AE_IFDIR)
                file->dir = 1;
-       else if (archive_entry_filetype(entry) == AE_IFLNK)
-               file->size = strlen(archive_entry_symlink(entry));
+       else if (archive_entry_filetype(entry) == AE_IFLNK) {
+               const char* linkpath;
+               linkpath = archive_entry_symlink_utf8(entry);
+               if (linkpath == NULL) {
+                       free(file);
+                       archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+                           "symlink path could not be converted to UTF-8");
+                       return (ARCHIVE_FAILED);
+               }
+               else
+                       file->size = strlen(linkpath);
+       }
        if (archive_entry_mtime_is_set(entry)) {
                file->flg |= MTIME_IS_SET;
                file->times[MTIME].time = archive_entry_mtime(entry);
index 7d5bc3626f7d410b7845d657a3450b96bd3461e4..314c972d26a6754ae0f232af0ca754443b637d92 100644 (file)
@@ -9,6 +9,7 @@ IF(ENABLE_TEST)
     ../../test_utils/test_main.c
     read_open_memory.c
     test.h
+    test_7zip_filename_encoding.c
     test_acl_nfs4.c
     test_acl_pax.c
     test_acl_platform_nfs4.c
diff --git a/libarchive/test/test_7zip_filename_encoding.c b/libarchive/test/test_7zip_filename_encoding.c
new file mode 100644 (file)
index 0000000..cf562d3
--- /dev/null
@@ -0,0 +1,100 @@
+/*\r
+ * Copyright (c) 2003-2018\r
+ * All rights reserved.\r
+ *\r
+ * Redistribution and use in source and binary forms, with or without\r
+ * modification, are permitted provided that the following conditions\r
+ * are met:\r
+ * 1. Redistributions of source code must retain the above copyright\r
+ *    notice, this list of conditions and the following disclaimer\r
+ *    in this position and unchanged.\r
+ * 2. Redistributions in binary form must reproduce the above copyright\r
+ *    notice, this list of conditions and the following disclaimer in the\r
+ *    documentation and/or other materials provided with the distribution.\r
+ *\r
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR\r
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\r
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\r
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,\r
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT\r
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\r
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\r
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\r
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF\r
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+ */\r
+#include "test.h"\r
+\r
+DEFINE_TEST(test_7zip_filename_encoding_UTF16_win)\r
+{\r
+#if !defined(_WIN32) || defined(__CYGWIN__)\r
+       skipping("This test is meant to verify unicode string handling"\r
+               " on Windows with UTF-16 names");\r
+       return;\r
+#else\r
+       struct archive *a;\r
+       struct archive_entry *entry;\r
+       char buff[4096];\r
+       size_t used;\r
+\r
+       /*\r
+        * Don't call setlocale because we're verifying that the '_w' functions\r
+        * work as expected\r
+        */\r
+\r
+       a = archive_write_new();\r
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_7zip(a));\r
+       assertEqualInt(ARCHIVE_OK,\r
+           archive_write_open_memory(a, buff, sizeof(buff), &used));\r
+\r
+       /* Part 1: file */\r
+       entry = archive_entry_new2(a);\r
+       archive_entry_copy_pathname_w(entry, L"\u8868.txt");\r
+       archive_entry_set_filetype(entry, AE_IFREG);\r
+       archive_entry_set_size(entry, 0);\r
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));\r
+\r
+       /* Part 2: directory */\r
+       archive_entry_clear(entry);\r
+       archive_entry_copy_pathname_w(entry, L"\u8868");\r
+       archive_entry_set_filetype(entry, AE_IFDIR);\r
+       archive_entry_set_size(entry, 0);\r
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));\r
+\r
+       /* Part 3: symlink */\r
+       archive_entry_clear(entry);\r
+       archive_entry_set_pathname(entry, "link.txt");\r
+       archive_entry_copy_symlink_w(entry, L"\u8868.txt");\r
+       archive_entry_set_filetype(entry, AE_IFLNK);\r
+       archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);\r
+       archive_entry_set_size(entry, 0);\r
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));\r
+\r
+       /* NOTE: 7zip does not support hardlinks */\r
+\r
+       archive_entry_free(entry);\r
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));\r
+\r
+       /* Ensure that the archive contents can be read properly */\r
+       /* NOTE: 7zip file contents are not in the order we wrote them! */\r
+       a = archive_read_new();\r
+       archive_read_support_format_all(a);\r
+       archive_read_support_filter_all(a);\r
+       assertEqualIntA(a, ARCHIVE_OK, read_open_memory_seek(a, buff, used, 7));\r
+\r
+       /* Read part 3: symlink */\r
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));\r
+       assertEqualWString(L"\u8868.txt", archive_entry_symlink_w(entry));\r
+\r
+       /* Read part 1: file */\r
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));\r
+       assertEqualWString(L"\u8868.txt", archive_entry_pathname_w(entry));\r
+\r
+       /* Read part 2: directory */\r
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));\r
+       /* NOTE: Trailing slash added automatically for us */\r
+       assertEqualWString(L"\u8868/", archive_entry_pathname_w(entry));\r
+\r
+       archive_read_free(a);\r
+#endif\r
+}
\ No newline at end of file