git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Update ustar creation sanity check to use WCS path on Windows (#2230)
author Duncan Horn <40036384+dunhor@users.noreply.github.com>
Wed, 12 Jun 2024 19:01:40 +0000 (12:01 -0700)
committer GitHub <noreply@github.com>
Wed, 12 Jun 2024 19:01:40 +0000 (12:01 -0700)
On Windows, the MBS pathname might be null if the string was set with a
WCS that can't be represented by the current locale. This is handled
properly by the rest of the code, but there's a sanity check that does
not make the proper distinction.

Note: this is a partial cherry-pick from
https://github.com/libarchive/libarchive/pull/2095, which I'm going to
go through and break into smaller pieces in hopes of getting some things
in while discussion of other things can continue.

libarchive/archive_write_set_format_ustar.c
libarchive/test/test_ustar_filename_encoding.c

index 673487b27fe3072b028712fb6477619d06c994c3..d8f0b458462c0f57d22ee53f2617138b821d2747 100644 (file)
@@ -254,7 +254,11 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry)
                sconv = ustar->opt_sconv;
 
        /* Sanity check. */
+#if defined(_WIN32) && !defined(__CYGWIN__)
+       if (archive_entry_pathname_w(entry) == NULL) {
+#else
        if (archive_entry_pathname(entry) == NULL) {
+#endif
                archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
                    "Can't record entry in tar file without pathname");
                return (ARCHIVE_FAILED);
index cc62453f1c1b60cb5707f54e5daf38d0d3e960e0..1242bd1d3cd20b4f1fab5a4c700769e4fdd975cf 100644 (file)
@@ -390,3 +390,105 @@ DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
        assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
 }
 
+DEFINE_TEST(test_ustar_filename_encoding_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+       skipping("This test is meant to verify unicode string handling"
+               " on Windows with UTF-16 names");
+       return;
+#else
+       struct archive *a;
+       struct archive_entry *entry;
+       char buff[4096];
+       size_t used;
+
+       /*
+        * Each part writes one ustar entry named via a '_w' setter. Don't call
+        * setlocale: we verify the '_w' functions when 'hdrcharset' is UTF-8.
+        */
+
+       /* Part 1: regular file; the pathname is set from a wide string */
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+       if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+               skipping("This system cannot convert character-set"
+                   " from UTF-16 to UTF-8.");
+               archive_write_free(a);
+               return;
+       }
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the filename using a UTF-16 string */
+       archive_entry_copy_pathname_w(entry, L"\u8868.txt");
+       archive_entry_set_filetype(entry, AE_IFREG);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+       /* Check UTF-8 version: the name is expected at the start of buff. */
+       assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
+
+       /* Part 2: directory */
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+       assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the directory name using a UTF-16 string */
+       /* NOTE: Explicitly not adding trailing slash to test that code path */
+       archive_entry_copy_pathname_w(entry, L"\u8868");
+       archive_entry_set_filetype(entry, AE_IFDIR);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+       /* Check UTF-8 version; the name is expected to end with '/'. */
+       assertEqualMem(buff, "\xE8\xA1\xA8/", 4);
+
+       /* Part 3: symlink */
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+       assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the symlink target using a UTF-16 string */
+       archive_entry_set_pathname(entry, "link.txt");
+       archive_entry_copy_symlink_w(entry, L"\u8868.txt");
+       archive_entry_set_filetype(entry, AE_IFLNK);
+       archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+       /* Check UTF-8 version: the target is expected at offset 157. */
+       assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
+
+       /* Part 4: hardlink */
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+       assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the hardlink target using a UTF-16 string */
+       archive_entry_set_pathname(entry, "link.txt");
+       archive_entry_copy_hardlink_w(entry, L"\u8868.txt");
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+       /* Check UTF-8 version: the target is expected at offset 157. */
+       assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
+#endif
+}