]> git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Fix a couple issues with creating PAX archives (#2228)
author Duncan Horn <40036384+dunhor@users.noreply.github.com>
Sun, 16 Jun 2024 05:20:00 +0000 (22:20 -0700)
committer GitHub <noreply@github.com>
Sun, 16 Jun 2024 05:20:00 +0000 (22:20 -0700)
Note: this is a partial cherry-pick from
https://github.com/libarchive/libarchive/pull/2095, which I'm going to
go through and break into smaller pieces in hopes of getting some things
in while discussion of other things can continue.

There are basically two fixes here:

The first is to check for the presence of the WCS pathname on Windows
before failing since the conversion from WCS -> MBS might fail. Later
execution already handles such paths correctly.

The second is to set the converted link name on the target entry where
relevant. Note that there has been prior discussion on this here:
https://github.com/libarchive/libarchive/pull/2095/files#r1531599325

libarchive/archive_write_set_format_pax.c
libarchive/test/test_pax_filename_encoding.c

index e93333074a6a67312d93d53d5faadb0574b6b8de..4aace4682587089bce3e5f0613dec4a1b1dbc2ec 100644 (file)
@@ -608,7 +608,15 @@ archive_write_pax_header(struct archive_write *a,
        const time_t ustar_max_mtime = get_ustar_max_mtime();
 
        /* Sanity check. */
+#if defined(_WIN32) && !defined(__CYGWIN__)
+       /* NOTE: If the caller supplied a pathname that fails WCS conversion (e.g.
+        * if it is invalid UTF-8), we are expected to return ARCHIVE_WARN later on
+        * in execution, hence the check for both pointers */
+       if ((archive_entry_pathname_w(entry_original) == NULL) &&
+           (archive_entry_pathname(entry_original) == NULL)) {
+#else
        if (archive_entry_pathname(entry_original) == NULL) {
+#endif
                archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
                          "Can't record entry in tar file without pathname");
                return (ARCHIVE_FAILED);
@@ -1032,6 +1040,14 @@ archive_write_pax_header(struct archive_write *a,
                                        archive_entry_set_symlink(entry_main,
                                            "././@LongSymLink");
                        }
+                       else {
+                               /* Otherwise, has non-ASCII characters; update the paths to
+                                * however they got decoded above */
+                               if (hardlink != NULL) 
+                                       archive_entry_set_hardlink(entry_main, linkpath);
+                               else
+                                       archive_entry_set_symlink(entry_main, linkpath);
+                       }
                        need_extension = 1;
                }
        }
index 737641c5abcaef24a29cb277ef705abaea41e7cb..3165b65dd33128cb1608044ed215dc1083fdda55 100644 (file)
@@ -579,6 +579,102 @@ DEFINE_TEST(test_pax_filename_encoding_KOI8R_CP1251)
        assertEqualInt(ARCHIVE_OK, archive_write_free(a));
 }
 
+/*
+ * Verify that unicode filenames are correctly preserved on Windows.
+ *
+ * Four entries (regular file, directory, symlink, hardlink) are written to
+ * an in-memory pax archive using only the wide-character ('_w') setters.
+ * The archive is then read back and the pathnames and link targets are
+ * compared against the wide strings that were originally supplied.
+ *
+ * On non-Windows platforms (and on Cygwin) the test is skipped.
+ */
+DEFINE_TEST(test_pax_filename_encoding_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+       skipping("This test is meant to verify unicode string handling"
+               " on Windows with UTF-16 names");
+       return;
+#else
+       struct archive *a;
+       struct archive_entry *entry;
+       char buff[0x2000];
+       size_t used;
+
+       /*
+        * Don't call setlocale because we're verifying that the '_w' functions
+        * work as expected when 'hdrcharset' is UTF-8
+        */
+
+       /* Check if the platform completely supports the string conversion. */
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
+       if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+               skipping("This system cannot convert character-set"
+                   " from UTF-16 to UTF-8.");
+               archive_write_free(a);
+               return;
+       }
+
+       /* Re-create a write archive object since filenames should be written
+        * in UTF-8 by default. */
+       archive_write_free(a);
+
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       /* The same entry object is reused for all four headers below. */
+
+       /* Part 1: file */
+       entry = archive_entry_new2(a);
+       archive_entry_copy_pathname_w(entry, L"\u4f60\u597d.txt");
+       archive_entry_set_filetype(entry, AE_IFREG);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+
+       /* Part 2: directory */
+       /* NOTE: Explicitly not adding trailing slash to test that code path */
+       archive_entry_copy_pathname_w(entry, L"\u043f\u0440\u0438");
+       archive_entry_set_filetype(entry, AE_IFDIR);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+
+       /* Part 3: symlink */
+       archive_entry_copy_pathname_w(entry, L"\u518d\u89c1.txt");
+       archive_entry_copy_symlink_w(entry, L"\u4f60\u597d.txt");
+       archive_entry_set_filetype(entry, AE_IFLNK);
+       archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+
+       /* Part 4: hardlink */
+       archive_entry_copy_pathname_w(entry, L"\u665a\u5b89.txt");
+       archive_entry_copy_hardlink_w(entry, L"\u4f60\u597d.txt");
+       archive_entry_set_filetype(entry, AE_IFREG);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+       /* Ensure that the names round trip properly */
+       a = archive_read_new();
+       archive_read_support_format_all(a);
+       archive_read_support_filter_all(a);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_open_memory(a, buff, used));
+
+       /* Read part 1: file */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));
+       assertEqualWString(L"\u4f60\u597d.txt", archive_entry_pathname_w(entry));
+
+       /* Read part 2: directory */
+       /* The name was written without a trailing slash; it is expected
+        * to come back with one. */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));
+       assertEqualWString(L"\u043f\u0440\u0438/", archive_entry_pathname_w(entry));
+
+       /* Read part 3: symlink */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));
+       assertEqualWString(L"\u518d\u89c1.txt", archive_entry_pathname_w(entry));
+       assertEqualWString(L"\u4f60\u597d.txt", archive_entry_symlink_w(entry));
+
+       /* Read part 4: hardlink */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &entry));
+       assertEqualWString(L"\u665a\u5b89.txt", archive_entry_pathname_w(entry));
+       assertEqualWString(L"\u4f60\u597d.txt", archive_entry_hardlink_w(entry));
+
+       /* Release the reader; otherwise the read handle created above
+        * is leaked when the test returns. */
+       assertEqualInt(ARCHIVE_OK, archive_read_close(a));
+       assertEqualInt(ARCHIVE_OK, archive_read_free(a));
+#endif
+}
 
 DEFINE_TEST(test_pax_filename_encoding)
 {