git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Fix gnutar creation with unicode hardlink names on Windows (#2227)
author Duncan Horn <40036384+dunhor@users.noreply.github.com>
Thu, 20 Jun 2024 21:03:54 +0000 (14:03 -0700)
committer GitHub <noreply@github.com>
Thu, 20 Jun 2024 21:03:54 +0000 (23:03 +0200)
The code currently uses `archive_entry_hardlink` to determine if an
entry is a hardlink; however, on Windows this call will fail if the path
cannot be represented in the current locale. This change instead checks
to see if any entry in the `archive_mstring` is set.

libarchive/archive_entry.c
libarchive/archive_entry.h
libarchive/archive_write_set_format_gnutar.c
libarchive/test/test_gnutar_filename_encoding.c

index 178f7f628317e52a37101543ff393089b7af768b..ef322341a9ed0a9cdd1dc9ba43446d681b871096 100644 (file)
@@ -526,6 +526,12 @@ archive_entry_hardlink_w(struct archive_entry *entry)
        return (NULL);
 }
 
+int
+archive_entry_hardlink_is_set(struct archive_entry *entry)
+{
+       return (entry->ae_set & AE_SET_HARDLINK) != 0;
+}
+
 int
 _archive_entry_hardlink_l(struct archive_entry *entry,
     const char **p, size_t *len, struct archive_string_conv *sc)
index b51f34e42bf48bcbd672509a3d21dfdddea63d34..3a0afffb08cdb54299e9515ade0dd19012b271f7 100644 (file)
@@ -263,6 +263,7 @@ __LA_DECL void               archive_entry_set_link_to_hardlink(struct archive_entry *);
 __LA_DECL const char   *archive_entry_hardlink(struct archive_entry *);
 __LA_DECL const char   *archive_entry_hardlink_utf8(struct archive_entry *);
 __LA_DECL const wchar_t        *archive_entry_hardlink_w(struct archive_entry *);
+__LA_DECL int           archive_entry_hardlink_is_set(struct archive_entry *);
 __LA_DECL la_int64_t    archive_entry_ino(struct archive_entry *);
 __LA_DECL la_int64_t    archive_entry_ino64(struct archive_entry *);
 __LA_DECL int           archive_entry_ino_is_set(struct archive_entry *);
index a88350b8741193a80bedb564387a72f0832a95f1..a3a49c573c64d44090bc895a3e483d689a181833 100644 (file)
@@ -523,7 +523,7 @@ archive_write_gnutar_header(struct archive_write *a,
                        goto exit_write_header;
        }
 
-       if (archive_entry_hardlink(entry) != NULL) {
+       if (archive_entry_hardlink_is_set(entry)) {
                tartype = '1';
        } else
                switch (archive_entry_filetype(entry)) {
index f473ddfb4fe95a18b55cdf6478f00f54964f8272..476ec2149fdfbb4ba611e47677e4f58949a0ecaf 100644 (file)
@@ -389,3 +389,105 @@ DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8)
        assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
 }
 
+DEFINE_TEST(test_gnutar_filename_encoding_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+       skipping("This test is meant to verify unicode string handling"
+               " on Windows with UTF-16 names");
+       return;
+#else
+       struct archive *a;
+       struct archive_entry *entry;
+       char buff[4096];
+       size_t used;
+
+       /*
+        * Don't call setlocale because we're verifying that the '_w' functions
+        * work as expected when 'hdrcharset' is UTF-8
+        */
+
+       /* Part 1: file */
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
+       if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
+               skipping("This system cannot convert character-set"
+                   " from UTF-16 to UTF-8.");
+               archive_write_free(a);
+               return;
+       }
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the filename using a UTF-16 string */
+       archive_entry_copy_pathname_w(entry, L"\u8868.txt");
+       archive_entry_set_filetype(entry, AE_IFREG);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+       /* Check UTF-8 version. */
+       assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
+
+       /* Part 2: directory */
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
+       assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the directory name using a UTF-16 string */
+       /* NOTE: Explicitly not adding trailing slash to test that code path */
+       archive_entry_copy_pathname_w(entry, L"\u8868");
+       archive_entry_set_filetype(entry, AE_IFDIR);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+       /* Check UTF-8 version. */
+       assertEqualMem(buff, "\xE8\xA1\xA8/", 4);
+
+       /* Part 3: symlink */
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
+       assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the symlink target using a UTF-16 string */
+       archive_entry_set_pathname(entry, "link.txt");
+       archive_entry_copy_symlink_w(entry, L"\u8868.txt");
+       archive_entry_set_filetype(entry, AE_IFLNK);
+       archive_entry_set_symlink_type(entry, AE_SYMLINK_TYPE_FILE);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+       /* Check UTF-8 version. */
+       assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
+
+       /* Part 4: hardlink */
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
+       assertEqualInt(ARCHIVE_OK, archive_write_set_options(a, "hdrcharset=UTF-8"));
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the hardlink target using a UTF-16 string */
+       archive_entry_set_pathname(entry, "link.txt");
+       archive_entry_copy_hardlink_w(entry, L"\u8868.txt");
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+
+       /* Check UTF-8 version. */
+       assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
+#endif
+}