git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
archive_write: Fix crash on failure to convert WCS/UTF-8 pathname to MBS 2856/head
author Brad King <brad.king@kitware.com>
Wed, 4 Feb 2026 21:35:03 +0000 (16:35 -0500)
committer Brad King <brad.king@kitware.com>
Fri, 6 Feb 2026 01:40:23 +0000 (20:40 -0500)
If an entry pathname is set only by WCS or UTF-8, it may not have any
MBS representation in the archive's hdrcharset.  Do not crash or create
an archive with an empty pathname.  Furthermore, the entry pathname may
not have any MBS representation in the current locale.  Do not report a
`(null)` pathname in the error message.

libarchive/archive_write_set_format_gnutar.c
libarchive/archive_write_set_format_ustar.c
libarchive/archive_write_set_format_v7tar.c
libarchive/archive_write_set_format_zip.c
libarchive/test/CMakeLists.txt
libarchive/test/test_gnutar_filename_encoding.c
libarchive/test/test_ustar_filename_encoding.c
libarchive/test/test_v7tar_filename_encoding.c [new file with mode: 0644]
libarchive/test/test_zip_filename_encoding.c

index 14ef4cd2444d6a3c6d6e2aa1ae7e69490f9eab66..b67007a631c095c3f6929f2520a7c1ac2aaba132 100644 (file)
@@ -293,6 +293,17 @@ archive_write_gnutar_header(struct archive_write *a,
        } else
                sconv = gnutar->opt_sconv;
 
+       /* Sanity check. */
+       if (archive_entry_pathname(entry) == NULL
+#if defined(_WIN32) && !defined(__CYGWIN__)
+           && archive_entry_pathname_w(entry) == NULL
+#endif
+           ) {
+               archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+                   "Can't record entry in tar file without pathname");
+               return ARCHIVE_FAILED;
+       }
+
        /* Only regular files (not hardlinks) have data. */
        if (archive_entry_hardlink(entry) != NULL ||
            archive_entry_symlink(entry) != NULL ||
@@ -385,17 +396,30 @@ archive_write_gnutar_header(struct archive_write *a,
        r = archive_entry_pathname_l(entry, &(gnutar->pathname),
            &(gnutar->pathname_length), sconv);
        if (r != 0) {
+               const char* p_mbs;
                if (errno == ENOMEM) {
                        archive_set_error(&a->archive, ENOMEM,
                            "Can't allocate memory for pathname");
                        ret = ARCHIVE_FATAL;
                        goto exit_write_header;
                }
-               archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
-                   "Can't translate pathname '%s' to %s",
-                   archive_entry_pathname(entry),
-                   archive_string_conversion_charset_name(sconv));
-               ret2 = ARCHIVE_WARN;
+               p_mbs = archive_entry_pathname(entry);
+               if (p_mbs) {
+                       /* We have a wrongly-encoded MBS pathname.
+                        * Warn and use it.  */
+                       archive_set_error(&a->archive,
+                           ARCHIVE_ERRNO_FILE_FORMAT,
+                           "Can't translate pathname '%s' to %s", p_mbs,
+                           archive_string_conversion_charset_name(sconv));
+                       ret2 = ARCHIVE_WARN;
+               } else {
+                       /* We have no MBS pathname.  Fail.  */
+                       archive_set_error(&a->archive,
+                           ARCHIVE_ERRNO_FILE_FORMAT,
+                           "Can't translate pathname to %s",
+                           archive_string_conversion_charset_name(sconv));
+                       return ARCHIVE_FAILED;
+               }
        }
        r = archive_entry_uname_l(entry, &(gnutar->uname),
            &(gnutar->uname_length), sconv);
index 4084eb45596841bc363a828d5bbde04680903569..97724c1588b3642e023cce15c9acf94d5787e4d3 100644 (file)
@@ -254,11 +254,11 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry)
                sconv = ustar->opt_sconv;
 
        /* Sanity check. */
+       if (archive_entry_pathname(entry) == NULL
 #if defined(_WIN32) && !defined(__CYGWIN__)
-       if (archive_entry_pathname_w(entry) == NULL) {
-#else
-       if (archive_entry_pathname(entry) == NULL) {
+           && archive_entry_pathname_w(entry) == NULL
 #endif
+           ) {
                archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
                    "Can't record entry in tar file without pathname");
                return (ARCHIVE_FAILED);
@@ -409,15 +409,28 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
         */
        r = archive_entry_pathname_l(entry, &pp, &copy_length, sconv);
        if (r != 0) {
+               const char* p_mbs;
                if (errno == ENOMEM) {
                        archive_set_error(&a->archive, ENOMEM,
                            "Can't allocate memory for Pathname");
                        return (ARCHIVE_FATAL);
                }
-               archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
-                   "Can't translate pathname '%s' to %s",
-                   pp, archive_string_conversion_charset_name(sconv));
-               ret = ARCHIVE_WARN;
+               p_mbs = archive_entry_pathname(entry);
+               if (p_mbs) {
+                       /* We have a wrongly-encoded MBS pathname.
+                        * Warn and use it.  */
+                       archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+                           "Can't translate pathname '%s' to %s", p_mbs,
+                           archive_string_conversion_charset_name(sconv));
+                       ret = ARCHIVE_WARN;
+               } else {
+                       /* We have no MBS pathname.  Fail.  */
+                       archive_set_error(&a->archive,
+                           ARCHIVE_ERRNO_FILE_FORMAT,
+                           "Can't translate pathname to %s",
+                           archive_string_conversion_charset_name(sconv));
+                       return ARCHIVE_FAILED;
+               }
        }
        if (copy_length <= USTAR_name_size)
                memcpy(h + USTAR_name_offset, pp, copy_length);
index 2598fc076c0ca1ee6e466977630c398f80d5ceef..37ba73a1320a080310a33041777d1be2e16b2459 100644 (file)
@@ -232,7 +232,11 @@ archive_write_v7tar_header(struct archive_write *a, struct archive_entry *entry)
                sconv = v7tar->opt_sconv;
 
        /* Sanity check. */
-       if (archive_entry_pathname(entry) == NULL) {
+       if (archive_entry_pathname(entry) == NULL
+#if defined(_WIN32) && !defined(__CYGWIN__)
+           && archive_entry_pathname_w(entry) == NULL
+#endif
+           ) {
                archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
                    "Can't record entry in tar file without pathname");
                return (ARCHIVE_FAILED);
@@ -382,15 +386,28 @@ format_header_v7tar(struct archive_write *a, char h[512],
         */
        r = archive_entry_pathname_l(entry, &pp, &copy_length, sconv);
        if (r != 0) {
+               const char* p_mbs;
                if (errno == ENOMEM) {
                        archive_set_error(&a->archive, ENOMEM,
                            "Can't allocate memory for Pathname");
                        return (ARCHIVE_FATAL);
                }
-               archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
-                   "Can't translate pathname '%s' to %s",
-                   pp, archive_string_conversion_charset_name(sconv));
-               ret = ARCHIVE_WARN;
+               p_mbs = archive_entry_pathname(entry);
+               if (p_mbs) {
+                       /* We have a wrongly-encoded MBS pathname.
+                        * Warn and use it.  */
+                       archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
+                           "Can't translate pathname '%s' to %s", p_mbs,
+                           archive_string_conversion_charset_name(sconv));
+                       ret = ARCHIVE_WARN;
+               } else {
+                       /* We have no MBS pathname.  Fail.  */
+                       archive_set_error(&a->archive,
+                           ARCHIVE_ERRNO_FILE_FORMAT,
+                           "Can't translate pathname to %s",
+                           archive_string_conversion_charset_name(sconv));
+                       return ARCHIVE_FAILED;
+               }
        }
        if (strict && copy_length < V7TAR_name_size)
                memcpy(h + V7TAR_name_offset, pp, copy_length);
index 19121b5191481fa24119a1b5a58bc08e6cca7f6a..f74922ac1b9f5a696ddf70fa39b110861bbef47d 100644 (file)
@@ -802,6 +802,17 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry)
        int version_needed = 10;
 #define MIN_VERSION_NEEDED(x) do { if (version_needed < x) { version_needed = x; } } while (0)
 
+       /* Sanity check. */
+       if (archive_entry_pathname(entry) == NULL
+#if defined(_WIN32) && !defined(__CYGWIN__)
+           && archive_entry_pathname_w(entry) == NULL
+#endif
+           ) {
+               archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+                   "Can't record entry in zip file without pathname");
+               return ARCHIVE_FAILED;
+       }
+
        /* Ignore types of entries that we don't support. */
        type = archive_entry_filetype(entry);
        if (type != AE_IFREG && type != AE_IFDIR && type != AE_IFLNK) {
@@ -882,22 +893,33 @@ archive_write_zip_header(struct archive_write *a, struct archive_entry *entry)
                return (ARCHIVE_FATAL);
        }
 
-       if (sconv != NULL) {
+       {
                const char *p;
                size_t len;
 
                if (archive_entry_pathname_l(zip->entry, &p, &len, sconv) != 0) {
+                       const char* p_mbs;
                        if (errno == ENOMEM) {
                                archive_set_error(&a->archive, ENOMEM,
                                    "Can't allocate memory for Pathname");
                                return (ARCHIVE_FATAL);
                        }
-                       archive_set_error(&a->archive,
-                           ARCHIVE_ERRNO_FILE_FORMAT,
-                           "Can't translate Pathname '%s' to %s",
-                           archive_entry_pathname(zip->entry),
-                           archive_string_conversion_charset_name(sconv));
-                       ret2 = ARCHIVE_WARN;
+                       p_mbs = archive_entry_pathname(zip->entry);
+                       if (p_mbs) {
+                               /* We have a wrongly-encoded MBS pathname.  Warn and use it.  */
+                               archive_set_error(&a->archive,
+                                   ARCHIVE_ERRNO_FILE_FORMAT,
+                                   "Can't translate pathname '%s' to %s", p_mbs,
+                                   archive_string_conversion_charset_name(sconv));
+                               ret2 = ARCHIVE_WARN;
+                       } else {
+                               /* We have no MBS pathname.  Fail.  */
+                               archive_set_error(&a->archive,
+                                   ARCHIVE_ERRNO_FILE_FORMAT,
+                                   "Can't translate pathname to %s",
+                                   archive_string_conversion_charset_name(sconv));
+                               return ARCHIVE_FAILED;
+                       }
                }
                if (len > 0)
                        archive_entry_set_pathname(zip->entry, p);
index 02521c2bc1ad96ee8c2f6eba163ad68e01427e50..129e054c0b4464efb771d943f45582c3e02105ca 100644 (file)
@@ -227,6 +227,7 @@ IF(ENABLE_TEST)
     test_tar_large.c
     test_ustar_filename_encoding.c
     test_ustar_filenames.c
+    test_v7tar_filename_encoding.c
     test_warn_missing_hardlink_target.c
     test_write_disk.c
     test_write_disk_appledouble.c
index 476ec2149fdfbb4ba611e47677e4f58949a0ecaf..830b3884f8298614d619399243092136ddd0d3bf 100644 (file)
@@ -491,3 +491,43 @@ DEFINE_TEST(test_gnutar_filename_encoding_UTF16_win)
        assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
 #endif
 }
+
+DEFINE_TEST(test_gnutar_filename_encoding_fail_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+       skipping("This test is meant to verify unicode string handling"
+               " on Windows with UTF-16 names");
+       return;
+#else
+       struct archive *a;
+       struct archive_entry *entry;
+       char buff[4096];
+       size_t used;
+
+       /* Test the C locale by not calling setlocale.  */
+
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
+       if (archive_write_set_options(a, "hdrcharset=CP437") != ARCHIVE_OK) {
+               skipping("This system cannot convert character-set"
+                   " from UTF-16 to CP437.");
+               archive_write_free(a);
+               return;
+       }
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the filename using a UTF-16 string */
+       archive_entry_copy_pathname_w(entry, L"\u8868.txt");
+       archive_entry_set_filetype(entry, AE_IFREG);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_FAILED, archive_write_header(a, entry));
+       /* The pathname cannot even be represented in the current locale
+          for inclusion in the error message.  */
+       assertEqualString("Can't translate pathname to CP437",
+           archive_error_string(a));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+#endif
+}
index 1242bd1d3cd20b4f1fab5a4c700769e4fdd975cf..440d1b99276ef707b9ffeb3ada307a61d330dfbb 100644 (file)
@@ -492,3 +492,43 @@ DEFINE_TEST(test_ustar_filename_encoding_UTF16_win)
        assertEqualMem(buff + 157, "\xE8\xA1\xA8.txt", 7);
 #endif
 }
+
+DEFINE_TEST(test_ustar_filename_encoding_fail_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+       skipping("This test is meant to verify unicode string handling"
+               " on Windows with UTF-16 names");
+       return;
+#else
+       struct archive *a;
+       struct archive_entry *entry;
+       char buff[4096];
+       size_t used;
+
+       /* Test the C locale by not calling setlocale.  */
+
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
+       if (archive_write_set_options(a, "hdrcharset=CP437") != ARCHIVE_OK) {
+               skipping("This system cannot convert character-set"
+                   " from UTF-16 to CP437.");
+               archive_write_free(a);
+               return;
+       }
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the filename using a UTF-16 string */
+       archive_entry_copy_pathname_w(entry, L"\u8868.txt");
+       archive_entry_set_filetype(entry, AE_IFREG);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_FAILED, archive_write_header(a, entry));
+       /* The pathname cannot even be represented in the current locale
+          for inclusion in the error message.  */
+       assertEqualString("Can't translate pathname to CP437",
+           archive_error_string(a));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+#endif
+}
diff --git a/libarchive/test/test_v7tar_filename_encoding.c b/libarchive/test/test_v7tar_filename_encoding.c
new file mode 100644 (file)
index 0000000..96594b1
--- /dev/null
@@ -0,0 +1,67 @@
+/*-
+ * Copyright (c) 2003-2025 Tim Kientzle
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "test.h"
+
+#include <locale.h>
+
+DEFINE_TEST(test_v7tar_filename_encoding_fail_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+       skipping("This test is meant to verify unicode string handling"
+               " on Windows with UTF-16 names");
+       return;
+#else
+       struct archive *a;
+       struct archive_entry *entry;
+       char buff[4096];
+       size_t used;
+
+       /* Test the C locale by not calling setlocale.  */
+
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_v7tar(a));
+       if (archive_write_set_options(a, "hdrcharset=CP437") != ARCHIVE_OK) {
+               skipping("This system cannot convert character-set"
+                   " from UTF-16 to CP437.");
+               archive_write_free(a);
+               return;
+       }
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the filename using a UTF-16 string */
+       archive_entry_copy_pathname_w(entry, L"\u8868.txt");
+       archive_entry_set_filetype(entry, AE_IFREG);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_FAILED, archive_write_header(a, entry));
+       /* The pathname cannot even be represented in the current locale
+          for inclusion in the error message.  */
+       assertEqualString("Can't translate pathname to CP437",
+           archive_error_string(a));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+#endif
+}
index b6786f2c3b187cee1a0301a435a77b834e80690a..1cb394547066c4a55877c70a5f175a579bfeaa1a 100644 (file)
@@ -622,3 +622,43 @@ DEFINE_TEST(test_zip_filename_encoding_UTF16_win)
        /* NOTE: ZIP does not support hardlinks */
 #endif
 }
+
+DEFINE_TEST(test_zip_filename_encoding_fail_UTF16_win)
+{
+#if !defined(_WIN32) || defined(__CYGWIN__)
+       skipping("This test is meant to verify unicode string handling"
+               " on Windows with UTF-16 names");
+       return;
+#else
+       struct archive *a;
+       struct archive_entry *entry;
+       char buff[4096];
+       size_t used;
+
+       /* Test the C locale by not calling setlocale.  */
+
+       a = archive_write_new();
+       assertEqualInt(ARCHIVE_OK, archive_write_set_format_zip(a));
+       if (archive_write_set_options(a, "hdrcharset=CP437") != ARCHIVE_OK) {
+               skipping("This system cannot convert character-set"
+                   " from UTF-16 to CP437.");
+               archive_write_free(a);
+               return;
+       }
+       assertEqualInt(ARCHIVE_OK,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       entry = archive_entry_new2(a);
+       /* Set the filename using a UTF-16 string */
+       archive_entry_copy_pathname_w(entry, L"\u8868.txt");
+       archive_entry_set_filetype(entry, AE_IFREG);
+       archive_entry_set_size(entry, 0);
+       assertEqualInt(ARCHIVE_FAILED, archive_write_header(a, entry));
+       /* The pathname cannot even be represented in the current locale
+          for inclusion in the error message.  */
+       assertEqualString("Can't translate pathname to CP437",
+           archive_error_string(a));
+       archive_entry_free(entry);
+       assertEqualInt(ARCHIVE_OK, archive_write_free(a));
+#endif
+}