From 9d2e02e2d879ef6c4dae68bf0791c335a35a7137 Mon Sep 17 00:00:00 2001 From: Tim Kientzle Date: Wed, 30 Apr 2008 17:48:09 -0400 Subject: [PATCH] Synchronize a bunch of changes from my local tree. SVN-Revision: 5 --- libarchive/Makefile | 12 +- libarchive/archive_entry.c | 408 +++++++++++++----- libarchive/archive_entry.h | 314 +++++++++----- libarchive/archive_entry_link_resolver.c | 386 ++++++++++++----- libarchive/archive_entry_private.h | 26 +- libarchive/archive_platform.h | 3 + .../archive_read_support_format_iso9660.c | 5 + libarchive/archive_read_support_format_tar.c | 92 ++-- libarchive/archive_read_support_format_zip.c | 117 ++++- libarchive/archive_string.c | 255 ++++++++++- libarchive/archive_string.h | 26 +- libarchive/archive_util.c | 24 +- libarchive/archive_write_disk.c | 10 +- libarchive/archive_write_set_format_pax.c | 75 +++- libarchive/archive_write_set_format_ustar.c | 28 +- libarchive/test/Makefile | 15 +- libarchive/test/main.c | 129 +++++- libarchive/test/test_acl_pax.c | 12 +- libarchive/test/test_archive_api_feature.c | 13 +- libarchive/test/test_entry.c | 30 +- libarchive/test/test_pax_filename_encoding.c | 166 ++++++- libarchive/test/test_tar_filenames.c | 34 +- libarchive/test/test_tar_large.c | 5 + libarchive/test/test_write_format_ar.c | 2 +- tar/Makefile | 2 +- tar/bsdtar.c | 2 +- 26 files changed, 1662 insertions(+), 529 deletions(-) diff --git a/libarchive/Makefile b/libarchive/Makefile index 75078d790..585b89771 100644 --- a/libarchive/Makefile +++ b/libarchive/Makefile @@ -8,12 +8,12 @@ LDADD= -lbz2 -lz # Version is three numbers: # Major: Bumped ONLY when API/ABI breakage happens (see SHLIB_MAJOR) # Minor: Bumped when significant new features are added -# Revision: Bumped on any notable change +# Revision: Bumped frequently. 
# The useful version number (one integer, easy to compare) -LIBARCHIVE_VERSION= 2004012 +LIBARCHIVE_VERSION_NUMBER=2005001 # The pretty version string -LIBARCHIVE_VERSION_STRING!= echo $$((${LIBARCHIVE_VERSION} / 1000000)).$$((${LIBARCHIVE_VERSION} / 1000 % 1000)).$$((${LIBARCHIVE_VERSION} % 1000)) +LIBARCHIVE_VERSION_STRING=2.5.1b # FreeBSD SHLIB_MAJOR value is managed as part of the FreeBSD system. # It has no real relation to the version number above. @@ -31,10 +31,8 @@ INCS= archive.h archive_entry.h # Note: FreeBSD has inttypes.h, so enable that include in archive.h.in archive.h: archive.h.in Makefile cat ${.CURDIR}/archive.h.in | sed \ - -e 's/@LIBARCHIVE_VERSION@/${LIBARCHIVE_VERSION}/g' \ - -e 's/@LIBARCHIVE_VERSION_STRING@/${LIBARCHIVE_VERSION_STRING}/g' \ - -e 's/@SHLIB_MAJOR@/${SHLIB_MAJOR}/g' \ - -e 's|@ARCHIVE_H_INCLUDE_INTTYPES_H@|#include /* For int64_t */|g' \ + -e 's/@LIBARCHIVE_VERSION_NUMBER@/${LIBARCHIVE_VERSION_NUMBER}/g' \ + -e 's/@LIBARCHIVE_VERSION_STRING@/${LIBARCHIVE_VERSION_STRING}/g' \ > archive.h # archive.h needs to be cleaned diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c index 5f9e39a66..228f91ac6 100644 --- a/libarchive/archive_entry.c +++ b/libarchive/archive_entry.c @@ -91,15 +91,17 @@ static void aes_clean(struct aes *); static void aes_copy(struct aes *dest, struct aes *src); static const char * aes_get_mbs(struct aes *); static const wchar_t * aes_get_wcs(struct aes *); -static void aes_set_mbs(struct aes *, const char *mbs); -static void aes_copy_mbs(struct aes *, const char *mbs); +static int aes_set_mbs(struct aes *, const char *mbs); +static int aes_copy_mbs(struct aes *, const char *mbs); /* static void aes_set_wcs(struct aes *, const wchar_t *wcs); */ -static void aes_copy_wcs(struct aes *, const wchar_t *wcs); -static void aes_copy_wcs_len(struct aes *, const wchar_t *wcs, size_t); +static int aes_copy_wcs(struct aes *, const wchar_t *wcs); +static int aes_copy_wcs_len(struct aes *, const wchar_t 
*wcs, size_t); static char * ae_fflagstostr(unsigned long bitset, unsigned long bitclear); static const wchar_t *ae_wcstofflags(const wchar_t *stringp, unsigned long *setp, unsigned long *clrp); +static const char *ae_strtofflags(const char *stringp, + unsigned long *setp, unsigned long *clrp); static void append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag, const wchar_t *wname, int perm, int id); static void append_id_w(wchar_t **wp, int id); @@ -144,173 +146,216 @@ static size_t wcslen(const wchar_t *s) #define wmemcpy(a,b,i) (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t)) #endif - static void aes_clean(struct aes *aes) { - if (aes->aes_mbs_alloc) { - free(aes->aes_mbs_alloc); - aes->aes_mbs_alloc = NULL; - } - if (aes->aes_wcs_alloc) { - free(aes->aes_wcs_alloc); - aes->aes_wcs_alloc = NULL; + if (aes->aes_wcs) { + free((wchar_t *)(uintptr_t)aes->aes_wcs); + aes->aes_wcs = NULL; } - memset(aes, 0, sizeof(*aes)); + archive_string_free(&(aes->aes_mbs)); + archive_string_free(&(aes->aes_utf8)); + aes->aes_set = 0; } static void aes_copy(struct aes *dest, struct aes *src) { - *dest = *src; - if (src->aes_mbs != NULL) { - dest->aes_mbs_alloc = strdup(src->aes_mbs); - dest->aes_mbs = dest->aes_mbs_alloc; - if (dest->aes_mbs == NULL) - __archive_errx(1, "No memory for aes_copy()"); - } + wchar_t *wp; + + dest->aes_set = src->aes_set; + archive_string_copy(&(dest->aes_mbs), &(src->aes_mbs)); + archive_string_copy(&(dest->aes_utf8), &(src->aes_utf8)); if (src->aes_wcs != NULL) { - dest->aes_wcs_alloc = (wchar_t *)malloc((wcslen(src->aes_wcs) + 1) + wp = (wchar_t *)malloc((wcslen(src->aes_wcs) + 1) * sizeof(wchar_t)); - dest->aes_wcs = dest->aes_wcs_alloc; - if (dest->aes_wcs == NULL) + if (wp == NULL) __archive_errx(1, "No memory for aes_copy()"); - wcscpy(dest->aes_wcs_alloc, src->aes_wcs); + wcscpy(wp, src->aes_wcs); + dest->aes_wcs = wp; + } +} + +static const char * +aes_get_utf8(struct aes *aes) +{ + if (aes->aes_set & AES_SET_UTF8) + return 
(aes->aes_utf8.s); + if ((aes->aes_set & AES_SET_WCS) + && archive_strappend_w_utf8(&(aes->aes_utf8), aes->aes_wcs) != NULL) { + aes->aes_set |= AES_SET_UTF8; + return (aes->aes_utf8.s); } + return (NULL); } static const char * aes_get_mbs(struct aes *aes) { - if (aes->aes_mbs == NULL && aes->aes_wcs == NULL) - return NULL; - if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) { - /* - * XXX Need to estimate the number of byte in the - * multi-byte form. Assume that, on average, wcs - * chars encode to no more than 3 bytes. There must - * be a better way... XXX - */ - size_t mbs_length = wcslen(aes->aes_wcs) * 3 + 64; - - aes->aes_mbs_alloc = (char *)malloc(mbs_length); - aes->aes_mbs = aes->aes_mbs_alloc; - if (aes->aes_mbs == NULL) - __archive_errx(1, "No memory for aes_get_mbs()"); - wcstombs(aes->aes_mbs_alloc, aes->aes_wcs, mbs_length - 1); - aes->aes_mbs_alloc[mbs_length - 1] = 0; + /* If we already have an MBS form, return that immediately. */ + if (aes->aes_set & AES_SET_MBS) + return (aes->aes_mbs.s); + /* If there's a WCS form, try converting with the native locale. */ + if ((aes->aes_set & AES_SET_WCS) + && archive_strappend_w_mbs(&(aes->aes_mbs), aes->aes_wcs) != NULL) { + aes->aes_set |= AES_SET_MBS; + return (aes->aes_mbs.s); } - return (aes->aes_mbs); + /* We'll use UTF-8 for MBS if all else fails. */ + return (aes_get_utf8(aes)); } static const wchar_t * aes_get_wcs(struct aes *aes) { + wchar_t *w; int r; - if (aes->aes_wcs == NULL && aes->aes_mbs == NULL) - return NULL; - if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) { + /* Return WCS form if we already have it. */ + if (aes->aes_set & AES_SET_WCS) + return (aes->aes_wcs); + + if (aes->aes_set & AES_SET_MBS) { + /* Try converting MBS to WCS using native locale. */ /* * No single byte will be more than one wide character, * so this length estimate will always be big enough. 
*/ - size_t wcs_length = strlen(aes->aes_mbs); + size_t wcs_length = aes->aes_mbs.length; - aes->aes_wcs_alloc - = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t)); - aes->aes_wcs = aes->aes_wcs_alloc; - if (aes->aes_wcs == NULL) + w = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t)); + if (w == NULL) __archive_errx(1, "No memory for aes_get_wcs()"); - r = mbstowcs(aes->aes_wcs_alloc, aes->aes_mbs, wcs_length); - aes->aes_wcs_alloc[wcs_length] = 0; - if (r == -1) { - /* Conversion failed, don't lie to our clients. */ - free(aes->aes_wcs_alloc); - aes->aes_wcs = aes->aes_wcs_alloc = NULL; + r = mbstowcs(w, aes->aes_mbs.s, wcs_length); + w[wcs_length] = 0; + if (r > 0) { + aes->aes_set |= AES_SET_WCS; + return (aes->aes_wcs = w); } + free(w); } - return (aes->aes_wcs); + + if (aes->aes_set & AES_SET_UTF8) { + /* Try converting UTF8 to WCS. */ + aes->aes_wcs = __archive_string_utf8_w(&(aes->aes_utf8)); + aes->aes_set |= AES_SET_WCS; + return (aes->aes_wcs); + } + return (NULL); } -static void +static int aes_set_mbs(struct aes *aes, const char *mbs) { - if (aes->aes_mbs_alloc) { - free(aes->aes_mbs_alloc); - aes->aes_mbs_alloc = NULL; - } - if (aes->aes_wcs_alloc) { - free(aes->aes_wcs_alloc); - aes->aes_wcs_alloc = NULL; - } - aes->aes_mbs = mbs; - aes->aes_wcs = NULL; + return (aes_copy_mbs(aes, mbs)); } -static void +static int aes_copy_mbs(struct aes *aes, const char *mbs) { - if (aes->aes_mbs_alloc) { - free(aes->aes_mbs_alloc); - aes->aes_mbs_alloc = NULL; + if (mbs == NULL) { + aes->aes_set = 0; + return (0); } - if (aes->aes_wcs_alloc) { - free(aes->aes_wcs_alloc); - aes->aes_wcs_alloc = NULL; + aes->aes_set = AES_SET_MBS; /* Only MBS form is set now. 
*/ + archive_strcpy(&(aes->aes_mbs), mbs); + archive_string_empty(&(aes->aes_utf8)); + if (aes->aes_wcs) { + free((wchar_t *)(uintptr_t)aes->aes_wcs); + aes->aes_wcs = NULL; } - aes->aes_mbs_alloc = (char *)malloc((strlen(mbs) + 1) * sizeof(char)); - if (aes->aes_mbs_alloc == NULL) - __archive_errx(1, "No memory for aes_copy_mbs()"); - strcpy(aes->aes_mbs_alloc, mbs); - aes->aes_mbs = aes->aes_mbs_alloc; - aes->aes_wcs = NULL; + return (0); } -#if 0 -static void -aes_set_wcs(struct aes *aes, const wchar_t *wcs) +/* + * The 'update' form tries to proactively update all forms of + * this string (WCS and MBS) and returns an error if any of + * them fail. This is used by the 'pax' handler, for instance, + * to detect and report character-conversion failures early while + * still allowing clients to get potentially useful values from + * the more tolerant lazy conversions. (get_mbs and get_wcs will + * strive to give the user something useful, so you can get hopefully + * usable values even if some of the character conversions are failing.) + */ +static int +aes_update_utf8(struct aes *aes, const char *utf8) { - if (aes->aes_mbs_alloc) { - free(aes->aes_mbs_alloc); - aes->aes_mbs_alloc = NULL; + if (utf8 == NULL) { + aes->aes_set = 0; + return (1); /* Succeeded in clearing everything. */ } - if (aes->aes_wcs_alloc) { - free(aes->aes_wcs_alloc); - aes->aes_wcs_alloc = NULL; + + /* Save the UTF8 string. */ + archive_strcpy(&(aes->aes_utf8), utf8); + + /* Empty the mbs and wcs strings. */ + archive_string_empty(&(aes->aes_mbs)); + if (aes->aes_wcs) { + free((wchar_t *)(uintptr_t)aes->aes_wcs); + aes->aes_wcs = NULL; } - aes->aes_mbs = NULL; - aes->aes_wcs = wcs; + + aes->aes_set = AES_SET_UTF8; /* Only UTF8 is set now. */ + + /* TODO: We should just do a direct UTF-8 to MBS conversion + * here. That would be faster, use less space, and give the + * same information. 
(If a UTF-8 to MBS conversion succeeds, + * then UTF-8->WCS and Unicode->MBS conversions will both + * succeed.) */ + + /* Try converting UTF8 to WCS, return false on failure. */ + aes->aes_wcs = __archive_string_utf8_w(&(aes->aes_utf8)); + if (aes->aes_wcs == NULL) + return (0); + aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; /* Both UTF8 and WCS set. */ + + /* Try converting WCS to MBS, return false on failure. */ + if (archive_strappend_w_mbs(&(aes->aes_mbs), aes->aes_wcs) == NULL) + return (0); + aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS; + + /* All conversions succeeded. */ + return (1); } -#endif -static void +static int aes_copy_wcs(struct aes *aes, const wchar_t *wcs) { - aes_copy_wcs_len(aes, wcs, wcslen(wcs)); + return aes_copy_wcs_len(aes, wcs, wcs == NULL ? 0 : wcslen(wcs)); } -static void +static int aes_copy_wcs_len(struct aes *aes, const wchar_t *wcs, size_t len) { - if (aes->aes_mbs_alloc) { - free(aes->aes_mbs_alloc); - aes->aes_mbs_alloc = NULL; + wchar_t *w; + + if (wcs == NULL) { + aes->aes_set = 0; + return (0); } - if (aes->aes_wcs_alloc) { - free(aes->aes_wcs_alloc); - aes->aes_wcs_alloc = NULL; + aes->aes_set = AES_SET_WCS; /* Only WCS form set. 
*/ + archive_string_empty(&(aes->aes_mbs)); + archive_string_empty(&(aes->aes_utf8)); + if (aes->aes_wcs) { + free((wchar_t *)(uintptr_t)aes->aes_wcs); + aes->aes_wcs = NULL; } - aes->aes_mbs = NULL; - aes->aes_wcs_alloc = (wchar_t *)malloc((len + 1) * sizeof(wchar_t)); - if (aes->aes_wcs_alloc == NULL) + w = (wchar_t *)malloc((len + 1) * sizeof(wchar_t)); + if (w == NULL) __archive_errx(1, "No memory for aes_copy_wcs()"); - wmemcpy(aes->aes_wcs_alloc, wcs, len); - aes->aes_wcs_alloc[len] = L'\0'; - aes->aes_wcs = aes->aes_wcs_alloc; + wmemcpy(w, wcs, len); + w[len] = L'\0'; + aes->aes_wcs = w; + return (0); } +/**************************************************************************** + * + * Public Interface + * + ****************************************************************************/ + struct archive_entry * archive_entry_clear(struct archive_entry *entry) { @@ -350,6 +395,8 @@ archive_entry_clone(struct archive_entry *entry) aes_copy(&entry2->ae_hardlink, &entry->ae_hardlink); aes_copy(&entry2->ae_pathname, &entry->ae_pathname); aes_copy(&entry2->ae_symlink, &entry->ae_symlink); + entry2->ae_hardlinkset = entry->ae_hardlinkset; + entry2->ae_symlinkset = entry->ae_symlinkset; aes_copy(&entry2->ae_uname, &entry->ae_uname); /* Copy ACL data over. 
*/ @@ -515,12 +562,16 @@ archive_entry_gname_w(struct archive_entry *entry) const char * archive_entry_hardlink(struct archive_entry *entry) { + if (!entry->ae_hardlinkset) + return (NULL); return (aes_get_mbs(&entry->ae_hardlink)); } const wchar_t * archive_entry_hardlink_w(struct archive_entry *entry) { + if (!entry->ae_hardlinkset) + return (NULL); return (aes_get_wcs(&entry->ae_hardlink)); } @@ -600,15 +651,25 @@ archive_entry_size(struct archive_entry *entry) return (entry->ae_stat.aest_size); } +const char * +archive_entry_sourcepath(struct archive_entry *entry) +{ + return (aes_get_mbs(&entry->ae_sourcepath)); +} + const char * archive_entry_symlink(struct archive_entry *entry) { + if (!entry->ae_symlinkset) + return (NULL); return (aes_get_mbs(&entry->ae_symlink)); } const wchar_t * archive_entry_symlink_w(struct archive_entry *entry) { + if (!entry->ae_symlinkset) + return (NULL); return (aes_get_wcs(&entry->ae_symlink)); } @@ -651,6 +712,15 @@ archive_entry_set_fflags(struct archive_entry *entry, entry->ae_fflags_clear = clear; } +const char * +archive_entry_copy_fflags_text(struct archive_entry *entry, + const char *flags) +{ + aes_copy_mbs(&entry->ae_fflags_text, flags); + return (ae_strtofflags(flags, + &entry->ae_fflags_set, &entry->ae_fflags_clear)); +} + const wchar_t * archive_entry_copy_fflags_text_w(struct archive_entry *entry, const wchar_t *flags) @@ -685,6 +755,12 @@ archive_entry_copy_gname_w(struct archive_entry *entry, const wchar_t *name) aes_copy_wcs(&entry->ae_gname, name); } +int +archive_entry_update_gname_utf8(struct archive_entry *entry, const char *name) +{ + return (aes_update_utf8(&entry->ae_gname, name)); +} + void archive_entry_set_ino(struct archive_entry *entry, unsigned long ino) { @@ -696,18 +772,24 @@ void archive_entry_set_hardlink(struct archive_entry *entry, const char *target) { aes_set_mbs(&entry->ae_hardlink, target); + if (target != NULL) + entry->ae_hardlinkset = 1; } void archive_entry_copy_hardlink(struct 
archive_entry *entry, const char *target) { aes_copy_mbs(&entry->ae_hardlink, target); + if (target != NULL) + entry->ae_hardlinkset = 1; } void archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target) { aes_copy_wcs(&entry->ae_hardlink, target); + if (target != NULL) + entry->ae_hardlinkset = 1; } void @@ -754,8 +836,7 @@ archive_entry_set_devminor(struct archive_entry *entry, dev_t m) void archive_entry_set_link(struct archive_entry *entry, const char *target) { - if (entry->ae_symlink.aes_mbs != NULL || - entry->ae_symlink.aes_wcs != NULL) + if (entry->ae_symlinkset) aes_set_mbs(&entry->ae_symlink, target); else aes_set_mbs(&entry->ae_hardlink, target); @@ -765,8 +846,7 @@ archive_entry_set_link(struct archive_entry *entry, const char *target) void archive_entry_copy_link(struct archive_entry *entry, const char *target) { - if (entry->ae_symlink.aes_mbs != NULL || - entry->ae_symlink.aes_wcs != NULL) + if (entry->ae_symlinkset) aes_copy_mbs(&entry->ae_symlink, target); else aes_copy_mbs(&entry->ae_hardlink, target); @@ -776,13 +856,21 @@ archive_entry_copy_link(struct archive_entry *entry, const char *target) void archive_entry_copy_link_w(struct archive_entry *entry, const wchar_t *target) { - if (entry->ae_symlink.aes_mbs != NULL || - entry->ae_symlink.aes_wcs != NULL) + if (entry->ae_symlinkset) aes_copy_wcs(&entry->ae_symlink, target); else aes_copy_wcs(&entry->ae_hardlink, target); } +int +archive_entry_update_link_utf8(struct archive_entry *entry, const char *target) +{ + if (entry->ae_symlinkset) + return (aes_update_utf8(&entry->ae_symlink, target)); + else + return (aes_update_utf8(&entry->ae_hardlink, target)); +} + void archive_entry_set_mode(struct archive_entry *entry, mode_t m) { @@ -823,6 +911,12 @@ archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name) aes_copy_wcs(&entry->ae_pathname, name); } +int +archive_entry_update_pathname_utf8(struct archive_entry *entry, const char *name) +{ + return 
(aes_update_utf8(&entry->ae_pathname, name)); +} + void archive_entry_set_perm(struct archive_entry *entry, mode_t p) { @@ -862,22 +956,34 @@ archive_entry_set_size(struct archive_entry *entry, int64_t s) entry->ae_stat.aest_size = s; } +void +archive_entry_copy_sourcepath(struct archive_entry *entry, const char *path) +{ + aes_set_mbs(&entry->ae_sourcepath, path); +} + void archive_entry_set_symlink(struct archive_entry *entry, const char *linkname) { aes_set_mbs(&entry->ae_symlink, linkname); + if (linkname != NULL) + entry->ae_symlinkset = 1; } void archive_entry_copy_symlink(struct archive_entry *entry, const char *linkname) { aes_copy_mbs(&entry->ae_symlink, linkname); + if (linkname != NULL) + entry->ae_symlinkset = 1; } void archive_entry_copy_symlink_w(struct archive_entry *entry, const wchar_t *linkname) { aes_copy_wcs(&entry->ae_symlink, linkname); + if (linkname != NULL) + entry->ae_symlinkset = 1; } void @@ -905,6 +1011,12 @@ archive_entry_copy_uname_w(struct archive_entry *entry, const wchar_t *name) aes_copy_wcs(&entry->ae_uname, name); } +int +archive_entry_update_uname_utf8(struct archive_entry *entry, const char *name) +{ + return (aes_update_utf8(&entry->ae_uname, name)); +} + /* * ACL management. The following would, of course, be a lot simpler * if: 1) the last draft of POSIX.1e were a really thorough and @@ -1744,7 +1856,7 @@ static struct flag { * Convert file flags to a comma-separated string. If no flags * are set, return the empty string. */ -char * +static char * ae_fflagstostr(unsigned long bitset, unsigned long bitclear) { char *string, *dp; @@ -1788,6 +1900,70 @@ ae_fflagstostr(unsigned long bitset, unsigned long bitclear) return (string); } +/* + * strtofflags -- + * Take string of arguments and return file flags. This + * version works a little differently than strtofflags(3). + * In particular, it always tests every token, skipping any + * unrecognized tokens. 
It returns a pointer to the first + * unrecognized token, or NULL if every token was recognized. + * This version is also const-correct and does not modify the + * provided string. + */ +static const char * +ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp) +{ + const char *start, *end; + struct flag *flag; + unsigned long set, clear; + const char *failed; + + set = clear = 0; + start = s; + failed = NULL; + /* Find start of first token. */ + while (*start == '\t' || *start == ' ' || *start == ',') + start++; + while (*start != '\0') { + /* Locate end of token. */ + end = start; + while (*end != '\0' && *end != '\t' && + *end != ' ' && *end != ',') + end++; + for (flag = flags; flag->wname != NULL; flag++) { + if (memcmp(start, flag->wname, end - start) == 0) { + /* Matched "noXXXX", so reverse the sense. */ + clear |= flag->set; + set |= flag->clear; + break; + } else if (memcmp(start, flag->wname + 2, end - start) + == 0) { + /* Matched "XXXX", so don't reverse. */ + set |= flag->set; + clear |= flag->clear; + break; + } + } + /* Ignore unknown flag names. */ + if (flag->wname == NULL && failed == NULL) + failed = start; + + /* Find start of next token. */ + start = end; + while (*start == '\t' || *start == ' ' || *start == ',') + start++; + + } + + if (setp) + *setp = set; + if (clrp) + *clrp = clear; + + /* Return location of first failure. */ + return (failed); +} + /* * wcstofflags -- * Take string of arguments and return file flags. This @@ -1798,7 +1974,7 @@ ae_fflagstostr(unsigned long bitset, unsigned long bitclear) * This version is also const-correct and does not modify the * provided string. 
*/ -const wchar_t * +static const wchar_t * ae_wcstofflags(const wchar_t *s, unsigned long *setp, unsigned long *clrp) { const wchar_t *start, *end; diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h index 3bfe9e912..5f7771ca7 100644 --- a/libarchive/archive_entry.h +++ b/libarchive/archive_entry.h @@ -31,17 +31,56 @@ #include #include /* for wchar_t */ #include + +/* Get appropriate definitions of standard POSIX-style types. */ +/* These should match the types used in 'struct stat' */ +#ifdef _WIN32 +#define __LA_UID_T unsigned int +#define __LA_GID_T unsigned int +#define __LA_INO_T unsigned int +#define __LA_DEV_T unsigned int +#define __LA_MODE_T unsigned short +#else #include +#define __LA_UID_T uid_t +#define __LA_GID_T gid_t +#define __LA_INO_T ino_t +#define __LA_DEV_T dev_t +#define __LA_MODE_T mode_t +#endif + +/* + * On Windows, define LIBARCHIVE_STATIC if you're building or using a + * .lib. The default here assumes you're building a DLL. Only + * libarchive source should ever define __LIBARCHIVE_BUILD. + */ +#if ((defined __WIN32__) || (defined _WIN32)) && (!defined LIBARCHIVE_STATIC) +# ifdef __LIBARCHIVE_BUILD +# ifdef __GNUC__ +# define __LA_DECL __attribute__((dllexport)) extern +# else +# define __LA_DECL __declspec(dllexport) +# endif +# else +# ifdef __GNUC__ +# define __LA_DECL __attribute__((dllimport)) extern +# else +# define __LA_DECL __declspec(dllimport) +# endif +# endif +#else +/* Static libraries on all platforms and shared libraries on non-Windows. */ +# define __LA_DECL +#endif #ifdef __cplusplus extern "C" { #endif - /* * Description of an archive entry. * - * Basically, a "struct stat" with a few text fields added in. + * You can think of this as "struct stat" with some text fields added in. * * TODO: Add "comment", "charset", and possibly other entries that are * supported by "pax interchange" format. 
However, GNU, ustar, cpio, @@ -90,50 +129,51 @@ struct archive_entry; * Basic object manipulation */ -struct archive_entry *archive_entry_clear(struct archive_entry *); +__LA_DECL struct archive_entry *archive_entry_clear(struct archive_entry *); /* The 'clone' function does a deep copy; all of the strings are copied too. */ -struct archive_entry *archive_entry_clone(struct archive_entry *); -void archive_entry_free(struct archive_entry *); -struct archive_entry *archive_entry_new(void); +__LA_DECL struct archive_entry *archive_entry_clone(struct archive_entry *); +__LA_DECL void archive_entry_free(struct archive_entry *); +__LA_DECL struct archive_entry *archive_entry_new(void); /* * Retrieve fields from an archive_entry. */ -time_t archive_entry_atime(struct archive_entry *); -long archive_entry_atime_nsec(struct archive_entry *); -time_t archive_entry_ctime(struct archive_entry *); -long archive_entry_ctime_nsec(struct archive_entry *); -dev_t archive_entry_dev(struct archive_entry *); -dev_t archive_entry_devmajor(struct archive_entry *); -dev_t archive_entry_devminor(struct archive_entry *); -mode_t archive_entry_filetype(struct archive_entry *); -void archive_entry_fflags(struct archive_entry *, +__LA_DECL time_t archive_entry_atime(struct archive_entry *); +__LA_DECL long archive_entry_atime_nsec(struct archive_entry *); +__LA_DECL time_t archive_entry_ctime(struct archive_entry *); +__LA_DECL long archive_entry_ctime_nsec(struct archive_entry *); +__LA_DECL dev_t archive_entry_dev(struct archive_entry *); +__LA_DECL dev_t archive_entry_devmajor(struct archive_entry *); +__LA_DECL dev_t archive_entry_devminor(struct archive_entry *); +__LA_DECL __LA_MODE_T archive_entry_filetype(struct archive_entry *); +__LA_DECL void archive_entry_fflags(struct archive_entry *, unsigned long * /* set */, unsigned long * /* clear */); -const char *archive_entry_fflags_text(struct archive_entry *); -gid_t archive_entry_gid(struct archive_entry *); -const char 
*archive_entry_gname(struct archive_entry *); -const wchar_t *archive_entry_gname_w(struct archive_entry *); -const char *archive_entry_hardlink(struct archive_entry *); -const wchar_t *archive_entry_hardlink_w(struct archive_entry *); -ino_t archive_entry_ino(struct archive_entry *); -mode_t archive_entry_mode(struct archive_entry *); -time_t archive_entry_mtime(struct archive_entry *); -long archive_entry_mtime_nsec(struct archive_entry *); -unsigned int archive_entry_nlink(struct archive_entry *); -const char *archive_entry_pathname(struct archive_entry *); -const wchar_t *archive_entry_pathname_w(struct archive_entry *); -dev_t archive_entry_rdev(struct archive_entry *); -dev_t archive_entry_rdevmajor(struct archive_entry *); -dev_t archive_entry_rdevminor(struct archive_entry *); -int64_t archive_entry_size(struct archive_entry *); -const char *archive_entry_strmode(struct archive_entry *); -const char *archive_entry_symlink(struct archive_entry *); -const wchar_t *archive_entry_symlink_w(struct archive_entry *); -uid_t archive_entry_uid(struct archive_entry *); -const char *archive_entry_uname(struct archive_entry *); -const wchar_t *archive_entry_uname_w(struct archive_entry *); +__LA_DECL const char *archive_entry_fflags_text(struct archive_entry *); +__LA_DECL __LA_GID_T archive_entry_gid(struct archive_entry *); +__LA_DECL const char *archive_entry_gname(struct archive_entry *); +__LA_DECL const wchar_t *archive_entry_gname_w(struct archive_entry *); +__LA_DECL const char *archive_entry_hardlink(struct archive_entry *); +__LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *); +__LA_DECL __LA_INO_T archive_entry_ino(struct archive_entry *); +__LA_DECL __LA_MODE_T archive_entry_mode(struct archive_entry *); +__LA_DECL time_t archive_entry_mtime(struct archive_entry *); +__LA_DECL long archive_entry_mtime_nsec(struct archive_entry *); +__LA_DECL unsigned int archive_entry_nlink(struct archive_entry *); +__LA_DECL const char 
*archive_entry_pathname(struct archive_entry *); +__LA_DECL const wchar_t *archive_entry_pathname_w(struct archive_entry *); +__LA_DECL dev_t archive_entry_rdev(struct archive_entry *); +__LA_DECL dev_t archive_entry_rdevmajor(struct archive_entry *); +__LA_DECL dev_t archive_entry_rdevminor(struct archive_entry *); +__LA_DECL const char *archive_entry_sourcepath(struct archive_entry *); +__LA_DECL int64_t archive_entry_size(struct archive_entry *); +__LA_DECL const char *archive_entry_strmode(struct archive_entry *); +__LA_DECL const char *archive_entry_symlink(struct archive_entry *); +__LA_DECL const wchar_t *archive_entry_symlink_w(struct archive_entry *); +__LA_DECL __LA_UID_T archive_entry_uid(struct archive_entry *); +__LA_DECL const char *archive_entry_uname(struct archive_entry *); +__LA_DECL const wchar_t *archive_entry_uname_w(struct archive_entry *); /* * Set fields in an archive_entry. @@ -142,48 +182,54 @@ const wchar_t *archive_entry_uname_w(struct archive_entry *); * In contrast, 'copy' functions do copy the object pointed to. 
*/ -void archive_entry_set_atime(struct archive_entry *, time_t, long); -void archive_entry_set_ctime(struct archive_entry *, time_t, long); -void archive_entry_set_dev(struct archive_entry *, dev_t); -void archive_entry_set_devmajor(struct archive_entry *, dev_t); -void archive_entry_set_devminor(struct archive_entry *, dev_t); -void archive_entry_set_filetype(struct archive_entry *, unsigned int); -void archive_entry_set_fflags(struct archive_entry *, +__LA_DECL void archive_entry_set_atime(struct archive_entry *, time_t, long); +__LA_DECL void archive_entry_set_ctime(struct archive_entry *, time_t, long); +__LA_DECL void archive_entry_set_dev(struct archive_entry *, dev_t); +__LA_DECL void archive_entry_set_devmajor(struct archive_entry *, dev_t); +__LA_DECL void archive_entry_set_devminor(struct archive_entry *, dev_t); +__LA_DECL void archive_entry_set_filetype(struct archive_entry *, unsigned int); +__LA_DECL void archive_entry_set_fflags(struct archive_entry *, unsigned long /* set */, unsigned long /* clear */); /* Returns pointer to start of first invalid token, or NULL if none. */ /* Note that all recognized tokens are processed, regardless. 
*/ -const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *, +__LA_DECL const char *archive_entry_copy_fflags_text(struct archive_entry *, + const char *); +__LA_DECL const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *, const wchar_t *); -void archive_entry_set_gid(struct archive_entry *, gid_t); -void archive_entry_set_gname(struct archive_entry *, const char *); -void archive_entry_copy_gname(struct archive_entry *, const char *); -void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *); -void archive_entry_set_hardlink(struct archive_entry *, const char *); -void archive_entry_copy_hardlink(struct archive_entry *, const char *); -void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *); -void archive_entry_set_ino(struct archive_entry *, unsigned long); -void archive_entry_set_link(struct archive_entry *, const char *); -void archive_entry_copy_link(struct archive_entry *, const char *); -void archive_entry_copy_link_w(struct archive_entry *, const wchar_t *); -void archive_entry_set_mode(struct archive_entry *, mode_t); -void archive_entry_set_mtime(struct archive_entry *, time_t, long); -void archive_entry_set_nlink(struct archive_entry *, unsigned int); -void archive_entry_set_pathname(struct archive_entry *, const char *); -void archive_entry_copy_pathname(struct archive_entry *, const char *); -void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *); -void archive_entry_set_perm(struct archive_entry *, mode_t); -void archive_entry_set_rdev(struct archive_entry *, dev_t); -void archive_entry_set_rdevmajor(struct archive_entry *, dev_t); -void archive_entry_set_rdevminor(struct archive_entry *, dev_t); -void archive_entry_set_size(struct archive_entry *, int64_t); -void archive_entry_set_symlink(struct archive_entry *, const char *); -void archive_entry_copy_symlink(struct archive_entry *, const char *); -void archive_entry_copy_symlink_w(struct archive_entry *, const 
wchar_t *); -void archive_entry_set_uid(struct archive_entry *, uid_t); -void archive_entry_set_uname(struct archive_entry *, const char *); -void archive_entry_copy_uname(struct archive_entry *, const char *); -void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *); - +__LA_DECL void archive_entry_set_gid(struct archive_entry *, __LA_GID_T); +__LA_DECL void archive_entry_set_gname(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_gname(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *); +__LA_DECL int archive_entry_update_gname_utf8(struct archive_entry *, const char *); +__LA_DECL void archive_entry_set_hardlink(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_hardlink(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *); +__LA_DECL void archive_entry_set_ino(struct archive_entry *, unsigned long); +__LA_DECL void archive_entry_set_link(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_link(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_link_w(struct archive_entry *, const wchar_t *); +__LA_DECL int archive_entry_update_link_utf8(struct archive_entry *, const char *); +__LA_DECL void archive_entry_set_mode(struct archive_entry *, __LA_MODE_T); +__LA_DECL void archive_entry_set_mtime(struct archive_entry *, time_t, long); +__LA_DECL void archive_entry_set_nlink(struct archive_entry *, unsigned int); +__LA_DECL void archive_entry_set_pathname(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_pathname(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *); +__LA_DECL int archive_entry_update_pathname_utf8(struct archive_entry *, const char *); +__LA_DECL void archive_entry_set_perm(struct archive_entry *, 
__LA_MODE_T); +__LA_DECL void archive_entry_set_rdev(struct archive_entry *, dev_t); +__LA_DECL void archive_entry_set_rdevmajor(struct archive_entry *, dev_t); +__LA_DECL void archive_entry_set_rdevminor(struct archive_entry *, dev_t); +__LA_DECL void archive_entry_set_size(struct archive_entry *, int64_t); +__LA_DECL void archive_entry_copy_sourcepath(struct archive_entry *, const char *); +__LA_DECL void archive_entry_set_symlink(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_symlink(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *); +__LA_DECL void archive_entry_set_uid(struct archive_entry *, __LA_UID_T); +__LA_DECL void archive_entry_set_uname(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_uname(struct archive_entry *, const char *); +__LA_DECL void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *); +__LA_DECL int archive_entry_update_uname_utf8(struct archive_entry *, const char *); /* * Routines to bulk copy fields to/from a platform-native "struct * stat." Libarchive used to just store a struct stat inside of each @@ -193,8 +239,8 @@ void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *); * * TODO: On Linux, provide both stat32 and stat64 versions of these functions. */ -const struct stat *archive_entry_stat(struct archive_entry *); -void archive_entry_copy_stat(struct archive_entry *, const struct stat *); +__LA_DECL const struct stat *archive_entry_stat(struct archive_entry *); +__LA_DECL void archive_entry_copy_stat(struct archive_entry *, const struct stat *); /* * ACL routines. This used to simply store and return text-format ACL @@ -242,11 +288,11 @@ void archive_entry_copy_stat(struct archive_entry *, const struct stat *); * POSIX.1e) is useful for handling archive formats that combine * default and access information in a single ACL list. 
*/ -void archive_entry_acl_clear(struct archive_entry *); -void archive_entry_acl_add_entry(struct archive_entry *, +__LA_DECL void archive_entry_acl_clear(struct archive_entry *); +__LA_DECL void archive_entry_acl_add_entry(struct archive_entry *, int /* type */, int /* permset */, int /* tag */, int /* qual */, const char * /* name */); -void archive_entry_acl_add_entry_w(struct archive_entry *, +__LA_DECL void archive_entry_acl_add_entry_w(struct archive_entry *, int /* type */, int /* permset */, int /* tag */, int /* qual */, const wchar_t * /* name */); @@ -255,11 +301,11 @@ void archive_entry_acl_add_entry_w(struct archive_entry *, * "next" entry. The want_type parameter allows you to request only * access entries or only default entries. */ -int archive_entry_acl_reset(struct archive_entry *, int /* want_type */); -int archive_entry_acl_next(struct archive_entry *, int /* want_type */, +__LA_DECL int archive_entry_acl_reset(struct archive_entry *, int /* want_type */); +__LA_DECL int archive_entry_acl_next(struct archive_entry *, int /* want_type */, int * /* type */, int * /* permset */, int * /* tag */, int * /* qual */, const char ** /* name */); -int archive_entry_acl_next_w(struct archive_entry *, int /* want_type */, +__LA_DECL int archive_entry_acl_next_w(struct archive_entry *, int /* want_type */, int * /* type */, int * /* permset */, int * /* tag */, int * /* qual */, const wchar_t ** /* name */); @@ -276,11 +322,11 @@ int archive_entry_acl_next_w(struct archive_entry *, int /* want_type */, */ #define ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID 1024 #define ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT 2048 -const wchar_t *archive_entry_acl_text_w(struct archive_entry *, +__LA_DECL const wchar_t *archive_entry_acl_text_w(struct archive_entry *, int /* flags */); /* Return a count of entries matching 'want_type' */ -int archive_entry_acl_count(struct archive_entry *, int /* want_type */); +__LA_DECL int archive_entry_acl_count(struct archive_entry *, int /* 
want_type */); /* * Private ACL parser. This is private because it handles some @@ -295,15 +341,15 @@ int archive_entry_acl_count(struct archive_entry *, int /* want_type */); * TODO: Move this declaration out of the public header and into * a private header. Warnings above are silly. */ -int __archive_entry_acl_parse_w(struct archive_entry *, +__LA_DECL int __archive_entry_acl_parse_w(struct archive_entry *, const wchar_t *, int /* type */); /* * extended attributes */ -void archive_entry_xattr_clear(struct archive_entry *); -void archive_entry_xattr_add_entry(struct archive_entry *, +__LA_DECL void archive_entry_xattr_clear(struct archive_entry *); +__LA_DECL void archive_entry_xattr_add_entry(struct archive_entry *, const char * /* name */, const void * /* value */, size_t /* size */); @@ -312,37 +358,93 @@ void archive_entry_xattr_add_entry(struct archive_entry *, * "next" entry. */ -int archive_entry_xattr_count(struct archive_entry *); -int archive_entry_xattr_reset(struct archive_entry *); -int archive_entry_xattr_next(struct archive_entry *, +__LA_DECL int archive_entry_xattr_count(struct archive_entry *); +__LA_DECL int archive_entry_xattr_reset(struct archive_entry *); +__LA_DECL int archive_entry_xattr_next(struct archive_entry *, const char ** /* name */, const void ** /* value */, size_t *); /* - * Utility to detect hardlinks. + * Utility to match up hardlinks. * - * The 'struct archive_hardlink_lookup' is a cache of entry - * names and dev/ino numbers. Here's how to use it: - * 1. Create a lookup object with archive_hardlink_lookup_new() - * 2. Hand each archive_entry to archive_hardlink_lookup(). - * That function will return NULL (this is not a hardlink to - * a previous entry) or the pathname of the first entry - * that matched this. - * 3. Use archive_hardlink_lookup_free() to release the cache. + * The 'struct archive_entry_linkresolver' is a cache of archive entries + * for files with multiple links. Here's how to use it: + * 1. 
Create a lookup object with archive_entry_linkresolver_new() + * 2. Tell it the archive format you're using. + * 3. Hand each archive_entry to archive_entry_linkify(). + * That function will return 0, 1, or 2 entries that should + * be written. + * 4. Call archive_entry_linkify(resolver, NULL) until + * no more entries are returned. + * 5. Call archive_entry_linkresolver_free(resolver) to free resources. + * + * The entries returned have their hardlink and size fields updated + * appropriately. If an entry is passed in that does not refer to + * a file with multiple links, it is returned unchanged. The intention + * is that you should be able to simply filter all entries through + * this machine. * * To make things more efficient, be sure that each entry has a valid * nlinks value. The hardlink cache uses this to track when all links * have been found. If the nlinks value is zero, it will keep every * name in the cache indefinitely, which can use a lot of memory. + * + * Note that archive_entry_size() is reset to zero if the file + * body should not be written to the archive. Pay attention! */ -struct archive_entry_linkresolver; +__LA_DECL struct archive_entry_linkresolver; -struct archive_entry_linkresolver *archive_entry_linkresolver_new(void); -void archive_entry_linkresolver_free(struct archive_entry_linkresolver *); -const char *archive_entry_linkresolve(struct archive_entry_linkresolver *, - struct archive_entry *); +/* + * There are three different strategies for marking hardlinks. + * The descriptions below name them after the best-known + * formats that rely on each strategy: + * + * "Old cpio" is the simplest, it always returns any entry unmodified. + * As far as I know, only cpio formats use this. Old cpio archives + * store every link with the full body; the onus is on the dearchiver + * to detect and properly link the files as they are restored. + * "tar" is also pretty simple; it caches a copy the first time it sees + * any link.
Subsequent appearances are modified to be hardlink + * references to the first one without any body. Used by all tar + * formats, although the newest tar formats permit the "old cpio" strategy + * as well. This strategy is very simple for the dearchiver, + * and reasonably straightforward for the archiver. + * "new cpio" is trickier. It stores the body only with the last + * occurrence. The complication is that we might not + * see every link to a particular file in a single session, so + * there's no easy way to know when we've seen the last occurrence. + * The solution here is to queue one link until we see the next. + * At the end of the session, you can enumerate any remaining + * entries by calling archive_entry_linkify(NULL) and store those + * bodies. If you have a file with three links l1, l2, and l3, + * you'll get the following behavior if you see all three links: + * linkify(l1) => NULL (the resolver stores l1 internally) + * linkify(l2) => l1 (resolver stores l2, you write l1) + * linkify(l3) => l2, l3 (all links seen, you can write both). + * If you only see l1 and l2, you'll get this behavior: + * linkify(l1) => NULL + * linkify(l2) => l1 + * linkify(NULL) => l2 (at end, you retrieve remaining links) + * As the name suggests, this strategy is used by newer cpio variants. + * It's noticeably more complex for the archiver, slightly more complex + * for the dearchiver than the tar strategy, but makes it straightforward + * to restore a file using any link by simply continuing to scan until + * you see a link that is stored with a body. In contrast, the tar + * strategy requires you to rescan the archive from the beginning to + * correctly extract an arbitrary link.
+ */ + +__LA_DECL struct archive_entry_linkresolver *archive_entry_linkresolver_new(void); +__LA_DECL void archive_entry_linkresolver_set_strategy( + struct archive_entry_linkresolver *, int /* format_code */); +__LA_DECL void archive_entry_linkresolver_free(struct archive_entry_linkresolver *); +__LA_DECL void archive_entry_linkify(struct archive_entry_linkresolver *, + struct archive_entry **, struct archive_entry **); #ifdef __cplusplus } #endif +/* This is meaningless outside of this header. */ +#undef __LA_DECL + #endif /* !ARCHIVE_ENTRY_H_INCLUDED */ diff --git a/libarchive/archive_entry_link_resolver.c b/libarchive/archive_entry_link_resolver.c index 78a3c65d0..0df9ff92e 100644 --- a/libarchive/archive_entry_link_resolver.c +++ b/libarchive/archive_entry_link_resolver.c @@ -40,135 +40,216 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.1 2007/ #include #endif +#include "archive.h" #include "archive_entry.h" +/* + * This is mostly a pretty straightforward hash table implementation. + * The only interesting bit is the different strategies used to + * match up links. These strategies match those used by various + * archiving formats: + * tar - content stored with first link, remainder refer back to it. + * This requires us to match each subsequent link up with the + * first appearance. + * cpio - Old cpio just stored body with each link, match-ups were + * implicit. This is trivial. + * new cpio - New cpio only stores body with last link, match-ups + * are implicit. This is actually quite tricky; see the notes + * below. + */ + +/* Users pass us a format code, we translate that into a strategy here. */ +#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0 +#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 1 +#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 2 + /* Initial size of link cache. 
*/ #define links_cache_initial_size 1024 +struct links_entry { + struct links_entry *next; + struct links_entry *previous; + int links; /* # links not yet seen */ + int hash; + struct archive_entry *canonical; + struct archive_entry *entry; +}; + struct archive_entry_linkresolver { - char *last_name; + struct links_entry **buckets; + struct links_entry *spare; unsigned long number_entries; size_t number_buckets; - struct links_entry **buckets; + int strategy; }; -struct links_entry { - struct links_entry *next; - struct links_entry *previous; - int links; - dev_t dev; - ino_t ino; - char *name; -}; +static struct links_entry *find_entry(struct archive_entry_linkresolver *, + struct archive_entry *); +static void grow_hash(struct archive_entry_linkresolver *); +static struct links_entry *insert_entry(struct archive_entry_linkresolver *, + struct archive_entry *); +static struct links_entry *next_entry(struct archive_entry_linkresolver *); struct archive_entry_linkresolver * archive_entry_linkresolver_new(void) { - struct archive_entry_linkresolver *links_cache; + struct archive_entry_linkresolver *res; size_t i; - links_cache = malloc(sizeof(struct archive_entry_linkresolver)); - if (links_cache == NULL) + res = malloc(sizeof(struct archive_entry_linkresolver)); + if (res == NULL) return (NULL); - memset(links_cache, 0, sizeof(struct archive_entry_linkresolver)); - links_cache->number_buckets = links_cache_initial_size; - links_cache->buckets = malloc(links_cache->number_buckets * - sizeof(links_cache->buckets[0])); - if (links_cache->buckets == NULL) { - free(links_cache); + memset(res, 0, sizeof(struct archive_entry_linkresolver)); + res->number_buckets = links_cache_initial_size; + res->buckets = malloc(res->number_buckets * + sizeof(res->buckets[0])); + if (res->buckets == NULL) { + free(res); return (NULL); } - for (i = 0; i < links_cache->number_buckets; i++) - links_cache->buckets[i] = NULL; - return (links_cache); + for (i = 0; i < res->number_buckets; i++) 
+ res->buckets[i] = NULL; + return (res); } void -archive_entry_linkresolver_free(struct archive_entry_linkresolver *links_cache) +archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res, + int fmt) { - size_t i; + int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK; + + switch (fmtbase) { + case ARCHIVE_FORMAT_CPIO: + switch (fmt) { + case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC: + case ARCHIVE_FORMAT_CPIO_SVR4_CRC: + res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO; + break; + default: + res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO; + break; + } + break; + case ARCHIVE_FORMAT_TAR: + res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; + break; + default: + res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR; + break; + } +} + +void +archive_entry_linkresolver_free(struct archive_entry_linkresolver *res) +{ + struct links_entry *le; + + if (res->buckets != NULL) { + while ((le = next_entry(res)) != NULL) + archive_entry_free(le->entry); + free(res->buckets); + res->buckets = NULL; + } + free(res); +} + +void +archive_entry_linkify(struct archive_entry_linkresolver *res, + struct archive_entry **e, struct archive_entry **f) +{ + struct links_entry *le; + struct archive_entry *t; + + *f = NULL; /* Default: Don't return a second entry. */ + + if (*e == NULL) { + le = next_entry(res); + if (le != NULL) + *e = le->entry; + return; + } - if (links_cache->buckets == NULL) + /* If it has only one link, then we're done. 
*/ + if (archive_entry_nlink(*e) == 1) return; - for (i = 0; i < links_cache->number_buckets; i++) { - while (links_cache->buckets[i] != NULL) { - struct links_entry *lp = links_cache->buckets[i]->next; - if (links_cache->buckets[i]->name != NULL) - free(links_cache->buckets[i]->name); - free(links_cache->buckets[i]); - links_cache->buckets[i] = lp; + switch (res->strategy) { + case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR: + le = find_entry(res, *e); + if (le != NULL) { + archive_entry_set_size(*e, 0); + archive_entry_set_hardlink(*e, + archive_entry_pathname(le->canonical)); + } else + insert_entry(res, *e); + return; + case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO: + /* This one is trivial. */ + return; + case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO: + le = find_entry(res, *e); + if (le != NULL) { + /* + * Put the new entry in le, return the + * old entry from le. + */ + t = *e; + *e = le->entry; + le->entry = t; + /* Make the old entry into a hardlink. */ + archive_entry_set_size(*e, 0); + archive_entry_set_hardlink(*e, + archive_entry_pathname(le->canonical)); + /* If we ran out of links, return the + * final entry as well. */ + if (le->links == 0) + *f = le->entry; + } else { + /* + * If we haven't seen it, tuck it away + * for future use. + */ + le = insert_entry(res, *e); + le->entry = *e; + *e = NULL; } + return; + default: + break; } - free(links_cache->buckets); - links_cache->buckets = NULL; + return; } -const char * -archive_entry_linkresolve(struct archive_entry_linkresolver *links_cache, +static struct links_entry * +find_entry(struct archive_entry_linkresolver *res, struct archive_entry *entry) { - struct links_entry *le, **new_buckets; - int hash; - size_t i, new_size; + struct links_entry *le; + int hash, bucket; dev_t dev; ino_t ino; - int nlinks; - - /* Free a held name. */ - free(links_cache->last_name); - links_cache->last_name = NULL; + /* Free a held entry. 
*/ + if (res->spare != NULL) { + archive_entry_free(res->spare->canonical); + free(res->spare); + res->spare = NULL; + } /* If the links cache overflowed and got flushed, don't bother. */ - if (links_cache->buckets == NULL) + if (res->buckets == NULL) return (NULL); dev = archive_entry_dev(entry); ino = archive_entry_ino(entry); - nlinks = archive_entry_nlink(entry); - - /* An entry with one link can't be a hard link. */ - if (nlinks == 1) - return (NULL); - - /* If the links cache is getting too full, enlarge the hash table. */ - if (links_cache->number_entries > links_cache->number_buckets * 2) - { - /* Try to enlarge the bucket list. */ - new_size = links_cache->number_buckets * 2; - new_buckets = malloc(new_size * sizeof(struct links_entry *)); - - if (new_buckets != NULL) { - memset(new_buckets, 0, - new_size * sizeof(struct links_entry *)); - for (i = 0; i < links_cache->number_buckets; i++) { - while (links_cache->buckets[i] != NULL) { - /* Remove entry from old bucket. */ - le = links_cache->buckets[i]; - links_cache->buckets[i] = le->next; - - /* Add entry to new bucket. */ - hash = (le->dev ^ le->ino) % new_size; - - if (new_buckets[hash] != NULL) - new_buckets[hash]->previous = - le; - le->next = new_buckets[hash]; - le->previous = NULL; - new_buckets[hash] = le; - } - } - free(links_cache->buckets); - links_cache->buckets = new_buckets; - links_cache->number_buckets = new_size; - } - } + hash = dev ^ ino; /* Try to locate this entry in the links cache. */ - hash = ( dev ^ ino ) % links_cache->number_buckets; - for (le = links_cache->buckets[hash]; le != NULL; le = le->next) { - if (le->dev == dev && le->ino == ino) { + bucket = hash % res->number_buckets; + for (le = res->buckets[bucket]; le != NULL; le = le->next) { + if (le->hash == hash + && dev == archive_entry_dev(le->entry) + && ino == archive_entry_ino(le->entry)) { /* * Decrement link count each time and release * the entry if it hits zero. 
This saves @@ -177,46 +258,123 @@ archive_entry_linkresolve(struct archive_entry_linkresolver *links_cache, */ --le->links; if (le->links > 0) - return (le->name); - /* - * When we release the entry, save the name - * until the next call. - */ - links_cache->last_name = le->name; - /* - * Release the entry. - */ + return (le); + /* Remove it from this hash bucket. */ if (le->previous != NULL) le->previous->next = le->next; if (le->next != NULL) le->next->previous = le->previous; - if (links_cache->buckets[hash] == le) - links_cache->buckets[hash] = le->next; - links_cache->number_entries--; - free(le); - return (links_cache->last_name); + if (res->buckets[bucket] == le) + res->buckets[bucket] = le->next; + res->number_entries--; + /* Defer freeing this entry. */ + res->spare = le; + return (le); } } + return (NULL); +} + +static struct links_entry * +next_entry(struct archive_entry_linkresolver *res) +{ + struct links_entry *le; + size_t bucket; + + /* Free a held entry. */ + if (res->spare != NULL) { + archive_entry_free(res->spare->canonical); + free(res->spare); + res->spare = NULL; + } + + /* If the links cache overflowed and got flushed, don't bother. */ + if (res->buckets == NULL) + return (NULL); + + /* Look for next non-empty bucket in the links cache. */ + for (bucket = 0; bucket < res->number_buckets; bucket++) { + le = res->buckets[bucket]; + if (le != NULL) { + /* Remove it from this hash bucket. */ + if (le->next != NULL) + le->next->previous = le->previous; + res->buckets[bucket] = le->next; + res->number_entries--; + /* Defer freeing this entry. */ + res->spare = le; + return (le); + } + } + return (NULL); +} + +static struct links_entry * +insert_entry(struct archive_entry_linkresolver *res, + struct archive_entry *entry) +{ + struct links_entry *le; + int hash, bucket; /* Add this entry to the links cache. 
*/ le = malloc(sizeof(struct links_entry)); if (le == NULL) return (NULL); - le->name = strdup(archive_entry_pathname(entry)); - if (le->name == NULL) { - free(le); - return (NULL); - } + le->entry = entry; + + /* If the links cache is getting too full, enlarge the hash table. */ + if (res->number_entries > res->number_buckets * 2) + grow_hash(res); + + hash = archive_entry_dev(entry) ^ archive_entry_ino(entry); + bucket = hash % res->number_buckets; /* If we could allocate the entry, record it. */ - if (links_cache->buckets[hash] != NULL) - links_cache->buckets[hash]->previous = le; - links_cache->number_entries++; - le->next = links_cache->buckets[hash]; + if (res->buckets[bucket] != NULL) + res->buckets[bucket]->previous = le; + res->number_entries++; + le->next = res->buckets[bucket]; le->previous = NULL; - links_cache->buckets[hash] = le; - le->dev = dev; - le->ino = ino; - le->links = nlinks - 1; - return (NULL); + res->buckets[bucket] = le; + le->hash = hash; + le->links = archive_entry_nlink(entry) - 1; + le->canonical = archive_entry_clone(entry); + return (le); +} + +static void +grow_hash(struct archive_entry_linkresolver *res) +{ + struct links_entry *le, **new_buckets; + size_t new_size; + size_t i, bucket; + + /* Try to enlarge the bucket list. */ + new_size = res->number_buckets * 2; + new_buckets = malloc(new_size * sizeof(struct links_entry *)); + + if (new_buckets != NULL) { + memset(new_buckets, 0, + new_size * sizeof(struct links_entry *)); + for (i = 0; i < res->number_buckets; i++) { + while (res->buckets[i] != NULL) { + /* Remove entry from old bucket. */ + le = res->buckets[i]; + res->buckets[i] = le->next; + + /* Add entry to new bucket. 
*/ + bucket = le->hash % new_size; + + if (new_buckets[bucket] != NULL) + new_buckets[bucket]->previous = + le; + le->next = new_buckets[bucket]; + le->previous = NULL; + new_buckets[bucket] = le; + } + } + free(res->buckets); + res->buckets = new_buckets; + res->number_buckets = new_size; + } } diff --git a/libarchive/archive_entry_private.h b/libarchive/archive_entry_private.h index 0d368a4dd..f893fb982 100644 --- a/libarchive/archive_entry_private.h +++ b/libarchive/archive_entry_private.h @@ -28,17 +28,25 @@ #ifndef ARCHIVE_ENTRY_PRIVATE_H_INCLUDED #define ARCHIVE_ENTRY_PRIVATE_H_INCLUDED +#include "archive_string.h" + /* * Handle wide character (i.e., Unicode) and non-wide character * strings transparently. - * */ struct aes { - const char *aes_mbs; - char *aes_mbs_alloc; + struct archive_string aes_mbs; + struct archive_string aes_utf8; const wchar_t *aes_wcs; - wchar_t *aes_wcs_alloc; + /* Bitmap of which of the above are valid. Because we're lazy + * about malloc-ing and reusing the underlying storage, we + * can't rely on NULL pointers to indicate whether a string + * has been set. */ + int aes_set; +#define AES_SET_MBS 1 +#define AES_SET_UTF8 2 +#define AES_SET_WCS 4 }; struct ae_acl { @@ -128,8 +136,6 @@ struct archive_entry { dev_t aest_rdevminor; } ae_stat; - - /* * Use aes here so that we get transparent mbs<->wcs conversions. */ @@ -141,15 +147,23 @@ struct archive_entry { struct aes ae_pathname; /* Name of entry */ struct aes ae_symlink; /* symlink contents */ struct aes ae_uname; /* Name of owner */ + unsigned char ae_hardlinkset; + unsigned char ae_symlinkset; + + /* Not used within libarchive; useful for some clients. */ + struct aes ae_sourcepath; /* Path this entry is sourced from. */ + /* ACL support. */ struct ae_acl *acl_head; struct ae_acl *acl_p; int acl_state; /* See acl_next for details. */ wchar_t *acl_text_w; + /* extattr support. */ struct ae_xattr *xattr_head; struct ae_xattr *xattr_p; + /* Miscellaneous. 
*/ char strmode[12]; }; diff --git a/libarchive/archive_platform.h b/libarchive/archive_platform.h index b14ccd820..41fd4e549 100644 --- a/libarchive/archive_platform.h +++ b/libarchive/archive_platform.h @@ -36,6 +36,9 @@ #ifndef ARCHIVE_PLATFORM_H_INCLUDED #define ARCHIVE_PLATFORM_H_INCLUDED +/* archive.h and archive_entry.h require this. */ +#define __LIBARCHIVE_BUILD 1 + #ifdef _WIN32 #include "config_windows.h" #include "archive_windows.h" diff --git a/libarchive/archive_read_support_format_iso9660.c b/libarchive/archive_read_support_format_iso9660.c index d333f0ccb..ee9831b90 100644 --- a/libarchive/archive_read_support_format_iso9660.c +++ b/libarchive/archive_read_support_format_iso9660.c @@ -908,6 +908,11 @@ fprintf(stderr, " *** Discarding CE data.\n"); file->ce_size = 0; } + /* Don't waste time seeking for zero-length bodies. */ + if (file->size == 0) { + file->offset = iso9660->current_position; + } + /* If CE exists, find and read it now. */ if (file->ce_offset > 0) offset = file->ce_offset; diff --git a/libarchive/archive_read_support_format_tar.c b/libarchive/archive_read_support_format_tar.c index 76fda2d63..147ec0b27 100644 --- a/libarchive/archive_read_support_format_tar.c +++ b/libarchive/archive_read_support_format_tar.c @@ -145,6 +145,8 @@ struct sparse_block { struct tar { struct archive_string acl_text; struct archive_string entry_pathname; + /* For "GNU.sparse.name" and other similar path extensions. 
*/ + struct archive_string entry_pathname_override; struct archive_string entry_linkpath; struct archive_string entry_uname; struct archive_string entry_gname; @@ -272,6 +274,7 @@ archive_read_format_tar_cleanup(struct archive_read *a) gnu_clear_sparse_list(tar); archive_string_free(&tar->acl_text); archive_string_free(&tar->entry_pathname); + archive_string_free(&tar->entry_pathname_override); archive_string_free(&tar->entry_linkpath); archive_string_free(&tar->entry_uname); archive_string_free(&tar->entry_gname); @@ -1174,7 +1177,6 @@ pax_header(struct archive_read *a, struct tar *tar, size_t attr_length, l, line_length; char *line, *p; char *key, *value; - wchar_t *wp; int err, err2; attr_length = strlen(attr); @@ -1182,6 +1184,7 @@ pax_header(struct archive_read *a, struct tar *tar, archive_string_empty(&(tar->entry_gname)); archive_string_empty(&(tar->entry_linkpath)); archive_string_empty(&(tar->entry_pathname)); + archive_string_empty(&(tar->entry_pathname_override)); archive_string_empty(&(tar->entry_uname)); err = ARCHIVE_OK; while (attr_length > 0) { @@ -1257,13 +1260,13 @@ pax_header(struct archive_read *a, struct tar *tar, if (tar->pax_hdrcharset_binary) archive_entry_copy_gname(entry, value); else { - wp = utf8_decode(tar, value, strlen(value)); - if (wp == NULL) { - archive_entry_copy_gname(entry, value); - if (err > ARCHIVE_WARN) - err = ARCHIVE_WARN; - } else - archive_entry_copy_gname_w(entry, wp); + if (!archive_entry_update_gname_utf8(entry, value)) { + err = ARCHIVE_WARN; + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Gname in pax header can't " + "be converted to current locale."); + } } } if (archive_strlen(&(tar->entry_linkpath)) > 0) { @@ -1271,27 +1274,40 @@ pax_header(struct archive_read *a, struct tar *tar, if (tar->pax_hdrcharset_binary) archive_entry_copy_link(entry, value); else { - wp = utf8_decode(tar, value, strlen(value)); - if (wp == NULL) { - archive_entry_copy_link(entry, value); - if (err > ARCHIVE_WARN) - err 
= ARCHIVE_WARN; - } else - archive_entry_copy_link_w(entry, wp); + if (!archive_entry_update_link_utf8(entry, value)) { + err = ARCHIVE_WARN; + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Linkname in pax header can't " + "be converted to current locale."); + } } } - if (archive_strlen(&(tar->entry_pathname)) > 0) { + /* + * Some extensions (such as the GNU sparse file extensions) + * deliberately store a synthetic name under the regular 'path' + * attribute and the real file name under a different attribute. + * Since we're supposed to not care about the order, we + * have no choice but to store all of the various filenames + * we find and figure it all out afterwards. This is the + * figuring out part. + */ + value = NULL; + if (archive_strlen(&(tar->entry_pathname_override)) > 0) + value = tar->entry_pathname_override.s; + else if (archive_strlen(&(tar->entry_pathname)) > 0) value = tar->entry_pathname.s; + if (value != NULL) { if (tar->pax_hdrcharset_binary) archive_entry_copy_pathname(entry, value); else { - wp = utf8_decode(tar, value, strlen(value)); - if (wp == NULL) { - archive_entry_copy_pathname(entry, value); - if (err > ARCHIVE_WARN) - err = ARCHIVE_WARN; - } else - archive_entry_copy_pathname_w(entry, wp); + if (!archive_entry_update_pathname_utf8(entry, value)) { + err = ARCHIVE_WARN; + archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Pathname in pax header can't be " + "converted to current locale."); + } } } if (archive_strlen(&(tar->entry_uname)) > 0) { @@ -1299,13 +1315,13 @@ pax_header(struct archive_read *a, struct tar *tar, if (tar->pax_hdrcharset_binary) archive_entry_copy_uname(entry, value); else { - wp = utf8_decode(tar, value, strlen(value)); - if (wp == NULL) { - archive_entry_copy_uname(entry, value); - if (err > ARCHIVE_WARN) - err = ARCHIVE_WARN; - } else - archive_entry_copy_uname_w(entry, wp); + if (!archive_entry_update_uname_utf8(entry, value)) { + err = ARCHIVE_WARN; + 
archive_set_error(&a->archive, + ARCHIVE_ERRNO_FILE_FORMAT, + "Uname in pax header can't " + "be converted to current locale."); + } } } return (err); @@ -1415,11 +1431,13 @@ pax_attribute(struct tar *tar, struct archive_entry *entry, tar->sparse_gnu_pending = 1; } if (strcmp(key, "GNU.sparse.name") == 0) { - wp = utf8_decode(tar, value, strlen(value)); - if (wp != NULL) - archive_entry_copy_pathname_w(entry, wp); - else - archive_entry_copy_pathname(entry, value); + /* + * The real filename; when storing sparse + * files, GNU tar puts a synthesized name into + * the regular 'path' attribute in an attempt + * to limit confusion. ;-) + */ + archive_strcpy(&(tar->entry_pathname_override), value); } if (strcmp(key, "GNU.sparse.realsize") == 0) { tar->realsize = tar_atol10(value, strlen(value)); @@ -1455,9 +1473,7 @@ pax_attribute(struct tar *tar, struct archive_entry *entry, archive_entry_set_rdevminor(entry, tar_atol10(value, strlen(value))); } else if (strcmp(key, "SCHILY.fflags")==0) { - wp = utf8_decode(tar, value, strlen(value)); - /* TODO: if (wp == NULL) */ - archive_entry_copy_fflags_text_w(entry, wp); + archive_entry_copy_fflags_text(entry, value); } else if (strcmp(key, "SCHILY.dev")==0) { archive_entry_set_dev(entry, tar_atol10(value, strlen(value))); diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c index 3c951313d..a105297ad 100644 --- a/libarchive/archive_read_support_format_zip.c +++ b/libarchive/archive_read_support_format_zip.c @@ -162,11 +162,9 @@ archive_read_support_format_zip(struct archive *_a) static int archive_read_format_zip_bid(struct archive_read *a) { - int bid = 0; const char *p; - - if (a->archive.archive_format == ARCHIVE_FORMAT_ZIP) - bid += 1; + const void *buff; + size_t bytes_avail; if ((p = __archive_read_ahead(a, 4)) == NULL) return (-1); @@ -184,9 +182,104 @@ archive_read_format_zip_bid(struct archive_read *a) || (p[2] == '0' && p[3] == '0')) return (30); } + + /* + * Attempt 
to handle self-extracting archives + * by noting a PE header and searching forward + * up to 128k for a 'PK\003\004' marker. + */ + if (p[0] == 'M' && p[1] == 'Z') { + /* + * TODO: Additional checks that this really is a PE + * file before we invoke the 128k lookahead below. + * No point in allocating a bigger lookahead buffer + * if we don't need to. + */ + /* + * TODO: Of course, the compression layer lookahead + * buffers aren't dynamically sized yet; they should be. + */ + bytes_avail = (a->decompressor->read_ahead)(a, &buff, 128*1024); + p = (const char *)buff; + + /* + * TODO: Optimize by jumping forward based on values + * in the PE header. Note that we don't need to be + * exact, but we mustn't skip too far. The search + * below will compensate if we undershoot. Skipping + * will also reduce the chance of false positives + * (which is not really all that high to begin with, + * so maybe skipping isn't really necessary). + */ + + while (p < bytes_avail + (const char *)buff) { + if (p[0] == 'P' && p[1] == 'K' /* "PK" signature */ + && p[2] == 3 && p[3] == 4 /* File entry */ + && p[8] == 8 /* compression == deflate */ + && p[9] == 0 /* High byte of compression */ + ) + { + return (30); + } + ++p; + } + } + return (0); } +/* + * Search forward for a "PK\003\004" file header. This handles the + * case of self-extracting archives, where there is an executable + * prepended to the ZIP archive. + */ +static int +skip_sfx(struct archive_read *a) +{ + const void *h; + const char *p, *q; + size_t skip, bytes; + + /* + * TODO: We should be able to skip forward by a bunch + * by lifting some values from the PE header. We don't + * need to be exact (we're still going to search forward + * to find the header), but it will speed things up and + * reduce the chance of a false positive.
+ */ + for (;;) { + bytes = (a->decompressor->read_ahead)(a, &h, 4096); + if (bytes < 4) + return (ARCHIVE_FATAL); + p = h; + q = p + bytes; + + /* + * Scan ahead until we find something that looks + * like the zip header. + */ + while (p + 4 < q) { + switch (p[3]) { + case '\004': + /* TODO: Additional verification here. */ + if (memcmp("PK\003\004", p, 4) == 0) { + skip = p - (const char *)h; + (a->decompressor->consume)(a, skip); + return (ARCHIVE_OK); + } + p += 4; + break; + case '\003': p += 1; break; + case 'K': p += 2; break; + case 'P': p += 3; break; + default: p += 4; break; + } + } + skip = p - (const char *)h; + (a->decompressor->consume)(a, skip); + } +} + static int archive_read_format_zip_read_header(struct archive_read *a, struct archive_entry *entry) @@ -194,6 +287,7 @@ archive_read_format_zip_read_header(struct archive_read *a, const void *h; const char *signature; struct zip *zip; + int r = ARCHIVE_OK, r1; a->archive.archive_format = ARCHIVE_FORMAT_ZIP; if (a->archive.archive_format_name == NULL) @@ -209,6 +303,16 @@ archive_read_format_zip_read_header(struct archive_read *a, return (ARCHIVE_FATAL); signature = (const char *)h; + if (signature[0] == 'M' && signature[1] == 'Z') { + /* This is an executable? Must be self-extracting... */ + r = skip_sfx(a); + if (r < ARCHIVE_WARN) + return (r); + if ((h = __archive_read_ahead(a, 4)) == NULL) + return (ARCHIVE_FATAL); + signature = (const char *)h; + } + if (signature[0] != 'P' || signature[1] != 'K') { archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, "Bad ZIP file"); @@ -239,7 +343,10 @@ archive_read_format_zip_read_header(struct archive_read *a, if (signature[2] == '\003' && signature[3] == '\004') { /* Regular file entry. 
*/ - return (zip_read_file_header(a, entry, zip)); + r1 = zip_read_file_header(a, entry, zip); + if (r1 != ARCHIVE_OK) + return (r1); + return (r); } if (signature[2] == '\005' && signature[3] == '\006') { diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c index 7e43b360a..e308c480b 100644 --- a/libarchive/archive_string.c +++ b/libarchive/archive_string.c @@ -37,6 +37,9 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_string.c,v 1.11 2007/07/15 19:13: #ifdef HAVE_STRING_H #include #endif +#ifdef HAVE_WCHAR_H +#include +#endif #include "archive_private.h" #include "archive_string.h" @@ -55,11 +58,15 @@ __archive_string_append(struct archive_string *as, const char *p, size_t s) void __archive_string_copy(struct archive_string *dest, struct archive_string *src) { - if (__archive_string_ensure(dest, src->length + 1) == NULL) - __archive_errx(1, "Out of memory"); - memcpy(dest->s, src->s, src->length); - dest->length = src->length; - dest->s[dest->length] = 0; + if (src->length == 0) + dest->length = 0; + else { + if (__archive_string_ensure(dest, src->length + 1) == NULL) + __archive_errx(1, "Out of memory"); + memcpy(dest->s, src->s, src->length); + dest->length = src->length; + dest->s[dest->length] = 0; + } } void @@ -67,21 +74,52 @@ __archive_string_free(struct archive_string *as) { as->length = 0; as->buffer_length = 0; - if (as->s != NULL) + if (as->s != NULL) { free(as->s); + as->s = NULL; + } } /* Returns NULL on any allocation failure. */ struct archive_string * __archive_string_ensure(struct archive_string *as, size_t s) { + /* If buffer is already big enough, don't reallocate. */ if (as->s && (s <= as->buffer_length)) return (as); + /* + * Growing the buffer at least exponentially ensures that + * append operations are always linear in the number of + * characters appended. Using a smaller growth rate for + * larger buffers reduces memory waste somewhat at the cost of + * a larger constant factor. 
+ */ if (as->buffer_length < 32) + /* Start with a minimum 32-character buffer. */ as->buffer_length = 32; - while (as->buffer_length < s) + else if (as->buffer_length < 8192) + /* Buffers under 8k are doubled for speed. */ as->buffer_length *= 2; + else { + /* Buffers 8k and over grow by at least 25% each time. */ + size_t old_length = as->buffer_length; + as->buffer_length = (as->buffer_length * 5) / 4; + /* Be safe: If size wraps, release buffer and return NULL. */ + if (as->buffer_length < old_length) { + free(as->s); + as->s = NULL; + return (NULL); + } + } + /* + * The computation above is a lower limit to how much we'll + * grow the buffer. In any case, we have to grow it enough to + * hold the request. + */ + if (as->buffer_length < s) + as->buffer_length = s; + /* Now we can reallocate the buffer. */ as->s = (char *)realloc(as->s, as->buffer_length); if (as->s == NULL) return (NULL); @@ -124,3 +162,206 @@ __archive_strappend_int(struct archive_string *as, int d, int base) __archive_strappend_char(as, digits[d % base]); return (as); } + +/* + * Home-grown wcrtomb for UTF-8. + */ +static size_t +my_wcrtomb_utf8(char *p, wchar_t wc, mbstate_t *s) +{ + (void)s; /* UNUSED */ + + if (p == NULL) + return (0); + if (wc <= 0x7f) { + p[0] = (char)wc; + return (1); + } + if (wc <= 0x7ff) { + p[0] = 0xc0 | ((wc >> 6) & 0x1f); + p[1] = 0x80 | (wc & 0x3f); + return (2); + } + if (wc <= 0xffff) { + p[0] = 0xe0 | ((wc >> 12) & 0x0f); + p[1] = 0x80 | ((wc >> 6) & 0x3f); + p[2] = 0x80 | (wc & 0x3f); + return (3); + } + if (wc <= 0x1fffff) { + p[0] = 0xf0 | ((wc >> 18) & 0x07); + p[1] = 0x80 | ((wc >> 12) & 0x3f); + p[2] = 0x80 | ((wc >> 6) & 0x3f); + p[3] = 0x80 | (wc & 0x3f); + return (4); + } + /* Unicode has no codes larger than 0x1fffff. */ + /* + * Awkward point: UTF-8 <-> wchar_t conversions + * can actually fail. 
+ */ + return ((size_t)-1); +} + +static int +my_wcstombs(struct archive_string *as, const wchar_t *w, + size_t (*func)(char *, wchar_t, mbstate_t *)) +{ + size_t n; + char *p; + mbstate_t shift_state; + char buff[256]; + + /* + * Convert one wide char at a time into 'buff', whenever that + * fills, append it to the string. + */ + p = buff; + wcrtomb(NULL, L'\0', &shift_state); + while (*w != L'\0') { + /* Flush the buffer when we have <=16 bytes free. */ + /* (No encoding has a single character >16 bytes.) */ + if ((size_t)(p - buff) >= (size_t)(sizeof(buff) - 16)) { + *p = '\0'; + archive_strcat(as, buff); + p = buff; + } + n = (*func)(p, *w++, &shift_state); + if (n == (size_t)-1) + return (-1); + p += n; + } + *p = '\0'; + archive_strcat(as, buff); + return (0); +} + +/* + * Translates a wide character string into UTF-8 and appends + * to the archive_string. Note: returns NULL if conversion fails. + */ +struct archive_string * +__archive_strappend_w_utf8(struct archive_string *as, const wchar_t *w) +{ + if (my_wcstombs(as, w, my_wcrtomb_utf8)) + return (NULL); + return (as); +} + +/* + * Translates a wide character string into current locale character set + * and appends to the archive_string. Note: returns NULL if conversion + * fails. + * + * TODO: use my_wcrtomb_utf8 if !HAVE_WCRTOMB (add configure logic first!) + */ +struct archive_string * +__archive_strappend_w_mbs(struct archive_string *as, const wchar_t *w) +{ + if (my_wcstombs(as, w, wcrtomb)) + return (NULL); + return (as); +} + + +/* + * Home-grown mbrtowc for UTF-8. Some systems lack UTF-8 + * (or even lack mbrtowc()) and we need UTF-8 support for pax + * format. So please don't replace this with a call to the + * standard mbrtowc() function! 
+ */ +static size_t +my_mbrtowc_utf8(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) +{ + int ch; + + /* + * This argument is here to make the prototype identical to the + * standard mbrtowc(), so I can build generic string processors + * that just accept a pointer to a suitable mbrtowc() function. + */ + (void)ps; /* UNUSED */ + + /* Standard behavior: a NULL value for 's' just resets shift state. */ + if (s == NULL) + return (0); + /* If length argument is zero, don't look at the first character. */ + if (n <= 0) + return ((size_t)-2); + + /* + * Decode 1-4 bytes depending on the value of the first byte. + */ + ch = (unsigned char)*s; + if (ch == 0) { + return (0); /* Standard: return 0 for end-of-string. */ + } + if ((ch & 0x80) == 0) { + *pwc = ch & 0x7f; + return (1); + } + if ((ch & 0xe0) == 0xc0) { + if (n < 2) + return ((size_t)-2); + if ((s[1] & 0xc0) != 0x80) return (size_t)-1; + *pwc = ((ch & 0x1f) << 6) | (s[1] & 0x3f); + return (2); + } + if ((ch & 0xf0) == 0xe0) { + if (n < 3) + return ((size_t)-2); + if ((s[1] & 0xc0) != 0x80) return (size_t)-1; + if ((s[2] & 0xc0) != 0x80) return (size_t)-1; + *pwc = ((ch & 0x0f) << 12) + | ((s[1] & 0x3f) << 6) + | (s[2] & 0x3f); + return (3); + } + if ((ch & 0xf8) == 0xf0) { + if (n < 4) + return ((size_t)-2); + if ((s[1] & 0xc0) != 0x80) return (size_t)-1; + if ((s[2] & 0xc0) != 0x80) return (size_t)-1; + if ((s[3] & 0xc0) != 0x80) return (size_t)-1; + *pwc = ((ch & 0x07) << 18) + | ((s[1] & 0x3f) << 12) + | ((s[2] & 0x3f) << 6) + | (s[3] & 0x3f); + return (4); + } + /* Invalid first byte. */ + return ((size_t)-1); +} + +/* + * Return a wide-character string by converting this archive_string + * from UTF-8. 
+ */ +wchar_t * +__archive_string_utf8_w(struct archive_string *as) +{ + wchar_t *ws, *dest; + const char *src; + size_t n; + int err; + + ws = (wchar_t *)malloc((as->length + 1) * sizeof(wchar_t)); + if (ws == NULL) + __archive_errx(1, "Out of memory"); + err = 0; + dest = ws; + src = as->s; + while (*src != '\0') { + n = my_mbrtowc_utf8(dest, src, 8, NULL); + if (n == 0) + break; + if (n == (size_t)-1 || n == (size_t)-2) { + free(ws); + return (NULL); + } + dest++; + src += n; + } + *dest++ = L'\0'; + return (ws); +} diff --git a/libarchive/archive_string.h b/libarchive/archive_string.h index f56c50fe4..61e70777f 100644 --- a/libarchive/archive_string.h +++ b/libarchive/archive_string.h @@ -33,6 +33,9 @@ #ifdef HAVE_STRING_H #include #endif +#ifdef HAVE_WCHAR_H +#include +#endif /* * Basic resizable/reusable string support a la Java's "StringBuffer." @@ -60,16 +63,22 @@ struct archive_string * __archive_strappend_char(struct archive_string *, char); #define archive_strappend_char __archive_strappend_char -/* Append a char to an archive_string using UTF8. */ -struct archive_string * -__archive_strappend_char_UTF8(struct archive_string *, int); -#define archive_strappend_char_UTF8 __archive_strappend_char_UTF8 - /* Append an integer in the specified base (2 <= base <= 16). */ struct archive_string * __archive_strappend_int(struct archive_string *as, int d, int base); #define archive_strappend_int __archive_strappend_int +/* Convert a wide-char string to UTF-8 and append the result. */ +struct archive_string * +__archive_strappend_w_utf8(struct archive_string *, const wchar_t *); +#define archive_strappend_w_utf8 __archive_strappend_w_utf8 + +/* Convert a wide-char string to current locale and append the result. */ +/* Returns NULL if conversion fails. */ +struct archive_string * +__archive_strappend_w_mbs(struct archive_string *, const wchar_t *); +#define archive_strappend_w_mbs __archive_strappend_w_mbs + /* Basic append operation. 
*/ struct archive_string * __archive_string_append(struct archive_string *as, const char *p, size_t s); @@ -95,7 +104,7 @@ __archive_strncat(struct archive_string *, const char *, size_t); /* Copy a C string to an archive_string, resizing as necessary. */ #define archive_strcpy(as,p) \ - ((as)->length = 0, __archive_string_append((as), (p), strlen(p))) + ((as)->length = 0, __archive_string_append((as), (p), p == NULL ? 0 : strlen(p))) /* Copy a C string to an archive_string with limit, resizing as necessary. */ #define archive_strncpy(as,p,l) \ @@ -119,4 +128,9 @@ void __archive_string_vsprintf(struct archive_string *, const char *, void __archive_string_sprintf(struct archive_string *, const char *, ...); #define archive_string_sprintf __archive_string_sprintf +/* Allocates a fresh buffer and converts as (assumed to be UTF-8) into it. + * Returns NULL if conversion failed in any way. */ +wchar_t *__archive_string_utf8_w(struct archive_string *as); + + #endif diff --git a/libarchive/archive_util.c b/libarchive/archive_util.c index 69d69a513..55dd1fa10 100644 --- a/libarchive/archive_util.c +++ b/libarchive/archive_util.c @@ -77,32 +77,10 @@ archive_version_number(void) return (ARCHIVE_VERSION_NUMBER); } -/* - * Format a version string of the form "libarchive x.y.z", where x, y, - * z are the correct parts of the version ID from - * archive_version_number(). - * - * I used to do all of this at build time in shell scripts but that - * proved to be a portability headache. 
- */ - const char * archive_version_string(void) { - static char buff[128]; - struct archive_string as; - int n; - - if (buff[0] == '\0') { - n = archive_version_number(); - memset(&as, 0, sizeof(as)); - archive_string_sprintf(&as, "libarchive %d.%d.%d", - n / 1000000, (n / 1000) % 1000, n % 1000); - strncpy(buff, as.s, sizeof(buff)); - buff[sizeof(buff) - 1] = '\0'; - archive_string_free(&as); - } - return (buff); + return (ARCHIVE_VERSION_STRING); } int diff --git a/libarchive/archive_write_disk.c b/libarchive/archive_write_disk.c index 620beac48..58a7fd22b 100644 --- a/libarchive/archive_write_disk.c +++ b/libarchive/archive_write_disk.c @@ -294,7 +294,7 @@ _archive_write_header(struct archive *_a, struct archive_entry *entry) archive_clear_error(&a->archive); if (a->archive.state & ARCHIVE_STATE_DATA) { r = _archive_write_finish_entry(&a->archive); - if (r != ARCHIVE_OK) + if (r == ARCHIVE_FATAL) return (r); } @@ -485,10 +485,12 @@ _archive_write_data_block(struct archive *_a, /* Write the data. 
*/ while (size > 0 && a->offset < a->filesize) { if ((off_t)(a->offset + size) > a->filesize) { - size = (size_t)(a->filesize - a->offset); - archive_set_error(&a->archive, errno, - "Write request too large"); + archive_set_error(&a->archive, 0, + "Write request too large (tried to write %u bytes, but only %u bytes remain)", + (unsigned int)size, + (unsigned int)(a->filesize - a->offset)); r = ARCHIVE_WARN; + size = (size_t)(a->filesize - a->offset); } bytes_written = write(a->fd, buff, size); if (bytes_written < 0) { diff --git a/libarchive/archive_write_set_format_pax.c b/libarchive/archive_write_set_format_pax.c index d6e3e6c4b..89f89bc9f 100644 --- a/libarchive/archive_write_set_format_pax.c +++ b/libarchive/archive_write_set_format_pax.c @@ -386,7 +386,7 @@ archive_write_pax_header(struct archive_write *a, const char *p; char *t; const wchar_t *wp; - const char *suffix_start; + const char *suffix; int need_extension, r, ret; struct pax *pax; const char *hdrcharset = NULL; @@ -496,34 +496,73 @@ archive_write_pax_header(struct archive_write *a, if (hdrcharset != NULL) add_pax_attr(&(pax->pax_header), "hdrcharset", hdrcharset); - /* - * Determining whether or not the name is too big is ugly - * because of the rules for dividing names between 'name' and - * 'prefix' fields. Here, I pick out the longest possible - * suffix, then test whether the remaining prefix is too long. - */ - if (strlen(path) <= 100) /* Short enough for just 'name' field */ - suffix_start = path; /* Record a zero-length prefix */ - else - /* Find the largest suffix that fits in 'name' field. */ - suffix_start = strchr(path + strlen(path) - 100 - 1, '/'); /* * If name is too long, or has non-ASCII characters, add * 'path' to pax extended attrs. (Note that an unconvertible * name must have non-ASCII characters.) 
*/ - if (suffix_start == NULL || suffix_start - path > 155 - || path_w == NULL || has_non_ASCII(path_w)) { - if (path_w == NULL || hdrcharset != NULL) + if (path == NULL) { + /* We don't have a narrow version, so we have to store + * the wide version. */ + add_pax_attr_w(&(pax->pax_header), "path", path_w); + archive_entry_set_pathname(entry_main, "@WidePath"); + need_extension = 1; + } else if (has_non_ASCII(path_w)) { + /* We have non-ASCII characters. */ + if (path_w == NULL || hdrcharset != NULL) { /* Can't do UTF-8, so store it raw. */ add_pax_attr(&(pax->pax_header), "path", path); - else - add_pax_attr_w(&(pax->pax_header), "path", path_w); + } else { + /* Store UTF-8 */ + add_pax_attr_w(&(pax->pax_header), + "path", path_w); + } archive_entry_set_pathname(entry_main, build_ustar_entry_name(ustar_entry_name, path, strlen(path), NULL)); need_extension = 1; + } else { + /* We have an all-ASCII path; we'd like to just store + * it in the ustar header if it will fit. Yes, this + * duplicates some of the logic in + * write_set_format_ustar.c + */ + if (strlen(path) <= 100) { + /* Fits in the old 100-char tar name field. */ + } else { + /* Find largest suffix that will fit. */ + /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */ + suffix = strchr(path + strlen(path) - 100 - 1, '/'); + /* Don't attempt an empty prefix. */ + if (suffix == path) + suffix = strchr(suffix + 1, '/'); + /* We can put it in the ustar header if it's + * all ASCII and it's either <= 100 characters + * or can be split at a '/' into a prefix <= + * 155 chars and a suffix <= 100 chars. (Note + * the strchr() above will return NULL exactly + * when the path can't be split.) + */ + if (suffix == NULL /* Suffix > 100 chars. */ + || suffix[1] == '\0' /* empty suffix */ + || suffix - path > 155) /* Prefix > 155 chars */ + { + if (path_w == NULL || hdrcharset != NULL) { + /* Can't do UTF-8, so store it raw. 
*/ + add_pax_attr(&(pax->pax_header), + "path", path); + } else { + /* Store UTF-8 */ + add_pax_attr_w(&(pax->pax_header), + "path", path_w); + } + archive_entry_set_pathname(entry_main, + build_ustar_entry_name(ustar_entry_name, + path, strlen(path), NULL)); + need_extension = 1; + } + } } if (linkpath != NULL) { @@ -1215,6 +1254,8 @@ archive_write_pax_data(struct archive_write *a, const void *buff, size_t s) static int has_non_ASCII(const wchar_t *wp) { + if (wp == NULL) + return (1); while (*wp != L'\0' && *wp < 128) wp++; return (*wp != L'\0'); diff --git a/libarchive/archive_write_set_format_ustar.c b/libarchive/archive_write_set_format_ustar.c index c2c0011ae..e7f652d1d 100644 --- a/libarchive/archive_write_set_format_ustar.c +++ b/libarchive/archive_write_set_format_ustar.c @@ -206,7 +206,7 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry) !(archive_entry_filetype(entry) == AE_IFREG)) archive_entry_set_size(entry, 0); - if (AE_IFDIR == archive_entry_mode(entry)) { + if (AE_IFDIR == archive_entry_filetype(entry)) { const char *p; char *t; /* @@ -282,24 +282,30 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512], /* Store in two pieces, splitting at a '/'. */ p = strchr(pp + strlen(pp) - USTAR_name_size - 1, '/'); /* - * If the separator we found is the first '/', find - * the next one. (This is a pathological case that - * occurs for paths of exactly 101 bytes that start with - * '/'; it occurs because the separating '/' is not - * stored explicitly and the reconstruction assumes that - * an empty prefix means there is no '/' separator.) + * Look for the next '/' if we chose the first character + * as the separator. (ustar format doesn't permit + * an empty prefix.) */ if (p == pp) p = strchr(p + 1, '/'); - /* - * If there is no path separator, or the prefix or - * remaining name are too large, return an error. - */ + /* Fail if the name won't fit. */ if (!p) { + /* No separator. 
*/ + archive_set_error(&a->archive, ENAMETOOLONG, + "Pathname too long"); + ret = ARCHIVE_WARN; + } else if (p[1] == '\0') { + /* + * The only feasible separator is a final '/'; + * this would result in a non-empty prefix and + * an empty name, which POSIX doesn't + * explicitly forbid, but it just feels wrong. + */ archive_set_error(&a->archive, ENAMETOOLONG, "Pathname too long"); ret = ARCHIVE_WARN; } else if (p > pp + USTAR_prefix_size) { + /* Prefix is too long. */ archive_set_error(&a->archive, ENAMETOOLONG, "Pathname too long"); ret = ARCHIVE_WARN; diff --git a/libarchive/test/Makefile b/libarchive/test/Makefile index 9d04b089e..24b09544a 100644 --- a/libarchive/test/Makefile +++ b/libarchive/test/Makefile @@ -18,6 +18,7 @@ TESTS= \ test_empty_write.c \ test_entry.c \ test_entry_strmode.c \ + test_link_resolver.c \ test_pax_filename_encoding.c \ test_read_compress_program.c \ test_read_data_large.c \ @@ -38,6 +39,7 @@ TESTS= \ test_read_format_mtree.c \ test_read_format_pax_bz2.c \ test_read_format_tar.c \ + test_read_format_tar_empty_filename.c \ test_read_format_tbz.c \ test_read_format_tgz.c \ test_read_format_tz.c \ @@ -48,6 +50,7 @@ TESTS= \ test_read_truncated.c \ test_tar_filenames.c \ test_tar_large.c \ + test_ustar_filenames.c \ test_write_compress_program.c \ test_write_compress.c \ test_write_disk.c \ @@ -61,6 +64,7 @@ TESTS= \ test_write_format_cpio_empty.c \ test_write_format_shar_empty.c \ test_write_format_tar.c \ + test_write_format_tar_ustar.c \ test_write_format_tar_empty.c \ test_write_open_memory.c @@ -68,7 +72,8 @@ TESTS= \ # Build the test program using all libarchive sources + the test sources. SRCS= ${LA_SRCS} \ ${TESTS} \ - list.h \ + ${.OBJDIR}/list.h \ + ${.OBJDIR}/archive.h \ main.c \ read_open_memory.c @@ -96,17 +101,15 @@ WARNS=6 # Build libarchive_test and run it.
check test: libarchive_test - ./libarchive_test -k -r ${.CURDIR} - -INCS=archive.h list.h + ./libarchive_test -v -r ${.CURDIR} # Build archive.h, but in our .OBJDIR, not libarchive's # This keeps libarchive_test and libarchive builds completely separate. -archive.h: ${LA_SRCDIR}/archive.h.in ${LA_SRCDIR}/Makefile +${.OBJDIR}/archive.h: ${LA_SRCDIR}/archive.h.in ${LA_SRCDIR}/Makefile cd ${LA_SRCDIR} && unset MAKEOBJDIRPREFIX && MAKEOBJDIR=${.OBJDIR} make archive.h # list.h is just a list of all tests, as indicated by DEFINE_TEST macro lines -list.h: ${TESTS} Makefile +${.OBJDIR}/list.h: ${TESTS} Makefile (cd ${.CURDIR}; cat ${TESTS}) | grep DEFINE_TEST > list.h CLEANFILES += *.out *.o *.core *~ list.h archive.h diff --git a/libarchive/test/main.c b/libarchive/test/main.c index 397780327..6e6ca6c8d 100644 --- a/libarchive/test/main.c +++ b/libarchive/test/main.c @@ -63,10 +63,14 @@ extern char *optarg; extern int optind; #endif -/* Default is to crash and try to force a core dump on failure. */ -static int dump_on_failure = 1; +/* Enable core dump on failure. */ +static int dump_on_failure = 0; +/* Default is to remove temp dirs for successful tests. */ +static int keep_temp_files = 0; /* Default is to print some basic information about each test. */ static int quiet_flag = 0; +/* Default is to summarize repeated failures. */ +static int verbose = 0; /* Cumulative count of component failures. */ static int failures = 0; /* Cumulative count of skipped component tests. 
*/ @@ -242,7 +246,7 @@ test_assert(const char *file, int line, int value, const char *condition, void * return (value); } failures ++; - if (previous_failures(file, line)) + if (!verbose && previous_failures(file, line)) return (value); fprintf(stderr, "%s:%d: Assertion failed\n", file, line); fprintf(stderr, " Condition: %s\n", condition); @@ -261,7 +265,7 @@ test_assert_equal_int(const char *file, int line, return (1); } failures ++; - if (previous_failures(file, line)) + if (!verbose && previous_failures(file, line)) return (0); fprintf(stderr, "%s:%d: Assertion failed: Ints not equal\n", file, line); @@ -271,6 +275,30 @@ test_assert_equal_int(const char *file, int line, return (0); } +static void strdump(const char *p) +{ + if (p == NULL) { + fprintf(stderr, "(null)"); + return; + } + fprintf(stderr, "\""); + while (*p != '\0') { + unsigned int c = 0xff & *p++; + switch (c) { + case '\a': fprintf(stderr, "\a"); break; + case '\b': fprintf(stderr, "\b"); break; + case '\n': fprintf(stderr, "\n"); break; + case '\r': fprintf(stderr, "\r"); break; + default: + if (c >= 32 && c < 127) + fprintf(stderr, "%c", c); + else + fprintf(stderr, "\\x%02X", c); + } + } + fprintf(stderr, "\""); +} + /* assertEqualString() displays the values of the two strings. */ int test_assert_equal_string(const char *file, int line, @@ -289,16 +317,41 @@ test_assert_equal_string(const char *file, int line, return (1); } failures ++; - if (previous_failures(file, line)) + if (!verbose && previous_failures(file, line)) return (0); fprintf(stderr, "%s:%d: Assertion failed: Strings not equal\n", file, line); - fprintf(stderr, " %s = \"%s\"\n", e1, v1); - fprintf(stderr, " %s = \"%s\"\n", e2, v2); + fprintf(stderr, " %s = ", e1); + strdump(v1); + fprintf(stderr, " (length %d)\n", v1 == NULL ? 0 : strlen(v1)); + fprintf(stderr, " %s = ", e2); + strdump(v2); + fprintf(stderr, " (length %d)\n", v2 == NULL ? 
0 : strlen(v2)); report_failure(extra); return (0); } +static void wcsdump(const wchar_t *w) +{ + if (w == NULL) { + fprintf(stderr, "(null)"); + return; + } + fprintf(stderr, "\""); + while (*w != L'\0') { + unsigned int c = *w++; + if (c >= 32 && c < 127) + fprintf(stderr, "%c", c); + else if (c < 256) + fprintf(stderr, "\\x%02X", c); + else if (c < 0x10000) + fprintf(stderr, "\\u%04X", c); + else + fprintf(stderr, "\\U%08X", c); + } + fprintf(stderr, "\""); +} + /* assertEqualWString() displays the values of the two strings. */ int test_assert_equal_wstring(const char *file, int line, @@ -307,17 +360,31 @@ test_assert_equal_wstring(const char *file, int line, void *extra) { ++assertions; - if (wcscmp(v1, v2) == 0) { + if (v1 == NULL) { + if (v2 == NULL) { + msg[0] = '\0'; + return (1); + } + } else if (v2 == NULL) { + if (v1 == NULL) { + msg[0] = '\0'; + return (1); + } + } else if (wcscmp(v1, v2) == 0) { msg[0] = '\0'; return (1); } failures ++; - if (previous_failures(file, line)) + if (!verbose && previous_failures(file, line)) return (0); fprintf(stderr, "%s:%d: Assertion failed: Unicode strings not equal\n", file, line); - fwprintf(stderr, L" %s = \"%ls\"\n", e1, v1); - fwprintf(stderr, L" %s = \"%ls\"\n", e2, v2); + fprintf(stderr, " %s = ", e1); + wcsdump(v1); + fprintf(stderr, "\n"); + fprintf(stderr, " %s = ", e2); + wcsdump(v2); + fprintf(stderr, "\n"); report_failure(extra); return (0); } @@ -378,7 +445,7 @@ test_assert_equal_mem(const char *file, int line, return (1); } failures ++; - if (previous_failures(file, line)) + if (!verbose && previous_failures(file, line)) return (0); fprintf(stderr, "%s:%d: Assertion failed: memory not equal\n", file, line); @@ -410,12 +477,13 @@ test_assert_empty_file(const char *f1fmt, ...) 
if (stat(f1, &st) != 0) { fprintf(stderr, "%s:%d: Could not stat: %s\n", test_filename, test_line, f1); report_failure(NULL); + return (0); } if (st.st_size == 0) return (1); failures ++; - if (previous_failures(test_filename, test_line)) + if (!verbose && previous_failures(test_filename, test_line)) return (0); fprintf(stderr, "%s:%d: File not empty: %s\n", test_filename, test_line, f1); @@ -462,7 +530,7 @@ test_assert_equal_file(const char *f1, const char *f2pattern, ...) break; } failures ++; - if (previous_failures(test_filename, test_line)) + if (!verbose && previous_failures(test_filename, test_line)) return (0); fprintf(stderr, "%s:%d: Files are not identical\n", test_filename, test_line); @@ -633,6 +701,12 @@ static int test_run(int i, const char *tmpdir) (*tests[i].func)(); /* Summarize the results of this test. */ summarize(); + /* If there were no failures, we can remove the work dir. */ + if (failures == failures_before) { + if (!keep_temp_files && chdir(tmpdir) == 0) { + systemf("rm -rf %s", tests[i].name); + } + } /* Return appropriate status. */ return (failures == failures_before ? 
0 : 1); } @@ -646,8 +720,9 @@ static void usage(const char *program) printf("Default is to run all tests.\n"); printf("Otherwise, specify the numbers of the tests you wish to run.\n"); printf("Options:\n"); - printf(" -k Keep running after failures.\n"); - printf(" Default: Core dump after any failure.\n"); + printf(" -d Dump core after any failure, for debugging.\n"); + printf(" -k Keep all temp files.\n"); + printf(" Default: temp files for successful tests deleted.\n"); #ifdef PROGRAM printf(" -p Path to executable to be tested.\n"); printf(" Default: path taken from " ENVBASE " environment variable.\n"); @@ -655,6 +730,7 @@ static void usage(const char *program) printf(" -q Quiet.\n"); printf(" -r Path to dir containing reference files.\n"); printf(" Default: Current directory.\n"); + printf(" -v Verbose.\n"); printf("Available tests:\n"); for (i = 0; i < limit; i++) printf(" %d: %s\n", i, tests[i].name); @@ -747,9 +823,9 @@ int main(int argc, char **argv) testprog = getenv(ENVBASE); #endif - /* Allow -k to be controlled through the environment. */ - if (getenv(ENVBASE "_KEEP_GOING") != NULL) - dump_on_failure = 0; + /* Allow -d to be controlled through the environment. */ + if (getenv(ENVBASE "_DEBUG") != NULL) + dump_on_failure = 1; /* Get the directory holding test files from environment. */ refdir = getenv(ENVBASE "_TEST_FILES"); @@ -757,10 +833,13 @@ int main(int argc, char **argv) /* * Parse options. 
*/ - while ((opt = getopt(argc, argv, "kp:qr:")) != -1) { + while ((opt = getopt(argc, argv, "dkp:qr:v")) != -1) { switch (opt) { + case 'd': + dump_on_failure = 1; + break; case 'k': - dump_on_failure = 0; + keep_temp_files = 1; break; case 'p': #ifdef PROGRAM @@ -775,6 +854,9 @@ int main(int argc, char **argv) case 'r': refdir = optarg; break; + case 'v': + verbose = 1; + break; case '?': default: usage(progname); @@ -823,6 +905,7 @@ int main(int argc, char **argv) --p; *p = '\0'; } + systemf("rm %s/refdir", tmpdir); } /* @@ -878,5 +961,9 @@ int main(int argc, char **argv) free(refdir_alloc); + /* If the final tmpdir is empty, we can remove it. */ + /* This should be the usual case when all tests succeed. */ + rmdir(tmpdir); + return (tests_failed); } diff --git a/libarchive/test/test_acl_pax.c b/libarchive/test/test_acl_pax.c index abf746949..6ae3dd271 100644 --- a/libarchive/test/test_acl_pax.c +++ b/libarchive/test/test_acl_pax.c @@ -332,14 +332,10 @@ acl_match(struct acl_t *acl, int type, int permset, int tag, int qual, const cha return (1); if (qual != acl->qual) return (0); - if (name == NULL) { - if (acl->name == NULL || acl->name[0] == '\0') - return (1); - } - if (acl->name == NULL) { - if (name[0] == '\0') - return (1); - } + if (name == NULL) + return (acl->name == NULL || acl->name[0] == '\0'); + if (acl->name == NULL) + return (name == NULL || name[0] == '\0'); return (0 == strcmp(name, acl->name)); } diff --git a/libarchive/test/test_archive_api_feature.c b/libarchive/test/test_archive_api_feature.c index cfc0b8413..21d189d58 100644 --- a/libarchive/test/test_archive_api_feature.c +++ b/libarchive/test/test_archive_api_feature.c @@ -28,6 +28,7 @@ __FBSDID("$FreeBSD: src/lib/libarchive/test/test_archive_api_feature.c,v 1.4 200 DEFINE_TEST(test_archive_api_feature) { char buff[128]; + const char *p; /* This is the (hopefully) final versioning API. 
*/ assertEqualInt(ARCHIVE_VERSION_NUMBER, archive_version_number()); @@ -35,7 +36,17 @@ DEFINE_TEST(test_archive_api_feature) archive_version_number() / 1000000, (archive_version_number() / 1000) % 1000, archive_version_number() % 1000); - assertEqualString(buff, archive_version_string()); + failure("Version string is: %s, computed is: %s", + archive_version_string(), buff); + assert(memcmp(buff, archive_version_string(), strlen(buff)) == 0); + if (strlen(buff) < strlen(archive_version_string())) { + p = archive_version_string() + strlen(buff); + failure("Version string is: %s", archive_version_string()); + assert(*p == 'a' || *p == 'b' || *p == 'c' || *p == 'd'); + ++p; + failure("Version string is: %s", archive_version_string()); + assert(*p == '\0'); + } /* This is all scheduled to disappear in libarchive 3.0 */ #if ARCHIVE_VERSION_NUMBER < 3000000 diff --git a/libarchive/test/test_entry.c b/libarchive/test/test_entry.c index 29edae7ff..8386d9892 100644 --- a/libarchive/test/test_entry.c +++ b/libarchive/test/test_entry.c @@ -52,6 +52,8 @@ DEFINE_TEST(test_entry) const void *xval; /* For xattr tests. */ size_t xsize; /* For xattr tests. */ int c; + wchar_t wc; + long l; assert((e = archive_entry_new()) != NULL); @@ -146,7 +148,7 @@ DEFINE_TEST(test_entry) archive_entry_copy_link_w(e, L"link3"); assertEqualString(archive_entry_hardlink(e), NULL); assertEqualString(archive_entry_symlink(e), "link3"); - /* Arbitrarily override hardlink if both hardlink and symlink set. */ + /* Arbitrarily override symlink if both hardlink and symlink set. */ archive_entry_set_hardlink(e, "hardlink"); archive_entry_set_symlink(e, "symlink"); archive_entry_set_link(e, "link"); @@ -726,8 +728,10 @@ DEFINE_TEST(test_entry) /* * Exercise the character-conversion logic, if we can. 
*/ - failure("Can't exercise charset-conversion logic."); - if (assert(NULL != setlocale(LC_ALL, "de_DE.UTF-8"))) { + if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) { + skipping("Can't exercise charset-conversion logic without" + " a suitable locale."); + } else { /* A filename that cannot be converted to wide characters. */ archive_entry_copy_pathname(e, "abc\314\214mno\374xyz"); failure("Converting invalid chars to Unicode should fail."); @@ -756,6 +760,26 @@ DEFINE_TEST(test_entry) assert(NULL == archive_entry_symlink_w(e)); } + l = 0x12345678L; + wc = (wchar_t)l; /* Wide character too big for UTF-8. */ + if (NULL == setlocale(LC_ALL, "C") || (long)wc != l) { + skipping("Testing charset conversion failure requires 32-bit wchar_t and support for \"C\" locale."); + } else { + /* + * Build the string L"xxx\U12345678yyy\u5678zzz" without + * using C99 \u#### syntax, which isn't uniformly + * supported. (GCC 3.4.6, for instance, defaults to + * "c89 plus GNU extensions.") + */ + wcscpy(wbuff, L"xxxAyyyBzzz"); + wbuff[3] = 0x12345678; + wbuff[7] = 0x5678; + /* A wide filename that cannot be converted to narrow. */ + archive_entry_copy_pathname_w(e, wbuff); + failure("Converting wide characters from Unicode should fail."); + assertEqualString(NULL, archive_entry_pathname(e)); + } + /* Release the experimental entry. */ archive_entry_free(e); } diff --git a/libarchive/test/test_pax_filename_encoding.c b/libarchive/test/test_pax_filename_encoding.c index b11be58a0..34c4fc823 100644 --- a/libarchive/test/test_pax_filename_encoding.c +++ b/libarchive/test/test_pax_filename_encoding.c @@ -34,24 +34,20 @@ __FBSDID("$FreeBSD: src/lib/libarchive/test/test_pax_filename_encoding.c,v 1.1 2 * stored and restored correctly, regardless of the encodings. 
*/ -DEFINE_TEST(test_pax_filename_encoding) +/* + * Read a manually-created archive that has filenames that are + * stored in binary instead of UTF-8 and verify that we get + * the right filename returned and that we get a warning only + * if the header isn't marked as binary. + */ +DEFINE_TEST(test_pax_filename_encoding_1) { static const char testname[] = "test_pax_filename_encoding.tar.gz"; - char buff[65536]; /* * \314\214 is a valid 2-byte UTF-8 sequence. * \374 is invalid in UTF-8. */ char filename[] = "abc\314\214mno\374xyz"; - char longname[] = "abc\314\214mno\374xyz" - "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" - "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" - "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" - "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" - "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" - "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" - ; - size_t used; struct archive *a; struct archive_entry *entry; @@ -69,8 +65,7 @@ DEFINE_TEST(test_pax_filename_encoding) * in it, but the header is not marked as hdrcharset=BINARY, so that * requires a warning. */ - failure("An invalid UTF8 pathname in a pax archive should be read\n" - " without conversion but with a warning"); + failure("Invalid UTF8 in a pax archive pathname should cause a warning"); assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); assertEqualString(filename, archive_entry_pathname(entry)); /* @@ -82,15 +77,39 @@ DEFINE_TEST(test_pax_filename_encoding) assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry)); assertEqualString(filename, archive_entry_pathname(entry)); archive_read_finish(a); +} + +/* + * Set the locale and write a pathname containing invalid characters. + * This should work; the underlying implementation should automatically + * fall back to storing the pathname in binary. 
+ */ +DEFINE_TEST(test_pax_filename_encoding_2) +{ + char filename[] = "abc\314\214mno\374xyz"; + struct archive *a; + struct archive_entry *entry; + char buff[65536]; + char longname[] = "abc\314\214mno\374xyz" + "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" + "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" + "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" + "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" + "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" + "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz" + ; + size_t used; /* * We need a starting locale which has invalid sequences. * de_DE.UTF-8 seems to be commonly supported. */ /* If it doesn't exist, just warn and return. */ - failure("We need a suitable locale for the encoding tests."); - if (!assert(NULL != setlocale(LC_ALL, "de_DE.UTF-8"))) + if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) { + skipping("invalid encoding tests require a suitable locale;" + " de_DE.UTF-8 not available on this system"); return; + } assert((a = archive_write_new()) != NULL); assertEqualIntA(a, 0, archive_write_set_format_pax(a)); @@ -159,3 +178,120 @@ DEFINE_TEST(test_pax_filename_encoding) assertEqualInt(0, archive_read_finish(a)); } +/* + * Create an entry starting from a wide-character Unicode pathname, + * read it back into "C" locale, which doesn't support the name. + * TODO: Figure out the "right" behavior here. + */ +DEFINE_TEST(test_pax_filename_encoding_3) +{ + wchar_t badname[] = L"xxxAyyyBzzz"; + const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz"; + struct archive *a; + struct archive_entry *entry; + char buff[65536]; + size_t used; + + badname[3] = 0x1234; + badname[7] = 0x5678; + + /* If it doesn't exist, just warn and return. 
*/ + if (NULL == setlocale(LC_ALL, "C")) { + skipping("Can't set \"C\" locale, so can't exercise " + "certain character-conversion failures"); + return; + } + + assert((a = archive_write_new()) != NULL); + assertEqualIntA(a, 0, archive_write_set_format_pax(a)); + assertEqualIntA(a, 0, archive_write_set_compression_none(a)); + assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0)); + assertEqualInt(0, + archive_write_open_memory(a, buff, sizeof(buff), &used)); + + assert((entry = archive_entry_new()) != NULL); + /* Set pathname to non-convertible wide value. */ + archive_entry_copy_pathname_w(entry, badname); + archive_entry_set_filetype(entry, AE_IFREG); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + + assert((entry = archive_entry_new()) != NULL); + archive_entry_copy_pathname_w(entry, L"abc"); + /* Set gname to non-convertible wide value. */ + archive_entry_copy_gname_w(entry, badname); + archive_entry_set_filetype(entry, AE_IFREG); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + + assert((entry = archive_entry_new()) != NULL); + archive_entry_copy_pathname_w(entry, L"abc"); + /* Set uname to non-convertible wide value. */ + archive_entry_copy_uname_w(entry, badname); + archive_entry_set_filetype(entry, AE_IFREG); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + + assert((entry = archive_entry_new()) != NULL); + archive_entry_copy_pathname_w(entry, L"abc"); + /* Set hardlink to non-convertible wide value. */ + archive_entry_copy_hardlink_w(entry, badname); + archive_entry_set_filetype(entry, AE_IFREG); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + + assert((entry = archive_entry_new()) != NULL); + archive_entry_copy_pathname_w(entry, L"abc"); + /* Set symlink to non-convertible wide value. 
*/ + archive_entry_copy_symlink_w(entry, badname); + archive_entry_set_filetype(entry, AE_IFLNK); + assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); + archive_entry_free(entry); + + assertEqualInt(0, archive_write_close(a)); + assertEqualInt(0, archive_write_finish(a)); + + /* + * Now read the entries back. + */ + + assert((a = archive_read_new()) != NULL); + assertEqualInt(0, archive_read_support_format_tar(a)); + assertEqualInt(0, archive_read_open_memory(a, buff, used)); + + failure("A non-convertible pathname should cause a warning."); + assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); + assertEqualWString(badname, archive_entry_pathname_w(entry)); + failure("If native locale can't convert, we should get UTF-8 back."); + assertEqualString(badname_utf8, archive_entry_pathname(entry)); + + failure("A non-convertible gname should cause a warning."); + assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); + assertEqualWString(badname, archive_entry_gname_w(entry)); + failure("If native locale can't convert, we should get UTF-8 back."); + assertEqualString(badname_utf8, archive_entry_gname(entry)); + + failure("A non-convertible uname should cause a warning."); + assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); + assertEqualWString(badname, archive_entry_uname_w(entry)); + failure("If native locale can't convert, we should get UTF-8 back."); + assertEqualString(badname_utf8, archive_entry_uname(entry)); + + failure("A non-convertible hardlink should cause a warning."); + assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); + assertEqualWString(badname, archive_entry_hardlink_w(entry)); + failure("If native locale can't convert, we should get UTF-8 back."); + assertEqualString(badname_utf8, archive_entry_hardlink(entry)); + + failure("A non-convertible symlink should cause a warning."); + assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry)); + assertEqualWString(badname, 
archive_entry_symlink_w(entry)); + assertEqualWString(NULL, archive_entry_hardlink_w(entry)); + failure("If native locale can't convert, we should get UTF-8 back."); + assertEqualString(badname_utf8, archive_entry_symlink(entry)); + + assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry)); + + assertEqualInt(0, archive_read_close(a)); + assertEqualInt(0, archive_read_finish(a)); +} diff --git a/libarchive/test/test_tar_filenames.c b/libarchive/test/test_tar_filenames.c index 8b83b5277..9b98448e4 100644 --- a/libarchive/test/test_tar_filenames.c +++ b/libarchive/test/test_tar_filenames.c @@ -40,19 +40,22 @@ test_filename(const char *prefix, int dlen, int flen) struct archive_entry *ae; struct archive *a; size_t used; - size_t prefix_length = 0; - unsigned i = 0; + char *p; + int i; + p = filename; if (prefix) { strcpy(filename, prefix); - i = prefix_length = strlen(prefix); + p += strlen(p); } - for (; i < prefix_length + dlen; i++) - filename[i] = 'a'; - filename[i++] = '/'; - for (; i < prefix_length + dlen + flen + 1; i++) - filename[i] = 'b'; - filename[i++] = '\0'; + if (dlen > 0) { + for (i = 0; i < dlen; i++) + *p++ = 'a'; + *p++ = '/'; + } + for (i = 0; i < flen; i++) + *p++ = 'b'; + *p = '\0'; strcpy(dirname, filename); @@ -160,15 +163,22 @@ DEFINE_TEST(test_tar_filenames) int dlen, flen; /* Repeat the following for a variety of dir/file lengths. 
*/ - for (dlen = 40; dlen < 60; dlen++) { - for (flen = 40; flen < 60; flen++) { + for (dlen = 45; dlen < 55; dlen++) { + for (flen = 45; flen < 55; flen++) { + test_filename(NULL, dlen, flen); + test_filename("/", dlen, flen); + } + } + + for (dlen = 0; dlen < 140; dlen += 10) { + for (flen = 98; flen < 102; flen++) { test_filename(NULL, dlen, flen); test_filename("/", dlen, flen); } } for (dlen = 140; dlen < 160; dlen++) { - for (flen = 90; flen < 110; flen++) { + for (flen = 95; flen < 105; flen++) { test_filename(NULL, dlen, flen); test_filename("/", dlen, flen); } diff --git a/libarchive/test/test_tar_large.c b/libarchive/test/test_tar_large.c index c675ac1ee..a05b49f6a 100644 --- a/libarchive/test/test_tar_large.c +++ b/libarchive/test/test_tar_large.c @@ -242,6 +242,11 @@ DEFINE_TEST(test_tar_large) archive_entry_copy_pathname(ae, namebuff); archive_entry_set_mode(ae, S_IFREG | 0755); filesize = tests[i]; + + if (filesize < 0) { + skipping("32-bit off_t doesn't permit testing of very large files."); + return; + } archive_entry_set_size(ae, filesize); assertA(0 == archive_write_header(a, ae)); diff --git a/libarchive/test/test_write_format_ar.c b/libarchive/test/test_write_format_ar.c index 6c7a4462a..432557ca1 100644 --- a/libarchive/test/test_write_format_ar.c +++ b/libarchive/test/test_write_format_ar.c @@ -30,7 +30,7 @@ __FBSDID("$FreeBSD: src/lib/libarchive/test/test_write_format_ar.c,v 1.6 2008/03 char buff[4096]; char buff2[64]; -static unsigned char strtab[] = "abcdefghijklmn.o/\nggghhhjjjrrrttt.o/\niiijjjdddsssppp.o/\n"; +static char strtab[] = "abcdefghijklmn.o/\nggghhhjjjrrrttt.o/\niiijjjdddsssppp.o/\n"; DEFINE_TEST(test_write_format_ar) { diff --git a/tar/Makefile b/tar/Makefile index fa325429e..44098eac8 100644 --- a/tar/Makefile +++ b/tar/Makefile @@ -1,7 +1,7 @@ # $FreeBSD: src/usr.bin/tar/Makefile,v 1.34 2008/03/18 06:18:49 kientzle Exp $ PROG= bsdtar -BSDTAR_VERSION_STRING=2.5.0b +BSDTAR_VERSION_STRING=2.5.1b SRCS= bsdtar.c getdate.y 
matching.c read.c tree.c util.c write.c WARNS?= 5 DPADD= ${LIBARCHIVE} ${LIBBZ2} ${LIBZ} diff --git a/tar/bsdtar.c b/tar/bsdtar.c index 81d9841bd..abd575ca2 100644 --- a/tar/bsdtar.c +++ b/tar/bsdtar.c @@ -788,7 +788,7 @@ version(void) printf("bsdtar %s - %s\n", BSDTAR_VERSION_STRING, archive_version()); - exit(1); + exit(0); } static const char *long_help_msg = -- 2.47.3