# Version is three numbers:
# Major: Bumped ONLY when API/ABI breakage happens (see SHLIB_MAJOR)
# Minor: Bumped when significant new features are added
-# Revision: Bumped on any notable change
+# Revision: Bumped frequently.
# The useful version number (one integer, easy to compare)
-LIBARCHIVE_VERSION= 2004012
+LIBARCHIVE_VERSION_NUMBER=2005001
# The pretty version string
-LIBARCHIVE_VERSION_STRING!= echo $$((${LIBARCHIVE_VERSION} / 1000000)).$$((${LIBARCHIVE_VERSION} / 1000 % 1000)).$$((${LIBARCHIVE_VERSION} % 1000))
+LIBARCHIVE_VERSION_STRING=2.5.1b
# FreeBSD SHLIB_MAJOR value is managed as part of the FreeBSD system.
# It has no real relation to the version number above.
# Note: FreeBSD has inttypes.h, so enable that include in archive.h.in
archive.h: archive.h.in Makefile
cat ${.CURDIR}/archive.h.in | sed \
- -e 's/@LIBARCHIVE_VERSION@/${LIBARCHIVE_VERSION}/g' \
- -e 's/@LIBARCHIVE_VERSION_STRING@/${LIBARCHIVE_VERSION_STRING}/g' \
- -e 's/@SHLIB_MAJOR@/${SHLIB_MAJOR}/g' \
- -e 's|@ARCHIVE_H_INCLUDE_INTTYPES_H@|#include <inttypes.h> /* For int64_t */|g' \
+ -e 's/@LIBARCHIVE_VERSION_NUMBER@/${LIBARCHIVE_VERSION_NUMBER}/g' \
+ -e 's/@LIBARCHIVE_VERSION_STRING@/${LIBARCHIVE_VERSION_STRING}/g' \
> archive.h
# archive.h needs to be cleaned
static void aes_copy(struct aes *dest, struct aes *src);
static const char * aes_get_mbs(struct aes *);
static const wchar_t * aes_get_wcs(struct aes *);
-static void aes_set_mbs(struct aes *, const char *mbs);
-static void aes_copy_mbs(struct aes *, const char *mbs);
+static int aes_set_mbs(struct aes *, const char *mbs);
+static int aes_copy_mbs(struct aes *, const char *mbs);
/* static void aes_set_wcs(struct aes *, const wchar_t *wcs); */
-static void aes_copy_wcs(struct aes *, const wchar_t *wcs);
-static void aes_copy_wcs_len(struct aes *, const wchar_t *wcs, size_t);
+static int aes_copy_wcs(struct aes *, const wchar_t *wcs);
+static int aes_copy_wcs_len(struct aes *, const wchar_t *wcs, size_t);
static char * ae_fflagstostr(unsigned long bitset, unsigned long bitclear);
static const wchar_t *ae_wcstofflags(const wchar_t *stringp,
unsigned long *setp, unsigned long *clrp);
+static const char *ae_strtofflags(const char *stringp,
+ unsigned long *setp, unsigned long *clrp);
static void append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag,
const wchar_t *wname, int perm, int id);
static void append_id_w(wchar_t **wp, int id);
#define wmemcpy(a,b,i) (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t))
#endif
-
static void
aes_clean(struct aes *aes)
{
- if (aes->aes_mbs_alloc) {
- free(aes->aes_mbs_alloc);
- aes->aes_mbs_alloc = NULL;
- }
- if (aes->aes_wcs_alloc) {
- free(aes->aes_wcs_alloc);
- aes->aes_wcs_alloc = NULL;
+ if (aes->aes_wcs) { /* Release the heap-allocated wide-character form, if one is held. */
+ free((wchar_t *)(uintptr_t)aes->aes_wcs); /* Presumably aes_wcs is const-qualified; the uintptr_t round-trip sheds the qualifier for free(). */
+ aes->aes_wcs = NULL;
}
- memset(aes, 0, sizeof(*aes));
+ archive_string_free(&(aes->aes_mbs)); /* Drop the multibyte form. */
+ archive_string_free(&(aes->aes_utf8)); /* Drop the UTF-8 form. */
+ aes->aes_set = 0; /* No form (MBS, WCS, UTF-8) is marked valid any more. */
}
static void
aes_copy(struct aes *dest, struct aes *src)
{
- *dest = *src;
- if (src->aes_mbs != NULL) {
- dest->aes_mbs_alloc = strdup(src->aes_mbs);
- dest->aes_mbs = dest->aes_mbs_alloc;
- if (dest->aes_mbs == NULL)
- __archive_errx(1, "No memory for aes_copy()");
- }
+ wchar_t *wp; /* Private duplicate of src's wide-character string. */
+
+ dest->aes_set = src->aes_set; /* Carry over which forms are marked valid. */
+ archive_string_copy(&(dest->aes_mbs), &(src->aes_mbs));
+ archive_string_copy(&(dest->aes_utf8), &(src->aes_utf8));
if (src->aes_wcs != NULL) {
- dest->aes_wcs_alloc = (wchar_t *)malloc((wcslen(src->aes_wcs) + 1)
+ wp = (wchar_t *)malloc((wcslen(src->aes_wcs) + 1)
* sizeof(wchar_t));
- dest->aes_wcs = dest->aes_wcs_alloc;
- if (dest->aes_wcs == NULL)
+ if (wp == NULL)
__archive_errx(1, "No memory for aes_copy()");
- wcscpy(dest->aes_wcs_alloc, src->aes_wcs);
+ wcscpy(wp, src->aes_wcs);
+ dest->aes_wcs = wp; /* Publish the copy only once it is fully written. */
+ } /* NOTE(review): when src has no WCS form, dest->aes_wcs is left as it was; presumably dest starts out zeroed - confirm. */
+}
+
+static const char *
+aes_get_utf8(struct aes *aes)
+{ /* Return the UTF-8 form, deriving it from the WCS form on first use; NULL if neither is available. */
+ if (aes->aes_set & AES_SET_UTF8) /* A UTF-8 form is already marked valid. */
+ return (aes->aes_utf8.s);
+ if ((aes->aes_set & AES_SET_WCS)
+ && archive_strappend_w_utf8(&(aes->aes_utf8), aes->aes_wcs) != NULL) {
+ aes->aes_set |= AES_SET_UTF8; /* Remember the conversion so later calls return early. */
+ return (aes->aes_utf8.s);
}
+ return (NULL); /* No UTF-8 form, and none could be produced from WCS. */
}
static const char *
aes_get_mbs(struct aes *aes)
{
- if (aes->aes_mbs == NULL && aes->aes_wcs == NULL)
- return NULL;
- if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) {
- /*
- * XXX Need to estimate the number of byte in the
- * multi-byte form. Assume that, on average, wcs
- * chars encode to no more than 3 bytes. There must
- * be a better way... XXX
- */
- size_t mbs_length = wcslen(aes->aes_wcs) * 3 + 64;
-
- aes->aes_mbs_alloc = (char *)malloc(mbs_length);
- aes->aes_mbs = aes->aes_mbs_alloc;
- if (aes->aes_mbs == NULL)
- __archive_errx(1, "No memory for aes_get_mbs()");
- wcstombs(aes->aes_mbs_alloc, aes->aes_wcs, mbs_length - 1);
- aes->aes_mbs_alloc[mbs_length - 1] = 0;
+ /* If we already have an MBS form, return that immediately. */
+ if (aes->aes_set & AES_SET_MBS)
+ return (aes->aes_mbs.s);
+ /* If there's a WCS form, try converting with the native locale. */
+ if ((aes->aes_set & AES_SET_WCS)
+ && archive_strappend_w_mbs(&(aes->aes_mbs), aes->aes_wcs) != NULL) {
+ aes->aes_set |= AES_SET_MBS; /* Mark the converted form valid so the next call returns early. */
+ return (aes->aes_mbs.s);
}
- return (aes->aes_mbs);
+ /* We'll use UTF-8 for MBS if all else fails; this may still be NULL when no form can be produced. */
+ return (aes_get_utf8(aes));
}
+/*
+ * Return the wide-character form of the string, deriving it on demand.
+ *
+ * Lookup order: an already-valid WCS form, then a conversion of the MBS
+ * form with mbstowcs(), then a conversion of the UTF-8 form.  A form is
+ * marked valid in aes_set only when its conversion actually produced a
+ * string, so the other getters never see a flagged-but-NULL WCS pointer.
+ * Returns NULL when no form is available or every conversion failed.
+ * The returned pointer is owned by the aes object.
+ */
static const wchar_t *
aes_get_wcs(struct aes *aes)
{
+	wchar_t *w;
int r;
- if (aes->aes_wcs == NULL && aes->aes_mbs == NULL)
- return NULL;
- if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) {
+	/* Return WCS form if we already have it. */
+	if (aes->aes_set & AES_SET_WCS)
+		return (aes->aes_wcs);
+
+	if (aes->aes_set & AES_SET_MBS) {
+		/* Try converting MBS to WCS using native locale. */
/*
* No single byte will be more than one wide character,
* so this length estimate will always be big enough.
*/
- size_t wcs_length = strlen(aes->aes_mbs);
+		size_t wcs_length = aes->aes_mbs.length;
- aes->aes_wcs_alloc
- = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t));
- aes->aes_wcs = aes->aes_wcs_alloc;
- if (aes->aes_wcs == NULL)
+		w = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t));
+		if (w == NULL)
__archive_errx(1, "No memory for aes_get_wcs()");
- r = mbstowcs(aes->aes_wcs_alloc, aes->aes_mbs, wcs_length);
- aes->aes_wcs_alloc[wcs_length] = 0;
- if (r == -1) {
- /* Conversion failed, don't lie to our clients. */
- free(aes->aes_wcs_alloc);
- aes->aes_wcs = aes->aes_wcs_alloc = NULL;
+		r = mbstowcs(w, aes->aes_mbs.s, wcs_length);
+		/* mbstowcs() does not terminate when it fills the buffer. */
+		w[wcs_length] = 0;
+		if (r > 0) {
+			aes->aes_set |= AES_SET_WCS;
+			return (aes->aes_wcs = w);
}
+		/* Conversion failed or was empty; fall through to UTF-8. */
+		free(w);
}
- return (aes->aes_wcs);
+
+	if (aes->aes_set & AES_SET_UTF8) {
+		/* Try converting UTF8 to WCS. */
+		aes->aes_wcs = __archive_string_utf8_w(&(aes->aes_utf8));
+		/*
+		 * The converter reports failure with NULL.  Only advertise
+		 * a WCS form when one exists; flagging a NULL pointer would
+		 * make aes_get_mbs() feed NULL to the WCS-to-MBS converter.
+		 */
+		if (aes->aes_wcs != NULL)
+			aes->aes_set |= AES_SET_WCS;
+		return (aes->aes_wcs);
+	}
+	return (NULL);
}
-static void
+static int
aes_set_mbs(struct aes *aes, const char *mbs)
{
- if (aes->aes_mbs_alloc) {
- free(aes->aes_mbs_alloc);
- aes->aes_mbs_alloc = NULL;
- }
- if (aes->aes_wcs_alloc) {
- free(aes->aes_wcs_alloc);
- aes->aes_wcs_alloc = NULL;
- }
- aes->aes_mbs = mbs;
- aes->aes_wcs = NULL;
+ return (aes_copy_mbs(aes, mbs)); /* 'set' is now a plain alias for 'copy': the caller's pointer is no longer stored. */
}
-static void
+static int
aes_copy_mbs(struct aes *aes, const char *mbs)
{
- if (aes->aes_mbs_alloc) {
- free(aes->aes_mbs_alloc);
- aes->aes_mbs_alloc = NULL;
+ if (mbs == NULL) { /* NULL means "clear": no form stays marked valid. */
+ aes->aes_set = 0; /* Buffers are kept; only the validity flags are reset. */
+ return (0);
}
- if (aes->aes_wcs_alloc) {
- free(aes->aes_wcs_alloc);
- aes->aes_wcs_alloc = NULL;
+ aes->aes_set = AES_SET_MBS; /* Only MBS form is set now. */
+ archive_strcpy(&(aes->aes_mbs), mbs);
+ archive_string_empty(&(aes->aes_utf8)); /* Any UTF-8 text belonged to the previous value. */
+ if (aes->aes_wcs) { /* Likewise discard the previous value's wide-character form. */
+ free((wchar_t *)(uintptr_t)aes->aes_wcs);
+ aes->aes_wcs = NULL;
}
- aes->aes_mbs_alloc = (char *)malloc((strlen(mbs) + 1) * sizeof(char));
- if (aes->aes_mbs_alloc == NULL)
- __archive_errx(1, "No memory for aes_copy_mbs()");
- strcpy(aes->aes_mbs_alloc, mbs);
- aes->aes_mbs = aes->aes_mbs_alloc;
- aes->aes_wcs = NULL;
+ return (0); /* Every visible path returns 0. */
}
-#if 0
-static void
-aes_set_wcs(struct aes *aes, const wchar_t *wcs)
+/*
+ * The 'update' form tries to proactively update all forms of
+ * this string (WCS and MBS) and reports failure if any of
+ * the conversions fail. This is used by the 'pax' handler, for instance,
+ * to detect and report character-conversion failures early while
+ * still allowing clients to get potentially useful values from
+ * the more tolerant lazy conversions. (get_mbs and get_wcs will
+ * strive to give the user something useful, so you can get hopefully
+ * usable values even if some of the character conversions are failing.)
+ *
+ * Returns 1 when every conversion succeeded (or when utf8 is NULL and
+ * the string was cleared), and 0 when the UTF-8 to WCS or the WCS to
+ * MBS conversion failed. On failure the forms converted so far stay
+ * marked valid in aes_set.
+ */
+static int
+aes_update_utf8(struct aes *aes, const char *utf8)
{
- if (aes->aes_mbs_alloc) {
- free(aes->aes_mbs_alloc);
- aes->aes_mbs_alloc = NULL;
+ if (utf8 == NULL) {
+ aes->aes_set = 0; /* Only the validity flags are reset here. */
+ return (1); /* Succeeded in clearing everything. */
}
- if (aes->aes_wcs_alloc) {
- free(aes->aes_wcs_alloc);
- aes->aes_wcs_alloc = NULL;
+
+ /* Save the UTF8 string. */
+ archive_strcpy(&(aes->aes_utf8), utf8);
+
+ /* Empty the mbs and wcs strings. */
+ archive_string_empty(&(aes->aes_mbs));
+ if (aes->aes_wcs) {
+ free((wchar_t *)(uintptr_t)aes->aes_wcs);
+ aes->aes_wcs = NULL;
}
- aes->aes_mbs = NULL;
- aes->aes_wcs = wcs;
+
+ aes->aes_set = AES_SET_UTF8; /* Only UTF8 is set now. */
+
+ /* TODO: We should just do a direct UTF-8 to MBS conversion
+ * here. That would be faster, use less space, and give the
+ * same information. (If a UTF-8 to MBS conversion succeeds,
+ * then UTF-8->WCS and Unicode->MBS conversions will both
+ * succeed.) */
+
+ /* Try converting UTF8 to WCS, return false on failure. */
+ aes->aes_wcs = __archive_string_utf8_w(&(aes->aes_utf8));
+ if (aes->aes_wcs == NULL)
+ return (0);
+ aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; /* Both UTF8 and WCS set. */
+
+ /* Try converting WCS to MBS, return false on failure. */
+ if (archive_strappend_w_mbs(&(aes->aes_mbs), aes->aes_wcs) == NULL)
+ return (0);
+ aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS; /* All three forms are valid now. */
+
+ /* All conversions succeeded. */
+ return (1);
}
-#endif
-static void
+static int
aes_copy_wcs(struct aes *aes, const wchar_t *wcs)
{
- aes_copy_wcs_len(aes, wcs, wcslen(wcs));
+ return aes_copy_wcs_len(aes, wcs, wcs == NULL ? 0 : wcslen(wcs)); /* Skip wcslen() for NULL; aes_copy_wcs_len() treats NULL as "clear". */
}
-static void
+static int
aes_copy_wcs_len(struct aes *aes, const wchar_t *wcs, size_t len)
{
- if (aes->aes_mbs_alloc) {
- free(aes->aes_mbs_alloc);
- aes->aes_mbs_alloc = NULL;
+ wchar_t *w; /* New buffer holding len characters plus the terminator. */
+
+ if (wcs == NULL) { /* NULL means "clear": no form stays marked valid. */
+ aes->aes_set = 0;
+ return (0);
}
- if (aes->aes_wcs_alloc) {
- free(aes->aes_wcs_alloc);
- aes->aes_wcs_alloc = NULL;
+ aes->aes_set = AES_SET_WCS; /* Only WCS form set. */
+ archive_string_empty(&(aes->aes_mbs)); /* MBS and UTF-8 text belonged to the previous value. */
+ archive_string_empty(&(aes->aes_utf8));
+ if (aes->aes_wcs) { /* Free the previous wide string before installing the new one. */
+ free((wchar_t *)(uintptr_t)aes->aes_wcs);
+ aes->aes_wcs = NULL;
}
- aes->aes_mbs = NULL;
- aes->aes_wcs_alloc = (wchar_t *)malloc((len + 1) * sizeof(wchar_t));
- if (aes->aes_wcs_alloc == NULL)
+ w = (wchar_t *)malloc((len + 1) * sizeof(wchar_t));
+ if (w == NULL)
__archive_errx(1, "No memory for aes_copy_wcs()");
- wmemcpy(aes->aes_wcs_alloc, wcs, len);
- aes->aes_wcs_alloc[len] = L'\0';
- aes->aes_wcs = aes->aes_wcs_alloc;
+ wmemcpy(w, wcs, len); /* Copies exactly len characters; wcs need not be terminated at len. */
+ w[len] = L'\0';
+ aes->aes_wcs = w;
+ return (0); /* Every visible path returns 0. */
}
+/****************************************************************************
+ *
+ * Public Interface
+ *
+ ****************************************************************************/
+
struct archive_entry *
archive_entry_clear(struct archive_entry *entry)
{
aes_copy(&entry2->ae_hardlink, &entry->ae_hardlink);
aes_copy(&entry2->ae_pathname, &entry->ae_pathname);
aes_copy(&entry2->ae_symlink, &entry->ae_symlink);
+ entry2->ae_hardlinkset = entry->ae_hardlinkset;
+ entry2->ae_symlinkset = entry->ae_symlinkset;
aes_copy(&entry2->ae_uname, &entry->ae_uname);
/* Copy ACL data over. */
const char *
archive_entry_hardlink(struct archive_entry *entry)
{
+ if (!entry->ae_hardlinkset) /* Flag is raised by the hardlink setters; without it, report "no hardlink". */
+ return (NULL);
return (aes_get_mbs(&entry->ae_hardlink));
}
const wchar_t *
archive_entry_hardlink_w(struct archive_entry *entry)
{
+ if (!entry->ae_hardlinkset) /* Flag is raised by the hardlink setters; without it, report "no hardlink". */
+ return (NULL);
return (aes_get_wcs(&entry->ae_hardlink));
}
return (entry->ae_stat.aest_size);
}
+/* Return the multibyte form of the entry's source path (NULL if none). */
+const char *
+archive_entry_sourcepath(struct archive_entry *entry)
+{
+	const char *path;
+
+	path = aes_get_mbs(&entry->ae_sourcepath);
+	return (path);
+}
+
const char *
archive_entry_symlink(struct archive_entry *entry)
{
+ if (!entry->ae_symlinkset) /* Flag is raised by the symlink setters; without it, report "no symlink". */
+ return (NULL);
return (aes_get_mbs(&entry->ae_symlink));
}
const wchar_t *
archive_entry_symlink_w(struct archive_entry *entry)
{
+ if (!entry->ae_symlinkset) /* Flag is raised by the symlink setters; without it, report "no symlink". */
+ return (NULL);
return (aes_get_wcs(&entry->ae_symlink));
}
entry->ae_fflags_clear = clear;
}
+/*
+ * Store a copy of the textual file flags and parse them into the
+ * entry's set/clear bitmasks.  Returns whatever ae_strtofflags()
+ * reports: the first unrecognized token, or NULL if all were known.
+ */
+const char *
+archive_entry_copy_fflags_text(struct archive_entry *entry,
+    const char *flags)
+{
+	const char *first_unknown;
+
+	(void)aes_copy_mbs(&entry->ae_fflags_text, flags);
+	first_unknown = ae_strtofflags(flags,
+	    &entry->ae_fflags_set, &entry->ae_fflags_clear);
+	return (first_unknown);
+}
+
const wchar_t *
archive_entry_copy_fflags_text_w(struct archive_entry *entry,
const wchar_t *flags)
aes_copy_wcs(&entry->ae_gname, name);
}
+/*
+ * Replace the group name from UTF-8 text.  Returns aes_update_utf8()'s
+ * result: 1 if every derived form converted, 0 otherwise.
+ */
+int
+archive_entry_update_gname_utf8(struct archive_entry *entry, const char *name)
+{
+	int converted;
+
+	converted = aes_update_utf8(&entry->ae_gname, name);
+	return (converted);
+}
+
void
archive_entry_set_ino(struct archive_entry *entry, unsigned long ino)
{
archive_entry_set_hardlink(struct archive_entry *entry, const char *target)
{
aes_set_mbs(&entry->ae_hardlink, target);
+ if (target != NULL)
+ entry->ae_hardlinkset = 1;
}
+/*
+ * Store a copy of the hardlink target (multibyte form).  A NULL target
+ * clears the hardlink.
+ */
void
archive_entry_copy_hardlink(struct archive_entry *entry, const char *target)
{
aes_copy_mbs(&entry->ae_hardlink, target);
+	/*
+	 * Keep the presence flag in step with the stored text: clearing
+	 * with NULL must also drop a flag left over from an earlier call.
+	 */
+	entry->ae_hardlinkset = (target != NULL);
}
+/*
+ * Store a copy of the hardlink target (wide-character form).  A NULL
+ * target clears the hardlink.
+ */
void
archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target)
{
aes_copy_wcs(&entry->ae_hardlink, target);
+	/*
+	 * Keep the presence flag in step with the stored text: clearing
+	 * with NULL must also drop a flag left over from an earlier call.
+	 */
+	entry->ae_hardlinkset = (target != NULL);
}
void
void
archive_entry_set_link(struct archive_entry *entry, const char *target)
{
- if (entry->ae_symlink.aes_mbs != NULL ||
- entry->ae_symlink.aes_wcs != NULL)
+ if (entry->ae_symlinkset)
aes_set_mbs(&entry->ae_symlink, target);
else
aes_set_mbs(&entry->ae_hardlink, target);
void
archive_entry_copy_link(struct archive_entry *entry, const char *target)
{
- if (entry->ae_symlink.aes_mbs != NULL ||
- entry->ae_symlink.aes_wcs != NULL)
+ if (entry->ae_symlinkset)
aes_copy_mbs(&entry->ae_symlink, target);
else
aes_copy_mbs(&entry->ae_hardlink, target);
void
archive_entry_copy_link_w(struct archive_entry *entry, const wchar_t *target)
{
- if (entry->ae_symlink.aes_mbs != NULL ||
- entry->ae_symlink.aes_wcs != NULL)
+ if (entry->ae_symlinkset) /* Replace the symlink if one was recorded, otherwise store the target as the hardlink. NOTE(review): the hardlink branch does not raise ae_hardlinkset, so the hardlink getters would still return NULL - confirm this is intended. */
aes_copy_wcs(&entry->ae_symlink, target);
else
aes_copy_wcs(&entry->ae_hardlink, target);
}
+/*
+ * Replace the link target from UTF-8 text: the symlink if this entry
+ * has one recorded, the hardlink otherwise.  Returns aes_update_utf8()'s
+ * result for the chosen field.
+ */
+int
+archive_entry_update_link_utf8(struct archive_entry *entry, const char *target)
+{
+	struct aes *link;
+
+	link = entry->ae_symlinkset ? &entry->ae_symlink : &entry->ae_hardlink;
+	return (aes_update_utf8(link, target));
+}
+
void
archive_entry_set_mode(struct archive_entry *entry, mode_t m)
{
aes_copy_wcs(&entry->ae_pathname, name);
}
+/*
+ * Replace the pathname from UTF-8 text.  Returns aes_update_utf8()'s
+ * result: 1 if every derived form converted, 0 otherwise.
+ */
+int
+archive_entry_update_pathname_utf8(struct archive_entry *entry, const char *name)
+{
+	int converted;
+
+	converted = aes_update_utf8(&entry->ae_pathname, name);
+	return (converted);
+}
+
void
archive_entry_set_perm(struct archive_entry *entry, mode_t p)
{
entry->ae_stat.aest_size = s;
}
+/* Record the source path of the entry; the status result is not used. */
+void
+archive_entry_copy_sourcepath(struct archive_entry *entry, const char *path)
+{
+	struct aes *sourcepath = &entry->ae_sourcepath;
+
+	(void)aes_set_mbs(sourcepath, path);
+}
+
+/*
+ * Set the symlink target (multibyte form).  A NULL linkname clears the
+ * symlink.
+ */
void
archive_entry_set_symlink(struct archive_entry *entry, const char *linkname)
{
aes_set_mbs(&entry->ae_symlink, linkname);
+	/*
+	 * Keep the presence flag in step with the stored text.  A stale
+	 * flag after clearing would make the *_link() setters keep routing
+	 * targets to the symlink field.
+	 */
+	entry->ae_symlinkset = (linkname != NULL);
}
+/*
+ * Store a copy of the symlink target (multibyte form).  A NULL linkname
+ * clears the symlink.
+ */
void
archive_entry_copy_symlink(struct archive_entry *entry, const char *linkname)
{
aes_copy_mbs(&entry->ae_symlink, linkname);
+	/*
+	 * Keep the presence flag in step with the stored text.  A stale
+	 * flag after clearing would make the *_link() setters keep routing
+	 * targets to the symlink field.
+	 */
+	entry->ae_symlinkset = (linkname != NULL);
}
+/*
+ * Store a copy of the symlink target (wide-character form).  A NULL
+ * linkname clears the symlink.
+ */
void
archive_entry_copy_symlink_w(struct archive_entry *entry, const wchar_t *linkname)
{
aes_copy_wcs(&entry->ae_symlink, linkname);
+	/*
+	 * Keep the presence flag in step with the stored text.  A stale
+	 * flag after clearing would make the *_link() setters keep routing
+	 * targets to the symlink field.
+	 */
+	entry->ae_symlinkset = (linkname != NULL);
}
void
aes_copy_wcs(&entry->ae_uname, name);
}
+/*
+ * Replace the user name from UTF-8 text.  Returns aes_update_utf8()'s
+ * result: 1 if every derived form converted, 0 otherwise.
+ */
+int
+archive_entry_update_uname_utf8(struct archive_entry *entry, const char *name)
+{
+	int converted;
+
+	converted = aes_update_utf8(&entry->ae_uname, name);
+	return (converted);
+}
+
/*
* ACL management. The following would, of course, be a lot simpler
* if: 1) the last draft of POSIX.1e were a really thorough and
* Convert file flags to a comma-separated string. If no flags
* are set, return the empty string.
*/
-char *
+static char *
ae_fflagstostr(unsigned long bitset, unsigned long bitclear)
{
char *string, *dp;
return (string);
}
+/*
+ * strtofflags --
+ *	Take string of arguments and return file flags.  This
+ *	version works a little differently than strtofflags(3).
+ *	In particular, it always tests every token, skipping any
+ *	unrecognized tokens.  It returns a pointer to the first
+ *	unrecognized token, or NULL if every token was recognized.
+ *	This version is also const-correct and does not modify the
+ *	provided string.
+ *
+ *	Tokens are separated by tabs, spaces or commas.  A token is
+ *	recognized only when it equals a table name exactly, either in
+ *	full ("noXXXX") or with the leading "no" removed ("XXXX").
+ *	A NULL string is treated as an empty one.  The accumulated
+ *	masks are stored through setp and clrp when those are non-NULL.
+ *
+ *	NOTE(review): the narrow input is compared against the `name`
+ *	member of struct flag, assumed to be the char spelling that sits
+ *	next to `wname` in the table - confirm against its definition.
+ */
+static const char *
+ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp)
+{
+	const char *start, *end;
+	struct flag *flag;
+	unsigned long set, clear;
+	const char *failed;
+
+	set = clear = 0;
+	failed = NULL;
+	/* A NULL string carries no tokens; scan it like "". */
+	start = (s != NULL) ? s : "";
+	/* Find start of first token. */
+	while (*start == '\t' || *start == ' ' || *start == ',')
+		start++;
+	while (*start != '\0') {
+		size_t length;
+
+		/* Locate end of token. */
+		end = start;
+		while (*end != '\0' && *end != '\t' &&
+		    *end != ' ' && *end != ',')
+			end++;
+		length = (size_t)(end - start);
+		for (flag = flags; flag->name != NULL; flag++) {
+			/*
+			 * Compare lengths first: this rejects prefixes of
+			 * a name and keeps memcmp() inside the table string.
+			 */
+			size_t flag_length = strlen(flag->name);
+
+			if (length == flag_length
+			    && memcmp(start, flag->name, length) == 0) {
+				/* Matched "noXXXX", so reverse the sense. */
+				clear |= flag->set;
+				set |= flag->clear;
+				break;
+			} else if (length + 2 == flag_length
+			    && memcmp(start, flag->name + 2, length) == 0) {
+				/* Matched "XXXX", so don't reverse. */
+				set |= flag->set;
+				clear |= flag->clear;
+				break;
+			}
+		}
+		/* Remember the first unknown flag name, but keep scanning. */
+		if (flag->name == NULL && failed == NULL)
+			failed = start;
+
+		/* Find start of next token. */
+		start = end;
+		while (*start == '\t' || *start == ' ' || *start == ',')
+			start++;
+
+	}
+
+	if (setp)
+		*setp = set;
+	if (clrp)
+		*clrp = clear;
+
+	/* Return location of first failure. */
+	return (failed);
+}
+
/*
* wcstofflags --
* Take string of arguments and return file flags. This
* This version is also const-correct and does not modify the
* provided string.
*/
-const wchar_t *
+static const wchar_t *
ae_wcstofflags(const wchar_t *s, unsigned long *setp, unsigned long *clrp)
{
const wchar_t *start, *end;
#include <sys/types.h>
#include <stddef.h> /* for wchar_t */
#include <time.h>
+
+/* Get appropriate definitions of standard POSIX-style types. */
+/* These should match the types used in 'struct stat'. NOTE(review): this test checks only _WIN32, while the __LA_DECL test below also accepts __WIN32__ - confirm the difference is intentional. */
+#ifdef _WIN32
+#define __LA_UID_T unsigned int
+#define __LA_GID_T unsigned int
+#define __LA_INO_T unsigned int
+#define __LA_DEV_T unsigned int
+#define __LA_MODE_T unsigned short
+#else /* !_WIN32: use the platform's own type names */
#include <unistd.h>
+#define __LA_UID_T uid_t
+#define __LA_GID_T gid_t
+#define __LA_INO_T ino_t
+#define __LA_DEV_T dev_t
+#define __LA_MODE_T mode_t
+#endif /* _WIN32 */
+
+/*
+ * On Windows, define LIBARCHIVE_STATIC if you're building or using a
+ * .lib. The default here assumes you're building a DLL. Only
+ * libarchive source should ever define __LIBARCHIVE_BUILD.
+ *
+ * __LA_DECL is prefixed to the public prototypes below: it expands to
+ * a dllexport/dllimport decoration for Windows DLL builds and to
+ * nothing everywhere else.
+ */
+#if ((defined __WIN32__) || (defined _WIN32)) && (!defined LIBARCHIVE_STATIC)
+# ifdef __LIBARCHIVE_BUILD /* Building the library itself: export. */
+# ifdef __GNUC__
+# define __LA_DECL __attribute__((dllexport)) extern
+# else
+# define __LA_DECL __declspec(dllexport)
+# endif
+# else /* Building a client of the DLL: import. */
+# ifdef __GNUC__
+# define __LA_DECL __attribute__((dllimport)) extern
+# else
+# define __LA_DECL __declspec(dllimport)
+# endif
+# endif
+#else
+/* Static libraries on all platforms and shared libraries on non-Windows. */
+# define __LA_DECL
+#endif
#ifdef __cplusplus
extern "C" {
#endif
-
/*
* Description of an archive entry.
*
- * Basically, a "struct stat" with a few text fields added in.
+ * You can think of this as "struct stat" with some text fields added in.
*
* TODO: Add "comment", "charset", and possibly other entries that are
* supported by "pax interchange" format. However, GNU, ustar, cpio,
* Basic object manipulation
*/
-struct archive_entry *archive_entry_clear(struct archive_entry *);
+__LA_DECL struct archive_entry *archive_entry_clear(struct archive_entry *);
/* The 'clone' function does a deep copy; all of the strings are copied too. */
-struct archive_entry *archive_entry_clone(struct archive_entry *);
-void archive_entry_free(struct archive_entry *);
-struct archive_entry *archive_entry_new(void);
+__LA_DECL struct archive_entry *archive_entry_clone(struct archive_entry *);
+__LA_DECL void archive_entry_free(struct archive_entry *);
+__LA_DECL struct archive_entry *archive_entry_new(void);
/*
* Retrieve fields from an archive_entry.
*/
-time_t archive_entry_atime(struct archive_entry *);
-long archive_entry_atime_nsec(struct archive_entry *);
-time_t archive_entry_ctime(struct archive_entry *);
-long archive_entry_ctime_nsec(struct archive_entry *);
-dev_t archive_entry_dev(struct archive_entry *);
-dev_t archive_entry_devmajor(struct archive_entry *);
-dev_t archive_entry_devminor(struct archive_entry *);
-mode_t archive_entry_filetype(struct archive_entry *);
-void archive_entry_fflags(struct archive_entry *,
+__LA_DECL time_t archive_entry_atime(struct archive_entry *);
+__LA_DECL long archive_entry_atime_nsec(struct archive_entry *);
+__LA_DECL time_t archive_entry_ctime(struct archive_entry *);
+__LA_DECL long archive_entry_ctime_nsec(struct archive_entry *);
+__LA_DECL dev_t archive_entry_dev(struct archive_entry *);
+__LA_DECL dev_t archive_entry_devmajor(struct archive_entry *);
+__LA_DECL dev_t archive_entry_devminor(struct archive_entry *);
+__LA_DECL __LA_MODE_T archive_entry_filetype(struct archive_entry *);
+__LA_DECL void archive_entry_fflags(struct archive_entry *,
unsigned long * /* set */,
unsigned long * /* clear */);
-const char *archive_entry_fflags_text(struct archive_entry *);
-gid_t archive_entry_gid(struct archive_entry *);
-const char *archive_entry_gname(struct archive_entry *);
-const wchar_t *archive_entry_gname_w(struct archive_entry *);
-const char *archive_entry_hardlink(struct archive_entry *);
-const wchar_t *archive_entry_hardlink_w(struct archive_entry *);
-ino_t archive_entry_ino(struct archive_entry *);
-mode_t archive_entry_mode(struct archive_entry *);
-time_t archive_entry_mtime(struct archive_entry *);
-long archive_entry_mtime_nsec(struct archive_entry *);
-unsigned int archive_entry_nlink(struct archive_entry *);
-const char *archive_entry_pathname(struct archive_entry *);
-const wchar_t *archive_entry_pathname_w(struct archive_entry *);
-dev_t archive_entry_rdev(struct archive_entry *);
-dev_t archive_entry_rdevmajor(struct archive_entry *);
-dev_t archive_entry_rdevminor(struct archive_entry *);
-int64_t archive_entry_size(struct archive_entry *);
-const char *archive_entry_strmode(struct archive_entry *);
-const char *archive_entry_symlink(struct archive_entry *);
-const wchar_t *archive_entry_symlink_w(struct archive_entry *);
-uid_t archive_entry_uid(struct archive_entry *);
-const char *archive_entry_uname(struct archive_entry *);
-const wchar_t *archive_entry_uname_w(struct archive_entry *);
+__LA_DECL const char *archive_entry_fflags_text(struct archive_entry *);
+__LA_DECL __LA_GID_T archive_entry_gid(struct archive_entry *);
+__LA_DECL const char *archive_entry_gname(struct archive_entry *);
+__LA_DECL const wchar_t *archive_entry_gname_w(struct archive_entry *);
+__LA_DECL const char *archive_entry_hardlink(struct archive_entry *);
+__LA_DECL const wchar_t *archive_entry_hardlink_w(struct archive_entry *);
+__LA_DECL __LA_INO_T archive_entry_ino(struct archive_entry *);
+__LA_DECL __LA_MODE_T archive_entry_mode(struct archive_entry *);
+__LA_DECL time_t archive_entry_mtime(struct archive_entry *);
+__LA_DECL long archive_entry_mtime_nsec(struct archive_entry *);
+__LA_DECL unsigned int archive_entry_nlink(struct archive_entry *);
+__LA_DECL const char *archive_entry_pathname(struct archive_entry *);
+__LA_DECL const wchar_t *archive_entry_pathname_w(struct archive_entry *);
+__LA_DECL dev_t archive_entry_rdev(struct archive_entry *);
+__LA_DECL dev_t archive_entry_rdevmajor(struct archive_entry *);
+__LA_DECL dev_t archive_entry_rdevminor(struct archive_entry *);
+__LA_DECL const char *archive_entry_sourcepath(struct archive_entry *);
+__LA_DECL int64_t archive_entry_size(struct archive_entry *);
+__LA_DECL const char *archive_entry_strmode(struct archive_entry *);
+__LA_DECL const char *archive_entry_symlink(struct archive_entry *);
+__LA_DECL const wchar_t *archive_entry_symlink_w(struct archive_entry *);
+__LA_DECL __LA_UID_T archive_entry_uid(struct archive_entry *);
+__LA_DECL const char *archive_entry_uname(struct archive_entry *);
+__LA_DECL const wchar_t *archive_entry_uname_w(struct archive_entry *);
/*
* Set fields in an archive_entry.
* In contrast, 'copy' functions do copy the object pointed to.
*/
-void archive_entry_set_atime(struct archive_entry *, time_t, long);
-void archive_entry_set_ctime(struct archive_entry *, time_t, long);
-void archive_entry_set_dev(struct archive_entry *, dev_t);
-void archive_entry_set_devmajor(struct archive_entry *, dev_t);
-void archive_entry_set_devminor(struct archive_entry *, dev_t);
-void archive_entry_set_filetype(struct archive_entry *, unsigned int);
-void archive_entry_set_fflags(struct archive_entry *,
+__LA_DECL void archive_entry_set_atime(struct archive_entry *, time_t, long);
+__LA_DECL void archive_entry_set_ctime(struct archive_entry *, time_t, long);
+__LA_DECL void archive_entry_set_dev(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_devmajor(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_devminor(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_filetype(struct archive_entry *, unsigned int);
+__LA_DECL void archive_entry_set_fflags(struct archive_entry *,
unsigned long /* set */, unsigned long /* clear */);
/* Returns pointer to start of first invalid token, or NULL if none. */
/* Note that all recognized tokens are processed, regardless. */
-const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *,
+__LA_DECL const char *archive_entry_copy_fflags_text(struct archive_entry *,
+ const char *);
+__LA_DECL const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *,
const wchar_t *);
-void archive_entry_set_gid(struct archive_entry *, gid_t);
-void archive_entry_set_gname(struct archive_entry *, const char *);
-void archive_entry_copy_gname(struct archive_entry *, const char *);
-void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *);
-void archive_entry_set_hardlink(struct archive_entry *, const char *);
-void archive_entry_copy_hardlink(struct archive_entry *, const char *);
-void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *);
-void archive_entry_set_ino(struct archive_entry *, unsigned long);
-void archive_entry_set_link(struct archive_entry *, const char *);
-void archive_entry_copy_link(struct archive_entry *, const char *);
-void archive_entry_copy_link_w(struct archive_entry *, const wchar_t *);
-void archive_entry_set_mode(struct archive_entry *, mode_t);
-void archive_entry_set_mtime(struct archive_entry *, time_t, long);
-void archive_entry_set_nlink(struct archive_entry *, unsigned int);
-void archive_entry_set_pathname(struct archive_entry *, const char *);
-void archive_entry_copy_pathname(struct archive_entry *, const char *);
-void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *);
-void archive_entry_set_perm(struct archive_entry *, mode_t);
-void archive_entry_set_rdev(struct archive_entry *, dev_t);
-void archive_entry_set_rdevmajor(struct archive_entry *, dev_t);
-void archive_entry_set_rdevminor(struct archive_entry *, dev_t);
-void archive_entry_set_size(struct archive_entry *, int64_t);
-void archive_entry_set_symlink(struct archive_entry *, const char *);
-void archive_entry_copy_symlink(struct archive_entry *, const char *);
-void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *);
-void archive_entry_set_uid(struct archive_entry *, uid_t);
-void archive_entry_set_uname(struct archive_entry *, const char *);
-void archive_entry_copy_uname(struct archive_entry *, const char *);
-void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
-
+__LA_DECL void archive_entry_set_gid(struct archive_entry *, __LA_GID_T);
+__LA_DECL void archive_entry_set_gname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_gname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int archive_entry_update_gname_utf8(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_set_hardlink(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_hardlink(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *);
+__LA_DECL void archive_entry_set_ino(struct archive_entry *, unsigned long);
+__LA_DECL void archive_entry_set_link(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_link(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_link_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int archive_entry_update_link_utf8(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_set_mode(struct archive_entry *, __LA_MODE_T);
+__LA_DECL void archive_entry_set_mtime(struct archive_entry *, time_t, long);
+__LA_DECL void archive_entry_set_nlink(struct archive_entry *, unsigned int);
+__LA_DECL void archive_entry_set_pathname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_pathname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int archive_entry_update_pathname_utf8(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_set_perm(struct archive_entry *, __LA_MODE_T);
+__LA_DECL void archive_entry_set_rdev(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_rdevmajor(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_rdevminor(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_size(struct archive_entry *, int64_t);
+__LA_DECL void archive_entry_copy_sourcepath(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_set_symlink(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_symlink(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *);
+__LA_DECL void archive_entry_set_uid(struct archive_entry *, __LA_UID_T);
+__LA_DECL void archive_entry_set_uname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_uname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int archive_entry_update_uname_utf8(struct archive_entry *, const char *);
/*
* Routines to bulk copy fields to/from a platform-native "struct
* stat." Libarchive used to just store a struct stat inside of each
*
* TODO: On Linux, provide both stat32 and stat64 versions of these functions.
*/
-const struct stat *archive_entry_stat(struct archive_entry *);
-void archive_entry_copy_stat(struct archive_entry *, const struct stat *);
+__LA_DECL const struct stat *archive_entry_stat(struct archive_entry *);
+__LA_DECL void archive_entry_copy_stat(struct archive_entry *, const struct stat *);
/*
* ACL routines. This used to simply store and return text-format ACL
* POSIX.1e) is useful for handling archive formats that combine
* default and access information in a single ACL list.
*/
-void archive_entry_acl_clear(struct archive_entry *);
-void archive_entry_acl_add_entry(struct archive_entry *,
+__LA_DECL void archive_entry_acl_clear(struct archive_entry *);
+__LA_DECL void archive_entry_acl_add_entry(struct archive_entry *,
int /* type */, int /* permset */, int /* tag */,
int /* qual */, const char * /* name */);
-void archive_entry_acl_add_entry_w(struct archive_entry *,
+__LA_DECL void archive_entry_acl_add_entry_w(struct archive_entry *,
int /* type */, int /* permset */, int /* tag */,
int /* qual */, const wchar_t * /* name */);
* "next" entry. The want_type parameter allows you to request only
* access entries or only default entries.
*/
-int archive_entry_acl_reset(struct archive_entry *, int /* want_type */);
-int archive_entry_acl_next(struct archive_entry *, int /* want_type */,
+__LA_DECL int archive_entry_acl_reset(struct archive_entry *, int /* want_type */);
+__LA_DECL int archive_entry_acl_next(struct archive_entry *, int /* want_type */,
int * /* type */, int * /* permset */, int * /* tag */,
int * /* qual */, const char ** /* name */);
-int archive_entry_acl_next_w(struct archive_entry *, int /* want_type */,
+__LA_DECL int archive_entry_acl_next_w(struct archive_entry *, int /* want_type */,
int * /* type */, int * /* permset */, int * /* tag */,
int * /* qual */, const wchar_t ** /* name */);
*/
#define ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID 1024
#define ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT 2048
-const wchar_t *archive_entry_acl_text_w(struct archive_entry *,
+__LA_DECL const wchar_t *archive_entry_acl_text_w(struct archive_entry *,
int /* flags */);
/* Return a count of entries matching 'want_type' */
-int archive_entry_acl_count(struct archive_entry *, int /* want_type */);
+__LA_DECL int archive_entry_acl_count(struct archive_entry *, int /* want_type */);
/*
* Private ACL parser. This is private because it handles some
* TODO: Move this declaration out of the public header and into
* a private header. Warnings above are silly.
*/
-int __archive_entry_acl_parse_w(struct archive_entry *,
+__LA_DECL int __archive_entry_acl_parse_w(struct archive_entry *,
const wchar_t *, int /* type */);
/*
* extended attributes
*/
-void archive_entry_xattr_clear(struct archive_entry *);
-void archive_entry_xattr_add_entry(struct archive_entry *,
+__LA_DECL void archive_entry_xattr_clear(struct archive_entry *);
+__LA_DECL void archive_entry_xattr_add_entry(struct archive_entry *,
const char * /* name */, const void * /* value */,
size_t /* size */);
* "next" entry.
*/
-int archive_entry_xattr_count(struct archive_entry *);
-int archive_entry_xattr_reset(struct archive_entry *);
-int archive_entry_xattr_next(struct archive_entry *,
+__LA_DECL int archive_entry_xattr_count(struct archive_entry *);
+__LA_DECL int archive_entry_xattr_reset(struct archive_entry *);
+__LA_DECL int archive_entry_xattr_next(struct archive_entry *,
const char ** /* name */, const void ** /* value */, size_t *);
/*
- * Utility to detect hardlinks.
+ * Utility to match up hardlinks.
*
- * The 'struct archive_hardlink_lookup' is a cache of entry
- * names and dev/ino numbers. Here's how to use it:
- * 1. Create a lookup object with archive_hardlink_lookup_new()
- * 2. Hand each archive_entry to archive_hardlink_lookup().
- * That function will return NULL (this is not a hardlink to
- * a previous entry) or the pathname of the first entry
- * that matched this.
- * 3. Use archive_hardlink_lookup_free() to release the cache.
+ * The 'struct archive_entry_linkresolver' is a cache of archive entries
+ * for files with multiple links. Here's how to use it:
+ * 1. Create a lookup object with archive_entry_linkresolver_new()
+ * 2. Tell it the archive format you're using.
+ * 3. Hand each archive_entry to archive_entry_linkify().
+ * That function will return 0, 1, or 2 entries that should
+ * be written.
+ * 4. Call archive_entry_linkify(resolver, NULL) until
+ * no more entries are returned.
+ * 5. Call archive_entry_linkresolver_free(resolver) to free resources.
+ *
+ * The entries returned have their hardlink and size fields updated
+ * appropriately. If an entry is passed in that does not refer to
+ * a file with multiple links, it is returned unchanged. The intention
+ * is that you should be able to simply filter all entries through
+ * this machine.
*
* To make things more efficient, be sure that each entry has a valid
* nlinks value. The hardlink cache uses this to track when all links
* have been found. If the nlinks value is zero, it will keep every
* name in the cache indefinitely, which can use a lot of memory.
+ *
+ * Note that archive_entry_size() is reset to zero if the file
+ * body should not be written to the archive. Pay attention!
*/
-struct archive_entry_linkresolver;
+__LA_DECL struct archive_entry_linkresolver;
-struct archive_entry_linkresolver *archive_entry_linkresolver_new(void);
-void archive_entry_linkresolver_free(struct archive_entry_linkresolver *);
-const char *archive_entry_linkresolve(struct archive_entry_linkresolver *,
- struct archive_entry *);
+/*
+ * There are three different strategies for marking hardlinks.
+ * The descriptions below name them after the best-known
+ * formats that rely on each strategy:
+ *
+ * "Old cpio" is the simplest, it always returns any entry unmodified.
+ * As far as I know, only cpio formats use this. Old cpio archives
+ * store every link with the full body; the onus is on the dearchiver
+ * to detect and properly link the files as they are restored.
+ * "tar" is also pretty simple; it caches a copy the first time it sees
+ * any link. Subsequent appearances are modified to be hardlink
+ * references to the first one without any body. Used by all tar
+ * formats, although the newest tar formats permit the "old cpio" strategy
+ * as well. This strategy is very simple for the dearchiver,
+ * and reasonably straightforward for the archiver.
+ * "new cpio" is trickier. It stores the body only with the last
+ * occurrence. The complication is that we might not
+ * see every link to a particular file in a single session, so
+ * there's no easy way to know when we've seen the last occurrence.
+ * The solution here is to queue one link until we see the next.
+ * At the end of the session, you can enumerate any remaining
+ * entries by calling archive_entry_linkify(NULL) and store those
+ * bodies. If you have a file with three links l1, l2, and l3,
+ * you'll get the following behavior if you see all three links:
+ * linkify(l1) => NULL (the resolver stores l1 internally)
+ * linkify(l2) => l1 (resolver stores l2, you write l1)
+ * linkify(l3) => l2, l3 (all links seen, you can write both).
+ * If you only see l1 and l2, you'll get this behavior:
+ * linkify(l1) => NULL
+ * linkify(l2) => l1
+ * linkify(NULL) => l2 (at end, you retrieve remaining links)
+ * As the name suggests, this strategy is used by newer cpio variants.
+ * It's noticeably more complex for the archiver, slightly more complex
+ * for the dearchiver than the tar strategy, but makes it straightforward
+ * to restore a file using any link by simply continuing to scan until
+ * you see a link that is stored with a body. In contrast, the tar
+ * strategy requires you to rescan the archive from the beginning to
+ * correctly extract an arbitrary link.
+ */
+
+__LA_DECL struct archive_entry_linkresolver *archive_entry_linkresolver_new(void);
+__LA_DECL void archive_entry_linkresolver_set_strategy(
+ struct archive_entry_linkresolver *, int /* format_code */);
+__LA_DECL void archive_entry_linkresolver_free(struct archive_entry_linkresolver *);
+__LA_DECL void archive_entry_linkify(struct archive_entry_linkresolver *,
+ struct archive_entry **, struct archive_entry **);
#ifdef __cplusplus
}
#endif
+/* This is meaningless outside of this header. */
+#undef __LA_DECL
+
#endif /* !ARCHIVE_ENTRY_H_INCLUDED */
#include <string.h>
#endif
+#include "archive.h"
#include "archive_entry.h"
+/*
+ * This is mostly a pretty straightforward hash table implementation.
+ * The only interesting bit is the different strategies used to
+ * match up links. These strategies match those used by various
+ * archiving formats:
+ * tar - content stored with first link, remainder refer back to it.
+ * This requires us to match each subsequent link up with the
+ * first appearance.
+ * cpio - Old cpio just stored body with each link, match-ups were
+ * implicit. This is trivial.
+ * new cpio - New cpio only stores body with last link, match-ups
+ * are implicit. This is actually quite tricky; see the notes
+ * below.
+ */
+
+/* Users pass us a format code, we translate that into a strategy here. */
+#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
+#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 1
+#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 2
+
/* Initial size of link cache. */
#define links_cache_initial_size 1024
+/* One cached multiply-linked file; entries are chained per hash bucket. */
+struct links_entry {
+ struct links_entry *next; /* next entry in the same bucket chain */
+ struct links_entry *previous; /* previous entry in the chain; NULL at the head */
+ int links; /* # links not yet seen */
+ int hash; /* dev ^ ino, as computed by insert_entry() */
+ struct archive_entry *canonical; /* clone made by insert_entry() */
+ struct archive_entry *entry; /* entry pointer stored by insert_entry(); swapped by the new-cpio strategy */
+};
+
+/* Link cache: a hash table of links_entry chains plus bookkeeping. */
struct archive_entry_linkresolver {
- char *last_name;
+ struct links_entry **buckets; /* array of bucket chain heads */
+ struct links_entry *spare; /* entry already unlinked from the table; released on the next find_entry()/next_entry() call */
+ /* Number of entries in the table, and number of slots in buckets[]. */
unsigned long number_entries;
size_t number_buckets;
- struct links_entry **buckets;
+ int strategy; /* one of ARCHIVE_ENTRY_LINKIFY_LIKE_* */
};
-struct links_entry {
- struct links_entry *next;
- struct links_entry *previous;
- int links;
- dev_t dev;
- ino_t ino;
- char *name;
-};
+static struct links_entry *find_entry(struct archive_entry_linkresolver *,
+ struct archive_entry *);
+static void grow_hash(struct archive_entry_linkresolver *);
+static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
+ struct archive_entry *);
+static struct links_entry *next_entry(struct archive_entry_linkresolver *);
+/*
+ * Allocate a resolver with an empty table of links_cache_initial_size
+ * buckets. Returns NULL if either allocation fails. The memset leaves
+ * 'strategy' at zero, which is ARCHIVE_ENTRY_LINKIFY_LIKE_TAR.
+ */
struct archive_entry_linkresolver *
archive_entry_linkresolver_new(void)
{
- struct archive_entry_linkresolver *links_cache;
+ struct archive_entry_linkresolver *res;
size_t i;
- links_cache = malloc(sizeof(struct archive_entry_linkresolver));
- if (links_cache == NULL)
+ res = malloc(sizeof(struct archive_entry_linkresolver));
+ if (res == NULL)
return (NULL);
- memset(links_cache, 0, sizeof(struct archive_entry_linkresolver));
- links_cache->number_buckets = links_cache_initial_size;
- links_cache->buckets = malloc(links_cache->number_buckets *
- sizeof(links_cache->buckets[0]));
- if (links_cache->buckets == NULL) {
- free(links_cache);
+ memset(res, 0, sizeof(struct archive_entry_linkresolver));
+ res->number_buckets = links_cache_initial_size;
+ res->buckets = malloc(res->number_buckets *
+ sizeof(res->buckets[0]));
+ if (res->buckets == NULL) {
+ free(res);
return (NULL);
}
- for (i = 0; i < links_cache->number_buckets; i++)
- links_cache->buckets[i] = NULL;
- return (links_cache);
+ /* Every bucket starts out as an empty chain. */
+ for (i = 0; i < res->number_buckets; i++)
+ res->buckets[i] = NULL;
+ return (res);
}
+/*
+ * Pick the link-matching strategy from an archive format code:
+ * the two SVR4 cpio variants (NOCRC and CRC) use the new-cpio strategy,
+ * every other cpio format uses the old-cpio strategy, and tar as well
+ * as any unrecognized format uses the tar strategy.
+ */
void
-archive_entry_linkresolver_free(struct archive_entry_linkresolver *links_cache)
+archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
+ int fmt)
{
- size_t i;
+ /* Mask off the variant bits to get the format family. */
+ int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
+
+ switch (fmtbase) {
+ case ARCHIVE_FORMAT_CPIO:
+ /* Within the cpio family the exact variant decides. */
+ switch (fmt) {
+ case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
+ case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
+ res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
+ break;
+ default:
+ res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
+ break;
+ }
+ break;
+ case ARCHIVE_FORMAT_TAR:
+ res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
+ break;
+ default:
+ res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
+ break;
+ }
+}
+
+/*
+ * Release a resolver together with everything still cached in it.
+ * Passing NULL is a no-op, mirroring free().
+ *
+ * Each next_entry() call unlinks one cache entry and releases the
+ * links_entry (and its canonical clone) handed out by the previous
+ * call; the final call, which returns NULL, releases the last one.
+ * The 'entry' object of every cached item is freed here.
+ *
+ * NOTE(review): under the tar strategy insert_entry() stores the
+ * caller's own entry pointer, so this loop frees caller-supplied
+ * entries -- confirm that callers expect to give up ownership.
+ */
+void
+archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
+{
+	struct links_entry *le;
+
+	if (res == NULL)
+		return;
+
+	if (res->buckets != NULL) {
+		while ((le = next_entry(res)) != NULL)
+			archive_entry_free(le->entry);
+		free(res->buckets);
+		res->buckets = NULL;
+	}
+	free(res);
+}
+
+/*
+ * Run one entry through the link resolver.
+ *
+ * e: in/out. On entry, the entry to process, or NULL to drain one
+ * entry still held in the cache. On return, the first entry the
+ * caller should write, or NULL if nothing is to be written yet.
+ * f: out. A second entry to write, or NULL.
+ *
+ * Entries with a link count of 1 pass through untouched. Otherwise:
+ * tar - the first appearance is cached and returned unchanged;
+ * later appearances get size 0 and a hardlink to the
+ * cached pathname.
+ * old cpio - entries are returned unchanged.
+ * new cpio - the entry is held back and the previously held one is
+ * returned as a hardlink; when the last link is seen,
+ * the held entry is returned through f as well.
+ * If the cache entry for a new-cpio file cannot be allocated, the
+ * entry is handed back unchanged rather than being held.
+ */
+void
+archive_entry_linkify(struct archive_entry_linkresolver *res,
+ struct archive_entry **e, struct archive_entry **f)
+{
+ struct links_entry *le;
+ struct archive_entry *t;
+
+ *f = NULL; /* Default: Don't return a second entry. */
+
+ if (*e == NULL) {
+ le = next_entry(res);
+ if (le != NULL)
+ *e = le->entry;
+ return;
+ }
- if (links_cache->buckets == NULL)
+ /* If it has only one link, then we're done. */
+ if (archive_entry_nlink(*e) == 1)
return;
- for (i = 0; i < links_cache->number_buckets; i++) {
- while (links_cache->buckets[i] != NULL) {
- struct links_entry *lp = links_cache->buckets[i]->next;
- if (links_cache->buckets[i]->name != NULL)
- free(links_cache->buckets[i]->name);
- free(links_cache->buckets[i]);
- links_cache->buckets[i] = lp;
+ switch (res->strategy) {
+ case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
+ le = find_entry(res, *e);
+ if (le != NULL) {
+ archive_entry_set_size(*e, 0);
+ archive_entry_set_hardlink(*e,
+ archive_entry_pathname(le->canonical));
+ } else
+ insert_entry(res, *e);
+ return;
+ case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
+ /* This one is trivial. */
+ return;
+ case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
+ le = find_entry(res, *e);
+ if (le != NULL) {
+ /*
+ * Put the new entry in le, return the
+ * old entry from le.
+ */
+ t = *e;
+ *e = le->entry;
+ le->entry = t;
+ /* Make the old entry into a hardlink. */
+ archive_entry_set_size(*e, 0);
+ archive_entry_set_hardlink(*e,
+ archive_entry_pathname(le->canonical));
+ /* If we ran out of links, return the
+ * final entry as well. */
+ if (le->links == 0)
+ *f = le->entry;
+ } else {
+ /*
+ * If we haven't seen it, tuck it away
+ * for future use.
+ */
+ le = insert_entry(res, *e);
+ /*
+ * insert_entry() returns NULL when it cannot
+ * allocate; in that case leave *e alone so the
+ * caller still gets the entry back instead of
+ * dereferencing NULL here.
+ */
+ if (le == NULL)
+ return;
+ le->entry = *e;
+ *e = NULL;
}
+ return;
+ default:
+ break;
}
- free(links_cache->buckets);
- links_cache->buckets = NULL;
+ return;
}
-const char *
-archive_entry_linkresolve(struct archive_entry_linkresolver *links_cache,
+static struct links_entry *
+find_entry(struct archive_entry_linkresolver *res,
struct archive_entry *entry)
{
- struct links_entry *le, **new_buckets;
- int hash;
- size_t i, new_size;
+ struct links_entry *le;
+ int hash, bucket;
dev_t dev;
ino_t ino;
- int nlinks;
-
- /* Free a held name. */
- free(links_cache->last_name);
- links_cache->last_name = NULL;
+ /* Free a held entry. */
+ if (res->spare != NULL) {
+ archive_entry_free(res->spare->canonical);
+ free(res->spare);
+ res->spare = NULL;
+ }
/* If the links cache overflowed and got flushed, don't bother. */
- if (links_cache->buckets == NULL)
+ if (res->buckets == NULL)
return (NULL);
dev = archive_entry_dev(entry);
ino = archive_entry_ino(entry);
- nlinks = archive_entry_nlink(entry);
-
- /* An entry with one link can't be a hard link. */
- if (nlinks == 1)
- return (NULL);
-
- /* If the links cache is getting too full, enlarge the hash table. */
- if (links_cache->number_entries > links_cache->number_buckets * 2)
- {
- /* Try to enlarge the bucket list. */
- new_size = links_cache->number_buckets * 2;
- new_buckets = malloc(new_size * sizeof(struct links_entry *));
-
- if (new_buckets != NULL) {
- memset(new_buckets, 0,
- new_size * sizeof(struct links_entry *));
- for (i = 0; i < links_cache->number_buckets; i++) {
- while (links_cache->buckets[i] != NULL) {
- /* Remove entry from old bucket. */
- le = links_cache->buckets[i];
- links_cache->buckets[i] = le->next;
-
- /* Add entry to new bucket. */
- hash = (le->dev ^ le->ino) % new_size;
-
- if (new_buckets[hash] != NULL)
- new_buckets[hash]->previous =
- le;
- le->next = new_buckets[hash];
- le->previous = NULL;
- new_buckets[hash] = le;
- }
- }
- free(links_cache->buckets);
- links_cache->buckets = new_buckets;
- links_cache->number_buckets = new_size;
- }
- }
+ hash = dev ^ ino;
/* Try to locate this entry in the links cache. */
- hash = ( dev ^ ino ) % links_cache->number_buckets;
- for (le = links_cache->buckets[hash]; le != NULL; le = le->next) {
- if (le->dev == dev && le->ino == ino) {
+ bucket = hash % res->number_buckets;
+ for (le = res->buckets[bucket]; le != NULL; le = le->next) {
+ if (le->hash == hash
+ && dev == archive_entry_dev(le->entry)
+ && ino == archive_entry_ino(le->entry)) {
/*
* Decrement link count each time and release
* the entry if it hits zero. This saves
*/
--le->links;
if (le->links > 0)
- return (le->name);
- /*
- * When we release the entry, save the name
- * until the next call.
- */
- links_cache->last_name = le->name;
- /*
- * Release the entry.
- */
+ return (le);
+ /* Remove it from this hash bucket. */
if (le->previous != NULL)
le->previous->next = le->next;
if (le->next != NULL)
le->next->previous = le->previous;
- if (links_cache->buckets[hash] == le)
- links_cache->buckets[hash] = le->next;
- links_cache->number_entries--;
- free(le);
- return (links_cache->last_name);
+ if (res->buckets[bucket] == le)
+ res->buckets[bucket] = le->next;
+ res->number_entries--;
+ /* Defer freeing this entry. */
+ res->spare = le;
+ return (le);
}
}
+ return (NULL);
+}
+
+/*
+ * Detach and return the first entry found in the hash table, scanning
+ * buckets in index order, or NULL when the table is empty or absent.
+ * The returned links_entry is parked in res->spare and released (with
+ * its canonical clone) on the next call that starts by dropping the
+ * spare; its 'entry' object is not touched here.
+ */
+static struct links_entry *
+next_entry(struct archive_entry_linkresolver *res)
+{
+	struct links_entry *head, *held;
+	size_t i;
+
+	/* Drop whatever the previous call handed out. */
+	held = res->spare;
+	if (held != NULL) {
+		archive_entry_free(held->canonical);
+		free(held);
+		res->spare = NULL;
+	}
+
+	/* No table, nothing to enumerate. */
+	if (res->buckets == NULL)
+		return (NULL);
+
+	for (i = 0; i < res->number_buckets; i++) {
+		head = res->buckets[i];
+		if (head == NULL)
+			continue;
+		/* Unlink the chain head from this bucket. */
+		if (head->next != NULL)
+			head->next->previous = head->previous;
+		res->buckets[i] = head->next;
+		res->number_entries--;
+		/* Keep it alive until the next call. */
+		res->spare = head;
+		return (head);
+	}
+	return (NULL);
+}
+
+/*
+ * Add 'entry' to the link cache and return the new cache record.
+ *
+ * The record keeps the caller's 'entry' pointer and a private clone
+ * ('canonical'). Its 'links' counter starts at nlink - 1, i.e. the
+ * number of links not yet seen. The table is grown first when it
+ * holds more than two entries per bucket.
+ *
+ * Returns NULL, leaving the table untouched, if either the record or
+ * the clone cannot be allocated.
+ */
+static struct links_entry *
+insert_entry(struct archive_entry_linkresolver *res,
+ struct archive_entry *entry)
+{
+ struct links_entry *le;
+ int hash, bucket;
/* Add this entry to the links cache. */
le = malloc(sizeof(struct links_entry));
if (le == NULL)
return (NULL);
- le->name = strdup(archive_entry_pathname(entry));
- if (le->name == NULL) {
- free(le);
- return (NULL);
- }
+ le->entry = entry;
+ /*
+ * Clone before touching the table so a failed clone can be
+ * backed out cleanly; later code reads le->canonical
+ * unconditionally.
+ */
+ le->canonical = archive_entry_clone(entry);
+ if (le->canonical == NULL) {
+ free(le);
+ return (NULL);
+ }
+
+ /* If the links cache is getting too full, enlarge the hash table. */
+ if (res->number_entries > res->number_buckets * 2)
+ grow_hash(res);
+
+ hash = archive_entry_dev(entry) ^ archive_entry_ino(entry);
+ bucket = hash % res->number_buckets;
/* If we could allocate the entry, record it. */
- if (links_cache->buckets[hash] != NULL)
- links_cache->buckets[hash]->previous = le;
- links_cache->number_entries++;
- le->next = links_cache->buckets[hash];
+ if (res->buckets[bucket] != NULL)
+ res->buckets[bucket]->previous = le;
+ res->number_entries++;
+ le->next = res->buckets[bucket];
le->previous = NULL;
- links_cache->buckets[hash] = le;
- le->dev = dev;
- le->ino = ino;
- le->links = nlinks - 1;
- return (NULL);
+ res->buckets[bucket] = le;
+ le->hash = hash;
+ le->links = archive_entry_nlink(entry) - 1;
+ return (le);
+}
+
+/*
+ * Double the number of hash buckets and move every cached entry to
+ * its new chain. Growing is best-effort: if the new size would
+ * overflow or the allocation fails, the existing table is left
+ * exactly as it was.
+ */
+static void
+grow_hash(struct archive_entry_linkresolver *res)
+{
+	struct links_entry *le, **new_buckets;
+	size_t new_size;
+	size_t i, bucket;
+
+	/* Doubling must not wrap around. */
+	if (res->number_buckets > ((size_t)-1) / 2)
+		return;
+	new_size = res->number_buckets * 2;
+
+	/*
+	 * calloc() hands back an all-zero table and also rejects a
+	 * new_size * element-size product that does not fit in size_t.
+	 */
+	new_buckets = calloc(new_size, sizeof(new_buckets[0]));
+	if (new_buckets == NULL)
+		return;
+
+	for (i = 0; i < res->number_buckets; i++) {
+		while (res->buckets[i] != NULL) {
+			/* Remove entry from old bucket. */
+			le = res->buckets[i];
+			res->buckets[i] = le->next;
+
+			/* Push it onto the head of its new chain. */
+			bucket = le->hash % new_size;
+			if (new_buckets[bucket] != NULL)
+				new_buckets[bucket]->previous = le;
+			le->next = new_buckets[bucket];
+			le->previous = NULL;
+			new_buckets[bucket] = le;
+		}
+	}
+	free(res->buckets);
+	res->buckets = new_buckets;
+	res->number_buckets = new_size;
+}
#ifndef ARCHIVE_ENTRY_PRIVATE_H_INCLUDED
#define ARCHIVE_ENTRY_PRIVATE_H_INCLUDED
+#include "archive_string.h"
+
/*
* Handle wide character (i.e., Unicode) and non-wide character
* strings transparently.
- *
*/
struct aes {
- const char *aes_mbs;
- char *aes_mbs_alloc;
+ struct archive_string aes_mbs;
+ struct archive_string aes_utf8;
const wchar_t *aes_wcs;
- wchar_t *aes_wcs_alloc;
+ /* Bitmap of which of the above are valid. Because we're lazy
+ * about malloc-ing and reusing the underlying storage, we
+ * can't rely on NULL pointers to indicate whether a string
+ * has been set. */
+ int aes_set;
+#define AES_SET_MBS 1
+#define AES_SET_UTF8 2
+#define AES_SET_WCS 4
};
struct ae_acl {
dev_t aest_rdevminor;
} ae_stat;
-
-
/*
* Use aes here so that we get transparent mbs<->wcs conversions.
*/
struct aes ae_pathname; /* Name of entry */
struct aes ae_symlink; /* symlink contents */
struct aes ae_uname; /* Name of owner */
+ unsigned char ae_hardlinkset;
+ unsigned char ae_symlinkset;
+
+ /* Not used within libarchive; useful for some clients. */
+ struct aes ae_sourcepath; /* Path this entry is sourced from. */
+ /* ACL support. */
struct ae_acl *acl_head;
struct ae_acl *acl_p;
int acl_state; /* See acl_next for details. */
wchar_t *acl_text_w;
+ /* extattr support. */
struct ae_xattr *xattr_head;
struct ae_xattr *xattr_p;
+ /* Miscellaneous. */
char strmode[12];
};
#ifndef ARCHIVE_PLATFORM_H_INCLUDED
#define ARCHIVE_PLATFORM_H_INCLUDED
+/* archive.h and archive_entry.h require this. */
+#define __LIBARCHIVE_BUILD 1
+
#ifdef _WIN32
#include "config_windows.h"
#include "archive_windows.h"
file->ce_size = 0;
}
+ /* Don't waste time seeking for zero-length bodies. */
+ if (file->size == 0) {
+ file->offset = iso9660->current_position;
+ }
+
/* If CE exists, find and read it now. */
if (file->ce_offset > 0)
offset = file->ce_offset;
struct tar {
struct archive_string acl_text;
struct archive_string entry_pathname;
+ /* For "GNU.sparse.name" and other similar path extensions. */
+ struct archive_string entry_pathname_override;
struct archive_string entry_linkpath;
struct archive_string entry_uname;
struct archive_string entry_gname;
gnu_clear_sparse_list(tar);
archive_string_free(&tar->acl_text);
archive_string_free(&tar->entry_pathname);
+ archive_string_free(&tar->entry_pathname_override);
archive_string_free(&tar->entry_linkpath);
archive_string_free(&tar->entry_uname);
archive_string_free(&tar->entry_gname);
size_t attr_length, l, line_length;
char *line, *p;
char *key, *value;
- wchar_t *wp;
int err, err2;
attr_length = strlen(attr);
archive_string_empty(&(tar->entry_gname));
archive_string_empty(&(tar->entry_linkpath));
archive_string_empty(&(tar->entry_pathname));
+ archive_string_empty(&(tar->entry_pathname_override));
archive_string_empty(&(tar->entry_uname));
err = ARCHIVE_OK;
while (attr_length > 0) {
if (tar->pax_hdrcharset_binary)
archive_entry_copy_gname(entry, value);
else {
- wp = utf8_decode(tar, value, strlen(value));
- if (wp == NULL) {
- archive_entry_copy_gname(entry, value);
- if (err > ARCHIVE_WARN)
- err = ARCHIVE_WARN;
- } else
- archive_entry_copy_gname_w(entry, wp);
+ if (!archive_entry_update_gname_utf8(entry, value)) {
+ err = ARCHIVE_WARN;
+ archive_set_error(&a->archive,
+ ARCHIVE_ERRNO_FILE_FORMAT,
+ "Gname in pax header can't "
+ "be converted to current locale.");
+ }
}
}
if (archive_strlen(&(tar->entry_linkpath)) > 0) {
if (tar->pax_hdrcharset_binary)
archive_entry_copy_link(entry, value);
else {
- wp = utf8_decode(tar, value, strlen(value));
- if (wp == NULL) {
- archive_entry_copy_link(entry, value);
- if (err > ARCHIVE_WARN)
- err = ARCHIVE_WARN;
- } else
- archive_entry_copy_link_w(entry, wp);
+ if (!archive_entry_update_link_utf8(entry, value)) {
+ err = ARCHIVE_WARN;
+ archive_set_error(&a->archive,
+ ARCHIVE_ERRNO_FILE_FORMAT,
+ "Linkname in pax header can't "
+ "be converted to current locale.");
+ }
}
}
- if (archive_strlen(&(tar->entry_pathname)) > 0) {
+ /*
+ * Some extensions (such as the GNU sparse file extensions)
+ * deliberately store a synthetic name under the regular 'path'
+ * attribute and the real file name under a different attribute.
+ * Since we're supposed to not care about the order, we
+ * have no choice but to store all of the various filenames
+ * we find and figure it all out afterwards. This is the
+ * figuring out part.
+ */
+ value = NULL;
+ if (archive_strlen(&(tar->entry_pathname_override)) > 0)
+ value = tar->entry_pathname_override.s;
+ else if (archive_strlen(&(tar->entry_pathname)) > 0)
value = tar->entry_pathname.s;
+ if (value != NULL) {
if (tar->pax_hdrcharset_binary)
archive_entry_copy_pathname(entry, value);
else {
- wp = utf8_decode(tar, value, strlen(value));
- if (wp == NULL) {
- archive_entry_copy_pathname(entry, value);
- if (err > ARCHIVE_WARN)
- err = ARCHIVE_WARN;
- } else
- archive_entry_copy_pathname_w(entry, wp);
+ if (!archive_entry_update_pathname_utf8(entry, value)) {
+ err = ARCHIVE_WARN;
+ archive_set_error(&a->archive,
+ ARCHIVE_ERRNO_FILE_FORMAT,
+ "Pathname in pax header can't be "
+ "converted to current locale.");
+ }
}
}
if (archive_strlen(&(tar->entry_uname)) > 0) {
if (tar->pax_hdrcharset_binary)
archive_entry_copy_uname(entry, value);
else {
- wp = utf8_decode(tar, value, strlen(value));
- if (wp == NULL) {
- archive_entry_copy_uname(entry, value);
- if (err > ARCHIVE_WARN)
- err = ARCHIVE_WARN;
- } else
- archive_entry_copy_uname_w(entry, wp);
+ if (!archive_entry_update_uname_utf8(entry, value)) {
+ err = ARCHIVE_WARN;
+ archive_set_error(&a->archive,
+ ARCHIVE_ERRNO_FILE_FORMAT,
+ "Uname in pax header can't "
+ "be converted to current locale.");
+ }
}
}
return (err);
tar->sparse_gnu_pending = 1;
}
if (strcmp(key, "GNU.sparse.name") == 0) {
- wp = utf8_decode(tar, value, strlen(value));
- if (wp != NULL)
- archive_entry_copy_pathname_w(entry, wp);
- else
- archive_entry_copy_pathname(entry, value);
+ /*
+ * The real filename; when storing sparse
+ * files, GNU tar puts a synthesized name into
+ * the regular 'path' attribute in an attempt
+ * to limit confusion. ;-)
+ */
+ archive_strcpy(&(tar->entry_pathname_override), value);
}
if (strcmp(key, "GNU.sparse.realsize") == 0) {
tar->realsize = tar_atol10(value, strlen(value));
archive_entry_set_rdevminor(entry,
tar_atol10(value, strlen(value)));
} else if (strcmp(key, "SCHILY.fflags")==0) {
- wp = utf8_decode(tar, value, strlen(value));
- /* TODO: if (wp == NULL) */
- archive_entry_copy_fflags_text_w(entry, wp);
+ archive_entry_copy_fflags_text(entry, value);
} else if (strcmp(key, "SCHILY.dev")==0) {
archive_entry_set_dev(entry,
tar_atol10(value, strlen(value)));
static int
archive_read_format_zip_bid(struct archive_read *a)
{
- int bid = 0;
const char *p;
-
- if (a->archive.archive_format == ARCHIVE_FORMAT_ZIP)
- bid += 1;
+ const void *buff;
+ size_t bytes_avail;
if ((p = __archive_read_ahead(a, 4)) == NULL)
return (-1);
|| (p[2] == '0' && p[3] == '0'))
return (30);
}
+
+	/*
+	 * Attempt to handle self-extracting archives
+	 * by noting a PE header and searching forward
+	 * through up to 128k of lookahead for a 'PK\003\004' marker.
+	 */
+	if (p[0] == 'M' && p[1] == 'Z') {
+		const char *end;
+
+		/*
+		 * TODO: Additional checks that this really is a PE
+		 * file before we invoke the 128k lookahead below.
+		 * No point in allocating a bigger lookahead buffer
+		 * if we don't need to.
+		 */
+		/*
+		 * TODO: Of course, the compression layer lookahead
+		 * buffers aren't dynamically sized yet; they should be.
+		 */
+		/*
+		 * NOTE(review): bytes_avail is a size_t; if read_ahead
+		 * can report an error as a negative value, that needs a
+		 * signed check here -- confirm against its contract.
+		 */
+		bytes_avail = (a->decompressor->read_ahead)(a, &buff, 128*1024);
+		p = (const char *)buff;
+		end = p + bytes_avail;
+
+		/*
+		 * TODO: Optimize by jumping forward based on values
+		 * in the PE header. Note that we don't need to be
+		 * exact, but we mustn't skip too far. The search
+		 * below will compensate if we undershoot. Skipping
+		 * will also reduce the chance of false positives
+		 * (which is not really all that high to begin with,
+		 * so maybe skipping isn't really necessary).
+		 */
+
+		/*
+		 * The test below examines p[0] through p[9], so only
+		 * scan positions that still have ten bytes available;
+		 * anything closer to the end would read past the
+		 * lookahead buffer.
+		 */
+		while (end - p >= 10) {
+			if (p[0] == 'P' && p[1] == 'K' /* "PK" signature */
+			    && p[2] == 3 && p[3] == 4 /* File entry */
+			    && p[8] == 8 /* compression == deflate */
+			    && p[9] == 0 /* High byte of compression */
+			    )
+			{
+				return (30);
+			}
+			++p;
+		}
+	}
+
return (0);
}
+/*
+ * Search forward for a "PK\003\004" file header. This handles the
+ * case of self-extracting archives, where there is an executable
+ * prepended to the ZIP archive.
+ *
+ * Everything before the signature is consumed. Returns ARCHIVE_OK
+ * with the signature as the next bytes to be read, or ARCHIVE_FATAL
+ * once fewer than four bytes of lookahead are available.
+ */
+static int
+skip_sfx(struct archive_read *a)
+{
+	const void *h;
+	const char *p, *q;
+	size_t skip, bytes;
+
+	/*
+	 * TODO: We should be able to skip forward by a bunch
+	 * by lifting some values from the PE header. We don't
+	 * need to be exact (we're still going to search forward
+	 * to find the header), but it will speed things up and
+	 * reduce the chance of a false positive.
+	 */
+	for (;;) {
+		/*
+		 * NOTE(review): 'bytes' is unsigned; if read_ahead can
+		 * return a negative error value the check below will
+		 * not catch it -- confirm against its contract.
+		 */
+		bytes = (a->decompressor->read_ahead)(a, &h, 4096);
+		if (bytes < 4)
+			return (ARCHIVE_FATAL);
+		p = h;
+		q = p + bytes;
+
+		/*
+		 * Scan ahead until we find something that looks
+		 * like the zip header. The window p[0..3] must lie
+		 * entirely inside the buffer; testing the final
+		 * four-byte window too guarantees that every pass
+		 * consumes at least one byte, so a stream that ends
+		 * with exactly four unmatched bytes cannot make this
+		 * loop spin forever.
+		 */
+		while (q - p >= 4) {
+			/*
+			 * The last byte of the window tells how far
+			 * we can jump without stepping over a match.
+			 */
+			switch (p[3]) {
+			case '\004':
+				/* TODO: Additional verification here. */
+				if (memcmp("PK\003\004", p, 4) == 0) {
+					skip = p - (const char *)h;
+					(a->decompressor->consume)(a, skip);
+					return (ARCHIVE_OK);
+				}
+				p += 4;
+				break;
+			case '\003': p += 1; break;
+			case 'K': p += 2; break;
+			case 'P': p += 3; break;
+			default: p += 4; break;
+			}
+		}
+		/* Discard what was scanned; keep the short tail for the next pass. */
+		skip = p - (const char *)h;
+		(a->decompressor->consume)(a, skip);
+	}
+}
+
static int
archive_read_format_zip_read_header(struct archive_read *a,
struct archive_entry *entry)
const void *h;
const char *signature;
struct zip *zip;
+ int r = ARCHIVE_OK, r1;
a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
if (a->archive.archive_format_name == NULL)
return (ARCHIVE_FATAL);
signature = (const char *)h;
+ if (signature[0] == 'M' && signature[1] == 'Z') {
+ /* This is an executable? Must be self-extracting... */
+ r = skip_sfx(a);
+ if (r < ARCHIVE_WARN)
+ return (r);
+ if ((h = __archive_read_ahead(a, 4)) == NULL)
+ return (ARCHIVE_FATAL);
+ signature = (const char *)h;
+ }
+
if (signature[0] != 'P' || signature[1] != 'K') {
archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
"Bad ZIP file");
if (signature[2] == '\003' && signature[3] == '\004') {
/* Regular file entry. */
- return (zip_read_file_header(a, entry, zip));
+ r1 = zip_read_file_header(a, entry, zip);
+ if (r1 != ARCHIVE_OK)
+ return (r1);
+ return (r);
}
if (signature[2] == '\005' && signature[3] == '\006') {
#ifdef HAVE_STRING_H
#include <string.h>
#endif
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
#include "archive_private.h"
#include "archive_string.h"
+/*
+ * Copy the contents of 'src' into 'dest', growing dest as needed and
+ * NUL-terminating the result. Calls __archive_errx() if the buffer
+ * cannot be grown.
+ */
void
__archive_string_copy(struct archive_string *dest, struct archive_string *src)
{
- if (__archive_string_ensure(dest, src->length + 1) == NULL)
- __archive_errx(1, "Out of memory");
- memcpy(dest->s, src->s, src->length);
- dest->length = src->length;
- dest->s[dest->length] = 0;
+ /*
+ * An empty source only resets the length: dest's buffer is
+ * neither allocated nor rewritten in that case.
+ */
+ if (src->length == 0)
+ dest->length = 0;
+ else {
+ /* Room for the characters plus the terminating NUL. */
+ if (__archive_string_ensure(dest, src->length + 1) == NULL)
+ __archive_errx(1, "Out of memory");
+ memcpy(dest->s, src->s, src->length);
+ dest->length = src->length;
+ dest->s[dest->length] = 0;
+ }
}
void
{
as->length = 0;
as->buffer_length = 0;
- if (as->s != NULL)
+ if (as->s != NULL) {
free(as->s);
+ as->s = NULL;
+ }
}
/* Returns NULL on any allocation failure. */
struct archive_string *
__archive_string_ensure(struct archive_string *as, size_t s)
{
+ /* If buffer is already big enough, don't reallocate. */
if (as->s && (s <= as->buffer_length))
return (as);
+ /*
+ * Growing the buffer at least exponentially ensures that
+ * append operations are always linear in the number of
+ * characters appended. Using a smaller growth rate for
+ * larger buffers reduces memory waste somewhat at the cost of
+ * a larger constant factor.
+ */
if (as->buffer_length < 32)
+ /* Start with a minimum 32-character buffer. */
as->buffer_length = 32;
- while (as->buffer_length < s)
+ else if (as->buffer_length < 8192)
+ /* Buffers under 8k are doubled for speed. */
as->buffer_length *= 2;
+ else {
+ /* Buffers 8k and over grow by at least 25% each time. */
+ size_t old_length = as->buffer_length;
+ as->buffer_length = (as->buffer_length * 5) / 4;
+ /* Be safe: If size wraps, release buffer and return NULL. */
+ if (as->buffer_length < old_length) {
+ free(as->s);
+ as->s = NULL;
+ return (NULL);
+ }
+ }
+ /*
+ * The computation above is a lower limit to how much we'll
+ * grow the buffer. In any case, we have to grow it enough to
+ * hold the request.
+ */
+ if (as->buffer_length < s)
+ as->buffer_length = s;
+ /* Now we can reallocate the buffer. */
as->s = (char *)realloc(as->s, as->buffer_length);
if (as->s == NULL)
return (NULL);
__archive_strappend_char(as, digits[d % base]);
return (as);
}
+
+/*
+ * Home-grown wcrtomb for UTF-8.
+ *
+ * Stores the UTF-8 encoding of 'wc' (one to four bytes, no terminator)
+ * at 'p' and returns the byte count.  Returns 0 when 'p' is NULL and
+ * (size_t)-1 when 'wc' does not fit in a four-byte sequence.  The
+ * shift-state argument is accepted only so this function can stand in
+ * for the standard wcrtomb(); it is never read.
+ */
+static size_t
+my_wcrtomb_utf8(char *p, wchar_t wc, mbstate_t *s)
+{
+	unsigned long c;
+
+	(void)s; /* UNUSED */
+
+	if (p == NULL)
+		return (0);
+	/*
+	 * wchar_t may be a signed type.  Converting to unsigned first
+	 * turns a negative value into a huge one, so it is rejected
+	 * below instead of being emitted as a bogus single byte.
+	 */
+	c = (unsigned long)wc;
+	if (c <= 0x7f) {
+		p[0] = (char)c;
+		return (1);
+	}
+	if (c <= 0x7ff) {
+		p[0] = (char)(0xc0 | ((c >> 6) & 0x1f));
+		p[1] = (char)(0x80 | (c & 0x3f));
+		return (2);
+	}
+	if (c <= 0xffff) {
+		p[0] = (char)(0xe0 | ((c >> 12) & 0x0f));
+		p[1] = (char)(0x80 | ((c >> 6) & 0x3f));
+		p[2] = (char)(0x80 | (c & 0x3f));
+		return (3);
+	}
+	if (c <= 0x1fffff) {
+		p[0] = (char)(0xf0 | ((c >> 18) & 0x07));
+		p[1] = (char)(0x80 | ((c >> 12) & 0x3f));
+		p[2] = (char)(0x80 | ((c >> 6) & 0x3f));
+		p[3] = (char)(0x80 | (c & 0x3f));
+		return (4);
+	}
+	/*
+	 * A four-byte UTF-8 sequence carries at most 21 bits (0x1fffff);
+	 * Unicode itself stops at 0x10ffff.  Anything larger cannot be
+	 * encoded here.
+	 */
+	/*
+	 * Awkward point: UTF-8 <-> wchar_t conversions
+	 * can actually fail.
+	 */
+	return ((size_t)-1);
+}
+
+/*
+ * Convert the wide string 'w' one character at a time with the
+ * wcrtomb()-style converter 'func' and append the bytes to 'as'.
+ *
+ * Returns 0 on success and -1 as soon as 'func' reports a character
+ * it cannot convert.  On failure, bytes already flushed to 'as' stay
+ * appended; bytes still sitting in the local buffer are dropped.
+ */
+static int
+my_wcstombs(struct archive_string *as, const wchar_t *w,
+    size_t (*func)(char *, wchar_t, mbstate_t *))
+{
+	size_t n;
+	char *p;
+	mbstate_t shift_state;
+	char buff[256];
+
+	/*
+	 * Start from the initial conversion state.  A zero-valued
+	 * mbstate_t describes that state; handing an uninitialized
+	 * object to wcrtomb() would read indeterminate data.
+	 */
+	memset(&shift_state, 0, sizeof(shift_state));
+
+	/*
+	 * Convert one wide char at a time into 'buff', whenever that
+	 * fills, append it to the string.
+	 */
+	p = buff;
+	while (*w != L'\0') {
+		/* Flush the buffer when we have <=16 bytes free. */
+		/* (No encoding has a single character >16 bytes.) */
+		if ((size_t)(p - buff) >= (size_t)(sizeof(buff) - 16)) {
+			*p = '\0';
+			archive_strcat(as, buff);
+			p = buff;
+		}
+		n = (*func)(p, *w++, &shift_state);
+		if (n == (size_t)-1)
+			return (-1);
+		p += n;
+	}
+	/* Append whatever is left over from the last partial buffer. */
+	*p = '\0';
+	archive_strcat(as, buff);
+	return (0);
+}
+
+/*
+ * Append the UTF-8 encoding of the wide string 'w' to 'as'.
+ * Gives back 'as' on success and NULL when the conversion fails.
+ */
+struct archive_string *
+__archive_strappend_w_utf8(struct archive_string *as, const wchar_t *w)
+{
+	int failed = my_wcstombs(as, w, my_wcrtomb_utf8);
+
+	return (failed ? NULL : as);
+}
+
+/*
+ * Append 'w' to 'as' after converting it with the standard wcrtomb(),
+ * i.e. into the character set of the current locale.  Gives back 'as'
+ * on success and NULL when the conversion fails.
+ *
+ * TODO: use my_wcrtomb_utf8 if !HAVE_WCRTOMB (add configure logic first!)
+ */
+struct archive_string *
+__archive_strappend_w_mbs(struct archive_string *as, const wchar_t *w)
+{
+	int failed = my_wcstombs(as, w, wcrtomb);
+
+	return (failed ? NULL : as);
+}
+
+
+/*
+ * Home-grown mbrtowc for UTF-8.  Some systems lack UTF-8
+ * (or even lack mbrtowc()) and we need UTF-8 support for pax
+ * format.  So please don't replace this with a call to the
+ * standard mbrtowc() function!
+ *
+ * Decodes one character of 1-4 bytes from 's' into '*pwc' and returns
+ * the number of bytes used.  Returns 0 for a NULL 's' or a NUL byte,
+ * (size_t)-2 when fewer than the required bytes are available in 'n',
+ * and (size_t)-1 for a bad lead or continuation byte.  '*pwc' is
+ * written only on success.
+ */
+static size_t
+my_mbrtowc_utf8(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
+{
+	unsigned int lead;
+	unsigned long value;
+	size_t need, i;
+
+	/*
+	 * 'ps' exists only so the prototype matches the standard
+	 * mbrtowc() and this function can be passed wherever a
+	 * pointer to such a converter is expected.
+	 */
+	(void)ps; /* UNUSED */
+
+	/* Standard behavior: a NULL value for 's' just resets shift state. */
+	if (s == NULL)
+		return (0);
+	/* With no bytes available, don't even look at the first one. */
+	if (n == 0)
+		return ((size_t)-2);
+
+	lead = (unsigned char)s[0];
+	if (lead == 0)
+		return (0); /* Standard: return 0 for end-of-string. */
+
+	/* The lead byte fixes the sequence length and the top bits. */
+	if ((lead & 0x80) == 0) {
+		need = 1;
+		value = lead & 0x7f;
+	} else if ((lead & 0xe0) == 0xc0) {
+		need = 2;
+		value = lead & 0x1f;
+	} else if ((lead & 0xf0) == 0xe0) {
+		need = 3;
+		value = lead & 0x0f;
+	} else if ((lead & 0xf8) == 0xf0) {
+		need = 4;
+		value = lead & 0x07;
+	} else {
+		/* Invalid first byte. */
+		return ((size_t)-1);
+	}
+
+	if (n < need)
+		return ((size_t)-2);
+
+	/* Each continuation byte must be 10xxxxxx and adds six bits. */
+	for (i = 1; i < need; i++) {
+		if ((s[i] & 0xc0) != 0x80)
+			return ((size_t)-1);
+		value = (value << 6) | (unsigned long)(s[i] & 0x3f);
+	}
+	*pwc = (wchar_t)value;
+	return (need);
+}
+
+/*
+ * Return a wide-character string by converting this archive_string
+ * from UTF-8.
+ *
+ * The result is a freshly malloc()ed, NUL-terminated buffer that the
+ * caller must free().  Returns NULL if the bytes are not valid UTF-8;
+ * an allocation failure is reported through __archive_errx().
+ */
+wchar_t *
+__archive_string_utf8_w(struct archive_string *as)
+{
+	wchar_t *ws, *dest;
+	const char *src, *end;
+	size_t n;
+
+	/* One wide char per input byte is always enough, plus the NUL. */
+	ws = (wchar_t *)malloc((as->length + 1) * sizeof(wchar_t));
+	if (ws == NULL)
+		__archive_errx(1, "Out of memory");
+	dest = ws;
+	src = as->s;
+	/*
+	 * A string that was never filled (or was freed) has a NULL
+	 * buffer; treat that as empty.  Otherwise scan no further than
+	 * as->length bytes, so that stale text beyond the logical end
+	 * of the string can never overrun the buffer sized above.
+	 */
+	if (src != NULL) {
+		end = src + as->length;
+		while (src < end && *src != '\0') {
+			n = my_mbrtowc_utf8(dest, src,
+			    (size_t)(end - src), NULL);
+			if (n == 0)
+				break;
+			if (n == (size_t)-1 || n == (size_t)-2) {
+				free(ws);
+				return (NULL);
+			}
+			dest++;
+			src += n;
+		}
+	}
+	*dest = L'\0';
+	return (ws);
+}
#ifdef HAVE_STRING_H
#include <string.h>
#endif
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
/*
* Basic resizable/reusable string support a la Java's "StringBuffer."
__archive_strappend_char(struct archive_string *, char);
#define archive_strappend_char __archive_strappend_char
-/* Append a char to an archive_string using UTF8. */
-struct archive_string *
-__archive_strappend_char_UTF8(struct archive_string *, int);
-#define archive_strappend_char_UTF8 __archive_strappend_char_UTF8
-
/* Append an integer in the specified base (2 <= base <= 16). */
struct archive_string *
__archive_strappend_int(struct archive_string *as, int d, int base);
#define archive_strappend_int __archive_strappend_int
+/* Convert a wide-char string to UTF-8 and append the result. */
+struct archive_string *
+__archive_strappend_w_utf8(struct archive_string *, const wchar_t *);
+#define archive_strappend_w_utf8 __archive_strappend_w_utf8
+
+/* Convert a wide-char string to current locale and append the result. */
+/* Returns NULL if conversion fails. */
+struct archive_string *
+__archive_strappend_w_mbs(struct archive_string *, const wchar_t *);
+#define archive_strappend_w_mbs __archive_strappend_w_mbs
+
/* Basic append operation. */
struct archive_string *
__archive_string_append(struct archive_string *as, const char *p, size_t s);
/* Copy a C string to an archive_string, resizing as necessary. */
#define archive_strcpy(as,p) \
- ((as)->length = 0, __archive_string_append((as), (p), strlen(p)))
+ ((as)->length = 0, __archive_string_append((as), (p), p == NULL ? 0 : strlen(p)))
/* Copy a C string to an archive_string with limit, resizing as necessary. */
#define archive_strncpy(as,p,l) \
void __archive_string_sprintf(struct archive_string *, const char *, ...);
#define archive_string_sprintf __archive_string_sprintf
+/* Allocates a fresh buffer and converts as (assumed to be UTF-8) into it.
+ * Returns NULL if conversion failed in any way. */
+wchar_t *__archive_string_utf8_w(struct archive_string *as);
+
+
#endif
return (ARCHIVE_VERSION_NUMBER);
}
-/*
- * Format a version string of the form "libarchive x.y.z", where x, y,
- * z are the correct parts of the version ID from
- * archive_version_number().
- *
- * I used to do all of this at build time in shell scripts but that
- * proved to be a portability headache.
- */
-
const char *
archive_version_string(void)
{
- static char buff[128];
- struct archive_string as;
- int n;
-
- if (buff[0] == '\0') {
- n = archive_version_number();
- memset(&as, 0, sizeof(as));
- archive_string_sprintf(&as, "libarchive %d.%d.%d",
- n / 1000000, (n / 1000) % 1000, n % 1000);
- strncpy(buff, as.s, sizeof(buff));
- buff[sizeof(buff) - 1] = '\0';
- archive_string_free(&as);
- }
- return (buff);
+ /* No run-time formatting any more: hand back ARCHIVE_VERSION_STRING as is. */
+ return (ARCHIVE_VERSION_STRING);
}
int
archive_clear_error(&a->archive);
if (a->archive.state & ARCHIVE_STATE_DATA) {
r = _archive_write_finish_entry(&a->archive);
- if (r != ARCHIVE_OK)
+ if (r == ARCHIVE_FATAL)
return (r);
}
/* Write the data. */
while (size > 0 && a->offset < a->filesize) {
if ((off_t)(a->offset + size) > a->filesize) {
- size = (size_t)(a->filesize - a->offset);
- archive_set_error(&a->archive, errno,
- "Write request too large");
+ archive_set_error(&a->archive, 0,
+ "Write request too large (tried to write %u bytes, but only %u bytes remain)",
+ (unsigned int)size,
+ (unsigned int)(a->filesize - a->offset));
r = ARCHIVE_WARN;
+ size = (size_t)(a->filesize - a->offset);
}
bytes_written = write(a->fd, buff, size);
if (bytes_written < 0) {
const char *p;
char *t;
const wchar_t *wp;
- const char *suffix_start;
+ const char *suffix;
int need_extension, r, ret;
struct pax *pax;
const char *hdrcharset = NULL;
if (hdrcharset != NULL)
add_pax_attr(&(pax->pax_header), "hdrcharset", hdrcharset);
- /*
- * Determining whether or not the name is too big is ugly
- * because of the rules for dividing names between 'name' and
- * 'prefix' fields. Here, I pick out the longest possible
- * suffix, then test whether the remaining prefix is too long.
- */
- if (strlen(path) <= 100) /* Short enough for just 'name' field */
- suffix_start = path; /* Record a zero-length prefix */
- else
- /* Find the largest suffix that fits in 'name' field. */
- suffix_start = strchr(path + strlen(path) - 100 - 1, '/');
/*
* If name is too long, or has non-ASCII characters, add
* 'path' to pax extended attrs. (Note that an unconvertible
* name must have non-ASCII characters.)
*/
- if (suffix_start == NULL || suffix_start - path > 155
- || path_w == NULL || has_non_ASCII(path_w)) {
- if (path_w == NULL || hdrcharset != NULL)
+ if (path == NULL) {
+ /* We don't have a narrow version, so we have to store
+ * the wide version. */
+ add_pax_attr_w(&(pax->pax_header), "path", path_w);
+ archive_entry_set_pathname(entry_main, "@WidePath");
+ need_extension = 1;
+ } else if (has_non_ASCII(path_w)) {
+ /* We have non-ASCII characters. */
+ if (path_w == NULL || hdrcharset != NULL) {
/* Can't do UTF-8, so store it raw. */
add_pax_attr(&(pax->pax_header), "path", path);
- else
- add_pax_attr_w(&(pax->pax_header), "path", path_w);
+ } else {
+ /* Store UTF-8 */
+ add_pax_attr_w(&(pax->pax_header),
+ "path", path_w);
+ }
archive_entry_set_pathname(entry_main,
build_ustar_entry_name(ustar_entry_name,
path, strlen(path), NULL));
need_extension = 1;
+ } else {
+ /* We have an all-ASCII path; we'd like to just store
+ * it in the ustar header if it will fit. Yes, this
+ * duplicates some of the logic in
+ * write_set_format_ustar.c
+ */
+ if (strlen(path) <= 100) {
+ /* Fits in the old 100-char tar name field. */
+ } else {
+ /* Find largest suffix that will fit. */
+ /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */
+ suffix = strchr(path + strlen(path) - 100 - 1, '/');
+ /* Don't attempt an empty prefix. */
+ if (suffix == path)
+ suffix = strchr(suffix + 1, '/');
+ /* We can put it in the ustar header if it's
+ * all ASCII and it's either <= 100 characters
+ * or can be split at a '/' into a prefix <=
+ * 155 chars and a suffix <= 100 chars. (Note
+ * the strchr() above will return NULL exactly
+ * when the path can't be split.)
+ */
+ if (suffix == NULL /* Suffix > 100 chars. */
+ || suffix[1] == '\0' /* empty suffix */
+ || suffix - path > 155) /* Prefix > 155 chars */
+ {
+ if (path_w == NULL || hdrcharset != NULL) {
+ /* Can't do UTF-8, so store it raw. */
+ add_pax_attr(&(pax->pax_header),
+ "path", path);
+ } else {
+ /* Store UTF-8 */
+ add_pax_attr_w(&(pax->pax_header),
+ "path", path_w);
+ }
+ archive_entry_set_pathname(entry_main,
+ build_ustar_entry_name(ustar_entry_name,
+ path, strlen(path), NULL));
+ need_extension = 1;
+ }
+ }
}
if (linkpath != NULL) {
static int
has_non_ASCII(const wchar_t *wp)
{
+ if (wp == NULL)
+ return (1);
while (*wp != L'\0' && *wp < 128)
wp++;
return (*wp != L'\0');
!(archive_entry_filetype(entry) == AE_IFREG))
archive_entry_set_size(entry, 0);
- if (AE_IFDIR == archive_entry_mode(entry)) {
+ if (AE_IFDIR == archive_entry_filetype(entry)) {
const char *p;
char *t;
/*
/* Store in two pieces, splitting at a '/'. */
p = strchr(pp + strlen(pp) - USTAR_name_size - 1, '/');
/*
- * If the separator we found is the first '/', find
- * the next one. (This is a pathological case that
- * occurs for paths of exactly 101 bytes that start with
- * '/'; it occurs because the separating '/' is not
- * stored explicitly and the reconstruction assumes that
- * an empty prefix means there is no '/' separator.)
+ * Look for the next '/' if we chose the first character
+ * as the separator. (ustar format doesn't permit
+ * an empty prefix.)
*/
if (p == pp)
p = strchr(p + 1, '/');
- /*
- * If there is no path separator, or the prefix or
- * remaining name are too large, return an error.
- */
+ /* Fail if the name won't fit. */
if (!p) {
+ /* No separator. */
+ archive_set_error(&a->archive, ENAMETOOLONG,
+ "Pathname too long");
+ ret = ARCHIVE_WARN;
+ } else if (p[1] == '\0') {
+ /*
+ * The only feasible separator is a final '/';
+ * this would result in a non-empty prefix and
+ * an empty name, which POSIX doesn't
+ * explicitly forbid, but it just feels wrong.
+ */
archive_set_error(&a->archive, ENAMETOOLONG,
"Pathname too long");
ret = ARCHIVE_WARN;
} else if (p > pp + USTAR_prefix_size) {
+ /* Prefix is too long. */
archive_set_error(&a->archive, ENAMETOOLONG,
"Pathname too long");
ret = ARCHIVE_WARN;
test_empty_write.c \
test_entry.c \
test_entry_strmode.c \
+ test_link_resolver.c \
test_pax_filename_encoding.c \
test_read_compress_program.c \
test_read_data_large.c \
test_read_format_mtree.c \
test_read_format_pax_bz2.c \
test_read_format_tar.c \
+ test_read_format_tar_empty_filename.c \
test_read_format_tbz.c \
test_read_format_tgz.c \
test_read_format_tz.c \
test_read_truncated.c \
test_tar_filenames.c \
test_tar_large.c \
+ test_ustar_filenames.c \
test_write_compress_program.c \
test_write_compress.c \
test_write_disk.c \
test_write_format_cpio_empty.c \
test_write_format_shar_empty.c \
test_write_format_tar.c \
+ test_write_format_tar_ustar.c \
test_write_format_tar_empty.c \
test_write_open_memory.c
# Build the test program using all libarchive sources + the test sources.
SRCS= ${LA_SRCS} \
${TESTS} \
- list.h \
+ ${.OBJDIR}/list.h \
+ ${.OBJDIR}/archive.h \
main.c \
read_open_memory.c
# Build libarchive_test and run it.
check test: libarchive_test
- ./libarchive_test -k -r ${.CURDIR}
-
-INCS=archive.h list.h
+ ./libarchive_test -v -r ${.CURDIR}
# Build archive.h, but in our .OBJDIR, not libarchive's
# This keeps libarchive_test and libarchive builds completely separate.
-archive.h: ${LA_SRCDIR}/archive.h.in ${LA_SRCDIR}/Makefile
+${.OBJDIR}/archive.h: ${LA_SRCDIR}/archive.h.in ${LA_SRCDIR}/Makefile
cd ${LA_SRCDIR} && unset MAKEOBJDIRPREFIX && MAKEOBJDIR=${.OBJDIR} make archive.h
# list.h is just a list of all tests, as indicated by DEFINE_TEST macro lines
-list.h: ${TESTS} Makefile
+${.OBJDIR}/list.h: ${TESTS} Makefile
(cd ${.CURDIR}; cat ${TESTS}) | grep DEFINE_TEST > list.h
CLEANFILES += *.out *.o *.core *~ list.h archive.h
extern int optind;
#endif
-/* Default is to crash and try to force a core dump on failure. */
-static int dump_on_failure = 1;
+/* Enable core dump on failure. */
+static int dump_on_failure = 0;
+/* Default is to remove temp dirs for successful tests. */
+static int keep_temp_files = 0;
/* Default is to print some basic information about each test. */
static int quiet_flag = 0;
+/* Default is to summarize repeated failures. */
+static int verbose = 0;
/* Cumulative count of component failures. */
static int failures = 0;
/* Cumulative count of skipped component tests. */
return (value);
}
failures ++;
- if (previous_failures(file, line))
+ if (!verbose && previous_failures(file, line))
return (value);
fprintf(stderr, "%s:%d: Assertion failed\n", file, line);
fprintf(stderr, " Condition: %s\n", condition);
return (1);
}
failures ++;
- if (previous_failures(file, line))
+ if (!verbose && previous_failures(file, line))
return (0);
fprintf(stderr, "%s:%d: Assertion failed: Ints not equal\n",
file, line);
return (0);
}
+/*
+ * Write 'p' to stderr as a double-quoted string for a failure report,
+ * or (null) for a NULL pointer.  BEL, BS, LF and CR are shown as the
+ * C escapes \a \b \n \r, printable ASCII is shown as is, and every
+ * other byte is shown as \xHH, so the output stays on one line and
+ * invisible bytes remain readable.
+ */
+static void strdump(const char *p)
+{
+	if (p == NULL) {
+		fprintf(stderr, "(null)");
+		return;
+	}
+	fprintf(stderr, "\"");
+	while (*p != '\0') {
+		unsigned int c = 0xff & *p++;
+		switch (c) {
+		/* Print the escape text, not the control character itself. */
+		case '\a': fprintf(stderr, "\\a"); break;
+		case '\b': fprintf(stderr, "\\b"); break;
+		case '\n': fprintf(stderr, "\\n"); break;
+		case '\r': fprintf(stderr, "\\r"); break;
+		default:
+			if (c >= 32 && c < 127)
+				fprintf(stderr, "%c", c);
+			else
+				fprintf(stderr, "\\x%02X", c);
+			break;
+		}
+	}
+	fprintf(stderr, "\"");
+}
+
/* assertEqualString() displays the values of the two strings. */
int
test_assert_equal_string(const char *file, int line,
return (1);
}
failures ++;
- if (previous_failures(file, line))
+ if (!verbose && previous_failures(file, line))
return (0);
fprintf(stderr, "%s:%d: Assertion failed: Strings not equal\n",
file, line);
- fprintf(stderr, " %s = \"%s\"\n", e1, v1);
- fprintf(stderr, " %s = \"%s\"\n", e2, v2);
+ fprintf(stderr, " %s = ", e1);
+ strdump(v1);
+ /* strlen() yields size_t; convert explicitly to match %d. */
+ fprintf(stderr, " (length %d)\n", v1 == NULL ? 0 : (int)strlen(v1));
+ fprintf(stderr, " %s = ", e2);
+ strdump(v2);
+ fprintf(stderr, " (length %d)\n", v2 == NULL ? 0 : (int)strlen(v2));
report_failure(extra);
return (0);
}
+/*
+ * Write a wide string to stderr in double quotes, or (null) for a
+ * NULL pointer.  Printable ASCII is shown as is; other values are
+ * shown as \xHH below 256, \uHHHH below 0x10000, and \UHHHHHHHH
+ * from there on.
+ */
+static void wcsdump(const wchar_t *w)
+{
+	const wchar_t *cur;
+	unsigned int code;
+
+	if (w == NULL) {
+		fprintf(stderr, "(null)");
+		return;
+	}
+	fprintf(stderr, "\"");
+	for (cur = w; *cur != L'\0'; ++cur) {
+		code = *cur;
+		if (code >= 0x10000)
+			fprintf(stderr, "\\U%08X", code);
+		else if (code >= 256)
+			fprintf(stderr, "\\u%04X", code);
+		else if (code < 32 || code >= 127)
+			fprintf(stderr, "\\x%02X", code);
+		else
+			fprintf(stderr, "%c", code);
+	}
+	fprintf(stderr, "\"");
+}
+
/* assertEqualWString() displays the values of the two strings. */
int
test_assert_equal_wstring(const char *file, int line,
void *extra)
{
++assertions;
- if (wcscmp(v1, v2) == 0) {
+ if (v1 == NULL) {
+ if (v2 == NULL) {
+ msg[0] = '\0';
+ return (1);
+ }
+ } else if (v2 == NULL) {
+ if (v1 == NULL) {
+ msg[0] = '\0';
+ return (1);
+ }
+ } else if (wcscmp(v1, v2) == 0) {
msg[0] = '\0';
return (1);
}
failures ++;
- if (previous_failures(file, line))
+ if (!verbose && previous_failures(file, line))
return (0);
fprintf(stderr, "%s:%d: Assertion failed: Unicode strings not equal\n",
file, line);
- fwprintf(stderr, L" %s = \"%ls\"\n", e1, v1);
- fwprintf(stderr, L" %s = \"%ls\"\n", e2, v2);
+ fprintf(stderr, " %s = ", e1);
+ wcsdump(v1);
+ fprintf(stderr, "\n");
+ fprintf(stderr, " %s = ", e2);
+ wcsdump(v2);
+ fprintf(stderr, "\n");
report_failure(extra);
return (0);
}
return (1);
}
failures ++;
- if (previous_failures(file, line))
+ if (!verbose && previous_failures(file, line))
return (0);
fprintf(stderr, "%s:%d: Assertion failed: memory not equal\n",
file, line);
if (stat(f1, &st) != 0) {
fprintf(stderr, "%s:%d: Could not stat: %s\n", test_filename, test_line, f1);
report_failure(NULL);
+ return (0);
}
if (st.st_size == 0)
return (1);
failures ++;
- if (previous_failures(test_filename, test_line))
+ if (!verbose && previous_failures(test_filename, test_line))
return (0);
fprintf(stderr, "%s:%d: File not empty: %s\n", test_filename, test_line, f1);
break;
}
failures ++;
- if (previous_failures(test_filename, test_line))
+ if (!verbose && previous_failures(test_filename, test_line))
return (0);
fprintf(stderr, "%s:%d: Files are not identical\n",
test_filename, test_line);
(*tests[i].func)();
/* Summarize the results of this test. */
summarize();
+ /* If there were no failures, we can remove the work dir. */
+ if (failures == failures_before) {
+ if (!keep_temp_files && chdir(tmpdir) == 0) {
+ systemf("rm -rf %s", tests[i].name);
+ }
+ }
/* Return appropriate status. */
return (failures == failures_before ? 0 : 1);
}
printf("Default is to run all tests.\n");
printf("Otherwise, specify the numbers of the tests you wish to run.\n");
printf("Options:\n");
- printf(" -k Keep running after failures.\n");
- printf(" Default: Core dump after any failure.\n");
+ printf(" -d Dump core after any failure, for debugging.\n");
+ printf(" -k Keep all temp files.\n");
+ printf(" Default: temp files for successful tests deleted.\n");
#ifdef PROGRAM
printf(" -p <path> Path to executable to be tested.\n");
printf(" Default: path taken from " ENVBASE " environment variable.\n");
printf(" -q Quiet.\n");
printf(" -r <dir> Path to dir containing reference files.\n");
printf(" Default: Current directory.\n");
+ printf(" -v Verbose.\n");
printf("Available tests:\n");
for (i = 0; i < limit; i++)
printf(" %d: %s\n", i, tests[i].name);
testprog = getenv(ENVBASE);
#endif
- /* Allow -k to be controlled through the environment. */
- if (getenv(ENVBASE "_KEEP_GOING") != NULL)
- dump_on_failure = 0;
+ /* Allow -d to be controlled through the environment. */
+ if (getenv(ENVBASE "_DEBUG") != NULL)
+ dump_on_failure = 1;
/* Get the directory holding test files from environment. */
refdir = getenv(ENVBASE "_TEST_FILES");
/*
* Parse options.
*/
- while ((opt = getopt(argc, argv, "kp:qr:")) != -1) {
+ while ((opt = getopt(argc, argv, "dkp:qr:v")) != -1) {
switch (opt) {
+ case 'd':
+ dump_on_failure = 1;
+ break;
case 'k':
- dump_on_failure = 0;
+ keep_temp_files = 1;
break;
case 'p':
#ifdef PROGRAM
case 'r':
refdir = optarg;
break;
+ case 'v':
+ verbose = 1;
+ break;
case '?':
default:
usage(progname);
--p;
*p = '\0';
}
+ systemf("rm %s/refdir", tmpdir);
}
/*
free(refdir_alloc);
+ /* If the final tmpdir is empty, we can remove it. */
+ /* This should be the usual case when all tests succeed. */
+ rmdir(tmpdir);
+
return (tests_failed);
}
return (1);
if (qual != acl->qual)
return (0);
- if (name == NULL) {
- if (acl->name == NULL || acl->name[0] == '\0')
- return (1);
- }
- if (acl->name == NULL) {
- if (name[0] == '\0')
- return (1);
- }
+ if (name == NULL)
+ return (acl->name == NULL || acl->name[0] == '\0');
+ if (acl->name == NULL)
+ return (name == NULL || name[0] == '\0');
return (0 == strcmp(name, acl->name));
}
DEFINE_TEST(test_archive_api_feature)
{
char buff[128];
+ const char *p;
/* This is the (hopefully) final versioning API. */
assertEqualInt(ARCHIVE_VERSION_NUMBER, archive_version_number());
archive_version_number() / 1000000,
(archive_version_number() / 1000) % 1000,
archive_version_number() % 1000);
- assertEqualString(buff, archive_version_string());
+ failure("Version string is: %s, computed is: %s",
+ archive_version_string(), buff);
+ assert(memcmp(buff, archive_version_string(), strlen(buff)) == 0);
+ if (strlen(buff) < strlen(archive_version_string())) {
+ p = archive_version_string() + strlen(buff);
+ failure("Version string is: %s", archive_version_string());
+ assert(*p == 'a' || *p == 'b' || *p == 'c' || *p == 'd');
+ ++p;
+ failure("Version string is: %s", archive_version_string());
+ assert(*p == '\0');
+ }
/* This is all scheduled to disappear in libarchive 3.0 */
#if ARCHIVE_VERSION_NUMBER < 3000000
const void *xval; /* For xattr tests. */
size_t xsize; /* For xattr tests. */
int c;
+ wchar_t wc;
+ long l;
assert((e = archive_entry_new()) != NULL);
archive_entry_copy_link_w(e, L"link3");
assertEqualString(archive_entry_hardlink(e), NULL);
assertEqualString(archive_entry_symlink(e), "link3");
- /* Arbitrarily override hardlink if both hardlink and symlink set. */
+ /* Arbitrarily override symlink if both hardlink and symlink set. */
archive_entry_set_hardlink(e, "hardlink");
archive_entry_set_symlink(e, "symlink");
archive_entry_set_link(e, "link");
/*
* Exercise the character-conversion logic, if we can.
*/
- failure("Can't exercise charset-conversion logic.");
- if (assert(NULL != setlocale(LC_ALL, "de_DE.UTF-8"))) {
+ if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) {
+ skipping("Can't exercise charset-conversion logic without"
+ " a suitable locale.");
+ } else {
/* A filename that cannot be converted to wide characters. */
archive_entry_copy_pathname(e, "abc\314\214mno\374xyz");
failure("Converting invalid chars to Unicode should fail.");
assert(NULL == archive_entry_symlink_w(e));
}
+ l = 0x12345678L;
+ wc = (wchar_t)l; /* Wide character too big for UTF-8. */
+ if (NULL == setlocale(LC_ALL, "C") || (long)wc != l) {
+ skipping("Testing charset conversion failure requires 32-bit wchar_t and support for \"C\" locale.");
+ } else {
+ /*
+ * Build the string L"xxx\U12345678yyy\u5678zzz" without
+ * using C99 \u#### syntax, which isn't uniformly
+ * supported. (GCC 3.4.6, for instance, defaults to
+ * "c89 plus GNU extensions.")
+ */
+ wcscpy(wbuff, L"xxxAyyyBzzz");
+ wbuff[3] = 0x12345678;
+ wbuff[7] = 0x5678;
+ /* A wide filename that cannot be converted to narrow. */
+ archive_entry_copy_pathname_w(e, wbuff);
+ failure("Converting wide characters from Unicode should fail.");
+ assertEqualString(NULL, archive_entry_pathname(e));
+ }
+
/* Release the experimental entry. */
archive_entry_free(e);
}
* stored and restored correctly, regardless of the encodings.
*/
-DEFINE_TEST(test_pax_filename_encoding)
+/*
+ * Read a manually-created archive that has filenames that are
+ * stored in binary instead of UTF-8 and verify that we get
+ * the right filename returned and that we get a warning only
+ * if the header isn't marked as binary.
+ */
+DEFINE_TEST(test_pax_filename_encoding_1)
{
static const char testname[] = "test_pax_filename_encoding.tar.gz";
- char buff[65536];
/*
* \314\214 is a valid 2-byte UTF-8 sequence.
* \374 is invalid in UTF-8.
*/
char filename[] = "abc\314\214mno\374xyz";
- char longname[] = "abc\314\214mno\374xyz"
- "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
- "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
- "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
- "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
- "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
- "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
- ;
- size_t used;
struct archive *a;
struct archive_entry *entry;
* in it, but the header is not marked as hdrcharset=BINARY, so that
* requires a warning.
*/
- failure("An invalid UTF8 pathname in a pax archive should be read\n"
- " without conversion but with a warning");
+ failure("Invalid UTF8 in a pax archive pathname should cause a warning");
assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
assertEqualString(filename, archive_entry_pathname(entry));
/*
assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry));
assertEqualString(filename, archive_entry_pathname(entry));
archive_read_finish(a);
+}
+
+/*
+ * Set the locale and write a pathname containing invalid characters.
+ * This should work; the underlying implementation should automatically
+ * fall back to storing the pathname in binary.
+ */
+DEFINE_TEST(test_pax_filename_encoding_2)
+{
+ char filename[] = "abc\314\214mno\374xyz";
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[65536];
+ char longname[] = "abc\314\214mno\374xyz"
+ "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+ "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+ "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+ "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+ "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+ "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+ ;
+ size_t used;
/*
* We need a starting locale which has invalid sequences.
* de_DE.UTF-8 seems to be commonly supported.
*/
/* If it doesn't exist, just warn and return. */
- failure("We need a suitable locale for the encoding tests.");
- if (!assert(NULL != setlocale(LC_ALL, "de_DE.UTF-8")))
+ if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) {
+ skipping("invalid encoding tests require a suitable locale;"
+ " de_DE.UTF-8 not available on this system");
return;
+ }
assert((a = archive_write_new()) != NULL);
assertEqualIntA(a, 0, archive_write_set_format_pax(a));
assertEqualInt(0, archive_read_finish(a));
}
+/*
+ * Write five pax entries under the "C" locale, each carrying one
+ * wide-character field (pathname, gname, uname, hardlink, symlink)
+ * that holds characters outside ASCII, then read the archive back
+ * and check that every header yields ARCHIVE_WARN, the original
+ * wide string, and a UTF-8 narrow string.
+ * TODO: Figure out the "right" behavior here.
+ */
+DEFINE_TEST(test_pax_filename_encoding_3)
+{
+ wchar_t badname[] = L"xxxAyyyBzzz";
+ const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz";
+ struct archive *a;
+ struct archive_entry *entry;
+ char buff[65536];
+ size_t used;
+
+ badname[3] = 0x1234;
+ badname[7] = 0x5678;
+
+ /* If the "C" locale can't be selected, report a skip and return. */
+ if (NULL == setlocale(LC_ALL, "C")) {
+ skipping("Can't set \"C\" locale, so can't exercise "
+ "certain character-conversion failures");
+ return;
+ }
+
+ assert((a = archive_write_new()) != NULL);
+ assertEqualIntA(a, 0, archive_write_set_format_pax(a));
+ assertEqualIntA(a, 0, archive_write_set_compression_none(a));
+ assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
+ assertEqualInt(0,
+ archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+ assert((entry = archive_entry_new()) != NULL);
+ /* Set pathname to non-convertible wide value. */
+ archive_entry_copy_pathname_w(entry, badname);
+ archive_entry_set_filetype(entry, AE_IFREG);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+
+ assert((entry = archive_entry_new()) != NULL);
+ archive_entry_copy_pathname_w(entry, L"abc");
+ /* Set gname to non-convertible wide value. */
+ archive_entry_copy_gname_w(entry, badname);
+ archive_entry_set_filetype(entry, AE_IFREG);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+
+ assert((entry = archive_entry_new()) != NULL);
+ archive_entry_copy_pathname_w(entry, L"abc");
+ /* Set uname to non-convertible wide value. */
+ archive_entry_copy_uname_w(entry, badname);
+ archive_entry_set_filetype(entry, AE_IFREG);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+
+ assert((entry = archive_entry_new()) != NULL);
+ archive_entry_copy_pathname_w(entry, L"abc");
+ /* Set hardlink to non-convertible wide value. */
+ archive_entry_copy_hardlink_w(entry, badname);
+ archive_entry_set_filetype(entry, AE_IFREG);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+
+ assert((entry = archive_entry_new()) != NULL);
+ archive_entry_copy_pathname_w(entry, L"abc");
+ /* Set symlink to non-convertible wide value. */
+ archive_entry_copy_symlink_w(entry, badname);
+ archive_entry_set_filetype(entry, AE_IFLNK);
+ assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+ archive_entry_free(entry);
+
+ assertEqualInt(0, archive_write_close(a));
+ assertEqualInt(0, archive_write_finish(a));
+
+ /*
+ * Now read the entries back, in the same order they were written.
+ */
+
+ assert((a = archive_read_new()) != NULL);
+ assertEqualInt(0, archive_read_support_format_tar(a));
+ assertEqualInt(0, archive_read_open_memory(a, buff, used));
+
+ failure("A non-convertible pathname should cause a warning.");
+ assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+ assertEqualWString(badname, archive_entry_pathname_w(entry));
+ failure("If native locale can't convert, we should get UTF-8 back.");
+ assertEqualString(badname_utf8, archive_entry_pathname(entry));
+
+ failure("A non-convertible gname should cause a warning.");
+ assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+ assertEqualWString(badname, archive_entry_gname_w(entry));
+ failure("If native locale can't convert, we should get UTF-8 back.");
+ assertEqualString(badname_utf8, archive_entry_gname(entry));
+
+ failure("A non-convertible uname should cause a warning.");
+ assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+ assertEqualWString(badname, archive_entry_uname_w(entry));
+ failure("If native locale can't convert, we should get UTF-8 back.");
+ assertEqualString(badname_utf8, archive_entry_uname(entry));
+
+ failure("A non-convertible hardlink should cause a warning.");
+ assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+ assertEqualWString(badname, archive_entry_hardlink_w(entry));
+ failure("If native locale can't convert, we should get UTF-8 back.");
+ assertEqualString(badname_utf8, archive_entry_hardlink(entry));
+
+ failure("A non-convertible symlink should cause a warning.");
+ assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+ assertEqualWString(badname, archive_entry_symlink_w(entry));
+ assertEqualWString(NULL, archive_entry_hardlink_w(entry));
+ failure("If native locale can't convert, we should get UTF-8 back.");
+ assertEqualString(badname_utf8, archive_entry_symlink(entry));
+
+ assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry));
+
+ assertEqualInt(0, archive_read_close(a));
+ assertEqualInt(0, archive_read_finish(a));
+}
struct archive_entry *ae;
struct archive *a;
size_t used;
- size_t prefix_length = 0;
- unsigned i = 0;
+ char *p;
+ int i;
+ p = filename;
if (prefix) {
strcpy(filename, prefix);
- i = prefix_length = strlen(prefix);
+ p += strlen(p);
}
- for (; i < prefix_length + dlen; i++)
- filename[i] = 'a';
- filename[i++] = '/';
- for (; i < prefix_length + dlen + flen + 1; i++)
- filename[i] = 'b';
- filename[i++] = '\0';
+ if (dlen > 0) {
+ for (i = 0; i < dlen; i++)
+ *p++ = 'a';
+ *p++ = '/';
+ }
+ for (i = 0; i < flen; i++)
+ *p++ = 'b';
+ *p = '\0';
strcpy(dirname, filename);
int dlen, flen;
/* Repeat the following for a variety of dir/file lengths. */
- for (dlen = 40; dlen < 60; dlen++) {
- for (flen = 40; flen < 60; flen++) {
+ for (dlen = 45; dlen < 55; dlen++) {
+ for (flen = 45; flen < 55; flen++) {
+ test_filename(NULL, dlen, flen);
+ test_filename("/", dlen, flen);
+ }
+ }
+
+ for (dlen = 0; dlen < 140; dlen += 10) {
+ for (flen = 98; flen < 102; flen++) {
test_filename(NULL, dlen, flen);
test_filename("/", dlen, flen);
}
}
for (dlen = 140; dlen < 160; dlen++) {
- for (flen = 90; flen < 110; flen++) {
+ for (flen = 95; flen < 105; flen++) {
test_filename(NULL, dlen, flen);
test_filename("/", dlen, flen);
}
archive_entry_copy_pathname(ae, namebuff);
archive_entry_set_mode(ae, S_IFREG | 0755);
filesize = tests[i];
+
+ if (filesize < 0) {
+ skipping("32-bit off_t doesn't permit testing of very large files.");
+ return;
+ }
archive_entry_set_size(ae, filesize);
assertA(0 == archive_write_header(a, ae));
char buff[4096];
char buff2[64];
-static unsigned char strtab[] = "abcdefghijklmn.o/\nggghhhjjjrrrttt.o/\niiijjjdddsssppp.o/\n";
+static char strtab[] = "abcdefghijklmn.o/\nggghhhjjjrrrttt.o/\niiijjjdddsssppp.o/\n";
DEFINE_TEST(test_write_format_ar)
{
# $FreeBSD: src/usr.bin/tar/Makefile,v 1.34 2008/03/18 06:18:49 kientzle Exp $
PROG= bsdtar
-BSDTAR_VERSION_STRING=2.5.0b
+BSDTAR_VERSION_STRING=2.5.1b
SRCS= bsdtar.c getdate.y matching.c read.c tree.c util.c write.c
WARNS?= 5
DPADD= ${LIBARCHIVE} ${LIBBZ2} ${LIBZ}
printf("bsdtar %s - %s\n",
BSDTAR_VERSION_STRING,
archive_version());
- exit(1);
+ exit(0);
}
static const char *long_help_msg =