From 9d2e02e2d879ef6c4dae68bf0791c335a35a7137 Mon Sep 17 00:00:00 2001
From: Tim Kientzle <kientzle@gmail.com>
Date: Wed, 30 Apr 2008 17:48:09 -0400
Subject: [PATCH] Synchronize a bunch of changes from my local tree.

SVN-Revision: 5
---
 libarchive/Makefile                           |  12 +-
 libarchive/archive_entry.c                    | 408 +++++++++++++-----
 libarchive/archive_entry.h                    | 314 +++++++++-----
 libarchive/archive_entry_link_resolver.c      | 386 ++++++++++++-----
 libarchive/archive_entry_private.h            |  26 +-
 libarchive/archive_platform.h                 |   3 +
 .../archive_read_support_format_iso9660.c     |   5 +
 libarchive/archive_read_support_format_tar.c  |  92 ++--
 libarchive/archive_read_support_format_zip.c  | 117 ++++-
 libarchive/archive_string.c                   | 255 ++++++++++-
 libarchive/archive_string.h                   |  26 +-
 libarchive/archive_util.c                     |  24 +-
 libarchive/archive_write_disk.c               |  10 +-
 libarchive/archive_write_set_format_pax.c     |  75 +++-
 libarchive/archive_write_set_format_ustar.c   |  28 +-
 libarchive/test/Makefile                      |  15 +-
 libarchive/test/main.c                        | 129 +++++-
 libarchive/test/test_acl_pax.c                |  12 +-
 libarchive/test/test_archive_api_feature.c    |  13 +-
 libarchive/test/test_entry.c                  |  30 +-
 libarchive/test/test_pax_filename_encoding.c  | 166 ++++++-
 libarchive/test/test_tar_filenames.c          |  34 +-
 libarchive/test/test_tar_large.c              |   5 +
 libarchive/test/test_write_format_ar.c        |   2 +-
 tar/Makefile                                  |   2 +-
 tar/bsdtar.c                                  |   2 +-
 26 files changed, 1662 insertions(+), 529 deletions(-)

diff --git a/libarchive/Makefile b/libarchive/Makefile
index 75078d790..585b89771 100644
--- a/libarchive/Makefile
+++ b/libarchive/Makefile
@@ -8,12 +8,12 @@ LDADD=	-lbz2 -lz
 # Version is three numbers:
 #  Major: Bumped ONLY when API/ABI breakage happens (see SHLIB_MAJOR)
 #  Minor: Bumped when significant new features are added
-#  Revision: Bumped on any notable change
+#  Revision: Bumped frequently.
 
 # The useful version number (one integer, easy to compare)
-LIBARCHIVE_VERSION= 2004012
+LIBARCHIVE_VERSION_NUMBER=2005001
 # The pretty version string
-LIBARCHIVE_VERSION_STRING!= echo $$((${LIBARCHIVE_VERSION} / 1000000)).$$((${LIBARCHIVE_VERSION} / 1000 % 1000)).$$((${LIBARCHIVE_VERSION} % 1000))
+LIBARCHIVE_VERSION_STRING=2.5.1b
 
 # FreeBSD SHLIB_MAJOR value is managed as part of the FreeBSD system.
 # It has no real relation to the version number above.
@@ -31,10 +31,8 @@ INCS=	archive.h archive_entry.h
 # Note: FreeBSD has inttypes.h, so enable that include in archive.h.in
 archive.h:	archive.h.in Makefile
 	cat ${.CURDIR}/archive.h.in | sed				\
-		-e 's/@LIBARCHIVE_VERSION@/${LIBARCHIVE_VERSION}/g'	\
-		-e 's/@LIBARCHIVE_VERSION_STRING@/${LIBARCHIVE_VERSION_STRING}/g' \
-		-e 's/@SHLIB_MAJOR@/${SHLIB_MAJOR}/g'			\
-		-e 's|@ARCHIVE_H_INCLUDE_INTTYPES_H@|#include <inttypes.h>  /* For int64_t */|g' \
+	   -e 's/@LIBARCHIVE_VERSION_NUMBER@/${LIBARCHIVE_VERSION_NUMBER}/g' \
+	   -e 's/@LIBARCHIVE_VERSION_STRING@/${LIBARCHIVE_VERSION_STRING}/g' \
 		> archive.h
 
 # archive.h needs to be cleaned
diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c
index 5f9e39a66..228f91ac6 100644
--- a/libarchive/archive_entry.c
+++ b/libarchive/archive_entry.c
@@ -91,15 +91,17 @@ static void	aes_clean(struct aes *);
 static void	aes_copy(struct aes *dest, struct aes *src);
 static const char *	aes_get_mbs(struct aes *);
 static const wchar_t *	aes_get_wcs(struct aes *);
-static void	aes_set_mbs(struct aes *, const char *mbs);
-static void	aes_copy_mbs(struct aes *, const char *mbs);
+static int	aes_set_mbs(struct aes *, const char *mbs);
+static int	aes_copy_mbs(struct aes *, const char *mbs);
 /* static void	aes_set_wcs(struct aes *, const wchar_t *wcs); */
-static void	aes_copy_wcs(struct aes *, const wchar_t *wcs);
-static void	aes_copy_wcs_len(struct aes *, const wchar_t *wcs, size_t);
+static int	aes_copy_wcs(struct aes *, const wchar_t *wcs);
+static int	aes_copy_wcs_len(struct aes *, const wchar_t *wcs, size_t);
 
 static char *	 ae_fflagstostr(unsigned long bitset, unsigned long bitclear);
 static const wchar_t	*ae_wcstofflags(const wchar_t *stringp,
 		    unsigned long *setp, unsigned long *clrp);
+static const char	*ae_strtofflags(const char *stringp,
+		    unsigned long *setp, unsigned long *clrp);
 static void	append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag,
 		    const wchar_t *wname, int perm, int id);
 static void	append_id_w(wchar_t **wp, int id);
@@ -144,173 +146,216 @@ static size_t wcslen(const wchar_t *s)
 #define wmemcpy(a,b,i)  (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t))
 #endif
 
-
 static void
 aes_clean(struct aes *aes)
 {
-	if (aes->aes_mbs_alloc) {
-		free(aes->aes_mbs_alloc);
-		aes->aes_mbs_alloc = NULL;
-	}
-	if (aes->aes_wcs_alloc) {
-		free(aes->aes_wcs_alloc);
-		aes->aes_wcs_alloc = NULL;
+	if (aes->aes_wcs) {
+		free((wchar_t *)(uintptr_t)aes->aes_wcs);
+		aes->aes_wcs = NULL;
 	}
-	memset(aes, 0, sizeof(*aes));
+	archive_string_free(&(aes->aes_mbs));
+	archive_string_free(&(aes->aes_utf8));
+	aes->aes_set = 0;
 }
 
 static void
 aes_copy(struct aes *dest, struct aes *src)
 {
-	*dest = *src;
-	if (src->aes_mbs != NULL) {
-		dest->aes_mbs_alloc = strdup(src->aes_mbs);
-		dest->aes_mbs = dest->aes_mbs_alloc;
-		if (dest->aes_mbs == NULL)
-			__archive_errx(1, "No memory for aes_copy()");
-	}
+	wchar_t *wp;
+
+	dest->aes_set = src->aes_set;
+	archive_string_copy(&(dest->aes_mbs), &(src->aes_mbs));
+	archive_string_copy(&(dest->aes_utf8), &(src->aes_utf8));
 
 	if (src->aes_wcs != NULL) {
-		dest->aes_wcs_alloc = (wchar_t *)malloc((wcslen(src->aes_wcs) + 1)
+		wp = (wchar_t *)malloc((wcslen(src->aes_wcs) + 1)
 		    * sizeof(wchar_t));
-		dest->aes_wcs = dest->aes_wcs_alloc;
-		if (dest->aes_wcs == NULL)
+		if (wp == NULL)
 			__archive_errx(1, "No memory for aes_copy()");
-		wcscpy(dest->aes_wcs_alloc, src->aes_wcs);
+		wcscpy(wp, src->aes_wcs);
+		dest->aes_wcs = wp;
+	}
+}
+
+static const char *
+aes_get_utf8(struct aes *aes)
+{
+	if (aes->aes_set & AES_SET_UTF8)
+		return (aes->aes_utf8.s);
+	if ((aes->aes_set & AES_SET_WCS)
+	    && archive_strappend_w_utf8(&(aes->aes_utf8), aes->aes_wcs) != NULL) {
+		aes->aes_set |= AES_SET_UTF8;
+		return (aes->aes_utf8.s);
 	}
+	return (NULL);
 }
 
 static const char *
 aes_get_mbs(struct aes *aes)
 {
-	if (aes->aes_mbs == NULL && aes->aes_wcs == NULL)
-		return NULL;
-	if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) {
-		/*
-		 * XXX Need to estimate the number of byte in the
-		 * multi-byte form.  Assume that, on average, wcs
-		 * chars encode to no more than 3 bytes.  There must
-		 * be a better way... XXX
-		 */
-		size_t mbs_length = wcslen(aes->aes_wcs) * 3 + 64;
-
-		aes->aes_mbs_alloc = (char *)malloc(mbs_length);
-		aes->aes_mbs = aes->aes_mbs_alloc;
-		if (aes->aes_mbs == NULL)
-			__archive_errx(1, "No memory for aes_get_mbs()");
-		wcstombs(aes->aes_mbs_alloc, aes->aes_wcs, mbs_length - 1);
-		aes->aes_mbs_alloc[mbs_length - 1] = 0;
+	/* If we already have an MBS form, return that immediately. */
+	if (aes->aes_set & AES_SET_MBS)
+		return (aes->aes_mbs.s);
+	/* If there's a WCS form, try converting with the native locale. */
+	if ((aes->aes_set & AES_SET_WCS)
+	    && archive_strappend_w_mbs(&(aes->aes_mbs), aes->aes_wcs) != NULL) {
+		aes->aes_set |= AES_SET_MBS;
+		return (aes->aes_mbs.s);
 	}
-	return (aes->aes_mbs);
+	/* We'll use UTF-8 for MBS if all else fails. */
+	return (aes_get_utf8(aes));
 }
 
 static const wchar_t *
 aes_get_wcs(struct aes *aes)
 {
+	wchar_t *w;
 	int r;
 
-	if (aes->aes_wcs == NULL && aes->aes_mbs == NULL)
-		return NULL;
-	if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) {
+	/* Return WCS form if we already have it. */
+	if (aes->aes_set & AES_SET_WCS)
+		return (aes->aes_wcs);
+
+	if (aes->aes_set & AES_SET_MBS) {
+		/* Try converting MBS to WCS using native locale. */
 		/*
 		 * No single byte will be more than one wide character,
 		 * so this length estimate will always be big enough.
 		 */
-		size_t wcs_length = strlen(aes->aes_mbs);
+		size_t wcs_length = aes->aes_mbs.length;
 
-		aes->aes_wcs_alloc
-		    = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t));
-		aes->aes_wcs = aes->aes_wcs_alloc;
-		if (aes->aes_wcs == NULL)
+		w = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t));
+		if (w == NULL)
 			__archive_errx(1, "No memory for aes_get_wcs()");
-		r = mbstowcs(aes->aes_wcs_alloc, aes->aes_mbs, wcs_length);
-		aes->aes_wcs_alloc[wcs_length] = 0;
-		if (r == -1) {
-			/* Conversion failed, don't lie to our clients. */
-			free(aes->aes_wcs_alloc);
-			aes->aes_wcs = aes->aes_wcs_alloc = NULL;
+		r = mbstowcs(w, aes->aes_mbs.s, wcs_length);
+		w[wcs_length] = 0;
+		if (r > 0) {
+			aes->aes_set |= AES_SET_WCS;
+			return (aes->aes_wcs = w);
 		}
+		free(w);
 	}
-	return (aes->aes_wcs);
+
+	if (aes->aes_set & AES_SET_UTF8) {
+		/* Try converting UTF8 to WCS; flag WCS only on success. */
+		aes->aes_wcs = __archive_string_utf8_w(&(aes->aes_utf8));
+		aes->aes_set |= (aes->aes_wcs != NULL ? AES_SET_WCS : 0);
+		return (aes->aes_wcs);
+	}
+	return (NULL);
 }
 
-static void
+static int
 aes_set_mbs(struct aes *aes, const char *mbs)
 {
-	if (aes->aes_mbs_alloc) {
-		free(aes->aes_mbs_alloc);
-		aes->aes_mbs_alloc = NULL;
-	}
-	if (aes->aes_wcs_alloc) {
-		free(aes->aes_wcs_alloc);
-		aes->aes_wcs_alloc = NULL;
-	}
-	aes->aes_mbs = mbs;
-	aes->aes_wcs = NULL;
+	return (aes_copy_mbs(aes, mbs));
 }
 
-static void
+static int
 aes_copy_mbs(struct aes *aes, const char *mbs)
 {
-	if (aes->aes_mbs_alloc) {
-		free(aes->aes_mbs_alloc);
-		aes->aes_mbs_alloc = NULL;
+	if (mbs == NULL) {
+		aes->aes_set = 0;
+		return (0);
 	}
-	if (aes->aes_wcs_alloc) {
-		free(aes->aes_wcs_alloc);
-		aes->aes_wcs_alloc = NULL;
+	aes->aes_set = AES_SET_MBS; /* Only MBS form is set now. */
+	archive_strcpy(&(aes->aes_mbs), mbs);
+	archive_string_empty(&(aes->aes_utf8));
+	if (aes->aes_wcs) {
+		free((wchar_t *)(uintptr_t)aes->aes_wcs);
+		aes->aes_wcs = NULL;
 	}
-	aes->aes_mbs_alloc = (char *)malloc((strlen(mbs) + 1) * sizeof(char));
-	if (aes->aes_mbs_alloc == NULL)
-		__archive_errx(1, "No memory for aes_copy_mbs()");
-	strcpy(aes->aes_mbs_alloc, mbs);
-	aes->aes_mbs = aes->aes_mbs_alloc;
-	aes->aes_wcs = NULL;
+	return (0);
 }
 
-#if 0
-static void
-aes_set_wcs(struct aes *aes, const wchar_t *wcs)
+/*
+ * The 'update' form tries to proactively update all forms of
+ * this string (WCS and MBS).  It returns 1 if every conversion
+ * succeeds and 0 if any of them fail.  This is used by the 'pax'
+ * handler, for instance, to detect and report character-conversion
+ * failures early while still allowing clients to get potentially
+ * useful values from the more tolerant lazy conversions.  (get_mbs
+ * and get_wcs will strive to give the user something useful, so you can
+ * get usable values even if some of the character conversions fail.)
+ */
+static int
+aes_update_utf8(struct aes *aes, const char *utf8)
 {
-	if (aes->aes_mbs_alloc) {
-		free(aes->aes_mbs_alloc);
-		aes->aes_mbs_alloc = NULL;
+	if (utf8 == NULL) {
+		aes->aes_set = 0;
+		return (1); /* Succeeded in clearing everything. */
 	}
-	if (aes->aes_wcs_alloc) {
-		free(aes->aes_wcs_alloc);
-		aes->aes_wcs_alloc = NULL;
+
+	/* Save the UTF8 string. */
+	archive_strcpy(&(aes->aes_utf8), utf8);
+
+	/* Empty the mbs and wcs strings. */
+	archive_string_empty(&(aes->aes_mbs));
+	if (aes->aes_wcs) {
+		free((wchar_t *)(uintptr_t)aes->aes_wcs);
+		aes->aes_wcs = NULL;
 	}
-	aes->aes_mbs = NULL;
-	aes->aes_wcs = wcs;
+
+	aes->aes_set = AES_SET_UTF8;	/* Only UTF8 is set now. */
+
+	/* TODO: We should just do a direct UTF-8 to MBS conversion
+	 * here.  That would be faster, use less space, and give the
+	 * same information.  (If a UTF-8 to MBS conversion succeeds,
+	 * then UTF-8->WCS and Unicode->MBS conversions will both
+	 * succeed.) */
+
+	/* Try converting UTF8 to WCS, return false on failure. */
+	aes->aes_wcs = __archive_string_utf8_w(&(aes->aes_utf8));
+	if (aes->aes_wcs == NULL)
+		return (0);
+	aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; /* Both UTF8 and WCS set. */
+
+	/* Try converting WCS to MBS, return false on failure. */
+	if (archive_strappend_w_mbs(&(aes->aes_mbs), aes->aes_wcs) == NULL)
+		return (0);
+	aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS;
+
+	/* All conversions succeeded. */
+	return (1);
 }
-#endif
 
-static void
+static int
 aes_copy_wcs(struct aes *aes, const wchar_t *wcs)
 {
-	aes_copy_wcs_len(aes, wcs, wcslen(wcs));
+	return aes_copy_wcs_len(aes, wcs, wcs == NULL ? 0 : wcslen(wcs));
 }
 
-static void
+static int
 aes_copy_wcs_len(struct aes *aes, const wchar_t *wcs, size_t len)
 {
-	if (aes->aes_mbs_alloc) {
-		free(aes->aes_mbs_alloc);
-		aes->aes_mbs_alloc = NULL;
+	wchar_t *w;
+
+	if (wcs == NULL) {
+		aes->aes_set = 0;
+		return (0);
 	}
-	if (aes->aes_wcs_alloc) {
-		free(aes->aes_wcs_alloc);
-		aes->aes_wcs_alloc = NULL;
+	aes->aes_set = AES_SET_WCS; /* Only WCS form set. */
+	archive_string_empty(&(aes->aes_mbs));
+	archive_string_empty(&(aes->aes_utf8));
+	if (aes->aes_wcs) {
+		free((wchar_t *)(uintptr_t)aes->aes_wcs);
+		aes->aes_wcs = NULL;
 	}
-	aes->aes_mbs = NULL;
-	aes->aes_wcs_alloc = (wchar_t *)malloc((len + 1) * sizeof(wchar_t));
-	if (aes->aes_wcs_alloc == NULL)
+	w = (wchar_t *)malloc((len + 1) * sizeof(wchar_t));
+	if (w == NULL)
 		__archive_errx(1, "No memory for aes_copy_wcs()");
-	wmemcpy(aes->aes_wcs_alloc, wcs, len);
-	aes->aes_wcs_alloc[len] = L'\0';
-	aes->aes_wcs = aes->aes_wcs_alloc;
+	wmemcpy(w, wcs, len);
+	w[len] = L'\0';
+	aes->aes_wcs = w;
+	return (0);
 }
 
+/****************************************************************************
+ *
+ * Public Interface
+ *
+ ****************************************************************************/
+
 struct archive_entry *
 archive_entry_clear(struct archive_entry *entry)
 {
@@ -350,6 +395,8 @@ archive_entry_clone(struct archive_entry *entry)
 	aes_copy(&entry2->ae_hardlink, &entry->ae_hardlink);
 	aes_copy(&entry2->ae_pathname, &entry->ae_pathname);
 	aes_copy(&entry2->ae_symlink, &entry->ae_symlink);
+	entry2->ae_hardlinkset = entry->ae_hardlinkset;
+	entry2->ae_symlinkset = entry->ae_symlinkset;
 	aes_copy(&entry2->ae_uname, &entry->ae_uname);
 
 	/* Copy ACL data over. */
@@ -515,12 +562,16 @@ archive_entry_gname_w(struct archive_entry *entry)
 const char *
 archive_entry_hardlink(struct archive_entry *entry)
 {
+	if (!entry->ae_hardlinkset)
+		return (NULL);
 	return (aes_get_mbs(&entry->ae_hardlink));
 }
 
 const wchar_t *
 archive_entry_hardlink_w(struct archive_entry *entry)
 {
+	if (!entry->ae_hardlinkset)
+		return (NULL);
 	return (aes_get_wcs(&entry->ae_hardlink));
 }
 
@@ -600,15 +651,25 @@ archive_entry_size(struct archive_entry *entry)
 	return (entry->ae_stat.aest_size);
 }
 
+const char *
+archive_entry_sourcepath(struct archive_entry *entry)
+{
+	return (aes_get_mbs(&entry->ae_sourcepath));
+}
+
 const char *
 archive_entry_symlink(struct archive_entry *entry)
 {
+	if (!entry->ae_symlinkset)
+		return (NULL);
 	return (aes_get_mbs(&entry->ae_symlink));
 }
 
 const wchar_t *
 archive_entry_symlink_w(struct archive_entry *entry)
 {
+	if (!entry->ae_symlinkset)
+		return (NULL);
 	return (aes_get_wcs(&entry->ae_symlink));
 }
 
@@ -651,6 +712,15 @@ archive_entry_set_fflags(struct archive_entry *entry,
 	entry->ae_fflags_clear = clear;
 }
 
+const char *
+archive_entry_copy_fflags_text(struct archive_entry *entry,
+    const char *flags)
+{
+	aes_copy_mbs(&entry->ae_fflags_text, flags);
+	return (ae_strtofflags(flags,
+		    &entry->ae_fflags_set, &entry->ae_fflags_clear));
+}
+
 const wchar_t *
 archive_entry_copy_fflags_text_w(struct archive_entry *entry,
     const wchar_t *flags)
@@ -685,6 +755,12 @@ archive_entry_copy_gname_w(struct archive_entry *entry, const wchar_t *name)
 	aes_copy_wcs(&entry->ae_gname, name);
 }
 
+int
+archive_entry_update_gname_utf8(struct archive_entry *entry, const char *name)
+{
+	return (aes_update_utf8(&entry->ae_gname, name));
+}
+
 void
 archive_entry_set_ino(struct archive_entry *entry, unsigned long ino)
 {
@@ -696,18 +772,24 @@ void
 archive_entry_set_hardlink(struct archive_entry *entry, const char *target)
 {
 	aes_set_mbs(&entry->ae_hardlink, target);
+	if (target != NULL)
+		entry->ae_hardlinkset = 1;
 }
 
 void
 archive_entry_copy_hardlink(struct archive_entry *entry, const char *target)
 {
 	aes_copy_mbs(&entry->ae_hardlink, target);
+	if (target != NULL)
+		entry->ae_hardlinkset = 1;
 }
 
 void
 archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target)
 {
 	aes_copy_wcs(&entry->ae_hardlink, target);
+	if (target != NULL)
+		entry->ae_hardlinkset = 1;
 }
 
 void
@@ -754,8 +836,7 @@ archive_entry_set_devminor(struct archive_entry *entry, dev_t m)
 void
 archive_entry_set_link(struct archive_entry *entry, const char *target)
 {
-	if (entry->ae_symlink.aes_mbs != NULL ||
-	    entry->ae_symlink.aes_wcs != NULL)
+	if (entry->ae_symlinkset)
 		aes_set_mbs(&entry->ae_symlink, target);
 	else
 		aes_set_mbs(&entry->ae_hardlink, target);
@@ -765,8 +846,7 @@ archive_entry_set_link(struct archive_entry *entry, const char *target)
 void
 archive_entry_copy_link(struct archive_entry *entry, const char *target)
 {
-	if (entry->ae_symlink.aes_mbs != NULL ||
-	    entry->ae_symlink.aes_wcs != NULL)
+	if (entry->ae_symlinkset)
 		aes_copy_mbs(&entry->ae_symlink, target);
 	else
 		aes_copy_mbs(&entry->ae_hardlink, target);
@@ -776,13 +856,21 @@ archive_entry_copy_link(struct archive_entry *entry, const char *target)
 void
 archive_entry_copy_link_w(struct archive_entry *entry, const wchar_t *target)
 {
-	if (entry->ae_symlink.aes_mbs != NULL ||
-	    entry->ae_symlink.aes_wcs != NULL)
+	if (entry->ae_symlinkset)
 		aes_copy_wcs(&entry->ae_symlink, target);
 	else
 		aes_copy_wcs(&entry->ae_hardlink, target);
 }
 
+int
+archive_entry_update_link_utf8(struct archive_entry *entry, const char *target)
+{
+	if (entry->ae_symlinkset)
+		return (aes_update_utf8(&entry->ae_symlink, target));
+	else
+		return (aes_update_utf8(&entry->ae_hardlink, target));
+}
+
 void
 archive_entry_set_mode(struct archive_entry *entry, mode_t m)
 {
@@ -823,6 +911,12 @@ archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name)
 	aes_copy_wcs(&entry->ae_pathname, name);
 }
 
+int
+archive_entry_update_pathname_utf8(struct archive_entry *entry, const char *name)
+{
+	return (aes_update_utf8(&entry->ae_pathname, name));
+}
+
 void
 archive_entry_set_perm(struct archive_entry *entry, mode_t p)
 {
@@ -862,22 +956,34 @@ archive_entry_set_size(struct archive_entry *entry, int64_t s)
 	entry->ae_stat.aest_size = s;
 }
 
+void
+archive_entry_copy_sourcepath(struct archive_entry *entry, const char *path)
+{
+	aes_set_mbs(&entry->ae_sourcepath, path);
+}
+
 void
 archive_entry_set_symlink(struct archive_entry *entry, const char *linkname)
 {
 	aes_set_mbs(&entry->ae_symlink, linkname);
+	if (linkname != NULL)
+		entry->ae_symlinkset = 1;
 }
 
 void
 archive_entry_copy_symlink(struct archive_entry *entry, const char *linkname)
 {
 	aes_copy_mbs(&entry->ae_symlink, linkname);
+	if (linkname != NULL)
+		entry->ae_symlinkset = 1;
 }
 
 void
 archive_entry_copy_symlink_w(struct archive_entry *entry, const wchar_t *linkname)
 {
 	aes_copy_wcs(&entry->ae_symlink, linkname);
+	if (linkname != NULL)
+		entry->ae_symlinkset = 1;
 }
 
 void
@@ -905,6 +1011,12 @@ archive_entry_copy_uname_w(struct archive_entry *entry, const wchar_t *name)
 	aes_copy_wcs(&entry->ae_uname, name);
 }
 
+int
+archive_entry_update_uname_utf8(struct archive_entry *entry, const char *name)
+{
+	return (aes_update_utf8(&entry->ae_uname, name));
+}
+
 /*
  * ACL management.  The following would, of course, be a lot simpler
  * if: 1) the last draft of POSIX.1e were a really thorough and
@@ -1744,7 +1856,7 @@ static struct flag {
  *	Convert file flags to a comma-separated string.  If no flags
  *	are set, return the empty string.
  */
-char *
+static char *
 ae_fflagstostr(unsigned long bitset, unsigned long bitclear)
 {
 	char *string, *dp;
@@ -1788,6 +1900,70 @@ ae_fflagstostr(unsigned long bitset, unsigned long bitclear)
 	return (string);
 }
 
+/*
+ * strtofflags --
+ *	Take string of arguments and return file flags.  This
+ *	version works a little differently than strtofflags(3).
+ *	In particular, it always tests every token, skipping any
+ *	unrecognized tokens.  It returns a pointer to the first
+ *	unrecognized token, or NULL if every token was recognized.
+ *	This version is also const-correct and does not modify the
+ *	provided string.  Either of setp and clrp may be NULL.
+ */
+static const char *
+ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp)
+{
+	const char *start, *end;
+	struct flag *flag;
+	unsigned long set, clear;
+	const char *failed;
+
+	set = clear = 0;
+	start = s;
+	failed = NULL;
+	/* Find start of first token. */
+	while (*start == '\t'  ||  *start == ' '  ||  *start == ',')
+		start++;
+	while (*start != '\0') {
+		/* Locate end of token. */
+		end = start;
+		while (*end != '\0'  &&  *end != '\t'  &&
+		    *end != ' '  &&  *end != ',')
+			end++;
+		for (flag = flags; flag->name != NULL; flag++) {
+			if (memcmp(start, flag->name, end - start) == 0) {
+				/* Matched "noXXXX", so reverse the sense. */
+				clear |= flag->set;
+				set |= flag->clear;
+				break;
+			} else if (memcmp(start, flag->name + 2, end - start)
+			    == 0) {
+				/* Matched "XXXX", so don't reverse. */
+				set |= flag->set;
+				clear |= flag->clear;
+				break;
+			}
+		}
+		/* Remember the first unrecognized token. */
+		if (flag->name == NULL  &&  failed == NULL)
+			failed = start;
+
+		/* Find start of next token. */
+		start = end;
+		while (*start == '\t'  ||  *start == ' '  ||  *start == ',')
+			start++;
+
+	}
+
+	if (setp)
+		*setp = set;
+	if (clrp)
+		*clrp = clear;
+
+	/* Return location of first failure. */
+	return (failed);
+}
+
 /*
  * wcstofflags --
  *	Take string of arguments and return file flags.  This
@@ -1798,7 +1974,7 @@ ae_fflagstostr(unsigned long bitset, unsigned long bitclear)
  *	This version is also const-correct and does not modify the
  *	provided string.
  */
-const wchar_t *
+static const wchar_t *
 ae_wcstofflags(const wchar_t *s, unsigned long *setp, unsigned long *clrp)
 {
 	const wchar_t *start, *end;
diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h
index 3bfe9e912..5f7771ca7 100644
--- a/libarchive/archive_entry.h
+++ b/libarchive/archive_entry.h
@@ -31,17 +31,56 @@
 #include <sys/types.h>
 #include <stddef.h>  /* for wchar_t */
 #include <time.h>
+
+/* Get appropriate definitions of standard POSIX-style types. */
+/* These should match the types used in 'struct stat' */
+#ifdef _WIN32
+#define	__LA_UID_T	unsigned int
+#define	__LA_GID_T	unsigned int
+#define	__LA_INO_T	unsigned int
+#define	__LA_DEV_T	unsigned int
+#define	__LA_MODE_T	unsigned short
+#else
 #include <unistd.h>
+#define	__LA_UID_T	uid_t
+#define	__LA_GID_T	gid_t
+#define	__LA_INO_T	ino_t
+#define	__LA_DEV_T	dev_t
+#define	__LA_MODE_T	mode_t
+#endif
+
+/*
+ * On Windows, define LIBARCHIVE_STATIC if you're building or using a
+ * .lib.  The default here assumes you're building a DLL.  Only
+ * libarchive source should ever define __LIBARCHIVE_BUILD.
+ */
+#if ((defined __WIN32__) || (defined _WIN32)) && (!defined LIBARCHIVE_STATIC)
+# ifdef __LIBARCHIVE_BUILD
+#  ifdef __GNUC__
+#   define __LA_DECL	__attribute__((dllexport)) extern
+#  else
+#   define __LA_DECL	__declspec(dllexport)
+#  endif
+# else
+#  ifdef __GNUC__
+#   define __LA_DECL	__attribute__((dllimport)) extern
+#  else
+#   define __LA_DECL	__declspec(dllimport)
+#  endif
+# endif
+#else
+/* Static libraries on all platforms and shared libraries on non-Windows. */
+# define __LA_DECL
+#endif
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-
 /*
  * Description of an archive entry.
  *
- * Basically, a "struct stat" with a few text fields added in.
+ * You can think of this as "struct stat" with some text fields added in.
  *
  * TODO: Add "comment", "charset", and possibly other entries that are
  * supported by "pax interchange" format.  However, GNU, ustar, cpio,
@@ -90,50 +129,51 @@ struct archive_entry;
  * Basic object manipulation
  */
 
-struct archive_entry	*archive_entry_clear(struct archive_entry *);
+__LA_DECL struct archive_entry	*archive_entry_clear(struct archive_entry *);
 /* The 'clone' function does a deep copy; all of the strings are copied too. */
-struct archive_entry	*archive_entry_clone(struct archive_entry *);
-void			 archive_entry_free(struct archive_entry *);
-struct archive_entry	*archive_entry_new(void);
+__LA_DECL struct archive_entry	*archive_entry_clone(struct archive_entry *);
+__LA_DECL void			 archive_entry_free(struct archive_entry *);
+__LA_DECL struct archive_entry	*archive_entry_new(void);
 
 /*
  * Retrieve fields from an archive_entry.
  */
 
-time_t			 archive_entry_atime(struct archive_entry *);
-long			 archive_entry_atime_nsec(struct archive_entry *);
-time_t			 archive_entry_ctime(struct archive_entry *);
-long			 archive_entry_ctime_nsec(struct archive_entry *);
-dev_t			 archive_entry_dev(struct archive_entry *);
-dev_t			 archive_entry_devmajor(struct archive_entry *);
-dev_t			 archive_entry_devminor(struct archive_entry *);
-mode_t			 archive_entry_filetype(struct archive_entry *);
-void			 archive_entry_fflags(struct archive_entry *,
+__LA_DECL time_t	 archive_entry_atime(struct archive_entry *);
+__LA_DECL long		 archive_entry_atime_nsec(struct archive_entry *);
+__LA_DECL time_t	 archive_entry_ctime(struct archive_entry *);
+__LA_DECL long		 archive_entry_ctime_nsec(struct archive_entry *);
+__LA_DECL dev_t		 archive_entry_dev(struct archive_entry *);
+__LA_DECL dev_t		 archive_entry_devmajor(struct archive_entry *);
+__LA_DECL dev_t		 archive_entry_devminor(struct archive_entry *);
+__LA_DECL __LA_MODE_T	 archive_entry_filetype(struct archive_entry *);
+__LA_DECL void		 archive_entry_fflags(struct archive_entry *,
 			    unsigned long * /* set */,
 			    unsigned long * /* clear */);
-const char		*archive_entry_fflags_text(struct archive_entry *);
-gid_t			 archive_entry_gid(struct archive_entry *);
-const char		*archive_entry_gname(struct archive_entry *);
-const wchar_t		*archive_entry_gname_w(struct archive_entry *);
-const char		*archive_entry_hardlink(struct archive_entry *);
-const wchar_t		*archive_entry_hardlink_w(struct archive_entry *);
-ino_t			 archive_entry_ino(struct archive_entry *);
-mode_t			 archive_entry_mode(struct archive_entry *);
-time_t			 archive_entry_mtime(struct archive_entry *);
-long			 archive_entry_mtime_nsec(struct archive_entry *);
-unsigned int		 archive_entry_nlink(struct archive_entry *);
-const char		*archive_entry_pathname(struct archive_entry *);
-const wchar_t		*archive_entry_pathname_w(struct archive_entry *);
-dev_t			 archive_entry_rdev(struct archive_entry *);
-dev_t			 archive_entry_rdevmajor(struct archive_entry *);
-dev_t			 archive_entry_rdevminor(struct archive_entry *);
-int64_t			 archive_entry_size(struct archive_entry *);
-const char		*archive_entry_strmode(struct archive_entry *);
-const char		*archive_entry_symlink(struct archive_entry *);
-const wchar_t		*archive_entry_symlink_w(struct archive_entry *);
-uid_t			 archive_entry_uid(struct archive_entry *);
-const char		*archive_entry_uname(struct archive_entry *);
-const wchar_t		*archive_entry_uname_w(struct archive_entry *);
+__LA_DECL const char	*archive_entry_fflags_text(struct archive_entry *);
+__LA_DECL __LA_GID_T	 archive_entry_gid(struct archive_entry *);
+__LA_DECL const char	*archive_entry_gname(struct archive_entry *);
+__LA_DECL const wchar_t	*archive_entry_gname_w(struct archive_entry *);
+__LA_DECL const char	*archive_entry_hardlink(struct archive_entry *);
+__LA_DECL const wchar_t	*archive_entry_hardlink_w(struct archive_entry *);
+__LA_DECL __LA_INO_T	 archive_entry_ino(struct archive_entry *);
+__LA_DECL __LA_MODE_T	 archive_entry_mode(struct archive_entry *);
+__LA_DECL time_t	 archive_entry_mtime(struct archive_entry *);
+__LA_DECL long		 archive_entry_mtime_nsec(struct archive_entry *);
+__LA_DECL unsigned int	 archive_entry_nlink(struct archive_entry *);
+__LA_DECL const char	*archive_entry_pathname(struct archive_entry *);
+__LA_DECL const wchar_t	*archive_entry_pathname_w(struct archive_entry *);
+__LA_DECL dev_t		 archive_entry_rdev(struct archive_entry *);
+__LA_DECL dev_t		 archive_entry_rdevmajor(struct archive_entry *);
+__LA_DECL dev_t		 archive_entry_rdevminor(struct archive_entry *);
+__LA_DECL const char	*archive_entry_sourcepath(struct archive_entry *);
+__LA_DECL int64_t	 archive_entry_size(struct archive_entry *);
+__LA_DECL const char	*archive_entry_strmode(struct archive_entry *);
+__LA_DECL const char	*archive_entry_symlink(struct archive_entry *);
+__LA_DECL const wchar_t	*archive_entry_symlink_w(struct archive_entry *);
+__LA_DECL __LA_UID_T	 archive_entry_uid(struct archive_entry *);
+__LA_DECL const char	*archive_entry_uname(struct archive_entry *);
+__LA_DECL const wchar_t	*archive_entry_uname_w(struct archive_entry *);
 
 /*
  * Set fields in an archive_entry.
@@ -142,48 +182,54 @@ const wchar_t		*archive_entry_uname_w(struct archive_entry *);
  * In contrast, 'copy' functions do copy the object pointed to.
  */
 
-void	archive_entry_set_atime(struct archive_entry *, time_t, long);
-void	archive_entry_set_ctime(struct archive_entry *, time_t, long);
-void	archive_entry_set_dev(struct archive_entry *, dev_t);
-void	archive_entry_set_devmajor(struct archive_entry *, dev_t);
-void	archive_entry_set_devminor(struct archive_entry *, dev_t);
-void	archive_entry_set_filetype(struct archive_entry *, unsigned int);
-void	archive_entry_set_fflags(struct archive_entry *,
+__LA_DECL void	archive_entry_set_atime(struct archive_entry *, time_t, long);
+__LA_DECL void	archive_entry_set_ctime(struct archive_entry *, time_t, long);
+__LA_DECL void	archive_entry_set_dev(struct archive_entry *, dev_t);
+__LA_DECL void	archive_entry_set_devmajor(struct archive_entry *, dev_t);
+__LA_DECL void	archive_entry_set_devminor(struct archive_entry *, dev_t);
+__LA_DECL void	archive_entry_set_filetype(struct archive_entry *, unsigned int);
+__LA_DECL void	archive_entry_set_fflags(struct archive_entry *,
 	    unsigned long /* set */, unsigned long /* clear */);
 /* Returns pointer to start of first invalid token, or NULL if none. */
 /* Note that all recognized tokens are processed, regardless. */
-const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *,
+__LA_DECL const char *archive_entry_copy_fflags_text(struct archive_entry *,
+	    const char *);
+__LA_DECL const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *,
 	    const wchar_t *);
-void	archive_entry_set_gid(struct archive_entry *, gid_t);
-void	archive_entry_set_gname(struct archive_entry *, const char *);
-void	archive_entry_copy_gname(struct archive_entry *, const char *);
-void	archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *);
-void	archive_entry_set_hardlink(struct archive_entry *, const char *);
-void	archive_entry_copy_hardlink(struct archive_entry *, const char *);
-void	archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *);
-void	archive_entry_set_ino(struct archive_entry *, unsigned long);
-void	archive_entry_set_link(struct archive_entry *, const char *);
-void	archive_entry_copy_link(struct archive_entry *, const char *);
-void	archive_entry_copy_link_w(struct archive_entry *, const wchar_t *);
-void	archive_entry_set_mode(struct archive_entry *, mode_t);
-void	archive_entry_set_mtime(struct archive_entry *, time_t, long);
-void	archive_entry_set_nlink(struct archive_entry *, unsigned int);
-void	archive_entry_set_pathname(struct archive_entry *, const char *);
-void	archive_entry_copy_pathname(struct archive_entry *, const char *);
-void	archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *);
-void	archive_entry_set_perm(struct archive_entry *, mode_t);
-void	archive_entry_set_rdev(struct archive_entry *, dev_t);
-void	archive_entry_set_rdevmajor(struct archive_entry *, dev_t);
-void	archive_entry_set_rdevminor(struct archive_entry *, dev_t);
-void	archive_entry_set_size(struct archive_entry *, int64_t);
-void	archive_entry_set_symlink(struct archive_entry *, const char *);
-void	archive_entry_copy_symlink(struct archive_entry *, const char *);
-void	archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *);
-void	archive_entry_set_uid(struct archive_entry *, uid_t);
-void	archive_entry_set_uname(struct archive_entry *, const char *);
-void	archive_entry_copy_uname(struct archive_entry *, const char *);
-void	archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
-
+__LA_DECL void	archive_entry_set_gid(struct archive_entry *, __LA_GID_T);
+__LA_DECL void	archive_entry_set_gname(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_gname(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int	archive_entry_update_gname_utf8(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_set_hardlink(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_hardlink(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *);
+__LA_DECL void	archive_entry_set_ino(struct archive_entry *, unsigned long);
+__LA_DECL void	archive_entry_set_link(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_link(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_link_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int	archive_entry_update_link_utf8(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_set_mode(struct archive_entry *, __LA_MODE_T);
+__LA_DECL void	archive_entry_set_mtime(struct archive_entry *, time_t, long);
+__LA_DECL void	archive_entry_set_nlink(struct archive_entry *, unsigned int);
+__LA_DECL void	archive_entry_set_pathname(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_pathname(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int	archive_entry_update_pathname_utf8(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_set_perm(struct archive_entry *, __LA_MODE_T);
+__LA_DECL void	archive_entry_set_rdev(struct archive_entry *, dev_t);
+__LA_DECL void	archive_entry_set_rdevmajor(struct archive_entry *, dev_t);
+__LA_DECL void	archive_entry_set_rdevminor(struct archive_entry *, dev_t);
+__LA_DECL void	archive_entry_set_size(struct archive_entry *, int64_t);
+__LA_DECL void	archive_entry_copy_sourcepath(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_set_symlink(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_symlink(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *);
+__LA_DECL void	archive_entry_set_uid(struct archive_entry *, __LA_UID_T);
+__LA_DECL void	archive_entry_set_uname(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_uname(struct archive_entry *, const char *);
+__LA_DECL void	archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int	archive_entry_update_uname_utf8(struct archive_entry *, const char *);
 /*
  * Routines to bulk copy fields to/from a platform-native "struct
  * stat."  Libarchive used to just store a struct stat inside of each
@@ -193,8 +239,8 @@ void	archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
  *
  * TODO: On Linux, provide both stat32 and stat64 versions of these functions.
  */
-const struct stat	*archive_entry_stat(struct archive_entry *);
-void	archive_entry_copy_stat(struct archive_entry *, const struct stat *);
+__LA_DECL const struct stat	*archive_entry_stat(struct archive_entry *);
+__LA_DECL void	archive_entry_copy_stat(struct archive_entry *, const struct stat *);
 
 /*
  * ACL routines.  This used to simply store and return text-format ACL
@@ -242,11 +288,11 @@ void	archive_entry_copy_stat(struct archive_entry *, const struct stat *);
  * POSIX.1e) is useful for handling archive formats that combine
  * default and access information in a single ACL list.
  */
-void	 archive_entry_acl_clear(struct archive_entry *);
-void	 archive_entry_acl_add_entry(struct archive_entry *,
+__LA_DECL void	 archive_entry_acl_clear(struct archive_entry *);
+__LA_DECL void	 archive_entry_acl_add_entry(struct archive_entry *,
 	    int /* type */, int /* permset */, int /* tag */,
 	    int /* qual */, const char * /* name */);
-void	 archive_entry_acl_add_entry_w(struct archive_entry *,
+__LA_DECL void	 archive_entry_acl_add_entry_w(struct archive_entry *,
 	    int /* type */, int /* permset */, int /* tag */,
 	    int /* qual */, const wchar_t * /* name */);
 
@@ -255,11 +301,11 @@ void	 archive_entry_acl_add_entry_w(struct archive_entry *,
  * "next" entry.  The want_type parameter allows you to request only
  * access entries or only default entries.
  */
-int	 archive_entry_acl_reset(struct archive_entry *, int /* want_type */);
-int	 archive_entry_acl_next(struct archive_entry *, int /* want_type */,
+__LA_DECL int	 archive_entry_acl_reset(struct archive_entry *, int /* want_type */);
+__LA_DECL int	 archive_entry_acl_next(struct archive_entry *, int /* want_type */,
 	    int * /* type */, int * /* permset */, int * /* tag */,
 	    int * /* qual */, const char ** /* name */);
-int	 archive_entry_acl_next_w(struct archive_entry *, int /* want_type */,
+__LA_DECL int	 archive_entry_acl_next_w(struct archive_entry *, int /* want_type */,
 	    int * /* type */, int * /* permset */, int * /* tag */,
 	    int * /* qual */, const wchar_t ** /* name */);
 
@@ -276,11 +322,11 @@ int	 archive_entry_acl_next_w(struct archive_entry *, int /* want_type */,
  */
 #define	ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID	1024
 #define	ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT	2048
-const wchar_t	*archive_entry_acl_text_w(struct archive_entry *,
+__LA_DECL const wchar_t	*archive_entry_acl_text_w(struct archive_entry *,
 		    int /* flags */);
 
 /* Return a count of entries matching 'want_type' */
-int	 archive_entry_acl_count(struct archive_entry *, int /* want_type */);
+__LA_DECL int	 archive_entry_acl_count(struct archive_entry *, int /* want_type */);
 
 /*
  * Private ACL parser.  This is private because it handles some
@@ -295,15 +341,15 @@ int	 archive_entry_acl_count(struct archive_entry *, int /* want_type */);
  * TODO: Move this declaration out of the public header and into
  * a private header.  Warnings above are silly.
  */
-int		 __archive_entry_acl_parse_w(struct archive_entry *,
+__LA_DECL int		 __archive_entry_acl_parse_w(struct archive_entry *,
 		    const wchar_t *, int /* type */);
 
 /*
  * extended attributes
  */
 
-void	 archive_entry_xattr_clear(struct archive_entry *);
-void	 archive_entry_xattr_add_entry(struct archive_entry *,
+__LA_DECL void	 archive_entry_xattr_clear(struct archive_entry *);
+__LA_DECL void	 archive_entry_xattr_add_entry(struct archive_entry *,
 	    const char * /* name */, const void * /* value */,
 	    size_t /* size */);
 
@@ -312,37 +358,93 @@ void	 archive_entry_xattr_add_entry(struct archive_entry *,
  * "next" entry.
  */
 
-int	archive_entry_xattr_count(struct archive_entry *);
-int	archive_entry_xattr_reset(struct archive_entry *);
-int	archive_entry_xattr_next(struct archive_entry *,
+__LA_DECL int	archive_entry_xattr_count(struct archive_entry *);
+__LA_DECL int	archive_entry_xattr_reset(struct archive_entry *);
+__LA_DECL int	archive_entry_xattr_next(struct archive_entry *,
 	    const char ** /* name */, const void ** /* value */, size_t *);
 
 /*
- * Utility to detect hardlinks.
+ * Utility to match up hardlinks.
  *
- * The 'struct archive_hardlink_lookup' is a cache of entry
- * names and dev/ino numbers.  Here's how to use it:
- *   1. Create a lookup object with archive_hardlink_lookup_new()
- *   2. Hand each archive_entry to archive_hardlink_lookup().
- *      That function will return NULL (this is not a hardlink to
- *      a previous entry) or the pathname of the first entry
- *      that matched this.
- *   3. Use archive_hardlink_lookup_free() to release the cache.
+ * The 'struct archive_entry_linkresolver' is a cache of archive entries
+ * for files with multiple links.  Here's how to use it:
+ *   1. Create a lookup object with archive_entry_linkresolver_new()
+ *   2. Tell it the archive format you're using.
+ *   3. Hand each archive_entry to archive_entry_linkify().
+ *      That function will return 0, 1, or 2 entries that should
+ *      be written.
+ *   4. Call archive_entry_linkify() with the entry pointer set to
+ *      NULL until no more entries are returned.
+ *   5. Call archive_entry_linkresolver_free(resolver) to free resources.
+ *
+ * The entries returned have their hardlink and size fields updated
+ * appropriately.  If an entry is passed in that does not refer to
+ * a file with multiple links, it is returned unchanged.  The intention
+ * is that you should be able to simply filter all entries through
+ * this machine.
  *
  * To make things more efficient, be sure that each entry has a valid
  * nlinks value.  The hardlink cache uses this to track when all links
  * have been found.  If the nlinks value is zero, it will keep every
  * name in the cache indefinitely, which can use a lot of memory.
+ *
+ * Note that archive_entry_size() is reset to zero if the file
+ * body should not be written to the archive.  Pay attention!
  */
-struct archive_entry_linkresolver;
+__LA_DECL struct archive_entry_linkresolver;
 
-struct archive_entry_linkresolver *archive_entry_linkresolver_new(void);
-void archive_entry_linkresolver_free(struct archive_entry_linkresolver *);
-const char *archive_entry_linkresolve(struct archive_entry_linkresolver *,
-    struct archive_entry *);
+/*
+ * There are three different strategies for marking hardlinks.
+ * The descriptions below name them after the best-known
+ * formats that rely on each strategy:
+ *
+ * "Old cpio" is the simplest, it always returns any entry unmodified.
+ *    As far as I know, only cpio formats use this.  Old cpio archives
+ *    store every link with the full body; the onus is on the dearchiver
+ *    to detect and properly link the files as they are restored.
+ * "tar" is also pretty simple; it caches a copy the first time it sees
+ *    any link.  Subsequent appearances are modified to be hardlink
+ *    references to the first one without any body.  Used by all tar
+ *    formats, although the newest tar formats permit the "old cpio" strategy
+ *    as well.  This strategy is very simple for the dearchiver,
+ *    and reasonably straightforward for the archiver.
+ * "new cpio" is trickier.  It stores the body only with the last
+ *    occurrence.  The complication is that we might not
+ *    see every link to a particular file in a single session, so
+ *    there's no easy way to know when we've seen the last occurrence.
+ *    The solution here is to queue one link until we see the next.
+ *    At the end of the session, you can enumerate any remaining
+ *    entries by calling archive_entry_linkify(NULL) and store those
+ *    bodies.  If you have a file with three links l1, l2, and l3,
+ *    you'll get the following behavior if you see all three links:
+ *           linkify(l1) => NULL   (the resolver stores l1 internally)
+ *           linkify(l2) => l1     (resolver stores l2, you write l1)
+ *           linkify(l3) => l2, l3 (all links seen, you can write both).
+ *    If you only see l1 and l2, you'll get this behavior:
+ *           linkify(l1) => NULL
+ *           linkify(l2) => l1
+ *           linkify(NULL) => l2   (at end, you retrieve remaining links)
+ *    As the name suggests, this strategy is used by newer cpio variants.
+ *    It's noticeably more complex for the archiver, slightly more complex
+ *    for the dearchiver than the tar strategy, but makes it straightforward
+ *    to restore a file using any link by simply continuing to scan until
+ *    you see a link that is stored with a body.  In contrast, the tar
+ *    strategy requires you to rescan the archive from the beginning to
+ *    correctly extract an arbitrary link.
+ */
+
+__LA_DECL struct archive_entry_linkresolver *archive_entry_linkresolver_new(void);
+__LA_DECL void archive_entry_linkresolver_set_strategy(
+	struct archive_entry_linkresolver *, int /* format_code */);
+__LA_DECL void archive_entry_linkresolver_free(struct archive_entry_linkresolver *);
+__LA_DECL void archive_entry_linkify(struct archive_entry_linkresolver *,
+    struct archive_entry **, struct archive_entry **);
 
 #ifdef __cplusplus
 }
 #endif
 
+/* This is meaningless outside of this header. */
+#undef __LA_DECL
+
 #endif /* !ARCHIVE_ENTRY_H_INCLUDED */
diff --git a/libarchive/archive_entry_link_resolver.c b/libarchive/archive_entry_link_resolver.c
index 78a3c65d0..0df9ff92e 100644
--- a/libarchive/archive_entry_link_resolver.c
+++ b/libarchive/archive_entry_link_resolver.c
@@ -40,135 +40,216 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.1 2007/
 #include <string.h>
 #endif
 
+#include "archive.h"
 #include "archive_entry.h"
 
+/*
+ * This is mostly a pretty straightforward hash table implementation.
+ * The only interesting bit is the different strategies used to
+ * match up links.  These strategies match those used by various
+ * archiving formats:
+ *   tar - content stored with first link, remainder refer back to it.
+ *       This requires us to match each subsequent link up with the
+ *       first appearance.
+ *   cpio - Old cpio just stored body with each link, match-ups were
+ *       implicit.  This is trivial.
+ *   new cpio - New cpio only stores body with last link, match-ups
+ *       are implicit.  This is actually quite tricky; see the notes
+ *       below.
+ */
+
+/* Users pass us a format code, we translate that into a strategy here. */
+#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR	0
+#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 1
+#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 2
+
 /* Initial size of link cache. */
 #define	links_cache_initial_size 1024
 
+struct links_entry {
+	struct links_entry	*next;
+	struct links_entry	*previous;
+	int			 links; /* # links not yet seen */
+	int			 hash;
+	struct archive_entry	*canonical;
+	struct archive_entry	*entry;
+};
+
 struct archive_entry_linkresolver {
-	char			 *last_name;
+	struct links_entry	**buckets;
+	struct links_entry	 *spare;
 	unsigned long		  number_entries;
 	size_t			  number_buckets;
-	struct links_entry	**buckets;
+	int			  strategy;
 };
 
-struct links_entry {
-	struct links_entry	*next;
-	struct links_entry	*previous;
-	int			 links;
-	dev_t			 dev;
-	ino_t			 ino;
-	char			*name;
-};
+static struct links_entry *find_entry(struct archive_entry_linkresolver *,
+		    struct archive_entry *);
+static void grow_hash(struct archive_entry_linkresolver *);
+static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
+		    struct archive_entry *);
+static struct links_entry *next_entry(struct archive_entry_linkresolver *);
 
 struct archive_entry_linkresolver *
 archive_entry_linkresolver_new(void)
 {
-	struct archive_entry_linkresolver *links_cache;
+	struct archive_entry_linkresolver *res;
 	size_t i;
 
-	links_cache = malloc(sizeof(struct archive_entry_linkresolver));
-	if (links_cache == NULL)
+	res = malloc(sizeof(struct archive_entry_linkresolver));
+	if (res == NULL)
 		return (NULL);
-	memset(links_cache, 0, sizeof(struct archive_entry_linkresolver));
-	links_cache->number_buckets = links_cache_initial_size;
-	links_cache->buckets = malloc(links_cache->number_buckets *
-	    sizeof(links_cache->buckets[0]));
-	if (links_cache->buckets == NULL) {
-		free(links_cache);
+	memset(res, 0, sizeof(struct archive_entry_linkresolver));
+	res->number_buckets = links_cache_initial_size;
+	res->buckets = malloc(res->number_buckets *
+	    sizeof(res->buckets[0]));
+	if (res->buckets == NULL) {
+		free(res);
 		return (NULL);
 	}
-	for (i = 0; i < links_cache->number_buckets; i++)
-		links_cache->buckets[i] = NULL;
-	return (links_cache);
+	for (i = 0; i < res->number_buckets; i++)
+		res->buckets[i] = NULL;
+	return (res);
 }
 
 void
-archive_entry_linkresolver_free(struct archive_entry_linkresolver *links_cache)
+archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
+    int fmt)
 {
-	size_t i;
+	int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
+
+	switch (fmtbase) {
+	case ARCHIVE_FORMAT_CPIO:
+		switch (fmt) {
+		case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
+		case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
+			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
+			break;
+		default:
+			res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
+			break;
+		}
+		break;
+	case ARCHIVE_FORMAT_TAR:
+		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
+		break;
+	default:
+		res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
+		break;
+	}
+}
+
+void
+archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
+{
+	struct links_entry *le;
+
+	if (res->buckets != NULL) {
+		while ((le = next_entry(res)) != NULL)
+			archive_entry_free(le->entry);
+		free(res->buckets);
+		res->buckets = NULL;
+	}
+	free(res);
+}
+
+void
+archive_entry_linkify(struct archive_entry_linkresolver *res,
+    struct archive_entry **e, struct archive_entry **f)
+{
+	struct links_entry *le;
+	struct archive_entry *t;
+
+	*f = NULL; /* Default: Don't return a second entry. */
+
+	if (*e == NULL) {
+		le = next_entry(res);
+		if (le != NULL)
+			*e = le->entry;
+		return;
+	}
 
-	if (links_cache->buckets == NULL)
+	/* If it has only one link, then we're done. */
+	if (archive_entry_nlink(*e) == 1)
 		return;
 
-	for (i = 0; i < links_cache->number_buckets; i++) {
-		while (links_cache->buckets[i] != NULL) {
-			struct links_entry *lp = links_cache->buckets[i]->next;
-			if (links_cache->buckets[i]->name != NULL)
-				free(links_cache->buckets[i]->name);
-			free(links_cache->buckets[i]);
-			links_cache->buckets[i] = lp;
+	switch (res->strategy) {
+	case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
+		le = find_entry(res, *e);
+		if (le != NULL) {
+			archive_entry_set_size(*e, 0);
+			archive_entry_set_hardlink(*e,
+			    archive_entry_pathname(le->canonical));
+		} else
+			insert_entry(res, *e);
+		return;
+	case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
+		/* This one is trivial. */
+		return;
+	case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
+		le = find_entry(res, *e);
+		if (le != NULL) {
+			/*
+			 * Put the new entry in le, return the
+			 * old entry from le.
+			 */
+			t = *e;
+			*e = le->entry;
+			le->entry = t;
+			/* Make the old entry into a hardlink. */
+			archive_entry_set_size(*e, 0);
+			archive_entry_set_hardlink(*e,
+			    archive_entry_pathname(le->canonical));
+			/* If we ran out of links, return the
+			 * final entry as well. */
+			if (le->links == 0)
+				*f = le->entry;
+		} else {
+			/*
+			 * If we haven't seen it, tuck it away
+			 * for future use.
+			 */
+			le = insert_entry(res, *e);
+			le->entry = *e;
+			*e = NULL;
 		}
+		return;
+	default:
+		break;
 	}
-	free(links_cache->buckets);
-	links_cache->buckets = NULL;
+	return;
 }
 
-const char *
-archive_entry_linkresolve(struct archive_entry_linkresolver *links_cache,
+static struct links_entry *
+find_entry(struct archive_entry_linkresolver *res,
     struct archive_entry *entry)
 {
-	struct links_entry	*le, **new_buckets;
-	int			 hash;
-	size_t			 i, new_size;
+	struct links_entry	*le;
+	int			 hash, bucket;
 	dev_t			 dev;
 	ino_t			 ino;
-	int			 nlinks;
-
 
-	/* Free a held name. */
-	free(links_cache->last_name);
-	links_cache->last_name = NULL;
+	/* Free a held entry. */
+	if (res->spare != NULL) {
+		archive_entry_free(res->spare->canonical);
+		free(res->spare);
+		res->spare = NULL;
+	}
 
 	/* If the links cache overflowed and got flushed, don't bother. */
-	if (links_cache->buckets == NULL)
+	if (res->buckets == NULL)
 		return (NULL);
 
 	dev = archive_entry_dev(entry);
 	ino = archive_entry_ino(entry);
-	nlinks = archive_entry_nlink(entry);
-
-	/* An entry with one link can't be a hard link. */
-	if (nlinks == 1)
-		return (NULL);
-
-	/* If the links cache is getting too full, enlarge the hash table. */
-	if (links_cache->number_entries > links_cache->number_buckets * 2)
-	{
-		/* Try to enlarge the bucket list. */
-		new_size = links_cache->number_buckets * 2;
-		new_buckets = malloc(new_size * sizeof(struct links_entry *));
-
-		if (new_buckets != NULL) {
-			memset(new_buckets, 0,
-			    new_size * sizeof(struct links_entry *));
-			for (i = 0; i < links_cache->number_buckets; i++) {
-				while (links_cache->buckets[i] != NULL) {
-					/* Remove entry from old bucket. */
-					le = links_cache->buckets[i];
-					links_cache->buckets[i] = le->next;
-
-					/* Add entry to new bucket. */
-					hash = (le->dev ^ le->ino) % new_size;
-
-					if (new_buckets[hash] != NULL)
-						new_buckets[hash]->previous =
-						    le;
-					le->next = new_buckets[hash];
-					le->previous = NULL;
-					new_buckets[hash] = le;
-				}
-			}
-			free(links_cache->buckets);
-			links_cache->buckets = new_buckets;
-			links_cache->number_buckets = new_size;
-		}
-	}
+	hash = dev ^ ino;
 
 	/* Try to locate this entry in the links cache. */
-	hash = ( dev ^ ino ) % links_cache->number_buckets;
-	for (le = links_cache->buckets[hash]; le != NULL; le = le->next) {
-		if (le->dev == dev && le->ino == ino) {
+	bucket = hash % res->number_buckets;
+	for (le = res->buckets[bucket]; le != NULL; le = le->next) {
+		if (le->hash == hash
+		    && dev == archive_entry_dev(le->entry)
+		    && ino == archive_entry_ino(le->entry)) {
 			/*
 			 * Decrement link count each time and release
 			 * the entry if it hits zero.  This saves
@@ -177,46 +258,123 @@ archive_entry_linkresolve(struct archive_entry_linkresolver *links_cache,
 			 */
 			--le->links;
 			if (le->links > 0)
-				return (le->name);
-			/*
-			 * When we release the entry, save the name
-			 * until the next call.
-			 */
-			links_cache->last_name = le->name;
-			/*
-			 * Release the entry.
-			 */
+				return (le);
+			/* Remove it from this hash bucket. */
 			if (le->previous != NULL)
 				le->previous->next = le->next;
 			if (le->next != NULL)
 				le->next->previous = le->previous;
-			if (links_cache->buckets[hash] == le)
-				links_cache->buckets[hash] = le->next;
-			links_cache->number_entries--;
-			free(le);
-			return (links_cache->last_name);
+			if (res->buckets[bucket] == le)
+				res->buckets[bucket] = le->next;
+			res->number_entries--;
+			/* Defer freeing this entry. */
+			res->spare = le;
+			return (le);
 		}
 	}
+	return (NULL);
+}
+
+static struct links_entry *
+next_entry(struct archive_entry_linkresolver *res)
+{
+	struct links_entry	*le;
+	size_t			 bucket;
+
+	/* Free a held entry. */
+	if (res->spare != NULL) {
+		archive_entry_free(res->spare->canonical);
+		free(res->spare);
+		res->spare = NULL;
+	}
+
+	/* If the links cache overflowed and got flushed, don't bother. */
+	if (res->buckets == NULL)
+		return (NULL);
+
+	/* Look for next non-empty bucket in the links cache. */
+	for (bucket = 0; bucket < res->number_buckets; bucket++) {
+		le = res->buckets[bucket];
+		if (le != NULL) {
+			/* Remove it from this hash bucket. */
+			if (le->next != NULL)
+				le->next->previous = le->previous;
+			res->buckets[bucket] = le->next;
+			res->number_entries--;
+			/* Defer freeing this entry. */
+			res->spare = le;
+			return (le);
+		}
+	}
+	return (NULL);
+}
+
+static struct links_entry *
+insert_entry(struct archive_entry_linkresolver *res,
+    struct archive_entry *entry)
+{
+	struct links_entry *le;
+	int			 hash, bucket;
 
 	/* Add this entry to the links cache. */
 	le = malloc(sizeof(struct links_entry));
 	if (le == NULL)
 		return (NULL);
-	le->name = strdup(archive_entry_pathname(entry));
-	if (le->name == NULL) {
-		free(le);
-		return (NULL);
-	}
+	le->entry = entry;
+
+	/* If the links cache is getting too full, enlarge the hash table. */
+	if (res->number_entries > res->number_buckets * 2)
+		grow_hash(res);
+
+	hash = archive_entry_dev(entry) ^ archive_entry_ino(entry);
+	bucket = hash % res->number_buckets;
 
 	/* If we could allocate the entry, record it. */
-	if (links_cache->buckets[hash] != NULL)
-		links_cache->buckets[hash]->previous = le;
-	links_cache->number_entries++;
-	le->next = links_cache->buckets[hash];
+	if (res->buckets[bucket] != NULL)
+		res->buckets[bucket]->previous = le;
+	res->number_entries++;
+	le->next = res->buckets[bucket];
 	le->previous = NULL;
-	links_cache->buckets[hash] = le;
-	le->dev = dev;
-	le->ino = ino;
-	le->links = nlinks - 1;
-	return (NULL);
+	res->buckets[bucket] = le;
+	le->hash = hash;
+	le->links = archive_entry_nlink(entry) - 1;
+	le->canonical = archive_entry_clone(entry);
+	return (le);
+}
+
+static void
+grow_hash(struct archive_entry_linkresolver *res)
+{
+	struct links_entry *le, **new_buckets;
+	size_t new_size;
+	size_t i, bucket;
+
+	/* Try to enlarge the bucket list. */
+	new_size = res->number_buckets * 2;
+	new_buckets = malloc(new_size * sizeof(struct links_entry *));
+
+	if (new_buckets != NULL) {
+		memset(new_buckets, 0,
+		    new_size * sizeof(struct links_entry *));
+		for (i = 0; i < res->number_buckets; i++) {
+			while (res->buckets[i] != NULL) {
+				/* Remove entry from old bucket. */
+				le = res->buckets[i];
+				res->buckets[i] = le->next;
+
+				/* Add entry to new bucket. */
+				bucket = le->hash % new_size;
+
+				if (new_buckets[bucket] != NULL)
+					new_buckets[bucket]->previous =
+					    le;
+				le->next = new_buckets[bucket];
+				le->previous = NULL;
+				new_buckets[bucket] = le;
+			}
+		}
+		free(res->buckets);
+		res->buckets = new_buckets;
+		res->number_buckets = new_size;
+	}
 }
diff --git a/libarchive/archive_entry_private.h b/libarchive/archive_entry_private.h
index 0d368a4dd..f893fb982 100644
--- a/libarchive/archive_entry_private.h
+++ b/libarchive/archive_entry_private.h
@@ -28,17 +28,25 @@
 #ifndef ARCHIVE_ENTRY_PRIVATE_H_INCLUDED
 #define	ARCHIVE_ENTRY_PRIVATE_H_INCLUDED
 
+#include "archive_string.h"
+
 /*
  * Handle wide character (i.e., Unicode) and non-wide character
  * strings transparently.
- *
  */
 
 struct aes {
-	const char *aes_mbs;
-	char *aes_mbs_alloc;
+	struct archive_string aes_mbs;
+	struct archive_string aes_utf8;
 	const wchar_t *aes_wcs;
-	wchar_t *aes_wcs_alloc;
+	/* Bitmap of which of the above are valid.  Because we're lazy
+	 * about malloc-ing and reusing the underlying storage, we
+	 * can't rely on NULL pointers to indicate whether a string
+	 * has been set. */
+	int aes_set;
+#define	AES_SET_MBS 1
+#define	AES_SET_UTF8 2
+#define	AES_SET_WCS 4
 };
 
 struct ae_acl {
@@ -128,8 +136,6 @@ struct archive_entry {
 		dev_t		aest_rdevminor;
 	} ae_stat;
 
-
-
 	/*
 	 * Use aes here so that we get transparent mbs<->wcs conversions.
 	 */
@@ -141,15 +147,23 @@ struct archive_entry {
 	struct aes ae_pathname;	/* Name of entry */
 	struct aes ae_symlink;		/* symlink contents */
 	struct aes ae_uname;		/* Name of owner */
+	unsigned char	ae_hardlinkset;
+	unsigned char	ae_symlinkset;
+
+	/* Not used within libarchive; useful for some clients. */
+	struct aes ae_sourcepath;	/* Path this entry is sourced from. */
 
+	/* ACL support. */
 	struct ae_acl	*acl_head;
 	struct ae_acl	*acl_p;
 	int		 acl_state;	/* See acl_next for details. */
 	wchar_t		*acl_text_w;
 
+	/* extattr support. */
 	struct ae_xattr *xattr_head;
 	struct ae_xattr *xattr_p;
 
+	/* Miscellaneous. */
 	char		 strmode[12];
 };
 
diff --git a/libarchive/archive_platform.h b/libarchive/archive_platform.h
index b14ccd820..41fd4e549 100644
--- a/libarchive/archive_platform.h
+++ b/libarchive/archive_platform.h
@@ -36,6 +36,9 @@
 #ifndef ARCHIVE_PLATFORM_H_INCLUDED
 #define	ARCHIVE_PLATFORM_H_INCLUDED
 
+/* archive.h and archive_entry.h require this. */
+#define	__LIBARCHIVE_BUILD 1
+
 #ifdef _WIN32
 #include "config_windows.h"
 #include "archive_windows.h"
diff --git a/libarchive/archive_read_support_format_iso9660.c b/libarchive/archive_read_support_format_iso9660.c
index d333f0ccb..ee9831b90 100644
--- a/libarchive/archive_read_support_format_iso9660.c
+++ b/libarchive/archive_read_support_format_iso9660.c
@@ -908,6 +908,11 @@ fprintf(stderr, " *** Discarding CE data.\n");
 			file->ce_size = 0;
 		}
 
+		/* Don't waste time seeking for zero-length bodies. */
+		if (file->size == 0) {
+			file->offset = iso9660->current_position;
+		}
+
 		/* If CE exists, find and read it now. */
 		if (file->ce_offset > 0)
 			offset = file->ce_offset;
diff --git a/libarchive/archive_read_support_format_tar.c b/libarchive/archive_read_support_format_tar.c
index 76fda2d63..147ec0b27 100644
--- a/libarchive/archive_read_support_format_tar.c
+++ b/libarchive/archive_read_support_format_tar.c
@@ -145,6 +145,8 @@ struct sparse_block {
 struct tar {
 	struct archive_string	 acl_text;
 	struct archive_string	 entry_pathname;
+	/* For "GNU.sparse.name" and other similar path extensions. */
+	struct archive_string	 entry_pathname_override;
 	struct archive_string	 entry_linkpath;
 	struct archive_string	 entry_uname;
 	struct archive_string	 entry_gname;
@@ -272,6 +274,7 @@ archive_read_format_tar_cleanup(struct archive_read *a)
 	gnu_clear_sparse_list(tar);
 	archive_string_free(&tar->acl_text);
 	archive_string_free(&tar->entry_pathname);
+	archive_string_free(&tar->entry_pathname_override);
 	archive_string_free(&tar->entry_linkpath);
 	archive_string_free(&tar->entry_uname);
 	archive_string_free(&tar->entry_gname);
@@ -1174,7 +1177,6 @@ pax_header(struct archive_read *a, struct tar *tar,
 	size_t attr_length, l, line_length;
 	char *line, *p;
 	char *key, *value;
-	wchar_t *wp;
 	int err, err2;
 
 	attr_length = strlen(attr);
@@ -1182,6 +1184,7 @@ pax_header(struct archive_read *a, struct tar *tar,
 	archive_string_empty(&(tar->entry_gname));
 	archive_string_empty(&(tar->entry_linkpath));
 	archive_string_empty(&(tar->entry_pathname));
+	archive_string_empty(&(tar->entry_pathname_override));
 	archive_string_empty(&(tar->entry_uname));
 	err = ARCHIVE_OK;
 	while (attr_length > 0) {
@@ -1257,13 +1260,13 @@ pax_header(struct archive_read *a, struct tar *tar,
 		if (tar->pax_hdrcharset_binary)
 			archive_entry_copy_gname(entry, value);
 		else {
-			wp = utf8_decode(tar, value, strlen(value));
-			if (wp == NULL) {
-				archive_entry_copy_gname(entry, value);
-				if (err > ARCHIVE_WARN)
-					err = ARCHIVE_WARN;
-			} else
-				archive_entry_copy_gname_w(entry, wp);
+			if (!archive_entry_update_gname_utf8(entry, value)) {
+				err = ARCHIVE_WARN;
+				archive_set_error(&a->archive,
+				    ARCHIVE_ERRNO_FILE_FORMAT,
+				    "Gname in pax header can't "
+				    "be converted to current locale.");
+			}
 		}
 	}
 	if (archive_strlen(&(tar->entry_linkpath)) > 0) {
@@ -1271,27 +1274,40 @@ pax_header(struct archive_read *a, struct tar *tar,
 		if (tar->pax_hdrcharset_binary)
 			archive_entry_copy_link(entry, value);
 		else {
-			wp = utf8_decode(tar, value, strlen(value));
-			if (wp == NULL) {
-				archive_entry_copy_link(entry, value);
-				if (err > ARCHIVE_WARN)
-					err = ARCHIVE_WARN;
-			} else
-				archive_entry_copy_link_w(entry, wp);
+			if (!archive_entry_update_link_utf8(entry, value)) {
+				err = ARCHIVE_WARN;
+				archive_set_error(&a->archive,
+				    ARCHIVE_ERRNO_FILE_FORMAT,
+				    "Linkname in pax header can't "
+				    "be converted to current locale.");
+			}
 		}
 	}
-	if (archive_strlen(&(tar->entry_pathname)) > 0) {
+	/*
+	 * Some extensions (such as the GNU sparse file extensions)
+	 * deliberately store a synthetic name under the regular 'path'
+	 * attribute and the real file name under a different attribute.
+	 * Since we're supposed to not care about the order, we
+	 * have no choice but to store all of the various filenames
+	 * we find and figure it all out afterwards.  This is the
+	 * figuring out part.
+	 */
+	value = NULL;
+	if (archive_strlen(&(tar->entry_pathname_override)) > 0)
+		value = tar->entry_pathname_override.s;
+	else if (archive_strlen(&(tar->entry_pathname)) > 0)
 		value = tar->entry_pathname.s;
+	if (value != NULL) {
 		if (tar->pax_hdrcharset_binary)
 			archive_entry_copy_pathname(entry, value);
 		else {
-			wp = utf8_decode(tar, value, strlen(value));
-			if (wp == NULL) {
-				archive_entry_copy_pathname(entry, value);
-				if (err > ARCHIVE_WARN)
-					err = ARCHIVE_WARN;
-			} else
-				archive_entry_copy_pathname_w(entry, wp);
+			if (!archive_entry_update_pathname_utf8(entry, value)) {
+				err = ARCHIVE_WARN;
+				archive_set_error(&a->archive,
+				    ARCHIVE_ERRNO_FILE_FORMAT,
+				    "Pathname in pax header can't be "
+				    "converted to current locale.");
+			}
 		}
 	}
 	if (archive_strlen(&(tar->entry_uname)) > 0) {
@@ -1299,13 +1315,13 @@ pax_header(struct archive_read *a, struct tar *tar,
 		if (tar->pax_hdrcharset_binary)
 			archive_entry_copy_uname(entry, value);
 		else {
-			wp = utf8_decode(tar, value, strlen(value));
-			if (wp == NULL) {
-				archive_entry_copy_uname(entry, value);
-				if (err > ARCHIVE_WARN)
-					err = ARCHIVE_WARN;
-			} else
-				archive_entry_copy_uname_w(entry, wp);
+			if (!archive_entry_update_uname_utf8(entry, value)) {
+				err = ARCHIVE_WARN;
+				archive_set_error(&a->archive,
+				    ARCHIVE_ERRNO_FILE_FORMAT,
+				    "Uname in pax header can't "
+				    "be converted to current locale.");
+			}
 		}
 	}
 	return (err);
@@ -1415,11 +1431,13 @@ pax_attribute(struct tar *tar, struct archive_entry *entry,
 			tar->sparse_gnu_pending = 1;
 		}
 		if (strcmp(key, "GNU.sparse.name") == 0) {
-			wp = utf8_decode(tar, value, strlen(value));
-			if (wp != NULL)
-				archive_entry_copy_pathname_w(entry, wp);
-			else
-				archive_entry_copy_pathname(entry, value);
+			/*
+			 * The real filename; when storing sparse
+			 * files, GNU tar puts a synthesized name into
+			 * the regular 'path' attribute in an attempt
+			 * to limit confusion. ;-)
+			 */
+			archive_strcpy(&(tar->entry_pathname_override), value);
 		}
 		if (strcmp(key, "GNU.sparse.realsize") == 0) {
 			tar->realsize = tar_atol10(value, strlen(value));
@@ -1455,9 +1473,7 @@ pax_attribute(struct tar *tar, struct archive_entry *entry,
 			archive_entry_set_rdevminor(entry,
 			    tar_atol10(value, strlen(value)));
 		} else if (strcmp(key, "SCHILY.fflags")==0) {
-			wp = utf8_decode(tar, value, strlen(value));
-			/* TODO: if (wp == NULL) */
-			archive_entry_copy_fflags_text_w(entry, wp);
+			archive_entry_copy_fflags_text(entry, value);
 		} else if (strcmp(key, "SCHILY.dev")==0) {
 			archive_entry_set_dev(entry,
 			    tar_atol10(value, strlen(value)));
diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c
index 3c951313d..a105297ad 100644
--- a/libarchive/archive_read_support_format_zip.c
+++ b/libarchive/archive_read_support_format_zip.c
@@ -162,11 +162,9 @@ archive_read_support_format_zip(struct archive *_a)
 static int
 archive_read_format_zip_bid(struct archive_read *a)
 {
-	int bid = 0;
 	const char *p;
-
-	if (a->archive.archive_format == ARCHIVE_FORMAT_ZIP)
-		bid += 1;
+	const void *buff;
+	size_t bytes_avail;
 
 	if ((p = __archive_read_ahead(a, 4)) == NULL)
 		return (-1);
@@ -184,9 +182,104 @@ archive_read_format_zip_bid(struct archive_read *a)
 		    || (p[2] == '0' && p[3] == '0'))
 			return (30);
 	}
+
+	/*
+	 * Attempt to handle self-extracting archives
+	 * by noting a PE header and searching forward
+	 * up to 128k for a 'PK\003\004' marker.
+	 */
+	if (p[0] == 'M' && p[1] == 'Z') {
+		/*
+		 * TODO: Additional checks that this really is a PE
+		 * file before we invoke the 128k lookahead below.
+		 * No point in allocating a bigger lookahead buffer
+		 * if we don't need to.
+		 *
+		 * TODO: Of course, the compression layer lookahead
+		 * buffers aren't dynamically sized yet; they should be.
+		 */
+		bytes_avail = (a->decompressor->read_ahead)(a, &buff, 128*1024);
+		p = (const char *)buff;
+
+		/*
+		 * TODO: Optimize by jumping forward based on values
+		 * in the PE header.  Note that we don't need to be
+		 * exact, but we mustn't skip too far.  The search
+		 * below will compensate if we undershoot.  Skipping
+		 * will also reduce the chance of false positives
+		 * (which is not really all that high to begin with,
+		 * so maybe skipping isn't really necessary).
+		 */
+		/*
+		 * Each probe reads p[0]..p[9]; bytes_avail counts the
+		 * bytes left at p, so stop once fewer than 10 remain.
+		 */
+		while (bytes_avail >= 10) {
+			if (p[0] == 'P' && p[1] == 'K' /* "PK" signature */
+			    && p[2] == 3 && p[3] == 4 /* File entry */
+			    && p[8] == 8 /* compression == deflate */
+			    && p[9] == 0 /* High byte of compression */)
+				return (30);
+			++p;
+			--bytes_avail;
+		}
+	}
+
 	return (0);
 }
 
+/*
+ * Search forward for a "PK\003\004" file header.  This handles
+ * self-extracting archives, where an executable is prepended to the
+ * ZIP archive.  Returns ARCHIVE_OK after consuming the bytes ahead of
+ * the marker, or ARCHIVE_FATAL once fewer than 4 bytes can be read.
+ */
+static int
+skip_sfx(struct archive_read *a)
+{
+	const void *h;
+	const char *p, *q;
+	size_t skip, bytes;
+
+	/*
+	 * TODO: We should be able to skip forward by a bunch by lifting
+	 * some values from the PE header.  We don't need to be exact
+	 * (we still search forward to find the header), but it will
+	 * speed things up and reduce the chance of a false positive.
+	 */
+	for (;;) {
+		bytes = (a->decompressor->read_ahead)(a, &h, 4096);
+		if (bytes < 4)
+			return (ARCHIVE_FATAL);
+		p = h;
+		q = p + bytes;
+
+		/*
+		 * Test every full 4-byte window ("<=", not "<"), so each
+		 * pass finds the marker or consumes at least one byte.
+		 */
+		while (p + 4 <= q) {
+			switch (p[3]) {
+			case '\004':
+				/* TODO: Additional verification here. */
+				if (memcmp("PK\003\004", p, 4) == 0) {
+					skip = p - (const char *)h;
+					(a->decompressor->consume)(a, skip);
+					return (ARCHIVE_OK);
+				}
+				p += 4;
+				break;
+			case '\003': p += 1; break;
+			case 'K': p += 2; break;
+			case 'P': p += 3; break;
+			default: p += 4; break;
+			}
+		}
+		skip = p - (const char *)h;
+		(a->decompressor->consume)(a, skip);
+	}
+}
+
 static int
 archive_read_format_zip_read_header(struct archive_read *a,
     struct archive_entry *entry)
@@ -194,6 +287,7 @@ archive_read_format_zip_read_header(struct archive_read *a,
 	const void *h;
 	const char *signature;
 	struct zip *zip;
+	int r = ARCHIVE_OK, r1;
 
 	a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
 	if (a->archive.archive_format_name == NULL)
@@ -209,6 +303,16 @@ archive_read_format_zip_read_header(struct archive_read *a,
 		return (ARCHIVE_FATAL);
 
 	signature = (const char *)h;
+	if (signature[0] == 'M' && signature[1] == 'Z') {
+		/* This is an executable?  Must be self-extracting... */
+		r = skip_sfx(a);
+		if (r < ARCHIVE_WARN)
+			return (r);
+		if ((h = __archive_read_ahead(a, 4)) == NULL)
+			return (ARCHIVE_FATAL);
+		signature = (const char *)h;
+	}
+
 	if (signature[0] != 'P' || signature[1] != 'K') {
 		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
 		    "Bad ZIP file");
@@ -239,7 +343,10 @@ archive_read_format_zip_read_header(struct archive_read *a,
 
 	if (signature[2] == '\003' && signature[3] == '\004') {
 		/* Regular file entry. */
-		return (zip_read_file_header(a, entry, zip));
+		r1 = zip_read_file_header(a, entry, zip);
+		if (r1 != ARCHIVE_OK)
+			return (r1);
+		return (r);
 	}
 
 	if (signature[2] == '\005' && signature[3] == '\006') {
diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c
index 7e43b360a..e308c480b 100644
--- a/libarchive/archive_string.c
+++ b/libarchive/archive_string.c
@@ -37,6 +37,9 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_string.c,v 1.11 2007/07/15 19:13:
 #ifdef HAVE_STRING_H
 #include <string.h>
 #endif
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
 
 #include "archive_private.h"
 #include "archive_string.h"
@@ -55,11 +58,15 @@ __archive_string_append(struct archive_string *as, const char *p, size_t s)
 void
 __archive_string_copy(struct archive_string *dest, struct archive_string *src)
 {
-	if (__archive_string_ensure(dest, src->length + 1) == NULL)
-		__archive_errx(1, "Out of memory");
-	memcpy(dest->s, src->s, src->length);
-	dest->length = src->length;
-	dest->s[dest->length] = 0;
+	if (src->length == 0)
+		dest->length = 0;
+	else {
+		if (__archive_string_ensure(dest, src->length + 1) == NULL)
+			__archive_errx(1, "Out of memory");
+		memcpy(dest->s, src->s, src->length);
+		dest->length = src->length;
+		dest->s[dest->length] = 0;
+	}
 }
 
 void
@@ -67,21 +74,52 @@ __archive_string_free(struct archive_string *as)
 {
 	as->length = 0;
 	as->buffer_length = 0;
-	if (as->s != NULL)
+	if (as->s != NULL) {
 		free(as->s);
+		as->s = NULL;
+	}
 }
 
 /* Returns NULL on any allocation failure. */
 struct archive_string *
 __archive_string_ensure(struct archive_string *as, size_t s)
 {
+	/* If buffer is already big enough, don't reallocate. */
 	if (as->s && (s <= as->buffer_length))
 		return (as);
 
+	/*
+	 * Growing the buffer at least exponentially ensures that
+	 * append operations are always linear in the number of
+	 * characters appended.  Using a smaller growth rate for
+	 * larger buffers reduces memory waste somewhat at the cost of
+	 * a larger constant factor.
+	 */
 	if (as->buffer_length < 32)
+		/* Start with a minimum 32-character buffer. */
 		as->buffer_length = 32;
-	while (as->buffer_length < s)
+	else if (as->buffer_length < 8192)
+		/* Buffers under 8k are doubled for speed. */
 		as->buffer_length *= 2;
+	else {
+		/* Buffers 8k and over grow by at least 25% each time. */
+		size_t old_length = as->buffer_length;
+		as->buffer_length = (as->buffer_length * 5) / 4;
+		/* Be safe: If size wraps, release buffer and return NULL. */
+		if (as->buffer_length < old_length) {
+			free(as->s);
+			as->s = NULL;
+			return (NULL);
+		}
+	}
+	/*
+	 * The computation above is a lower limit to how much we'll
+	 * grow the buffer.  In any case, we have to grow it enough to
+	 * hold the request.
+	 */
+	if (as->buffer_length < s)
+		as->buffer_length = s;
+	/* Now we can reallocate the buffer. */
 	as->s = (char *)realloc(as->s, as->buffer_length);
 	if (as->s == NULL)
 		return (NULL);
@@ -124,3 +162,206 @@ __archive_strappend_int(struct archive_string *as, int d, int base)
 	__archive_strappend_char(as, digits[d % base]);
 	return (as);
 }
+
+/*
+ * Home-grown wcrtomb for UTF-8.
+ */
+static size_t
+my_wcrtomb_utf8(char *p, wchar_t wc, mbstate_t *s)
+{
+	(void)s; /* UNUSED */
+
+	if (p == NULL)
+		return (0);
+	if (wc <= 0x7f) {
+		p[0] = (char)wc;
+		return (1);
+	}
+	if (wc <= 0x7ff) {
+		p[0] = 0xc0 | ((wc >> 6) & 0x1f);
+		p[1] = 0x80 | (wc & 0x3f);
+		return (2);
+	}
+	if (wc <= 0xffff) {
+		p[0] = 0xe0 | ((wc >> 12) & 0x0f);
+		p[1] = 0x80 | ((wc >> 6) & 0x3f);
+		p[2] = 0x80 | (wc & 0x3f);
+		return (3);
+	}
+	if (wc <= 0x1fffff) {
+		p[0] = 0xf0 | ((wc >> 18) & 0x07);
+		p[1] = 0x80 | ((wc >> 12) & 0x3f);
+		p[2] = 0x80 | ((wc >> 6) & 0x3f);
+		p[3] = 0x80 | (wc & 0x3f);
+		return (4);
+	}
+	/* Unicode has no codes larger than 0x1fffff. */
+	/*
+	 * Awkward point:  UTF-8 <-> wchar_t conversions
+	 * can actually fail.
+	 */
+	return ((size_t)-1);
+}
+
+/*
+ * Convert 'w' one wide char at a time with 'func' and append the
+ * bytes to 'as'.  Returns 0, or -1 as soon as 'func' fails.
+ */
+static int
+my_wcstombs(struct archive_string *as, const wchar_t *w,
+    size_t (*func)(char *, wchar_t, mbstate_t *))
+{
+	size_t n;
+	char *p;
+	mbstate_t shift_state;
+	char buff[256];
+
+	/* A zero-filled mbstate_t is the initial conversion state. */
+	memset(&shift_state, 0, sizeof(shift_state));
+	p = buff;
+	while (*w != L'\0') {
+		/* Flush at <=16 bytes free; no encoding needs more per char. */
+		if ((size_t)(p - buff) >= (size_t)(sizeof(buff) - 16)) {
+			*p = '\0';
+			archive_strcat(as, buff);
+			p = buff;
+		}
+		n = (*func)(p, *w++, &shift_state);
+		if (n == (size_t)-1)
+			return (-1);
+		p += n;
+	}
+	*p = '\0';
+	archive_strcat(as, buff);
+	return (0);
+}
+
+/*
+ * Translates a wide character string into UTF-8 and appends
+ * to the archive_string.  Note: returns NULL if conversion fails.
+ */
+struct archive_string *
+__archive_strappend_w_utf8(struct archive_string *as, const wchar_t *w)
+{
+	if (my_wcstombs(as, w, my_wcrtomb_utf8))
+		return (NULL);
+	return (as);
+}
+
+/*
+ * Translates a wide character string into current locale character set
+ * and appends to the archive_string.  Note: returns NULL if conversion
+ * fails.
+ *
+ * TODO: use my_wcrtomb_utf8 if !HAVE_WCRTOMB (add configure logic first!)
+ */
+struct archive_string *
+__archive_strappend_w_mbs(struct archive_string *as, const wchar_t *w)
+{
+	if (my_wcstombs(as, w, wcrtomb))
+		return (NULL);
+	return (as);
+}
+
+
+/*
+ * Home-grown mbrtowc for UTF-8.  Some systems lack UTF-8
+ * (or even lack mbrtowc()) and we need UTF-8 support for pax
+ * format.  So please don't replace this with a call to the
+ * standard mbrtowc() function!
+ */
+static size_t
+my_mbrtowc_utf8(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
+{
+        int ch;
+
+	/*
+	 * This argument is here to make the prototype identical to the
+	 * standard mbrtowc(), so I can build generic string processors
+	 * that just accept a pointer to a suitable mbrtowc() function.
+	 */
+	(void)ps; /* UNUSED */
+
+	/* Standard behavior:  a NULL value for 's' just resets shift state. */
+        if (s == NULL)
+                return (0);
+	/* If length argument is zero, don't look at the first character. */
+	if (n <= 0)
+		return ((size_t)-2);
+
+        /*
+	 * Decode 1-4 bytes depending on the value of the first byte.
+	 */
+        ch = (unsigned char)*s;
+	if (ch == 0) {
+		return (0); /* Standard:  return 0 for end-of-string. */
+	}
+	if ((ch & 0x80) == 0) {
+                *pwc = ch & 0x7f;
+		return (1);
+        }
+	if ((ch & 0xe0) == 0xc0) {
+		if (n < 2)
+			return ((size_t)-2);
+		if ((s[1] & 0xc0) != 0x80) return (size_t)-1;
+                *pwc = ((ch & 0x1f) << 6) | (s[1] & 0x3f);
+		return (2);
+        }
+	if ((ch & 0xf0) == 0xe0) {
+		if (n < 3)
+			return ((size_t)-2);
+		if ((s[1] & 0xc0) != 0x80) return (size_t)-1;
+		if ((s[2] & 0xc0) != 0x80) return (size_t)-1;
+                *pwc = ((ch & 0x0f) << 12)
+		    | ((s[1] & 0x3f) << 6)
+		    | (s[2] & 0x3f);
+		return (3);
+        }
+	if ((ch & 0xf8) == 0xf0) {
+		if (n < 4)
+			return ((size_t)-2);
+		if ((s[1] & 0xc0) != 0x80) return (size_t)-1;
+		if ((s[2] & 0xc0) != 0x80) return (size_t)-1;
+		if ((s[3] & 0xc0) != 0x80) return (size_t)-1;
+                *pwc = ((ch & 0x07) << 18)
+		    | ((s[1] & 0x3f) << 12)
+		    | ((s[2] & 0x3f) << 6)
+		    | (s[3] & 0x3f);
+		return (4);
+        }
+	/* Invalid first byte. */
+	return ((size_t)-1);
+}
+
+/*
+ * Return a wide-character string by converting this archive_string
+ * from UTF-8.  The result is malloc()ed; returns NULL if any byte
+ * sequence is rejected by my_mbrtowc_utf8().
+ */
+wchar_t *
+__archive_string_utf8_w(struct archive_string *as)
+{
+	wchar_t *ws, *dest;
+	const char *src;
+	size_t n;
+
+	ws = (wchar_t *)malloc((as->length + 1) * sizeof(wchar_t));
+	if (ws == NULL)
+		__archive_errx(1, "Out of memory");
+	dest = ws;
+	/* A string that never had a buffer allocated converts to L"". */
+	src = (as->s != NULL) ? as->s : "";
+	while (*src != '\0') {
+		n = my_mbrtowc_utf8(dest, src, 8, NULL);
+		if (n == 0)
+			break;
+		if (n == (size_t)-1 || n == (size_t)-2) {
+			free(ws);
+			return (NULL);
+		}
+		dest++;
+		src += n;
+	}
+	*dest++ = L'\0';
+	return (ws);
+}
diff --git a/libarchive/archive_string.h b/libarchive/archive_string.h
index f56c50fe4..61e70777f 100644
--- a/libarchive/archive_string.h
+++ b/libarchive/archive_string.h
@@ -33,6 +33,9 @@
 #ifdef HAVE_STRING_H
 #include <string.h>
 #endif
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
 
 /*
  * Basic resizable/reusable string support a la Java's "StringBuffer."
@@ -60,16 +63,22 @@ struct archive_string *
 __archive_strappend_char(struct archive_string *, char);
 #define	archive_strappend_char __archive_strappend_char
 
-/* Append a char to an archive_string using UTF8. */
-struct archive_string *
-__archive_strappend_char_UTF8(struct archive_string *, int);
-#define	archive_strappend_char_UTF8 __archive_strappend_char_UTF8
-
 /* Append an integer in the specified base (2 <= base <= 16). */
 struct archive_string *
 __archive_strappend_int(struct archive_string *as, int d, int base);
 #define	archive_strappend_int __archive_strappend_int
 
+/* Convert a wide-char string to UTF-8 and append the result. */
+struct archive_string *
+__archive_strappend_w_utf8(struct archive_string *, const wchar_t *);
+#define	archive_strappend_w_utf8	__archive_strappend_w_utf8
+
+/* Convert a wide-char string to current locale and append the result. */
+/* Returns NULL if conversion fails. */
+struct archive_string *
+__archive_strappend_w_mbs(struct archive_string *, const wchar_t *);
+#define	archive_strappend_w_mbs	__archive_strappend_w_mbs
+
 /* Basic append operation. */
 struct archive_string *
 __archive_string_append(struct archive_string *as, const char *p, size_t s);
@@ -95,7 +104,7 @@ __archive_strncat(struct archive_string *, const char *, size_t);
 
 /* Copy a C string to an archive_string, resizing as necessary. */
 #define	archive_strcpy(as,p) \
-	((as)->length = 0, __archive_string_append((as), (p), strlen(p)))
+	((as)->length = 0, __archive_string_append((as), (p), p == NULL ? 0 : strlen(p)))
 
 /* Copy a C string to an archive_string with limit, resizing as necessary. */
 #define	archive_strncpy(as,p,l) \
@@ -119,4 +128,9 @@ void	__archive_string_vsprintf(struct archive_string *, const char *,
 void	__archive_string_sprintf(struct archive_string *, const char *, ...);
 #define	archive_string_sprintf	__archive_string_sprintf
 
+/* Allocates a fresh buffer and converts as (assumed to be UTF-8) into it.
+ * Returns NULL if conversion failed in any way. */
+wchar_t *__archive_string_utf8_w(struct archive_string *as);
+
+
 #endif
diff --git a/libarchive/archive_util.c b/libarchive/archive_util.c
index 69d69a513..55dd1fa10 100644
--- a/libarchive/archive_util.c
+++ b/libarchive/archive_util.c
@@ -77,32 +77,10 @@ archive_version_number(void)
 	return (ARCHIVE_VERSION_NUMBER);
 }
 
-/*
- * Format a version string of the form "libarchive x.y.z", where x, y,
- * z are the correct parts of the version ID from
- * archive_version_number().
- *
- * I used to do all of this at build time in shell scripts but that
- * proved to be a portability headache.
- */
-
 const char *
 archive_version_string(void)
 {
-	static char buff[128];
-	struct archive_string as;
-	int n;
-
-	if (buff[0] == '\0') {
-		n = archive_version_number();
-		memset(&as, 0, sizeof(as));
-		archive_string_sprintf(&as, "libarchive %d.%d.%d",
-		    n / 1000000, (n / 1000) % 1000, n % 1000);
-		strncpy(buff, as.s, sizeof(buff));
-		buff[sizeof(buff) - 1] = '\0';
-		archive_string_free(&as);
-	}
-	return (buff);
+	return (ARCHIVE_VERSION_STRING);
 }
 
 int
diff --git a/libarchive/archive_write_disk.c b/libarchive/archive_write_disk.c
index 620beac48..58a7fd22b 100644
--- a/libarchive/archive_write_disk.c
+++ b/libarchive/archive_write_disk.c
@@ -294,7 +294,7 @@ _archive_write_header(struct archive *_a, struct archive_entry *entry)
 	archive_clear_error(&a->archive);
 	if (a->archive.state & ARCHIVE_STATE_DATA) {
 		r = _archive_write_finish_entry(&a->archive);
-		if (r != ARCHIVE_OK)
+		if (r == ARCHIVE_FATAL)
 			return (r);
 	}
 
@@ -485,10 +485,12 @@ _archive_write_data_block(struct archive *_a,
 	/* Write the data. */
 	while (size > 0 && a->offset < a->filesize) {
 		if ((off_t)(a->offset + size) > a->filesize) {
-			size = (size_t)(a->filesize - a->offset);
-			archive_set_error(&a->archive, errno,
-			    "Write request too large");
+			archive_set_error(&a->archive, 0,
+			    "Write request too large (tried to write %u bytes, but only %u bytes remain)",
+			    (unsigned int)size,
+			    (unsigned int)(a->filesize - a->offset));
 			r = ARCHIVE_WARN;
+			size = (size_t)(a->filesize - a->offset);
 		}
 		bytes_written = write(a->fd, buff, size);
 		if (bytes_written < 0) {
diff --git a/libarchive/archive_write_set_format_pax.c b/libarchive/archive_write_set_format_pax.c
index d6e3e6c4b..89f89bc9f 100644
--- a/libarchive/archive_write_set_format_pax.c
+++ b/libarchive/archive_write_set_format_pax.c
@@ -386,7 +386,7 @@ archive_write_pax_header(struct archive_write *a,
 	const char *p;
 	char *t;
 	const wchar_t *wp;
-	const char *suffix_start;
+	const char *suffix;
 	int need_extension, r, ret;
 	struct pax *pax;
 	const char *hdrcharset = NULL;
@@ -496,34 +496,73 @@ archive_write_pax_header(struct archive_write *a,
 	if (hdrcharset != NULL)
 		add_pax_attr(&(pax->pax_header), "hdrcharset", hdrcharset);
 
-	/*
-	 * Determining whether or not the name is too big is ugly
-	 * because of the rules for dividing names between 'name' and
-	 * 'prefix' fields.  Here, I pick out the longest possible
-	 * suffix, then test whether the remaining prefix is too long.
-	 */
-	if (strlen(path) <= 100)    /* Short enough for just 'name' field */
-		suffix_start = path;	/* Record a zero-length prefix */
-	else
-		/* Find the largest suffix that fits in 'name' field. */
-		suffix_start = strchr(path + strlen(path) - 100 - 1, '/');
 
 	/*
 	 * If name is too long, or has non-ASCII characters, add
 	 * 'path' to pax extended attrs.  (Note that an unconvertible
 	 * name must have non-ASCII characters.)
 	 */
-	if (suffix_start == NULL || suffix_start - path > 155
-	    || path_w == NULL || has_non_ASCII(path_w)) {
-		if (path_w == NULL || hdrcharset != NULL)
+	if (path == NULL) {
+		/* We don't have a narrow version, so we have to store
+		 * the wide version. */
+		add_pax_attr_w(&(pax->pax_header), "path", path_w);
+		archive_entry_set_pathname(entry_main, "@WidePath");
+		need_extension = 1;
+	} else if (has_non_ASCII(path_w)) {
+		/* We have non-ASCII characters. */
+		if (path_w == NULL || hdrcharset != NULL) {
 			/* Can't do UTF-8, so store it raw. */
 			add_pax_attr(&(pax->pax_header), "path", path);
-		else
-			add_pax_attr_w(&(pax->pax_header), "path", path_w);
+		} else {
+			/* Store UTF-8 */
+			add_pax_attr_w(&(pax->pax_header),
+			    "path", path_w);
+		}
 		archive_entry_set_pathname(entry_main,
 		    build_ustar_entry_name(ustar_entry_name,
 			path, strlen(path), NULL));
 		need_extension = 1;
+	} else {
+		/* We have an all-ASCII path; we'd like to just store
+		 * it in the ustar header if it will fit.  Yes, this
+		 * duplicates some of the logic in
+		 * write_set_format_ustar.c
+		 */
+		if (strlen(path) <= 100) {
+			/* Fits in the old 100-char tar name field. */
+		} else {
+			/* Find largest suffix that will fit. */
+			/* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */
+			suffix = strchr(path + strlen(path) - 100 - 1, '/');
+			/* Don't attempt an empty prefix. */
+			if (suffix == path)
+				suffix = strchr(suffix + 1, '/');
+			/* We can put it in the ustar header if it's
+			 * all ASCII and it's either <= 100 characters
+			 * or can be split at a '/' into a prefix <=
+			 * 155 chars and a suffix <= 100 chars.  (Note
+			 * the strchr() above will return NULL exactly
+			 * when the path can't be split.)
+			 */
+			if (suffix == NULL       /* Suffix > 100 chars. */
+			    || suffix[1] == '\0'    /* empty suffix */
+			    || suffix - path > 155)  /* Prefix > 155 chars */
+			{
+				if (path_w == NULL || hdrcharset != NULL) {
+					/* Can't do UTF-8, so store it raw. */
+					add_pax_attr(&(pax->pax_header),
+					    "path", path);
+				} else {
+					/* Store UTF-8 */
+					add_pax_attr_w(&(pax->pax_header),
+					    "path", path_w);
+				}
+				archive_entry_set_pathname(entry_main,
+				    build_ustar_entry_name(ustar_entry_name,
+					path, strlen(path), NULL));
+				need_extension = 1;
+			}
+		}
 	}
 
 	if (linkpath != NULL) {
@@ -1215,6 +1254,8 @@ archive_write_pax_data(struct archive_write *a, const void *buff, size_t s)
 static int
 has_non_ASCII(const wchar_t *wp)
 {
+	if (wp == NULL)
+		return (1);
 	while (*wp != L'\0' && *wp < 128)
 		wp++;
 	return (*wp != L'\0');
diff --git a/libarchive/archive_write_set_format_ustar.c b/libarchive/archive_write_set_format_ustar.c
index c2c0011ae..e7f652d1d 100644
--- a/libarchive/archive_write_set_format_ustar.c
+++ b/libarchive/archive_write_set_format_ustar.c
@@ -206,7 +206,7 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry)
 	    !(archive_entry_filetype(entry) == AE_IFREG))
 		archive_entry_set_size(entry, 0);
 
-	if (AE_IFDIR == archive_entry_mode(entry)) {
+	if (AE_IFDIR == archive_entry_filetype(entry)) {
 		const char *p;
 		char *t;
 		/*
@@ -282,24 +282,30 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
 		/* Store in two pieces, splitting at a '/'. */
 		p = strchr(pp + strlen(pp) - USTAR_name_size - 1, '/');
 		/*
-		 * If the separator we found is the first '/', find
-		 * the next one.  (This is a pathological case that
-		 * occurs for paths of exactly 101 bytes that start with
-		 * '/'; it occurs because the separating '/' is not
-		 * stored explicitly and the reconstruction assumes that
-		 * an empty prefix means there is no '/' separator.)
+		 * Look for the next '/' if we chose the first character
+		 * as the separator.  (ustar format doesn't permit
+		 * an empty prefix.)
 		 */
 		if (p == pp)
 			p = strchr(p + 1, '/');
-		/*
-		 * If there is no path separator, or the prefix or
-		 * remaining name are too large, return an error.
-		 */
+		/* Fail if the name won't fit. */
 		if (!p) {
+			/* No separator. */
+			archive_set_error(&a->archive, ENAMETOOLONG,
+			    "Pathname too long");
+			ret = ARCHIVE_WARN;
+		} else if (p[1] == '\0') {
+			/*
+			 * The only feasible separator is a final '/';
+			 * this would result in a non-empty prefix and
+			 * an empty name, which POSIX doesn't
+			 * explicitly forbid, but it just feels wrong.
+			 */
 			archive_set_error(&a->archive, ENAMETOOLONG,
 			    "Pathname too long");
 			ret = ARCHIVE_WARN;
 		} else if (p  > pp + USTAR_prefix_size) {
+			/* Prefix is too long. */
 			archive_set_error(&a->archive, ENAMETOOLONG,
 			    "Pathname too long");
 			ret = ARCHIVE_WARN;
diff --git a/libarchive/test/Makefile b/libarchive/test/Makefile
index 9d04b089e..24b09544a 100644
--- a/libarchive/test/Makefile
+++ b/libarchive/test/Makefile
@@ -18,6 +18,7 @@ TESTS= \
 	test_empty_write.c			\
 	test_entry.c				\
 	test_entry_strmode.c			\
+	test_link_resolver.c			\
 	test_pax_filename_encoding.c		\
 	test_read_compress_program.c		\
 	test_read_data_large.c			\
@@ -38,6 +39,7 @@ TESTS= \
 	test_read_format_mtree.c		\
 	test_read_format_pax_bz2.c		\
 	test_read_format_tar.c			\
+	test_read_format_tar_empty_filename.c	\
 	test_read_format_tbz.c			\
 	test_read_format_tgz.c			\
 	test_read_format_tz.c			\
@@ -48,6 +50,7 @@ TESTS= \
 	test_read_truncated.c			\
 	test_tar_filenames.c			\
 	test_tar_large.c			\
+	test_ustar_filenames.c			\
 	test_write_compress_program.c		\
 	test_write_compress.c			\
 	test_write_disk.c			\
@@ -61,6 +64,7 @@ TESTS= \
 	test_write_format_cpio_empty.c		\
 	test_write_format_shar_empty.c		\
 	test_write_format_tar.c			\
+	test_write_format_tar_ustar.c		\
 	test_write_format_tar_empty.c		\
 	test_write_open_memory.c
 
@@ -68,7 +72,8 @@ TESTS= \
 # Build the test program using all libarchive sources + the test sources.
 SRCS= ${LA_SRCS}				\
 	${TESTS}				\
-	list.h					\
+	${.OBJDIR}/list.h			\
+	${.OBJDIR}/archive.h			\
 	main.c					\
 	read_open_memory.c
 
@@ -96,17 +101,15 @@ WARNS=6
 
 # Build libarchive_test and run it.
 check test: libarchive_test
-	./libarchive_test -k -r ${.CURDIR}
-
-INCS=archive.h list.h
+	./libarchive_test -v -r ${.CURDIR}
 
 # Build archive.h, but in our .OBJDIR, not libarchive's
 # This keeps libarchive_test and libarchive builds completely separate.
-archive.h: ${LA_SRCDIR}/archive.h.in ${LA_SRCDIR}/Makefile
+${.OBJDIR}/archive.h: ${LA_SRCDIR}/archive.h.in ${LA_SRCDIR}/Makefile
 	cd ${LA_SRCDIR} && unset MAKEOBJDIRPREFIX && MAKEOBJDIR=${.OBJDIR} make archive.h
 
 # list.h is just a list of all tests, as indicated by DEFINE_TEST macro lines
-list.h: ${TESTS} Makefile
+${.OBJDIR}/list.h: ${TESTS} Makefile
 	(cd ${.CURDIR}; cat ${TESTS}) | grep DEFINE_TEST > list.h
 
 CLEANFILES += *.out *.o *.core *~ list.h archive.h
diff --git a/libarchive/test/main.c b/libarchive/test/main.c
index 397780327..6e6ca6c8d 100644
--- a/libarchive/test/main.c
+++ b/libarchive/test/main.c
@@ -63,10 +63,14 @@ extern char *optarg;
 extern int optind;
 #endif
 
-/* Default is to crash and try to force a core dump on failure. */
-static int dump_on_failure = 1;
+/* Enable core dump on failure. */
+static int dump_on_failure = 0;
+/* Default is to remove temp dirs for successful tests. */
+static int keep_temp_files = 0;
 /* Default is to print some basic information about each test. */
 static int quiet_flag = 0;
+/* Default is to summarize repeated failures. */
+static int verbose = 0;
 /* Cumulative count of component failures. */
 static int failures = 0;
 /* Cumulative count of skipped component tests. */
@@ -242,7 +246,7 @@ test_assert(const char *file, int line, int value, const char *condition, void *
 		return (value);
 	}
 	failures ++;
-	if (previous_failures(file, line))
+	if (!verbose && previous_failures(file, line))
 		return (value);
 	fprintf(stderr, "%s:%d: Assertion failed\n", file, line);
 	fprintf(stderr, "   Condition: %s\n", condition);
@@ -261,7 +265,7 @@ test_assert_equal_int(const char *file, int line,
 		return (1);
 	}
 	failures ++;
-	if (previous_failures(file, line))
+	if (!verbose && previous_failures(file, line))
 		return (0);
 	fprintf(stderr, "%s:%d: Assertion failed: Ints not equal\n",
 	    file, line);
@@ -271,6 +275,30 @@ test_assert_equal_int(const char *file, int line,
 	return (0);
 }
 
+/* Print p to stderr in double quotes, escaping bytes that aren't printable. */
+static void strdump(const char *p)
+{
+	if (p == NULL) {
+		fprintf(stderr, "(null)");
+		return;
+	}
+	fprintf(stderr, "\"");
+	while (*p != '\0') {
+		unsigned int c = 0xff & *p++;
+		switch (c) {
+		/* Emit the two-character escape, never the raw control byte. */
+		case '\a': fprintf(stderr, "\\a"); break;
+		case '\b': fprintf(stderr, "\\b"); break;
+		case '\n': fprintf(stderr, "\\n"); break;
+		case '\r': fprintf(stderr, "\\r"); break;
+		default:
+			fprintf(stderr,
+			    (c >= 32 && c < 127) ? "%c" : "\\x%02X", c);
+		}
+	}
+	fprintf(stderr, "\"");
+}
+
 /* assertEqualString() displays the values of the two strings. */
 int
 test_assert_equal_string(const char *file, int line,
@@ -289,16 +317,41 @@ test_assert_equal_string(const char *file, int line,
 		return (1);
 	}
 	failures ++;
-	if (previous_failures(file, line))
+	if (!verbose && previous_failures(file, line))
 		return (0);
 	fprintf(stderr, "%s:%d: Assertion failed: Strings not equal\n",
 	    file, line);
-	fprintf(stderr, "      %s = \"%s\"\n", e1, v1);
-	fprintf(stderr, "      %s = \"%s\"\n", e2, v2);
+	fprintf(stderr, "      %s = ", e1);
+	strdump(v1);	/* strlen() is size_t; cast it for "%d" below. */
+	fprintf(stderr, " (length %d)\n", v1 == NULL ? 0 : (int)strlen(v1));
+	fprintf(stderr, "      %s = ", e2);
+	strdump(v2);	/* Same cast for the second string. */
+	fprintf(stderr, " (length %d)\n", v2 == NULL ? 0 : (int)strlen(v2));
 	report_failure(extra);
 	return (0);
 }
 
+/* Print w to stderr in double quotes, escaping everything but plain ASCII. */
+static void wcsdump(const wchar_t *w)
+{
+	if (w == NULL) {
+		fprintf(stderr, "(null)");
+		return;
+	}
+	fprintf(stderr, "\"");
+	for (; *w != L'\0'; ++w) {
+		unsigned int ch = *w;
+		/* Values too wide for a two-digit hex escape come first. */
+		if (ch >= 256)
+			fprintf(stderr, ch < 0x10000 ? "\\u%04X" : "\\U%08X", ch);
+		else if (ch < 32 || ch >= 127)
+			fprintf(stderr, "\\x%02X", ch);
+		else
+			fprintf(stderr, "%c", ch);
+	}
+	fprintf(stderr, "\"");
+}
+
 /* assertEqualWString() displays the values of the two strings. */
 int
 test_assert_equal_wstring(const char *file, int line,
@@ -307,17 +360,31 @@ test_assert_equal_wstring(const char *file, int line,
     void *extra)
 {
 	++assertions;
-	if (wcscmp(v1, v2) == 0) {
+	/*
+	 * wcscmp() must not be handed NULL, so compare pointers first:
+	 * a NULL string equals only another NULL string; one NULL and
+	 * one non-NULL string falls through to the failure report.
+	 */
+	if (v1 == NULL || v2 == NULL) {
+		if (v1 == v2) {
+			msg[0] = '\0';
+			return (1);
+		}
+	} else if (wcscmp(v1, v2) == 0) {
 		msg[0] = '\0';
 		return (1);
 	}
 	failures ++;
-	if (previous_failures(file, line))
+	if (!verbose && previous_failures(file, line))
 		return (0);
 	fprintf(stderr, "%s:%d: Assertion failed: Unicode strings not equal\n",
 	    file, line);
-	fwprintf(stderr, L"      %s = \"%ls\"\n", e1, v1);
-	fwprintf(stderr, L"      %s = \"%ls\"\n", e2, v2);
+	fprintf(stderr, "      %s = ", e1);
+	wcsdump(v1);
+	fprintf(stderr, "\n");
+	fprintf(stderr, "      %s = ", e2);
+	wcsdump(v2);
+	fprintf(stderr, "\n");
 	report_failure(extra);
 	return (0);
 }
@@ -378,7 +445,7 @@ test_assert_equal_mem(const char *file, int line,
 		return (1);
 	}
 	failures ++;
-	if (previous_failures(file, line))
+	if (!verbose && previous_failures(file, line))
 		return (0);
 	fprintf(stderr, "%s:%d: Assertion failed: memory not equal\n",
 	    file, line);
@@ -410,12 +477,13 @@ test_assert_empty_file(const char *f1fmt, ...)
 	if (stat(f1, &st) != 0) {
 		fprintf(stderr, "%s:%d: Could not stat: %s\n", test_filename, test_line, f1);
 		report_failure(NULL);
+		return (0);
 	}
 	if (st.st_size == 0)
 		return (1);
 
 	failures ++;
-	if (previous_failures(test_filename, test_line))
+	if (!verbose && previous_failures(test_filename, test_line))
 		return (0);
 
 	fprintf(stderr, "%s:%d: File not empty: %s\n", test_filename, test_line, f1);
@@ -462,7 +530,7 @@ test_assert_equal_file(const char *f1, const char *f2pattern, ...)
 			break;
 	}
 	failures ++;
-	if (previous_failures(test_filename, test_line))
+	if (!verbose && previous_failures(test_filename, test_line))
 		return (0);
 	fprintf(stderr, "%s:%d: Files are not identical\n",
 	    test_filename, test_line);
@@ -633,6 +701,12 @@ static int test_run(int i, const char *tmpdir)
 	(*tests[i].func)();
 	/* Summarize the results of this test. */
 	summarize();
+	/* If there were no failures, we can remove the work dir. */
+	if (failures == failures_before) {
+		if (!keep_temp_files && chdir(tmpdir) == 0) {
+			systemf("rm -rf %s", tests[i].name);
+		}
+	}
 	/* Return appropriate status. */
 	return (failures == failures_before ? 0 : 1);
 }
@@ -646,8 +720,9 @@ static void usage(const char *program)
 	printf("Default is to run all tests.\n");
 	printf("Otherwise, specify the numbers of the tests you wish to run.\n");
 	printf("Options:\n");
-	printf("  -k  Keep running after failures.\n");
-	printf("      Default: Core dump after any failure.\n");
+	printf("  -d  Dump core after any failure, for debugging.\n");
+	printf("  -k  Keep all temp files.\n");
+	printf("      Default: temp files for successful tests deleted.\n");
 #ifdef PROGRAM
 	printf("  -p <path>  Path to executable to be tested.\n");
 	printf("      Default: path taken from " ENVBASE " environment variable.\n");
@@ -655,6 +730,7 @@ static void usage(const char *program)
 	printf("  -q  Quiet.\n");
 	printf("  -r <dir>   Path to dir containing reference files.\n");
 	printf("      Default: Current directory.\n");
+	printf("  -v  Verbose.\n");
 	printf("Available tests:\n");
 	for (i = 0; i < limit; i++)
 		printf("  %d: %s\n", i, tests[i].name);
@@ -747,9 +823,9 @@ int main(int argc, char **argv)
 	testprog = getenv(ENVBASE);
 #endif
 
-	/* Allow -k to be controlled through the environment. */
-	if (getenv(ENVBASE "_KEEP_GOING") != NULL)
-		dump_on_failure = 0;
+	/* Allow -d to be controlled through the environment. */
+	if (getenv(ENVBASE "_DEBUG") != NULL)
+		dump_on_failure = 1;
 
 	/* Get the directory holding test files from environment. */
 	refdir = getenv(ENVBASE "_TEST_FILES");
@@ -757,10 +833,13 @@ int main(int argc, char **argv)
 	/*
 	 * Parse options.
 	 */
-	while ((opt = getopt(argc, argv, "kp:qr:")) != -1) {
+	while ((opt = getopt(argc, argv, "dkp:qr:v")) != -1) {
 		switch (opt) {
+		case 'd':
+			dump_on_failure = 1;
+			break;
 		case 'k':
-			dump_on_failure = 0;
+			keep_temp_files = 1;
 			break;
 		case 'p':
 #ifdef PROGRAM
@@ -775,6 +854,9 @@ int main(int argc, char **argv)
 		case 'r':
 			refdir = optarg;
 			break;
+		case 'v':
+			verbose = 1;
+			break;
 		case '?':
 		default:
 			usage(progname);
@@ -823,6 +905,7 @@ int main(int argc, char **argv)
 			--p;
 			*p = '\0';
 		}
+		systemf("rm %s/refdir", tmpdir);
 	}
 
 	/*
@@ -878,5 +961,9 @@ int main(int argc, char **argv)
 
 	free(refdir_alloc);
 
+	/* If the final tmpdir is empty, we can remove it. */
+	/* This should be the usual case when all tests succeed. */
+	rmdir(tmpdir);
+
 	return (tests_failed);
 }
diff --git a/libarchive/test/test_acl_pax.c b/libarchive/test/test_acl_pax.c
index abf746949..6ae3dd271 100644
--- a/libarchive/test/test_acl_pax.c
+++ b/libarchive/test/test_acl_pax.c
@@ -332,14 +332,10 @@ acl_match(struct acl_t *acl, int type, int permset, int tag, int qual, const cha
 		return (1);
 	if (qual != acl->qual)
 		return (0);
-	if (name == NULL) {
-		if (acl->name == NULL || acl->name[0] == '\0')
-			return (1);
-	}
-	if (acl->name == NULL) {
-		if (name[0] == '\0')
-			return (1);
-	}
+	if (name == NULL)
+		return (acl->name == NULL || acl->name[0] == '\0');
+	if (acl->name == NULL)
+		return (name == NULL || name[0] == '\0');
 	return (0 == strcmp(name, acl->name));
 }
 
diff --git a/libarchive/test/test_archive_api_feature.c b/libarchive/test/test_archive_api_feature.c
index cfc0b8413..21d189d58 100644
--- a/libarchive/test/test_archive_api_feature.c
+++ b/libarchive/test/test_archive_api_feature.c
@@ -28,6 +28,7 @@ __FBSDID("$FreeBSD: src/lib/libarchive/test/test_archive_api_feature.c,v 1.4 200
 DEFINE_TEST(test_archive_api_feature)
 {
 	char buff[128];
+	const char *p;
 
 	/* This is the (hopefully) final versioning API. */
 	assertEqualInt(ARCHIVE_VERSION_NUMBER, archive_version_number());
@@ -35,7 +36,17 @@ DEFINE_TEST(test_archive_api_feature)
 	    archive_version_number() / 1000000,
 	    (archive_version_number() / 1000) % 1000,
 	    archive_version_number() % 1000);
-	assertEqualString(buff, archive_version_string());
+	failure("Version string is: %s, computed is: %s",
+	    archive_version_string(), buff);
+	assert(memcmp(buff, archive_version_string(), strlen(buff)) == 0);
+	if (strlen(buff) < strlen(archive_version_string())) {
+		p = archive_version_string() + strlen(buff);
+		failure("Version string is: %s", archive_version_string());
+		assert(*p == 'a' || *p == 'b' || *p == 'c' || *p == 'd');
+		++p;
+		failure("Version string is: %s", archive_version_string());
+		assert(*p == '\0');
+	}
 
 /* This is all scheduled to disappear in libarchive 3.0 */
 #if ARCHIVE_VERSION_NUMBER < 3000000
diff --git a/libarchive/test/test_entry.c b/libarchive/test/test_entry.c
index 29edae7ff..8386d9892 100644
--- a/libarchive/test/test_entry.c
+++ b/libarchive/test/test_entry.c
@@ -52,6 +52,8 @@ DEFINE_TEST(test_entry)
 	const void *xval; /* For xattr tests. */
 	size_t xsize; /* For xattr tests. */
 	int c;
+	wchar_t wc;
+	long l;
 
 	assert((e = archive_entry_new()) != NULL);
 
@@ -146,7 +148,7 @@ DEFINE_TEST(test_entry)
 	archive_entry_copy_link_w(e, L"link3");
 	assertEqualString(archive_entry_hardlink(e), NULL);
 	assertEqualString(archive_entry_symlink(e), "link3");
-	/* Arbitrarily override hardlink if both hardlink and symlink set. */
+	/* Arbitrarily override symlink if both hardlink and symlink set. */
 	archive_entry_set_hardlink(e, "hardlink");
 	archive_entry_set_symlink(e, "symlink");
 	archive_entry_set_link(e, "link");
@@ -726,8 +728,10 @@ DEFINE_TEST(test_entry)
 	/*
 	 * Exercise the character-conversion logic, if we can.
 	 */
-	failure("Can't exercise charset-conversion logic.");
-	if (assert(NULL != setlocale(LC_ALL, "de_DE.UTF-8"))) {
+	if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) {
+		skipping("Can't exercise charset-conversion logic without"
+			" a suitable locale.");
+	} else {
 		/* A filename that cannot be converted to wide characters. */
 		archive_entry_copy_pathname(e, "abc\314\214mno\374xyz");
 		failure("Converting invalid chars to Unicode should fail.");
@@ -756,6 +760,26 @@ DEFINE_TEST(test_entry)
 		assert(NULL == archive_entry_symlink_w(e));
 	}
 
+	l = 0x12345678L;
+	wc = (wchar_t)l; /* Wide character too big for UTF-8. */
+	if (NULL == setlocale(LC_ALL, "C") || (long)wc != l) {
+		skipping("Testing charset conversion failure requires 32-bit wchar_t and support for \"C\" locale.");
+	} else {
+		/*
+		 * Build the string L"xxx\U12345678yyy\u5678zzz" without
+		 * using C99 \u#### syntax, which isn't uniformly
+		 * supported.  (GCC 3.4.6, for instance, defaults to
+		 * "c89 plus GNU extensions.")
+		 */
+		wcscpy(wbuff, L"xxxAyyyBzzz");
+		wbuff[3] = 0x12345678;
+		wbuff[7] = 0x5678;
+		/* A wide filename that cannot be converted to narrow. */
+		archive_entry_copy_pathname_w(e, wbuff);
+		failure("Converting wide characters from Unicode should fail.");
+		assertEqualString(NULL, archive_entry_pathname(e));
+	}
+
 	/* Release the experimental entry. */
 	archive_entry_free(e);
 }
diff --git a/libarchive/test/test_pax_filename_encoding.c b/libarchive/test/test_pax_filename_encoding.c
index b11be58a0..34c4fc823 100644
--- a/libarchive/test/test_pax_filename_encoding.c
+++ b/libarchive/test/test_pax_filename_encoding.c
@@ -34,24 +34,20 @@ __FBSDID("$FreeBSD: src/lib/libarchive/test/test_pax_filename_encoding.c,v 1.1 2
  * stored and restored correctly, regardless of the encodings.
  */
 
-DEFINE_TEST(test_pax_filename_encoding)
+/*
+ * Read a manually-created archive that has filenames that are
+ * stored in binary instead of UTF-8 and verify that we get
+ * the right filename returned and that we get a warning only
+ * if the header isn't marked as binary.
+ */
+DEFINE_TEST(test_pax_filename_encoding_1)
 {
 	static const char testname[] = "test_pax_filename_encoding.tar.gz";
-	char buff[65536];
 	/*
 	 * \314\214 is a valid 2-byte UTF-8 sequence.
 	 * \374 is invalid in UTF-8.
 	 */
 	char filename[] = "abc\314\214mno\374xyz";
-	char longname[] = "abc\314\214mno\374xyz"
-	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-	    ;
-	size_t used;
 	struct archive *a;
 	struct archive_entry *entry;
 
@@ -69,8 +65,7 @@ DEFINE_TEST(test_pax_filename_encoding)
 	 * in it, but the header is not marked as hdrcharset=BINARY, so that
 	 * requires a warning.
 	 */
-	failure("An invalid UTF8 pathname in a pax archive should be read\n"
-	    " without conversion but with a warning");
+	failure("Invalid UTF8 in a pax archive pathname should cause a warning");
 	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
 	assertEqualString(filename, archive_entry_pathname(entry));
 	/*
@@ -82,15 +77,39 @@ DEFINE_TEST(test_pax_filename_encoding)
 	assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry));
 	assertEqualString(filename, archive_entry_pathname(entry));
 	archive_read_finish(a);
+}
+
+/*
+ * Set the locale and write a pathname containing invalid characters.
+ * This should work; the underlying implementation should automatically
+ * fall back to storing the pathname in binary.
+ */
+DEFINE_TEST(test_pax_filename_encoding_2)
+{
+	char filename[] = "abc\314\214mno\374xyz";
+	struct archive *a;
+	struct archive_entry *entry;
+	char buff[65536];
+	char longname[] = "abc\314\214mno\374xyz"
+	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+	    ;
+	size_t used;
 
 	/*
 	 * We need a starting locale which has invalid sequences.
 	 * de_DE.UTF-8 seems to be commonly supported.
 	 */
 	/* If it doesn't exist, just warn and return. */
-	failure("We need a suitable locale for the encoding tests.");
-	if (!assert(NULL != setlocale(LC_ALL, "de_DE.UTF-8")))
+	if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) {
+		skipping("invalid encoding tests require a suitable locale;"
+		    " de_DE.UTF-8 not available on this system");
 		return;
+	}
 
 	assert((a = archive_write_new()) != NULL);
 	assertEqualIntA(a, 0, archive_write_set_format_pax(a));
@@ -159,3 +178,120 @@ DEFINE_TEST(test_pax_filename_encoding)
 	assertEqualInt(0, archive_read_finish(a));
 }
 
+/*
+ * Create entries whose names start out as wide-character Unicode strings,
+ * then read them back in the "C" locale, which doesn't support the names.
+ * TODO: Figure out the "right" behavior here.
+ */
+DEFINE_TEST(test_pax_filename_encoding_3)
+{
+	wchar_t badname[] = L"xxxAyyyBzzz";
+	const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz";
+	struct archive *a;
+	struct archive_entry *entry;
+	char buff[65536];
+	size_t used;
+
+	badname[3] = 0x1234;	/* Overwrites the 'A' placeholder. */
+	badname[7] = 0x5678;	/* Overwrites the 'B' placeholder. */
+
+	/* If the "C" locale can't be set, skip the rest of this test. */
+	if (NULL == setlocale(LC_ALL, "C")) {
+		skipping("Can't set \"C\" locale, so can't exercise "
+		    "certain character-conversion failures");
+		return;
+	}
+
+	assert((a = archive_write_new()) != NULL);
+	assertEqualIntA(a, 0, archive_write_set_format_pax(a));
+	assertEqualIntA(a, 0, archive_write_set_compression_none(a));
+	assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
+	assertEqualInt(0,
+	    archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+	assert((entry = archive_entry_new()) != NULL);
+	/* Set pathname to non-convertible wide value. */
+	archive_entry_copy_pathname_w(entry, badname);
+	archive_entry_set_filetype(entry, AE_IFREG);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+
+	assert((entry = archive_entry_new()) != NULL);
+	archive_entry_copy_pathname_w(entry, L"abc");
+	/* Set gname to non-convertible wide value. */
+	archive_entry_copy_gname_w(entry, badname);
+	archive_entry_set_filetype(entry, AE_IFREG);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+
+	assert((entry = archive_entry_new()) != NULL);
+	archive_entry_copy_pathname_w(entry, L"abc");
+	/* Set uname to non-convertible wide value. */
+	archive_entry_copy_uname_w(entry, badname);
+	archive_entry_set_filetype(entry, AE_IFREG);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+
+	assert((entry = archive_entry_new()) != NULL);
+	archive_entry_copy_pathname_w(entry, L"abc");
+	/* Set hardlink to non-convertible wide value. */
+	archive_entry_copy_hardlink_w(entry, badname);
+	archive_entry_set_filetype(entry, AE_IFREG);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+
+	assert((entry = archive_entry_new()) != NULL);
+	archive_entry_copy_pathname_w(entry, L"abc");
+	/* Set symlink to non-convertible wide value. */
+	archive_entry_copy_symlink_w(entry, badname);
+	archive_entry_set_filetype(entry, AE_IFLNK);
+	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+	archive_entry_free(entry);
+
+	assertEqualInt(0, archive_write_close(a));
+	assertEqualInt(0, archive_write_finish(a));
+
+	/*
+	 * Now read the entries back, in the same order they were written.
+	 */
+
+	assert((a = archive_read_new()) != NULL);
+	assertEqualInt(0, archive_read_support_format_tar(a));
+	assertEqualInt(0, archive_read_open_memory(a, buff, used));
+
+	failure("A non-convertible pathname should cause a warning.");
+	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+	assertEqualWString(badname, archive_entry_pathname_w(entry));
+	failure("If native locale can't convert, we should get UTF-8 back.");
+	assertEqualString(badname_utf8, archive_entry_pathname(entry));
+
+	failure("A non-convertible gname should cause a warning.");
+	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+	assertEqualWString(badname, archive_entry_gname_w(entry));
+	failure("If native locale can't convert, we should get UTF-8 back.");
+	assertEqualString(badname_utf8, archive_entry_gname(entry));
+
+	failure("A non-convertible uname should cause a warning.");
+	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+	assertEqualWString(badname, archive_entry_uname_w(entry));
+	failure("If native locale can't convert, we should get UTF-8 back.");
+	assertEqualString(badname_utf8, archive_entry_uname(entry));
+
+	failure("A non-convertible hardlink should cause a warning.");
+	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+	assertEqualWString(badname, archive_entry_hardlink_w(entry));
+	failure("If native locale can't convert, we should get UTF-8 back.");
+	assertEqualString(badname_utf8, archive_entry_hardlink(entry));
+
+	failure("A non-convertible symlink should cause a warning.");
+	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+	assertEqualWString(badname, archive_entry_symlink_w(entry));
+	assertEqualWString(NULL, archive_entry_hardlink_w(entry));
+	failure("If native locale can't convert, we should get UTF-8 back.");
+	assertEqualString(badname_utf8, archive_entry_symlink(entry));
+
+	assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry));
+
+	assertEqualInt(0, archive_read_close(a));
+	assertEqualInt(0, archive_read_finish(a));
+}
diff --git a/libarchive/test/test_tar_filenames.c b/libarchive/test/test_tar_filenames.c
index 8b83b5277..9b98448e4 100644
--- a/libarchive/test/test_tar_filenames.c
+++ b/libarchive/test/test_tar_filenames.c
@@ -40,19 +40,22 @@ test_filename(const char *prefix, int dlen, int flen)
 	struct archive_entry *ae;
 	struct archive *a;
 	size_t used;
-	size_t prefix_length = 0;
-	unsigned i = 0;
+	char *p;
+	int i;
 
+	p = filename;
 	if (prefix) {
 		strcpy(filename, prefix);
-		i = prefix_length = strlen(prefix);
+		p += strlen(p);
 	}
-	for (; i < prefix_length + dlen; i++)
-		filename[i] = 'a';
-	filename[i++] = '/';
-	for (; i < prefix_length + dlen + flen + 1; i++)
-		filename[i] = 'b';
-	filename[i++] = '\0';
+	if (dlen > 0) {
+		for (i = 0; i < dlen; i++)
+			*p++ = 'a';
+		*p++ = '/';
+	}
+	for (i = 0; i < flen; i++)
+		*p++ = 'b';
+	*p = '\0';
 
 	strcpy(dirname, filename);
 
@@ -160,15 +163,22 @@ DEFINE_TEST(test_tar_filenames)
 	int dlen, flen;
 
 	/* Repeat the following for a variety of dir/file lengths. */
-	for (dlen = 40; dlen < 60; dlen++) {
-		for (flen = 40; flen < 60; flen++) {
+	for (dlen = 45; dlen < 55; dlen++) {
+		for (flen = 45; flen < 55; flen++) {
+			test_filename(NULL, dlen, flen);
+			test_filename("/", dlen, flen);
+		}
+	}
+
+	for (dlen = 0; dlen < 140; dlen += 10) {
+		for (flen = 98; flen < 102; flen++) {
 			test_filename(NULL, dlen, flen);
 			test_filename("/", dlen, flen);
 		}
 	}
 
 	for (dlen = 140; dlen < 160; dlen++) {
-		for (flen = 90; flen < 110; flen++) {
+		for (flen = 95; flen < 105; flen++) {
 			test_filename(NULL, dlen, flen);
 			test_filename("/", dlen, flen);
 		}
diff --git a/libarchive/test/test_tar_large.c b/libarchive/test/test_tar_large.c
index c675ac1ee..a05b49f6a 100644
--- a/libarchive/test/test_tar_large.c
+++ b/libarchive/test/test_tar_large.c
@@ -242,6 +242,11 @@ DEFINE_TEST(test_tar_large)
 		archive_entry_copy_pathname(ae, namebuff);
 		archive_entry_set_mode(ae, S_IFREG | 0755);
 		filesize = tests[i];
+
+		if (filesize < 0) {
+			skipping("32-bit off_t doesn't permit testing of very large files.");
+			return;
+		}
 		archive_entry_set_size(ae, filesize);
 
 		assertA(0 == archive_write_header(a, ae));
diff --git a/libarchive/test/test_write_format_ar.c b/libarchive/test/test_write_format_ar.c
index 6c7a4462a..432557ca1 100644
--- a/libarchive/test/test_write_format_ar.c
+++ b/libarchive/test/test_write_format_ar.c
@@ -30,7 +30,7 @@ __FBSDID("$FreeBSD: src/lib/libarchive/test/test_write_format_ar.c,v 1.6 2008/03
 
 char buff[4096];
 char buff2[64];
-static unsigned char strtab[] = "abcdefghijklmn.o/\nggghhhjjjrrrttt.o/\niiijjjdddsssppp.o/\n";
+static char strtab[] = "abcdefghijklmn.o/\nggghhhjjjrrrttt.o/\niiijjjdddsssppp.o/\n";
 
 DEFINE_TEST(test_write_format_ar)
 {
diff --git a/tar/Makefile b/tar/Makefile
index fa325429e..44098eac8 100644
--- a/tar/Makefile
+++ b/tar/Makefile
@@ -1,7 +1,7 @@
 # $FreeBSD: src/usr.bin/tar/Makefile,v 1.34 2008/03/18 06:18:49 kientzle Exp $
 
 PROG=	bsdtar
-BSDTAR_VERSION_STRING=2.5.0b
+BSDTAR_VERSION_STRING=2.5.1b
 SRCS=	bsdtar.c getdate.y matching.c read.c tree.c util.c write.c
 WARNS?=	5
 DPADD=	${LIBARCHIVE} ${LIBBZ2} ${LIBZ}
diff --git a/tar/bsdtar.c b/tar/bsdtar.c
index 81d9841bd..abd575ca2 100644
--- a/tar/bsdtar.c
+++ b/tar/bsdtar.c
@@ -788,7 +788,7 @@ version(void)
 	printf("bsdtar %s - %s\n",
 	    BSDTAR_VERSION_STRING,
 	    archive_version());
-	exit(1);
+	exit(0);
 }
 
 static const char *long_help_msg =
-- 
2.47.3