Synchronize a bunch of changes from my local tree.

author Tim Kientzle <kientzle@gmail.com>

Wed, 30 Apr 2008 21:48:09 +0000 (17:48 -0400)

committer Tim Kientzle <kientzle@gmail.com>

Wed, 30 Apr 2008 21:48:09 +0000 (17:48 -0400)
author Tim Kientzle <kientzle@gmail.com>
Wed, 30 Apr 2008 21:48:09 +0000 (17:48 -0400)
committer Tim Kientzle <kientzle@gmail.com>
Wed, 30 Apr 2008 21:48:09 +0000 (17:48 -0400)
diff --git a/libarchive/Makefile b/libarchive/Makefile

index 75078d790e9d2a8b9fa7ec086cb503e149f1b62d..585b8977167014946f0a1b512aabc0ccdc3972b9 100644 (file)
--- a/libarchive/Makefile
+++ b/libarchive/Makefile
@@ -8,12 +8,12 @@ LDADD=        -lbz2 -lz
  # Version is three numbers:
  #  Major: Bumped ONLY when API/ABI breakage happens (see SHLIB_MAJOR)
  #  Minor: Bumped when significant new features are added
-#  Revision: Bumped on any notable change
+#  Revision: Bumped frequently.
  
  # The useful version number (one integer, easy to compare)
-LIBARCHIVE_VERSION= 2004012
+LIBARCHIVE_VERSION_NUMBER=2005001
  # The pretty version string
-LIBARCHIVE_VERSION_STRING!= echo $$((${LIBARCHIVE_VERSION} / 1000000)).$$((${LIBARCHIVE_VERSION} / 1000 % 1000)).$$((${LIBARCHIVE_VERSION} % 1000))
+LIBARCHIVE_VERSION_STRING=2.5.1b
  
  # FreeBSD SHLIB_MAJOR value is managed as part of the FreeBSD system.
  # It has no real relation to the version number above.
@@ -31,10 +31,8 @@ INCS=        archive.h archive_entry.h
  # Note: FreeBSD has inttypes.h, so enable that include in archive.h.in
  archive.h:     archive.h.in Makefile
         cat ${.CURDIR}/archive.h.in | sed                               \
-               -e 's/@LIBARCHIVE_VERSION@/${LIBARCHIVE_VERSION}/g'     \
-               -e 's/@LIBARCHIVE_VERSION_STRING@/${LIBARCHIVE_VERSION_STRING}/g' \
-               -e 's/@SHLIB_MAJOR@/${SHLIB_MAJOR}/g'                   \
-               -e 's|@ARCHIVE_H_INCLUDE_INTTYPES_H@|#include <inttypes.h>  /* For int64_t */|g' \
+          -e 's/@LIBARCHIVE_VERSION_NUMBER@/${LIBARCHIVE_VERSION_NUMBER}/g' \
+          -e 's/@LIBARCHIVE_VERSION_STRING@/${LIBARCHIVE_VERSION_STRING}/g' \
                 > archive.h
  
  # archive.h needs to be cleaned
diff --git a/libarchive/archive_entry.c b/libarchive/archive_entry.c

index 5f9e39a6674ffeeb78f083a2fa7c9d9f344c362f..228f91ac6e7d170d0bffd8d75b433a770a5f3203 100644 (file)
--- a/libarchive/archive_entry.c
+++ b/libarchive/archive_entry.c
@@ -91,15 +91,17 @@ static void aes_clean(struct aes *);
  static void    aes_copy(struct aes *dest, struct aes *src);
  static const char *    aes_get_mbs(struct aes *);
  static const wchar_t * aes_get_wcs(struct aes *);
-static void    aes_set_mbs(struct aes *, const char *mbs);
-static void    aes_copy_mbs(struct aes *, const char *mbs);
+static int     aes_set_mbs(struct aes *, const char *mbs);
+static int     aes_copy_mbs(struct aes *, const char *mbs);
  /* static void aes_set_wcs(struct aes *, const wchar_t *wcs); */
-static void    aes_copy_wcs(struct aes *, const wchar_t *wcs);
-static void    aes_copy_wcs_len(struct aes *, const wchar_t *wcs, size_t);
+static int     aes_copy_wcs(struct aes *, const wchar_t *wcs);
+static int     aes_copy_wcs_len(struct aes *, const wchar_t *wcs, size_t);
  
  static char *   ae_fflagstostr(unsigned long bitset, unsigned long bitclear);
  static const wchar_t   *ae_wcstofflags(const wchar_t *stringp,
                     unsigned long *setp, unsigned long *clrp);
+static const char      *ae_strtofflags(const char *stringp,
+                   unsigned long *setp, unsigned long *clrp);
  static void    append_entry_w(wchar_t **wp, const wchar_t *prefix, int tag,
                     const wchar_t *wname, int perm, int id);
  static void    append_id_w(wchar_t **wp, int id);
@@ -144,173 +146,216 @@ static size_t wcslen(const wchar_t *s)
  #define wmemcpy(a,b,i)  (wchar_t *)memcpy((a), (b), (i) * sizeof(wchar_t))
  #endif
  
-
  static void
  aes_clean(struct aes *aes)
  {
-       if (aes->aes_mbs_alloc) {
-               free(aes->aes_mbs_alloc);
-               aes->aes_mbs_alloc = NULL;
-       }
-       if (aes->aes_wcs_alloc) {
-               free(aes->aes_wcs_alloc);
-               aes->aes_wcs_alloc = NULL;
+       if (aes->aes_wcs) {
+               free((wchar_t *)(uintptr_t)aes->aes_wcs);
+               aes->aes_wcs = NULL;
         }
-       memset(aes, 0, sizeof(*aes));
+       archive_string_free(&(aes->aes_mbs));
+       archive_string_free(&(aes->aes_utf8));
+       aes->aes_set = 0;
  }
  
  static void
  aes_copy(struct aes *dest, struct aes *src)
  {
-       *dest = *src;
-       if (src->aes_mbs != NULL) {
-               dest->aes_mbs_alloc = strdup(src->aes_mbs);
-               dest->aes_mbs = dest->aes_mbs_alloc;
-               if (dest->aes_mbs == NULL)
-                       __archive_errx(1, "No memory for aes_copy()");
-       }
+       wchar_t *wp;
+
+       dest->aes_set = src->aes_set;
+       archive_string_copy(&(dest->aes_mbs), &(src->aes_mbs));
+       archive_string_copy(&(dest->aes_utf8), &(src->aes_utf8));
  
         if (src->aes_wcs != NULL) {
-               dest->aes_wcs_alloc = (wchar_t *)malloc((wcslen(src->aes_wcs) + 1)
+               wp = (wchar_t *)malloc((wcslen(src->aes_wcs) + 1)
                     * sizeof(wchar_t));
-               dest->aes_wcs = dest->aes_wcs_alloc;
-               if (dest->aes_wcs == NULL)
+               if (wp == NULL)
                         __archive_errx(1, "No memory for aes_copy()");
-               wcscpy(dest->aes_wcs_alloc, src->aes_wcs);
+               wcscpy(wp, src->aes_wcs);
+               dest->aes_wcs = wp;
+       }
+}
+
+static const char *
+aes_get_utf8(struct aes *aes)
+{
+       if (aes->aes_set & AES_SET_UTF8)
+               return (aes->aes_utf8.s);
+       if ((aes->aes_set & AES_SET_WCS)
+           && archive_strappend_w_utf8(&(aes->aes_utf8), aes->aes_wcs) != NULL) {
+               aes->aes_set |= AES_SET_UTF8;
+               return (aes->aes_utf8.s);
         }
+       return (NULL);
  }
  
  static const char *
  aes_get_mbs(struct aes *aes)
  {
-       if (aes->aes_mbs == NULL && aes->aes_wcs == NULL)
-               return NULL;
-       if (aes->aes_mbs == NULL && aes->aes_wcs != NULL) {
-               /*
-                * XXX Need to estimate the number of byte in the
-                * multi-byte form.  Assume that, on average, wcs
-                * chars encode to no more than 3 bytes.  There must
-                * be a better way... XXX
-                */
-               size_t mbs_length = wcslen(aes->aes_wcs) * 3 + 64;
-
-               aes->aes_mbs_alloc = (char *)malloc(mbs_length);
-               aes->aes_mbs = aes->aes_mbs_alloc;
-               if (aes->aes_mbs == NULL)
-                       __archive_errx(1, "No memory for aes_get_mbs()");
-               wcstombs(aes->aes_mbs_alloc, aes->aes_wcs, mbs_length - 1);
-               aes->aes_mbs_alloc[mbs_length - 1] = 0;
+       /* If we already have an MBS form, return that immediately. */
+       if (aes->aes_set & AES_SET_MBS)
+               return (aes->aes_mbs.s);
+       /* If there's a WCS form, try converting with the native locale. */
+       if ((aes->aes_set & AES_SET_WCS)
+           && archive_strappend_w_mbs(&(aes->aes_mbs), aes->aes_wcs) != NULL) {
+               aes->aes_set |= AES_SET_MBS;
+               return (aes->aes_mbs.s);
         }
-       return (aes->aes_mbs);
+       /* We'll use UTF-8 for MBS if all else fails. */
+       return (aes_get_utf8(aes));
  }
  
  static const wchar_t *
  aes_get_wcs(struct aes *aes)
  {
+       wchar_t *w;
         int r;
  
-       if (aes->aes_wcs == NULL && aes->aes_mbs == NULL)
-               return NULL;
-       if (aes->aes_wcs == NULL && aes->aes_mbs != NULL) {
+       /* Return WCS form if we already have it. */
+       if (aes->aes_set & AES_SET_WCS)
+               return (aes->aes_wcs);
+
+       if (aes->aes_set & AES_SET_MBS) {
+               /* Try converting MBS to WCS using native locale. */
                 /*
                  * No single byte will be more than one wide character,
                  * so this length estimate will always be big enough.
                  */
-               size_t wcs_length = strlen(aes->aes_mbs);
+               size_t wcs_length = aes->aes_mbs.length;
  
-               aes->aes_wcs_alloc
-                   = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t));
-               aes->aes_wcs = aes->aes_wcs_alloc;
-               if (aes->aes_wcs == NULL)
+               w = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t));
+               if (w == NULL)
                         __archive_errx(1, "No memory for aes_get_wcs()");
-               r = mbstowcs(aes->aes_wcs_alloc, aes->aes_mbs, wcs_length);
-               aes->aes_wcs_alloc[wcs_length] = 0;
-               if (r == -1) {
-                       /* Conversion failed, don't lie to our clients. */
-                       free(aes->aes_wcs_alloc);
-                       aes->aes_wcs = aes->aes_wcs_alloc = NULL;
+               r = mbstowcs(w, aes->aes_mbs.s, wcs_length);
+               w[wcs_length] = 0;
+               if (r > 0) {
+                       aes->aes_set |= AES_SET_WCS;
+                       return (aes->aes_wcs = w);
                 }
+               free(w);
         }
-       return (aes->aes_wcs);
+
+       if (aes->aes_set & AES_SET_UTF8) {
+               /* Try converting UTF8 to WCS. */
+               aes->aes_wcs = __archive_string_utf8_w(&(aes->aes_utf8));
+               aes->aes_set |= AES_SET_WCS;
+               return (aes->aes_wcs);
+       }
+       return (NULL);
  }
  
-static void
+static int
  aes_set_mbs(struct aes *aes, const char *mbs)
  {
-       if (aes->aes_mbs_alloc) {
-               free(aes->aes_mbs_alloc);
-               aes->aes_mbs_alloc = NULL;
-       }
-       if (aes->aes_wcs_alloc) {
-               free(aes->aes_wcs_alloc);
-               aes->aes_wcs_alloc = NULL;
-       }
-       aes->aes_mbs = mbs;
-       aes->aes_wcs = NULL;
+       return (aes_copy_mbs(aes, mbs));
  }
  
-static void
+static int
  aes_copy_mbs(struct aes *aes, const char *mbs)
  {
-       if (aes->aes_mbs_alloc) {
-               free(aes->aes_mbs_alloc);
-               aes->aes_mbs_alloc = NULL;
+       if (mbs == NULL) {
+               aes->aes_set = 0;
+               return (0);
         }
-       if (aes->aes_wcs_alloc) {
-               free(aes->aes_wcs_alloc);
-               aes->aes_wcs_alloc = NULL;
+       aes->aes_set = AES_SET_MBS; /* Only MBS form is set now. */
+       archive_strcpy(&(aes->aes_mbs), mbs);
+       archive_string_empty(&(aes->aes_utf8));
+       if (aes->aes_wcs) {
+               free((wchar_t *)(uintptr_t)aes->aes_wcs);
+               aes->aes_wcs = NULL;
         }
-       aes->aes_mbs_alloc = (char *)malloc((strlen(mbs) + 1) * sizeof(char));
-       if (aes->aes_mbs_alloc == NULL)
-               __archive_errx(1, "No memory for aes_copy_mbs()");
-       strcpy(aes->aes_mbs_alloc, mbs);
-       aes->aes_mbs = aes->aes_mbs_alloc;
-       aes->aes_wcs = NULL;
+       return (0);
  }
  
-#if 0
-static void
-aes_set_wcs(struct aes *aes, const wchar_t *wcs)
+/*
+ * The 'update' form tries to proactively update all forms of
+ * this string (WCS and MBS) and returns an error if any of
+ * them fail.  This is used by the 'pax' handler, for instance,
+ * to detect and report character-conversion failures early while
+ * still allowing clients to get potentially useful values from
+ * the more tolerant lazy conversions.  (get_mbs and get_wcs will
+ * strive to give the user something useful, so you can get hopefully
+ * usable values even if some of the character conversions are failing.)
+ */
+static int
+aes_update_utf8(struct aes *aes, const char *utf8)
  {
-       if (aes->aes_mbs_alloc) {
-               free(aes->aes_mbs_alloc);
-               aes->aes_mbs_alloc = NULL;
+       if (utf8 == NULL) {
+               aes->aes_set = 0;
+               return (1); /* Succeeded in clearing everything. */
         }
-       if (aes->aes_wcs_alloc) {
-               free(aes->aes_wcs_alloc);
-               aes->aes_wcs_alloc = NULL;
+
+       /* Save the UTF8 string. */
+       archive_strcpy(&(aes->aes_utf8), utf8);
+
+       /* Empty the mbs and wcs strings. */
+       archive_string_empty(&(aes->aes_mbs));
+       if (aes->aes_wcs) {
+               free((wchar_t *)(uintptr_t)aes->aes_wcs);
+               aes->aes_wcs = NULL;
         }
-       aes->aes_mbs = NULL;
-       aes->aes_wcs = wcs;
+
+       aes->aes_set = AES_SET_UTF8;    /* Only UTF8 is set now. */
+
+       /* TODO: We should just do a direct UTF-8 to MBS conversion
+        * here.  That would be faster, use less space, and give the
+        * same information.  (If a UTF-8 to MBS conversion succeeds,
+        * then UTF-8->WCS and Unicode->MBS conversions will both
+        * succeed.) */
+
+       /* Try converting UTF8 to WCS, return false on failure. */
+       aes->aes_wcs = __archive_string_utf8_w(&(aes->aes_utf8));
+       if (aes->aes_wcs == NULL)
+               return (0);
+       aes->aes_set = AES_SET_UTF8 | AES_SET_WCS; /* Both UTF8 and WCS set. */
+
+       /* Try converting WCS to MBS, return false on failure. */
+       if (archive_strappend_w_mbs(&(aes->aes_mbs), aes->aes_wcs) == NULL)
+               return (0);
+       aes->aes_set = AES_SET_UTF8 | AES_SET_WCS | AES_SET_MBS;
+
+       /* All conversions succeeded. */
+       return (1);
  }
-#endif
  
-static void
+static int
  aes_copy_wcs(struct aes *aes, const wchar_t *wcs)
  {
-       aes_copy_wcs_len(aes, wcs, wcslen(wcs));
+       return aes_copy_wcs_len(aes, wcs, wcs == NULL ? 0 : wcslen(wcs));
  }
  
-static void
+static int
  aes_copy_wcs_len(struct aes *aes, const wchar_t *wcs, size_t len)
  {
-       if (aes->aes_mbs_alloc) {
-               free(aes->aes_mbs_alloc);
-               aes->aes_mbs_alloc = NULL;
+       wchar_t *w;
+
+       if (wcs == NULL) {
+               aes->aes_set = 0;
+               return (0);
         }
-       if (aes->aes_wcs_alloc) {
-               free(aes->aes_wcs_alloc);
-               aes->aes_wcs_alloc = NULL;
+       aes->aes_set = AES_SET_WCS; /* Only WCS form set. */
+       archive_string_empty(&(aes->aes_mbs));
+       archive_string_empty(&(aes->aes_utf8));
+       if (aes->aes_wcs) {
+               free((wchar_t *)(uintptr_t)aes->aes_wcs);
+               aes->aes_wcs = NULL;
         }
-       aes->aes_mbs = NULL;
-       aes->aes_wcs_alloc = (wchar_t *)malloc((len + 1) * sizeof(wchar_t));
-       if (aes->aes_wcs_alloc == NULL)
+       w = (wchar_t *)malloc((len + 1) * sizeof(wchar_t));
+       if (w == NULL)
                 __archive_errx(1, "No memory for aes_copy_wcs()");
-       wmemcpy(aes->aes_wcs_alloc, wcs, len);
-       aes->aes_wcs_alloc[len] = L'\0';
-       aes->aes_wcs = aes->aes_wcs_alloc;
+       wmemcpy(w, wcs, len);
+       w[len] = L'\0';
+       aes->aes_wcs = w;
+       return (0);
  }
  
+/****************************************************************************
+ *
+ * Public Interface
+ *
+ ****************************************************************************/
+
  struct archive_entry *
  archive_entry_clear(struct archive_entry *entry)
  {
@@ -350,6 +395,8 @@ archive_entry_clone(struct archive_entry *entry)
         aes_copy(&entry2->ae_hardlink, &entry->ae_hardlink);
         aes_copy(&entry2->ae_pathname, &entry->ae_pathname);
         aes_copy(&entry2->ae_symlink, &entry->ae_symlink);
+       entry2->ae_hardlinkset = entry->ae_hardlinkset;
+       entry2->ae_symlinkset = entry->ae_symlinkset;
         aes_copy(&entry2->ae_uname, &entry->ae_uname);
  
         /* Copy ACL data over. */
@@ -515,12 +562,16 @@ archive_entry_gname_w(struct archive_entry *entry)
  const char *
  archive_entry_hardlink(struct archive_entry *entry)
  {
+       if (!entry->ae_hardlinkset)
+               return (NULL);
         return (aes_get_mbs(&entry->ae_hardlink));
  }
  
  const wchar_t *
  archive_entry_hardlink_w(struct archive_entry *entry)
  {
+       if (!entry->ae_hardlinkset)
+               return (NULL);
         return (aes_get_wcs(&entry->ae_hardlink));
  }
  
@@ -600,15 +651,25 @@ archive_entry_size(struct archive_entry *entry)
         return (entry->ae_stat.aest_size);
  }
  
+const char *
+archive_entry_sourcepath(struct archive_entry *entry)
+{
+       return (aes_get_mbs(&entry->ae_sourcepath));
+}
+
  const char *
  archive_entry_symlink(struct archive_entry *entry)
  {
+       if (!entry->ae_symlinkset)
+               return (NULL);
         return (aes_get_mbs(&entry->ae_symlink));
  }
  
  const wchar_t *
  archive_entry_symlink_w(struct archive_entry *entry)
  {
+       if (!entry->ae_symlinkset)
+               return (NULL);
         return (aes_get_wcs(&entry->ae_symlink));
  }
  
@@ -651,6 +712,15 @@ archive_entry_set_fflags(struct archive_entry *entry,
         entry->ae_fflags_clear = clear;
  }
  
+const char *
+archive_entry_copy_fflags_text(struct archive_entry *entry,
+    const char *flags)
+{
+       aes_copy_mbs(&entry->ae_fflags_text, flags);
+       return (ae_strtofflags(flags,
+                   &entry->ae_fflags_set, &entry->ae_fflags_clear));
+}
+
  const wchar_t *
  archive_entry_copy_fflags_text_w(struct archive_entry *entry,
      const wchar_t *flags)
@@ -685,6 +755,12 @@ archive_entry_copy_gname_w(struct archive_entry *entry, const wchar_t *name)
         aes_copy_wcs(&entry->ae_gname, name);
  }
  
+int
+archive_entry_update_gname_utf8(struct archive_entry *entry, const char *name)
+{
+       return (aes_update_utf8(&entry->ae_gname, name));
+}
+
  void
  archive_entry_set_ino(struct archive_entry *entry, unsigned long ino)
  {
@@ -696,18 +772,24 @@ void
  archive_entry_set_hardlink(struct archive_entry *entry, const char *target)
  {
         aes_set_mbs(&entry->ae_hardlink, target);
+       if (target != NULL)
+               entry->ae_hardlinkset = 1;
  }
  
  void
  archive_entry_copy_hardlink(struct archive_entry *entry, const char *target)
  {
         aes_copy_mbs(&entry->ae_hardlink, target);
+       if (target != NULL)
+               entry->ae_hardlinkset = 1;
  }
  
  void
  archive_entry_copy_hardlink_w(struct archive_entry *entry, const wchar_t *target)
  {
         aes_copy_wcs(&entry->ae_hardlink, target);
+       if (target != NULL)
+               entry->ae_hardlinkset = 1;
  }
  
  void
@@ -754,8 +836,7 @@ archive_entry_set_devminor(struct archive_entry *entry, dev_t m)
  void
  archive_entry_set_link(struct archive_entry *entry, const char *target)
  {
-       if (entry->ae_symlink.aes_mbs != NULL ||
-           entry->ae_symlink.aes_wcs != NULL)
+       if (entry->ae_symlinkset)
                 aes_set_mbs(&entry->ae_symlink, target);
         else
                 aes_set_mbs(&entry->ae_hardlink, target);
@@ -765,8 +846,7 @@ archive_entry_set_link(struct archive_entry *entry, const char *target)
  void
  archive_entry_copy_link(struct archive_entry *entry, const char *target)
  {
-       if (entry->ae_symlink.aes_mbs != NULL ||
-           entry->ae_symlink.aes_wcs != NULL)
+       if (entry->ae_symlinkset)
                 aes_copy_mbs(&entry->ae_symlink, target);
         else
                 aes_copy_mbs(&entry->ae_hardlink, target);
@@ -776,13 +856,21 @@ archive_entry_copy_link(struct archive_entry *entry, const char *target)
  void
  archive_entry_copy_link_w(struct archive_entry *entry, const wchar_t *target)
  {
-       if (entry->ae_symlink.aes_mbs != NULL ||
-           entry->ae_symlink.aes_wcs != NULL)
+       if (entry->ae_symlinkset)
                 aes_copy_wcs(&entry->ae_symlink, target);
         else
                 aes_copy_wcs(&entry->ae_hardlink, target);
  }
  
+int
+archive_entry_update_link_utf8(struct archive_entry *entry, const char *target)
+{
+       if (entry->ae_symlinkset)
+               return (aes_update_utf8(&entry->ae_symlink, target));
+       else
+               return (aes_update_utf8(&entry->ae_hardlink, target));
+}
+
  void
  archive_entry_set_mode(struct archive_entry *entry, mode_t m)
  {
@@ -823,6 +911,12 @@ archive_entry_copy_pathname_w(struct archive_entry *entry, const wchar_t *name)
         aes_copy_wcs(&entry->ae_pathname, name);
  }
  
+int
+archive_entry_update_pathname_utf8(struct archive_entry *entry, const char *name)
+{
+       return (aes_update_utf8(&entry->ae_pathname, name));
+}
+
  void
  archive_entry_set_perm(struct archive_entry *entry, mode_t p)
  {
@@ -862,22 +956,34 @@ archive_entry_set_size(struct archive_entry *entry, int64_t s)
         entry->ae_stat.aest_size = s;
  }
  
+void
+archive_entry_copy_sourcepath(struct archive_entry *entry, const char *path)
+{
+       aes_set_mbs(&entry->ae_sourcepath, path);
+}
+
  void
  archive_entry_set_symlink(struct archive_entry *entry, const char *linkname)
  {
         aes_set_mbs(&entry->ae_symlink, linkname);
+       if (linkname != NULL)
+               entry->ae_symlinkset = 1;
  }
  
  void
  archive_entry_copy_symlink(struct archive_entry *entry, const char *linkname)
  {
         aes_copy_mbs(&entry->ae_symlink, linkname);
+       if (linkname != NULL)
+               entry->ae_symlinkset = 1;
  }
  
  void
  archive_entry_copy_symlink_w(struct archive_entry *entry, const wchar_t *linkname)
  {
         aes_copy_wcs(&entry->ae_symlink, linkname);
+       if (linkname != NULL)
+               entry->ae_symlinkset = 1;
  }
  
  void
@@ -905,6 +1011,12 @@ archive_entry_copy_uname_w(struct archive_entry *entry, const wchar_t *name)
         aes_copy_wcs(&entry->ae_uname, name);
  }
  
+int
+archive_entry_update_uname_utf8(struct archive_entry *entry, const char *name)
+{
+       return (aes_update_utf8(&entry->ae_uname, name));
+}
+
  /*
   * ACL management.  The following would, of course, be a lot simpler
   * if: 1) the last draft of POSIX.1e were a really thorough and
@@ -1744,7 +1856,7 @@ static struct flag {
   *     Convert file flags to a comma-separated string.  If no flags
   *     are set, return the empty string.
   */
-char *
+static char *
  ae_fflagstostr(unsigned long bitset, unsigned long bitclear)
  {
         char *string, *dp;
@@ -1788,6 +1900,70 @@ ae_fflagstostr(unsigned long bitset, unsigned long bitclear)
         return (string);
  }
  
+/*
+ * strtofflags --
+ *     Take string of arguments and return file flags.  This
+ *     version works a little differently than strtofflags(3).
+ *     In particular, it always tests every token, skipping any
+ *     unrecognized tokens.  It returns a pointer to the first
+ *     unrecognized token, or NULL if every token was recognized.
+ *     This version is also const-correct and does not modify the
+ *     provided string.
+ */
+static const char *
+ae_strtofflags(const char *s, unsigned long *setp, unsigned long *clrp)
+{
+       const char *start, *end;
+       struct flag *flag;
+       unsigned long set, clear;
+       const char *failed;
+
+       set = clear = 0;
+       start = s;
+       failed = NULL;
+       /* Find start of first token. */
+       while (*start == '\t'  ||  *start == ' '  ||  *start == ',')
+               start++;
+       while (*start != '\0') {
+               /* Locate end of token. */
+               end = start;
+               while (*end != '\0'  &&  *end != '\t'  &&
+                   *end != ' '  &&  *end != ',')
+                       end++;
+               for (flag = flags; flag->wname != NULL; flag++) {
+                       if (memcmp(start, flag->wname, end - start) == 0) {
+                               /* Matched "noXXXX", so reverse the sense. */
+                               clear |= flag->set;
+                               set |= flag->clear;
+                               break;
+                       } else if (memcmp(start, flag->wname + 2, end - start)
+                           == 0) {
+                               /* Matched "XXXX", so don't reverse. */
+                               set |= flag->set;
+                               clear |= flag->clear;
+                               break;
+                       }
+               }
+               /* Unknown flag name: remember the first one, keep scanning. */
+               if (flag->wname == NULL  &&  failed == NULL)
+                       failed = start;
+
+               /* Find start of next token. */
+               start = end;
+               while (*start == '\t'  ||  *start == ' '  ||  *start == ',')
+                       start++;
+
+       }
+
+       if (setp)
+               *setp = set;
+       if (clrp)
+               *clrp = clear;
+
+       /* Return location of first failure. */
+       return (failed);
+}
+
  /*
   * wcstofflags --
   *     Take string of arguments and return file flags.  This
@@ -1798,7 +1974,7 @@ ae_fflagstostr(unsigned long bitset, unsigned long bitclear)
   *     This version is also const-correct and does not modify the
   *     provided string.
   */
-const wchar_t *
+static const wchar_t *
  ae_wcstofflags(const wchar_t *s, unsigned long *setp, unsigned long *clrp)
  {
         const wchar_t *start, *end;
diff --git a/libarchive/archive_entry.h b/libarchive/archive_entry.h

index 3bfe9e912bcc411d687a8cbecaaa7bc9e6008fe0..5f7771ca79df1ccff87f2d762957e1b5be012e9b 100644 (file)
--- a/libarchive/archive_entry.h
+++ b/libarchive/archive_entry.h
@@ -31,17 +31,56 @@
  #include <sys/types.h>
  #include <stddef.h>  /* for wchar_t */
  #include <time.h>
+
+/* Get appropriate definitions of standard POSIX-style types. */
+/* These should match the types used in 'struct stat' */
+#ifdef _WIN32
+#define        __LA_UID_T      unsigned int
+#define        __LA_GID_T      unsigned int
+#define        __LA_INO_T      unsigned int
+#define        __LA_DEV_T      unsigned int
+#define        __LA_MODE_T     unsigned short
+#else
  #include <unistd.h>
+#define        __LA_UID_T      uid_t
+#define        __LA_GID_T      gid_t
+#define        __LA_INO_T      ino_t
+#define        __LA_DEV_T      dev_t
+#define        __LA_MODE_T     mode_t
+#endif
+
+/*
+ * On Windows, define LIBARCHIVE_STATIC if you're building or using a
+ * .lib.  The default here assumes you're building a DLL.  Only
+ * libarchive source should ever define __LIBARCHIVE_BUILD.
+ */
+#if ((defined __WIN32__) || (defined _WIN32)) && (!defined LIBARCHIVE_STATIC)
+# ifdef __LIBARCHIVE_BUILD
+#  ifdef __GNUC__
+#   define __LA_DECL   __attribute__((dllexport)) extern
+#  else
+#   define __LA_DECL   __declspec(dllexport)
+#  endif
+# else
+#  ifdef __GNUC__
+#   define __LA_DECL   __attribute__((dllimport)) extern
+#  else
+#   define __LA_DECL   __declspec(dllimport)
+#  endif
+# endif
+#else
+/* Static libraries on all platforms and shared libraries on non-Windows. */
+# define __LA_DECL
+#endif
  
  #ifdef __cplusplus
  extern "C" {
  #endif
  
-
  /*
   * Description of an archive entry.
   *
- * Basically, a "struct stat" with a few text fields added in.
+ * You can think of this as "struct stat" with some text fields added in.
   *
   * TODO: Add "comment", "charset", and possibly other entries that are
   * supported by "pax interchange" format.  However, GNU, ustar, cpio,
@@ -90,50 +129,51 @@ struct archive_entry;
   * Basic object manipulation
   */
  
-struct archive_entry   *archive_entry_clear(struct archive_entry *);
+__LA_DECL struct archive_entry *archive_entry_clear(struct archive_entry *);
  /* The 'clone' function does a deep copy; all of the strings are copied too. */
-struct archive_entry   *archive_entry_clone(struct archive_entry *);
-void                    archive_entry_free(struct archive_entry *);
-struct archive_entry   *archive_entry_new(void);
+__LA_DECL struct archive_entry *archive_entry_clone(struct archive_entry *);
+__LA_DECL void                  archive_entry_free(struct archive_entry *);
+__LA_DECL struct archive_entry *archive_entry_new(void);
  
  /*
   * Retrieve fields from an archive_entry.
   */
  
-time_t                  archive_entry_atime(struct archive_entry *);
-long                    archive_entry_atime_nsec(struct archive_entry *);
-time_t                  archive_entry_ctime(struct archive_entry *);
-long                    archive_entry_ctime_nsec(struct archive_entry *);
-dev_t                   archive_entry_dev(struct archive_entry *);
-dev_t                   archive_entry_devmajor(struct archive_entry *);
-dev_t                   archive_entry_devminor(struct archive_entry *);
-mode_t                  archive_entry_filetype(struct archive_entry *);
-void                    archive_entry_fflags(struct archive_entry *,
+__LA_DECL time_t        archive_entry_atime(struct archive_entry *);
+__LA_DECL long          archive_entry_atime_nsec(struct archive_entry *);
+__LA_DECL time_t        archive_entry_ctime(struct archive_entry *);
+__LA_DECL long          archive_entry_ctime_nsec(struct archive_entry *);
+__LA_DECL dev_t                 archive_entry_dev(struct archive_entry *);
+__LA_DECL dev_t                 archive_entry_devmajor(struct archive_entry *);
+__LA_DECL dev_t                 archive_entry_devminor(struct archive_entry *);
+__LA_DECL __LA_MODE_T   archive_entry_filetype(struct archive_entry *);
+__LA_DECL void          archive_entry_fflags(struct archive_entry *,
                             unsigned long * /* set */,
                             unsigned long * /* clear */);
-const char             *archive_entry_fflags_text(struct archive_entry *);
-gid_t                   archive_entry_gid(struct archive_entry *);
-const char             *archive_entry_gname(struct archive_entry *);
-const wchar_t          *archive_entry_gname_w(struct archive_entry *);
-const char             *archive_entry_hardlink(struct archive_entry *);
-const wchar_t          *archive_entry_hardlink_w(struct archive_entry *);
-ino_t                   archive_entry_ino(struct archive_entry *);
-mode_t                  archive_entry_mode(struct archive_entry *);
-time_t                  archive_entry_mtime(struct archive_entry *);
-long                    archive_entry_mtime_nsec(struct archive_entry *);
-unsigned int            archive_entry_nlink(struct archive_entry *);
-const char             *archive_entry_pathname(struct archive_entry *);
-const wchar_t          *archive_entry_pathname_w(struct archive_entry *);
-dev_t                   archive_entry_rdev(struct archive_entry *);
-dev_t                   archive_entry_rdevmajor(struct archive_entry *);
-dev_t                   archive_entry_rdevminor(struct archive_entry *);
-int64_t                         archive_entry_size(struct archive_entry *);
-const char             *archive_entry_strmode(struct archive_entry *);
-const char             *archive_entry_symlink(struct archive_entry *);
-const wchar_t          *archive_entry_symlink_w(struct archive_entry *);
-uid_t                   archive_entry_uid(struct archive_entry *);
-const char             *archive_entry_uname(struct archive_entry *);
-const wchar_t          *archive_entry_uname_w(struct archive_entry *);
+__LA_DECL const char   *archive_entry_fflags_text(struct archive_entry *);
+__LA_DECL __LA_GID_T    archive_entry_gid(struct archive_entry *);
+__LA_DECL const char   *archive_entry_gname(struct archive_entry *);
+__LA_DECL const wchar_t        *archive_entry_gname_w(struct archive_entry *);
+__LA_DECL const char   *archive_entry_hardlink(struct archive_entry *);
+__LA_DECL const wchar_t        *archive_entry_hardlink_w(struct archive_entry *);
+__LA_DECL __LA_INO_T    archive_entry_ino(struct archive_entry *);
+__LA_DECL __LA_MODE_T   archive_entry_mode(struct archive_entry *);
+__LA_DECL time_t        archive_entry_mtime(struct archive_entry *);
+__LA_DECL long          archive_entry_mtime_nsec(struct archive_entry *);
+__LA_DECL unsigned int  archive_entry_nlink(struct archive_entry *);
+__LA_DECL const char   *archive_entry_pathname(struct archive_entry *);
+__LA_DECL const wchar_t        *archive_entry_pathname_w(struct archive_entry *);
+__LA_DECL dev_t                 archive_entry_rdev(struct archive_entry *);
+__LA_DECL dev_t                 archive_entry_rdevmajor(struct archive_entry *);
+__LA_DECL dev_t                 archive_entry_rdevminor(struct archive_entry *);
+__LA_DECL const char   *archive_entry_sourcepath(struct archive_entry *);
+__LA_DECL int64_t       archive_entry_size(struct archive_entry *);
+__LA_DECL const char   *archive_entry_strmode(struct archive_entry *);
+__LA_DECL const char   *archive_entry_symlink(struct archive_entry *);
+__LA_DECL const wchar_t        *archive_entry_symlink_w(struct archive_entry *);
+__LA_DECL __LA_UID_T    archive_entry_uid(struct archive_entry *);
+__LA_DECL const char   *archive_entry_uname(struct archive_entry *);
+__LA_DECL const wchar_t        *archive_entry_uname_w(struct archive_entry *);
  
  /*
   * Set fields in an archive_entry.
@@ -142,48 +182,54 @@ const wchar_t             *archive_entry_uname_w(struct archive_entry *);
   * In contrast, 'copy' functions do copy the object pointed to.
   */
  
-void   archive_entry_set_atime(struct archive_entry *, time_t, long);
-void   archive_entry_set_ctime(struct archive_entry *, time_t, long);
-void   archive_entry_set_dev(struct archive_entry *, dev_t);
-void   archive_entry_set_devmajor(struct archive_entry *, dev_t);
-void   archive_entry_set_devminor(struct archive_entry *, dev_t);
-void   archive_entry_set_filetype(struct archive_entry *, unsigned int);
-void   archive_entry_set_fflags(struct archive_entry *,
+__LA_DECL void archive_entry_set_atime(struct archive_entry *, time_t, long);
+__LA_DECL void archive_entry_set_ctime(struct archive_entry *, time_t, long);
+__LA_DECL void archive_entry_set_dev(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_devmajor(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_devminor(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_filetype(struct archive_entry *, unsigned int);
+__LA_DECL void archive_entry_set_fflags(struct archive_entry *,
             unsigned long /* set */, unsigned long /* clear */);
  /* Returns pointer to start of first invalid token, or NULL if none. */
  /* Note that all recognized tokens are processed, regardless. */
-const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *,
+__LA_DECL const char *archive_entry_copy_fflags_text(struct archive_entry *,
+           const char *);
+__LA_DECL const wchar_t *archive_entry_copy_fflags_text_w(struct archive_entry *,
             const wchar_t *);
-void   archive_entry_set_gid(struct archive_entry *, gid_t);
-void   archive_entry_set_gname(struct archive_entry *, const char *);
-void   archive_entry_copy_gname(struct archive_entry *, const char *);
-void   archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *);
-void   archive_entry_set_hardlink(struct archive_entry *, const char *);
-void   archive_entry_copy_hardlink(struct archive_entry *, const char *);
-void   archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *);
-void   archive_entry_set_ino(struct archive_entry *, unsigned long);
-void   archive_entry_set_link(struct archive_entry *, const char *);
-void   archive_entry_copy_link(struct archive_entry *, const char *);
-void   archive_entry_copy_link_w(struct archive_entry *, const wchar_t *);
-void   archive_entry_set_mode(struct archive_entry *, mode_t);
-void   archive_entry_set_mtime(struct archive_entry *, time_t, long);
-void   archive_entry_set_nlink(struct archive_entry *, unsigned int);
-void   archive_entry_set_pathname(struct archive_entry *, const char *);
-void   archive_entry_copy_pathname(struct archive_entry *, const char *);
-void   archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *);
-void   archive_entry_set_perm(struct archive_entry *, mode_t);
-void   archive_entry_set_rdev(struct archive_entry *, dev_t);
-void   archive_entry_set_rdevmajor(struct archive_entry *, dev_t);
-void   archive_entry_set_rdevminor(struct archive_entry *, dev_t);
-void   archive_entry_set_size(struct archive_entry *, int64_t);
-void   archive_entry_set_symlink(struct archive_entry *, const char *);
-void   archive_entry_copy_symlink(struct archive_entry *, const char *);
-void   archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *);
-void   archive_entry_set_uid(struct archive_entry *, uid_t);
-void   archive_entry_set_uname(struct archive_entry *, const char *);
-void   archive_entry_copy_uname(struct archive_entry *, const char *);
-void   archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
-
+__LA_DECL void archive_entry_set_gid(struct archive_entry *, __LA_GID_T);
+__LA_DECL void archive_entry_set_gname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_gname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_gname_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int  archive_entry_update_gname_utf8(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_set_hardlink(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_hardlink(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_hardlink_w(struct archive_entry *, const wchar_t *);
+__LA_DECL void archive_entry_set_ino(struct archive_entry *, unsigned long);
+__LA_DECL void archive_entry_set_link(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_link(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_link_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int  archive_entry_update_link_utf8(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_set_mode(struct archive_entry *, __LA_MODE_T);
+__LA_DECL void archive_entry_set_mtime(struct archive_entry *, time_t, long);
+__LA_DECL void archive_entry_set_nlink(struct archive_entry *, unsigned int);
+__LA_DECL void archive_entry_set_pathname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_pathname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_pathname_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int  archive_entry_update_pathname_utf8(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_set_perm(struct archive_entry *, __LA_MODE_T);
+__LA_DECL void archive_entry_set_rdev(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_rdevmajor(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_rdevminor(struct archive_entry *, dev_t);
+__LA_DECL void archive_entry_set_size(struct archive_entry *, int64_t);
+__LA_DECL void archive_entry_copy_sourcepath(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_set_symlink(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_symlink(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_symlink_w(struct archive_entry *, const wchar_t *);
+__LA_DECL void archive_entry_set_uid(struct archive_entry *, __LA_UID_T);
+__LA_DECL void archive_entry_set_uname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_uname(struct archive_entry *, const char *);
+__LA_DECL void archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
+__LA_DECL int  archive_entry_update_uname_utf8(struct archive_entry *, const char *);
  /*
   * Routines to bulk copy fields to/from a platform-native "struct
   * stat."  Libarchive used to just store a struct stat inside of each
@@ -193,8 +239,8 @@ void        archive_entry_copy_uname_w(struct archive_entry *, const wchar_t *);
   *
   * TODO: On Linux, provide both stat32 and stat64 versions of these functions.
   */
-const struct stat      *archive_entry_stat(struct archive_entry *);
-void   archive_entry_copy_stat(struct archive_entry *, const struct stat *);
+__LA_DECL const struct stat    *archive_entry_stat(struct archive_entry *);
+__LA_DECL void archive_entry_copy_stat(struct archive_entry *, const struct stat *);
  
  /*
   * ACL routines.  This used to simply store and return text-format ACL
@@ -242,11 +288,11 @@ void      archive_entry_copy_stat(struct archive_entry *, const struct stat *);
   * POSIX.1e) is useful for handling archive formats that combine
   * default and access information in a single ACL list.
   */
-void    archive_entry_acl_clear(struct archive_entry *);
-void    archive_entry_acl_add_entry(struct archive_entry *,
+__LA_DECL void  archive_entry_acl_clear(struct archive_entry *);
+__LA_DECL void  archive_entry_acl_add_entry(struct archive_entry *,
             int /* type */, int /* permset */, int /* tag */,
             int /* qual */, const char * /* name */);
-void    archive_entry_acl_add_entry_w(struct archive_entry *,
+__LA_DECL void  archive_entry_acl_add_entry_w(struct archive_entry *,
             int /* type */, int /* permset */, int /* tag */,
             int /* qual */, const wchar_t * /* name */);
  
@@ -255,11 +301,11 @@ void       archive_entry_acl_add_entry_w(struct archive_entry *,
   * "next" entry.  The want_type parameter allows you to request only
   * access entries or only default entries.
   */
-int     archive_entry_acl_reset(struct archive_entry *, int /* want_type */);
-int     archive_entry_acl_next(struct archive_entry *, int /* want_type */,
+__LA_DECL int   archive_entry_acl_reset(struct archive_entry *, int /* want_type */);
+__LA_DECL int   archive_entry_acl_next(struct archive_entry *, int /* want_type */,
             int * /* type */, int * /* permset */, int * /* tag */,
             int * /* qual */, const char ** /* name */);
-int     archive_entry_acl_next_w(struct archive_entry *, int /* want_type */,
+__LA_DECL int   archive_entry_acl_next_w(struct archive_entry *, int /* want_type */,
             int * /* type */, int * /* permset */, int * /* tag */,
             int * /* qual */, const wchar_t ** /* name */);
  
@@ -276,11 +322,11 @@ int        archive_entry_acl_next_w(struct archive_entry *, int /* want_type */,
   */
  #define        ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID        1024
  #define        ARCHIVE_ENTRY_ACL_STYLE_MARK_DEFAULT    2048
-const wchar_t  *archive_entry_acl_text_w(struct archive_entry *,
+__LA_DECL const wchar_t        *archive_entry_acl_text_w(struct archive_entry *,
                     int /* flags */);
  
  /* Return a count of entries matching 'want_type' */
-int     archive_entry_acl_count(struct archive_entry *, int /* want_type */);
+__LA_DECL int   archive_entry_acl_count(struct archive_entry *, int /* want_type */);
  
  /*
   * Private ACL parser.  This is private because it handles some
@@ -295,15 +341,15 @@ int        archive_entry_acl_count(struct archive_entry *, int /* want_type */);
   * TODO: Move this declaration out of the public header and into
   * a private header.  Warnings above are silly.
   */
-int             __archive_entry_acl_parse_w(struct archive_entry *,
+__LA_DECL int           __archive_entry_acl_parse_w(struct archive_entry *,
                     const wchar_t *, int /* type */);
  
  /*
   * extended attributes
   */
  
-void    archive_entry_xattr_clear(struct archive_entry *);
-void    archive_entry_xattr_add_entry(struct archive_entry *,
+__LA_DECL void  archive_entry_xattr_clear(struct archive_entry *);
+__LA_DECL void  archive_entry_xattr_add_entry(struct archive_entry *,
             const char * /* name */, const void * /* value */,
             size_t /* size */);
  
@@ -312,37 +358,93 @@ void       archive_entry_xattr_add_entry(struct archive_entry *,
   * "next" entry.
   */
  
-int    archive_entry_xattr_count(struct archive_entry *);
-int    archive_entry_xattr_reset(struct archive_entry *);
-int    archive_entry_xattr_next(struct archive_entry *,
+__LA_DECL int  archive_entry_xattr_count(struct archive_entry *);
+__LA_DECL int  archive_entry_xattr_reset(struct archive_entry *);
+__LA_DECL int  archive_entry_xattr_next(struct archive_entry *,
             const char ** /* name */, const void ** /* value */, size_t *);
  
  /*
- * Utility to detect hardlinks.
+ * Utility to match up hardlinks.
   *
- * The 'struct archive_hardlink_lookup' is a cache of entry
- * names and dev/ino numbers.  Here's how to use it:
- *   1. Create a lookup object with archive_hardlink_lookup_new()
- *   2. Hand each archive_entry to archive_hardlink_lookup().
- *      That function will return NULL (this is not a hardlink to
- *      a previous entry) or the pathname of the first entry
- *      that matched this.
- *   3. Use archive_hardlink_lookup_free() to release the cache.
+ * The 'struct archive_entry_linkresolver' is a cache of archive entries
+ * for files with multiple links.  Here's how to use it:
+ *   1. Create a lookup object with archive_entry_linkresolver_new()
+ *   2. Tell it the archive format you're using.
+ *   3. Hand each archive_entry to archive_entry_linkify().
+ *      That function will return 0, 1, or 2 entries that should
+ *      be written.
+ *   4. Call archive_entry_linkify(resolver, NULL) until
+ *      no more entries are returned.
+ *   5. Call archive_entry_linkresolver_free(resolver) to free resources.
+ *
+ * The entries returned have their hardlink and size fields updated
+ * appropriately.  If an entry is passed in that does not refer to
+ * a file with multiple links, it is returned unchanged.  The intention
+ * is that you should be able to simply filter all entries through
+ * this machine.
   *
   * To make things more efficient, be sure that each entry has a valid
   * nlinks value.  The hardlink cache uses this to track when all links
   * have been found.  If the nlinks value is zero, it will keep every
   * name in the cache indefinitely, which can use a lot of memory.
+ *
+ * Note that archive_entry_size() is reset to zero if the file
+ * body should not be written to the archive.  Pay attention!
   */
-struct archive_entry_linkresolver;
+__LA_DECL struct archive_entry_linkresolver;
  
-struct archive_entry_linkresolver *archive_entry_linkresolver_new(void);
-void archive_entry_linkresolver_free(struct archive_entry_linkresolver *);
-const char *archive_entry_linkresolve(struct archive_entry_linkresolver *,
-    struct archive_entry *);
+/*
+ * There are three different strategies for marking hardlinks.
+ * The descriptions below name them after the best-known
+ * formats that rely on each strategy:
+ *
+ * "Old cpio" is the simplest, it always returns any entry unmodified.
+ *    As far as I know, only cpio formats use this.  Old cpio archives
+ *    store every link with the full body; the onus is on the dearchiver
+ *    to detect and properly link the files as they are restored.
+ * "tar" is also pretty simple; it caches a copy the first time it sees
+ *    any link.  Subsequent appearances are modified to be hardlink
+ *    references to the first one without any body.  Used by all tar
+ *    formats, although the newest tar formats permit the "old cpio" strategy
+ *    as well.  This strategy is very simple for the dearchiver,
+ *    and reasonably straightforward for the archiver.
+ * "new cpio" is trickier.  It stores the body only with the last
+ *    occurrence.  The complication is that we might not
+ *    see every link to a particular file in a single session, so
+ *    there's no easy way to know when we've seen the last occurrence.
+ *    The solution here is to queue one link until we see the next.
+ *    At the end of the session, you can enumerate any remaining
+ *    entries by calling archive_entry_linkify(NULL) and store those
+ *    bodies.  If you have a file with three links l1, l2, and l3,
+ *    you'll get the following behavior if you see all three links:
+ *           linkify(l1) => NULL   (the resolver stores l1 internally)
+ *           linkify(l2) => l1     (resolver stores l2, you write l1)
+ *           linkify(l3) => l2, l3 (all links seen, you can write both).
+ *    If you only see l1 and l2, you'll get this behavior:
+ *           linkify(l1) => NULL
+ *           linkify(l2) => l1
+ *           linkify(NULL) => l2   (at end, you retrieve remaining links)
+ *    As the name suggests, this strategy is used by newer cpio variants.
+ *    It's noticeably more complex for the archiver, slightly more complex
+ *    for the dearchiver than the tar strategy, but makes it straightforward
+ *    to restore a file using any link by simply continuing to scan until
+ *    you see a link that is stored with a body.  In contrast, the tar
+ *    strategy requires you to rescan the archive from the beginning to
+ *    correctly extract an arbitrary link.
+ */
+
+__LA_DECL struct archive_entry_linkresolver *archive_entry_linkresolver_new(void);
+__LA_DECL void archive_entry_linkresolver_set_strategy(
+       struct archive_entry_linkresolver *, int /* format_code */);
+__LA_DECL void archive_entry_linkresolver_free(struct archive_entry_linkresolver *);
+__LA_DECL void archive_entry_linkify(struct archive_entry_linkresolver *,
+    struct archive_entry **, struct archive_entry **);
  
  #ifdef __cplusplus
  }
  #endif
  
+/* This is meaningless outside of this header. */
+#undef __LA_DECL
+
  #endif /* !ARCHIVE_ENTRY_H_INCLUDED */
diff --git a/libarchive/archive_entry_link_resolver.c b/libarchive/archive_entry_link_resolver.c

index 78a3c65d01821e47612607353bacfaa05b0f58ff..0df9ff92e6855d586ad61c82621d8212a6719a04 100644 (file)
--- a/libarchive/archive_entry_link_resolver.c
+++ b/libarchive/archive_entry_link_resolver.c
@@ -40,135 +40,216 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.1 2007/
  #include <string.h>
  #endif
  
+#include "archive.h"
  #include "archive_entry.h"
  
+/*
+ * This is mostly a pretty straightforward hash table implementation.
+ * The only interesting bit is the different strategies used to
+ * match up links.  These strategies match those used by various
+ * archiving formats:
+ *   tar - content stored with first link, remainder refer back to it.
+ *       This requires us to match each subsequent link up with the
+ *       first appearance.
+ *   cpio - Old cpio just stored body with each link, match-ups were
+ *       implicit.  This is trivial.
+ *   new cpio - New cpio only stores body with last link, match-ups
+ *       are implicit.  This is actually quite tricky; see the notes
+ *       below.
+ */
+
+/* Users pass us a format code, we translate that into a strategy here. */
+#define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
+#define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 1
+#define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 2
+
  /* Initial size of link cache. */
  #define        links_cache_initial_size 1024
  
+struct links_entry {
+       struct links_entry      *next;
+       struct links_entry      *previous;
+       int                      links; /* # links not yet seen */
+       int                      hash;
+       struct archive_entry    *canonical;
+       struct archive_entry    *entry;
+};
+
  struct archive_entry_linkresolver {
-       char                     *last_name;
+       struct links_entry      **buckets;
+       struct links_entry       *spare;
         unsigned long             number_entries;
         size_t                    number_buckets;
-       struct links_entry      **buckets;
+       int                       strategy;
  };
  
-struct links_entry {
-       struct links_entry      *next;
-       struct links_entry      *previous;
-       int                      links;
-       dev_t                    dev;
-       ino_t                    ino;
-       char                    *name;
-};
+static struct links_entry *find_entry(struct archive_entry_linkresolver *,
+                   struct archive_entry *);
+static void grow_hash(struct archive_entry_linkresolver *);
+static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
+                   struct archive_entry *);
+static struct links_entry *next_entry(struct archive_entry_linkresolver *);
  
  struct archive_entry_linkresolver *
  archive_entry_linkresolver_new(void)
  {
-       struct archive_entry_linkresolver *links_cache;
+       struct archive_entry_linkresolver *res;
         size_t i;
  
-       links_cache = malloc(sizeof(struct archive_entry_linkresolver));
-       if (links_cache == NULL)
+       res = malloc(sizeof(struct archive_entry_linkresolver));
+       if (res == NULL)
                 return (NULL);
-       memset(links_cache, 0, sizeof(struct archive_entry_linkresolver));
-       links_cache->number_buckets = links_cache_initial_size;
-       links_cache->buckets = malloc(links_cache->number_buckets *
-           sizeof(links_cache->buckets[0]));
-       if (links_cache->buckets == NULL) {
-               free(links_cache);
+       memset(res, 0, sizeof(struct archive_entry_linkresolver));
+       res->number_buckets = links_cache_initial_size;
+       res->buckets = malloc(res->number_buckets *
+           sizeof(res->buckets[0]));
+       if (res->buckets == NULL) {
+               free(res);
                 return (NULL);
         }
-       for (i = 0; i < links_cache->number_buckets; i++)
-               links_cache->buckets[i] = NULL;
-       return (links_cache);
+       for (i = 0; i < res->number_buckets; i++)
+               res->buckets[i] = NULL;
+       return (res);
  }
  
  void
-archive_entry_linkresolver_free(struct archive_entry_linkresolver *links_cache)
+archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
+    int fmt)
  {
-       size_t i;
+       int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
+
+       switch (fmtbase) {
+       case ARCHIVE_FORMAT_CPIO:
+               switch (fmt) {
+               case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
+               case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
+                       res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
+                       break;
+               default:
+                       res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
+                       break;
+               }
+               break;
+       case ARCHIVE_FORMAT_TAR:
+               res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
+               break;
+       default:
+               res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
+               break;
+       }
+}
+
+void
+archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
+{
+       struct links_entry *le;
+
+       if (res->buckets != NULL) {
+               while ((le = next_entry(res)) != NULL)
+                       archive_entry_free(le->entry);
+               free(res->buckets);
+               res->buckets = NULL;
+       }
+       free(res);
+}
+
+void
+archive_entry_linkify(struct archive_entry_linkresolver *res,
+    struct archive_entry **e, struct archive_entry **f)
+{
+       struct links_entry *le;
+       struct archive_entry *t;
+
+       *f = NULL; /* Default: Don't return a second entry. */
+
+       if (*e == NULL) {
+               le = next_entry(res);
+               if (le != NULL)
+                       *e = le->entry;
+               return;
+       }
  
-       if (links_cache->buckets == NULL)
+       /* If it has only one link, then we're done. */
+       if (archive_entry_nlink(*e) == 1)
                 return;
  
-       for (i = 0; i < links_cache->number_buckets; i++) {
-               while (links_cache->buckets[i] != NULL) {
-                       struct links_entry *lp = links_cache->buckets[i]->next;
-                       if (links_cache->buckets[i]->name != NULL)
-                               free(links_cache->buckets[i]->name);
-                       free(links_cache->buckets[i]);
-                       links_cache->buckets[i] = lp;
+       switch (res->strategy) {
+       case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
+               le = find_entry(res, *e);
+               if (le != NULL) {
+                       archive_entry_set_size(*e, 0);
+                       archive_entry_set_hardlink(*e,
+                           archive_entry_pathname(le->canonical));
+               } else
+                       insert_entry(res, *e);
+               return;
+       case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
+               /* This one is trivial. */
+               return;
+       case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
+               le = find_entry(res, *e);
+               if (le != NULL) {
+                       /*
+                        * Put the new entry in le, return the
+                        * old entry from le.
+                        */
+                       t = *e;
+                       *e = le->entry;
+                       le->entry = t;
+                       /* Make the old entry into a hardlink. */
+                       archive_entry_set_size(*e, 0);
+                       archive_entry_set_hardlink(*e,
+                           archive_entry_pathname(le->canonical));
+                       /* If we ran out of links, return the
+                        * final entry as well. */
+                       if (le->links == 0)
+                               *f = le->entry;
+               } else {
+                       /*
+                        * First sighting: insert_entry() stores *e in the
+                        * cache.  If it fails (NULL), hand *e back as-is.
+                        */
+                       le = insert_entry(res, *e);
+                       if (le != NULL)
+                               *e = NULL;
                 }
+               return;
+       default:
+               break;
         }
-       free(links_cache->buckets);
-       links_cache->buckets = NULL;
+       return;
  }
  
-const char *
-archive_entry_linkresolve(struct archive_entry_linkresolver *links_cache,
+static struct links_entry *
+find_entry(struct archive_entry_linkresolver *res,
      struct archive_entry *entry)
  {
-       struct links_entry      *le, **new_buckets;
-       int                      hash;
-       size_t                   i, new_size;
+       struct links_entry      *le;
+       int                      hash, bucket;
         dev_t                    dev;
         ino_t                    ino;
-       int                      nlinks;
-
  
-       /* Free a held name. */
-       free(links_cache->last_name);
-       links_cache->last_name = NULL;
+       /* Free a held entry. */
+       if (res->spare != NULL) {
+               archive_entry_free(res->spare->canonical);
+               free(res->spare);
+               res->spare = NULL;
+       }
  
         /* If the links cache overflowed and got flushed, don't bother. */
-       if (links_cache->buckets == NULL)
+       if (res->buckets == NULL)
                 return (NULL);
  
         dev = archive_entry_dev(entry);
         ino = archive_entry_ino(entry);
-       nlinks = archive_entry_nlink(entry);
-
-       /* An entry with one link can't be a hard link. */
-       if (nlinks == 1)
-               return (NULL);
-
-       /* If the links cache is getting too full, enlarge the hash table. */
-       if (links_cache->number_entries > links_cache->number_buckets * 2)
-       {
-               /* Try to enlarge the bucket list. */
-               new_size = links_cache->number_buckets * 2;
-               new_buckets = malloc(new_size * sizeof(struct links_entry *));
-
-               if (new_buckets != NULL) {
-                       memset(new_buckets, 0,
-                           new_size * sizeof(struct links_entry *));
-                       for (i = 0; i < links_cache->number_buckets; i++) {
-                               while (links_cache->buckets[i] != NULL) {
-                                       /* Remove entry from old bucket. */
-                                       le = links_cache->buckets[i];
-                                       links_cache->buckets[i] = le->next;
-
-                                       /* Add entry to new bucket. */
-                                       hash = (le->dev ^ le->ino) % new_size;
-
-                                       if (new_buckets[hash] != NULL)
-                                               new_buckets[hash]->previous =
-                                                   le;
-                                       le->next = new_buckets[hash];
-                                       le->previous = NULL;
-                                       new_buckets[hash] = le;
-                               }
-                       }
-                       free(links_cache->buckets);
-                       links_cache->buckets = new_buckets;
-                       links_cache->number_buckets = new_size;
-               }
-       }
+       hash = dev ^ ino;
  
         /* Try to locate this entry in the links cache. */
-       hash = ( dev ^ ino ) % links_cache->number_buckets;
-       for (le = links_cache->buckets[hash]; le != NULL; le = le->next) {
-               if (le->dev == dev && le->ino == ino) {
+       bucket = hash % res->number_buckets;
+       for (le = res->buckets[bucket]; le != NULL; le = le->next) {
+               if (le->hash == hash
+                   && dev == archive_entry_dev(le->entry)
+                   && ino == archive_entry_ino(le->entry)) {
                         /*
                          * Decrement link count each time and release
                          * the entry if it hits zero.  This saves
@@ -177,46 +258,123 @@ archive_entry_linkresolve(struct archive_entry_linkresolver *links_cache,
                          */
                         --le->links;
                         if (le->links > 0)
-                               return (le->name);
-                       /*
-                        * When we release the entry, save the name
-                        * until the next call.
-                        */
-                       links_cache->last_name = le->name;
-                       /*
-                        * Release the entry.
-                        */
+                               return (le);
+                       /* Remove it from this hash bucket. */
                         if (le->previous != NULL)
                                 le->previous->next = le->next;
                         if (le->next != NULL)
                                 le->next->previous = le->previous;
-                       if (links_cache->buckets[hash] == le)
-                               links_cache->buckets[hash] = le->next;
-                       links_cache->number_entries--;
-                       free(le);
-                       return (links_cache->last_name);
+                       if (res->buckets[bucket] == le)
+                               res->buckets[bucket] = le->next;
+                       res->number_entries--;
+                       /* Defer freeing this entry. */
+                       res->spare = le;
+                       return (le);
                 }
         }
+       return (NULL);
+}
+
+static struct links_entry *
+next_entry(struct archive_entry_linkresolver *res)
+{
+       struct links_entry      *le;
+       size_t                   bucket;
+
+       /* Free a held entry. */
+       if (res->spare != NULL) {
+               archive_entry_free(res->spare->canonical);
+               free(res->spare);
+               res->spare = NULL;
+       }
+
+       /* If the links cache overflowed and got flushed, don't bother. */
+       if (res->buckets == NULL)
+               return (NULL);
+
+       /* Look for next non-empty bucket in the links cache. */
+       for (bucket = 0; bucket < res->number_buckets; bucket++) {
+               le = res->buckets[bucket];
+               if (le != NULL) {
+                       /* Remove it from this hash bucket. */
+                       if (le->next != NULL)
+                               le->next->previous = le->previous;
+                       res->buckets[bucket] = le->next;
+                       res->number_entries--;
+                       /* Defer freeing this entry. */
+                       res->spare = le;
+                       return (le);
+               }
+       }
+       return (NULL);
+}
+
+static struct links_entry *
+insert_entry(struct archive_entry_linkresolver *res,
+    struct archive_entry *entry)
+{
+       struct links_entry *le;
+       int                      hash, bucket;
  
         /* Add this entry to the links cache. */
         le = malloc(sizeof(struct links_entry));
         if (le == NULL)
                 return (NULL);
-       le->name = strdup(archive_entry_pathname(entry));
-       if (le->name == NULL) {
-               free(le);
-               return (NULL);
-       }
+       le->entry = entry;
+
+       /* If the links cache is getting too full, enlarge the hash table. */
+       if (res->number_entries > res->number_buckets * 2)
+               grow_hash(res);
+
+       hash = archive_entry_dev(entry) ^ archive_entry_ino(entry);
+       bucket = hash % res->number_buckets;
  
         /* If we could allocate the entry, record it. */
-       if (links_cache->buckets[hash] != NULL)
-               links_cache->buckets[hash]->previous = le;
-       links_cache->number_entries++;
-       le->next = links_cache->buckets[hash];
+       if (res->buckets[bucket] != NULL)
+               res->buckets[bucket]->previous = le;
+       res->number_entries++;
+       le->next = res->buckets[bucket];
         le->previous = NULL;
-       links_cache->buckets[hash] = le;
-       le->dev = dev;
-       le->ino = ino;
-       le->links = nlinks - 1;
-       return (NULL);
+       res->buckets[bucket] = le;
+       le->hash = hash;
+       le->links = archive_entry_nlink(entry) - 1;
+       le->canonical = archive_entry_clone(entry);
+       return (le);
+}
+
+static void
+grow_hash(struct archive_entry_linkresolver *res)
+{
+       struct links_entry *le, **new_buckets;
+       size_t new_size;
+       size_t i, bucket;
+
+       /* Try to enlarge the bucket list. */
+       new_size = res->number_buckets * 2;
+       new_buckets = malloc(new_size * sizeof(struct links_entry *));
+
+       if (new_buckets != NULL) {
+               memset(new_buckets, 0,
+                   new_size * sizeof(struct links_entry *));
+               for (i = 0; i < res->number_buckets; i++) {
+                       while (res->buckets[i] != NULL) {
+                               /* Remove entry from old bucket. */
+                               le = res->buckets[i];
+                               res->buckets[i] = le->next;
+
+                               /* Add entry to new bucket. */
+                               bucket = le->hash % new_size;
+
+                               if (new_buckets[bucket] != NULL)
+                                       new_buckets[bucket]->previous =
+                                           le;
+                               le->next = new_buckets[bucket];
+                               le->previous = NULL;
+                               new_buckets[bucket] = le;
+                       }
+               }
+               free(res->buckets);
+               res->buckets = new_buckets;
+               res->number_buckets = new_size;
+       }
  }
diff --git a/libarchive/archive_entry_private.h b/libarchive/archive_entry_private.h

index 0d368a4dd186feaef4d42fd583dac2dd7d7ae81c..f893fb982aeb5b1e1844b09fad2c528aca44a2c9 100644 (file)
--- a/libarchive/archive_entry_private.h
+++ b/libarchive/archive_entry_private.h
@@ -28,17 +28,25 @@
  #ifndef ARCHIVE_ENTRY_PRIVATE_H_INCLUDED
  #define        ARCHIVE_ENTRY_PRIVATE_H_INCLUDED
  
+#include "archive_string.h"
+
  /*
   * Handle wide character (i.e., Unicode) and non-wide character
   * strings transparently.
- *
   */
  
  struct aes {
-       const char *aes_mbs;
-       char *aes_mbs_alloc;
+       struct archive_string aes_mbs;
+       struct archive_string aes_utf8;
         const wchar_t *aes_wcs;
-       wchar_t *aes_wcs_alloc;
+       /* Bitmap of which of the above are valid.  Because we're lazy
+        * about malloc-ing and reusing the underlying storage, we
+        * can't rely on NULL pointers to indicate whether a string
+        * has been set. */
+       int aes_set;
+#define        AES_SET_MBS 1
+#define        AES_SET_UTF8 2
+#define        AES_SET_WCS 4
  };
  
  struct ae_acl {
@@ -128,8 +136,6 @@ struct archive_entry {
                 dev_t           aest_rdevminor;
         } ae_stat;
  
-
-
         /*
          * Use aes here so that we get transparent mbs<->wcs conversions.
          */
@@ -141,15 +147,23 @@ struct archive_entry {
         struct aes ae_pathname; /* Name of entry */
         struct aes ae_symlink;          /* symlink contents */
         struct aes ae_uname;            /* Name of owner */
+       unsigned char   ae_hardlinkset;
+       unsigned char   ae_symlinkset;
+
+       /* Not used within libarchive; useful for some clients. */
+       struct aes ae_sourcepath;       /* Path this entry is sourced from. */
  
+       /* ACL support. */
         struct ae_acl   *acl_head;
         struct ae_acl   *acl_p;
         int              acl_state;     /* See acl_next for details. */
         wchar_t         *acl_text_w;
  
+       /* extattr support. */
         struct ae_xattr *xattr_head;
         struct ae_xattr *xattr_p;
  
+       /* Miscellaneous. */
         char             strmode[12];
  };
  
diff --git a/libarchive/archive_platform.h b/libarchive/archive_platform.h

index b14ccd8206bbd333ddac004371bee5f9d8577b1b..41fd4e549dc29369f7c8de8e55ba5164553bddc4 100644 (file)
--- a/libarchive/archive_platform.h
+++ b/libarchive/archive_platform.h
@@ -36,6 +36,9 @@
  #ifndef ARCHIVE_PLATFORM_H_INCLUDED
  #define        ARCHIVE_PLATFORM_H_INCLUDED
  
+/* archive.h and archive_entry.h require this. */
+#define        __LIBARCHIVE_BUILD 1
+
  #ifdef _WIN32
  #include "config_windows.h"
  #include "archive_windows.h"
diff --git a/libarchive/archive_read_support_format_iso9660.c b/libarchive/archive_read_support_format_iso9660.c

index d333f0ccb4e0fca3eeeb189edda25f3c97d97c5b..ee9831b90be9ef9c91e6fe9186f3c0cc6eb67572 100644 (file)
--- a/libarchive/archive_read_support_format_iso9660.c
+++ b/libarchive/archive_read_support_format_iso9660.c
@@ -908,6 +908,11 @@ fprintf(stderr, " *** Discarding CE data.\n");
                         file->ce_size = 0;
                 }
  
+               /* Don't waste time seeking for zero-length bodies. */
+               if (file->size == 0) {
+                       file->offset = iso9660->current_position;
+               }
+
                 /* If CE exists, find and read it now. */
                 if (file->ce_offset > 0)
                         offset = file->ce_offset;
diff --git a/libarchive/archive_read_support_format_tar.c b/libarchive/archive_read_support_format_tar.c

index 76fda2d63c238bbeb3edb2be06d7c993b807484e..147ec0b279525819c5e57319dd8d2ba293369d67 100644 (file)
--- a/libarchive/archive_read_support_format_tar.c
+++ b/libarchive/archive_read_support_format_tar.c
@@ -145,6 +145,8 @@ struct sparse_block {
  struct tar {
         struct archive_string    acl_text;
         struct archive_string    entry_pathname;
+       /* For "GNU.sparse.name" and other similar path extensions. */
+       struct archive_string    entry_pathname_override;
         struct archive_string    entry_linkpath;
         struct archive_string    entry_uname;
         struct archive_string    entry_gname;
@@ -272,6 +274,7 @@ archive_read_format_tar_cleanup(struct archive_read *a)
         gnu_clear_sparse_list(tar);
         archive_string_free(&tar->acl_text);
         archive_string_free(&tar->entry_pathname);
+       archive_string_free(&tar->entry_pathname_override);
         archive_string_free(&tar->entry_linkpath);
         archive_string_free(&tar->entry_uname);
         archive_string_free(&tar->entry_gname);
@@ -1174,7 +1177,6 @@ pax_header(struct archive_read *a, struct tar *tar,
         size_t attr_length, l, line_length;
         char *line, *p;
         char *key, *value;
-       wchar_t *wp;
         int err, err2;
  
         attr_length = strlen(attr);
@@ -1182,6 +1184,7 @@ pax_header(struct archive_read *a, struct tar *tar,
         archive_string_empty(&(tar->entry_gname));
         archive_string_empty(&(tar->entry_linkpath));
         archive_string_empty(&(tar->entry_pathname));
+       archive_string_empty(&(tar->entry_pathname_override));
         archive_string_empty(&(tar->entry_uname));
         err = ARCHIVE_OK;
         while (attr_length > 0) {
@@ -1257,13 +1260,13 @@ pax_header(struct archive_read *a, struct tar *tar,
                 if (tar->pax_hdrcharset_binary)
                         archive_entry_copy_gname(entry, value);
                 else {
-                       wp = utf8_decode(tar, value, strlen(value));
-                       if (wp == NULL) {
-                               archive_entry_copy_gname(entry, value);
-                               if (err > ARCHIVE_WARN)
-                                       err = ARCHIVE_WARN;
-                       } else
-                               archive_entry_copy_gname_w(entry, wp);
+                       if (!archive_entry_update_gname_utf8(entry, value)) {
+                               err = ARCHIVE_WARN;
+                               archive_set_error(&a->archive,
+                                   ARCHIVE_ERRNO_FILE_FORMAT,
+                                   "Gname in pax header can't "
+                                   "be converted to current locale.");
+                       }
                 }
         }
         if (archive_strlen(&(tar->entry_linkpath)) > 0) {
@@ -1271,27 +1274,40 @@ pax_header(struct archive_read *a, struct tar *tar,
                 if (tar->pax_hdrcharset_binary)
                         archive_entry_copy_link(entry, value);
                 else {
-                       wp = utf8_decode(tar, value, strlen(value));
-                       if (wp == NULL) {
-                               archive_entry_copy_link(entry, value);
-                               if (err > ARCHIVE_WARN)
-                                       err = ARCHIVE_WARN;
-                       } else
-                               archive_entry_copy_link_w(entry, wp);
+                       if (!archive_entry_update_link_utf8(entry, value)) {
+                               err = ARCHIVE_WARN;
+                               archive_set_error(&a->archive,
+                                   ARCHIVE_ERRNO_FILE_FORMAT,
+                                   "Linkname in pax header can't "
+                                   "be converted to current locale.");
+                       }
                 }
         }
-       if (archive_strlen(&(tar->entry_pathname)) > 0) {
+       /*
+        * Some extensions (such as the GNU sparse file extensions)
+        * deliberately store a synthetic name under the regular 'path'
+        * attribute and the real file name under a different attribute.
+        * Since we're supposed to not care about the order, we
+        * have no choice but to store all of the various filenames
+        * we find and figure it all out afterwards.  This is the
+        * figuring out part.
+        */
+       value = NULL;
+       if (archive_strlen(&(tar->entry_pathname_override)) > 0)
+               value = tar->entry_pathname_override.s;
+       else if (archive_strlen(&(tar->entry_pathname)) > 0)
                 value = tar->entry_pathname.s;
+       if (value != NULL) {
                 if (tar->pax_hdrcharset_binary)
                         archive_entry_copy_pathname(entry, value);
                 else {
-                       wp = utf8_decode(tar, value, strlen(value));
-                       if (wp == NULL) {
-                               archive_entry_copy_pathname(entry, value);
-                               if (err > ARCHIVE_WARN)
-                                       err = ARCHIVE_WARN;
-                       } else
-                               archive_entry_copy_pathname_w(entry, wp);
+                       if (!archive_entry_update_pathname_utf8(entry, value)) {
+                               err = ARCHIVE_WARN;
+                               archive_set_error(&a->archive,
+                                   ARCHIVE_ERRNO_FILE_FORMAT,
+                                   "Pathname in pax header can't be "
+                                   "converted to current locale.");
+                       }
                 }
         }
         if (archive_strlen(&(tar->entry_uname)) > 0) {
@@ -1299,13 +1315,13 @@ pax_header(struct archive_read *a, struct tar *tar,
                 if (tar->pax_hdrcharset_binary)
                         archive_entry_copy_uname(entry, value);
                 else {
-                       wp = utf8_decode(tar, value, strlen(value));
-                       if (wp == NULL) {
-                               archive_entry_copy_uname(entry, value);
-                               if (err > ARCHIVE_WARN)
-                                       err = ARCHIVE_WARN;
-                       } else
-                               archive_entry_copy_uname_w(entry, wp);
+                       if (!archive_entry_update_uname_utf8(entry, value)) {
+                               err = ARCHIVE_WARN;
+                               archive_set_error(&a->archive,
+                                   ARCHIVE_ERRNO_FILE_FORMAT,
+                                   "Uname in pax header can't "
+                                   "be converted to current locale.");
+                       }
                 }
         }
         return (err);
@@ -1415,11 +1431,13 @@ pax_attribute(struct tar *tar, struct archive_entry *entry,
                         tar->sparse_gnu_pending = 1;
                 }
                 if (strcmp(key, "GNU.sparse.name") == 0) {
-                       wp = utf8_decode(tar, value, strlen(value));
-                       if (wp != NULL)
-                               archive_entry_copy_pathname_w(entry, wp);
-                       else
-                               archive_entry_copy_pathname(entry, value);
+                       /*
+                        * The real filename; when storing sparse
+                        * files, GNU tar puts a synthesized name into
+                        * the regular 'path' attribute in an attempt
+                        * to limit confusion. ;-)
+                        */
+                       archive_strcpy(&(tar->entry_pathname_override), value);
                 }
                 if (strcmp(key, "GNU.sparse.realsize") == 0) {
                         tar->realsize = tar_atol10(value, strlen(value));
@@ -1455,9 +1473,7 @@ pax_attribute(struct tar *tar, struct archive_entry *entry,
                         archive_entry_set_rdevminor(entry,
                             tar_atol10(value, strlen(value)));
                 } else if (strcmp(key, "SCHILY.fflags")==0) {
-                       wp = utf8_decode(tar, value, strlen(value));
-                       /* TODO: if (wp == NULL) */
-                       archive_entry_copy_fflags_text_w(entry, wp);
+                       archive_entry_copy_fflags_text(entry, value);
                 } else if (strcmp(key, "SCHILY.dev")==0) {
                         archive_entry_set_dev(entry,
                             tar_atol10(value, strlen(value)));
diff --git a/libarchive/archive_read_support_format_zip.c b/libarchive/archive_read_support_format_zip.c

index 3c951313df43ebb3f97eff3376936c10d98e2470..a105297ad4f9766183947c1393ee5320faa02228 100644 (file)
--- a/libarchive/archive_read_support_format_zip.c
+++ b/libarchive/archive_read_support_format_zip.c
@@ -162,11 +162,9 @@ archive_read_support_format_zip(struct archive *_a)
  static int
  archive_read_format_zip_bid(struct archive_read *a)
  {
-       int bid = 0;
         const char *p;
-
-       if (a->archive.archive_format == ARCHIVE_FORMAT_ZIP)
-               bid += 1;
+       const void *buff;
+       size_t bytes_avail;
  
         if ((p = __archive_read_ahead(a, 4)) == NULL)
                 return (-1);
@@ -184,9 +182,104 @@ archive_read_format_zip_bid(struct archive_read *a)
                     || (p[2] == '0' && p[3] == '0'))
                         return (30);
         }
+
+       /*
+        * Attempt to handle self-extracting archives
+        * by noting a PE header and searching forward
+        * up to 128k for a 'PK\003\004' marker.
+        */
+       if (p[0] == 'M' && p[1] == 'Z') {
+               /*
+                * TODO: Additional checks that this really is a PE
+                * file before we invoke the 128k lookahead below.
+                * No point in allocating a bigger lookahead buffer
+                * if we don't need to.
+                */
+               /*
+                * TODO: Of course, the compression layer lookahead
+                * buffers aren't dynamically sized yet; they should be.
+                */
+               bytes_avail = (a->decompressor->read_ahead)(a, &buff, 128*1024);
+               p = (const char *)buff;
+
+               /*
+                * TODO: Optimize by jumping forward based on values
+                * in the PE header.  Note that we don't need to be
+                * exact, but we mustn't skip too far.  The search
+                * below will compensate if we undershoot.  Skipping
+                * will also reduce the chance of false positives
+                * (which is not really all that high to begin with,
+                * so maybe skipping isn't really necessary).
+                */
+
+               while (p < bytes_avail + (const char *)buff) {
+                       if (p[0] == 'P' && p[1] == 'K' /* "PK" signature */
+                           && p[2] == 3 && p[3] == 4 /* File entry */
+                           && p[8] == 8 /* compression == deflate */
+                           && p[9] == 0 /* High byte of compression */
+                               )
+                       {
+                               return (30);
+                       }
+                       ++p;
+               }
+       }
+
         return (0);
  }
  
+/*
+ * Search forward for a "PK\003\004" file header.  This handles the
+ * case of self-extracting archives, where there is an executable
+ * prepended to the ZIP archive.
+ *
+ * Returns ARCHIVE_OK once a signature is found; the bytes in front
+ * of it have then been handed to the decompressor's consume hook.
+ * Returns ARCHIVE_FATAL when fewer than four bytes can be read
+ * ahead, i.e. the input ended without a signature.
+ */
+static int
+skip_sfx(struct archive_read *a)
+{
+       const void *h;
+       const char *p, *q;
+       size_t skip, bytes;
+
+       /*
+        * TODO: We should be able to skip forward by a bunch
+        * by lifting some values from the PE header.  We don't
+        * need to be exact (we're still going to search forward
+        * to find the header), but it will speed things up and
+        * reduce the chance of a false positive.
+        */
+       for (;;) {
+               /*
+                * NOTE(review): 'bytes' is unsigned, so if read_ahead
+                * can report an error as a negative value it would
+                * look like a huge byte count here -- confirm against
+                * the decompressor interface.
+                */
+               bytes = (a->decompressor->read_ahead)(a, &h, 4096);
+               if (bytes < 4)
+                       return (ARCHIVE_FATAL);
+               p = h;
+               q = p + bytes;
+
+               /*
+                * Scan ahead until we find something that looks
+                * like the zip header.  p[3] is the byte that would
+                * end a signature starting at p; its value tells us
+                * how far p can jump without stepping over a match.
+                *
+                * The bound is "<=" so that a window of exactly four
+                * bytes is still examined.  That guarantees every
+                * pass consumes at least one byte; with "<" a
+                * four-byte tail was never scanned, nothing was
+                * consumed, and the outer loop spun forever.
+                */
+               while (p + 4 <= q) {
+                       switch (p[3]) {
+                       case '\004':
+                               /* TODO: Additional verification here. */
+                               if (memcmp("PK\003\004", p, 4) == 0) {
+                                       skip = p - (const char *)h;
+                                       (a->decompressor->consume)(a, skip);
+                                       return (ARCHIVE_OK);
+                               }
+                               p += 4;
+                               break;
+                       case '\003': p += 1; break;
+                       case 'K': p += 2; break;
+                       case 'P': p += 3; break;
+                       default: p += 4; break;
+                       }
+               }
+               /* Drop what was scanned; p never moves past q. */
+               skip = p - (const char *)h;
+               (a->decompressor->consume)(a, skip);
+       }
+}
+
  static int
  archive_read_format_zip_read_header(struct archive_read *a,
      struct archive_entry *entry)
@@ -194,6 +287,7 @@ archive_read_format_zip_read_header(struct archive_read *a,
         const void *h;
         const char *signature;
         struct zip *zip;
+       int r = ARCHIVE_OK, r1;
  
         a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
         if (a->archive.archive_format_name == NULL)
@@ -209,6 +303,16 @@ archive_read_format_zip_read_header(struct archive_read *a,
                 return (ARCHIVE_FATAL);
  
         signature = (const char *)h;
+       if (signature[0] == 'M' && signature[1] == 'Z') {
+               /* This is an executable?  Must be self-extracting... */
+               r = skip_sfx(a);
+               if (r < ARCHIVE_WARN)
+                       return (r);
+               if ((h = __archive_read_ahead(a, 4)) == NULL)
+                       return (ARCHIVE_FATAL);
+               signature = (const char *)h;
+       }
+
         if (signature[0] != 'P' || signature[1] != 'K') {
                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
                     "Bad ZIP file");
@@ -239,7 +343,10 @@ archive_read_format_zip_read_header(struct archive_read *a,
  
         if (signature[2] == '\003' && signature[3] == '\004') {
                 /* Regular file entry. */
-               return (zip_read_file_header(a, entry, zip));
+               r1 = zip_read_file_header(a, entry, zip);
+               if (r1 != ARCHIVE_OK)
+                       return (r1);
+               return (r);
         }
  
         if (signature[2] == '\005' && signature[3] == '\006') {
diff --git a/libarchive/archive_string.c b/libarchive/archive_string.c

index 7e43b360a4d0b4baf050e79f124452290fbbaf7c..e308c480bba59295eeeab52af37220c5ffa63698 100644 (file)
--- a/libarchive/archive_string.c
+++ b/libarchive/archive_string.c
@@ -37,6 +37,9 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_string.c,v 1.11 2007/07/15 19:13:
  #ifdef HAVE_STRING_H
  #include <string.h>
  #endif
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
  
  #include "archive_private.h"
  #include "archive_string.h"
@@ -55,11 +58,15 @@ __archive_string_append(struct archive_string *as, const char *p, size_t s)
  void
  __archive_string_copy(struct archive_string *dest, struct archive_string *src)
  {
-       if (__archive_string_ensure(dest, src->length + 1) == NULL)
-               __archive_errx(1, "Out of memory");
-       memcpy(dest->s, src->s, src->length);
-       dest->length = src->length;
-       dest->s[dest->length] = 0;
+       if (src->length == 0)
+               dest->length = 0;
+       else {
+               if (__archive_string_ensure(dest, src->length + 1) == NULL)
+                       __archive_errx(1, "Out of memory");
+               memcpy(dest->s, src->s, src->length);
+               dest->length = src->length;
+               dest->s[dest->length] = 0;
+       }
  }
  
  void
@@ -67,21 +74,52 @@ __archive_string_free(struct archive_string *as)
  {
         as->length = 0;
         as->buffer_length = 0;
-       if (as->s != NULL)
+       if (as->s != NULL) {
                 free(as->s);
+               as->s = NULL;
+       }
  }
  
  /* Returns NULL on any allocation failure. */
  struct archive_string *
  __archive_string_ensure(struct archive_string *as, size_t s)
  {
+       /* If buffer is already big enough, don't reallocate. */
         if (as->s && (s <= as->buffer_length))
                 return (as);
  
+       /*
+        * Growing the buffer at least exponentially ensures that
+        * append operations are always linear in the number of
+        * characters appended.  Using a smaller growth rate for
+        * larger buffers reduces memory waste somewhat at the cost of
+        * a larger constant factor.
+        */
         if (as->buffer_length < 32)
+               /* Start with a minimum 32-character buffer. */
                 as->buffer_length = 32;
-       while (as->buffer_length < s)
+       else if (as->buffer_length < 8192)
+               /* Buffers under 8k are doubled for speed. */
                 as->buffer_length *= 2;
+       else {
+               /* Buffers 8k and over grow by at least 25% each time. */
+               size_t old_length = as->buffer_length;
+               as->buffer_length = (as->buffer_length * 5) / 4;
+               /* Be safe: If size wraps, release buffer and return NULL. */
+               if (as->buffer_length < old_length) {
+                       free(as->s);
+                       as->s = NULL;
+                       return (NULL);
+               }
+       }
+       /*
+        * The computation above is a lower limit to how much we'll
+        * grow the buffer.  In any case, we have to grow it enough to
+        * hold the request.
+        */
+       if (as->buffer_length < s)
+               as->buffer_length = s;
+       /* Now we can reallocate the buffer. */
         as->s = (char *)realloc(as->s, as->buffer_length);
         if (as->s == NULL)
                 return (NULL);
@@ -124,3 +162,206 @@ __archive_strappend_int(struct archive_string *as, int d, int base)
         __archive_strappend_char(as, digits[d % base]);
         return (as);
  }
+
+/*
+ * Home-grown wcrtomb for UTF-8.
+ *
+ * Stores the UTF-8 encoding of 'wc' at 'p' (no terminating NUL is
+ * written) and returns the number of bytes stored, 1 through 4.
+ * Returns 0 if 'p' is NULL and (size_t)-1 if 'wc' cannot be encoded.
+ * The state argument exists only to match wcrtomb() and is ignored.
+ */
+static size_t
+my_wcrtomb_utf8(char *p, wchar_t wc, mbstate_t *s)
+{
+       unsigned long c;
+
+       (void)s; /* UNUSED */
+
+       if (p == NULL)
+               return (0);
+       /*
+        * Work on an unsigned copy.  Where wchar_t is a signed type a
+        * negative value would otherwise satisfy "wc <= 0x7f" and be
+        * emitted as one truncated byte; converted to unsigned it
+        * becomes a huge value and is rejected below.
+        */
+       c = (unsigned long)wc;
+       if (c <= 0x7f) {
+               p[0] = (char)c;
+               return (1);
+       }
+       if (c <= 0x7ff) {
+               p[0] = (char)(0xc0 | ((c >> 6) & 0x1f));
+               p[1] = (char)(0x80 | (c & 0x3f));
+               return (2);
+       }
+       if (c <= 0xffff) {
+               p[0] = (char)(0xe0 | ((c >> 12) & 0x0f));
+               p[1] = (char)(0x80 | ((c >> 6) & 0x3f));
+               p[2] = (char)(0x80 | (c & 0x3f));
+               return (3);
+       }
+       if (c <= 0x1fffff) {
+               p[0] = (char)(0xf0 | ((c >> 18) & 0x07));
+               p[1] = (char)(0x80 | ((c >> 12) & 0x3f));
+               p[2] = (char)(0x80 | ((c >> 6) & 0x3f));
+               p[3] = (char)(0x80 | (c & 0x3f));
+               return (4);
+       }
+       /*
+        * Four bytes carry at most 21 bits, so nothing above 0x1fffff
+        * can be encoded.  (Unicode itself stops at 0x10ffff; values
+        * between the two are still encoded, as before.)
+        *
+        * Awkward point:  UTF-8 <-> wchar_t conversions
+        * can actually fail.
+        */
+       return ((size_t)-1);
+}
+
+/*
+ * Convert the wide string 'w' one character at a time with 'func'
+ * (a converter with the wcrtomb() calling convention) and append the
+ * result to 'as'.
+ *
+ * Returns 0 on success, or -1 as soon as 'func' reports (size_t)-1
+ * for a character.  On failure, output already flushed to 'as' stays
+ * there; whatever was still pending in the local buffer is dropped.
+ */
+static int
+my_wcstombs(struct archive_string *as, const wchar_t *w,
+    size_t (*func)(char *, wchar_t, mbstate_t *))
+{
+       size_t n;
+       char *p;
+       mbstate_t shift_state;
+       char buff[256];
+
+       /*
+        * Start from the initial conversion state.  An all-zero
+        * mbstate_t is that state; handing an uninitialized object to
+        * wcrtomb() in order to "reset" it reads indeterminate data.
+        */
+       memset(&shift_state, 0, sizeof(shift_state));
+
+       /*
+        * Convert one wide char at a time into 'buff', whenever that
+        * fills, append it to the string.
+        */
+       p = buff;
+       while (*w != L'\0') {
+               /* Flush the buffer when we have <=16 bytes free. */
+               /* (No encoding has a single character >16 bytes.) */
+               if ((size_t)(p - buff) >= (size_t)(sizeof(buff) - 16)) {
+                       *p = '\0';
+                       archive_strcat(as, buff);
+                       p = buff;
+               }
+               n = (*func)(p, *w++, &shift_state);
+               if (n == (size_t)-1)
+                       return (-1);
+               p += n;
+       }
+       /* Flush whatever is left (possibly nothing). */
+       *p = '\0';
+       archive_strcat(as, buff);
+       return (0);
+}
+
+/*
+ * Append the UTF-8 encoding of the wide string 'w' to 'as'.
+ * Returns 'as' on success and NULL if the conversion fails.
+ */
+struct archive_string *
+__archive_strappend_w_utf8(struct archive_string *as, const wchar_t *w)
+{
+       int failed;
+
+       failed = (my_wcstombs(as, w, my_wcrtomb_utf8) != 0);
+       return (failed ? NULL : as);
+}
+
+/*
+ * Append the wide string 'w' to 'as', converted to the character set
+ * of the current locale by wcrtomb().  Returns 'as' on success and
+ * NULL if the conversion fails.
+ *
+ * TODO: use my_wcrtomb_utf8 if !HAVE_WCRTOMB (add configure logic first!)
+ */
+struct archive_string *
+__archive_strappend_w_mbs(struct archive_string *as, const wchar_t *w)
+{
+       int failed;
+
+       failed = (my_wcstombs(as, w, wcrtomb) != 0);
+       return (failed ? NULL : as);
+}
+
+
+/*
+ * Home-grown mbrtowc for UTF-8.  Some systems lack UTF-8
+ * (or even lack mbrtowc()) and we need UTF-8 support for pax
+ * format.  So please don't replace this with a call to the
+ * standard mbrtowc() function!
+ *
+ * Decodes one character from 's', looking at no more than 'n' bytes.
+ * Returns the number of bytes used (1-4) after storing the value in
+ * '*pwc'; 0 if 's' is NULL or points at a NUL byte ('*pwc' is not
+ * written in that case); (size_t)-2 if 'n' is too small for the
+ * sequence announced by the first byte; (size_t)-1 for an invalid
+ * first or continuation byte.  Overlong forms and surrogate values
+ * are not checked for.
+ */
+static size_t
+my_mbrtowc_utf8(wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
+{
+       int lead, value;
+       size_t need, i;
+
+       /*
+        * 'ps' only exists so the prototype matches the standard
+        * mbrtowc(), which lets generic string code take a pointer
+        * to either function.
+        */
+       (void)ps; /* UNUSED */
+
+       /* Standard behavior:  a NULL value for 's' just resets shift state. */
+       if (s == NULL)
+               return (0);
+       /* If length argument is zero, don't look at the first character. */
+       if (n == 0)
+               return ((size_t)-2);
+
+       lead = (unsigned char)*s;
+       /* Standard:  return 0 for end-of-string. */
+       if (lead == 0)
+               return (0);
+       /* Plain ASCII needs no continuation bytes. */
+       if ((lead & 0x80) == 0) {
+               *pwc = lead & 0x7f;
+               return (1);
+       }
+
+       /* The first byte gives the total length and the top bits. */
+       if ((lead & 0xe0) == 0xc0) {
+               need = 2;
+               value = lead & 0x1f;
+       } else if ((lead & 0xf0) == 0xe0) {
+               need = 3;
+               value = lead & 0x0f;
+       } else if ((lead & 0xf8) == 0xf0) {
+               need = 4;
+               value = lead & 0x07;
+       } else {
+               /* Invalid first byte. */
+               return ((size_t)-1);
+       }
+       if (n < need)
+               return ((size_t)-2);
+
+       /* Every following byte must be 10xxxxxx and adds six bits. */
+       for (i = 1; i < need; i++) {
+               if ((s[i] & 0xc0) != 0x80)
+                       return ((size_t)-1);
+               value = (value << 6) | (s[i] & 0x3f);
+       }
+       *pwc = value;
+       return (need);
+}
+
+/*
+ * Return a wide-character string by converting this archive_string
+ * from UTF-8.
+ *
+ * The result is a freshly malloc()ed, L'\0'-terminated buffer; the
+ * caller is expected to free() it.  Returns NULL if the contents are
+ * not valid UTF-8.  Allocation failure is reported through
+ * __archive_errx() (presumably fatal -- confirm).
+ *
+ * Only the first as->length bytes are decoded.  The output buffer is
+ * sized from as->length, and elsewhere in this file a string's length
+ * is reset without touching its buffer, so running to the first NUL
+ * in as->s could write past the end of the allocation.
+ */
+wchar_t *
+__archive_string_utf8_w(struct archive_string *as)
+{
+       wchar_t *ws, *dest;
+       const char *src;
+       size_t n, remaining;
+
+       /* Each character uses at least one byte, so this is enough. */
+       ws = (wchar_t *)malloc((as->length + 1) * sizeof(wchar_t));
+       if (ws == NULL)
+               __archive_errx(1, "Out of memory");
+       dest = ws;
+       src = as->s;
+       /* A string that was never filled in may have no buffer at all. */
+       remaining = (src != NULL) ? as->length : 0;
+       while (remaining > 0 && *src != '\0') {
+               n = my_mbrtowc_utf8(dest, src, remaining, NULL);
+               if (n == 0)
+                       break;
+               /* Invalid or truncated sequence: the whole conversion fails. */
+               if (n == (size_t)-1 || n == (size_t)-2) {
+                       free(ws);
+                       return (NULL);
+               }
+               dest++;
+               src += n;
+               remaining -= n;
+       }
+       *dest = L'\0';
+       return (ws);
+}
diff --git a/libarchive/archive_string.h b/libarchive/archive_string.h

index f56c50fe42a8b444f8c221e9b208b4f295f504fd..61e70777f19f0ae0f89427131e72e49a79bd5902 100644 (file)
--- a/libarchive/archive_string.h
+++ b/libarchive/archive_string.h
@@ -33,6 +33,9 @@
  #ifdef HAVE_STRING_H
  #include <string.h>
  #endif
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
  
  /*
   * Basic resizable/reusable string support a la Java's "StringBuffer."
@@ -60,16 +63,22 @@ struct archive_string *
  __archive_strappend_char(struct archive_string *, char);
  #define        archive_strappend_char __archive_strappend_char
  
-/* Append a char to an archive_string using UTF8. */
-struct archive_string *
-__archive_strappend_char_UTF8(struct archive_string *, int);
-#define        archive_strappend_char_UTF8 __archive_strappend_char_UTF8
-
  /* Append an integer in the specified base (2 <= base <= 16). */
  struct archive_string *
  __archive_strappend_int(struct archive_string *as, int d, int base);
  #define        archive_strappend_int __archive_strappend_int
  
+/* Convert a wide-char string to UTF-8 and append the result. */
+struct archive_string *
+__archive_strappend_w_utf8(struct archive_string *, const wchar_t *);
+#define        archive_strappend_w_utf8        __archive_strappend_w_utf8
+
+/* Convert a wide-char string to current locale and append the result. */
+/* Returns NULL if conversion fails. */
+struct archive_string *
+__archive_strappend_w_mbs(struct archive_string *, const wchar_t *);
+#define        archive_strappend_w_mbs __archive_strappend_w_mbs
+
  /* Basic append operation. */
  struct archive_string *
  __archive_string_append(struct archive_string *as, const char *p, size_t s);
@@ -95,7 +104,7 @@ __archive_strncat(struct archive_string *, const char *, size_t);
  
  /* Copy a C string to an archive_string, resizing as necessary. */
  #define        archive_strcpy(as,p) \
-       ((as)->length = 0, __archive_string_append((as), (p), strlen(p)))
+       ((as)->length = 0, __archive_string_append((as), (p), p == NULL ? 0 : strlen(p)))
  
  /* Copy a C string to an archive_string with limit, resizing as necessary. */
  #define        archive_strncpy(as,p,l) \
@@ -119,4 +128,9 @@ void        __archive_string_vsprintf(struct archive_string *, const char *,
  void   __archive_string_sprintf(struct archive_string *, const char *, ...);
  #define        archive_string_sprintf  __archive_string_sprintf
  
+/* Allocates a fresh buffer and converts as (assumed to be UTF-8) into it.
+ * Returns NULL if conversion failed in any way. */
+wchar_t *__archive_string_utf8_w(struct archive_string *as);
+
+
  #endif
diff --git a/libarchive/archive_util.c b/libarchive/archive_util.c

index 69d69a513ec3632f29eb636b77d65eebe044ec40..55dd1fa10c3f64f6e5c181dc48d6b253ddb54975 100644 (file)
--- a/libarchive/archive_util.c
+++ b/libarchive/archive_util.c
@@ -77,32 +77,10 @@ archive_version_number(void)
         return (ARCHIVE_VERSION_NUMBER);
  }
  
-/*
- * Format a version string of the form "libarchive x.y.z", where x, y,
- * z are the correct parts of the version ID from
- * archive_version_number().
- *
- * I used to do all of this at build time in shell scripts but that
- * proved to be a portability headache.
- */
-
  const char *
  archive_version_string(void)
  {
-       static char buff[128];
-       struct archive_string as;
-       int n;
-
-       if (buff[0] == '\0') {
-               n = archive_version_number();
-               memset(&as, 0, sizeof(as));
-               archive_string_sprintf(&as, "libarchive %d.%d.%d",
-                   n / 1000000, (n / 1000) % 1000, n % 1000);
-               strncpy(buff, as.s, sizeof(buff));
-               buff[sizeof(buff) - 1] = '\0';
-               archive_string_free(&as);
-       }
-       return (buff);
+       return (ARCHIVE_VERSION_STRING);
  }
  
  int
diff --git a/libarchive/archive_write_disk.c b/libarchive/archive_write_disk.c

index 620beac48ed1d7744eb0139f2a2c119519ec6404..58a7fd22bfe82ab3e38f37c061e5ddd288ca6b00 100644 (file)
--- a/libarchive/archive_write_disk.c
+++ b/libarchive/archive_write_disk.c
@@ -294,7 +294,7 @@ _archive_write_header(struct archive *_a, struct archive_entry *entry)
         archive_clear_error(&a->archive);
         if (a->archive.state & ARCHIVE_STATE_DATA) {
                 r = _archive_write_finish_entry(&a->archive);
-               if (r != ARCHIVE_OK)
+               if (r == ARCHIVE_FATAL)
                         return (r);
         }
  
@@ -485,10 +485,12 @@ _archive_write_data_block(struct archive *_a,
         /* Write the data. */
         while (size > 0 && a->offset < a->filesize) {
                 if ((off_t)(a->offset + size) > a->filesize) {
-                       size = (size_t)(a->filesize - a->offset);
-                       archive_set_error(&a->archive, errno,
-                           "Write request too large");
+                       archive_set_error(&a->archive, 0,
+                           "Write request too large (tried to write %u bytes, but only %u bytes remain)",
+                           (unsigned int)size,
+                           (unsigned int)(a->filesize - a->offset));
                         r = ARCHIVE_WARN;
+                       size = (size_t)(a->filesize - a->offset);
                 }
                 bytes_written = write(a->fd, buff, size);
                 if (bytes_written < 0) {
diff --git a/libarchive/archive_write_set_format_pax.c b/libarchive/archive_write_set_format_pax.c

index d6e3e6c4b03575045151a81c3658e043782051ca..89f89bc9f54b1af95a66c642e6ba42cd504e0789 100644 (file)
--- a/libarchive/archive_write_set_format_pax.c
+++ b/libarchive/archive_write_set_format_pax.c
@@ -386,7 +386,7 @@ archive_write_pax_header(struct archive_write *a,
         const char *p;
         char *t;
         const wchar_t *wp;
-       const char *suffix_start;
+       const char *suffix;
         int need_extension, r, ret;
         struct pax *pax;
         const char *hdrcharset = NULL;
@@ -496,34 +496,73 @@ archive_write_pax_header(struct archive_write *a,
         if (hdrcharset != NULL)
                 add_pax_attr(&(pax->pax_header), "hdrcharset", hdrcharset);
  
-       /*
-        * Determining whether or not the name is too big is ugly
-        * because of the rules for dividing names between 'name' and
-        * 'prefix' fields.  Here, I pick out the longest possible
-        * suffix, then test whether the remaining prefix is too long.
-        */
-       if (strlen(path) <= 100)    /* Short enough for just 'name' field */
-               suffix_start = path;    /* Record a zero-length prefix */
-       else
-               /* Find the largest suffix that fits in 'name' field. */
-               suffix_start = strchr(path + strlen(path) - 100 - 1, '/');
  
         /*
          * If name is too long, or has non-ASCII characters, add
          * 'path' to pax extended attrs.  (Note that an unconvertible
          * name must have non-ASCII characters.)
          */
-       if (suffix_start == NULL || suffix_start - path > 155
-           || path_w == NULL || has_non_ASCII(path_w)) {
-               if (path_w == NULL || hdrcharset != NULL)
+       if (path == NULL) {
+               /* We don't have a narrow version, so we have to store
+                * the wide version. */
+               add_pax_attr_w(&(pax->pax_header), "path", path_w);
+               archive_entry_set_pathname(entry_main, "@WidePath");
+               need_extension = 1;
+       } else if (has_non_ASCII(path_w)) {
+               /* We have non-ASCII characters. */
+               if (path_w == NULL || hdrcharset != NULL) {
                         /* Can't do UTF-8, so store it raw. */
                         add_pax_attr(&(pax->pax_header), "path", path);
-               else
-                       add_pax_attr_w(&(pax->pax_header), "path", path_w);
+               } else {
+                       /* Store UTF-8 */
+                       add_pax_attr_w(&(pax->pax_header),
+                           "path", path_w);
+               }
                 archive_entry_set_pathname(entry_main,
                     build_ustar_entry_name(ustar_entry_name,
                         path, strlen(path), NULL));
                 need_extension = 1;
+       } else {
+               /* We have an all-ASCII path; we'd like to just store
+                * it in the ustar header if it will fit.  Yes, this
+                * duplicates some of the logic in
+                * write_set_format_ustar.c
+                */
+               if (strlen(path) <= 100) {
+                       /* Fits in the old 100-char tar name field. */
+               } else {
+                       /* Find largest suffix that will fit. */
+                       /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */
+                       suffix = strchr(path + strlen(path) - 100 - 1, '/');
+                       /* Don't attempt an empty prefix. */
+                       if (suffix == path)
+                               suffix = strchr(suffix + 1, '/');
+                       /* We can put it in the ustar header if it's
+                        * all ASCII and it's either <= 100 characters
+                        * or can be split at a '/' into a prefix <=
+                        * 155 chars and a suffix <= 100 chars.  (Note
+                        * the strchr() above will return NULL exactly
+                        * when the path can't be split.)
+                        */
+                       if (suffix == NULL       /* Suffix > 100 chars. */
+                           || suffix[1] == '\0'    /* empty suffix */
+                           || suffix - path > 155)  /* Prefix > 155 chars */
+                       {
+                               if (path_w == NULL || hdrcharset != NULL) {
+                                       /* Can't do UTF-8, so store it raw. */
+                                       add_pax_attr(&(pax->pax_header),
+                                           "path", path);
+                               } else {
+                                       /* Store UTF-8 */
+                                       add_pax_attr_w(&(pax->pax_header),
+                                           "path", path_w);
+                               }
+                               archive_entry_set_pathname(entry_main,
+                                   build_ustar_entry_name(ustar_entry_name,
+                                       path, strlen(path), NULL));
+                               need_extension = 1;
+                       }
+               }
         }
  
         if (linkpath != NULL) {
@@ -1215,6 +1254,8 @@ archive_write_pax_data(struct archive_write *a, const void *buff, size_t s)
  static int
  has_non_ASCII(const wchar_t *wp)
  {
+       if (wp == NULL)
+               return (1);
         while (*wp != L'\0' && *wp < 128)
                 wp++;
         return (*wp != L'\0');
diff --git a/libarchive/archive_write_set_format_ustar.c b/libarchive/archive_write_set_format_ustar.c

index c2c0011aee2bf68e0e7c82e3372246490451e9e0..e7f652d1dc2a2fa5b47629d8dad9a39ad681f838 100644 (file)
--- a/libarchive/archive_write_set_format_ustar.c
+++ b/libarchive/archive_write_set_format_ustar.c
@@ -206,7 +206,7 @@ archive_write_ustar_header(struct archive_write *a, struct archive_entry *entry)
             !(archive_entry_filetype(entry) == AE_IFREG))
                 archive_entry_set_size(entry, 0);
  
-       if (AE_IFDIR == archive_entry_mode(entry)) {
+       if (AE_IFDIR == archive_entry_filetype(entry)) {
                 const char *p;
                 char *t;
                 /*
@@ -282,24 +282,30 @@ __archive_write_format_header_ustar(struct archive_write *a, char h[512],
                 /* Store in two pieces, splitting at a '/'. */
                 p = strchr(pp + strlen(pp) - USTAR_name_size - 1, '/');
                 /*
-                * If the separator we found is the first '/', find
-                * the next one.  (This is a pathological case that
-                * occurs for paths of exactly 101 bytes that start with
-                * '/'; it occurs because the separating '/' is not
-                * stored explicitly and the reconstruction assumes that
-                * an empty prefix means there is no '/' separator.)
+                * Look for the next '/' if we chose the first character
+                * as the separator.  (ustar format doesn't permit
+                * an empty prefix.)
                  */
                 if (p == pp)
                         p = strchr(p + 1, '/');
-               /*
-                * If there is no path separator, or the prefix or
-                * remaining name are too large, return an error.
-                */
+               /* Fail if the name won't fit. */
                 if (!p) {
+                       /* No separator. */
+                       archive_set_error(&a->archive, ENAMETOOLONG,
+                           "Pathname too long");
+                       ret = ARCHIVE_WARN;
+               } else if (p[1] == '\0') {
+                       /*
+                        * The only feasible separator is a final '/';
+                        * this would result in a non-empty prefix and
+                        * an empty name, which POSIX doesn't
+                        * explicitly forbid, but it just feels wrong.
+                        */
                         archive_set_error(&a->archive, ENAMETOOLONG,
                             "Pathname too long");
                         ret = ARCHIVE_WARN;
                 } else if (p  > pp + USTAR_prefix_size) {
+                       /* Prefix is too long. */
                         archive_set_error(&a->archive, ENAMETOOLONG,
                             "Pathname too long");
                         ret = ARCHIVE_WARN;
diff --git a/libarchive/test/Makefile b/libarchive/test/Makefile

index 9d04b089e7fdddaacc6777a7165ae514d9b57d39..24b09544a88993695f6d7fdab1f3e68fc063a18e 100644 (file)
--- a/libarchive/test/Makefile
+++ b/libarchive/test/Makefile
@@ -18,6 +18,7 @@ TESTS= \
         test_empty_write.c                      \
         test_entry.c                            \
         test_entry_strmode.c                    \
+       test_link_resolver.c                    \
         test_pax_filename_encoding.c            \
         test_read_compress_program.c            \
         test_read_data_large.c                  \
@@ -38,6 +39,7 @@ TESTS= \
         test_read_format_mtree.c                \
         test_read_format_pax_bz2.c              \
         test_read_format_tar.c                  \
+       test_read_format_tar_empty_filename.c   \
         test_read_format_tbz.c                  \
         test_read_format_tgz.c                  \
         test_read_format_tz.c                   \
@@ -48,6 +50,7 @@ TESTS= \
         test_read_truncated.c                   \
         test_tar_filenames.c                    \
         test_tar_large.c                        \
+       test_ustar_filenames.c                  \
         test_write_compress_program.c           \
         test_write_compress.c                   \
         test_write_disk.c                       \
@@ -61,6 +64,7 @@ TESTS= \
         test_write_format_cpio_empty.c          \
         test_write_format_shar_empty.c          \
         test_write_format_tar.c                 \
+       test_write_format_tar_ustar.c           \
         test_write_format_tar_empty.c           \
         test_write_open_memory.c
  
@@ -68,7 +72,8 @@ TESTS= \
  # Build the test program using all libarchive sources + the test sources.
  SRCS= ${LA_SRCS}                               \
         ${TESTS}                                \
-       list.h                                  \
+       ${.OBJDIR}/list.h                       \
+       ${.OBJDIR}/archive.h                    \
         main.c                                  \
         read_open_memory.c
  
@@ -96,17 +101,15 @@ WARNS=6
  
  # Build libarchive_test and run it.
  check test: libarchive_test
-       ./libarchive_test -k -r ${.CURDIR}
-
-INCS=archive.h list.h
+       ./libarchive_test -v -r ${.CURDIR}
  
  # Build archive.h, but in our .OBJDIR, not libarchive's
  # This keeps libarchive_test and libarchive builds completely separate.
-archive.h: ${LA_SRCDIR}/archive.h.in ${LA_SRCDIR}/Makefile
+${.OBJDIR}/archive.h: ${LA_SRCDIR}/archive.h.in ${LA_SRCDIR}/Makefile
         cd ${LA_SRCDIR} && unset MAKEOBJDIRPREFIX && MAKEOBJDIR=${.OBJDIR} make archive.h
  
  # list.h is just a list of all tests, as indicated by DEFINE_TEST macro lines
-list.h: ${TESTS} Makefile
+${.OBJDIR}/list.h: ${TESTS} Makefile
         (cd ${.CURDIR}; cat ${TESTS}) | grep DEFINE_TEST > list.h
  
  CLEANFILES += *.out *.o *.core *~ list.h archive.h
diff --git a/libarchive/test/main.c b/libarchive/test/main.c

index 3977803278f7c7cca1e42a3ae647ea2ebc0bf3e6..6e6ca6c8da123198a73aed9a4a688e8b6096ca04 100644 (file)
--- a/libarchive/test/main.c
+++ b/libarchive/test/main.c
@@ -63,10 +63,14 @@ extern char *optarg;
  extern int optind;
  #endif
  
-/* Default is to crash and try to force a core dump on failure. */
-static int dump_on_failure = 1;
+/* Enable core dump on failure. */
+static int dump_on_failure = 0;
+/* Default is to remove temp dirs for successful tests. */
+static int keep_temp_files = 0;
  /* Default is to print some basic information about each test. */
  static int quiet_flag = 0;
+/* Default is to summarize repeated failures. */
+static int verbose = 0;
  /* Cumulative count of component failures. */
  static int failures = 0;
  /* Cumulative count of skipped component tests. */
@@ -242,7 +246,7 @@ test_assert(const char *file, int line, int value, const char *condition, void *
                 return (value);
         }
         failures ++;
-       if (previous_failures(file, line))
+       if (!verbose && previous_failures(file, line))
                 return (value);
         fprintf(stderr, "%s:%d: Assertion failed\n", file, line);
         fprintf(stderr, "   Condition: %s\n", condition);
@@ -261,7 +265,7 @@ test_assert_equal_int(const char *file, int line,
                 return (1);
         }
         failures ++;
-       if (previous_failures(file, line))
+       if (!verbose && previous_failures(file, line))
                 return (0);
         fprintf(stderr, "%s:%d: Assertion failed: Ints not equal\n",
             file, line);
@@ -271,6 +275,30 @@ test_assert_equal_int(const char *file, int line,
         return (0);
  }
  
+static void strdump(const char *p)
+{
+       if (p == NULL) {
+               fprintf(stderr, "(null)");
+               return;
+       }
+       fprintf(stderr, "\"");
+       while (*p != '\0') {
+               unsigned int c = 0xff & *p++;
+               switch (c) {
+               case '\a': fprintf(stderr, "\a"); break;
+               case '\b': fprintf(stderr, "\b"); break;
+               case '\n': fprintf(stderr, "\n"); break;
+               case '\r': fprintf(stderr, "\r"); break;
+               default:
+                       if (c >= 32 && c < 127)
+                               fprintf(stderr, "%c", c);
+                       else
+                               fprintf(stderr, "\\x%02X", c);
+               }
+       }
+       fprintf(stderr, "\"");
+}
+
  /* assertEqualString() displays the values of the two strings. */
  int
  test_assert_equal_string(const char *file, int line,
@@ -289,16 +317,41 @@ test_assert_equal_string(const char *file, int line,
                 return (1);
         }
         failures ++;
-       if (previous_failures(file, line))
+       if (!verbose && previous_failures(file, line))
                 return (0);
         fprintf(stderr, "%s:%d: Assertion failed: Strings not equal\n",
             file, line);
-       fprintf(stderr, "      %s = \"%s\"\n", e1, v1);
-       fprintf(stderr, "      %s = \"%s\"\n", e2, v2);
+       fprintf(stderr, "      %s = ", e1);
+       strdump(v1);
+       fprintf(stderr, " (length %d)\n", v1 == NULL ? 0 : strlen(v1));
+       fprintf(stderr, "      %s = ", e2);
+       strdump(v2);
+       fprintf(stderr, " (length %d)\n", v2 == NULL ? 0 : strlen(v2));
         report_failure(extra);
         return (0);
  }
  
+static void wcsdump(const wchar_t *w)
+{
+       if (w == NULL) {
+               fprintf(stderr, "(null)");
+               return;
+       }
+       fprintf(stderr, "\"");
+       while (*w != L'\0') {
+               unsigned int c = *w++;
+               if (c >= 32 && c < 127)
+                       fprintf(stderr, "%c", c);
+               else if (c < 256)
+                       fprintf(stderr, "\\x%02X", c);
+               else if (c < 0x10000)
+                       fprintf(stderr, "\\u%04X", c);
+               else
+                       fprintf(stderr, "\\U%08X", c);
+       }
+       fprintf(stderr, "\"");
+}
+
  /* assertEqualWString() displays the values of the two strings. */
  int
  test_assert_equal_wstring(const char *file, int line,
@@ -307,17 +360,31 @@ test_assert_equal_wstring(const char *file, int line,
      void *extra)
  {
         ++assertions;
-       if (wcscmp(v1, v2) == 0) {
+       if (v1 == NULL) {
+               if (v2 == NULL) {
+                       msg[0] = '\0';
+                       return (1);
+               }
+       } else if (v2 == NULL) {
+               if (v1 == NULL) {
+                       msg[0] = '\0';
+                       return (1);
+               }
+       } else if (wcscmp(v1, v2) == 0) {
                 msg[0] = '\0';
                 return (1);
         }
         failures ++;
-       if (previous_failures(file, line))
+       if (!verbose && previous_failures(file, line))
                 return (0);
         fprintf(stderr, "%s:%d: Assertion failed: Unicode strings not equal\n",
             file, line);
-       fwprintf(stderr, L"      %s = \"%ls\"\n", e1, v1);
-       fwprintf(stderr, L"      %s = \"%ls\"\n", e2, v2);
+       fprintf(stderr, "      %s = ", e1);
+       wcsdump(v1);
+       fprintf(stderr, "\n");
+       fprintf(stderr, "      %s = ", e2);
+       wcsdump(v2);
+       fprintf(stderr, "\n");
         report_failure(extra);
         return (0);
  }
@@ -378,7 +445,7 @@ test_assert_equal_mem(const char *file, int line,
                 return (1);
         }
         failures ++;
-       if (previous_failures(file, line))
+       if (!verbose && previous_failures(file, line))
                 return (0);
         fprintf(stderr, "%s:%d: Assertion failed: memory not equal\n",
             file, line);
@@ -410,12 +477,13 @@ test_assert_empty_file(const char *f1fmt, ...)
         if (stat(f1, &st) != 0) {
                 fprintf(stderr, "%s:%d: Could not stat: %s\n", test_filename, test_line, f1);
                 report_failure(NULL);
+               return (0);
         }
         if (st.st_size == 0)
                 return (1);
  
         failures ++;
-       if (previous_failures(test_filename, test_line))
+       if (!verbose && previous_failures(test_filename, test_line))
                 return (0);
  
         fprintf(stderr, "%s:%d: File not empty: %s\n", test_filename, test_line, f1);
@@ -462,7 +530,7 @@ test_assert_equal_file(const char *f1, const char *f2pattern, ...)
                         break;
         }
         failures ++;
-       if (previous_failures(test_filename, test_line))
+       if (!verbose && previous_failures(test_filename, test_line))
                 return (0);
         fprintf(stderr, "%s:%d: Files are not identical\n",
             test_filename, test_line);
@@ -633,6 +701,12 @@ static int test_run(int i, const char *tmpdir)
         (*tests[i].func)();
         /* Summarize the results of this test. */
         summarize();
+       /* If there were no failures, we can remove the work dir. */
+       if (failures == failures_before) {
+               if (!keep_temp_files && chdir(tmpdir) == 0) {
+                       systemf("rm -rf %s", tests[i].name);
+               }
+       }
         /* Return appropriate status. */
         return (failures == failures_before ? 0 : 1);
  }
@@ -646,8 +720,9 @@ static void usage(const char *program)
         printf("Default is to run all tests.\n");
         printf("Otherwise, specify the numbers of the tests you wish to run.\n");
         printf("Options:\n");
-       printf("  -k  Keep running after failures.\n");
-       printf("      Default: Core dump after any failure.\n");
+       printf("  -d  Dump core after any failure, for debugging.\n");
+       printf("  -k  Keep all temp files.\n");
+       printf("      Default: temp files for successful tests deleted.\n");
  #ifdef PROGRAM
         printf("  -p <path>  Path to executable to be tested.\n");
         printf("      Default: path taken from " ENVBASE " environment variable.\n");
@@ -655,6 +730,7 @@ static void usage(const char *program)
         printf("  -q  Quiet.\n");
         printf("  -r <dir>   Path to dir containing reference files.\n");
         printf("      Default: Current directory.\n");
+       printf("  -v  Verbose.\n");
         printf("Available tests:\n");
         for (i = 0; i < limit; i++)
                 printf("  %d: %s\n", i, tests[i].name);
@@ -747,9 +823,9 @@ int main(int argc, char **argv)
         testprog = getenv(ENVBASE);
  #endif
  
-       /* Allow -k to be controlled through the environment. */
-       if (getenv(ENVBASE "_KEEP_GOING") != NULL)
-               dump_on_failure = 0;
+       /* Allow -d to be controlled through the environment. */
+       if (getenv(ENVBASE "_DEBUG") != NULL)
+               dump_on_failure = 1;
  
         /* Get the directory holding test files from environment. */
         refdir = getenv(ENVBASE "_TEST_FILES");
@@ -757,10 +833,13 @@ int main(int argc, char **argv)
         /*
          * Parse options.
          */
-       while ((opt = getopt(argc, argv, "kp:qr:")) != -1) {
+       while ((opt = getopt(argc, argv, "dkp:qr:v")) != -1) {
                 switch (opt) {
+               case 'd':
+                       dump_on_failure = 1;
+                       break;
                 case 'k':
-                       dump_on_failure = 0;
+                       keep_temp_files = 1;
                         break;
                 case 'p':
  #ifdef PROGRAM
@@ -775,6 +854,9 @@ int main(int argc, char **argv)
                 case 'r':
                         refdir = optarg;
                         break;
+               case 'v':
+                       verbose = 1;
+                       break;
                 case '?':
                 default:
                         usage(progname);
@@ -823,6 +905,7 @@ int main(int argc, char **argv)
                         --p;
                         *p = '\0';
                 }
+               systemf("rm %s/refdir", tmpdir);
         }
  
         /*
@@ -878,5 +961,9 @@ int main(int argc, char **argv)
  
         free(refdir_alloc);
  
+       /* If the final tmpdir is empty, we can remove it. */
+       /* This should be the usual case when all tests succeed. */
+       rmdir(tmpdir);
+
         return (tests_failed);
  }
diff --git a/libarchive/test/test_acl_pax.c b/libarchive/test/test_acl_pax.c

index abf7469496c40f7ac4555d7e3925bbd219cc332f..6ae3dd271f1c25dd02d792ec1e5ca10f1ce4fd71 100644 (file)
--- a/libarchive/test/test_acl_pax.c
+++ b/libarchive/test/test_acl_pax.c
@@ -332,14 +332,10 @@ acl_match(struct acl_t *acl, int type, int permset, int tag, int qual, const cha
                 return (1);
         if (qual != acl->qual)
                 return (0);
-       if (name == NULL) {
-               if (acl->name == NULL || acl->name[0] == '\0')
-                       return (1);
-       }
-       if (acl->name == NULL) {
-               if (name[0] == '\0')
-                       return (1);
-       }
+       if (name == NULL)
+               return (acl->name == NULL || acl->name[0] == '\0');
+       if (acl->name == NULL)
+               return (name == NULL || name[0] == '\0');
         return (0 == strcmp(name, acl->name));
  }
  
diff --git a/libarchive/test/test_archive_api_feature.c b/libarchive/test/test_archive_api_feature.c

index cfc0b8413f985cf088179d2c98b23cbbe3fa7109..21d189d5876d1253de3438ffe52a2fa18c92a6aa 100644 (file)
--- a/libarchive/test/test_archive_api_feature.c
+++ b/libarchive/test/test_archive_api_feature.c
@@ -28,6 +28,7 @@ __FBSDID("$FreeBSD: src/lib/libarchive/test/test_archive_api_feature.c,v 1.4 200
  DEFINE_TEST(test_archive_api_feature)
  {
         char buff[128];
+       const char *p;
  
         /* This is the (hopefully) final versioning API. */
         assertEqualInt(ARCHIVE_VERSION_NUMBER, archive_version_number());
@@ -35,7 +36,17 @@ DEFINE_TEST(test_archive_api_feature)
             archive_version_number() / 1000000,
             (archive_version_number() / 1000) % 1000,
             archive_version_number() % 1000);
-       assertEqualString(buff, archive_version_string());
+       failure("Version string is: %s, computed is: %s",
+           archive_version_string(), buff);
+       assert(memcmp(buff, archive_version_string(), strlen(buff)) == 0);
+       if (strlen(buff) < strlen(archive_version_string())) {
+               p = archive_version_string() + strlen(buff);
+               failure("Version string is: %s", archive_version_string());
+               assert(*p == 'a' || *p == 'b' || *p == 'c' || *p == 'd');
+               ++p;
+               failure("Version string is: %s", archive_version_string());
+               assert(*p == '\0');
+       }
  
  /* This is all scheduled to disappear in libarchive 3.0 */
  #if ARCHIVE_VERSION_NUMBER < 3000000
diff --git a/libarchive/test/test_entry.c b/libarchive/test/test_entry.c

index 29edae7ffd83585944051b28ab9598b3c9ce3c02..8386d98926ef82c1b8c15090be7af3e2d92324c0 100644 (file)
--- a/libarchive/test/test_entry.c
+++ b/libarchive/test/test_entry.c
@@ -52,6 +52,8 @@ DEFINE_TEST(test_entry)
         const void *xval; /* For xattr tests. */
         size_t xsize; /* For xattr tests. */
         int c;
+       wchar_t wc;
+       long l;
  
         assert((e = archive_entry_new()) != NULL);
  
@@ -146,7 +148,7 @@ DEFINE_TEST(test_entry)
         archive_entry_copy_link_w(e, L"link3");
         assertEqualString(archive_entry_hardlink(e), NULL);
         assertEqualString(archive_entry_symlink(e), "link3");
-       /* Arbitrarily override hardlink if both hardlink and symlink set. */
+       /* Arbitrarily override symlink if both hardlink and symlink set. */
         archive_entry_set_hardlink(e, "hardlink");
         archive_entry_set_symlink(e, "symlink");
         archive_entry_set_link(e, "link");
@@ -726,8 +728,10 @@ DEFINE_TEST(test_entry)
         /*
          * Exercise the character-conversion logic, if we can.
          */
-       failure("Can't exercise charset-conversion logic.");
-       if (assert(NULL != setlocale(LC_ALL, "de_DE.UTF-8"))) {
+       if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) {
+               skipping("Can't exercise charset-conversion logic without"
+                       " a suitable locale.");
+       } else {
                 /* A filename that cannot be converted to wide characters. */
                 archive_entry_copy_pathname(e, "abc\314\214mno\374xyz");
                 failure("Converting invalid chars to Unicode should fail.");
@@ -756,6 +760,26 @@ DEFINE_TEST(test_entry)
                 assert(NULL == archive_entry_symlink_w(e));
         }
  
+       l = 0x12345678L;
+       wc = (wchar_t)l; /* Wide character too big for UTF-8. */
+       if (NULL == setlocale(LC_ALL, "C") || (long)wc != l) {
+               skipping("Testing charset conversion failure requires 32-bit wchar_t and support for \"C\" locale.");
+       } else {
+               /*
+                * Build the string L"xxx\U12345678yyy\u5678zzz" without
+                * using C99 \u#### syntax, which isn't uniformly
+                * supported.  (GCC 3.4.6, for instance, defaults to
+                * "c89 plus GNU extensions.")
+                */
+               wcscpy(wbuff, L"xxxAyyyBzzz");
+               wbuff[3] = 0x12345678;
+               wbuff[7] = 0x5678;
+               /* A wide filename that cannot be converted to narrow. */
+               archive_entry_copy_pathname_w(e, wbuff);
+               failure("Converting wide characters from Unicode should fail.");
+               assertEqualString(NULL, archive_entry_pathname(e));
+       }
+
         /* Release the experimental entry. */
         archive_entry_free(e);
  }
diff --git a/libarchive/test/test_pax_filename_encoding.c b/libarchive/test/test_pax_filename_encoding.c

index b11be58a07e2ce2f423798e81f7a46b019afc975..34c4fc8230fe26ac22cf435bca4447309a021be2 100644 (file)
--- a/libarchive/test/test_pax_filename_encoding.c
+++ b/libarchive/test/test_pax_filename_encoding.c
@@ -34,24 +34,20 @@ __FBSDID("$FreeBSD: src/lib/libarchive/test/test_pax_filename_encoding.c,v 1.1 2
   * stored and restored correctly, regardless of the encodings.
   */
  
-DEFINE_TEST(test_pax_filename_encoding)
+/*
+ * Read a manually-created archive that has filenames that are
+ * stored in binary instead of UTF-8 and verify that we get
+ * the right filename returned and that we get a warning only
+ * if the header isn't marked as binary.
+ */
+DEFINE_TEST(test_pax_filename_encoding_1)
  {
         static const char testname[] = "test_pax_filename_encoding.tar.gz";
-       char buff[65536];
         /*
          * \314\214 is a valid 2-byte UTF-8 sequence.
          * \374 is invalid in UTF-8.
          */
         char filename[] = "abc\314\214mno\374xyz";
-       char longname[] = "abc\314\214mno\374xyz"
-           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
-           ;
-       size_t used;
         struct archive *a;
         struct archive_entry *entry;
  
@@ -69,8 +65,7 @@ DEFINE_TEST(test_pax_filename_encoding)
          * in it, but the header is not marked as hdrcharset=BINARY, so that
          * requires a warning.
          */
-       failure("An invalid UTF8 pathname in a pax archive should be read\n"
-           " without conversion but with a warning");
+       failure("Invalid UTF8 in a pax archive pathname should cause a warning");
         assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
         assertEqualString(filename, archive_entry_pathname(entry));
         /*
@@ -82,15 +77,39 @@ DEFINE_TEST(test_pax_filename_encoding)
         assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry));
         assertEqualString(filename, archive_entry_pathname(entry));
         archive_read_finish(a);
+}
+
+/*
+ * Set the locale and write a pathname containing invalid characters.
+ * This should work; the underlying implementation should automatically
+ * fall back to storing the pathname in binary.
+ */
+DEFINE_TEST(test_pax_filename_encoding_2)
+{
+       char filename[] = "abc\314\214mno\374xyz";
+       struct archive *a;
+       struct archive_entry *entry;
+       char buff[65536];
+       char longname[] = "abc\314\214mno\374xyz"
+           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+           "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
+           ;
+       size_t used;
  
         /*
          * We need a starting locale which has invalid sequences.
          * de_DE.UTF-8 seems to be commonly supported.
          */
         /* If it doesn't exist, just warn and return. */
-       failure("We need a suitable locale for the encoding tests.");
-       if (!assert(NULL != setlocale(LC_ALL, "de_DE.UTF-8")))
+       if (NULL == setlocale(LC_ALL, "de_DE.UTF-8")) {
+               skipping("invalid encoding tests require a suitable locale;"
+                   " de_DE.UTF-8 not available on this system");
                 return;
+       }
  
         assert((a = archive_write_new()) != NULL);
         assertEqualIntA(a, 0, archive_write_set_format_pax(a));
@@ -159,3 +178,120 @@ DEFINE_TEST(test_pax_filename_encoding)
         assertEqualInt(0, archive_read_finish(a));
  }
  
+/*
+ * Create an entry starting from a wide-character Unicode pathname,
+ * read it back into "C" locale, which doesn't support the name.
+ * TODO: Figure out the "right" behavior here.
+ */
+DEFINE_TEST(test_pax_filename_encoding_3)
+{
+       wchar_t badname[] = L"xxxAyyyBzzz";
+       const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz";
+       struct archive *a;
+       struct archive_entry *entry;
+       char buff[65536];
+       size_t used;
+
+       badname[3] = 0x1234;
+       badname[7] = 0x5678;
+
+       /* If the "C" locale can't be set, skip this test and return. */
+       if (NULL == setlocale(LC_ALL, "C")) {
+               skipping("Can't set \"C\" locale, so can't exercise "
+                   "certain character-conversion failures");
+               return;
+       }
+
+       assert((a = archive_write_new()) != NULL);
+       assertEqualIntA(a, 0, archive_write_set_format_pax(a));
+       assertEqualIntA(a, 0, archive_write_set_compression_none(a));
+       assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
+       assertEqualInt(0,
+           archive_write_open_memory(a, buff, sizeof(buff), &used));
+
+       assert((entry = archive_entry_new()) != NULL);
+       /* Set pathname to non-convertible wide value. */
+       archive_entry_copy_pathname_w(entry, badname);
+       archive_entry_set_filetype(entry, AE_IFREG);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+
+       assert((entry = archive_entry_new()) != NULL);
+       archive_entry_copy_pathname_w(entry, L"abc");
+       /* Set gname to non-convertible wide value. */
+       archive_entry_copy_gname_w(entry, badname);
+       archive_entry_set_filetype(entry, AE_IFREG);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+
+       assert((entry = archive_entry_new()) != NULL);
+       archive_entry_copy_pathname_w(entry, L"abc");
+       /* Set uname to non-convertible wide value. */
+       archive_entry_copy_uname_w(entry, badname);
+       archive_entry_set_filetype(entry, AE_IFREG);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+
+       assert((entry = archive_entry_new()) != NULL);
+       archive_entry_copy_pathname_w(entry, L"abc");
+       /* Set hardlink to non-convertible wide value. */
+       archive_entry_copy_hardlink_w(entry, badname);
+       archive_entry_set_filetype(entry, AE_IFREG);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+
+       assert((entry = archive_entry_new()) != NULL);
+       archive_entry_copy_pathname_w(entry, L"abc");
+       /* Set symlink to non-convertible wide value. */
+       archive_entry_copy_symlink_w(entry, badname);
+       archive_entry_set_filetype(entry, AE_IFLNK);
+       assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
+       archive_entry_free(entry);
+
+       assertEqualInt(0, archive_write_close(a));
+       assertEqualInt(0, archive_write_finish(a));
+
+       /*
+        * Now read the entries back.
+        */
+
+       assert((a = archive_read_new()) != NULL);
+       assertEqualInt(0, archive_read_support_format_tar(a));
+       assertEqualInt(0, archive_read_open_memory(a, buff, used));
+
+       failure("A non-convertible pathname should cause a warning.");
+       assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+       assertEqualWString(badname, archive_entry_pathname_w(entry));
+       failure("If native locale can't convert, we should get UTF-8 back.");
+       assertEqualString(badname_utf8, archive_entry_pathname(entry));
+
+       failure("A non-convertible gname should cause a warning.");
+       assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+       assertEqualWString(badname, archive_entry_gname_w(entry));
+       failure("If native locale can't convert, we should get UTF-8 back.");
+       assertEqualString(badname_utf8, archive_entry_gname(entry));
+
+       failure("A non-convertible uname should cause a warning.");
+       assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+       assertEqualWString(badname, archive_entry_uname_w(entry));
+       failure("If native locale can't convert, we should get UTF-8 back.");
+       assertEqualString(badname_utf8, archive_entry_uname(entry));
+
+       failure("A non-convertible hardlink should cause a warning.");
+       assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+       assertEqualWString(badname, archive_entry_hardlink_w(entry));
+       failure("If native locale can't convert, we should get UTF-8 back.");
+       assertEqualString(badname_utf8, archive_entry_hardlink(entry));
+
+       failure("A non-convertible symlink should cause a warning.");
+       assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
+       assertEqualWString(badname, archive_entry_symlink_w(entry));
+       assertEqualWString(NULL, archive_entry_hardlink_w(entry));
+       failure("If native locale can't convert, we should get UTF-8 back.");
+       assertEqualString(badname_utf8, archive_entry_symlink(entry));
+
+       assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry));
+
+       assertEqualInt(0, archive_read_close(a));
+       assertEqualInt(0, archive_read_finish(a));
+}
diff --git a/libarchive/test/test_tar_filenames.c b/libarchive/test/test_tar_filenames.c

index 8b83b5277334bd6c84b5e471e1dbc89ac40876ed..9b98448e4a7304a54ee389f654878b53c6c2ae33 100644 (file)
--- a/libarchive/test/test_tar_filenames.c
+++ b/libarchive/test/test_tar_filenames.c
@@ -40,19 +40,22 @@ test_filename(const char *prefix, int dlen, int flen)
         struct archive_entry *ae;
         struct archive *a;
         size_t used;
-       size_t prefix_length = 0;
-       unsigned i = 0;
+       char *p;
+       int i;
  
+       p = filename;
         if (prefix) {
                 strcpy(filename, prefix);
-               i = prefix_length = strlen(prefix);
+               p += strlen(p);
         }
-       for (; i < prefix_length + dlen; i++)
-               filename[i] = 'a';
-       filename[i++] = '/';
-       for (; i < prefix_length + dlen + flen + 1; i++)
-               filename[i] = 'b';
-       filename[i++] = '\0';
+       if (dlen > 0) {
+               for (i = 0; i < dlen; i++)
+                       *p++ = 'a';
+               *p++ = '/';
+       }
+       for (i = 0; i < flen; i++)
+               *p++ = 'b';
+       *p = '\0';
  
         strcpy(dirname, filename);
  
@@ -160,15 +163,22 @@ DEFINE_TEST(test_tar_filenames)
         int dlen, flen;
  
         /* Repeat the following for a variety of dir/file lengths. */
-       for (dlen = 40; dlen < 60; dlen++) {
-               for (flen = 40; flen < 60; flen++) {
+       for (dlen = 45; dlen < 55; dlen++) {
+               for (flen = 45; flen < 55; flen++) {
+                       test_filename(NULL, dlen, flen);
+                       test_filename("/", dlen, flen);
+               }
+       }
+
+       for (dlen = 0; dlen < 140; dlen += 10) {
+               for (flen = 98; flen < 102; flen++) {
                         test_filename(NULL, dlen, flen);
                         test_filename("/", dlen, flen);
                 }
         }
  
         for (dlen = 140; dlen < 160; dlen++) {
-               for (flen = 90; flen < 110; flen++) {
+               for (flen = 95; flen < 105; flen++) {
                         test_filename(NULL, dlen, flen);
                         test_filename("/", dlen, flen);
                 }
diff --git a/libarchive/test/test_tar_large.c b/libarchive/test/test_tar_large.c

index c675ac1ee0467e3579c08f626c56c9b89ec3f484..a05b49f6ab56cbb6b385107a697dd0002d2d4de5 100644 (file)
--- a/libarchive/test/test_tar_large.c
+++ b/libarchive/test/test_tar_large.c
@@ -242,6 +242,11 @@ DEFINE_TEST(test_tar_large)
                 archive_entry_copy_pathname(ae, namebuff);
                 archive_entry_set_mode(ae, S_IFREG | 0755);
                 filesize = tests[i];
+
+               if (filesize < 0) {
+                       skipping("32-bit off_t doesn't permit testing of very large files.");
+                       return;
+               }
                 archive_entry_set_size(ae, filesize);
  
                 assertA(0 == archive_write_header(a, ae));
diff --git a/libarchive/test/test_write_format_ar.c b/libarchive/test/test_write_format_ar.c

index 6c7a4462a9055390ea06f205447466d9091b812b..432557ca1fff0a10e2ad89f9e807c9be8f8f277d 100644 (file)
--- a/libarchive/test/test_write_format_ar.c
+++ b/libarchive/test/test_write_format_ar.c
@@ -30,7 +30,7 @@ __FBSDID("$FreeBSD: src/lib/libarchive/test/test_write_format_ar.c,v 1.6 2008/03
  
  char buff[4096];
  char buff2[64];
-static unsigned char strtab[] = "abcdefghijklmn.o/\nggghhhjjjrrrttt.o/\niiijjjdddsssppp.o/\n";
+static char strtab[] = "abcdefghijklmn.o/\nggghhhjjjrrrttt.o/\niiijjjdddsssppp.o/\n";
  
  DEFINE_TEST(test_write_format_ar)
  {
diff --git a/tar/Makefile b/tar/Makefile

index fa325429e2f47cc87c5ba3250818ac72dd433b14..44098eac8ebb8fde322041d76d0d3f4787d89ffb 100644 (file)
--- a/tar/Makefile
+++ b/tar/Makefile
@@ -1,7 +1,7 @@
  # $FreeBSD: src/usr.bin/tar/Makefile,v 1.34 2008/03/18 06:18:49 kientzle Exp $
  
  PROG=  bsdtar
-BSDTAR_VERSION_STRING=2.5.0b
+BSDTAR_VERSION_STRING=2.5.1b
  SRCS=  bsdtar.c getdate.y matching.c read.c tree.c util.c write.c
  WARNS?=        5
  DPADD= ${LIBARCHIVE} ${LIBBZ2} ${LIBZ}
diff --git a/tar/bsdtar.c b/tar/bsdtar.c

index 81d9841bd94eb4d2c6d30e5cc6c6965522db3253..abd575ca2f5533f2fa729be9bcb66003166eda7c 100644 (file)
--- a/tar/bsdtar.c
+++ b/tar/bsdtar.c
@@ -788,7 +788,7 @@ version(void)
         printf("bsdtar %s - %s\n",
             BSDTAR_VERSION_STRING,
             archive_version());
-       exit(1);
+       exit(0);
  }
  
  static const char *long_help_msg =
author	Tim Kientzle <kientzle@gmail.com>
	Wed, 30 Apr 2008 21:48:09 +0000 (17:48 -0400)
committer	Tim Kientzle <kientzle@gmail.com>
	Wed, 30 Apr 2008 21:48:09 +0000 (17:48 -0400)
libarchive/Makefile		patch \| blob \| blame \| history
libarchive/archive_entry.c		patch \| blob \| blame \| history
libarchive/archive_entry.h		patch \| blob \| blame \| history
libarchive/archive_entry_link_resolver.c		patch \| blob \| blame \| history
libarchive/archive_entry_private.h		patch \| blob \| blame \| history
libarchive/archive_platform.h		patch \| blob \| blame \| history
libarchive/archive_read_support_format_iso9660.c		patch \| blob \| blame \| history
libarchive/archive_read_support_format_tar.c		patch \| blob \| blame \| history
libarchive/archive_read_support_format_zip.c		patch \| blob \| blame \| history
libarchive/archive_string.c		patch \| blob \| blame \| history
libarchive/archive_string.h		patch \| blob \| blame \| history
libarchive/archive_util.c		patch \| blob \| blame \| history
libarchive/archive_write_disk.c		patch \| blob \| blame \| history
libarchive/archive_write_set_format_pax.c		patch \| blob \| blame \| history
libarchive/archive_write_set_format_ustar.c		patch \| blob \| blame \| history
libarchive/test/Makefile		patch \| blob \| blame \| history
libarchive/test/main.c		patch \| blob \| blame \| history
libarchive/test/test_acl_pax.c		patch \| blob \| blame \| history
libarchive/test/test_archive_api_feature.c		patch \| blob \| blame \| history
libarchive/test/test_entry.c		patch \| blob \| blame \| history
libarchive/test/test_pax_filename_encoding.c		patch \| blob \| blame \| history
libarchive/test/test_tar_filenames.c		patch \| blob \| blame \| history
libarchive/test/test_tar_large.c		patch \| blob \| blame \| history
libarchive/test/test_write_format_ar.c		patch \| blob \| blame \| history
tar/Makefile		patch \| blob \| blame \| history
tar/bsdtar.c		patch \| blob \| blame \| history