]> git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
Checkpoint work on zip64 support
authorTim Kientzle <kientzle@gmail.com>
Fri, 13 Dec 2013 06:10:06 +0000 (22:10 -0800)
committerTim Kientzle <kientzle@gmail.com>
Fri, 13 Dec 2013 06:10:06 +0000 (22:10 -0800)
libarchive/archive_read_support_format_zip.c
libarchive/archive_write_set_format_zip.c

index bd01caa5ce639b1a14c34bd88e364a54c333b9f1..0ab2109fd05ab3dcb36469681a4f8a6ac65212eb 100644 (file)
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102
 
 struct zip_entry {
        struct archive_rb_node  node;
+       struct zip_entry        *next;
        int64_t                 local_header_offset;
        int64_t                 compressed_size;
        int64_t                 uncompressed_size;
@@ -245,8 +246,9 @@ static int
 archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid)
 {
        struct zip *zip = (struct zip *)a->format->data;
-       int64_t end_of_central_directory_offset;
+       int64_t file_size, end_of_central_directory_offset;
        const char *p;
+       int i, tail, found;
 
        /* If someone has already bid more than 32, then avoid
           trashing the look-ahead buffers with a seek. */
@@ -255,78 +257,74 @@ archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid)
 
        /* end-of-central-directory record is 22 bytes; first check
         * for it at very end of file. */
-       end_of_central_directory_offset = __archive_read_seek(a, -22, SEEK_END);
+       file_size = __archive_read_seek(a, 0, SEEK_END);
        /* If we can't seek, then we can't bid. */
-       if (end_of_central_directory_offset <= 0)
+       if (file_size <= 0)
                return 0;
 
-       if ((p = __archive_read_ahead(a, 22, NULL)) == NULL)
+       /* Search last 16k of file for end-of-central-directory
+        * record (which starts with PK\005\006) or Zip64 locator
+        * record (which begins with PK\006\007) */
+       tail = zipmin(1024 * 16, file_size);
+       end_of_central_directory_offset
+           = __archive_read_seek(a, -tail, SEEK_END);
+       if (end_of_central_directory_offset < 0)
                return 0;
-       /* First four bytes are signature for end of central directory
-        * record.  Four zero bytes ensure this isn't a multi-volume
-        * Zip file (which we don't yet support). */
-       if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0) {
-               int64_t i, tail;
-               int found;
-
-               /*
-                * End-of-central-directory isn't exactly at end of file.
-                * Try reading last 16k of file and search for it.
-                */
-               if (end_of_central_directory_offset + 22 > 1024 * 16) {
-                       tail = 1024 * 16;
-                       end_of_central_directory_offset
-                           = __archive_read_seek(a, tail * -1, SEEK_END);
-               } else {
-                       tail = end_of_central_directory_offset + 22;
-                       end_of_central_directory_offset
-                           = __archive_read_seek(a, 0, SEEK_SET);
+       if ((p = __archive_read_ahead(a, (size_t)tail, NULL)) == NULL)
+               return 0;
+       for (found = 0, i = 0; !found_eocd && !found_zip64 && i < tail - 22;) {
+               switch (p[i + 3]) {
+               case 'P': i += 3; break;
+               case 'K': i += 2; break;
+               case 005: i += 1; break;
+               case 006:
+                       if (memcmp(p + i,
+                               "PK\005\006\000\000\000\000", 8) == 0) {
+                               p += i;
+                               end_of_central_directory_offset += i;
+                               found_eocd = 1;
+                       } else
+                               i += 1; /* Look for PK\006\007 next */
+                       break;
+               case 007:
+                       if (memcmp(p + i, "PK\006\007", 4) == 0) {
+                               p += i;
+                               end_of_central_directory_offset += i;
+                               found_zip64 = 1;
+                       } else
+                               i += 4;
+                       break;
+               default: i += 4; break;
                }
-               if (end_of_central_directory_offset < 0)
+       }
+
+       if (found_eocd) {
+               /* Since we've already done the hard work of finding the
+                  end of central directory record, let's save the important
+                  information. */
+               zip->central_directory_entries = archive_le16dec(p + 10);
+               zip->central_directory_size = archive_le32dec(p + 12);
+               zip->central_directory_offset = archive_le32dec(p + 16);
+               zip->end_of_central_directory_offset
+                   = end_of_central_directory_offset;
+               /* Just one volume: central dir must all be on this volume. */
+               if (zip->central_directory_entries != archive_le16dec(p + 8))
                        return 0;
-               if ((p = __archive_read_ahead(a, (size_t)tail, NULL)) == NULL)
-                       return 0;
-               for (found = 0, i = 0; !found && i < tail - 22;) {
-                       switch (p[i]) {
-                       case 'P':
-                               if (memcmp(p+i,
-                                   "PK\005\006\000\000\000\000", 8) == 0) {
-                                       p += i;
-                                       end_of_central_directory_offset += i;
-                                       found = 1;
-                               } else
-                                       i += 8;
-                               break;
-                       case 'K': i += 7; break;
-                       case 005: i += 6; break;
-                       case 006: i += 5; break;
-                       default: i += 1; break;
-                       }
-               }
-               if (!found)
+               /* Central directory can't extend beyond start of EOCD record. */
+               if (zip->central_directory_offset
+                   + (int64_t)zip->central_directory_size
+                   > end_of_central_directory_offset)
                        return 0;
+               /* This is just a tiny bit higher than the maximum
+                  returned by the streaming Zip bidder.  This ensures
+                  that the more accurate seeking Zip parser wins
+                  whenever seek is available. */
+               return 32;
        }
 
-       /* Since we've already done the hard work of finding the
-          end of central directory record, let's save the important
-          information. */
-       zip->central_directory_entries = archive_le16dec(p + 10);
-       zip->central_directory_size = archive_le32dec(p + 12);
-       zip->central_directory_offset = archive_le32dec(p + 16);
-       zip->end_of_central_directory_offset = end_of_central_directory_offset;
-
-       /* Just one volume, so central dir must all be on this volume. */
-       if (zip->central_directory_entries != archive_le16dec(p + 8))
-               return 0;
-       /* Central directory can't extend beyond start of EOCD record. */
-       if (zip->central_directory_offset +
-           (int64_t)zip->central_directory_size > end_of_central_directory_offset)
-               return 0;
-
-       /* This is just a tiny bit higher than the maximum returned by
-          the streaming Zip bidder.  This ensures that the more accurate
-          seeking Zip parser wins whenever seek is available. */
-       return 32;
+       /* Looking at Zip64 end-of-cd locator... */
+       XXX Seek to zip64 end-of-cd record;
+       XXX parse out zip64 data;
 }
 
 static int
@@ -406,12 +404,25 @@ expose_parent_dirs(struct zip *zip, const char *name, size_t name_length)
        archive_string_free(&str);
 }
 
+static int64_t
+zip_read_consume(struct archive_read *a, int64_t bytes)
+{
+       struct zip *zip = (struct zip *)a->format->data;
+       int64_t skip;
+
+       skip = __archive_read_consume(a, bytes);
+       if (skip > 0)
+               zip->offset += skip;
+       return (skip);
+}
+
 static int
 slurp_central_directory(struct archive_read *a, struct zip *zip)
 {
-       unsigned i;
+       unsigned i, found;
        int64_t correction;
-       const char *p;
+       ssize_t bytes_avail;
+       const char *p, *end;
        static const struct archive_rb_tree_ops rb_ops = {
                &cmp_node, &cmp_key
        };
@@ -420,36 +431,69 @@ slurp_central_directory(struct archive_read *a, struct zip *zip)
        };
 
        /*
-        * Consider the archive file we are reading may be SFX.
-        * So we have to calculate a SFX header size to revise
-        * ZIP header offsets.
+        * Find the start of the central directory.  The end-of-CD
+        * record has our starting point, but there are lots of
+        * Zip archives which have had other data prepended to the
+        * file, which makes the recorded offsets all too small.
+        * So we search forward from the specified offset until we
+        * find the real start of the central directory.  Then we
+        * know the correction we need to apply to account for leading
+        * padding.
         */
-       correction = zip->end_of_central_directory_offset -
-           (zip->central_directory_offset + zip->central_directory_size);
-       /* The central directory offset is relative value, and so
-        * we revise this offset for SFX. */
-       zip->central_directory_offset += correction;
-
-       __archive_read_seek(a, zip->central_directory_offset, SEEK_SET);
-       zip->offset = zip->central_directory_offset;
+       __archive_read_seek(a, zip->central_directory_offset - 13, SEEK_SET);
+       zip->offset = zip->central_directory_offset - 13;
+
+       found = 0;
+       while (!found) {
+               if ((p = __archive_read_ahead(a, 20, &bytes_avail)) == NULL)
+                       return ARCHIVE_FATAL;
+               for (found = 0, i = 0; !found && i < bytes_avail - 4;) {
+                       switch (p[i + 3]) {
+                       case 'P': i += 3; break;
+                       case 'K': i += 2; break;
+                       case 001: i += 1; break;
+                       case 002:
+                               if (memcmp(p + i, "PK\001\002", 4) == 0) {
+                                       p += i;
+                                       found = 1;
+                               } else
+                                       i += 4;
+                               break;
+                       default: i += 4; break;
+                       }
+               }
+               zip_read_consume(a, i);
+       }
+       correction = zip->offset - zip->central_directory_offset;
+
        __archive_rb_tree_init(&zip->tree, &rb_ops);
        __archive_rb_tree_init(&zip->tree_rsrc, &rb_rsrc_ops);
 
-       zip->zip_entries = calloc(zip->central_directory_entries,
-                               sizeof(struct zip_entry));
-       for (i = 0; i < zip->central_directory_entries; ++i) {
-               struct zip_entry *zip_entry = &zip->zip_entries[i];
+       zip->central_directory_entries = 0;
+       while (1) {
+               struct zip_entry *zip_entry;
                size_t filename_length, extra_length, comment_length;
                uint32_t external_attributes;
                const char *name, *r;
 
-               if ((p = __archive_read_ahead(a, 46, NULL)) == NULL)
+               if ((p = __archive_read_ahead(a, 4, NULL)) == NULL)
                        return ARCHIVE_FATAL;
-               if (memcmp(p, "PK\001\002", 4) != 0) {
+               if (memcmp(p, "PK\006\006", 4) == 0
+                   || memcmp(p, "PK\005\006", 4) == 0) {
+                       break;
+               } else if (memcmp(p, "PK\001\002", 4) != 0) {
                        archive_set_error(&a->archive,
                            -1, "Invalid central directory signature");
                        return ARCHIVE_FATAL;
                }
+               if ((p = __archive_read_ahead(a, 46, NULL)) == NULL)
+                       return ARCHIVE_FATAL;
+
+               zip_entry = calloc(1, sizeof(struct zip_entry));
+               zip_entry->next = zip->zip_entries;
+               zip->zip_entries = zip_entry;
+               zip->central_directory_entries++;
+
                zip->have_central_directory = 1;
                /* version = p[4]; */
                zip_entry->system = p[5];
@@ -545,18 +589,6 @@ slurp_central_directory(struct archive_read *a, struct zip *zip)
        return ARCHIVE_OK;
 }
 
-static int64_t
-zip_read_consume(struct archive_read *a, int64_t bytes)
-{
-       struct zip *zip = (struct zip *)a->format->data;
-       int64_t skip;
-
-       skip = __archive_read_consume(a, bytes);
-       if (skip > 0)
-               zip->offset += skip;
-       return (skip);
-}
-
 static int
 zip_read_mac_metadata(struct archive_read *a, struct archive_entry *entry,
     struct zip_entry *rsrc)
@@ -1616,6 +1648,7 @@ static int
 archive_read_format_zip_cleanup(struct archive_read *a)
 {
        struct zip *zip;
+       struct zip_entry *zip_entry, *next_zip_entry;
 
        zip = (struct zip *)(a->format->data);
 #ifdef HAVE_ZLIB_H
@@ -1623,11 +1656,14 @@ archive_read_format_zip_cleanup(struct archive_read *a)
                inflateEnd(&zip->stream);
 #endif
        if (zip->zip_entries && zip->central_directory_entries) {
-               unsigned i;
-               for (i = 0; i < zip->central_directory_entries; i++)
-                       archive_string_free(&(zip->zip_entries[i].rsrcname));
+               zip_entry = zip->zip_entries;
+               while (zip_entry != NULL) {
+                       next_zip_entry = zip_entry->next;
+                       archive_string_free(&zip_entry->rsrcname);
+                       free(zip_entry);
+                       zip_entry = next_zip_entry;
+               }
        }
-       free(zip->zip_entries);
        free(zip->uncompressed_buffer);
        archive_string_free(&(zip->extra));
        free(zip);
index 2c8c3ca1bde56e4d24aa6ed3e7dd47d1427ec048..81886d897485e4504add99f0e5620737960e8908 100644 (file)
  * Development supported by Google Summer of Code 2008.
  */
 
-/*
- * The current implementation is very limited:
- *
- *   - No encryption support.
- *   - No ZIP64 support.
- *   - No support for splitting and spanning.
- *   - Only supports regular file and folder entries.
- *
- * Note that generally data in ZIP files is little-endian encoded,
- * with some exceptions.
- *
- * TODO: Since Libarchive is generally 64bit oriented, but this implementation
- * does not yet support sizes exceeding 32bit, it is highly fragile for
- * big archives. This should change when ZIP64 is finally implemented, otherwise
- * some serious checking has to be done.
- *
- */
-
 #include "archive_platform.h"
 __FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_format_zip.c 201168 2009-12-29 06:15:32Z kientzle $");
 
@@ -795,7 +777,7 @@ archive_write_zip_finish_entry(struct archive_write *a)
 static int
 archive_write_zip_close(struct archive_write *a)
 {
-       uint8_t end[22];
+       uint8_t buff[64];
        int64_t offset_start, offset_end;
        struct zip *zip = a->format_data;
        struct cd_segment *segment;
@@ -813,19 +795,51 @@ archive_write_zip_close(struct archive_write *a)
        }
        offset_end = zip->written_bytes;
 
-       /* TODO: If central dir info is too large, write Zip64 end-of-cd */
+       /* If central dir info is too large, write Zip64 end-of-cd */
+       if (offset_end - offset_start > 0xffffffffULL
+           || offset_start > 0xffffffffULL
+           || zip->central_directory_entries > 0xffffUL
+           || zip->force_zip64) {
+         /* Zip64 end-of-cd record */
+         memset(buff, 0, 56);
+         memcpy(buff, "PK\006\006", 4);
+         archive_le64enc(buff + 4, 44);
+         archive_le16enc(buff + 12, 45);
+         archive_le16enc(buff + 14, 45);
+         /* This is disk 0 of 0. */
+         archive_le64enc(buff + 24, zip->central_directory_entries);
+         archive_le64enc(buff + 32, zip->central_directory_entries);
+         archive_le64enc(buff + 40, offset_end - offset_start);
+         archive_le64enc(buff + 48, offset_start);
+         ret = __archive_write_output(a, buff, 56);
+         if (ret != ARCHIVE_OK)
+                 return (ARCHIVE_FATAL);
+         zip->written_bytes += 56;
+
+         /* Zip64 end-of-cd locator record. */
+         memset(buff, 0, 20);
+         memcpy(buff, "PK\006\007", 4);
+         archive_le32enc(buff + 4, 0);
+         archive_le64enc(buff + 8, offset_end);
+         archive_le32enc(buff + 16, 1);
+         ret = __archive_write_output(a, buff, 20);
+         if (ret != ARCHIVE_OK)
+                 return (ARCHIVE_FATAL);
+         zip->written_bytes += 20;
+
+       }
 
        /* Format and write end of central directory. */
-       memset(end, 0, sizeof(end));
-       memcpy(end, "PK\005\006", 4);
-       archive_le16enc(end + 8, zip->central_directory_entries);
-       archive_le16enc(end + 10, zip->central_directory_entries);
-       archive_le32enc(end + 12, (uint32_t)(offset_end - offset_start));
-       archive_le32enc(end + 16, (uint32_t)offset_start);
-       ret = __archive_write_output(a, end, sizeof(end));
+       memset(buff, 0, sizeof(buff));
+       memcpy(buff, "PK\005\006", 4);
+       archive_le16enc(buff + 8, zipmin(0xffffU, zip->central_directory_entries));
+       archive_le16enc(buff + 10, zipmin(0xffffU, zip->central_directory_entries));
+       archive_le32enc(buff + 12, zipmin(0xffffffffULL, (uint32_t)(offset_end - offset_start)));
+       archive_le32enc(buff + 16, zipmin(0xffffffffULL, (uint32_t)offset_start));
+       ret = __archive_write_output(a, buff, 22);
        if (ret != ARCHIVE_OK)
                return (ARCHIVE_FATAL);
-       zip->written_bytes += sizeof(end);
+       zip->written_bytes += 22;
        return (ARCHIVE_OK);
 }