#endif
#include "archive.h"
+#include "archive_endian.h"
#include "archive_entry.h"
#include "archive_entry_locale.h"
#include "archive_private.h"
+#include "archive_rb.h"
#include "archive_read_private.h"
-#include "archive_endian.h"
#ifndef HAVE_ZLIB_H
#include "archive_crc32.h"
#endif
struct zip_entry {
+ struct archive_rb_node node;
int64_t local_header_offset;
int64_t compressed_size;
int64_t uncompressed_size;
size_t central_directory_size;
size_t central_directory_entries;
char have_central_directory;
+ int64_t offset;
/* List of entries (seekable Zip only) */
size_t entries_remaining;
struct zip_entry *zip_entries;
struct zip_entry *entry;
+ struct archive_rb_tree tree;
size_t unconsumed;
return 32;
}
+/*
+ * Order two zip entries by the file offset of their local headers; used
+ * as the node comparator of the entry RB tree.
+ *
+ * Returns a positive value when n1's offset is lower than n2's, a
+ * negative value when it is higher, and 0 when both are equal (the same
+ * sign as "n2 offset - n1 offset").
+ *
+ * The offsets are int64_t, so they are compared explicitly instead of
+ * being subtracted and narrowed to int: with a 32-bit int the narrowing
+ * keeps only the low bits of the difference, which gives the wrong sign
+ * (or a bogus 0) once two local headers are 2 GiB or more apart.
+ */
+static int
+cmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2)
+{
+	/* The rb node is the first member of struct zip_entry, so a node
+	 * pointer can be converted back to its enclosing entry. */
+	const struct zip_entry *e1 = (const struct zip_entry *)n1;
+	const struct zip_entry *e2 = (const struct zip_entry *)n2;
+
+	if (e1->local_header_offset > e2->local_header_offset)
+		return (-1);
+	if (e1->local_header_offset < e2->local_header_offset)
+		return (1);
+	return (0);
+}
+
+/*
+ * Key comparator slot of the RB tree operations table.
+ *
+ * The original author notes that this function is never called; it
+ * exists only so the operations table has a key comparator. Both
+ * arguments are ignored and the result is always 1.
+ */
+static int
+cmp_key(const struct archive_rb_node *n, const void *key)
+{
+	(void)key; /* UNUSED */
+	(void)n; /* UNUSED */
+
+	return (1);
+}
+
static int
slurp_central_directory(struct archive_read *a, struct zip *zip)
{
unsigned i;
+ static const struct archive_rb_tree_ops rb_ops = {
+ &cmp_node, &cmp_key
+ };
__archive_read_seek(a, zip->central_directory_offset, SEEK_SET);
+ zip->offset = zip->central_directory_offset;
+ __archive_rb_tree_init(&zip->tree, &rb_ops);
- zip->zip_entries = calloc(zip->central_directory_entries, sizeof(struct zip_entry));
+ zip->zip_entries = calloc(zip->central_directory_entries,
+ sizeof(struct zip_entry));
for (i = 0; i < zip->central_directory_entries; ++i) {
struct zip_entry *zip_entry = &zip->zip_entries[i];
size_t filename_length, extra_length, comment_length;
if (zip_entry->system == 3) {
zip_entry->mode = external_attributes >> 16;
}
+ /* Insert the entry into the RB tree so entries are ordered by file offset. */
+ __archive_rb_tree_insert_node(&zip->tree, &zip_entry->node);
/* We don't read the filename until we get to the
local file header. Reading it here would speed up
46 + filename_length + extra_length + comment_length);
}
- /* TODO: Sort zip entries by file offset so that we
- can optimize get_next_header() to use skip instead of
- seek. */
-
return ARCHIVE_OK;
}
+/*
+ * Consume `bytes` bytes from the read buffer via __archive_read_consume()
+ * and keep the reader's tracked file position (zip->offset) in step.
+ *
+ * Returns whatever __archive_read_consume() returned. The tracked offset
+ * is advanced only when that value is positive, so a zero or negative
+ * result leaves the position untouched.
+ */
+static int64_t
+zip_read_consume(struct archive_read *a, int64_t bytes)
+{
+	struct zip *state = (struct zip *)a->format->data;
+	const int64_t consumed = __archive_read_consume(a, bytes);
+
+	if (consumed > 0) {
+		state->offset += consumed;
+	}
+	return (consumed);
+}
+
static int
archive_read_format_zip_seekable_read_header(struct archive_read *a,
struct archive_entry *entry)
zip->entries_remaining = zip->central_directory_entries;
if (r != ARCHIVE_OK)
return r;
- zip->entry = zip->zip_entries;
- } else {
- ++zip->entry;
+ /* Start with the entry that has the lowest local header
+ * offset of all entries in the archive file. */
+ zip->entry =
+ (struct zip_entry *)ARCHIVE_RB_TREE_MIN(&zip->tree);
+ } else if (zip->entry != NULL) {
+ /* Get next entry in local header offset order. */
+ zip->entry = (struct zip_entry *)__archive_rb_tree_iterate(
+ &zip->tree, &zip->entry->node, ARCHIVE_RB_DIR_RIGHT);
}
- if (zip->entries_remaining <= 0)
+ if (zip->entries_remaining <= 0 || zip->entry == NULL)
return ARCHIVE_EOF;
--zip->entries_remaining;
- /* TODO: If entries are sorted by offset within the file, we
- should be able to skip here instead of seeking. Skipping is
- typically faster (easier for I/O layer to optimize). */
- __archive_read_seek(a, zip->entry->local_header_offset, SEEK_SET);
+ if (zip->offset != zip->entry->local_header_offset) {
+ __archive_read_seek(a, zip->entry->local_header_offset,
+ SEEK_SET);
+ zip->offset = zip->entry->local_header_offset;
+ }
zip->unconsumed = 0;
r = zip_read_local_file_header(a, entry, zip);
if (r != ARCHIVE_OK)
memset(zip->entry, 0, sizeof(struct zip_entry));
/* Search ahead for the next local file header. */
- __archive_read_consume(a, zip->unconsumed);
+ zip_read_consume(a, zip->unconsumed);
zip->unconsumed = 0;
for (;;) {
int64_t skipped = 0;
if (p[2] == '\003' && p[3] == '\004') {
/* Regular file entry. */
- __archive_read_consume(a, skipped);
+ zip_read_consume(a, skipped);
return zip_read_local_file_header(a, entry, zip);
}
++p;
++skipped;
}
- __archive_read_consume(a, skipped);
+ zip_read_consume(a, skipped);
}
}
filename_length = archive_le16dec(p + 26);
extra_length = archive_le16dec(p + 28);
- __archive_read_consume(a, 30);
+ zip_read_consume(a, 30);
if (zip->have_central_directory) {
/* If we read the central dir entry, we must have size information
archive_string_conversion_charset_name(sconv));
ret = ARCHIVE_WARN;
}
- __archive_read_consume(a, filename_length);
+ zip_read_consume(a, filename_length);
if (zip_entry->mode == 0) {
/* Especially in streaming mode, we can end up
return (ARCHIVE_FATAL);
}
process_extra(h, extra_length, zip_entry);
- __archive_read_consume(a, extra_length);
+ zip_read_consume(a, extra_length);
/* Populate some additional entry fields: */
archive_entry_set_mode(entry, zip_entry->mode);
return (ARCHIVE_FAILED);
}
- __archive_read_consume(a, zip->unconsumed);
+ zip_read_consume(a, zip->unconsumed);
zip->unconsumed = 0;
switch(zip->entry->compression) {
/* Consume as much as the compressor actually used. */
bytes_avail = zip->stream.total_in;
- __archive_read_consume(a, bytes_avail);
+ zip_read_consume(a, bytes_avail);
zip->entry_bytes_remaining -= bytes_avail;
zip->entry_compressed_bytes_read += bytes_avail;
/* If we've already read to end of data, we're done. */
if (zip->end_of_entry)
return (ARCHIVE_OK);
- /* If we're seeking, we're done. */
- if (zip->have_central_directory)
- return (ARCHIVE_OK);
/* So we know we're streaming... */
if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)) {
/* We know the compressed length, so we can just skip. */
- int64_t bytes_skipped = __archive_read_consume(a,
+ int64_t bytes_skipped = zip_read_consume(a,
zip->entry_bytes_remaining + zip->unconsumed);
if (bytes_skipped < 0)
return (ARCHIVE_FATAL);
#endif
default: /* Uncompressed or unknown. */
/* Scan for a PK\007\010 signature. */
- __archive_read_consume(a, zip->unconsumed);
+ zip_read_consume(a, zip->unconsumed);
zip->unconsumed = 0;
for (;;) {
const char *p, *buff;
else if (p[3] == '\007') { p += 1; }
else if (p[3] == '\010' && p[2] == '\007'
&& p[1] == 'K' && p[0] == 'P') {
- __archive_read_consume(a, p - buff + 16);
+ zip_read_consume(a, p - buff + 16);
return ARCHIVE_OK;
} else { p += 4; }
}
- __archive_read_consume(a, p - buff);
+ zip_read_consume(a, p - buff);
}
}
return ARCHIVE_OK;