From 0debf359fc2d4796aedab04d96c6e2046551b56e Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Sun, 26 Mar 2017 20:29:34 -0400 Subject: [PATCH] support reading metadata from compressed files The raw format provides very little metadata. Allow filters to pass back state that they know about. With gzip, we know the original file name, mtime, and file size. For now, we only pull out the first two as those are available in the file header. The file size is in the file trailer, so we'll have to add support for that later (if we can seek the input). --- libarchive/archive_read.c | 9 +++++ libarchive/archive_read_private.h | 3 ++ libarchive/archive_read_support_filter_gzip.c | 38 +++++++++++++++++-- libarchive/archive_read_support_format_raw.c | 4 +- libarchive/test/test_read_format_raw.c | 22 +++++++++++ .../test/test_read_format_raw.data.gz.uu | 4 ++ 6 files changed, 75 insertions(+), 5 deletions(-) create mode 100644 libarchive/test/test_read_format_raw.data.gz.uu diff --git a/libarchive/archive_read.c b/libarchive/archive_read.c index 0e56e76e7..de964f253 100644 --- a/libarchive/archive_read.c +++ b/libarchive/archive_read.c @@ -611,6 +611,15 @@ choose_filters(struct archive_read *a) return (ARCHIVE_FATAL); } +int +__archive_read_header(struct archive_read *a, struct archive_entry *entry) +{ + if (a->filter->read_header) + return a->filter->read_header(a->filter, entry); + else + return (ARCHIVE_OK); +} + /* * Read header of next entry. */ diff --git a/libarchive/archive_read_private.h b/libarchive/archive_read_private.h index 78546dca3..bf04f6410 100644 --- a/libarchive/archive_read_private.h +++ b/libarchive/archive_read_private.h @@ -98,6 +98,8 @@ struct archive_read_filter { int (*close)(struct archive_read_filter *self); /* Function that handles switching from reading one block to the next/prev */ int (*sswitch)(struct archive_read_filter *self, unsigned int iindex); + /* Read any header metadata if available. 
*/ + int (*read_header)(struct archive_read_filter *self, struct archive_entry *entry); /* My private data. */ void *data; @@ -250,6 +252,7 @@ int64_t __archive_read_seek(struct archive_read*, int64_t, int); int64_t __archive_read_filter_seek(struct archive_read_filter *, int64_t, int); int64_t __archive_read_consume(struct archive_read *, int64_t); int64_t __archive_read_filter_consume(struct archive_read_filter *, int64_t); +int __archive_read_header(struct archive_read *, struct archive_entry *); int __archive_read_program(struct archive_read_filter *, const char *); void __archive_read_free_filters(struct archive_read *); struct archive_read_extract *__archive_read_get_extract(struct archive_read *); diff --git a/libarchive/archive_read_support_filter_gzip.c b/libarchive/archive_read_support_filter_gzip.c index fa8c675de..0b306df0b 100644 --- a/libarchive/archive_read_support_filter_gzip.c +++ b/libarchive/archive_read_support_filter_gzip.c @@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$"); #endif #include "archive.h" +#include "archive_entry.h" +#include "archive_endian.h" #include "archive_private.h" #include "archive_read_private.h" @@ -56,6 +58,8 @@ struct private_data { size_t out_block_size; int64_t total_out; unsigned long crc; + uint32_t mtime; + char *name; char eof; /* True = found end of compressed data. */ }; @@ -123,7 +127,8 @@ archive_read_support_filter_gzip(struct archive *_a) * count of bits verified, suitable for use by bidder. */ static ssize_t -peek_at_header(struct archive_read_filter *filter, int *pbits) +peek_at_header(struct archive_read_filter *filter, int *pbits, + struct private_data *state) { const unsigned char *p; ssize_t avail, len; @@ -144,7 +149,9 @@ peek_at_header(struct archive_read_filter *filter, int *pbits) return (0); bits += 3; header_flags = p[3]; - /* Bytes 4-7 are mod time. */ + /* Bytes 4-7 are mod time in little endian. */ + if (state) + state->mtime = archive_le32dec(p + 4); /* Byte 8 is deflate flags. 
*/ /* XXXX TODO: return deflate flags back to consume_header for use in initializing the decompressor. */ @@ -161,6 +168,7 @@ peek_at_header(struct archive_read_filter *filter, int *pbits) /* Null-terminated optional filename. */ if (header_flags & 8) { + ssize_t file_start = len; do { ++len; if (avail < len) @@ -169,6 +177,12 @@ peek_at_header(struct archive_read_filter *filter, int *pbits) if (p == NULL) return (0); } while (p[len - 1] != 0); + + if (state) { + /* Drop any name kept from an earlier header parse so a repeated parse (e.g. a later gzip member) does not leak it. */ + free(state->name); + state->name = strdup((const char *)&p[file_start]); + } } /* Null-terminated optional comment. */ @@ -214,11 +225,28 @@ gzip_bidder_bid(struct archive_read_filter_bidder *self, (void)self; /* UNUSED */ - if (peek_at_header(filter, &bits_checked)) + if (peek_at_header(filter, &bits_checked, NULL)) return (bits_checked); return (0); } +static int +gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry) +{ + struct private_data *state; + + state = (struct private_data *)self->data; + + /* A mtime of 0 is considered invalid/missing. */ + if (state->mtime != 0) + archive_entry_set_mtime(entry, state->mtime, 0); + + /* If the name is available, extract it. */ + if (state->name) + archive_entry_set_pathname(entry, state->name); + + return (ARCHIVE_OK); +} #ifndef HAVE_ZLIB_H @@ -272,6 +300,7 @@ gzip_bidder_init(struct archive_read_filter *self) self->read = gzip_filter_read; self->skip = NULL; /* not supported */ self->close = gzip_filter_close; + self->read_header = gzip_read_header; state->in_stream = 0; /* We're not actually within a stream yet. */ @@ -289,7 +318,7 @@ consume_header(struct archive_read_filter *self) state = (struct private_data *)self->data; /* If this is a real header, consume it. 
*/ - len = peek_at_header(self->upstream, NULL); + len = peek_at_header(self->upstream, NULL, state); if (len == 0) return (ARCHIVE_EOF); __archive_read_filter_consume(self->upstream, len); @@ -469,6 +498,7 @@ gzip_filter_close(struct archive_read_filter *self) } } + free(state->name); free(state->out_block); free(state); return (ret); diff --git a/libarchive/archive_read_support_format_raw.c b/libarchive/archive_read_support_format_raw.c index efa2c6a33..ec0520b60 100644 --- a/libarchive/archive_read_support_format_raw.c +++ b/libarchive/archive_read_support_format_raw.c @@ -120,7 +120,9 @@ archive_read_format_raw_read_header(struct archive_read *a, archive_entry_set_filetype(entry, AE_IFREG); archive_entry_set_perm(entry, 0644); /* I'm deliberately leaving most fields unset here. */ - return (ARCHIVE_OK); + + /* Let the filter fill out any fields it might have. */ + return __archive_read_header(a, entry); } static int diff --git a/libarchive/test/test_read_format_raw.c b/libarchive/test/test_read_format_raw.c index 831bcec11..ccd9d0acb 100644 --- a/libarchive/test/test_read_format_raw.c +++ b/libarchive/test/test_read_format_raw.c @@ -36,6 +36,7 @@ DEFINE_TEST(test_read_format_raw) const char *reffile1 = "test_read_format_raw.data"; const char *reffile2 = "test_read_format_raw.data.Z"; const char *reffile3 = "test_read_format_raw.bufr"; + const char *reffile4 = "test_read_format_raw.data.gz"; /* First, try pulling data out of an uninterpretable file. */ extract_reference_file(reffile1); @@ -113,6 +114,27 @@ DEFINE_TEST(test_read_format_raw) assert(!archive_entry_ctime_is_set(ae)); assert(!archive_entry_mtime_is_set(ae)); + /* Fourth, try with gzip which has metadata. 
*/ + extract_reference_file(reffile4); + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_open_filename(a, reffile4, 1)); + + /* First (and only!) Entry */ + assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); + assertEqualString("test-file-name.data", archive_entry_pathname(ae)); + assertEqualInt(archive_entry_is_encrypted(ae), 0); + assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); + assert(archive_entry_mtime_is_set(ae)); + assertEqualIntA(a, archive_entry_mtime(ae), 0x5cbafd25); + /* Most fields should be unset (unknown) */ + assert(!archive_entry_size_is_set(ae)); + assert(!archive_entry_atime_is_set(ae)); + assert(!archive_entry_ctime_is_set(ae)); + /* Test EOF */ assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a)); diff --git a/libarchive/test/test_read_format_raw.data.gz.uu b/libarchive/test/test_read_format_raw.data.gz.uu new file mode 100644 index 000000000..cf1f7b307 --- /dev/null +++ b/libarchive/test/test_read_format_raw.data.gz.uu @@ -0,0 +1,4 @@ +begin 644 test_read_format_raw.data.gz +L'XL(""7]NEP``W1E