]> git.ipfire.org Git - thirdparty/libarchive.git/commitdiff
support reading metadata from compressed files 958/head
author Mike Frysinger <vapier@gentoo.org>
Mon, 27 Mar 2017 00:29:34 +0000 (20:29 -0400)
committer Mike Frysinger <vapier@gentoo.org>
Sun, 21 Apr 2019 13:04:16 +0000 (23:04 +1000)
The raw format provides very little metadata.  Allow filters to pass
back state that they know about.  With gzip, we know the original file
name, mtime, and file size.  For now, we only pull out the first two
as those are available in the file header.  The file size is in the file
trailer, so we'll have to add support for that later (if we can seek
the input).

libarchive/archive_read.c
libarchive/archive_read_private.h
libarchive/archive_read_support_filter_gzip.c
libarchive/archive_read_support_format_raw.c
libarchive/test/test_read_format_raw.c
libarchive/test/test_read_format_raw.data.gz.uu [new file with mode: 0644]

index 0e56e76e73159b515481cfcacc4fa709e2631824..de964f2532843340f4cf25cffa212a3780bc852e 100644 (file)
@@ -611,6 +611,15 @@ choose_filters(struct archive_read *a)
        return (ARCHIVE_FATAL);
 }
 
+int
+__archive_read_header(struct archive_read *a, struct archive_entry *entry)
+{
+       /* A filter without a read_header hook has no metadata to add. */
+       if (a->filter->read_header == NULL)
+               return (ARCHIVE_OK);
+       return a->filter->read_header(a->filter, entry);
+}
+
 /*
  * Read header of next entry.
  */
index 78546dca34aac1dbb2dd02bc32510ab7cb04eb7b..bf04f6410438499cc094420adeec2b529685328b 100644 (file)
@@ -98,6 +98,8 @@ struct archive_read_filter {
        int (*close)(struct archive_read_filter *self);
        /* Function that handles switching from reading one block to the next/prev */
        int (*sswitch)(struct archive_read_filter *self, unsigned int iindex);
+       /* Read any header metadata if available. */
+       int (*read_header)(struct archive_read_filter *self, struct archive_entry *entry);
        /* My private data. */
        void *data;
 
@@ -250,6 +252,7 @@ int64_t     __archive_read_seek(struct archive_read*, int64_t, int);
 int64_t        __archive_read_filter_seek(struct archive_read_filter *, int64_t, int);
 int64_t        __archive_read_consume(struct archive_read *, int64_t);
 int64_t        __archive_read_filter_consume(struct archive_read_filter *, int64_t);
+int __archive_read_header(struct archive_read *, struct archive_entry *);
 int __archive_read_program(struct archive_read_filter *, const char *);
 void __archive_read_free_filters(struct archive_read *);
 struct archive_read_extract *__archive_read_get_extract(struct archive_read *);
index fa8c675de12453419a414b3a25573556a9d6d144..0b306df0b08352f2e351bff8993561ffc2d6f868 100644 (file)
@@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$");
 #endif
 
 #include "archive.h"
+#include "archive_entry.h"
+#include "archive_endian.h"
 #include "archive_private.h"
 #include "archive_read_private.h"
 
@@ -56,6 +58,8 @@ struct private_data {
        size_t           out_block_size;
        int64_t          total_out;
        unsigned long    crc;
+       uint32_t         mtime;
+       char            *name;
        char             eof; /* True = found end of compressed data. */
 };
 
@@ -123,7 +127,8 @@ archive_read_support_filter_gzip(struct archive *_a)
  * count of bits verified, suitable for use by bidder.
  */
 static ssize_t
-peek_at_header(struct archive_read_filter *filter, int *pbits)
+peek_at_header(struct archive_read_filter *filter, int *pbits,
+              struct private_data *state)
 {
        const unsigned char *p;
        ssize_t avail, len;
@@ -144,7 +149,9 @@ peek_at_header(struct archive_read_filter *filter, int *pbits)
                return (0);
        bits += 3;
        header_flags = p[3];
-       /* Bytes 4-7 are mod time. */
+       /* Bytes 4-7 are mod time in little endian. */
+       if (state)
+               state->mtime = archive_le32dec(p + 4);
        /* Byte 8 is deflate flags. */
        /* XXXX TODO: return deflate flags back to consume_header for use
           in initializing the decompressor. */
@@ -161,6 +168,7 @@ peek_at_header(struct archive_read_filter *filter, int *pbits)
 
        /* Null-terminated optional filename. */
        if (header_flags & 8) {
+               ssize_t file_start = len;
                do {
                        ++len;
                        if (avail < len)
@@ -169,6 +177,9 @@ peek_at_header(struct archive_read_filter *filter, int *pbits)
                        if (p == NULL)
                                return (0);
                } while (p[len - 1] != 0);
+
+               if (state)
+                       state->name = strdup((const char *)&p[file_start]);
        }
 
        /* Null-terminated optional comment. */
@@ -214,11 +225,28 @@ gzip_bidder_bid(struct archive_read_filter_bidder *self,
 
        (void)self; /* UNUSED */
 
-       if (peek_at_header(filter, &bits_checked))
+       if (peek_at_header(filter, &bits_checked, NULL))
                return (bits_checked);
        return (0);
 }
 
+static int
+gzip_read_header(struct archive_read_filter *self, struct archive_entry *entry)
+{
+       /* Per-filter state; holds the mtime/name saved from the header. */
+       const struct private_data *gz = self->data;
+
+       /*
+        * Copy the gzip header fields into the entry.  An mtime of 0 is
+        * treated as invalid/missing, so the entry's mtime stays unset.
+        */
+       if (gz->mtime != 0)
+               archive_entry_set_mtime(entry, gz->mtime, 0);
+       /* The pathname is only set when a name was stored. */
+       if (gz->name != NULL)
+               archive_entry_set_pathname(entry, gz->name);
+       return (ARCHIVE_OK);
+}
 
 #ifndef HAVE_ZLIB_H
 
@@ -272,6 +300,7 @@ gzip_bidder_init(struct archive_read_filter *self)
        self->read = gzip_filter_read;
        self->skip = NULL; /* not supported */
        self->close = gzip_filter_close;
+       self->read_header = gzip_read_header;
 
        state->in_stream = 0; /* We're not actually within a stream yet. */
 
@@ -289,7 +318,7 @@ consume_header(struct archive_read_filter *self)
        state = (struct private_data *)self->data;
 
        /* If this is a real header, consume it. */
-       len = peek_at_header(self->upstream, NULL);
+       len = peek_at_header(self->upstream, NULL, state);
        if (len == 0)
                return (ARCHIVE_EOF);
        __archive_read_filter_consume(self->upstream, len);
@@ -469,6 +498,7 @@ gzip_filter_close(struct archive_read_filter *self)
                }
        }
 
+       free(state->name);
        free(state->out_block);
        free(state);
        return (ret);
index efa2c6a33c7e8b2dd23fb4b6e38138c17df3bf67..ec0520b60a6cc8bc2159df7003329aa805f86508 100644 (file)
@@ -120,7 +120,9 @@ archive_read_format_raw_read_header(struct archive_read *a,
        archive_entry_set_filetype(entry, AE_IFREG);
        archive_entry_set_perm(entry, 0644);
        /* I'm deliberately leaving most fields unset here. */
-       return (ARCHIVE_OK);
+
+       /* Let the filter fill out any fields it might have. */
+       return __archive_read_header(a, entry);
 }
 
 static int
index 831bcec11de34b76580b321911e4899b6a0c9713..ccd9d0acb111556f5692734ba8bf7f0a6f0a37e8 100644 (file)
@@ -36,6 +36,7 @@ DEFINE_TEST(test_read_format_raw)
        const char *reffile1 = "test_read_format_raw.data";
        const char *reffile2 = "test_read_format_raw.data.Z";
        const char *reffile3 = "test_read_format_raw.bufr";
+       const char *reffile4 = "test_read_format_raw.data.gz";
 
        /* First, try pulling data out of an uninterpretable file. */
        extract_reference_file(reffile1);
@@ -113,6 +114,27 @@ DEFINE_TEST(test_read_format_raw)
        assert(!archive_entry_ctime_is_set(ae));
        assert(!archive_entry_mtime_is_set(ae));
 
+       /* Fourth, try with gzip which has metadata. */
+       extract_reference_file(reffile4);
+       assert((a = archive_read_new()) != NULL);
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_raw(a));
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+       assertEqualIntA(a, ARCHIVE_OK,
+           archive_read_open_filename(a, reffile4, 1));
+
+       /* First (and only!) Entry */
+       assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
+       assertEqualString("test-file-name.data", archive_entry_pathname(ae));
+       assertEqualInt(archive_entry_is_encrypted(ae), 0);
+       assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
+       assert(archive_entry_mtime_is_set(ae));
+       assertEqualIntA(a, archive_entry_mtime(ae), 0x5cbafd25);
+       /* Most fields should be unset (unknown) */
+       assert(!archive_entry_size_is_set(ae));
+       assert(!archive_entry_atime_is_set(ae));
+       assert(!archive_entry_ctime_is_set(ae));
+
        /* Test EOF */
        assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
        assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
diff --git a/libarchive/test/test_read_format_raw.data.gz.uu b/libarchive/test/test_read_format_raw.data.gz.uu
new file mode 100644 (file)
index 0000000..cf1f7b3
--- /dev/null
@@ -0,0 +1,4 @@
+begin 644 test_read_format_raw.data.gz
+L'XL(""7]NEP``W1E<W0M9FEL92UN86UE+F1A=&$`2\O/YP(`J&4R?@0`````
+`
+end