From: Tim Kientzle Date: Wed, 5 Nov 2008 22:18:36 +0000 (-0500) Subject: Checkpoint the read filter rearchitecture. X-Git-Tag: v2.6.0~57 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=84bb320b8d9880ff14a4b434cb18ffcb704da0cb;p=thirdparty%2Flibarchive.git Checkpoint the read filter rearchitecture. The read filters now consume blocks from their upstream providers and provide blocks to their downstream consumers. All blocks are arbitrarily-sized; the reblocking code that used to be in "compression_none" has been moved into the read core and handles the output from the read filters. The big goal here is to provide support for multiple stacked read filters. While this is of little interest for decompression (people rarely stack multiple compressors), it does lay the groundwork for encryption, uudecode, and other filters that are used in combination with each other and with compression. This also simplifies the internal API a little (although the init() method signature is pretty hairy and going to get worse before I'm done) and has saved a few dozen lines of code here and there. This certainly isn't finished: I still have to convert the new LZMA decompressor, clean up some of the new code, and find better terminology. In particular "reader" and "source" are really awful names. I'll figure out something better soon; I promise. But this does pass all of the tests again (which probably means I need more tests!) so it seems a good point to check in what I have. Hopefully, over the next couple of days, I'll work out better terminology and give all the new code here a good scrubbing. SVN-Revision: 236 --- diff --git a/libarchive/Makefile b/libarchive/Makefile index 8e6e340fb..c415ae93b 100644 --- a/libarchive/Makefile +++ b/libarchive/Makefile @@ -16,6 +16,9 @@ WARNS?= 6 # Headers to be installed in /usr/include INCS= archive.h archive_entry.h +# TODO: LZMA reader needs to be converted to the new filter architecture... +# archive_read_support_compression_lzma.c \ + # Sources to be compiled. SRCS= archive_check_magic.c \ archive_entry.c \ @@ -34,7 +37,6 @@ SRCS= archive_check_magic.c \ archive_read_support_compression_bzip2.c \ archive_read_support_compression_compress.c \ archive_read_support_compression_gzip.c \ - archive_read_support_compression_lzma.c \ archive_read_support_compression_none.c \ archive_read_support_compression_program.c \ archive_read_support_format_all.c \ diff --git a/libarchive/archive_read.c b/libarchive/archive_read.c index a9c4827b5..f0ed147c1 100644 --- a/libarchive/archive_read.c +++ b/libarchive/archive_read.c @@ -53,9 +53,10 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_read.c,v 1.38 2008/03/12 04:58:32 #include "archive_private.h" #include "archive_read_private.h" -static void choose_decompressor(struct archive_read *, const void*, size_t); +#define minimum(a, b) (a < b ? a : b) + +static int build_stream(struct archive_read *); static int choose_format(struct archive_read *); -static off_t dummy_skip(struct archive_read *, off_t); /* * Allocate, initialize and return a struct archive object. @@ -74,8 +75,15 @@ archive_read_new(void) a->archive.state = ARCHIVE_STATE_NEW; a->entry = archive_entry_new(); - /* We always support uncompressed archives. */ - archive_read_support_compression_none(&a->archive); + /* Initialize reblocking logic. */ + a->buffer_size = 64 * 1024; /* 64k */ + a->buffer = (char *)malloc(a->buffer_size); + a->next = a->buffer; + if (a->buffer == NULL) { + archive_entry_free(a->entry); + free(a); + return (NULL); + } return (&a->archive); } @@ -122,13 +130,14 @@ client_skip_proxy(struct archive_read_source *self, int64_t request) self->data, request); } -static ssize_t +static int client_close_proxy(struct archive_read_source *self) { - int r; + int r = ARCHIVE_OK; - r = (self->archive->client.closer)((struct archive *)self->archive, - self->data); + if (self->archive->client.closer != NULL) + r = (self->archive->client.closer)((struct archive *)self->archive, + self->data); free(self); return (r); } @@ -142,8 +151,6 @@ archive_read_open2(struct archive *_a, void *client_data, archive_close_callback *client_closer) { struct archive_read *a = (struct archive_read *)_a; - const void *buffer; - ssize_t bytes_read; int e; __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, @@ -153,18 +160,6 @@ archive_read_open2(struct archive *_a, void *client_data, __archive_errx(1, "No reader function provided to archive_read_open"); - /* - * Set these NULL initially. If the open or initial read fails, - * we'll leave them NULL to indicate that the file is invalid. - * (In particular, this helps ensure that the closer doesn't - * get called more than once.) - */ - a->client.opener = NULL; - a->client.reader = NULL; - a->client.skipper = NULL; - a->client.closer = NULL; - a->client.data = NULL; - /* Open data source. */ if (client_opener != NULL) { e =(client_opener)(&a->archive, client_data); @@ -176,18 +171,7 @@ archive_read_open2(struct archive *_a, void *client_data, } } - /* Read first block now for compress format detection. */ - bytes_read = (client_reader)(&a->archive, client_data, &buffer); - - if (bytes_read < 0) { - /* If the first read fails, close before returning error. */ - if (client_closer) - (client_closer)(&a->archive, client_data); - /* client_reader should have already set error information. */ - return (ARCHIVE_FATAL); - } - - /* Now that the client callbacks have worked, remember them. */ + /* Save the client functions and mock up the initial source. */ a->client.opener = client_opener; /* Do we need to remember this? */ a->client.reader = client_reader; a->client.skipper = client_skipper; @@ -201,7 +185,7 @@ archive_read_open2(struct archive *_a, void *client_data, if (source == NULL) return (ARCHIVE_FATAL); source->reader = NULL; - source->source = NULL; + source->upstream = NULL; source->archive = a; source->data = client_data; source->read = client_read_proxy; @@ -210,111 +194,80 @@ archive_read_open2(struct archive *_a, void *client_data, a->source = source; } - /* Select a decompression routine. */ - choose_decompressor(a, buffer, (size_t)bytes_read); - if (a->decompressor == NULL) - return (ARCHIVE_FATAL); - - /* Initialize decompression routine with the first block of data. */ - e = (a->decompressor->init)(a, buffer, (size_t)bytes_read); + /* In case there's no filter. */ + a->archive.compression_code = ARCHIVE_COMPRESSION_NONE; + a->archive.compression_name = "none"; + /* Build out the input pipeline. */ + e = build_stream(a); if (e == ARCHIVE_OK) a->archive.state = ARCHIVE_STATE_HEADER; - /* - * If the decompressor didn't register a skip function, provide a - * dummy compression-layer skip function. - */ - if (a->decompressor->skip2 == NULL) - a->decompressor->skip2 = dummy_skip; - return (e); } /* - * Allow each registered decompression routine to bid on whether it - * wants to handle this stream. Return index of winning bidder. + * Allow each registered stream transform to bid on whether + * it wants to handle this stream. Repeat until we've finished + * building the pipeline. */ -static void -choose_decompressor(struct archive_read *a, - const void *buffer, size_t bytes_read) +static int +build_stream(struct archive_read *a) { - int decompression_slots, i, bid, best_bid; - struct decompressor_t *decompressor, *best_decompressor; + int number_readers, i, bid, best_bid; + struct archive_reader *reader, *best_reader; + struct archive_read_source *source; + const void *block; + ssize_t bytes_read; + + /* Read first block now for compress format detection. */ + bytes_read = (a->source->read)(a->source, &block); + if (bytes_read < 0) { + /* If the first read fails, close before returning error. */ + if (a->source->close != NULL) { + (a->source->close)(a->source); + a->source = NULL; + } + /* source->read should have already set error information. */ + return (ARCHIVE_FATAL); + } - decompression_slots = sizeof(a->decompressors) / - sizeof(a->decompressors[0]); + number_readers = sizeof(a->readers) / sizeof(a->readers[0]); best_bid = 0; - a->decompressor = NULL; - best_decompressor = NULL; - - decompressor = a->decompressors; - for (i = 0; i < decompression_slots; i++) { - if (decompressor->bid) { - bid = (decompressor->bid)(buffer, bytes_read); - if (bid > best_bid || best_decompressor == NULL) { + best_reader = NULL; + + reader = a->readers; + for (i = 0, reader = a->readers; i < number_readers; i++, reader++) { + if (reader->bid != NULL) { + bid = (reader->bid)(reader, block, bytes_read); + if (bid > best_bid) { best_bid = bid; - best_decompressor = decompressor; + best_reader = reader; } } - decompressor ++; } /* - * There were no bidders; this is a serious programmer error - * and demands a quick and definitive abort. - */ - if (best_decompressor == NULL) - __archive_errx(1, "No decompressors were registered; you " - "must call at least one " - "archive_read_support_compression_XXX function in order " - "to successfully read an archive."); - - /* - * There were bidders, but no non-zero bids; this means we can't - * support this stream. + * If we have a winner, it becomes the next stage in the pipeline. */ - if (best_bid < 1) { - archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, - "Unrecognized archive format"); - return; - } - - /* Record the best decompressor for this stream. */ - a->decompressor = best_decompressor; -} - -/* - * Dummy skip function, for use if the compression layer doesn't provide - * one: This code just reads data and discards it. - */ -static off_t -dummy_skip(struct archive_read * a, off_t request) -{ - const void * dummy_buffer; - ssize_t bytes_read; - off_t bytes_skipped; - - for (bytes_skipped = 0; request > 0;) { - bytes_read = (a->decompressor->read_ahead2)(a, &dummy_buffer, 1); - if (bytes_read < 0) - return (bytes_read); - if (bytes_read == 0) { - /* Premature EOF. */ - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Truncated input file (need to skip %jd bytes)", - (intmax_t)request); + if (best_reader != NULL) { + source = (best_reader->init)(a, best_reader, a->source, + block, bytes_read); + if (source == NULL) return (ARCHIVE_FATAL); - } - if (bytes_read > request) - bytes_read = (ssize_t)request; - (a->decompressor->consume2)(a, (size_t)bytes_read); - request -= bytes_read; - bytes_skipped += bytes_read; + /* Record the best decompressor for this stream. */ + a->source = source; + /* Recurse to get next pipeline stage. */ + return (build_stream(a)); } - return (bytes_skipped); + /* Save first block of data. */ + a->client_buff = block; + a->client_total = bytes_read; + a->client_next = a->client_buff; + a->client_avail = a->client_total; + return (ARCHIVE_OK); } /* @@ -641,23 +594,24 @@ archive_read_close(struct archive *_a) /* TODO: Clean up the formatters. */ - /* Clean up the decompressors. */ - n = sizeof(a->decompressors)/sizeof(a->decompressors[0]); + /* Clean up the stream pipeline. */ + if (a->source != NULL) { + r1 = (a->source->close)(a->source); + if (r1 < r) + r = r1; + a->source = NULL; + } + + /* Release the reader objects. */ + n = sizeof(a->readers)/sizeof(a->readers[0]); for (i = 0; i < n; i++) { - if (a->decompressors[i].finish != NULL) { - r1 = (a->decompressors[i].finish)(a); + if (a->readers[i].free != NULL) { + r1 = (a->readers[i].free)(&a->readers[i]); if (r1 < r) r = r1; } } - /* Close the client stream. */ - if (a->client.closer != NULL) { - r1 = ((a->client.closer)(&a->archive, a->client.data)); - if (r1 < r) - r = r1; - } - return (r); } @@ -694,6 +648,7 @@ archive_read_finish(struct archive *_a) if (a->entry) archive_entry_free(a->entry); a->archive.magic = 0; + free(a->buffer); free(a); #if ARCHIVE_API_VERSION > 1 return (r); @@ -743,27 +698,20 @@ __archive_read_register_format(struct archive_read *a, * Used internally by decompression routines to register their bid and * initialization functions. */ -struct decompressor_t * -__archive_read_register_compression(struct archive_read *a, - int (*bid)(const void *, size_t), - int (*init)(struct archive_read *, const void *, size_t)) +struct archive_reader * +__archive_read_get_reader(struct archive_read *a) { int i, number_slots; __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW, - "__archive_read_register_compression"); + "__archive_read_get_reader"); - number_slots = sizeof(a->decompressors) / sizeof(a->decompressors[0]); + number_slots = sizeof(a->readers) / sizeof(a->readers[0]); for (i = 0; i < number_slots; i++) { - if (a->decompressors[i].bid == bid) - return (a->decompressors + i); - if (a->decompressors[i].bid == NULL) { - a->decompressors[i].bid = bid; - a->decompressors[i].init = init; - return (a->decompressors + i); - } + if (a->readers[i].bid == NULL) + return (a->readers + i); } __archive_errx(1, "Not enough slots for compression registration"); @@ -827,20 +775,168 @@ __archive_read_register_compression(struct archive_read *a, * Important: This does NOT move the file pointer. See * __archive_read_consume() below. */ + +/* + * This is tricky. We need to provide our clients with pointers to + * contiguous blocks of memory but we want to avoid copying whenever + * possible. + * + * Mostly, this code returns pointers directly into the block of data + * provided by the client_read routine. It can do this unless the + * request would split across blocks. In that case, we have to copy + * into an internal buffer to combine reads. + */ const void * -__archive_read_ahead(struct archive_read *a, size_t len, ssize_t *avail) +__archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail) { - ssize_t av; - const void *h; - - av = (a->decompressor->read_ahead2)(a, &h, len); - /* Return # bytes avail (also error code) regardless. */ - if (avail != NULL) - *avail = av; - /* If it was a short read, return NULL. */ - if (av < (ssize_t)len) + ssize_t bytes_read; + + if (a->fatal) { + *avail = ARCHIVE_FATAL; return (NULL); - return (h); + } + + /* + * Keep pulling more data until we can satisfy the request. + */ + for (;;) { + + /* + * If we can satisfy from the copy buffer, we're done. + */ + if (a->avail >= min) { + if (avail != NULL) + *avail = a->avail; + return (a->next); + } + + /* + * We can satisfy directly from client buffer if everything + * currently in the copy buffer is still in the client buffer. + */ + if (a->client_total >= a->client_avail + a->avail + && a->client_avail + a->avail >= min) { + /* "Roll back" to client buffer. */ + a->client_avail += a->avail; + a->client_next -= a->avail; + /* Copy buffer is now empty. */ + a->avail = 0; + a->next = a->buffer; + /* Return data from client buffer. */ + if (avail != NULL) + *avail = a->client_avail; + return (a->client_next); + } + + /* Move data forward in copy buffer if necessary. */ + if (a->next > a->buffer && + a->next + min > a->buffer + a->buffer_size) { + if (a->avail > 0) + memmove(a->buffer, a->next, a->avail); + a->next = a->buffer; + } + + /* If we've used up the client data, get more. */ + if (a->client_avail <= 0) { + if (a->end_of_file) { + if (avail != NULL) + *avail = 0; + return (NULL); + } + bytes_read = (a->source->read)(a->source, + &a->client_buff); + if (bytes_read < 0) { /* Read error. */ + a->client_total = a->client_avail = 0; + a->client_next = a->client_buff = NULL; + a->fatal = 1; + if (avail != NULL) + *avail = ARCHIVE_FATAL; + return (NULL); + } + if (bytes_read == 0) { /* Premature end-of-file. */ + a->client_total = a->client_avail = 0; + a->client_next = a->client_buff = NULL; + a->end_of_file = 1; + /* Return whatever we do have. */ + if (avail != NULL) + *avail = a->avail; + return (a->next); + /* TODO: I want to change this to + * return(0) as an eof marker, but a little + * more work is needed first. */ + } + a->archive.raw_position += bytes_read; + a->client_total = bytes_read; + a->client_avail = a->client_total; + a->client_next = a->client_buff; + } + else + { + /* + * We can't satisfy the request from the copy + * buffer or the existing client data, so we + * need to copy more client data over to the + * copy buffer. + */ + + /* Ensure the buffer is big enough. */ + if (min > a->buffer_size) { + size_t s, t; + char *p; + + /* Double the buffer; watch for overflow. */ + s = t = a->buffer_size; + while (s < min) { + t *= 2; + if (t <= s) { /* Integer overflow! */ + archive_set_error(&a->archive, + ENOMEM, + "Unable to allocate copy buffer"); + a->fatal = 1; + if (avail != NULL) + *avail = ARCHIVE_FATAL; + return (NULL); + } + s = t; + } + /* Now s >= min, so allocate a new buffer. */ + p = (char *)malloc(s); + if (p == NULL) { + archive_set_error(&a->archive, ENOMEM, + "Unable to allocate copy buffer"); + a->fatal = 1; + if (avail != NULL) + *avail = ARCHIVE_FATAL; + return (NULL); + } + /* Move data into newly-enlarged buffer. */ + if (a->avail > 0) + memmove(p, a->next, a->avail); + free(a->buffer); + a->next = a->buffer = p; + a->buffer_size = s; + } + + /* We can add client data to copy buffer. */ + /* First estimate: copy to fill rest of buffer. */ + size_t tocopy = (a->buffer + a->buffer_size) + - (a->next + a->avail); + /* Don't waste time buffering more than we need to. */ + if (tocopy + a->avail > min) + tocopy = min - a->avail; + /* Don't copy more than is available. */ + if (tocopy > a->client_avail) + tocopy = a->client_avail; + + memcpy(a->next + a->avail, a->client_next, + tocopy); + /* Remove this data from client buffer. */ + a->client_next += tocopy; + a->client_avail -= tocopy; + /* add it to copy buffer. */ + a->avail += tocopy; + } + } } /* @@ -849,10 +945,25 @@ __archive_read_ahead(struct archive_read *a, size_t len, ssize_t *avail) * ahead by more than the amount of data available according to * __archive_read_ahead(). */ +/* + * Mark the appropriate data as used. Note that the request here will + * often be much smaller than the size of the previous read_ahead + * request. + */ ssize_t -__archive_read_consume(struct archive_read *a, size_t s) +__archive_read_consume(struct archive_read *a, size_t request) { - return (a->decompressor->consume2)(a, s); + if (a->avail > 0) { + /* Read came from copy buffer. */ + a->next += request; + a->avail -= request; + } else { + /* Read came from client buffer. */ + a->client_next += request; + a->client_avail -= request; + } + a->archive.file_position += request; + return (request); } /* @@ -863,7 +974,75 @@ __archive_read_consume(struct archive_read *a, size_t s) * down closer to the data source. */ int64_t -__archive_read_skip(struct archive_read *a, uint64_t s) +__archive_read_skip(struct archive_read *a, int64_t request) { - return (a->decompressor->skip2)(a, s); + off_t bytes_skipped, total_bytes_skipped = 0; + size_t min; + + if (a->fatal) + return (-1); + /* + * If there is data in the buffers already, use that first. + */ + if (a->avail > 0) { + min = minimum(request, (off_t)a->avail); + bytes_skipped = __archive_read_consume(a, min); + request -= bytes_skipped; + total_bytes_skipped += bytes_skipped; + } + if (a->client_avail > 0) { + min = minimum(request, (off_t)a->client_avail); + bytes_skipped = __archive_read_consume(a, min); + request -= bytes_skipped; + total_bytes_skipped += bytes_skipped; + } + if (request == 0) + return (total_bytes_skipped); + /* + * If a client_skipper was provided, try that first. + */ +#if ARCHIVE_API_VERSION < 2 + if ((a->source->skip != NULL) && (request < SSIZE_MAX)) { +#else + if (a->source->skip != NULL) { +#endif + bytes_skipped = (a->source->skip)(a->source, request); + if (bytes_skipped < 0) { /* error */ + a->client_total = a->client_avail = 0; + a->client_next = a->client_buff = NULL; + a->fatal = 1; + return (bytes_skipped); + } + total_bytes_skipped += bytes_skipped; + a->archive.file_position += bytes_skipped; + request -= bytes_skipped; + a->client_next = a->client_buff; + a->archive.raw_position += bytes_skipped; + a->client_avail = a->client_total = 0; + } + /* + * Note that client_skipper will usually not satisfy the + * full request (due to low-level blocking concerns), + * so even if client_skipper is provided, we may still + * have to use ordinary reads to finish out the request. + */ + while (request > 0) { + const void* dummy_buffer; + ssize_t bytes_read; + dummy_buffer = __archive_read_ahead(a, 1, &bytes_read); + if (bytes_read < 0) + return (bytes_read); + if (bytes_read == 0) { + /* We hit EOF before we satisfied the skip request. */ + archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + "Truncated input file (need to skip %jd bytes)", + (intmax_t)request); + return (ARCHIVE_FATAL); + } + min = (size_t)(minimum(bytes_read, request)); + bytes_read = __archive_read_consume(a, min); + total_bytes_skipped += bytes_read; + request -= bytes_read; + } + return (total_bytes_skipped); } diff --git a/libarchive/archive_read_private.h b/libarchive/archive_read_private.h index efa348242..6bd345884 100644 --- a/libarchive/archive_read_private.h +++ b/libarchive/archive_read_private.h @@ -54,13 +54,14 @@ struct archive_reader { /* Configuration data for the reader. */ void *data; /* Bidder is handed the initial block from its source. */ - int (*bid)(const void *buff, size_t); + int (*bid)(struct archive_reader *, const void *buff, size_t); /* Init() is given the archive, upstream source, and the initial * block above. It returns a populated source structure. */ struct archive_read_source *(*init)(struct archive_read *, - struct archive_read_source *source, const void *, size_t); + struct archive_reader *, struct archive_read_source *source, + const void *, size_t); /* Release the reader and any configuration data it allocated. */ - void (*free)(struct archive_reader *); + int (*free)(struct archive_reader *); }; /* @@ -72,7 +73,7 @@ struct archive_read_source { /* Essentially all sources will need these values, so * just declare them here. */ struct archive_reader *reader; /* Reader that I'm an instance of. */ - struct archive_read_source *source; /* Who I get blocks from. */ + struct archive_read_source *upstream; /* Who I get blocks from. */ struct archive_read *archive; /* associated archive. */ /* Return next block. */ ssize_t (*read)(struct archive_read_source *, const void **); @@ -130,36 +131,18 @@ struct archive_read { /* File offset of beginning of most recently-read header. */ off_t header_position; - /* - * Decompressors have a very specific lifecycle: - * public setup function initializes a slot in this table - * 'config' holds minimal configuration data - * bid() examines a block of data and returns a bid [1] - * init() is called for successful bidder - * 'data' is initialized by init() - * read() returns a pointer to the next block of data - * consume() indicates how much data is used - * skip() ignores bytes of data - * finish() cleans up and frees 'data' and 'config' - * - * [1] General guideline: bid the number of bits that you actually - * test, e.g., 16 if you test a 2-byte magic value. - */ - struct decompressor_t { - void *config; - void *data; - int (*bid)(const void *buff, size_t); - int (*init)(struct archive_read *, - const void *buff, size_t); - int (*finish)(struct archive_read *); - ssize_t (*read_ahead2)(struct archive_read *, - const void **, size_t); - ssize_t (*consume2)(struct archive_read *, size_t); - off_t (*skip2)(struct archive_read *, off_t); - } decompressors[5]; - - /* Pointer to current decompressor. */ - struct decompressor_t *decompressor; + + /* Used by reblocking logic. */ + char *buffer; + size_t buffer_size; + char *next; /* Current read location. */ + size_t avail; /* Bytes in my buffer. */ + const void *client_buff; /* Client buffer information. */ + size_t client_total; + const char *client_next; + size_t client_avail; + char end_of_file; + char fatal; /* * Format detection is mostly the same as compression @@ -194,14 +177,13 @@ int __archive_read_register_format(struct archive_read *a, int (*read_data_skip)(struct archive_read *), int (*cleanup)(struct archive_read *)); -struct decompressor_t - *__archive_read_register_compression(struct archive_read *a, - int (*bid)(const void *, size_t), - int (*init)(struct archive_read *, const void *, size_t)); +struct archive_reader + *__archive_read_get_reader(struct archive_read *a); + const void - *__archive_read_ahead(struct archive_read *, size_t, size_t *); + *__archive_read_ahead(struct archive_read *, size_t, ssize_t *); ssize_t __archive_read_consume(struct archive_read *, size_t); int64_t - __archive_read_skip(struct archive_read *, uint64_t); + __archive_read_skip(struct archive_read *, int64_t); #endif diff --git a/libarchive/archive_read_support_compression_all.c b/libarchive/archive_read_support_compression_all.c index bb548732e..54226cc7a 100644 --- a/libarchive/archive_read_support_compression_all.c +++ b/libarchive/archive_read_support_compression_all.c @@ -40,7 +40,7 @@ archive_read_support_compression_all(struct archive *a) archive_read_support_compression_gzip(a); #endif #if HAVE_LZMADEC_H - archive_read_support_compression_lzma(a); +// archive_read_support_compression_lzma(a); #endif return (ARCHIVE_OK); } diff --git a/libarchive/archive_read_support_compression_bzip2.c b/libarchive/archive_read_support_compression_bzip2.c index 279c3755a..e60ec68de 100644 --- a/libarchive/archive_read_support_compression_bzip2.c +++ b/libarchive/archive_read_support_compression_bzip2.c @@ -51,30 +51,49 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_bzip2.c, #if HAVE_BZLIB_H struct private_data { bz_stream stream; - char *uncompressed_buffer; - size_t uncompressed_buffer_size; - char *read_next; + char *out_block; + size_t out_block_size; int64_t total_out; char eof; /* True = found end of compressed data. */ }; -static int finish(struct archive_read *); -static ssize_t read_ahead(struct archive_read *, const void **, size_t); -static ssize_t read_consume(struct archive_read *, size_t); -static int drive_decompressor(struct archive_read *a, struct private_data *); +/* Bzip2 source */ +static ssize_t bzip2_source_read(struct archive_read_source *, const void **); +static int bzip2_source_close(struct archive_read_source *); #endif -/* These two functions are defined even if we lack the library. See below. */ -static int bid(const void *, size_t); -static int init(struct archive_read *, const void *, size_t); +/* + * Note that we can detect bzip2 archives even if we can't decompress + * them. (In fact, we like detecting them because we can give better + * error messages.) So the bid framework here gets compiled even + * if bzlib is unavailable. + */ +static int bzip2_reader_bid(struct archive_reader *, const void *, size_t); +static struct archive_read_source *bzip2_reader_init(struct archive_read *, + struct archive_reader *, struct archive_read_source *, + const void *, size_t); +static int bzip2_reader_free(struct archive_reader *); int archive_read_support_compression_bzip2(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; - if (__archive_read_register_compression(a, bid, init) != NULL) - return (ARCHIVE_OK); - return (ARCHIVE_FATAL); + struct archive_reader *reader = __archive_read_get_reader(a); + + if (reader == NULL) + return (ARCHIVE_FATAL); + + reader->data = NULL; + reader->bid = bzip2_reader_bid; + reader->init = bzip2_reader_init; + reader->free = bzip2_reader_free; + return (ARCHIVE_OK); +} + +static int +bzip2_reader_free(struct archive_reader *self){ + (void)self; /* UNUSED */ + return (ARCHIVE_OK); } /* @@ -85,11 +104,13 @@ archive_read_support_compression_bzip2(struct archive *_a) * from verifying as much as we would like. */ static int -bid(const void *buff, size_t len) +bzip2_reader_bid(struct archive_reader *self, const void *buff, size_t len) { const unsigned char *buffer; int bits_checked; + (void)self; /* UNUSED */ + if (len < 1) return (0); @@ -151,9 +172,12 @@ bid(const void *buff, size_t len) * and emit a useful message. */ static int -init(struct archive_read *a, const void *buff, size_t n) +bzip2_reader_init(struct archive_read *a, struct archive_reader *reader, + struct archive_read_source *upstream, const void *buff, size_t n) { (void)a; /* UNUSED */ + (void)reader; /* UNUSED */ + (void)upstream; /* UNUSED */ (void)buff; /* UNUSED */ (void)n; /* UNUSED */ @@ -168,37 +192,43 @@ init(struct archive_read *a, const void *buff, size_t n) /* * Setup the callbacks. */ -static int -init(struct archive_read *a, const void *buff, size_t n) +static struct archive_read_source * +bzip2_reader_init(struct archive_read *a, struct archive_reader *reader, + struct archive_read_source *upstream, const void *buff, size_t n) { + static const size_t out_block_size = 64 * 1024; + void *out_block; + struct archive_read_source *self; struct private_data *state; int ret; + (void)reader; /* UNUSED */ + a->archive.compression_code = ARCHIVE_COMPRESSION_BZIP2; a->archive.compression_name = "bzip2"; - state = (struct private_data *)malloc(sizeof(*state)); - if (state == NULL) { + self = calloc(sizeof(*self), 1); + state = (struct private_data *)calloc(sizeof(*state), 1); + out_block = (unsigned char *)malloc(out_block_size); + if (self == NULL || state == NULL || out_block == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate data for %s decompression", a->archive.compression_name); - return (ARCHIVE_FATAL); + free(out_block); + free(state); + free(self); + return (NULL); } - memset(state, 0, sizeof(*state)); - state->uncompressed_buffer_size = 64 * 1024; - state->uncompressed_buffer = (char *)malloc(state->uncompressed_buffer_size); - state->stream.next_out = state->uncompressed_buffer; - state->read_next = state->uncompressed_buffer; - state->stream.avail_out = state->uncompressed_buffer_size; - if (state->uncompressed_buffer == NULL) { - archive_set_error(&a->archive, ENOMEM, - "Can't allocate %s decompression buffers", - a->archive.compression_name); - free(state); - return (ARCHIVE_FATAL); - } + self->archive = a; + self->data = state; + state->out_block_size = out_block_size; + state->out_block = out_block; + self->upstream = upstream; + self->read = bzip2_source_read; + self->skip = NULL; /* not supported */ + self->close = bzip2_source_close; /* * A bug in bzlib.h: stream.next_in should be marked 'const' @@ -209,10 +239,8 @@ init(struct archive_read *a, const void *buff, size_t n) state->stream.next_in = (char *)(uintptr_t)(const void *)buff; state->stream.avail_in = n; - a->decompressor->read_ahead2 = read_ahead; - a->decompressor->consume2 = read_consume; - a->decompressor->skip2 = NULL; /* not supported */ - a->decompressor->finish = finish; + state->stream.next_out = state->out_block; + state->stream.avail_out = state->out_block_size; /* Initialize compression library. */ ret = BZ2_bzDecompressInit(&(state->stream), @@ -226,17 +254,13 @@ init(struct archive_read *a, const void *buff, size_t n) 1 /* do use slow low-mem algorithm */); } - if (ret == BZ_OK) { - a->decompressor->data = state; - return (ARCHIVE_OK); - } + if (ret == BZ_OK) + return (self); /* Library setup failed: Clean up. */ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Internal error initializing %s library", a->archive.compression_name); - free(state->uncompressed_buffer); - free(state); /* Override the error message if we know what really went wrong. */ switch (ret) { @@ -257,162 +281,108 @@ init(struct archive_read *a, const void *buff, size_t n) break; } - return (ARCHIVE_FATAL); + free(state->out_block); + free(state); + free(self); + return (NULL); } /* - * Return a block of data from the decompression buffer. Decompress more - * as necessary. + * Return the next block of decompressed data. */ static ssize_t -read_ahead(struct archive_read *a, const void **p, size_t min) +bzip2_source_read(struct archive_read_source *self, const void **p) { struct private_data *state; - size_t read_avail, was_avail; + size_t read_avail, decompressed; + const void *read_buf; int ret; - state = (struct private_data *)a->decompressor->data; - read_avail = state->stream.next_out - state->read_next; + state = (struct private_data *)self->data; + read_avail = 0; - if (read_avail + state->stream.avail_out < min) { - memmove(state->uncompressed_buffer, state->read_next, - read_avail); - state->read_next = state->uncompressed_buffer; - state->stream.next_out = state->read_next + read_avail; - state->stream.avail_out - = state->uncompressed_buffer_size - read_avail; - } + /* Empty our output buffer. */ + state->stream.next_out = state->out_block; + state->stream.avail_out = state->out_block_size; - while (read_avail < min && /* Haven't satisfied min. */ - read_avail < state->uncompressed_buffer_size) { /* !full */ - was_avail = read_avail; - if ((ret = drive_decompressor(a, state)) < ARCHIVE_OK) - return (ret); - if (ret == ARCHIVE_EOF) - break; /* Break on EOF even if we haven't met min. */ - read_avail = state->stream.next_out - state->read_next; - if (was_avail == read_avail) /* No progress? */ + /* Try to fill the output buffer. */ + for (;;) { + /* If the last upstream block is done, get another one. */ + if (state->stream.avail_in == 0) { + ret = (self->upstream->read)(self->upstream, + &read_buf); + /* stream.next_in is really const, but bzlib + * doesn't declare it so. */ + state->stream.next_in + = (unsigned char *)(uintptr_t)read_buf; + if (ret < 0) + return (ARCHIVE_FATAL); + /* There is no more data, return whatever we have. */ + if (ret == 0) { + *p = state->out_block; + decompressed = state->stream.next_out + - state->out_block; + state->total_out += decompressed; + return (decompressed); + } + state->stream.avail_in = ret; + } + + /* Decompress as much as we can in one pass. */ + ret = BZ2_bzDecompress(&(state->stream)); + switch (ret) { + case BZ_STREAM_END: /* Found end of stream. */ + /* TODO: Peek ahead to see if there's another + * stream so we can mimic the behavior of gunzip + * on concatenated streams. */ + state->eof = 1; + case BZ_OK: /* Decompressor made some progress. */ + /* If we filled our buffer, update stats and return. */ + if (state->eof || state->stream.avail_out == 0) { + *p = state->out_block; + decompressed = state->stream.next_out + - state->out_block; + state->total_out += decompressed; + return (decompressed); + } break; + default: + /* Return an error. */ + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "%s decompression failed", + self->archive->archive.compression_name); + return (ARCHIVE_FATAL); + } } - - *p = state->read_next; - return (read_avail); -} - -/* - * Mark a previously-returned block of data as read. - */ -static ssize_t -read_consume(struct archive_read *a, size_t n) -{ - struct private_data *state; - - state = (struct private_data *)a->decompressor->data; - a->archive.file_position += n; - state->read_next += n; - if (state->read_next > state->stream.next_out) - __archive_errx(1, "Request to consume too many " - "bytes from bzip2 decompressor"); - return (n); } /* * Clean up the decompressor. */ static int -finish(struct archive_read *a) +bzip2_source_close(struct archive_read_source *self) { struct private_data *state; int ret; - state = (struct private_data *)a->decompressor->data; + state = (struct private_data *)self->data; ret = ARCHIVE_OK; switch (BZ2_bzDecompressEnd(&(state->stream))) { case BZ_OK: break; default: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + archive_set_error(&(self->archive->archive), + ARCHIVE_ERRNO_MISC, "Failed to clean up %s compressor", - a->archive.compression_name); + self->archive->archive.compression_name); ret = ARCHIVE_FATAL; } - free(state->uncompressed_buffer); + free(state->out_block); free(state); - - a->decompressor->data = NULL; + free(self); return (ret); } -/* - * Utility function to pull data through decompressor, reading input - * blocks as necessary. - */ -static int -drive_decompressor(struct archive_read *a, struct private_data *state) -{ - ssize_t ret; - int decompressed, total_decompressed; - char *output; - const void *read_buf; - - if (state->eof) - return (ARCHIVE_EOF); - total_decompressed = 0; - for (;;) { - if (state->stream.avail_in == 0) { - read_buf = state->stream.next_in; - ret = (a->source->read)(a->source, &read_buf); - state->stream.next_in = (void *)(uintptr_t)read_buf; - if (ret < 0) { - /* - * TODO: Find a better way to handle - * this read failure. - */ - goto fatal; - } - if (ret == 0 && total_decompressed == 0) { - archive_set_error(&a->archive, EIO, - "Premature end of %s compressed data", - a->archive.compression_name); - return (ARCHIVE_FATAL); - } - a->archive.raw_position += ret; - state->stream.avail_in = ret; - } - - { - output = state->stream.next_out; - - /* Decompress some data. */ - ret = BZ2_bzDecompress(&(state->stream)); - decompressed = state->stream.next_out - output; - - /* Accumulate the total bytes of output. */ - state->total_out += decompressed; - total_decompressed += decompressed; - - switch (ret) { - case BZ_OK: /* Decompressor made some progress. */ - if (decompressed > 0) - return (ARCHIVE_OK); - break; - case BZ_STREAM_END: /* Found end of stream. */ - state->eof = 1; - return (ARCHIVE_OK); - default: - /* Any other return value is an error. */ - goto fatal; - } - } - } - return (ARCHIVE_OK); - - /* Return a fatal error. */ -fatal: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "%s decompression failed", a->archive.compression_name); - return (ARCHIVE_FATAL); -} - #endif /* HAVE_BZLIB_H */ diff --git a/libarchive/archive_read_support_compression_compress.c b/libarchive/archive_read_support_compression_compress.c index e1c9896dc..5e1d024a4 100644 --- a/libarchive/archive_read_support_compression_compress.c +++ b/libarchive/archive_read_support_compression_compress.c @@ -100,11 +100,8 @@ struct private_data { size_t bytes_in_section; /* Output variables. */ - size_t uncompressed_buffer_size; - void *uncompressed_buffer; - unsigned char *read_next; /* Data for client. */ - unsigned char *next_out; /* Where to write new data. */ - size_t avail_out; /* Space at end of buffer. */ + size_t out_block_size; + void *out_block; /* Decompression status variables. */ int use_reset_code; @@ -133,21 +130,32 @@ struct private_data { unsigned char stack[65300]; }; -static int bid(const void *, size_t); -static int finish(struct archive_read *); -static int init(struct archive_read *, const void *, size_t); -static ssize_t read_ahead(struct archive_read *, const void **, size_t); -static ssize_t read_consume(struct archive_read *, size_t); -static int getbits(struct archive_read *, struct private_data *, int n); -static int next_code(struct archive_read *a, struct private_data *state); +static int compress_reader_bid(struct archive_reader *, const void *, size_t); +static struct archive_read_source *compress_reader_init(struct archive_read *, + struct archive_reader *, struct archive_read_source *, + const void *, size_t); +static int compress_reader_free(struct archive_reader *); + +static ssize_t compress_source_read(struct archive_read_source *, const void **); +static int compress_source_close(struct archive_read_source *); + +static int getbits(struct archive_read_source *, int n); +static int next_code(struct archive_read_source *); int archive_read_support_compression_compress(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; - if (__archive_read_register_compression(a, bid, init) != NULL) - return (ARCHIVE_OK); - return (ARCHIVE_FATAL); + struct archive_reader *reader = __archive_read_get_reader(a); + + if (reader == NULL) + return (ARCHIVE_FATAL); + + reader->data = NULL; + reader->bid = compress_reader_bid; + reader->init = compress_reader_init; + reader->free = compress_reader_free; + return (ARCHIVE_OK); } /* @@ -158,11 +166,13 @@ archive_read_support_compression_compress(struct archive *_a) * from verifying as much as we would like. */ static int -bid(const void *buff, size_t len) +compress_reader_bid(struct archive_reader *self, const void *buff, size_t len) { const unsigned char *buffer; int bits_checked; + (void)self; /* UNUSED */ + if (len < 1) return (0); @@ -190,34 +200,43 @@ bid(const void *buff, size_t len) /* * Setup the callbacks. */ -static int -init(struct archive_read *a, const void *buff, size_t n) +static struct archive_read_source * +compress_reader_init(struct archive_read *a, struct archive_reader *reader, + struct archive_read_source *upstream, const void *buff, size_t n) { + struct archive_read_source *self; struct private_data *state; int code; + (void)reader; /* UNUSED */ + a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS; a->archive.compression_name = "compress (.Z)"; - a->decompressor->read_ahead2 = read_ahead; - a->decompressor->consume2 = read_consume; - a->decompressor->skip2 = NULL; /* not supported */ - a->decompressor->finish = finish; + self = calloc(sizeof(*self), 1); + if (self == NULL) + return (NULL); + + self->read = compress_source_read; + self->skip = NULL; /* not supported */ + self->close = compress_source_close; + self->upstream = upstream; + self->archive = a; - state = (struct private_data *)malloc(sizeof(*state)); + state = (struct private_data *)calloc(sizeof(*state), 1); if (state == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate data for %s decompression", a->archive.compression_name); - return (ARCHIVE_FATAL); + free(self); + return (NULL); } - memset(state, 0, sizeof(*state)); - a->decompressor->data = state; + self->data = state; - state->uncompressed_buffer_size = 64 * 1024; - state->uncompressed_buffer = malloc(state->uncompressed_buffer_size); + state->out_block_size = 64 * 1024; + state->out_block = malloc(state->out_block_size); - if (state->uncompressed_buffer == NULL) { + if (state->out_block == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate %s decompression buffers", a->archive.compression_name); @@ -226,14 +245,12 @@ init(struct archive_read *a, const void *buff, size_t n) state->next_in = (const unsigned char *)buff; state->avail_in = n; - state->read_next = state->next_out = (unsigned char *)state->uncompressed_buffer; - state->avail_out = state->uncompressed_buffer_size; - code = getbits(a, state, 8); + code = getbits(self, 8); if (code != 037) /* This should be impossible. */ goto fatal; - code = getbits(a, state, 8); + code = getbits(self, 8); if (code != 0235) { /* This can happen if the library is receiving 1-byte * blocks and gzip and compress are both enabled. @@ -244,7 +261,7 @@ init(struct archive_read *a, const void *buff, size_t n) goto fatal; } - code = getbits(a, state, 8); + code = getbits(self, 8); state->maxcode_bits = code & 0x1f; state->maxcode = (1 << state->maxcode_bits); state->use_reset_code = code & 0x80; @@ -261,12 +278,12 @@ init(struct archive_read *a, const void *buff, size_t n) state->prefix[code] = 0; state->suffix[code] = code; } - next_code(a, state); - return (ARCHIVE_OK); + next_code(self); + return (self); fatal: - finish(a); - return (ARCHIVE_FATAL); + compress_source_close(self); + return (NULL); } /* @@ -274,86 +291,59 @@ fatal: * as necessary. */ static ssize_t -read_ahead(struct archive_read *a, const void **p, size_t min) +compress_source_read(struct archive_read_source *self, const void **pblock) { struct private_data *state; - size_t read_avail; + unsigned char *p, *start, *end; int ret; - state = (struct private_data *)a->decompressor->data; - read_avail = state->next_out - state->read_next; - - if (read_avail < min && state->end_of_stream) { - if (state->end_of_stream == ARCHIVE_EOF) - return (0); - else - return (-1); + state = (struct private_data *)self->data; + if (state->end_of_stream) { + *pblock = NULL; + return (0); } - - if (read_avail < min) { - memmove(state->uncompressed_buffer, state->read_next, - read_avail); - state->read_next = (unsigned char *)state->uncompressed_buffer; - state->next_out = state->read_next + read_avail; - state->avail_out - = state->uncompressed_buffer_size - read_avail; - - while (read_avail < state->uncompressed_buffer_size - && !state->end_of_stream) { - if (state->stackp > state->stack) { - *state->next_out++ = *--state->stackp; - state->avail_out--; - read_avail++; - } else { - ret = next_code(a, state); - if (ret == ARCHIVE_EOF) - state->end_of_stream = ret; - else if (ret != ARCHIVE_OK) - return (ret); - } + p = start = (unsigned char *)state->out_block; + end = start + state->out_block_size; + + while (p < end && !state->end_of_stream) { + if (state->stackp > state->stack) { + *p++ = *--state->stackp; + } else { + ret = next_code(self); + if (ret == ARCHIVE_EOF) + state->end_of_stream = ret; + else if (ret != ARCHIVE_OK) + return (ret); } } - *p = state->read_next; - return (read_avail); + *pblock = start; + return (p - start); } /* - * Mark a previously-returned block of data as read. + * Clean up the reader. */ -static ssize_t -read_consume(struct archive_read *a, size_t n) +static int +compress_reader_free(struct archive_reader *self) { - struct private_data *state; - - state = (struct private_data *)a->decompressor->data; - a->archive.file_position += n; - state->read_next += n; - if (state->read_next > state->next_out) - __archive_errx(1, "Request to consume too many " - "bytes from compress decompressor"); - return (n); + self->data = NULL; + return (ARCHIVE_OK); } /* - * Clean up the decompressor. + * Close and release a source. */ static int -finish(struct archive_read *a) +compress_source_close(struct archive_read_source *self) { - struct private_data *state; - int ret = ARCHIVE_OK; - - state = (struct private_data *)a->decompressor->data; + struct private_data *state = (struct private_data *)self->data; - if (state != NULL) { - if (state->uncompressed_buffer != NULL) - free(state->uncompressed_buffer); - free(state); - } - - a->decompressor->data = NULL; - return (ret); + self->upstream->close(self->upstream); + free(state->out_block); + free(state); + free(self); + return (ARCHIVE_OK); } /* @@ -362,14 +352,15 @@ finish(struct archive_read *a) * format error, ARCHIVE_EOF if we hit end of data, ARCHIVE_OK otherwise. */ static int -next_code(struct archive_read *a, struct private_data *state) +next_code(struct archive_read_source *self) { + struct private_data *state = (struct private_data *)self->data; int code, newcode; static int debug_buff[1024]; static unsigned debug_index; - code = newcode = getbits(a, state, state->bits); + code = newcode = getbits(self, state->bits); if (code < 0) return (code); @@ -391,7 +382,7 @@ next_code(struct archive_read *a, struct private_data *state) skip_bytes %= state->bits; state->bits_avail = 0; /* Discard rest of this byte. */ while (skip_bytes-- > 0) { - code = getbits(a, state, 8); + code = getbits(self, 8); if (code < 0) return (code); } @@ -401,12 +392,13 @@ next_code(struct archive_read *a, struct private_data *state) state->section_end_code = (1 << state->bits) - 1; state->free_ent = 257; state->oldcode = -1; - return (next_code(a, state)); + return (next_code(self)); } if (code > state->free_ent) { /* An invalid code is a fatal error. */ - archive_set_error(&a->archive, -1, "Invalid compressed data"); + archive_set_error(&(self->archive->archive), -1, + "Invalid compressed data"); return (ARCHIVE_FATAL); } @@ -450,8 +442,9 @@ next_code(struct archive_read *a, struct private_data *state) * -1 indicates end of available data. */ static int -getbits(struct archive_read *a, struct private_data *state, int n) +getbits(struct archive_read_source *self, int n) { + struct private_data *state = (struct private_data *)self->data; int code, ret; static const int mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff, @@ -462,13 +455,13 @@ getbits(struct archive_read *a, struct private_data *state, int n) while (state->bits_avail < n) { if (state->avail_in <= 0) { read_buf = state->next_in; - ret = (a->source->read)(a->source, &read_buf); + ret = (self->upstream->read)(self->upstream, &read_buf); state->next_in = read_buf; if (ret < 0) return (ARCHIVE_FATAL); if (ret == 0) return (ARCHIVE_EOF); - a->archive.raw_position += ret; +/* TODO: Fix this a->archive.raw_position += ret; */ state->avail_in = ret; } state->bit_buffer |= *state->next_in++ << state->bits_avail; diff --git a/libarchive/archive_read_support_compression_gzip.c b/libarchive/archive_read_support_compression_gzip.c index 25f4a3254..2066d0975 100644 --- a/libarchive/archive_read_support_compression_gzip.c +++ b/libarchive/archive_read_support_compression_gzip.c @@ -51,32 +51,54 @@ __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_gzip.c,v #ifdef HAVE_ZLIB_H struct private_data { z_stream stream; - unsigned char *uncompressed_buffer; - size_t uncompressed_buffer_size; - unsigned char *read_next; + unsigned char *out_block; + size_t out_block_size; int64_t total_out; unsigned long crc; + int header_count; char header_done; + char header_state; + char header_flags; char eof; /* True = found end of compressed data. */ }; -static int finish(struct archive_read *); -static ssize_t read_ahead(struct archive_read *, const void **, size_t); -static ssize_t read_consume(struct archive_read *, size_t); -static int drive_decompressor(struct archive_read *a, struct private_data *); +/* Gzip Source. */ +static ssize_t gzip_source_read(struct archive_read_source *, const void **); +static int gzip_source_close(struct archive_read_source *); #endif -/* These two functions are defined even if we lack the library. See below. */ -static int bid(const void *, size_t); -static int init(struct archive_read *, const void *, size_t); +/* + * Note that we can detect gzip archives even if we can't decompress + * them. (In fact, we like detecting them because we can give better + * error messages.) So the bid framework here gets compiled even + * if zlib is unavailable. + */ +static int gzip_reader_bid(struct archive_reader *, const void *, size_t); +static struct archive_read_source *gzip_reader_init(struct archive_read *, + struct archive_reader *, struct archive_read_source *, + const void *, size_t); +static int gzip_reader_free(struct archive_reader *); int archive_read_support_compression_gzip(struct archive *_a) { struct archive_read *a = (struct archive_read *)_a; - if (__archive_read_register_compression(a, bid, init) != NULL) - return (ARCHIVE_OK); - return (ARCHIVE_FATAL); + struct archive_reader *reader = __archive_read_get_reader(a); + + if (reader == NULL) + return (ARCHIVE_FATAL); + + reader->data = NULL; + reader->bid = gzip_reader_bid; + reader->init = gzip_reader_init; + reader->free = gzip_reader_free; + return (ARCHIVE_OK); +} + +static int +gzip_reader_free(struct archive_reader *self){ + (void)self; /* UNUSED */ + return (ARCHIVE_OK); } /* @@ -87,11 +109,13 @@ archive_read_support_compression_gzip(struct archive *_a) * from verifying as much as we would like. */ static int -bid(const void *buff, size_t len) +gzip_reader_bid(struct archive_reader *self, const void *buff, size_t len) { const unsigned char *buffer; int bits_checked; + (void)self; /* UNUSED */ + if (len < 1) return (0); @@ -140,7 +164,8 @@ bid(const void *buff, size_t len) * and emit a useful message. */ static int -init(struct archive_read *a, const void *buff, size_t n) +gzip_reader_init(struct archive_read *a, struct archive_read_source *upstream, + const void *buff, size_t n) { (void)a; /* UNUSED */ (void)buff; /* UNUSED */ @@ -151,47 +176,50 @@ init(struct archive_read *a, const void *buff, size_t n) return (ARCHIVE_FATAL); } - #else /* - * Setup the callbacks. + * Initialize the source object. */ -static int -init(struct archive_read *a, const void *buff, size_t n) +static struct archive_read_source * +gzip_reader_init(struct archive_read *a, struct archive_reader *reader, + struct archive_read_source *upstream, const void *buff, size_t n) { + static const size_t out_block_size = 64 * 1024; + void *out_block; + struct archive_read_source *self; struct private_data *state; - int ret; + + (void)reader; /* UNUSED */ a->archive.compression_code = ARCHIVE_COMPRESSION_GZIP; a->archive.compression_name = "gzip"; - state = (struct private_data *)malloc(sizeof(*state)); - if (state == NULL) { + self = calloc(sizeof(*self), 1); + state = (struct private_data *)calloc(sizeof(*state), 1); + out_block = (unsigned char *)malloc(out_block_size); + if (self == NULL || state == NULL || out_block == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate data for %s decompression", a->archive.compression_name); - return (ARCHIVE_FATAL); + free(out_block); + free(state); + free(self); + return (NULL); } - memset(state, 0, sizeof(*state)); + + self->archive = a; + self->data = state; + state->out_block_size = out_block_size; + state->out_block = out_block; + self->upstream = upstream; + self->read = gzip_source_read; + self->skip = NULL; /* not supported */ + self->close = gzip_source_close; state->crc = crc32(0L, NULL, 0); state->header_done = 0; /* We've not yet begun to parse header... */ - state->uncompressed_buffer_size = 64 * 1024; - state->uncompressed_buffer = (unsigned char *)malloc(state->uncompressed_buffer_size); - state->stream.next_out = state->uncompressed_buffer; - state->read_next = state->uncompressed_buffer; - state->stream.avail_out = state->uncompressed_buffer_size; - - if (state->uncompressed_buffer == NULL) { - archive_set_error(&a->archive, ENOMEM, - "Can't allocate %s decompression buffers", - a->archive.compression_name); - free(state); - return (ARCHIVE_FATAL); - } - /* * A bug in zlib.h: stream.next_in should be marked 'const' * but isn't (the library never alters data through the @@ -201,341 +229,255 @@ init(struct archive_read *a, const void *buff, size_t n) state->stream.next_in = (Bytef *)(uintptr_t)(const void *)buff; state->stream.avail_in = n; - a->decompressor->read_ahead2 = read_ahead; - a->decompressor->consume2 = read_consume; - a->decompressor->skip2 = NULL; /* not supported */ - a->decompressor->finish = finish; + return (self); +} - /* - * TODO: Do I need to parse the gzip header before calling - * inflateInit2()? In particular, one of the header bytes - * marks "best compression" or "fastest", which may be - * appropriate for setting the second parameter here. - * However, I think the only penalty for not setting it - * correctly is wasted memory. If this is necessary, it - * should probably go into drive_decompressor() below. - */ +static int +header(struct archive_read_source *self) +{ + struct private_data *state; + int ret, b; - /* Initialize compression library. */ - ret = inflateInit2(&(state->stream), - -15 /* Don't check for zlib header */); - if (ret == Z_OK) { - a->decompressor->data = state; - return (ARCHIVE_OK); - } + state = (struct private_data *)self->data; - /* Library setup failed: Clean up. */ - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Internal error initializing %s library", - a->archive.compression_name); - free(state->uncompressed_buffer); - free(state); + /* + * If still parsing the header, interpret the + * next byte. + */ + b = *(state->stream.next_in++); + state->stream.avail_in--; - /* Override the error message if we know what really went wrong. */ - switch (ret) { - case Z_STREAM_ERROR: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Internal error initializing compression library: " - "invalid setup parameter"); + /* + * Simple state machine to parse the GZip header one byte at + * a time. If you see a way to make this easier to understand, + * please let me know. ;-) + */ + switch (state->header_state) { + case 0: case 1: /* First two bytes of signature. */ + case 2: /* Compression type must be 8 == deflate. */ + if (b != (0xff & "\037\213\010"[(int)state->header_state])) { + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Invalid GZip header (saw %d at offset %d)", + self->archive->archive.compression_name, + b, state->header_state); + return (ARCHIVE_FATAL); + } + ++state->header_state; break; - case Z_MEM_ERROR: - archive_set_error(&a->archive, ENOMEM, - "Internal error initializing compression library: " - "out of memory"); + case 3: /* GZip flags. */ + state->header_flags = b; + state->header_state = 4; break; - case Z_VERSION_ERROR: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Internal error initializing compression library: " - "invalid library version"); + case 4: case 5: case 6: case 7: /* Mod time. */ + case 8: /* Deflate flags. */ + case 9: /* OS. */ + ++state->header_state; break; + case 10: /* Optional Extra: First byte of Length. */ + if ((state->header_flags & 4)) { + state->header_count = 255 & (int)b; + state->header_state = 11; + break; + } + /* Fall through if no Optional Extra field. */ + case 11: /* Optional Extra: Second byte of Length. */ + if ((state->header_flags & 4)) { + state->header_count + = (0xff00 & ((int)b << 8)) | state->header_count; + state->header_state = 12; + break; + } + /* Fall through if no Optional Extra field. */ + case 12: /* Optional Extra Field: counted length. */ + if ((state->header_flags & 4)) { + --state->header_count; + if (state->header_count == 0) state->header_state = 13; + else state->header_state = 12; + break; + } + /* Fall through if no Optional Extra field. */ + case 13: /* Optional Original Filename. */ + if ((state->header_flags & 8)) { + if (b == 0) state->header_state = 14; + else state->header_state = 13; + break; + } + /* Fall through if no Optional Original Filename. */ + case 14: /* Optional Comment. */ + if ((state->header_flags & 16)) { + if (b == 0) state->header_state = 15; + else state->header_state = 14; + break; + } + /* Fall through if no Optional Comment. */ + case 15: /* Optional Header CRC: First byte. */ + if ((state->header_flags & 2)) { + state->header_state = 16; + break; + } + /* Fall through if no Optional Header CRC. */ + case 16: /* Optional Header CRC: Second byte. */ + if ((state->header_flags & 2)) { + state->header_state = 17; + break; + } + /* Fall through if no Optional Header CRC. */ + case 17: /* First byte of compressed data. */ + state->header_done = 1; /* done with header */ + state->stream.avail_in++; /* Discard first byte. */ + state->stream.next_in--; + + /* Initialize compression library. */ + ret = inflateInit2(&(state->stream), + -15 /* Don't check for zlib header */); + + /* Decipher the error code. */ + switch (ret) { + case Z_OK: + return (ARCHIVE_OK); + case Z_STREAM_ERROR: + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "invalid setup parameter"); + break; + case Z_MEM_ERROR: + archive_set_error(&self->archive->archive, ENOMEM, + "Internal error initializing compression library: " + "out of memory"); + break; + case Z_VERSION_ERROR: + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + "invalid library version"); + break; + default: + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Internal error initializing compression library: " + " Zlib error %d", ret); + break; + } + return (ARCHIVE_FATAL); } - return (ARCHIVE_FATAL); + return (ARCHIVE_OK); } -/* - * Return a block of data from the decompression buffer. Decompress more - * as necessary. - */ static ssize_t -read_ahead(struct archive_read *a, const void **p, size_t min) +gzip_source_read(struct archive_read_source *self, const void **p) { struct private_data *state; - size_t read_avail, was_avail; + size_t read_avail, decompressed; + const void *read_buf; int ret; - state = (struct private_data *)a->decompressor->data; - read_avail = state->stream.next_out - state->read_next; + state = (struct private_data *)self->data; + read_avail = 0; - if (read_avail + state->stream.avail_out < min) { - memmove(state->uncompressed_buffer, state->read_next, - read_avail); - state->read_next = state->uncompressed_buffer; - state->stream.next_out = state->read_next + read_avail; - state->stream.avail_out - = state->uncompressed_buffer_size - read_avail; - } - - while (read_avail < min && /* Haven't satisfied min. */ - read_avail < state->uncompressed_buffer_size) { /* !full */ - was_avail = read_avail; - if ((ret = drive_decompressor(a, state)) < ARCHIVE_OK) + /* If we're still parsing header bytes, walk through those. */ + while (!state->header_done) { + ret = header(self); + if (ret < ARCHIVE_OK) return (ret); - if (ret == ARCHIVE_EOF) - break; /* Break on EOF even if we haven't met min. */ - read_avail = state->stream.next_out - state->read_next; - if (was_avail == read_avail) /* No progress? */ - break; + /* Fetch another block from upstream if necessary. */ + if (state->stream.avail_in == 0) { + ret = (self->upstream->read)(self->upstream, + &read_buf); + state->stream.next_in + = (unsigned char *)(uintptr_t)read_buf; + if (ret <= 0) + return (ARCHIVE_FATAL); + state->stream.avail_in = ret; + } } - *p = state->read_next; - return (read_avail); -} + /* Empty our output buffer. */ + state->stream.next_out = state->out_block; + state->stream.avail_out = state->out_block_size; -/* - * Mark a previously-returned block of data as read. - */ -static ssize_t -read_consume(struct archive_read *a, size_t n) -{ - struct private_data *state; + /* Try to fill the output buffer. */ + for (;;) { + /* If the last upstream block is done, get another one. */ + if (state->stream.avail_in == 0) { + ret = (self->upstream->read)(self->upstream, + &read_buf); + /* stream.next_in is really const, but zlib + * doesn't declare it so. */ + state->stream.next_in + = (unsigned char *)(uintptr_t)read_buf; + if (ret < 0) + return (ARCHIVE_FATAL); + /* There is no more data, return whatever we have. */ + if (ret == 0) { + *p = state->out_block; + decompressed = state->stream.next_out + - state->out_block; + state->total_out += decompressed; + return (decompressed); + } + state->stream.avail_in = ret; + } - state = (struct private_data *)a->decompressor->data; - a->archive.file_position += n; - state->read_next += n; - if (state->read_next > state->stream.next_out) - __archive_errx(1, "Request to consume too many " - "bytes from gzip decompressor"); - return (n); + /* Decompress as much as we can in one pass. */ + ret = inflate(&(state->stream), 0); + switch (ret) { + case Z_STREAM_END: /* Found end of stream. */ + /* TODO: Peek ahead to see if there's another + * stream so we can mimic the behavior of gunzip + * on concatenated streams. */ + state->eof = 1; + case Z_OK: /* Decompressor made some progress. */ + /* If we filled our buffer, update stats and return. */ + if (state->eof || state->stream.avail_out == 0) { + *p = state->out_block; + decompressed = state->stream.next_out + - state->out_block; + state->crc = crc32(state->crc, + state->out_block, decompressed); + state->total_out += decompressed; + return (decompressed); + } + break; + default: + /* Return an error. */ + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "%s decompression failed", + self->archive->archive.compression_name); + return (ARCHIVE_FATAL); + } + } } /* * Clean up the decompressor. */ static int -finish(struct archive_read *a) +gzip_source_close(struct archive_read_source *self) { struct private_data *state; int ret; - state = (struct private_data *)a->decompressor->data; + state = (struct private_data *)self->data; ret = ARCHIVE_OK; switch (inflateEnd(&(state->stream))) { case Z_OK: break; default: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, + archive_set_error(&(self->archive->archive), + ARCHIVE_ERRNO_MISC, "Failed to clean up %s compressor", - a->archive.compression_name); + self->archive->archive.compression_name); ret = ARCHIVE_FATAL; } - free(state->uncompressed_buffer); + free(state->out_block); free(state); - - a->decompressor->data = NULL; + free(self); return (ret); } -/* - * Utility function to pull data through decompressor, reading input - * blocks as necessary. - */ -static int -drive_decompressor(struct archive_read *a, struct private_data *state) -{ - ssize_t ret; - size_t decompressed, total_decompressed; - int count, flags, header_state; - unsigned char *output; - unsigned char b; - const void *read_buf; - - if (state->eof) - return (ARCHIVE_EOF); - flags = 0; - count = 0; - header_state = 0; - total_decompressed = 0; - for (;;) { - if (state->stream.avail_in == 0) { - read_buf = state->stream.next_in; - ret = (a->source->read)(a->source, &read_buf); - state->stream.next_in = (unsigned char *)(uintptr_t)read_buf; - if (ret < 0) { - /* - * TODO: Find a better way to handle - * this read failure. - */ - goto fatal; - } - if (ret == 0 && total_decompressed == 0) { - archive_set_error(&a->archive, EIO, - "Premature end of %s compressed data", - a->archive.compression_name); - return (ARCHIVE_FATAL); - } - a->archive.raw_position += ret; - state->stream.avail_in = ret; - } - - if (!state->header_done) { - /* - * If still parsing the header, interpret the - * next byte. - */ - b = *(state->stream.next_in++); - state->stream.avail_in--; - - /* - * Yes, this is somewhat crude, but it works, - * GZip format isn't likely to change anytime - * in the near future, and header parsing is - * certainly not a performance issue, so - * there's little point in making this more - * elegant. Of course, if you see an easy way - * to make this more elegant, please let me - * know.. ;-) - */ - switch (header_state) { - case 0: /* First byte of signature. */ - if (b != 037) - goto fatal; - header_state = 1; - break; - case 1: /* Second byte of signature. */ - if (b != 0213) - goto fatal; - header_state = 2; - break; - case 2: /* Compression type must be 8. */ - if (b != 8) - goto fatal; - header_state = 3; - break; - case 3: /* GZip flags. */ - flags = b; - header_state = 4; - break; - case 4: case 5: case 6: case 7: /* Mod time. */ - header_state++; - break; - case 8: /* Deflate flags. */ - header_state = 9; - break; - case 9: /* OS. */ - header_state = 10; - break; - case 10: /* Optional Extra: First byte of Length. */ - if ((flags & 4)) { - count = 255 & (int)b; - header_state = 11; - break; - } - /* - * Fall through if there is no - * Optional Extra field. - */ - case 11: /* Optional Extra: Second byte of Length. */ - if ((flags & 4)) { - count = (0xff00 & ((int)b << 8)) | count; - header_state = 12; - break; - } - /* - * Fall through if there is no - * Optional Extra field. - */ - case 12: /* Optional Extra Field: counted length. */ - if ((flags & 4)) { - --count; - if (count == 0) header_state = 13; - else header_state = 12; - break; - } - /* - * Fall through if there is no - * Optional Extra field. - */ - case 13: /* Optional Original Filename. */ - if ((flags & 8)) { - if (b == 0) header_state = 14; - else header_state = 13; - break; - } - /* - * Fall through if no Optional - * Original Filename. - */ - case 14: /* Optional Comment. */ - if ((flags & 16)) { - if (b == 0) header_state = 15; - else header_state = 14; - break; - } - /* Fall through if no Optional Comment. */ - case 15: /* Optional Header CRC: First byte. */ - if ((flags & 2)) { - header_state = 16; - break; - } - /* Fall through if no Optional Header CRC. */ - case 16: /* Optional Header CRC: Second byte. */ - if ((flags & 2)) { - header_state = 17; - break; - } - /* Fall through if no Optional Header CRC. */ - case 17: /* First byte of compressed data. */ - state->header_done = 1; /* done with header */ - state->stream.avail_in++; - state->stream.next_in--; - } - - /* - * TODO: Consider moving the inflateInit2 call - * here so it can include the compression type - * from the header? - */ - } else { - output = state->stream.next_out; - - /* Decompress some data. */ - ret = inflate(&(state->stream), 0); - decompressed = state->stream.next_out - output; - - /* Accumulate the CRC of the uncompressed data. */ - state->crc = crc32(state->crc, output, decompressed); - - /* Accumulate the total bytes of output. */ - state->total_out += decompressed; - total_decompressed += decompressed; - - switch (ret) { - case Z_OK: /* Decompressor made some progress. */ - if (decompressed > 0) - return (ARCHIVE_OK); - break; - case Z_STREAM_END: /* Found end of stream. */ - /* - * TODO: Verify gzip trailer - * (uncompressed length and CRC). - */ - state->eof = 1; - return (ARCHIVE_OK); - default: - /* Any other return value is an error. */ - goto fatal; - } - } - } - return (ARCHIVE_OK); - - /* Return a fatal error. */ -fatal: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "%s decompression failed", a->archive.compression_name); - return (ARCHIVE_FATAL); -} - #endif /* HAVE_ZLIB_H */ diff --git a/libarchive/archive_read_support_compression_none.c b/libarchive/archive_read_support_compression_none.c index 2c5c987c0..fbdd4fd81 100644 --- a/libarchive/archive_read_support_compression_none.c +++ b/libarchive/archive_read_support_compression_none.c @@ -26,388 +26,15 @@ #include "archive_platform.h" __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_compression_none.c,v 1.19 2007/12/30 04:58:21 kientzle Exp $"); -#ifdef HAVE_ERRNO_H -#include -#endif -#ifdef HAVE_LIMITS_H -#include -#endif -#ifdef HAVE_STDLIB_H -#include -#endif -#ifdef HAVE_STRING_H -#include -#endif -#ifdef HAVE_UNISTD_H -#include -#endif - #include "archive.h" -#include "archive_private.h" -#include "archive_read_private.h" - -struct archive_decompress_none { - char *buffer; - size_t buffer_size; - char *next; /* Current read location. */ - size_t avail; /* Bytes in my buffer. */ - const void *client_buff; /* Client buffer information. */ - size_t client_total; - const char *client_next; - size_t client_avail; - char end_of_file; - char fatal; -}; /* - * Initial size of internal buffer used for combining short reads. + * Uncompressed streams are handled implicitly by the read core, + * so this is now a no-op. */ -#define BUFFER_SIZE 65536 - -#define minimum(a, b) (a < b ? a : b) - -static int archive_decompressor_none_bid(const void *, size_t); -static int archive_decompressor_none_finish(struct archive_read *); -static int archive_decompressor_none_init(struct archive_read *, - const void *, size_t); -static ssize_t archive_decompressor_none_read_ahead(struct archive_read *, - const void **, size_t); -static ssize_t archive_decompressor_none_read_consume(struct archive_read *, - size_t); -static off_t archive_decompressor_none_skip(struct archive_read *, off_t); - int -archive_read_support_compression_none(struct archive *_a) -{ - struct archive_read *a = (struct archive_read *)_a; - if (__archive_read_register_compression(a, - archive_decompressor_none_bid, - archive_decompressor_none_init) != NULL) - return (ARCHIVE_OK); - return (ARCHIVE_FATAL); -} - -/* - * Try to detect an "uncompressed" archive. - */ -static int -archive_decompressor_none_bid(const void *buff, size_t len) -{ - (void)buff; - (void)len; - - return (1); /* Default: We'll take it if noone else does. */ -} - -static int -archive_decompressor_none_init(struct archive_read *a, const void *buff, size_t n) -{ - struct archive_decompress_none *state; - - a->archive.compression_code = ARCHIVE_COMPRESSION_NONE; - a->archive.compression_name = "none"; - - state = (struct archive_decompress_none *)malloc(sizeof(*state)); - if (!state) { - archive_set_error(&a->archive, ENOMEM, "Can't allocate input data"); - return (ARCHIVE_FATAL); - } - memset(state, 0, sizeof(*state)); - - state->buffer_size = BUFFER_SIZE; - state->buffer = (char *)malloc(state->buffer_size); - state->next = state->buffer; - if (state->buffer == NULL) { - free(state); - archive_set_error(&a->archive, ENOMEM, "Can't allocate input buffer"); - return (ARCHIVE_FATAL); - } - - /* Save reference to first block of data. */ - state->client_buff = buff; - state->client_total = n; - state->client_next = state->client_buff; - state->client_avail = state->client_total; - - a->decompressor->data = state; - a->decompressor->read_ahead2 = archive_decompressor_none_read_ahead; - a->decompressor->consume2 = archive_decompressor_none_read_consume; - a->decompressor->skip2 = archive_decompressor_none_skip; - a->decompressor->finish = archive_decompressor_none_finish; - - return (ARCHIVE_OK); -} - -/* - * This is tricky. We need to provide our clients with pointers to - * contiguous blocks of memory but we want to avoid copying whenever - * possible. - * - * Mostly, this code returns pointers directly into the block of data - * provided by the client_read routine. It can do this unless the - * request would split across blocks. In that case, we have to copy - * into an internal buffer to combine reads. - */ -static ssize_t -archive_decompressor_none_read_ahead(struct archive_read *a, const void **buff, - size_t min) +archive_read_support_compression_none(struct archive *a) { - struct archive_decompress_none *state; - ssize_t bytes_read; - - state = (struct archive_decompress_none *)a->decompressor->data; - if (state->fatal) - return (ARCHIVE_FATAL); - - /* - * Keep pulling more data until we can satisfy the request. - */ - for (;;) { - - /* - * If we can satisfy from the copy buffer, we're done. - */ - if (state->avail >= min) { - *buff = state->next; - return (state->avail); - } - - /* - * We can satisfy directly from client buffer if everything - * currently in the copy buffer is still in the client buffer. - */ - if (state->client_total >= state->client_avail + state->avail - && state->client_avail + state->avail >= min) { - /* "Roll back" to client buffer. */ - state->client_avail += state->avail; - state->client_next -= state->avail; - /* Copy buffer is now empty. */ - state->avail = 0; - state->next = state->buffer; - /* Return data from client buffer. */ - *buff = state->client_next; - return (state->client_avail); - } - - /* Move data forward in copy buffer if necessary. */ - if (state->next > state->buffer && - state->next + min > state->buffer + state->buffer_size) { - if (state->avail > 0) - memmove(state->buffer, state->next, state->avail); - state->next = state->buffer; - } - - /* If we've used up the client data, get more. */ - if (state->client_avail <= 0) { - if (state->end_of_file) { - *buff = state->next; - return (state->avail); - /* TODO: Change this to return(0) consistent - * with new eof handling commented below. */ - } - bytes_read = (a->source->read)(a->source, - &state->client_buff); - if (bytes_read < 0) { /* Read error. */ - state->client_total = state->client_avail = 0; - state->client_next = state->client_buff = NULL; - state->fatal = 1; - return (ARCHIVE_FATAL); - } - if (bytes_read == 0) { /* Premature end-of-file. */ - state->client_total = state->client_avail = 0; - state->client_next = state->client_buff = NULL; - state->end_of_file = 1; - /* Return whatever we do have. */ - *buff = state->next; - return (state->avail); - /* TODO: I want to change this to - * return(0) as an eof marker, but a little - * more work is needed first. */ - } - a->archive.raw_position += bytes_read; - state->client_total = bytes_read; - state->client_avail = state->client_total; - state->client_next = state->client_buff; - } - else - { - /* - * We can't satisfy the request from the copy - * buffer or the existing client data, so we - * need to copy more client data over to the - * copy buffer. - */ - - /* Ensure the buffer is big enough. */ - if (min > state->buffer_size) { - size_t s, t; - char *p; - - /* Double the buffer; watch for overflow. */ - s = t = state->buffer_size; - while (s < min) { - t *= 2; - if (t <= s) { /* Integer overflow! */ - archive_set_error(&a->archive, - ENOMEM, - "Unable to allocate copy buffer"); - state->fatal = 1; - return (ARCHIVE_FATAL); - } - s = t; - } - /* Now s >= min, so allocate a new buffer. */ - p = (char *)malloc(s); - if (p == NULL) { - archive_set_error(&a->archive, ENOMEM, - "Unable to allocate copy buffer"); - state->fatal = 1; - return (ARCHIVE_FATAL); - } - /* Move data into newly-enlarged buffer. */ - if (state->avail > 0) - memmove(p, state->next, state->avail); - free(state->buffer); - state->next = state->buffer = p; - state->buffer_size = s; - } - - /* We can add client data to copy buffer. */ - /* First estimate: copy to fill rest of buffer. */ - size_t tocopy = (state->buffer + state->buffer_size) - - (state->next + state->avail); - /* Don't waste time buffering more than we need to. */ - if (tocopy + state->avail > min) - tocopy = min - state->avail; - /* Don't copy more than is available. */ - if (tocopy > state->client_avail) - tocopy = state->client_avail; - - memcpy(state->next + state->avail, state->client_next, - tocopy); - /* Remove this data from client buffer. */ - state->client_next += tocopy; - state->client_avail -= tocopy; - /* add it to copy buffer. */ - state->avail += tocopy; - } - } -} - -/* - * Mark the appropriate data as used. Note that the request here will - * often be much smaller than the size of the previous read_ahead - * request. - */ -static ssize_t -archive_decompressor_none_read_consume(struct archive_read *a, size_t request) -{ - struct archive_decompress_none *state; - - state = (struct archive_decompress_none *)a->decompressor->data; - if (state->avail > 0) { - /* Read came from copy buffer. */ - state->next += request; - state->avail -= request; - } else { - /* Read came from client buffer. */ - state->client_next += request; - state->client_avail -= request; - } - a->archive.file_position += request; - return (request); -} - -/* - * Skip forward by exactly the requested bytes or else return - * ARCHIVE_FATAL. Note that this differs from the contract for - * read_ahead, which does not guarantee a minimum count. - */ -static off_t -archive_decompressor_none_skip(struct archive_read *a, off_t request) -{ - struct archive_decompress_none *state; - off_t bytes_skipped, total_bytes_skipped = 0; - size_t min; - - state = (struct archive_decompress_none *)a->decompressor->data; - if (state->fatal) - return (-1); - /* - * If there is data in the buffers already, use that first. - */ - if (state->avail > 0) { - min = minimum(request, (off_t)state->avail); - bytes_skipped = archive_decompressor_none_read_consume(a, min); - request -= bytes_skipped; - total_bytes_skipped += bytes_skipped; - } - if (state->client_avail > 0) { - min = minimum(request, (off_t)state->client_avail); - bytes_skipped = archive_decompressor_none_read_consume(a, min); - request -= bytes_skipped; - total_bytes_skipped += bytes_skipped; - } - if (request == 0) - return (total_bytes_skipped); - /* - * If a client_skipper was provided, try that first. - */ -#if ARCHIVE_API_VERSION < 2 - if ((a->source->skip != NULL) && (request < SSIZE_MAX)) { -#else - if (a->source->skip != NULL) { -#endif - bytes_skipped = (a->source->skip)(a->source, request); - if (bytes_skipped < 0) { /* error */ - state->client_total = state->client_avail = 0; - state->client_next = state->client_buff = NULL; - state->fatal = 1; - return (bytes_skipped); - } - total_bytes_skipped += bytes_skipped; - a->archive.file_position += bytes_skipped; - request -= bytes_skipped; - state->client_next = state->client_buff; - a->archive.raw_position += bytes_skipped; - state->client_avail = state->client_total = 0; - } - /* - * Note that client_skipper will usually not satisfy the - * full request (due to low-level blocking concerns), - * so even if client_skipper is provided, we may still - * have to use ordinary reads to finish out the request. - */ - while (request > 0) { - const void* dummy_buffer; - ssize_t bytes_read; - bytes_read = archive_decompressor_none_read_ahead(a, - &dummy_buffer, 1); - if (bytes_read < 0) - return (bytes_read); - if (bytes_read == 0) { - /* We hit EOF before we satisfied the skip request. */ - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Truncated input file (need to skip %jd bytes)", - (intmax_t)request); - return (ARCHIVE_FATAL); - } - min = (size_t)(minimum(bytes_read, request)); - bytes_read = archive_decompressor_none_read_consume(a, min); - total_bytes_skipped += bytes_read; - request -= bytes_read; - } - return (total_bytes_skipped); -} - -static int -archive_decompressor_none_finish(struct archive_read *a) -{ - struct archive_decompress_none *state; - - state = (struct archive_decompress_none *)a->decompressor->data; - free(state->buffer); - free(state); - a->decompressor->data = NULL; + (void)a; /* UNUSED */ return (ARCHIVE_OK); } diff --git a/libarchive/archive_read_support_compression_program.c b/libarchive/archive_read_support_compression_program.c index ca35125b1..dc3c3815f 100644 --- a/libarchive/archive_read_support_compression_program.c +++ b/libarchive/archive_read_support_compression_program.c @@ -75,64 +75,94 @@ archive_read_support_compression_program(struct archive *_a, const char *cmd) #include "filter_fork.h" -struct archive_decompress_program { +struct program_reader { + char *cmd; + int bid; +}; + +struct program_source { char *description; pid_t child; int child_stdin, child_stdout; - char *child_out_buf; - char *child_out_buf_next; - size_t child_out_buf_len, child_out_buf_avail; + char *out_buf; + size_t out_buf_len; const char *child_in_buf; size_t child_in_buf_avail; }; -static int archive_decompressor_program_bid(const void *, size_t); -static int archive_decompressor_program_finish(struct archive_read *); -static int archive_decompressor_program_init(struct archive_read *, +static int program_reader_bid(struct archive_reader *, const void *, size_t); -static ssize_t archive_decompressor_program_read_ahead(struct archive_read *, - const void **, size_t); -static ssize_t archive_decompressor_program_read_consume(struct archive_read *, - size_t); +static struct archive_read_source *program_reader_init(struct archive_read *, + struct archive_reader *, struct archive_read_source *, + const void *, size_t); +static int program_reader_free(struct archive_reader *); + +static ssize_t program_source_read(struct archive_read_source *, + const void **); +static int program_source_close(struct archive_read_source *); + int archive_read_support_compression_program(struct archive *_a, const char *cmd) { struct archive_read *a = (struct archive_read *)_a; - struct decompressor_t *decompressor; + struct archive_reader *reader = __archive_read_get_reader(a); + struct program_reader *state; - if (cmd == NULL || *cmd == '\0') - return (ARCHIVE_WARN); + state = (struct program_reader *)calloc(sizeof (*state), 1); - decompressor = __archive_read_register_compression(a, - archive_decompressor_program_bid, - archive_decompressor_program_init); - if (decompressor == NULL) - return (ARCHIVE_WARN); + if (state == NULL) + return (ARCHIVE_FATAL); + if (reader == NULL) + return (ARCHIVE_FATAL); - decompressor->config = strdup(cmd); + state->cmd = strdup(cmd); + state->bid = INT_MAX; + + reader->data = state; + reader->bid = program_reader_bid; + reader->init = program_reader_init; + reader->free = program_reader_free; + return (ARCHIVE_OK); +} + +static int +program_reader_free(struct archive_reader *self) +{ + free(self->data); return (ARCHIVE_OK); } /* * If the user used us to register, they must really want us to - * handle it, so this module always bids INT_MAX. + * handle it, so we always bid INT_MAX the first time we're called. + * After that, we always return zero, lest we end up instantiating + * an infinite pipeline. */ static int -archive_decompressor_program_bid(const void *buff, size_t len) +program_reader_bid(struct archive_reader *self, const void *buff, size_t len) { + struct program_reader *state = self->data; + int bid = state->bid; + (void)buff; /* UNUSED */ (void)len; /* UNUSED */ - return (INT_MAX); /* Default: We'll take it. */ + state->bid = 0; /* Don't bid again on this pipeline. */ + + return (bid); /* Default: We'll take it if we haven't yet bid. */ } +/* + * Use select() to decide whether the child is ready for read or write. + */ + static ssize_t -child_read(struct archive_read *a, char *buf, size_t buf_len) +child_read(struct archive_read_source *self, char *buf, size_t buf_len) { - struct archive_decompress_program *state = a->decompressor->data; + struct program_source *state = self->data; ssize_t ret, requested; const void *child_buf; @@ -161,7 +191,7 @@ restart_read: if (state->child_in_buf_avail == 0) { child_buf = state->child_in_buf; - ret = (a->source->read)(a->source, &child_buf); + ret = (self->upstream->read)(self->upstream, &child_buf); state->child_in_buf = (const char *)child_buf; if (ret < 0) { @@ -210,118 +240,103 @@ restart_read: } } -static int -archive_decompressor_program_init(struct archive_read *a, const void *buff, size_t n) +static struct archive_read_source * +program_reader_init(struct archive_read *a, struct archive_reader *reader, + struct archive_read_source *upstream, const void *buff, size_t n) { - struct archive_decompress_program *state; - const char *cmd = a->decompressor->config; + struct program_source *state; + struct program_reader *reader_state; + struct archive_read_source *self; + static const size_t out_buf_len = 65536; + char *out_buf; + char *description; const char *prefix = "Program: "; - state = (struct archive_decompress_program *)malloc(sizeof(*state)); - if (!state) { + reader_state = (struct program_reader *)reader->data; + + self = (struct archive_read_source *)malloc(sizeof(*self)); + state = (struct program_source *)malloc(sizeof(*state)); + out_buf = (char *)malloc(out_buf_len); + description = (char *)malloc(strlen(prefix) + strlen(reader_state->cmd) + 1); + if (self == NULL + || state == NULL + || out_buf == NULL + || description == NULL) + { archive_set_error(&a->archive, ENOMEM, "Can't allocate input data"); - return (ARCHIVE_FATAL); + free(self); + free(state); + free(out_buf); + free(description); + return (NULL); } a->archive.compression_code = ARCHIVE_COMPRESSION_PROGRAM; - state->description = (char *)malloc(strlen(prefix) + strlen(cmd) + 1); + state->description = description; strcpy(state->description, prefix); - strcat(state->description, cmd); + strcat(state->description, reader_state->cmd); a->archive.compression_name = state->description; - state->child_out_buf_next = state->child_out_buf = malloc(65536); - if (!state->child_out_buf) { - free(state); - archive_set_error(&a->archive, ENOMEM, - "Can't allocate filter buffer"); - return (ARCHIVE_FATAL); - } - state->child_out_buf_len = 65536; - state->child_out_buf_avail = 0; + state->out_buf = out_buf; + state->out_buf_len = out_buf_len; state->child_in_buf = buff; state->child_in_buf_avail = n; - if ((state->child = __archive_create_child(cmd, + if ((state->child = __archive_create_child(reader_state->cmd, &state->child_stdin, &state->child_stdout)) == -1) { - free(state->child_out_buf); + free(state->out_buf); free(state); archive_set_error(&a->archive, EINVAL, "Can't initialise filter"); - return (ARCHIVE_FATAL); + return (NULL); } - a->decompressor->data = state; - a->decompressor->read_ahead2 = archive_decompressor_program_read_ahead; - a->decompressor->consume2 = archive_decompressor_program_read_consume; - a->decompressor->skip2 = NULL; - a->decompressor->finish = archive_decompressor_program_finish; + self->data = state; + self->read = program_source_read; + self->skip = NULL; + self->close = program_source_close; + self->upstream = upstream; + self->archive = a; /* XXX Check that we can read at least one byte? */ - return (ARCHIVE_OK); + return (self); } static ssize_t -archive_decompressor_program_read_ahead(struct archive_read *a, const void **buff, - size_t min) +program_source_read(struct archive_read_source *self, const void **buff) { - struct archive_decompress_program *state; - ssize_t bytes_read; - - state = (struct archive_decompress_program *)a->decompressor->data; - - if (min > state->child_out_buf_len) - min = state->child_out_buf_len; - - while (state->child_stdout != -1 && min > state->child_out_buf_avail) { - if (state->child_out_buf != state->child_out_buf_next) { - memmove(state->child_out_buf, state->child_out_buf_next, - state->child_out_buf_avail); - state->child_out_buf_next = state->child_out_buf; - } - - bytes_read = child_read(a, - state->child_out_buf + state->child_out_buf_avail, - state->child_out_buf_len - state->child_out_buf_avail); - if (bytes_read == -1) - return (-1); - if (bytes_read == 0) + struct program_source *state; + ssize_t bytes, total; + char *p; + + state = (struct program_source *)self->data; + + total = 0; + p = state->out_buf; + while (state->child_stdout != -1) { + bytes = child_read(self, p, state->out_buf_len - total); + if (bytes < 0) + return (bytes); + if (bytes == 0) break; - state->child_out_buf_avail += bytes_read; - a->archive.raw_position += bytes_read; + total += bytes; +/* TODO: fix this */ /* a->archive.raw_position += bytes_read; */ } - *buff = state->child_out_buf_next; - return (state->child_out_buf_avail); -} - -static ssize_t -archive_decompressor_program_read_consume(struct archive_read *a, size_t request) -{ - struct archive_decompress_program *state; - - state = (struct archive_decompress_program *)a->decompressor->data; - - state->child_out_buf_next += request; - state->child_out_buf_avail -= request; - - a->archive.file_position += request; - return (request); + *buff = state->out_buf; + return (total); } static int -archive_decompressor_program_finish(struct archive_read *a) +program_source_close(struct archive_read_source *self) { - struct archive_decompress_program *state; + struct program_source *state; int status; - state = (struct archive_decompress_program *)a->decompressor->data; - - /* Release our configuration data. */ - free(a->decompressor->config); - a->decompressor->config = NULL; + state = (struct program_source *)self->data; /* Shut down the child. */ if (state->child_stdin != -1) @@ -332,10 +347,10 @@ archive_decompressor_program_finish(struct archive_read *a) continue; /* Release our private data. */ - free(state->child_out_buf); + free(state->out_buf); free(state->description); free(state); - a->decompressor->data = NULL; + free(self); return (ARCHIVE_OK); }