From: Tim Kientzle Date: Tue, 18 Nov 2008 16:14:08 +0000 (-0500) Subject: Rework Bzip2 stream management. Mostly, this makes the stream X-Git-Tag: v2.6.0~36 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=aca8370b16c85617c473bc0317f187458f8f272a;p=thirdparty%2Flibarchive.git Rework Bzip2 stream management. Mostly, this makes the stream initialization lazy so we can re-open the decompressor for a new stream. This should allow us to read pbzip2 output, which compresses large blocks separately and writes them as independent streams, while still handling files such as Gentoo binary packages, which store unrelated data after the end of the bzip2 data. Thanks to: Ivan Voras for pointing out the pbzip2 case Thanks to: Diego "Flameeyes" Petteno for pointing out the problem with Gentoo binary packages SVN-Revision: 257 --- diff --git a/libarchive/archive_read_support_compression_bzip2.c b/libarchive/archive_read_support_compression_bzip2.c index e6319f1f2..acf860be0 100644 --- a/libarchive/archive_read_support_compression_bzip2.c +++ b/libarchive/archive_read_support_compression_bzip2.c @@ -53,7 +53,7 @@ struct private_data { bz_stream stream; char *out_block; size_t out_block_size; - int64_t total_out; + char valid; /* True = decompressor is initialized */ char eof; /* True = found end of compressed data. */ }; @@ -200,7 +200,6 @@ bzip2_reader_init(struct archive_read *a, struct archive_reader *reader, void *out_block; struct archive_read_source *self; struct private_data *state; - int ret; (void)reader; /* UNUSED */ @@ -242,49 +241,7 @@ bzip2_reader_init(struct archive_read *a, struct archive_reader *reader, state->stream.next_out = state->out_block; state->stream.avail_out = state->out_block_size; - /* Initialize compression library. */ - ret = BZ2_bzDecompressInit(&(state->stream), - 0 /* library verbosity */, - 0 /* don't use slow low-mem algorithm */); - - /* If init fails, try using low-memory algorithm instead. */ - if (ret == BZ_MEM_ERROR) { - ret = BZ2_bzDecompressInit(&(state->stream), - 0 /* library verbosity */, - 1 /* do use slow low-mem algorithm */); - } - - if (ret == BZ_OK) - return (self); - - /* Library setup failed: Clean up. */ - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Internal error initializing %s library", - a->archive.compression_name); - - /* Override the error message if we know what really went wrong. */ - switch (ret) { - case BZ_PARAM_ERROR: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Internal error initializing compression library: " - "invalid setup parameter"); - break; - case BZ_MEM_ERROR: - archive_set_error(&a->archive, ENOMEM, - "Internal error initializing compression library: " - "out of memory"); - break; - case BZ_CONFIG_ERROR: - archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, - "Internal error initializing compression library: " - "mis-compiled library"); - break; - } - - free(state->out_block); - free(state); - free(self); - return (NULL); + return (self); } /* @@ -301,6 +258,11 @@ bzip2_source_read(struct archive_read_source *self, const void **p) state = (struct private_data *)self->data; read_avail = 0; + if (state->eof) { + *p = NULL; + return (0); + } + /* Empty our output buffer. */ state->stream.next_out = state->out_block; state->stream.avail_out = state->out_block_size; @@ -319,39 +281,85 @@ bzip2_source_read(struct archive_read_source *self, const void **p) return (ARCHIVE_FATAL); /* There is no more data, return whatever we have. */ if (ret == 0) { + state->eof = 1; *p = state->out_block; decompressed = state->stream.next_out - state->out_block; - state->total_out += decompressed; return (decompressed); } state->stream.avail_in = ret; } + if (!state->valid) { + if (state->stream.next_in[0] != 'B') { + state->eof = 1; + *p = state->out_block; + decompressed = state->stream.next_out + - state->out_block; + return (decompressed); + } + /* Initialize compression library. */ + ret = BZ2_bzDecompressInit(&(state->stream), + 0 /* library verbosity */, + 0 /* don't use low-mem algorithm */); + + /* If init fails, try low-memory algorithm instead. */ + if (ret == BZ_MEM_ERROR) + ret = BZ2_bzDecompressInit(&(state->stream), + 0 /* library verbosity */, + 1 /* do use low-mem algo */); + + if (ret != BZ_OK) { + const char *detail = NULL; + int err = ARCHIVE_ERRNO_MISC; + switch (ret) { + case BZ_PARAM_ERROR: + detail = "invalid setup parameter"; + break; + case BZ_MEM_ERROR: + err = ENOMEM; + detail = "out of memory"; + break; + case BZ_CONFIG_ERROR: + detail = "mis-compiled library"; + break; + } + archive_set_error(&self->archive->archive, err, + "Internal error initializing decompressor%s%s", + detail == NULL ? "" : ": ", + detail); + return (ARCHIVE_FATAL); + } + state->valid = 1; + } + /* Decompress as much as we can in one pass. */ ret = BZ2_bzDecompress(&(state->stream)); switch (ret) { case BZ_STREAM_END: /* Found end of stream. */ - /* TODO: Peek ahead to see if there's another - * stream so we can mimic the behavior of gunzip - * on concatenated streams. */ - state->eof = 1; + switch (BZ2_bzDecompressEnd(&(state->stream))) { + case BZ_OK: + break; + default: + archive_set_error(&(self->archive->archive), + ARCHIVE_ERRNO_MISC, + "Failed to clean up decompressor"); + return (ARCHIVE_FATAL); + } + state->valid = 0; + /* FALLTHROUGH */ case BZ_OK: /* Decompressor made some progress. */ /* If we filled our buffer, update stats and return. */ - if (state->eof || state->stream.avail_out == 0) { + if (state->stream.avail_out == 0) { *p = state->out_block; decompressed = state->stream.next_out - state->out_block; - state->total_out += decompressed; return (decompressed); } break; - default: - /* Return an error. */ + default: /* Return an error. */ archive_set_error(&self->archive->archive, - ARCHIVE_ERRNO_MISC, - "%s decompression failed", - self->archive->archive.compression_name); + ARCHIVE_ERRNO_MISC, "bzip decompression failed"); return (ARCHIVE_FATAL); } } @@ -364,25 +372,26 @@ static int bzip2_source_close(struct archive_read_source *self) { struct private_data *state; - int ret; + int ret = ARCHIVE_OK; state = (struct private_data *)self->data; - ret = ARCHIVE_OK; - switch (BZ2_bzDecompressEnd(&(state->stream))) { - case BZ_OK: - break; - default: - archive_set_error(&(self->archive->archive), - ARCHIVE_ERRNO_MISC, - "Failed to clean up %s compressor", - self->archive->archive.compression_name); - ret = ARCHIVE_FATAL; + + if (state->valid) { + switch (BZ2_bzDecompressEnd(&state->stream)) { + case BZ_OK: + break; + default: + archive_set_error(&self->archive->archive, + ARCHIVE_ERRNO_MISC, + "Failed to clean up decompressor"); + ret = ARCHIVE_FATAL; + } } free(state->out_block); free(state); free(self); - return (ret); + return (ARCHIVE_OK); } #endif /* HAVE_BZLIB_H */ diff --git a/libarchive/test/Makefile b/libarchive/test/Makefile index 4c3965f01..96483df0d 100644 --- a/libarchive/test/Makefile +++ b/libarchive/test/Makefile @@ -13,6 +13,7 @@ TESTS= \ test_acl_pax.c \ test_archive_api_feature.c \ test_bad_fd.c \ + test_compat_bzip2.c \ test_compat_gtar.c \ test_compat_tar_hardlink.c \ test_compat_zip.c \ diff --git a/libarchive/test/test_compat_bzip2.c b/libarchive/test/test_compat_bzip2.c new file mode 100644 index 000000000..2f5c52279 --- /dev/null +++ b/libarchive/test/test_compat_bzip2.c @@ -0,0 +1,89 @@ +/*- + * Copyright (c) 2003-2008 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" +__FBSDID("$FreeBSD$"); + +/* + * Verify our ability to read sample files compatibly with bunzip2. + * + * In particular: + * * bunzip2 will read multiple bzip2 streams, concatenating the output + * * bunzip2 will stop at the end of a stream if the following data + * doesn't start with a bzip2 signature. + */ + +/* + * All of the sample files have the same contents; they're just + * compressed in different ways. + */ +static void +compat_bzip2(const char *name) +{ + const char *n[7] = { "f1", "f2", "f3", "d1/f1", "d1/f2", "d1/f3", NULL }; + struct archive_entry *ae; + struct archive *a; + int i,r; + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_compression_all(a)); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + extract_reference_file(name); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, name, 2)); + + /* Read entries, match up names with list above. */ + for (i = 0; i < 6; ++i) { + r = archive_read_next_header(a, &ae); + failure("Could not read file %d (%s) from %s", i, n[i], name); + assertEqualIntA(a, ARCHIVE_OK, r); + if (r != ARCHIVE_OK) { + archive_read_finish(a); + return; + } + assertEqualString(n[i], archive_entry_pathname(ae)); + } + + /* Verify the end-of-archive. */ + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + + /* Verify that the format detection worked. */ + assertEqualInt(archive_compression(a), ARCHIVE_COMPRESSION_BZIP2); + assertEqualInt(archive_format(a), ARCHIVE_FORMAT_TAR_USTAR); + + assertEqualInt(ARCHIVE_OK, archive_read_close(a)); +#if ARCHIVE_VERSION_NUMBER < 2000000 + archive_read_finish(a); +#else + assertEqualInt(ARCHIVE_OK, archive_read_finish(a)); +#endif +} + + +DEFINE_TEST(test_compat_bzip2) +{ + compat_bzip2("test_compat_bzip2_1.tbz"); + compat_bzip2("test_compat_bzip2_2.tbz"); +} + + diff --git a/libarchive/test/test_compat_bzip2_1.tbz.uu b/libarchive/test/test_compat_bzip2_1.tbz.uu new file mode 100644 index 000000000..8de910147 --- /dev/null +++ b/libarchive/test/test_compat_bzip2_1.tbz.uu @@ -0,0 +1,22 @@ +begin 644 test_compat_bzip2_1.tbz +M0EIH.3%!62936;12^)(``#-;D=$00`!_@``!8RT>$`0`$```""``5#5/*'J> +MD#(&30_5!H4_5-ZH`T``327U4@&L('"(9-%8<7&$I,`:7FXH+*\GV#JF<`PK29-8'OPDG36S\7HR&C(T:/U0:$U'I +MJ!ZC0`#VECO\[$10H'-Z@F*:6A1$H$V("2G0Q(U0(8=(7AK$S04#!)RXOAP% +MP:D%#Q;NO)\4UL23'2[\7````6YC1 +M$$`$?X```6,M'A`$`!````@@`'4-4S*,U!HT!HT?J@T)E-I--!H`![60EIH.3%!629364RNM^,```#?L-$00`#_@`0```AG +M+1X0`!`$```((`!U#5-,:1IH`TT,1^J#)&H]3U`T``!CX[_.[`F40.64EC"D +M()+?KX6,VP?6Y;F%5$XR[Y/D#*9),K3^+N2*<*$@9@ +MX6(`0EIH.3%!62936>ZM4*4```);D-$00`#O@``(9ST>$`0```@@`'0:IFC2 +M&F@!B:/U0:$R&H:&@`"KS^U=Y`BC`#FY2*9-8%%&13E$@%8ZF(&J!##]!#E` +MKVL'2LUW2.*C08`$)::#DQ05DF +?4UDI/)=P````0!!```0`(``A`(*#%W)%.%"0*3R7<``` +` +end diff --git a/libarchive/test/test_compat_bzip2_2.tbz.uu b/libarchive/test/test_compat_bzip2_2.tbz.uu new file mode 100644 index 000000000..cd0dc6b93 --- /dev/null +++ b/libarchive/test/test_compat_bzip2_2.tbz.uu @@ -0,0 +1,9 @@ +begin 644 test_compat_bzip2_2.tbz +M0EIH.3%!629361HI1P<``4#;D-$00`#_@``)9RT>$`0``!@P`/@#&$Q,F`F` +M`,83$R8"8``1133"1/2-J-#$/U3@;XVF9V'`Y3882XA$*KO6\WTL`]QU&J"8 +M$-=*Q$\@=`=QJ,TQ;3UH,NPT$-(!"HV&!ZO5D&@P-1D&1@'L<8&209QV9'G` +MW&PRZ0Q(-BT%&DG*DE.!U*#J.P]*#%-P9G`W9+34:#S&M`;@^1R^![C]:Y)U +MDF9/(\AR/@?P@^@I_B[DBG"A(#12C@X!3;VUE(&UO