From: Michihiro NAKAJIMA Date: Fri, 6 Nov 2009 15:16:19 +0000 (-0500) Subject: Improve detecting LZMA stream data. X-Git-Tag: v2.8.0~231 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=24dbaced15d0e63bd850cf6b72881dc8bd502e0c;p=thirdparty%2Flibarchive.git Improve detecting LZMA stream data. - Checking of the first byte of LZMA stream. * lzma of XZ Utils with option -e records 0x5e in the first byte. * It is possible that lzma of LZMA SDK records various data in the first byte. - Checking of second and third bytes of LZMA stream. * lzma of LZMA SDK with option -d12,-d13,-d14 and -d15 records 0x1000,0x2000,0x4000 and 0x8000 in second through fifth bytes of LZMA stream. It means second and third bytes are not always zero. - Checking of sixth through thirteenth bytes of LZMA stream. * These bytes record the uncompressed size, and lzma of XZ Utils always records -1 in this field. SVN-Revision: 1584 --- diff --git a/Makefile.am b/Makefile.am index 1f4bb1ceb..59ac650c1 100644 --- a/Makefile.am +++ b/Makefile.am @@ -223,6 +223,7 @@ libarchive_test_SOURCES= \ libarchive/test/test_compat_bzip2.c \ libarchive/test/test_compat_gtar.c \ libarchive/test/test_compat_gzip.c \ + libarchive/test/test_compat_lzma.c \ libarchive/test/test_compat_solaris_tar_acl.c \ libarchive/test/test_compat_tar_hardlink.c \ libarchive/test/test_compat_xz.c \ @@ -333,6 +334,9 @@ libarchive_test_EXTRA_DIST=\ libarchive/test/test_compat_gtar_1.tar.uu \ libarchive/test/test_compat_gzip_1.tgz.uu \ libarchive/test/test_compat_gzip_2.tgz.uu \ + libarchive/test/test_compat_lzma_1.tlz.uu \ + libarchive/test/test_compat_lzma_2.tlz.uu \ + libarchive/test/test_compat_lzma_3.tlz.uu \ libarchive/test/test_compat_solaris_tar_acl.tar.uu \ libarchive/test/test_compat_tar_hardlink_1.tar.uu \ libarchive/test/test_compat_xz_1.txz.uu \ diff --git a/libarchive/archive_read_support_compression_xz.c b/libarchive/archive_read_support_compression_xz.c index 83c7c5ec2..3b927b01e 100644 --- 
a/libarchive/archive_read_support_compression_xz.c +++ b/libarchive/archive_read_support_compression_xz.c @@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$"); #endif #include "archive.h" +#include "archive_endian.h" #include "archive_private.h" #include "archive_read_private.h" @@ -205,37 +206,101 @@ lzma_bidder_bid(struct archive_read_filter_bidder *self, { const unsigned char *buffer; ssize_t avail; + uint32_t dicsize; + uint64_t uncompressed_size; int bits_checked; (void)self; /* UNUSED */ - buffer = __archive_read_filter_ahead(filter, 6, &avail); + buffer = __archive_read_filter_ahead(filter, 14, &avail); if (buffer == NULL) return (0); - /* First byte of raw LZMA stream is always 0x5d. */ + /* First byte of raw LZMA stream is commonly 0x5d. + * The first byte is a special number, which consists of + * three parameters of LZMA compression, a number of literal + * context bits(which is from 0 to 8, default is 3), a number + * of literal pos bits(which is from 0 to 4, default is 0), + * a number of pos bits(which is from 0 to 4, default is 2). + * The first byte is made by + * (pos bits * 5 + literal pos bit) * 9 + * literal context bit, + * and so the default value in this field is + * (2 * 5 + 0) * 9 + 3 = 0x5d. + * lzma of LZMA SDK has options to change those parameters. + * It means a range of this field is from 0 to 224. And lzma of + * XZ Utils with option -e records 0x5e in this field. */ + /* NOTE: If this checking of the first byte increases false + * recognition, we should allow only 0x5d and 0x5e for the first + * byte of LZMA stream. */ bits_checked = 0; - if (buffer[0] != 0x5d) - return (0); - bits_checked += 8; - - /* Second through fifth bytes are dictionary code, stored in - * little-endian order. The two least-significant bytes are - * always zero. */ - if (buffer[1] != 0 || buffer[2] != 0) + if (buffer[0] > (4 * 5 + 4) * 9 + 8) return (0); - bits_checked += 16; - - /* ??? TODO: Fix this. ??? 
*/ - /* NSIS format check uses this, but I've seen tar.lzma - * archives where this byte is 0xff, not 0. Can it - * ever be anything other than 0 or 0xff? - */ -#if 0 - if (buffer[5] != 0) + /* Most likely value in the first byte of LZMA stream. */ + if (buffer[0] == 0x5d || buffer[0] == 0x5e) + bits_checked += 8; + + /* Sixth through thirteenth bytes are uncompressed size, + * stored in little-endian order. `-1' means uncompressed + * size is unknown and lzma of XZ Utils always records `-1' + * in this field. */ + uncompressed_size = archive_le64dec(buffer+5); + if (uncompressed_size == (uint64_t)-1LL) + bits_checked += 64; + + /* Second through fifth bytes are dictionary size, stored in + * little-endian order. The minimum dictionary size is + * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option + * -d12 and the maximum dictionary size is 1 << 27(128MiB) + * which the one uses with option -d27. + * NOTE: A comment of LZMA SDK source code says this dictionary + * range is from 1 << 12 to 1 << 30. */ + dicsize = archive_le32dec(buffer+1); + switch (dicsize) { + case 0x00001000:/* lzma of LZMA SDK option -d12. */ + case 0x00002000:/* lzma of LZMA SDK option -d13. */ + case 0x00004000:/* lzma of LZMA SDK option -d14. */ + case 0x00008000:/* lzma of LZMA SDK option -d15. */ + case 0x00010000:/* lzma of XZ Utils option -0 and -1. + * lzma of LZMA SDK option -d16. */ + case 0x00020000:/* lzma of LZMA SDK option -d17. */ + case 0x00040000:/* lzma of LZMA SDK option -d18. */ + case 0x00080000:/* lzma of XZ Utils option -2. + * lzma of LZMA SDK option -d19. */ + case 0x00100000:/* lzma of XZ Utils option -3. + * lzma of LZMA SDK option -d20. */ + case 0x00200000:/* lzma of XZ Utils option -4. + * lzma of LZMA SDK option -d21. */ + case 0x00400000:/* lzma of XZ Utils option -5. + * lzma of LZMA SDK option -d22. */ + case 0x00800000:/* lzma of XZ Utils option -6. + * lzma of LZMA SDK option -d23. */ + case 0x01000000:/* lzma of XZ Utils option -7. 
+ * lzma of LZMA SDK option -d24. */ + case 0x02000000:/* lzma of XZ Utils option -8. + * lzma of LZMA SDK option -d25. */ + case 0x04000000:/* lzma of XZ Utils option -9. + * lzma of LZMA SDK option -d26. */ + case 0x08000000:/* lzma of LZMA SDK option -d27. */ + bits_checked += 32; + break; + default: + /* If a memory usage for encoding was not enough on + * the platform where LZMA stream was made, lzma of + * XZ Utils automatically decreased the dictionary + * size to enough memory for encoding by 1Mi bytes + * (1 << 20).*/ + if (dicsize <= 0x03F00000 && dicsize >= 0x00300000 && + (dicsize & ((1 << 20)-1)) == 0 && + bits_checked == 8 + 64) { + bits_checked += 32; + break; + } + /* Otherwise dictionary size is unlikely. But it is + * possible that someone makes lzma stream with + * liblzma/LZMA SDK in one's dictionary size. */ return (0); - bits_checked += 8; -#endif + break; + } /* TODO: The above test is still very weak. It would be * good to do better. */ diff --git a/libarchive/test/CMakeLists.txt b/libarchive/test/CMakeLists.txt index ef8df121c..91e249190 100644 --- a/libarchive/test/CMakeLists.txt +++ b/libarchive/test/CMakeLists.txt @@ -21,6 +21,7 @@ IF(ENABLE_TEST) test_compat_bzip2.c test_compat_gtar.c test_compat_gzip.c + test_compat_lzma.c test_compat_solaris_tar_acl.c test_compat_tar_hardlink.c test_compat_xz.c diff --git a/libarchive/test/test_compat_lzma.c b/libarchive/test/test_compat_lzma.c new file mode 100644 index 000000000..7269a4c97 --- /dev/null +++ b/libarchive/test/test_compat_lzma.c @@ -0,0 +1,155 @@ +/*- + * Copyright (c) 2009 Michihiro NAKAJIMA + * Copyright (c) 2003-2008 Tim Kientzle + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "test.h" +__FBSDID("$FreeBSD$"); + +/* +Execute the following to rebuild the data for this program: + tail -n +33 test_compat_lzma.c | /bin/sh + +# Use lzma command of XZ Utils. 
+name=test_compat_lzma_1 +zcmd=lzma +zsuffix=lzma +ztar_suffix=tlz +dir="$name`date +%Y%m%d%H%M%S`.$USER" +mktarfile() +{ +mkdir $dir +echo "f1" > $dir/f1 +echo "f2" > $dir/f2 +echo "f3" > $dir/f3 +mkdir $dir/d1 +echo "f1" > $dir/d1/f1 +echo "f2" > $dir/d1/f2 +echo "f3" > $dir/d1/f3 +(cd $dir; tar cf ../$name.tar f1 f2 f3 d1/f1 d1/f2 d1/f3) +rm -r $dir +} +mktarfile +$zcmd $name.tar +mv $name.tar.$zsuffix $name.$ztar_suffix +echo "This is unrelated junk data at the end of the file" >> $name.$ztar_suffix +uuencode $name.$ztar_suffix $name.$ztar_suffix > $name.$ztar_suffix.uu +rm -f $name.$ztar_suffix +# +# Use option -e +# +name=test_compat_lzma_2 +dir="$name`date +%Y%m%d%H%M%S`.$USER" +mktarfile +$zcmd -e $name.tar +mv $name.tar.$zsuffix $name.$ztar_suffix +uuencode $name.$ztar_suffix $name.$ztar_suffix > $name.$ztar_suffix.uu +rm -f $name.$ztar_suffix +# +# Use lzma command of LZMA SDK with option -d12. +# +name=test_compat_lzma_3 +zcmd=lzmasdk # Change this path to use lzma of LZMA SDK. +dir="$name`date +%Y%m%d%H%M%S`.$USER" +mktarfile +$zcmd e -d12 $name.tar $name.$ztar_suffix +rm -f $name.tar +uuencode $name.$ztar_suffix $name.$ztar_suffix > $name.$ztar_suffix.uu +rm -f $name.$ztar_suffix + +exit 0 +*/ + +/* + * Verify our ability to read sample files compatibly with unlzma. + * + * In particular: + * * unlzma will read multiple lzma streams, concatenating the output + * * unlzma will read lzma streams which is made by lzma with option -e, + * concatenating the output + * + * Verify our ability to read sample files compatibly with lzma of + * LZMA SDK. + * * lzma will read lzma streams which is made by lzma with option -d12, + * concatenating the output + */ + +/* + * All of the sample files have the same contents; they're just + * compressed in different ways. 
+ */ +static void +compat_lzma(const char *name) +{ + const char *n[7] = { "f1", "f2", "f3", "d1/f1", "d1/f2", "d1/f3", NULL }; + struct archive_entry *ae; + struct archive *a; + int i, r; + + assert((a = archive_read_new()) != NULL); + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_compression_all(a)); + r = archive_read_support_compression_lzma(a); + if (r == ARCHIVE_WARN) { + skipping("lzma reading not fully supported on this platform"); + assertEqualInt(ARCHIVE_OK, archive_read_finish(a)); + return; + } + assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); + extract_reference_file(name); + assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, name, 2)); + + /* Read entries, match up names with list above. */ + for (i = 0; i < 6; ++i) { + failure("Could not read file %d (%s) from %s", i, n[i], name); + assertEqualIntA(a, ARCHIVE_OK, + archive_read_next_header(a, &ae)); + assertEqualString(n[i], archive_entry_pathname(ae)); + } + + /* Verify the end-of-archive. */ + assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); + + /* Verify that the format detection worked. */ + assertEqualInt(archive_compression(a), ARCHIVE_COMPRESSION_LZMA); + assertEqualString(archive_compression_name(a), "lzma"); + assertEqualInt(archive_format(a), ARCHIVE_FORMAT_TAR_USTAR); + + assertEqualInt(ARCHIVE_OK, archive_read_close(a)); + assertEqualInt(ARCHIVE_OK, archive_read_finish(a)); +} + + +DEFINE_TEST(test_compat_lzma) +{ + /* This sample has been added junk datas to its tail. */ + compat_lzma("test_compat_lzma_1.tlz"); + /* This sample has been made by lzma with option -e, + * the first byte of which is 0x5e. + * Not supported in libarchive 2.7.* and earlier */ + compat_lzma("test_compat_lzma_2.tlz"); + /* This sample has been made by lzma of LZMA SDK with + * option -d12, second byte and third byte of which is + * not zero. 
+ * Not supported in libarchive 2.7.* and earlier */ + compat_lzma("test_compat_lzma_3.tlz"); +} diff --git a/libarchive/test/test_compat_lzma_1.tlz.uu b/libarchive/test/test_compat_lzma_1.tlz.uu new file mode 100644 index 000000000..ceee3b979 --- /dev/null +++ b/libarchive/test/test_compat_lzma_1.tlz.uu @@ -0,0 +1,8 @@ +begin 644 test_compat_lzma_1.tlz +M70``@`#__________P`S##P;IXPT!HUK`DO\DC[V2OB%Z^'=ZT59ANYMTD(/ +M^W;\8!%O7<+P&=#(9W<_!$Z.7/Y<&\(8+E0^,_-\Z"D^P'N0J^4-UH"WMJ<& +MV-P6=Y[-FY$IFNZ="RF24TO.B7EP[F]BGMJSP[]OZ_P9/#J'T=;7E&&A@J<[ +MA^C'Q*/Y&I)2^T930'MJTK-98U0D9R*-X2^5__6H:+A4:&ES(&ES('5N`W2"/3R1F1:P:&Q9A +MGH2JJI9$C?8.=WTE:O<1WA@X>DK-Y#SW;I2!P;NYG^2"-(D9/E(D_0XK_H,\ +95*/V"T#E9ZO][@'R,6E&^A([.##_\M#YU@`` +` +end diff --git a/libarchive/test/test_compat_lzma_3.tlz.uu b/libarchive/test/test_compat_lzma_3.tlz.uu new file mode 100644 index 000000000..1976f364d --- /dev/null +++ b/libarchive/test/test_compat_lzma_3.tlz.uu @@ -0,0 +1,7 @@ +begin 644 test_compat_lzma_3.tlz +M70`0````'``````````S##P;IXPT!HUK`DO\DC[V2OB%Z^'=ZT59ANYMTD(1 +M$Y^=;\4%U_CXKQ*F$OFZKEQUG)1U8="](V<2K"U1\Z6%H(UNQ[Y3.=D'>_G- +MCO71X+M*7WH7$D1&E9Y$XHW,(`[X";GGTO+,'&1?F%<@`.$-OV;8P1?*M$A" +:MA+1XONREMK,1('455L=X1>WC#1YW"('I@`` +` +end