- Checking of the first byte of LZMA stream.
* lzma of XZ Utils with option -e records 0x5d in the first byte.
* It is possible that lzma of LZMA SDK records various data in the
first byte.
- Checking of second and third bytes of LZMA stream.
* lzma of LZMA SDK with option -d12,-d13,-d14 and -d15 records
0x1000,0x2000,0x4000 and 0x8000 in second through fifth bytes
of LZMA stream. It means second and third bytes are not always
zero.
- Checking of sixth through fourteenth bytes of LZMA stream.
* This bytes are recorded uncompressed size and lzma of XZ Utils
always records -1 in this field.
SVN-Revision: 1584
libarchive/test/test_compat_bzip2.c \
libarchive/test/test_compat_gtar.c \
libarchive/test/test_compat_gzip.c \
+ libarchive/test/test_compat_lzma.c \
libarchive/test/test_compat_solaris_tar_acl.c \
libarchive/test/test_compat_tar_hardlink.c \
libarchive/test/test_compat_xz.c \
libarchive/test/test_compat_gtar_1.tar.uu \
libarchive/test/test_compat_gzip_1.tgz.uu \
libarchive/test/test_compat_gzip_2.tgz.uu \
+ libarchive/test/test_compat_lzma_1.tlz.uu \
+ libarchive/test/test_compat_lzma_2.tlz.uu \
+ libarchive/test/test_compat_lzma_3.tlz.uu \
libarchive/test/test_compat_solaris_tar_acl.tar.uu \
libarchive/test/test_compat_tar_hardlink_1.tar.uu \
libarchive/test/test_compat_xz_1.txz.uu \
#endif
#include "archive.h"
+#include "archive_endian.h"
#include "archive_private.h"
#include "archive_read_private.h"
{
const unsigned char *buffer;
ssize_t avail;
+ uint32_t dicsize;
+ uint64_t uncompressed_size;
int bits_checked;
(void)self; /* UNUSED */
- buffer = __archive_read_filter_ahead(filter, 6, &avail);
+ buffer = __archive_read_filter_ahead(filter, 14, &avail);
if (buffer == NULL)
return (0);
- /* First byte of raw LZMA stream is always 0x5d. */
+ /* First byte of raw LZMA stream is commonly 0x5d.
+ * The first byte is a special number, which consists of
+ * three parameters of LZMA compression, a number of literal
+ * context bits(which is from 0 to 8, default is 3), a number
+ * of literal pos bits(which is from 0 to 4, default is 0),
+ * a number of pos bits(which is from 0 to 4, default is 2).
+ * The first byte is made by
+ * (pos bits * 5 + literal pos bit) * 9 + * literal contest bit,
+ * and so the default value in this field is
+ * (2 * 5 + 0) * 9 + 3 = 0x5d.
+ * lzma of LZMA SDK has options to change those parameters.
+ * It means a range of this field is from 0 to 224. And lzma of
+ * XZ Utils with option -e records 0x5e in this field. */
+ /* NOTE: If this checking of the first byte increases false
+ * recognition, we should allow only 0x5d and 0x5e for the first
+ * byte of LZMA stream. */
bits_checked = 0;
- if (buffer[0] != 0x5d)
- return (0);
- bits_checked += 8;
-
- /* Second through fifth bytes are dictionary code, stored in
- * little-endian order. The two least-significant bytes are
- * always zero. */
- if (buffer[1] != 0 || buffer[2] != 0)
+ if (buffer[0] > (4 * 5 + 4) * 9 + 8)
return (0);
- bits_checked += 16;
-
- /* ??? TODO: Fix this. ??? */
- /* NSIS format check uses this, but I've seen tar.lzma
- * archives where this byte is 0xff, not 0. Can it
- * ever be anything other than 0 or 0xff?
- */
-#if 0
- if (buffer[5] != 0)
+ /* Most likely value in the first byte of LZMA stream. */
+ if (buffer[0] == 0x5d || buffer[0] == 0x5e)
+ bits_checked += 8;
+
+ /* Sixth through fourteenth bytes are uncompressed size,
+ * stored in little-endian order. `-1' means uncompressed
+ * size is unknown and lzma of XZ Utils always records `-1'
+ * in this field. */
+ uncompressed_size = archive_le64dec(buffer+5);
+ if (uncompressed_size == (uint64_t)-1LL)
+ bits_checked += 64;
+
+ /* Second through fifth bytes are dictionary size, stored in
+ * little-endian order. The minimum dictionary size is
+ * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
+ * -d12 and the maxinam dictionary size is 1 << 27(128MiB)
+ * which the one uses with option -d27.
+ * NOTE: A comment of LZMA SDK source code says this dictionary
+ * range is from 1 << 12 to 1 << 30. */
+ dicsize = archive_le32dec(buffer+1);
+ switch (dicsize) {
+ case 0x00001000:/* lzma of LZMA SDK option -d12. */
+ case 0x00002000:/* lzma of LZMA SDK option -d13. */
+ case 0x00004000:/* lzma of LZMA SDK option -d14. */
+ case 0x00008000:/* lzma of LZMA SDK option -d15. */
+ case 0x00010000:/* lzma of XZ Utils option -0 and -1.
+ * lzma of LZMA SDK option -d16. */
+ case 0x00020000:/* lzma of LZMA SDK option -d17. */
+ case 0x00040000:/* lzma of LZMA SDK option -d18. */
+ case 0x00080000:/* lzma of XZ Utils option -2.
+ * lzma of LZMA SDK option -d19. */
+ case 0x00100000:/* lzma of XZ Utils option -3.
+ * lzma of LZMA SDK option -d20. */
+ case 0x00200000:/* lzma of XZ Utils option -4.
+ * lzma of LZMA SDK option -d21. */
+ case 0x00400000:/* lzma of XZ Utils option -5.
+ * lzma of LZMA SDK option -d22. */
+ case 0x00800000:/* lzma of XZ Utils option -6.
+ * lzma of LZMA SDK option -d23. */
+ case 0x01000000:/* lzma of XZ Utils option -7.
+ * lzma of LZMA SDK option -d24. */
+ case 0x02000000:/* lzma of XZ Utils option -8.
+ * lzma of LZMA SDK option -d25. */
+ case 0x04000000:/* lzma of XZ Utils option -9.
+ * lzma of LZMA SDK option -d26. */
+ case 0x08000000:/* lzma of LZMA SDK option -d27. */
+ bits_checked += 32;
+ break;
+ default:
+ /* If a memory usage for encoding was not enough on
+ * the platform where LZMA stream was made, lzma of
+ * XZ Utils automatically decreased the dictionary
+ * size to enough memory for encoding by 1Mi bytes
+ * (1 << 20).*/
+ if (dicsize <= 0x03F00000 && dicsize >= 0x00300000 &&
+ (dicsize & ((1 << 20)-1)) == 0 &&
+ bits_checked == 8 + 64) {
+ bits_checked += 32;
+ break;
+ }
+ /* Otherwise dictionary size is unlikely. But it is
+ * possible that someone makes lzma stream with
+ * liblzma/LZMA SDK in one's dictionary size. */
return (0);
- bits_checked += 8;
-#endif
+ break;
+ }
/* TODO: The above test is still very weak. It would be
* good to do better. */
test_compat_bzip2.c
test_compat_gtar.c
test_compat_gzip.c
+ test_compat_lzma.c
test_compat_solaris_tar_acl.c
test_compat_tar_hardlink.c
test_compat_xz.c
--- /dev/null
+/*-
+ * Copyright (c) 2009 Michihiro NAKAJIMA
+ * Copyright (c) 2003-2008 Tim Kientzle
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "test.h"
+__FBSDID("$FreeBSD$");
+
+/*
+Execute the following to rebuild the data for this program:
+ tail -n +33 test_compat_lzma.c | /bin/sh
+
+# Use lzma command of XZ Utils.
+name=test_compat_lzma_1
+zcmd=lzma
+zsuffix=lzma
+ztar_suffix=tlz
+dir="$name`date +%Y%m%d%H%M%S`.$USER"
+mktarfile()
+{
+mkdir $dir
+echo "f1" > $dir/f1
+echo "f2" > $dir/f2
+echo "f3" > $dir/f3
+mkdir $dir/d1
+echo "f1" > $dir/d1/f1
+echo "f2" > $dir/d1/f2
+echo "f3" > $dir/d1/f3
+(cd $dir; tar cf ../$name.tar f1 f2 f3 d1/f1 d1/f2 d1/f3)
+rm -r $dir
+}
+mktarfile
+$zcmd $name.tar
+mv $name.tar.$zsuffix $name.$ztar_suffix
+echo "This is unrelated junk data at the end of the file" >> $name.$ztar_suffix
+uuencode $name.$ztar_suffix $name.$ztar_suffix > $name.$ztar_suffix.uu
+rm -f $name.$ztar_suffix
+#
+# Use option -e
+#
+name=test_compat_lzma_2
+dir="$name`date +%Y%m%d%H%M%S`.$USER"
+mktarfile
+$zcmd -e $name.tar
+mv $name.tar.$zsuffix $name.$ztar_suffix
+uuencode $name.$ztar_suffix $name.$ztar_suffix > $name.$ztar_suffix.uu
+rm -f $name.$ztar_suffix
+#
+# Use lzma command of LZMA SDK with option -d12.
+#
+name=test_compat_lzma_3
+zcmd=lzmasdk # Change this path to use lzma of LZMA SDK.
+dir="$name`date +%Y%m%d%H%M%S`.$USER"
+mktarfile
+$zcmd e -d12 $name.tar $name.$ztar_suffix
+rm -f $name.tar
+uuencode $name.$ztar_suffix $name.$ztar_suffix > $name.$ztar_suffix.uu
+rm -f $name.$ztar_suffix
+
+exit 0
+*/
+
+/*
+ * Verify our ability to read sample files compatibly with unlzma.
+ *
+ * In particular:
+ * * unlzma will read multiple lzma streams, concatenating the output
+ * * unlzma will read lzma streams which is made by lzma with option -e,
+ * concatenating the output
+ *
+ * Verify our ability to read sample files compatibly with lzma of
+ * LZMA SDK.
+ * * lzma will read lzma streams which is made by lzma with option -d12,
+ * concatenating the output
+ */
+
+/*
+ * All of the sample files have the same contents; they're just
+ * compressed in different ways.
+ */
+static void
+compat_lzma(const char *name)
+{
+ const char *n[7] = { "f1", "f2", "f3", "d1/f1", "d1/f2", "d1/f3", NULL };
+ struct archive_entry *ae;
+ struct archive *a;
+ int i, r;
+
+ assert((a = archive_read_new()) != NULL);
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_support_compression_all(a));
+ r = archive_read_support_compression_lzma(a);
+ if (r == ARCHIVE_WARN) {
+ skipping("lzma reading not fully supported on this platform");
+ assertEqualInt(ARCHIVE_OK, archive_read_finish(a));
+ return;
+ }
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
+ extract_reference_file(name);
+ assertEqualIntA(a, ARCHIVE_OK, archive_read_open_filename(a, name, 2));
+
+ /* Read entries, match up names with list above. */
+ for (i = 0; i < 6; ++i) {
+ failure("Could not read file %d (%s) from %s", i, n[i], name);
+ assertEqualIntA(a, ARCHIVE_OK,
+ archive_read_next_header(a, &ae));
+ assertEqualString(n[i], archive_entry_pathname(ae));
+ }
+
+ /* Verify the end-of-archive. */
+ assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
+
+ /* Verify that the format detection worked. */
+ assertEqualInt(archive_compression(a), ARCHIVE_COMPRESSION_LZMA);
+ assertEqualString(archive_compression_name(a), "lzma");
+ assertEqualInt(archive_format(a), ARCHIVE_FORMAT_TAR_USTAR);
+
+ assertEqualInt(ARCHIVE_OK, archive_read_close(a));
+ assertEqualInt(ARCHIVE_OK, archive_read_finish(a));
+}
+
+
+DEFINE_TEST(test_compat_lzma)
+{
+ /* This sample has been added junk datas to its tail. */
+ compat_lzma("test_compat_lzma_1.tlz");
+ /* This sample has been made by lzma with option -e,
+ * the first byte of which is 0x5e.
+ * Not supported in libarchive 2.7.* and earlier */
+ compat_lzma("test_compat_lzma_2.tlz");
+ /* This sample has been made by lzma of LZMA SDK with
+ * option -d12, second byte and third byte of which is
+ * not zero.
+ * Not supported in libarchive 2.7.* and earlier */
+ compat_lzma("test_compat_lzma_3.tlz");
+}
--- /dev/null
+begin 644 test_compat_lzma_1.tlz
+M70``@`#__________P`S##P;IXPT!HUK`DO\DC[V2OB%Z^'=ZT59ANYMTD(/
+M^W;\8!%O7<+P&=#(9W<_!$Z.7/Y<&\(8+E0^,_-\Z"D^P'N0J^4-UH"WMJ<&
+MV-P6=Y[-FY$IFNZ="RF24TO.B7EP[F]BGMJSP[]OZ_P9/#J'T=;7E&&A@J<[
+MA^C'Q*/Y&I)2^T930'MJTK-98U0D9R*-X2^5__6H:+A4:&ES(&ES('5N<F5L
+F871E9"!J=6YK(&1A=&$@870@=&AE(&5N9"!O9B!T:&4@9FEL90H`
+`
+end
--- /dev/null
+begin 644 test_compat_lzma_2.tlz
+M7@``@`#__________P`S##P;IXPT!HUK`DO\DC[V2OB%Z^'<FN`(!=!,)@8W
+M9R(6\QIOTA6SGM20X;2'6#3B&HC%2XOX2?D['5WD"`>`W2"/3R1F1:P:&Q9A
+MGH2JJI9$C?8.=WTE:O<1WA@X>DK-Y#SW;I2!P;NYG^2"-(D9/E(D_0XK_H,\
+95*/V"T#E9ZO][@'R,6E&^A([.##_\M#YU@``
+`
+end
--- /dev/null
+begin 644 test_compat_lzma_3.tlz
+M70`0````'``````````S##P;IXPT!HUK`DO\DC[V2OB%Z^'=ZT59ANYMTD(1
+M$Y^=;\4%U_CXKQ*F$OFZKEQUG)1U8="](V<2K"U1\Z6%H(UNQ[Y3.=D'>_G-
+MCO71X+M*7WH7$D1&E9Y$XHW,(`[X";GGTO+,'&1?F%<@`.$-OV;8P1?*M$A"
+:MA+1XONREMK,1('455L=X1>WC#1YW"('I@``
+`
+end