From: Tim Kientzle
Date: Wed, 17 Sep 2008 21:34:29 +0000 (-0400)
Subject: LZMA support via GPL lzmadec library.
X-Git-Tag: v2.6.0~86
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b1ab1b2681e432cc1f1d9aa5861b5e31fa170ce5;p=thirdparty%2Flibarchive.git

LZMA support via GPL lzmadec library.
I'd be happier if there were a BSD-licensed lzma library;
this cannot be enabled in FreeBSD base system until that's
resolved.

Submitted by: Miklos Vajna

SVN-Revision: 207
---

diff --git a/Makefile.am b/Makefile.am
index 9c31fa829..cbdf87a18 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -98,6 +98,7 @@ libarchive_la_SOURCES= \
 	libarchive/archive_read_support_compression_gzip.c \
 	libarchive/archive_read_support_compression_none.c \
 	libarchive/archive_read_support_compression_program.c \
+	libarchive/archive_read_support_compression_lzma.c \
 	libarchive/archive_read_support_format_all.c \
 	libarchive/archive_read_support_format_ar.c \
 	libarchive/archive_read_support_format_cpio.c \
diff --git a/README b/README
index 50a9d628f..9eb467c89 100644
--- a/README
+++ b/README
@@ -65,6 +65,7 @@ Currently, the library automatically detects and reads the following:
   * ZIP archives (with uncompressed or "deflate" compressed entries)
   * GNU and BSD 'ar' archives
   * 'mtree' format
+  * lzma compression
 
 The library can write:
   * gzip compression
diff --git a/configure.ac b/configure.ac
index 7cdba25f6..75cb4d143 100644
--- a/configure.ac
+++ b/configure.ac
@@ -164,7 +164,7 @@ AC_HEADER_DIRENT
 AC_HEADER_SYS_WAIT
 AC_CHECK_HEADERS([bzlib.h errno.h ext2fs/ext2_fs.h fcntl.h grp.h])
 AC_CHECK_HEADERS([inttypes.h langinfo.h limits.h linux/fs.h])
-AC_CHECK_HEADERS([locale.h paths.h poll.h pwd.h regex.h stdarg.h])
+AC_CHECK_HEADERS([locale.h lzmadec.h paths.h poll.h pwd.h regex.h stdarg.h])
 AC_CHECK_HEADERS([stdint.h stdlib.h string.h sys/acl.h sys/ioctl.h])
 AC_CHECK_HEADERS([sys/param.h sys/poll.h sys/select.h sys/time.h sys/utime.h])
 AC_CHECK_HEADERS([time.h unistd.h utime.h wchar.h zlib.h])
@@ -172,6 +172,7 @@ AC_CHECK_HEADERS([time.h unistd.h utime.h wchar.h zlib.h])
 # Checks for libraries.
 AC_CHECK_LIB(bz2,BZ2_bzDecompressInit)
 AC_CHECK_LIB(z,inflate)
+AC_CHECK_LIB(lzmadec,lzmadec_decode)
 
 # TODO: Give the user the option of using a pre-existing system
 # libarchive. This will define HAVE_LIBARCHIVE which will cause
diff --git a/libarchive/archive.h b/libarchive/archive.h
index be423a3ad..8ae1757bd 100644
--- a/libarchive/archive.h
+++ b/libarchive/archive.h
@@ -209,6 +209,7 @@ typedef int archive_close_callback(struct archive *, void *_client_data);
 #define	ARCHIVE_COMPRESSION_BZIP2	2
 #define	ARCHIVE_COMPRESSION_COMPRESS	3
 #define	ARCHIVE_COMPRESSION_PROGRAM	4
+#define	ARCHIVE_COMPRESSION_LZMA	5
 
 /*
  * Codes returned by archive_format.
diff --git a/libarchive/archive_read_private.h b/libarchive/archive_read_private.h
index f4d0274b5..af25b043e 100644
--- a/libarchive/archive_read_private.h
+++ b/libarchive/archive_read_private.h
@@ -86,7 +86,7 @@ struct archive_read {
 				    const void **, size_t);
 		ssize_t	(*consume)(struct archive_read *, size_t);
 		off_t (*skip)(struct archive_read *, off_t);
-	}	decompressors[4];
+	}	decompressors[5];
 
 	/* Pointer to current decompressor. */
 	struct decompressor_t *decompressor;
diff --git a/libarchive/archive_read_support_compression_all.c b/libarchive/archive_read_support_compression_all.c
index da2b246be..bb548732e 100644
--- a/libarchive/archive_read_support_compression_all.c
+++ b/libarchive/archive_read_support_compression_all.c
@@ -38,6 +38,9 @@ archive_read_support_compression_all(struct archive *a)
 	archive_read_support_compression_compress(a);
 #if HAVE_ZLIB_H
 	archive_read_support_compression_gzip(a);
+#endif
+#if HAVE_LZMADEC_H
+	archive_read_support_compression_lzma(a);
 #endif
 	return (ARCHIVE_OK);
 }
diff --git a/libarchive/archive_read_support_compression_lzma.c b/libarchive/archive_read_support_compression_lzma.c
new file mode 100644
index 000000000..85b886840
--- /dev/null
+++ b/libarchive/archive_read_support_compression_lzma.c
@@ -0,0 +1,371 @@
+/*-
+ * Copyright (c) 2008 Miklos Vajna
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "archive_platform.h"
+
+__FBSDID("FIXME");
+
+
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#ifdef HAVE_LZMADEC_H
+#include <lzmadec.h>
+#endif
+
+#include "archive.h"
+#include "archive_private.h"
+#include "archive_read_private.h"
+
+#ifdef HAVE_LZMADEC_H
+struct private_data {
+	lzmadec_stream	 stream;
+	unsigned char	*uncompressed_buffer;
+	size_t		 uncompressed_buffer_size;
+	unsigned char	*read_next;
+	int64_t		 total_out;
+	char		 eof; /* True = found end of compressed data. */
+};
+
+static int	finish(struct archive_read *);
+static ssize_t	read_ahead(struct archive_read *, const void **, size_t);
+static ssize_t	read_consume(struct archive_read *, size_t);
+static int	drive_decompressor(struct archive_read *a, struct private_data *);
+#endif
+
+/* These two functions are defined even if we lack the library. See below. */
+static int	bid(const void *, size_t);
+static int	init(struct archive_read *, const void *, size_t);
+
+int
+archive_read_support_compression_lzma(struct archive *_a)
+{
+	struct archive_read *a = (struct archive_read *)_a;
+	if (__archive_read_register_compression(a, bid, init) != NULL)
+		return (ARCHIVE_OK);
+	return (ARCHIVE_FATAL);
+}
+
+/*
+ * Test whether we can handle this data.
+ *
+ * This logic returns zero if any part of the signature fails. It
+ * also tries to Do The Right Thing if a very short buffer prevents us
+ * from verifying as much as we would like.
+ */
+static int
+bid(const void *buff, size_t len)
+{
+	const unsigned char *buffer;
+	int bits_checked;
+
+	if (len < 1)
+		return (0);
+
+	buffer = (const unsigned char *)buff;
+	bits_checked = 0;
+	if (buffer[0] != 0x5d)	/* Verify first ID byte. */
+		return (0);
+	bits_checked += 8;
+
+	return (bits_checked);
+}
+
+
+#ifndef HAVE_LZMADEC_H
+
+/*
+ * If we don't have the library on this system, we can't actually do the
+ * decompression. We can, however, still detect compressed archives
+ * and emit a useful message.
+ */
+static int
+init(struct archive_read *a, const void *buff, size_t n)
+{
+	(void)a;	/* UNUSED */
+	(void)buff;	/* UNUSED */
+	(void)n;	/* UNUSED */
+
+	archive_set_error(&a->archive, -1,
+	    "This version of libarchive was compiled without lzma support");
+	return (ARCHIVE_FATAL);
+}
+
+
+#else
+
+/*
+ * Setup the callbacks.
+ */
+static int
+init(struct archive_read *a, const void *buff, size_t n)
+{
+	struct private_data *state;
+	int ret;
+
+	a->archive.compression_code = ARCHIVE_COMPRESSION_LZMA;
+	a->archive.compression_name = "lzma";
+
+	state = (struct private_data *)malloc(sizeof(*state));
+	if (state == NULL) {
+		archive_set_error(&a->archive, ENOMEM,
+		    "Can't allocate data for %s decompression",
+		    a->archive.compression_name);
+		return (ARCHIVE_FATAL);
+	}
+	memset(state, 0, sizeof(*state));
+
+	state->uncompressed_buffer_size = 64 * 1024;
+	state->uncompressed_buffer = (unsigned char *)malloc(state->uncompressed_buffer_size);
+	state->stream.next_out = state->uncompressed_buffer;
+	state->read_next = state->uncompressed_buffer;
+	state->stream.avail_out = state->uncompressed_buffer_size;
+
+	if (state->uncompressed_buffer == NULL) {
+		archive_set_error(&a->archive, ENOMEM,
+		    "Can't allocate %s decompression buffers",
+		    a->archive.compression_name);
+		free(state);
+		return (ARCHIVE_FATAL);
+	}
+
+	/*
+	 * A bug in lzmadec.h: stream.next_in should be marked 'const'
+	 * but isn't (the library never alters data through the
+	 * next_in pointer, only reads it). The result: this ugly
+	 * cast to remove 'const'.
+	 */
+	state->stream.next_in = (uint8_t *)(uintptr_t)(const void *)buff;
+	state->stream.avail_in = n;
+
+	a->decompressor->read_ahead = read_ahead;
+	a->decompressor->consume = read_consume;
+	a->decompressor->skip = NULL; /* not supported */
+	a->decompressor->finish = finish;
+
+	/* Initialize compression library. */
+	ret = lzmadec_init(&(state->stream));
+	if (ret == LZMADEC_OK) {
+		a->decompressor->data = state;
+		return (ARCHIVE_OK);
+	}
+
+	/* Library setup failed: Clean up. */
+	archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+	    "Internal error initializing %s library",
+	    a->archive.compression_name);
+	free(state->uncompressed_buffer);
+	free(state);
+
+	/* Override the error message if we know what really went wrong. */
+	switch (ret) {
+	case LZMADEC_HEADER_ERROR:
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+		    "Internal error initializing compression library: "
+		    "invalid header");
+		break;
+	case LZMADEC_MEM_ERROR:
+		archive_set_error(&a->archive, ENOMEM,
+		    "Internal error initializing compression library: "
+		    "not enough memory");
+		break;
+	}
+
+	return (ARCHIVE_FATAL);
+}
+
+/*
+ * Return a block of data from the decompression buffer. Decompress more
+ * as necessary.
+ */
+static ssize_t
+read_ahead(struct archive_read *a, const void **p, size_t min)
+{
+	struct private_data *state;
+	size_t read_avail, was_avail;
+	int ret;
+
+	state = (struct private_data *)a->decompressor->data;
+	if (!a->client_reader) {
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
+		    "No read callback is registered? "
+		    "This is probably an internal programming error.");
+		return (ARCHIVE_FATAL);
+	}
+
+	read_avail = state->stream.next_out - state->read_next;
+
+	if (read_avail + state->stream.avail_out < min) {
+		memmove(state->uncompressed_buffer, state->read_next,
+		    read_avail);
+		state->read_next = state->uncompressed_buffer;
+		state->stream.next_out = state->read_next + read_avail;
+		state->stream.avail_out
+		    = state->uncompressed_buffer_size - read_avail;
+	}
+
+	while (read_avail < min &&		/* Haven't satisfied min. */
+	    read_avail < state->uncompressed_buffer_size) { /* !full */
+		was_avail = read_avail;
+		if ((ret = drive_decompressor(a, state)) < ARCHIVE_OK)
+			return (ret);
+		if (ret == ARCHIVE_EOF)
+			break; /* Break on EOF even if we haven't met min. */
+		read_avail = state->stream.next_out - state->read_next;
+		if (was_avail == read_avail) /* No progress? */
+			break;
+	}
+
+	*p = state->read_next;
+	return (read_avail);
+}
+
+/*
+ * Mark a previously-returned block of data as read.
+ */
+static ssize_t
+read_consume(struct archive_read *a, size_t n)
+{
+	struct private_data *state;
+
+	state = (struct private_data *)a->decompressor->data;
+	a->archive.file_position += n;
+	state->read_next += n;
+	if (state->read_next > state->stream.next_out)
+		__archive_errx(1, "Request to consume too many "
+		    "bytes from lzma decompressor");
+	return (n);
+}
+
+/*
+ * Clean up the decompressor.
+ */
+static int
+finish(struct archive_read *a)
+{
+	struct private_data *state;
+	int ret;
+
+	state = (struct private_data *)a->decompressor->data;
+	ret = ARCHIVE_OK;
+	switch (lzmadec_end(&(state->stream))) {
+	case LZMADEC_OK:
+		break;
+	default:
+		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+		    "Failed to clean up %s compressor",
+		    a->archive.compression_name);
+		ret = ARCHIVE_FATAL;
+	}
+
+	free(state->uncompressed_buffer);
+	free(state);
+
+	a->decompressor->data = NULL;
+	return (ret);
+}
+
+/*
+ * Utility function to pull data through decompressor, reading input
+ * blocks as necessary.
+ */
+static int
+drive_decompressor(struct archive_read *a, struct private_data *state)
+{
+	ssize_t ret;
+	int decompressed, total_decompressed;
+	uint8_t *output;
+	const void *read_buf;
+
+	if (state->eof)
+		return (ARCHIVE_EOF);
+	total_decompressed = 0;
+	for (;;) {
+		if (state->stream.avail_in == 0) {
+			read_buf = state->stream.next_in;
+			ret = (a->client_reader)(&a->archive, a->client_data,
+			    &read_buf);
+			state->stream.next_in = (void *)(uintptr_t)read_buf;
+			if (ret < 0) {
+				/*
+				 * TODO: Find a better way to handle
+				 * this read failure.
+				 */
+				goto fatal;
+			}
+			if (ret == 0 && total_decompressed == 0) {
+				archive_set_error(&a->archive, EIO,
+				    "Premature end of %s compressed data",
+				    a->archive.compression_name);
+				return (ARCHIVE_FATAL);
+			}
+			a->archive.raw_position += ret;
+			state->stream.avail_in = ret;
+		}
+
+		{
+			output = state->stream.next_out;
+
+			/* Decompress some data. */
+			ret = lzmadec_decode(&(state->stream), state->stream.avail_in == 0);
+			decompressed = state->stream.next_out - output;
+
+			/* Accumulate the total bytes of output. */
+			state->total_out += decompressed;
+			total_decompressed += decompressed;
+
+			switch (ret) {
+			case LZMADEC_OK: /* Decompressor made some progress. */
+				if (decompressed > 0)
+					return (ARCHIVE_OK);
+				break;
+			case LZMADEC_STREAM_END: /* Found end of stream. */
+				state->eof = 1;
+				return (ARCHIVE_OK);
+			default:
+				/* Any other return value is an error. */
+				goto fatal;
+			}
+		}
+	}
+	return (ARCHIVE_OK);
+
+	/* Return a fatal error. */
+fatal:
+	archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
+	    "%s decompression failed", a->archive.compression_name);
+	return (ARCHIVE_FATAL);
+}
+
+#endif /* HAVE_LZMADEC_H */