From: Timo Sirainen Date: Sun, 24 Nov 2013 21:02:13 +0000 (+0200) Subject: lib-compression: Added support for liblzma (xz) X-Git-Tag: 2.2.9~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=12983e9d3b4ebdfb1e14f197c153304b3af44b59;p=thirdparty%2Fdovecot%2Fcore.git lib-compression: Added support for liblzma (xz) Annoyingly this is mainly copy&pasted [io]stream-bzlib, but I'm not sure if it's worth the effort to try to create common functions for them. --- diff --git a/configure.ac b/configure.ac index 7dcdeb4766..d3c3947ecf 100644 --- a/configure.ac +++ b/configure.ac @@ -179,6 +179,11 @@ AS_HELP_STRING([--with-bzlib], [Build with bzlib compression support]), TEST_WITH(bzlib, $withval), want_bzlib=auto) +AC_ARG_WITH(lzma, +AS_HELP_STRING([--with-lzma], [Build with LZMA compression support]), + TEST_WITH(lzma, $withval), + want_lzma=auto) + AC_ARG_WITH(libcap, AS_HELP_STRING([--with-libcap], [Build with libcap support (Dropping capabilities).]), TEST_WITH(libcap, $withval), @@ -2657,6 +2662,25 @@ if test "$want_bzlib" != "no"; then fi ]) fi + +if test "$want_lzma" != "no"; then + AC_CHECK_HEADER(lzma.h, [ + AC_CHECK_LIB(lzma, lzma_stream_decoder, [ + have_lzma=yes + have_compress_lib=yes + AC_DEFINE(HAVE_LZMA,, Define if you have lzma library) + COMPRESS_LIBS="$COMPRESS_LIBS -llzma" + ], [ + if test "$want_lzma" = "yes"; then + AC_ERROR([Can't build with lzma support: liblzma not found]) + fi + ]) + ], [ + if test "$want_lzma" = "yes"; then + AC_ERROR([Can't build with lzma support: lzma.h not found]) + fi + ]) +fi AC_SUBST(COMPRESS_LIBS) AM_CONDITIONAL(BUILD_ZLIB_PLUGIN, test "$have_compress_lib" = "yes") diff --git a/src/lib-compression/Makefile.am b/src/lib-compression/Makefile.am index a119091eef..c353742aff 100644 --- a/src/lib-compression/Makefile.am +++ b/src/lib-compression/Makefile.am @@ -6,8 +6,10 @@ AM_CPPFLAGS = \ libcompression_la_SOURCES = \ compression.c \ + istream-lzma.c \ istream-zlib.c \ istream-bzlib.c \ + ostream-lzma.c \ ostream-zlib.c \ ostream-bzlib.c libcompression_la_LIBADD = \ diff --git a/src/lib-compression/compression.c b/src/lib-compression/compression.c index bed1e5504f..8fc515145d 100644 --- a/src/lib-compression/compression.c +++ b/src/lib-compression/compression.c @@ -16,6 +16,10 @@ # define i_stream_create_bz2 NULL # define o_stream_create_bz2 NULL #endif +#ifndef HAVE_LZMA +# define i_stream_create_lzma NULL +# define o_stream_create_lzma NULL +#endif static bool is_compressed_zlib(struct istream *input) { @@ -49,6 +53,16 @@ static bool is_compressed_bzlib(struct istream *input) return memcmp(data + 4, "\x31\x41\x59\x26\x53\x59", 6) == 0; } +static bool is_compressed_xz(struct istream *input) +{ + const unsigned char *data; + size_t size; + + if (i_stream_read_data(input, &data, &size, 6 - 1) <= 0) + return FALSE; + return memcmp(data, "\xfd\x37\x7a\x58\x5a", 6) == 0; +} + const struct compression_handler *compression_lookup_handler(const char *name) { unsigned int i; @@ -97,5 +111,7 @@ const struct compression_handler compression_handlers[] = { i_stream_create_bz2, o_stream_create_bz2 }, { "deflate", NULL, NULL, i_stream_create_deflate, o_stream_create_deflate }, + { "xz", ".xz", is_compressed_xz, + i_stream_create_lzma, o_stream_create_lzma }, { NULL, NULL, NULL, NULL, NULL } }; diff --git a/src/lib-compression/istream-lzma.c b/src/lib-compression/istream-lzma.c new file mode 100644 index 0000000000..03b9520b3e --- /dev/null +++ b/src/lib-compression/istream-lzma.c @@ -0,0 +1,342 @@ +/* Copyright (c) 2010-2013 Dovecot authors, see the included COPYING file */ + +#include "lib.h" + +#ifdef HAVE_LZMA + +#include "istream-private.h" +#include "istream-zlib.h" +#include + +#define CHUNK_SIZE (1024*64) + +#define LZMA_MEMORY_LIMIT (1024*1024*80) + +struct lzma_istream { + struct istream_private istream; + + lzma_stream strm; + uoff_t eof_offset, stream_size; + size_t high_pos; + struct stat last_parent_statbuf; + + unsigned int log_errors:1; + unsigned int marked:1; + unsigned int strm_closed:1; +}; + +static void i_stream_lzma_close(struct iostream_private *stream, + bool close_parent) +{ + struct lzma_istream *zstream = (struct lzma_istream *)stream; + + if (!zstream->strm_closed) { + lzma_end(&zstream->strm); + zstream->strm_closed = TRUE; + } + if (close_parent) + i_stream_close(zstream->istream.parent); +} + +static void lzma_read_error(struct lzma_istream *zstream, const char *error) +{ + io_stream_set_error(&zstream->istream.iostream, + "lzma.read(%s): %s at %"PRIuUOFF_T, + i_stream_get_name(&zstream->istream.istream), error, + zstream->istream.abs_start_offset + + zstream->istream.istream.v_offset); + if (zstream->log_errors) + i_error("%s", zstream->istream.iostream.error); +} + +static ssize_t i_stream_lzma_read(struct istream_private *stream) +{ + struct lzma_istream *zstream = (struct lzma_istream *)stream; + const unsigned char *data; + uoff_t high_offset; + size_t size, out_size; + lzma_ret ret; + + high_offset = stream->istream.v_offset + (stream->pos - stream->skip); + if (zstream->eof_offset == high_offset) { + i_assert(zstream->high_pos == 0 || + zstream->high_pos == stream->pos); + stream->istream.eof = TRUE; + return -1; + } + + if (stream->pos < zstream->high_pos) { + /* we're here because we seeked back within the read buffer. */ + ret = zstream->high_pos - stream->pos; + stream->pos = zstream->high_pos; + zstream->high_pos = 0; + + if (zstream->eof_offset != (uoff_t)-1) { + high_offset = stream->istream.v_offset + + (stream->pos - stream->skip); + i_assert(zstream->eof_offset == high_offset); + stream->istream.eof = TRUE; + } + return ret; + } + zstream->high_pos = 0; + + if (stream->pos + CHUNK_SIZE > stream->buffer_size) { + /* try to keep at least CHUNK_SIZE available */ + if (!zstream->marked && stream->skip > 0) { + /* don't try to keep anything cached if we don't + have a seek mark. */ + i_stream_compress(stream); + } + if (stream->max_buffer_size == 0 || + stream->buffer_size < stream->max_buffer_size) + i_stream_grow_buffer(stream, CHUNK_SIZE); + + if (stream->pos == stream->buffer_size) { + if (stream->skip > 0) { + /* lose our buffer cache */ + i_stream_compress(stream); + } + + if (stream->pos == stream->buffer_size) + return -2; /* buffer full */ + } + } + + if (i_stream_read_data(stream->parent, &data, &size, 0) < 0) { + if (stream->parent->stream_errno != 0) { + stream->istream.stream_errno = + stream->parent->stream_errno; + } else { + i_assert(stream->parent->eof); + lzma_read_error(zstream, "unexpected EOF"); + stream->istream.stream_errno = EINVAL; + } + return -1; + } + if (size == 0) { + /* no more input */ + i_assert(!stream->istream.blocking); + return 0; + } + + zstream->strm.next_in = data; + zstream->strm.avail_in = size; + + out_size = stream->buffer_size - stream->pos; + zstream->strm.next_out = stream->w_buffer + stream->pos; + zstream->strm.avail_out = out_size; + ret = lzma_code(&zstream->strm, LZMA_RUN); + + out_size -= zstream->strm.avail_out; + stream->pos += out_size; + + i_stream_skip(stream->parent, size - zstream->strm.avail_in); + + switch (ret) { + case LZMA_OK: + break; + case LZMA_DATA_ERROR: + case LZMA_BUF_ERROR: + lzma_read_error(zstream, "corrupted data"); + stream->istream.stream_errno = EINVAL; + return -1; + case LZMA_FORMAT_ERROR: + lzma_read_error(zstream, "wrong magic in header (not xz file?)"); + stream->istream.stream_errno = EINVAL; + return -1; + case LZMA_OPTIONS_ERROR: + lzma_read_error(zstream, "Unsupported xz options"); + stream->istream.stream_errno = EINVAL; + return -1; + case LZMA_MEM_ERROR: + i_fatal_status(FATAL_OUTOFMEM, "lzma.read(%s): Out of memory", + i_stream_get_name(&stream->istream)); + case LZMA_STREAM_END: + zstream->eof_offset = stream->istream.v_offset + + (stream->pos - stream->skip); + zstream->stream_size = zstream->eof_offset; + if (out_size == 0) { + stream->istream.eof = TRUE; + return -1; + } + break; + default: + lzma_read_error(zstream, t_strdup_printf( + "lzma_code() failed with %d", ret)); + stream->istream.stream_errno = EINVAL; + return -1; + } + if (out_size == 0) { + /* read more input */ + return i_stream_lzma_read(stream); + } + return out_size; +} + +static void i_stream_lzma_init(struct lzma_istream *zstream) +{ + lzma_ret ret; + + ret = lzma_stream_decoder(&zstream->strm, LZMA_MEMORY_LIMIT, + LZMA_CONCATENATED); + switch (ret) { + case LZMA_OK: + break; + case LZMA_MEM_ERROR: + i_fatal_status(FATAL_OUTOFMEM, "lzma: Out of memory"); + default: + i_fatal("lzma_stream_decoder() failed with ret=%d", ret); + } +} + +static void i_stream_lzma_reset(struct lzma_istream *zstream) +{ + struct istream_private *stream = &zstream->istream; + + i_stream_seek(stream->parent, stream->parent_start_offset); + zstream->eof_offset = (uoff_t)-1; + zstream->strm.next_in = NULL; + zstream->strm.avail_in = 0; + + stream->parent_expected_offset = stream->parent_start_offset; + stream->skip = stream->pos = 0; + stream->istream.v_offset = 0; + zstream->high_pos = 0; + + lzma_end(&zstream->strm); + i_stream_lzma_init(zstream); +} + +static void +i_stream_lzma_seek(struct istream_private *stream, uoff_t v_offset, bool mark) +{ + struct lzma_istream *zstream = (struct lzma_istream *) stream; + uoff_t start_offset = stream->istream.v_offset - stream->skip; + + if (v_offset < start_offset) { + /* have to seek backwards */ + i_stream_lzma_reset(zstream); + start_offset = 0; + } else if (zstream->high_pos != 0) { + stream->pos = zstream->high_pos; + zstream->high_pos = 0; + } + + if (v_offset <= start_offset + stream->pos) { + /* seeking backwards within what's already cached */ + stream->skip = v_offset - start_offset; + stream->istream.v_offset = v_offset; + zstream->high_pos = stream->pos; + stream->pos = stream->skip; + } else { + /* read and cache forward */ + do { + size_t avail = stream->pos - stream->skip; + + if (stream->istream.v_offset + avail >= v_offset) { + i_stream_skip(&stream->istream, + v_offset - + stream->istream.v_offset); + break; + } + + i_stream_skip(&stream->istream, avail); + } while (i_stream_read(&stream->istream) >= 0); + + if (stream->istream.v_offset != v_offset) { + /* some failure, we've broken it */ + if (stream->istream.stream_errno != 0) { + i_error("lzma_istream.seek(%s) failed: %s", + i_stream_get_name(&stream->istream), + strerror(stream->istream.stream_errno)); + i_stream_close(&stream->istream); + } else { + /* unexpected EOF. allow it since we may just + want to check if there's anything.. */ + i_assert(stream->istream.eof); + } + } + } + + if (mark) + zstream->marked = TRUE; +} + +static int +i_stream_lzma_stat(struct istream_private *stream, bool exact) +{ + struct lzma_istream *zstream = (struct lzma_istream *) stream; + const struct stat *st; + size_t size; + + if (i_stream_stat(stream->parent, exact, &st) < 0) + return -1; + stream->statbuf = *st; + + /* when exact=FALSE always return the parent stat's size, even if we + know the exact value. this is necessary because otherwise e.g. mbox + code can see two different values and think that a compressed mbox + file keeps changing. */ + if (!exact) + return 0; + + if (zstream->stream_size == (uoff_t)-1) { + uoff_t old_offset = stream->istream.v_offset; + + do { + size = i_stream_get_data_size(&stream->istream); + i_stream_skip(&stream->istream, size); + } while (i_stream_read(&stream->istream) > 0); + + i_stream_seek(&stream->istream, old_offset); + if (zstream->stream_size == (uoff_t)-1) + return -1; + } + stream->statbuf.st_size = zstream->stream_size; + return 0; +} + +static void i_stream_lzma_sync(struct istream_private *stream) +{ + struct lzma_istream *zstream = (struct lzma_istream *) stream; + const struct stat *st; + + if (i_stream_stat(stream->parent, FALSE, &st) < 0) { + if (memcmp(&zstream->last_parent_statbuf, + st, sizeof(*st)) == 0) { + /* a compressed file doesn't change unexpectedly, + don't clear our caches unnecessarily */ + return; + } + zstream->last_parent_statbuf = *st; + } + i_stream_lzma_reset(zstream); +} + +struct istream *i_stream_create_lzma(struct istream *input, bool log_errors) +{ + struct lzma_istream *zstream; + + zstream = i_new(struct lzma_istream, 1); + zstream->eof_offset = (uoff_t)-1; + zstream->stream_size = (uoff_t)-1; + zstream->log_errors = log_errors; + + i_stream_lzma_init(zstream); + + zstream->istream.iostream.close = i_stream_lzma_close; + zstream->istream.max_buffer_size = input->real_stream->max_buffer_size; + zstream->istream.read = i_stream_lzma_read; + zstream->istream.seek = i_stream_lzma_seek; + zstream->istream.stat = i_stream_lzma_stat; + zstream->istream.sync = i_stream_lzma_sync; + + zstream->istream.istream.readable_fd = FALSE; + zstream->istream.istream.blocking = input->blocking; + zstream->istream.istream.seekable = input->seekable; + + return i_stream_create(&zstream->istream, input, + i_stream_get_fd(input)); +} +#endif diff --git a/src/lib-compression/istream-zlib.h b/src/lib-compression/istream-zlib.h index 07497f660e..f81a0e1eb4 100644 --- a/src/lib-compression/istream-zlib.h +++ b/src/lib-compression/istream-zlib.h @@ -4,5 +4,6 @@ struct istream *i_stream_create_gz(struct istream *input, bool log_errors); struct istream *i_stream_create_deflate(struct istream *input, bool log_errors); struct istream *i_stream_create_bz2(struct istream *input, bool log_errors); +struct istream *i_stream_create_lzma(struct istream *input, bool log_errors); #endif diff --git a/src/lib-compression/ostream-lzma.c b/src/lib-compression/ostream-lzma.c new file mode 100644 index 0000000000..918e123c1e --- /dev/null +++ b/src/lib-compression/ostream-lzma.c @@ -0,0 +1,229 @@ +/* Copyright (c) 2010-2013 Dovecot authors, see the included COPYING file */ + +#include "lib.h" + +#ifdef HAVE_LZMA + +#include "ostream-private.h" +#include "ostream-zlib.h" +#include + +#define CHUNK_SIZE (1024*64) + +struct lzma_ostream { + struct ostream_private ostream; + lzma_stream strm; + + unsigned char outbuf[CHUNK_SIZE]; + unsigned int outbuf_offset, outbuf_used; + + unsigned int flushed:1; +}; + +static void o_stream_lzma_close(struct iostream_private *stream, + bool close_parent) +{ + struct lzma_ostream *zstream = (struct lzma_ostream *)stream; + + (void)o_stream_flush(&zstream->ostream.ostream); + lzma_end(&zstream->strm); + if (close_parent) + o_stream_close(zstream->ostream.parent); +} + +static int o_stream_zlib_send_outbuf(struct lzma_ostream *zstream) +{ + ssize_t ret; + size_t size; + + if (zstream->outbuf_used == 0) + return 1; + + size = zstream->outbuf_used - zstream->outbuf_offset; + i_assert(size > 0); + ret = o_stream_send(zstream->ostream.parent, + zstream->outbuf + zstream->outbuf_offset, size); + if (ret < 0) { + o_stream_copy_error_from_parent(&zstream->ostream); + return -1; + } + if ((size_t)ret != size) { + zstream->outbuf_offset += ret; + return 0; + } + zstream->outbuf_offset = 0; + zstream->outbuf_used = 0; + return 1; +} + +static ssize_t +o_stream_lzma_send_chunk(struct lzma_ostream *zstream, + const void *data, size_t size) +{ + lzma_stream *zs = &zstream->strm; + int ret; + + i_assert(zstream->outbuf_used == 0); + + zs->next_in = (void *)data; + zs->avail_in = size; + while (zs->avail_in > 0) { + if (zs->avail_out == 0) { + /* previous block was compressed. send it and start + compression for a new block. */ + zs->next_out = zstream->outbuf; + zs->avail_out = sizeof(zstream->outbuf); + + zstream->outbuf_used = sizeof(zstream->outbuf); + if ((ret = o_stream_zlib_send_outbuf(zstream)) < 0) + return -1; + if (ret == 0) { + /* parent stream's buffer full */ + break; + } + } + + switch (lzma_code(zs, LZMA_RUN)) { + case LZMA_OK: + break; + case LZMA_MEM_ERROR: + i_fatal_status(FATAL_OUTOFMEM, + "lzma.write(%s): Out of memory", + o_stream_get_name(&zstream->ostream.ostream)); + default: + i_unreached(); + } + } + size -= zs->avail_in; + + zstream->flushed = FALSE; + return size; +} + +static int o_stream_lzma_send_flush(struct lzma_ostream *zstream) +{ + lzma_stream *zs = &zstream->strm; + unsigned int len; + bool done = FALSE; + int ret; + + if (zs->avail_in != 0) { + i_assert(zstream->ostream.ostream.last_failed_errno != 0); + zstream->ostream.ostream.stream_errno = + zstream->ostream.ostream.last_failed_errno; + return -1; + } + + if (zstream->flushed) + return 0; + + if ((ret = o_stream_zlib_send_outbuf(zstream)) <= 0) + return ret; + + i_assert(zstream->outbuf_used == 0); + do { + len = sizeof(zstream->outbuf) - zs->avail_out; + if (len != 0) { + zs->next_out = zstream->outbuf; + zs->avail_out = sizeof(zstream->outbuf); + + zstream->outbuf_used = len; + if ((ret = o_stream_zlib_send_outbuf(zstream)) <= 0) + return ret; + if (done) + break; + } + + ret = lzma_code(zs, LZMA_FINISH); + switch (ret) { + case LZMA_STREAM_END: + done = TRUE; + break; + case LZMA_MEM_ERROR: + i_fatal_status(FATAL_OUTOFMEM, + "lzma.write(%s): Out of memory", + o_stream_get_name(&zstream->ostream.ostream)); + default: + i_unreached(); + } + } while (zs->avail_out != sizeof(zstream->outbuf)); + + zstream->flushed = TRUE; + return 0; +} + +static int o_stream_lzma_flush(struct ostream_private *stream) +{ + struct lzma_ostream *zstream = (struct lzma_ostream *)stream; + int ret; + + if (o_stream_lzma_send_flush(zstream) < 0) + return -1; + + ret = o_stream_flush(stream->parent); + if (ret < 0) + o_stream_copy_error_from_parent(stream); + return ret; +} + +static ssize_t +o_stream_lzma_sendv(struct ostream_private *stream, + const struct const_iovec *iov, unsigned int iov_count) +{ + struct lzma_ostream *zstream = (struct lzma_ostream *)stream; + ssize_t ret, bytes = 0; + unsigned int i; + + if ((ret = o_stream_zlib_send_outbuf(zstream)) <= 0) { + /* error / we still couldn't flush existing data to + parent stream. */ + return ret; + } + + for (i = 0; i < iov_count; i++) { + ret = o_stream_lzma_send_chunk(zstream, iov[i].iov_base, + iov[i].iov_len); + if (ret < 0) + return -1; + bytes += ret; + if ((size_t)ret != iov[i].iov_len) + break; + } + stream->ostream.offset += bytes; + + /* avail_in!=0 check is used to detect errors. if it's non-zero here + it simply means we didn't send all the data */ + zstream->strm.avail_in = 0; + return bytes; +} + +struct ostream *o_stream_create_lzma(struct ostream *output, int level) +{ + struct lzma_ostream *zstream; + lzma_ret ret; + + i_assert(level >= 1 && level <= 9); + + zstream = i_new(struct lzma_ostream, 1); + zstream->ostream.sendv = o_stream_lzma_sendv; + zstream->ostream.flush = o_stream_lzma_flush; + zstream->ostream.iostream.close = o_stream_lzma_close; + + ret = lzma_easy_encoder(&zstream->strm, level, LZMA_CHECK_CRC64); + switch (ret) { + case LZMA_OK: + break; + case LZMA_MEM_ERROR: + i_fatal_status(FATAL_OUTOFMEM, "lzma: Out of memory"); + case LZMA_OPTIONS_ERROR: + i_fatal("lzma: Invalid level"); + default: + i_fatal("lzma_easy_encoder() failed with %d", ret); + } + + zstream->strm.next_out = zstream->outbuf; + zstream->strm.avail_out = sizeof(zstream->outbuf); + return o_stream_create(&zstream->ostream, output, + o_stream_get_fd(output)); +} +#endif diff --git a/src/lib-compression/ostream-zlib.h b/src/lib-compression/ostream-zlib.h index 92480cc6ec..8be57fead1 100644 --- a/src/lib-compression/ostream-zlib.h +++ b/src/lib-compression/ostream-zlib.h @@ -4,5 +4,6 @@ struct ostream *o_stream_create_gz(struct ostream *output, int level); struct ostream *o_stream_create_deflate(struct ostream *output, int level); struct ostream *o_stream_create_bz2(struct ostream *output, int level); +struct ostream *o_stream_create_lzma(struct ostream *output, int level); #endif