From 26838cf5c17642f57192753cc5c3880b16b65ba3 Mon Sep 17 00:00:00 2001 From: Sean Purcell Date: Wed, 19 Apr 2017 15:15:09 -0700 Subject: [PATCH] Add Zstandard read support --- CMakeLists.txt | 23 ++ Makefile.am | 1 + build/cmake/config.h.in | 6 + configure.ac | 8 + contrib/android/Android.mk | 1 + libarchive/CMakeLists.txt | 1 + libarchive/archive.h | 3 + libarchive/archive_read_append_filter.c | 4 + libarchive/archive_read_filter.3 | 4 + libarchive/archive_read_support_filter_all.c | 2 + libarchive/archive_read_support_filter_zstd.c | 289 ++++++++++++++++++ 11 files changed, 342 insertions(+) create mode 100644 libarchive/archive_read_support_filter_zstd.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 4c818b083..b53f835a0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -533,6 +533,29 @@ IF(LZ4_FOUND) ENDIF(LZ4_FOUND) MARK_AS_ADVANCED(CLEAR LZ4_INCLUDE_DIR) MARK_AS_ADVANCED(CLEAR LZ4_LIBRARY) +# +# Find Zstd +# +IF (ZSTD_INCLUDE_DIR) + # Already in cache, be silent + SET(ZSTD_FIND_QUIETLY TRUE) +ENDIF (ZSTD_INCLUDE_DIR) + +FIND_PATH(ZSTD_INCLUDE_DIR zstd.h) +FIND_LIBRARY(ZSTD_LIBRARY NAMES zstd libzstd) +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(ZSTD DEFAULT_MSG ZSTD_LIBRARY ZSTD_INCLUDE_DIR) +IF(ZSTD_FOUND) + SET(HAVE_LIBZSTD 1) + SET(HAVE_ZSTD_H 1) + INCLUDE_DIRECTORIES(${ZSTD_INCLUDE_DIR}) + LIST(APPEND ADDITIONAL_LIBS ${ZSTD_LIBRARY}) + # + # TODO: test for static library. 
+ # +ENDIF(ZSTD_FOUND) +MARK_AS_ADVANCED(CLEAR ZSTD_INCLUDE_DIR) +MARK_AS_ADVANCED(CLEAR ZSTD_LIBRARY) # # Check headers diff --git a/Makefile.am b/Makefile.am index 2469b4ed7..ff613d934 100644 --- a/Makefile.am +++ b/Makefile.am @@ -161,6 +161,7 @@ libarchive_la_SOURCES= \ libarchive/archive_read_support_filter_gzip.c \ libarchive/archive_read_support_filter_lrzip.c \ libarchive/archive_read_support_filter_lz4.c \ + libarchive/archive_read_support_filter_zstd.c \ libarchive/archive_read_support_filter_lzop.c \ libarchive/archive_read_support_filter_none.c \ libarchive/archive_read_support_filter_program.c \ diff --git a/build/cmake/config.h.in b/build/cmake/config.h.in index e646213d4..fd5e5ec55 100644 --- a/build/cmake/config.h.in +++ b/build/cmake/config.h.in @@ -695,6 +695,9 @@ typedef uint64_t uintmax_t; /* Define to 1 if you have the `lz4' library (-llz4). */ #cmakedefine HAVE_LIBLZ4 1 +/* Define to 1 if you have the `zstd' library (-lzstd). */ +#cmakedefine HAVE_LIBZSTD 1 + /* Define to 1 if you have the `lzma' library (-llzma). */ #cmakedefine HAVE_LIBLZMA 1 @@ -792,6 +795,9 @@ typedef uint64_t uintmax_t; /* Define to 1 if you have the <lz4.h> header file. */ #cmakedefine HAVE_LZ4_H 1 +/* Define to 1 if you have the <zstd.h> header file. */ +#cmakedefine HAVE_ZSTD_H 1 + /* Define to 1 if you have the <lzmadec.h> header file. 
*/ #cmakedefine HAVE_LZMADEC_H 1 diff --git a/configure.ac b/configure.ac index 1730f4513..5009644f7 100644 --- a/configure.ac +++ b/configure.ac @@ -367,6 +367,14 @@ if test "x$with_lz4" != "xno"; then AC_CHECK_LIB(lz4,LZ4_decompress_safe) fi +AC_ARG_WITH([zstd], + AS_HELP_STRING([--without-zstd], [Don't build support for zstd through libzstd])) + +if test "x$with_zstd" != "xno"; then + AC_CHECK_HEADERS([zstd.h]) + AC_CHECK_LIB(zstd,ZSTD_decompress) +fi + AC_ARG_WITH([lzma], AS_HELP_STRING([--without-lzma], [Don't build support for xz through lzma])) diff --git a/contrib/android/Android.mk b/contrib/android/Android.mk index b82beab4a..87c06b0fc 100644 --- a/contrib/android/Android.mk +++ b/contrib/android/Android.mk @@ -68,6 +68,7 @@ libarchive_src_files := libarchive/archive_acl.c \ libarchive/archive_read_support_filter_gzip.c \ libarchive/archive_read_support_filter_lrzip.c \ libarchive/archive_read_support_filter_lz4.c \ + libarchive/archive_read_support_filter_zstd.c \ libarchive/archive_read_support_filter_lzop.c \ libarchive/archive_read_support_filter_none.c \ libarchive/archive_read_support_filter_program.c \ diff --git a/libarchive/CMakeLists.txt b/libarchive/CMakeLists.txt index 5e958da1c..db239240b 100644 --- a/libarchive/CMakeLists.txt +++ b/libarchive/CMakeLists.txt @@ -82,6 +82,7 @@ SET(libarchive_SOURCES archive_read_support_filter_grzip.c archive_read_support_filter_lrzip.c archive_read_support_filter_lz4.c + archive_read_support_filter_zstd.c archive_read_support_filter_lzop.c archive_read_support_filter_none.c archive_read_support_filter_program.c diff --git a/libarchive/archive.h b/libarchive/archive.h index d6913f35f..a6f853361 100644 --- a/libarchive/archive.h +++ b/libarchive/archive.h @@ -276,6 +276,7 @@ typedef const char *archive_passphrase_callback(struct archive *, #define ARCHIVE_FILTER_LZOP 11 #define ARCHIVE_FILTER_GRZIP 12 #define ARCHIVE_FILTER_LZ4 13 +#define ARCHIVE_FILTER_ZSTD 14 #if ARCHIVE_VERSION_NUMBER < 4000000 #define 
ARCHIVE_COMPRESSION_NONE ARCHIVE_FILTER_NONE @@ -421,6 +422,7 @@ __LA_DECL int archive_read_support_filter_gzip(struct archive *); __LA_DECL int archive_read_support_filter_grzip(struct archive *); __LA_DECL int archive_read_support_filter_lrzip(struct archive *); __LA_DECL int archive_read_support_filter_lz4(struct archive *); +__LA_DECL int archive_read_support_filter_zstd(struct archive *); __LA_DECL int archive_read_support_filter_lzip(struct archive *); __LA_DECL int archive_read_support_filter_lzma(struct archive *); __LA_DECL int archive_read_support_filter_lzop(struct archive *); @@ -770,6 +772,7 @@ __LA_DECL int archive_write_add_filter_grzip(struct archive *); __LA_DECL int archive_write_add_filter_gzip(struct archive *); __LA_DECL int archive_write_add_filter_lrzip(struct archive *); __LA_DECL int archive_write_add_filter_lz4(struct archive *); +__LA_DECL int archive_write_add_filter_zstd(struct archive *); __LA_DECL int archive_write_add_filter_lzip(struct archive *); __LA_DECL int archive_write_add_filter_lzma(struct archive *); __LA_DECL int archive_write_add_filter_lzop(struct archive *); diff --git a/libarchive/archive_read_append_filter.c b/libarchive/archive_read_append_filter.c index 5e4d16307..da7c55b9b 100644 --- a/libarchive/archive_read_append_filter.c +++ b/libarchive/archive_read_append_filter.c @@ -89,6 +89,10 @@ archive_read_append_filter(struct archive *_a, int code) strcpy(str, "lz4"); r1 = archive_read_support_filter_lz4(_a); break; + case ARCHIVE_FILTER_ZSTD: + strcpy(str, "zstd"); + r1 = archive_read_support_filter_zstd(_a); + break; case ARCHIVE_FILTER_LZIP: strcpy(str, "lzip"); r1 = archive_read_support_filter_lzip(_a); diff --git a/libarchive/archive_read_filter.3 b/libarchive/archive_read_filter.3 index 7f020e373..d07e940af 100644 --- a/libarchive/archive_read_filter.3 +++ b/libarchive/archive_read_filter.3 @@ -33,6 +33,7 @@ .Nm archive_read_support_filter_compress , .Nm archive_read_support_filter_gzip , .Nm 
archive_read_support_filter_lz4 , +.Nm archive_read_support_filter_zstd , .Nm archive_read_support_filter_lzma , .Nm archive_read_support_filter_none , .Nm archive_read_support_filter_rpm , @@ -61,6 +62,8 @@ Streaming Archive Library (libarchive, -larchive) .Ft int .Fn archive_read_support_filter_lz4 "struct archive *" .Ft int +.Fn archive_read_support_filter_zstd "struct archive *" +.Ft int .Fn archive_read_support_filter_lzma "struct archive *" .Ft int .Fn archive_read_support_filter_lzop "struct archive *" @@ -94,6 +97,7 @@ Streaming Archive Library (libarchive, -larchive) .Fn archive_read_support_filter_gzip , .Fn archive_read_support_filter_lrzip , .Fn archive_read_support_filter_lz4 , +.Fn archive_read_support_filter_zstd , .Fn archive_read_support_filter_lzma , .Fn archive_read_support_filter_lzop , .Fn archive_read_support_filter_none , diff --git a/libarchive/archive_read_support_filter_all.c b/libarchive/archive_read_support_filter_all.c index 68c53de41..edb508c1d 100644 --- a/libarchive/archive_read_support_filter_all.c +++ b/libarchive/archive_read_support_filter_all.c @@ -71,6 +71,8 @@ archive_read_support_filter_all(struct archive *a) archive_read_support_filter_grzip(a); /* Lz4 falls back to "lz4 -d" command-line program. */ archive_read_support_filter_lz4(a); + /* Zstd falls back to "zstd -d" command-line program. */ + archive_read_support_filter_zstd(a); /* Note: We always return ARCHIVE_OK here, even if some of the * above return ARCHIVE_WARN. The intent here is to enable diff --git a/libarchive/archive_read_support_filter_zstd.c b/libarchive/archive_read_support_filter_zstd.c new file mode 100644 index 000000000..f9bf2e545 --- /dev/null +++ b/libarchive/archive_read_support_filter_zstd.c @@ -0,0 +1,289 @@ +/*- + * Copyright (c) 2009-2011 Sean Purcell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "archive_platform.h"
+
+__FBSDID("$FreeBSD$");
+
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#include <stdio.h>
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#if HAVE_ZSTD_H
+#include <zstd.h>
+#endif
+
+#include "archive.h"
+#include "archive_endian.h"
+#include "archive_private.h"
+#include "archive_read_private.h"
+
+#if HAVE_ZSTD_H && HAVE_LIBZSTD
+
+/* Per-filter state used when decompressing through libzstd. */
+struct private_data {
+	/* Streaming decompressor obtained from ZSTD_createDStream(). */
+	ZSTD_DStream	*dstream;
+	/* Buffer that receives decompressed bytes. */
+	unsigned char	*out_block;
+	/* Capacity of out_block, taken from ZSTD_DStreamOutSize(). */
+	size_t		 out_block_size;
+	/* Running total of decompressed bytes handed to the caller. */
+	int64_t		 total_out;
+	char		 eof; /* True = found end of compressed data. */
+	/* True once ZSTD_initDStream() has been called on dstream. */
+	char		 in_stream;
+};
+
+/* Zstd Filter. 
*/
+static ssize_t	zstd_filter_read(struct archive_read_filter *, const void**);
+static int	zstd_filter_close(struct archive_read_filter *);
+#endif
+
+/*
+ * Note that we can detect zstd compressed files even if we can't decompress
+ * them. (In fact, we like detecting them because we can give better error
+ * messages.) So the bid framework here gets compiled even if no zstd library
+ * is available.
+ */
+static int	zstd_bidder_bid(struct archive_read_filter_bidder *,
+		    struct archive_read_filter *);
+static int	zstd_bidder_init(struct archive_read_filter *);
+
+/*
+ * Register the "zstd" bidder on a read handle.
+ *
+ * Returns ARCHIVE_FATAL when no bidder slot can be obtained, ARCHIVE_OK
+ * when built with the zstd header and library, and otherwise ARCHIVE_WARN
+ * with an error message noting that the external zstd program is used.
+ */
+int
+archive_read_support_filter_zstd(struct archive *_a)
+{
+	struct archive_read *a = (struct archive_read *)_a;
+	struct archive_read_filter_bidder *bidder;
+
+	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
+	    ARCHIVE_STATE_NEW, "archive_read_support_filter_zstd");
+
+	if (__archive_read_get_bidder(a, &bidder) != ARCHIVE_OK)
+		return (ARCHIVE_FATAL);
+
+	bidder->data = NULL;
+	bidder->name = "zstd";
+	bidder->bid = zstd_bidder_bid;
+	bidder->init = zstd_bidder_init;
+	bidder->options = NULL;
+	bidder->free = NULL;
+#if HAVE_ZSTD_H && HAVE_LIBZSTD
+	return (ARCHIVE_OK);
+#else
+	archive_set_error(_a, ARCHIVE_ERRNO_MISC,
+	    "Using external zstd program for zstd decompression");
+	return (ARCHIVE_WARN);
+#endif
+}
+
+/*
+ * Test whether we can handle this data. 
+ */
+static int
+zstd_bidder_bid(struct archive_read_filter_bidder *self,
+    struct archive_read_filter *filter)
+{
+	const unsigned char *buffer;
+	ssize_t avail;
+	unsigned prefix;
+
+	/* Zstd frame magic values */
+	const unsigned zstd_magic = 0xFD2FB528U;
+	const unsigned zstd_skippable_magic = 0x184D2A50U;
+	/*
+	 * Skippable frames may use any magic in 0x184D2A50..0x184D2A5F,
+	 * so the low four bits are ignored when comparing.
+	 */
+	const unsigned zstd_skippable_mask = 0xFFFFFFF0U;
+
+	(void) self; /* UNUSED */
+
+	/* Peek at the first four bytes; bid nothing if unavailable. */
+	buffer = __archive_read_filter_ahead(filter, 4, &avail);
+	if (buffer == NULL)
+		return (0);
+
+	/* The magic number is stored little-endian. */
+	prefix = archive_le32dec(buffer);
+	if (prefix == zstd_magic ||
+	    (prefix & zstd_skippable_mask) == zstd_skippable_magic)
+		return (32);
+
+	return (0);
+}
+
+#if !(HAVE_ZSTD_H && HAVE_LIBZSTD)
+
+/*
+ * If we don't have the library on this system, we can't do the
+ * decompression directly. We can, however, try to run "zstd -d"
+ * in case that's available.
+ */
+static int
+zstd_bidder_init(struct archive_read_filter *self)
+{
+	int r;
+
+	r = __archive_read_program(self, "zstd -d -qq");
+	/* Note: We set the format here even if __archive_read_program()
+	 * above fails. We do, after all, know what the format is
+	 * even if we weren't able to read it. 
*/
+	self->code = ARCHIVE_FILTER_ZSTD;
+	self->name = "zstd";
+	return (r);
+}
+
+#else
+
+/*
+ * Initialize the filter object.
+ *
+ * Allocates the private state, an output buffer of ZSTD_DStreamOutSize()
+ * bytes and a ZSTD_DStream, then installs the read and close callbacks.
+ * Returns ARCHIVE_OK on success; on any allocation failure everything
+ * allocated so far is released, an ENOMEM error is recorded and
+ * ARCHIVE_FATAL is returned.
+ */
+static int
+zstd_bidder_init(struct archive_read_filter *self)
+{
+	struct private_data *state;
+	const size_t out_block_size = ZSTD_DStreamOutSize();
+	void *out_block;
+
+	self->code = ARCHIVE_FILTER_ZSTD;
+	self->name = "zstd";
+
+	state = (struct private_data *)calloc(1, sizeof(*state));
+	out_block = (unsigned char *)malloc(out_block_size);
+	if (state == NULL || out_block == NULL) {
+		free(out_block);
+		free(state);
+		archive_set_error(&self->archive->archive, ENOMEM,
+		    "Can't allocate data for zstd decompression");
+		return (ARCHIVE_FATAL);
+	}
+
+	state->dstream = ZSTD_createDStream();
+	if (state->dstream == NULL) {
+		free(out_block);
+		free(state);
+		archive_set_error(&self->archive->archive, ENOMEM,
+		    "Can't create zstd decompressor object");
+		return (ARCHIVE_FATAL);
+	}
+
+	/*
+	 * Attach the state to the filter only after every allocation has
+	 * succeeded, so self->data never refers to freed memory.
+	 */
+	self->data = state;
+	state->out_block_size = out_block_size;
+	state->out_block = out_block;
+	self->read = zstd_filter_read;
+	self->skip = NULL; /* not supported */
+	self->close = zstd_filter_close;
+
+	state->in_stream = 0; /* We're not actually within a stream yet. */
+
+	return (ARCHIVE_OK);
+}
+
+static ssize_t
+zstd_filter_read(struct archive_read_filter *self, const void **p)
+{
+	struct private_data *state;
+	size_t decompressed;
+	ssize_t avail_in;
+	size_t ret;
+	ZSTD_outBuffer out;
+	ZSTD_inBuffer in;
+
+	state = (struct private_data *)self->data;
+
+	out = (ZSTD_outBuffer) { state->out_block, state->out_block_size, 0 };
+
+	/* Try to fill the output buffer. 
*/
+	while (out.pos < out.size && !state->eof) {
+		if (!state->in_stream) {
+			/*
+			 * Do not free the filter state on failure: self->data
+			 * still points at it and zstd_filter_close() releases
+			 * it, so freeing here would lead to a double free.
+			 */
+			if (ZSTD_isError(ZSTD_initDStream(state->dstream))) {
+				archive_set_error(&self->archive->archive,
+				    ARCHIVE_ERRNO_MISC,
+				    "Error initializing zstd decompressor");
+				return (ARCHIVE_FATAL);
+			}
+			state->in_stream = 1;
+		}
+		in.src = __archive_read_filter_ahead(self->upstream, 1,
+		    &avail_in);
+		/*
+		 * A negative count is an upstream failure: pass that status
+		 * through instead of relabeling it as truncated input.
+		 */
+		if (avail_in < 0)
+			return (avail_in);
+		if (in.src == NULL && avail_in <= 0) {
+			archive_set_error(&self->archive->archive,
+			    ARCHIVE_ERRNO_MISC,
+			    "truncated zstd input");
+			return (ARCHIVE_FATAL);
+		}
+		in.size = avail_in;
+		in.pos = 0;
+
+		ret = ZSTD_decompressStream(state->dstream, &out, &in);
+
+		if (ZSTD_isError(ret)) {
+			archive_set_error(&self->archive->archive,
+			    ARCHIVE_ERRNO_MISC,
+			    "zstd decompression failed");
+			return (ARCHIVE_FATAL);
+		}
+
+		/* Decompressor made some progress */
+		__archive_read_filter_consume(self->upstream, in.pos);
+
+		/* Found end of the frame */
+		if (ret == 0) {
+			state->eof = 1;
+		}
+	}
+
+	/* Hand back whatever was produced; NULL when nothing was. */
+	decompressed = out.pos;
+	state->total_out += decompressed;
+	if (decompressed == 0)
+		*p = NULL;
+	else
+		*p = state->out_block;
+	return (decompressed);
+}
+
+/*
+ * Clean up the decompressor: release the ZSTD_DStream, the output
+ * buffer and the private state itself.
+ */
+static int
+zstd_filter_close(struct archive_read_filter *self)
+{
+	struct private_data *state;
+
+	state = (struct private_data *)self->data;
+
+	ZSTD_freeDStream(state->dstream);
+	free(state->out_block);
+	free(state);
+
+	return (ARCHIVE_OK);
+}
+
+#endif /* HAVE_ZSTD_H && HAVE_LIBZSTD */
-- 
2.47.2