]> git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Update seekable API to simplify IO
authorSean Purcell <me@seanp.xyz>
Tue, 18 Apr 2017 23:47:28 +0000 (16:47 -0700)
committerSean Purcell <me@seanp.xyz>
Tue, 18 Apr 2017 23:48:30 +0000 (16:48 -0700)
contrib/seekable_format/examples/seekable_decompression.c
contrib/seekable_format/zstd_seekable.h
contrib/seekable_format/zstdseek_decompress.c
lib/common/error_private.c
lib/common/zstd_errors.h

index b134b87b6b97c796075fb2ff0576ef9f4ca9b0a6..d18def7cd1871a3c1a50729f55073c735a2339dd 100644 (file)
@@ -17,6 +17,7 @@
 
 #include "zstd_seekable.h"
 
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
 
 static void* malloc_orDie(size_t size)
 {
@@ -85,74 +86,31 @@ static void fseek_orDie(FILE* file, long int offset, int origin) {
 static void decompressFile_orDie(const char* fname, unsigned startOffset, unsigned endOffset)
 {
     FILE* const fin  = fopen_orDie(fname, "rb");
-    size_t const buffInSize = ZSTD_DStreamInSize();
-    void*  const buffIn  = malloc_orDie(buffInSize);
     FILE* const fout = stdout;
     size_t const buffOutSize = ZSTD_DStreamOutSize();  /* Guarantee to successfully flush at least one complete compressed block in all circumstances. */
     void*  const buffOut = malloc_orDie(buffOutSize);
 
-    ZSTD_seekable_DStream* const dstream = ZSTD_seekable_createDStream();
-    if (dstream==NULL) { fprintf(stderr, "ZSTD_seekable_createDStream() error \n"); exit(10); }
+    ZSTD_seekable* const seekable = ZSTD_seekable_create();
+    if (seekable==NULL) { fprintf(stderr, "ZSTD_seekable_create() error \n"); exit(10); }
 
-    {   size_t sizeNeeded = 0;
-        void* buffSeekTable = NULL;
+    size_t const initResult = ZSTD_seekable_initFile(seekable, fin);
+    if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_init() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
 
-        do {
-            sizeNeeded = ZSTD_seekable_loadSeekTable(dstream, buffSeekTable, sizeNeeded);
-            if (!sizeNeeded) break;
+    while (startOffset < endOffset) {
+        size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
 
-            if (ZSTD_isError(sizeNeeded)) {
-                fprintf(stderr, "ZSTD_seekable_loadSeekTable() error : %s \n",
-                        ZSTD_getErrorName(sizeNeeded));
-                exit(11);
-            }
-
-            fseek_orDie(fin, -(long) sizeNeeded, SEEK_END);
-            buffSeekTable = realloc_orDie(buffSeekTable, sizeNeeded);
-            fread_orDie(buffSeekTable, sizeNeeded, fin);
-        } while (sizeNeeded > 0);
-
-        free(buffSeekTable);
-    }
-
-    /* In more complex scenarios, a file may consist of multiple appended frames (ex : pzstd).
-    *  The following example decompresses only the first frame.
-    *  It is compatible with other provided streaming examples */
-    size_t const initResult = ZSTD_seekable_initDStream(dstream, startOffset, endOffset);
-    if (ZSTD_isError(initResult)) { fprintf(stderr, "ZSTD_seekable_initDStream() error : %s \n", ZSTD_getErrorName(initResult)); exit(11); }
-
-    size_t result, read, toRead = 0;
-
-    do {
-        read = fread_orDie(buffIn, toRead, fin);
-        {   ZSTD_inBuffer input = { buffIn, read, 0 };
-            ZSTD_outBuffer output = { buffOut, buffOutSize, 0 };
-            result = ZSTD_seekable_decompressStream(dstream, &output, &input);
-
-            if (ZSTD_isError(result)) {
-                if (ZSTD_getErrorCode(result) == ZSTD_error_needSeek) {
-                    unsigned long long const offset = ZSTD_seekable_getSeekOffset(dstream);
-                    fseek_orDie(fin, offset, SEEK_SET);
-                    ZSTD_seekable_updateOffset(dstream, offset);
-                    toRead = 0;
-                } else {
-                    fprintf(stderr,
-                            "ZSTD_seekable_decompressStream() error : %s \n",
-                            ZSTD_getErrorName(result));
-                    exit(12);
-                }
-            } else {
-                toRead = result;
-            }
-            fwrite_orDie(buffOut, output.pos, fout);
-            if (toRead > buffInSize) toRead = buffInSize;
+        if (ZSTD_isError(result)) {
+            fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
+                    ZSTD_getErrorName(result));
+            exit(12);
         }
-    } while (result > 0);
+        fwrite_orDie(buffOut, result, fout);
+        startOffset += result;
+    }
 
-    ZSTD_seekable_freeDStream(dstream);
+    ZSTD_seekable_free(seekable);
     fclose_orDie(fin);
     fclose_orDie(fout);
-    free(buffIn);
     free(buffOut);
 }
 
index 3ab4f185e7aedabe8f47e24ad68cbc93bbefea13..54b50fa50126a097281e7e1b38a6810226c1e8b1 100644 (file)
@@ -5,6 +5,8 @@
 extern "C" {
 #endif
 
+#include <stdio.h>
+
 static const unsigned ZSTD_seekTableFooterSize = 9;
 
 #define ZSTD_SEEKABLE_MAGICNUMBER 0x8F92EAB1
@@ -14,6 +16,8 @@ static const unsigned ZSTD_seekTableFooterSize = 9;
 /* Limit the maximum size to avoid any potential issues storing the compressed size */
 #define ZSTD_SEEKABLE_MAX_FRAME_DECOMPRESSED_SIZE 0x80000000U
 
+#define ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE (0ULL-2)
+
 /*-****************************************************************************
 *  Seekable Format
 *
@@ -24,7 +28,7 @@ static const unsigned ZSTD_seekTableFooterSize = 9;
 ******************************************************************************/
 
 typedef struct ZSTD_seekable_CStream_s ZSTD_seekable_CStream;
-typedef struct ZSTD_seekable_DStream_s ZSTD_seekable_DStream;
+typedef struct ZSTD_seekable_s ZSTD_seekable;
 
 /*-****************************************************************************
 *  Seekable compression - HowTo
@@ -82,55 +86,76 @@ ZSTDLIB_API size_t ZSTD_seekable_endStream(ZSTD_seekable_CStream* zcs, ZSTD_outB
 
 /*-****************************************************************************
 *  Seekable decompression - HowTo
-*  A ZSTD_seekable_DStream object is required to tracking streaming operation.
-*  Use ZSTD_seekable_createDStream() and ZSTD_seekable_freeDStream() to create/
-*  release resources.
-*
-*  Streaming objects are reusable to avoid allocation and deallocation,
-*  to start a new compression operation call ZSTD_seekable_initDStream() on the
-*  compressor.
+*  A ZSTD_seekable object is required for tracking the seek table.
 *
-*  Use ZSTD_seekable_loadSeekTable() to load the seek table from a file.
-*  `src` should point to a block of data read from the end of the file,
-*  i.e. `src + srcSize` should always be the end of the file.
-*  @return : 0 if the table was loaded successfully, or if `srcSize` was too
-*            small, a size hint for how much data to provide.
-*            An error code may also be returned, checkable with ZSTD_isError()
+*  Call ZSTD_seekable_init* to initialize a ZSTD_seekable object with the
+*  seek table provided in the input.
+*  There are three modes for ZSTD_seekable_init:
+*    - ZSTD_seekable_initBuff() : An in-memory API.  The data contained in
+*      `src` should be the entire seekable file, including the seek table.
+*      `src` should be kept alive and unmodified until the ZSTD_seekable object
+*      is freed or reset.
+*    - ZSTD_seekable_initFile() : A simplified file API using stdio.  fread and
+*      fseek will be used to access the required data for building the seek
+*      table and doing decompression operations.  `src` should not be closed
+*      or modified until the ZSTD_seekable object is freed or reset.
+*    - ZSTD_seekable_initAdvanced() : A general API allowing the client to
+*      provide its own read and seek callbacks.
+*        + ZSTD_seekable_read() : read exactly `n` bytes into `buffer`.
+*                                 Premature EOF should be treated as an error.
+*        + ZSTD_seekable_seek() : seek the read head to `offset` from `origin`,
+*                                 where origin is either SEEK_SET (beginning of
+*                                 file), or SEEK_END (end of file).
+*  Both functions should return a non-negative value in case of success, and a
+*  negative value in case of failure.  If implementing using this API and
+*  stdio, be careful with files larger than 4GB and fseek.  All of these
+*  functions return an error code checkable with ZSTD_isError().
 *
-*  Use ZSTD_seekable_initDStream to prepare for a new decompression operation
-*  using the seektable loaded with ZSTD_seekable_loadSeekTable().
-*  Data in the range [rangeStart, rangeEnd) will be decompressed.
+*  Call ZSTD_seekable_decompress to decompress `dstSize` bytes at decompressed
+*  offset `offset`.  ZSTD_seekable_decompress may have to decompress the entire
+*  prefix of the frame before the desired data if it has not already processed
+*  this section. If ZSTD_seekable_decompress is called multiple times for a
+*  consecutive range of data, it will efficiently retain the decompressor object
+*  and avoid redecompressing frame prefixes.  The return value is the number of
+*  bytes decompressed, or an error code checkable with ZSTD_isError().
 *
-*  Call ZSTD_seekable_decompressStream() repetitively to consume input stream.
-*  @return : There are a number of possible return codes for this function
-*           - 0, the decompression operation has completed.
-*           - An error code checkable with ZSTD_isError
-*             + If this error code is ZSTD_error_needSeek, the user should seek
-*               to the file position provided by ZSTD_seekable_getSeekOffset()
-*               and indicate this to the stream with
-*               ZSTD_seekable_updateOffset(), before resuming decompression
-*             + Otherwise, this is a regular decompression error and the input
-*               file is likely corrupted or the API was incorrectly used.
-*           - A size hint, the preferred nb of bytes to provide as input to the
-*             next function call to improve latency.
-*
-*  ZSTD_seekable_getSeekOffset() and ZSTD_seekable_updateOffset() are helper
-*  functions to indicate where the user should seek their file stream to, when
-*  a different position is required to continue decompression.
-*  Note that ZSTD_seekable_updateOffset will error if given an offset other
-*  than the one requested from ZSTD_seekable_getSeekOffset().
+*  The seek table access functions can be used to obtain the data contained
+*  in the seek table.  If frameIndex is not smaller than the value returned by
+*  ZSTD_seekable_getNumFrames(), they will return error codes checkable with
+*  ZSTD_isError().  Note that since the offset access functions return
+*  unsigned long long instead of size_t, in this case they will instead return
+*  the value ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE.
 ******************************************************************************/
 
 /*===== Seekable decompressor management =====*/
-ZSTDLIB_API ZSTD_seekable_DStream* ZSTD_seekable_createDStream(void);
-ZSTDLIB_API size_t ZSTD_seekable_freeDStream(ZSTD_seekable_DStream* zds);
+ZSTDLIB_API ZSTD_seekable* ZSTD_seekable_create(void);
+ZSTDLIB_API size_t ZSTD_seekable_free(ZSTD_seekable* zs);
 
 /*===== Seekable decompression functions =====*/
-ZSTDLIB_API size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_seekable_initDStream(ZSTD_seekable_DStream* zds, unsigned long long rangeStart, unsigned long long rangeEnd);
-ZSTDLIB_API size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
-ZSTDLIB_API unsigned long long ZSTD_seekable_getSeekOffset(ZSTD_seekable_DStream* zds);
-ZSTDLIB_API size_t ZSTD_seekable_updateOffset(ZSTD_seekable_DStream* zds, unsigned long long offset);
+ZSTDLIB_API size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize);
+ZSTDLIB_API size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src);
+ZSTDLIB_API size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned long long offset);
+ZSTDLIB_API size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, unsigned frameIndex);
+
+/*===== Seek Table access functions =====*/
+ZSTDLIB_API unsigned ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs);
+ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
+ZSTDLIB_API unsigned long long ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
+ZSTDLIB_API size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, unsigned frameIndex);
+
+ZSTDLIB_API unsigned ZSTD_seekable_offsetToFrame(ZSTD_seekable* const zs, unsigned long long offset);
+
+/*===== Seekable advanced I/O API =====*/
+typedef int(ZSTD_seekable_read)(void* opaque, void* buffer, size_t n);
+typedef int(ZSTD_seekable_seek)(void* opaque, long long offset, int origin);
+typedef struct {
+    void* opaque;
+    ZSTD_seekable_read* read;
+    ZSTD_seekable_seek* seek;
+} ZSTD_seekable_customFile;
+
+ZSTDLIB_API size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src);
 
 #if defined (__cplusplus)
 }
index 87a140c02776d18a6f023b75505021a49b39ecc4..9bcfea91b7b22a057aad57d6bbc1fc77d6f8503a 100644 (file)
@@ -7,7 +7,54 @@
  * of patent rights can be found in the PATENTS file in the same directory.
  */
 
+/* *********************************************************
+*  Turn on Large Files support (>4GB) for 32-bit Linux/Unix
+***********************************************************/
+#if !defined(__64BIT__) || defined(__MINGW32__)       /* No point defining Large file for 64 bit but MinGW-w64 requires it */
+#  if !defined(_FILE_OFFSET_BITS)
+#    define _FILE_OFFSET_BITS 64                      /* turn off_t into a 64-bit type for ftello, fseeko */
+#  endif
+#  if !defined(_LARGEFILE_SOURCE)                     /* obsolete macro, replaced with _FILE_OFFSET_BITS */
+#    define _LARGEFILE_SOURCE 1                       /* Large File Support extension (LFS) - fseeko, ftello */
+#  endif
+#  if defined(_AIX) || defined(__hpux)
+#    define _LARGE_FILES                              /* Large file support on 32-bits AIX and HP-UX */
+#  endif
+#endif
+
+/* ************************************************************
+* Avoid fseek()'s 2GiB barrier with MSVC, MacOS, *BSD, MinGW
+***************************************************************/
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+#   define LONG_SEEK _fseeki64
+#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
+#  define LONG_SEEK fseeko
+#elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
+#   define LONG_SEEK fseeko64
+#elif defined(_WIN32) && !defined(__DJGPP__)
+#   include <windows.h>
+    static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
+        LARGE_INTEGER off;
+        DWORD method;
+        off.QuadPart = offset;
+        if (origin == SEEK_END)
+            method = FILE_END;
+        else if (origin == SEEK_CUR)
+            method = FILE_CURRENT;
+        else
+            method = FILE_BEGIN;
+
+        if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
+            return 0;
+        else
+            return -1;
+    }
+#else
+#   define LONG_SEEK fseek
+#endif
+
 #include <stdlib.h> /* malloc, free */
+#include <stdio.h>  /* FILE* */
 
 #define XXH_STATIC_LINKING_ONLY
 #define XXH_NAMESPACE ZSTD_
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
 #include "zstd_errors.h"
-#include "mem.h" /* includes zstd.h */
+#include "mem.h"
 #include "zstd_seekable.h"
 
 #undef ERROR
 #define ERROR(name) ((size_t)-ZSTD_error_##name)
 
+#define CHECK_IO(f) { int const errcod = (f); if (errcod < 0) return ERROR(seekableIO); }
+
 #undef MIN
 #undef MAX
 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 
+/* Special-case callbacks for FILE* and in-memory modes, so that we can treat
+ * them the same way as the advanced API */
+static int ZSTD_seekable_read_FILE(void* opaque, void* buffer, size_t n)
+{
+    size_t const result = fread(buffer, 1, n, (FILE*)opaque);
+    if (result != n) {
+        return -1;
+    }
+    return 0;
+}
+
+static int ZSTD_seekable_seek_FILE(void* opaque, S64 offset, int origin)
+{
+    int const ret = LONG_SEEK((FILE*)opaque, offset, origin);
+    if (ret) return ret;
+    return fflush((FILE*)opaque);
+}
+
+typedef struct {
+    const void *ptr;
+    size_t size;
+    size_t pos;
+} buffWrapper_t;
+
+/** ZSTD_seekable_read_buff() :
+ *  Read callback for in-memory mode.  Copies `n` bytes from the wrapped
+ *  buffer, starting at its current position, into `buffer`, then advances
+ *  the position by `n`.
+ *  @return : 0 on success, -1 if fewer than `n` bytes remain in the buffer */
+static int ZSTD_seekable_read_buff(void* opaque, void* buffer, size_t n)
+{
+    buffWrapper_t* const buff = (buffWrapper_t*) opaque;
+    /* Compare `n` against the bytes remaining after `pos`; phrased as a
+     * subtraction so that a huge `n` cannot wrap the comparison around. */
+    if (buff->pos > buff->size || n > buff->size - buff->pos) return -1;
+    memcpy(buffer, (const BYTE*)buff->ptr + buff->pos, n);
+    buff->pos += n;
+    return 0;
+}
+
+/** ZSTD_seekable_seek_buff() :
+ *  Seek callback for in-memory mode.  Moves the read position of the wrapped
+ *  buffer to `offset` relative to `origin` (SEEK_SET, SEEK_CUR or SEEK_END).
+ *  As with fseek, `offset` is added to the origin, so positions before the
+ *  end of the buffer are reached with a negative `offset` and SEEK_END.
+ *  @return : 0 on success, -1 if `origin` is not recognized or the resulting
+ *            position falls outside [0, size] */
+static int ZSTD_seekable_seek_buff(void* opaque, S64 offset, int origin)
+{
+    buffWrapper_t* const buff = (buffWrapper_t*) opaque;
+    unsigned long long newOffset;
+    switch (origin) {
+    case SEEK_SET:
+        newOffset = (unsigned long long)offset;
+        break;
+    case SEEK_CUR:
+        newOffset = (unsigned long long)buff->pos + (unsigned long long)offset;
+        break;
+    case SEEK_END:
+        newOffset = (unsigned long long)buff->size + (unsigned long long)offset;
+        break;
+    default:
+        return -1; /* unknown origin : never use newOffset uninitialized */
+    }
+    /* The arithmetic is unsigned, so a position that would be negative wraps
+     * to a value far above `size` and is rejected by this single check. */
+    if (newOffset > buff->size) {
+        return -1;
+    }
+    buff->pos = (size_t)newOffset;
+    return 0;
+}
+
 typedef struct {
     U64 cOffset;
     U64 dOffset;
@@ -40,95 +144,122 @@ typedef struct {
     int checksumFlag;
 } seekTable_t;
 
-/** ZSTD_seekable_offsetToFrame() :
- *  Performs a binary search to find the last frame with a decompressed offset
- *  <= pos
- *  @return : the frame's index */
-static U32 ZSTD_seekable_offsetToFrame(const seekTable_t* table, U64 pos)
-{
-    U32 lo = 0;
-    U32 hi = table->tableLen;
-
-    while (lo + 1 < hi) {
-        U32 const mid = lo + ((hi - lo) >> 1);
-        if (table->entries[mid].dOffset <= pos) {
-            lo = mid;
-        } else {
-            hi = mid;
-        }
-    }
-    return lo;
-}
-
-/* Stream decompressor state machine stages */
-enum ZSTD_seekable_DStream_stage {
-    zsds_init = 0,
-    zsds_seek,
-    zsds_decompress,
-    zsds_done,
-};
+#define SEEKABLE_BUFF_SIZE ZSTD_BLOCKSIZE_ABSOLUTEMAX
 
-struct ZSTD_seekable_DStream_s {
+struct ZSTD_seekable_s {
     ZSTD_DStream* dstream;
     seekTable_t seekTable;
+    ZSTD_seekable_customFile src;
 
-    U32 curFrame;
-    U64 compressedOffset;
     U64 decompressedOffset;
+    U32 curFrame;
 
-    U64 targetStart;
-    U64 targetEnd;
-
-    U64 nextSeek;
-
-    enum ZSTD_seekable_DStream_stage stage;
+    BYTE inBuff[SEEKABLE_BUFF_SIZE]; /* need to do our own input buffering */
+    BYTE outBuff[SEEKABLE_BUFF_SIZE]; /* so we can efficiently decompress the
+                                         starts of chunks before we get to the
+                                         desired section */
+    ZSTD_inBuffer in; /* maintain continuity across ZSTD_seekable_decompress operations */
+    buffWrapper_t buffWrapper; /* for `src.opaque` in in-memory mode */
 
     XXH64_state_t xxhState;
 };
 
-ZSTD_seekable_DStream* ZSTD_seekable_createDStream(void)
+ZSTD_seekable* ZSTD_seekable_create(void)
 {
-    ZSTD_seekable_DStream* zds = malloc(sizeof(ZSTD_seekable_DStream));
+    ZSTD_seekable* zs = malloc(sizeof(ZSTD_seekable));
 
-    if (zds == NULL) return NULL;
+    if (zs == NULL) return NULL;
 
     /* also initializes stage to zsds_init */
-    memset(zds, 0, sizeof(*zds));
+    memset(zs, 0, sizeof(*zs));
 
-    zds->dstream = ZSTD_createDStream();
-    if (zds->dstream == NULL) {
-        free(zds);
+    zs->dstream = ZSTD_createDStream();
+    if (zs->dstream == NULL) {
+        free(zs);
         return NULL;
     }
 
-    return zds;
+    return zs;
 }
 
-size_t ZSTD_seekable_freeDStream(ZSTD_seekable_DStream* zds)
+size_t ZSTD_seekable_free(ZSTD_seekable* zs)
 {
-    if (zds == NULL) return 0; /* support free on null */
-    ZSTD_freeDStream(zds->dstream);
-    free(zds->seekTable.entries);
-    free(zds);
+    if (zs == NULL) return 0; /* support free on null */
+    ZSTD_freeDStream(zs->dstream);
+    free(zs->seekTable.entries);
+    free(zs);
 
     return 0;
 }
 
-size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src, size_t srcSize)
+/** ZSTD_seekable_offsetToFrame() :
+ *  Performs a binary search to find the last frame with a decompressed offset
+ *  <= pos
+ *  @return : the frame's index */
+U32 ZSTD_seekable_offsetToFrame(ZSTD_seekable* const zs, U64 pos)
 {
-    const BYTE* ip = (const BYTE*)src + srcSize;
+    U32 lo = 0;
+    U32 hi = zs->seekTable.tableLen;
 
-    int checksumFlag;
+    if (pos >= zs->seekTable.entries[zs->seekTable.tableLen].dOffset) {
+        return zs->seekTable.tableLen;
+    }
 
-    /* footer is fixed size */
-    if (srcSize < ZSTD_seekTableFooterSize)
-        return ZSTD_seekTableFooterSize;
+    while (lo + 1 < hi) {
+        U32 const mid = lo + ((hi - lo) >> 1);
+        if (zs->seekTable.entries[mid].dOffset <= pos) {
+            lo = mid;
+        } else {
+            hi = mid;
+        }
+    }
+    return lo;
+}
 
-    if (MEM_readLE32(ip - 4) != ZSTD_SEEKABLE_MAGICNUMBER) {
+U32 ZSTD_seekable_getNumFrames(ZSTD_seekable* const zs)
+{
+    return zs->seekTable.tableLen;
+}
+
+U64 ZSTD_seekable_getFrameCompressedOffset(ZSTD_seekable* const zs, U32 frameIndex)
+{
+    if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
+    return zs->seekTable.entries[frameIndex].cOffset;
+}
+
+U64 ZSTD_seekable_getFrameDecompressedOffset(ZSTD_seekable* const zs, U32 frameIndex)
+{
+    if (frameIndex >= zs->seekTable.tableLen) return ZSTD_SEEKABLE_FRAMEINDEX_TOOLARGE;
+    return zs->seekTable.entries[frameIndex].dOffset;
+}
+
+size_t ZSTD_seekable_getFrameCompressedSize(ZSTD_seekable* const zs, U32 frameIndex)
+{
+    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
+    return zs->seekTable.entries[frameIndex + 1].cOffset -
+           zs->seekTable.entries[frameIndex].cOffset;
+}
+
+/** ZSTD_seekable_getFrameDecompressedSize() :
+ *  @return : the decompressed size of frame `frameIndex`, computed as the
+ *            difference between the dOffset of the next seek table entry and
+ *            the dOffset of this one, or ERROR(frameIndex_tooLarge) if
+ *            `frameIndex` does not designate a frame */
+size_t ZSTD_seekable_getFrameDecompressedSize(ZSTD_seekable* const zs, U32 frameIndex)
+{
+    /* Frames are numbered [0, tableLen).  entries[tableLen] is only the
+     * trailing sentinel, so frameIndex == tableLen must be rejected too :
+     * it would otherwise read entries[tableLen + 1], past the allocation. */
+    if (frameIndex >= zs->seekTable.tableLen) return ERROR(frameIndex_tooLarge);
+    return zs->seekTable.entries[frameIndex + 1].dOffset -
+           zs->seekTable.entries[frameIndex].dOffset;
+}
+
+static size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable* zs)
+{
+    int checksumFlag;
+    ZSTD_seekable_customFile src = zs->src;
+    /* read the footer, fixed size */
+    CHECK_IO(src.seek(src.opaque, -(int)ZSTD_seekTableFooterSize, SEEK_END));
+    CHECK_IO(src.read(src.opaque, zs->inBuff, ZSTD_seekTableFooterSize));
+
+    if (MEM_readLE32(zs->inBuff + 5) != ZSTD_SEEKABLE_MAGICNUMBER) {
         return ERROR(prefix_unknown);
     }
 
-    {   BYTE const sfd = ip[-5];
+    {   BYTE const sfd = zs->inBuff[4];
         checksumFlag = sfd >> 7;
 
         /* check reserved bits */
@@ -137,30 +268,36 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
         }
     }
 
-    {   U32 const numFrames = MEM_readLE32(ip-9);
+    {   U32 const numFrames = MEM_readLE32(zs->inBuff);
         U32 const sizePerEntry = 8 + (checksumFlag?4:0);
         U32 const tableSize = sizePerEntry * numFrames;
         U32 const frameSize = tableSize + ZSTD_seekTableFooterSize + ZSTD_skippableHeaderSize;
 
-        const BYTE* base = ip - frameSize;
+        U32 remaining = frameSize - ZSTD_seekTableFooterSize; /* don't need to re-read footer */
+        {
+            U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
 
-        if (srcSize < frameSize) return frameSize;
+            CHECK_IO(src.seek(src.opaque, -(S64)frameSize, SEEK_END));
+            CHECK_IO(src.read(src.opaque, zs->inBuff, toRead));
+
+            remaining -= toRead;
+        }
 
-        if (MEM_readLE32(base) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
+        if (MEM_readLE32(zs->inBuff) != (ZSTD_MAGIC_SKIPPABLE_START | 0xE)) {
             return ERROR(prefix_unknown);
         }
-        if (MEM_readLE32(base+4) + ZSTD_skippableHeaderSize != frameSize) {
+        if (MEM_readLE32(zs->inBuff+4) + ZSTD_skippableHeaderSize != frameSize) {
             return ERROR(prefix_unknown);
         }
 
         {   /* Allocate an extra entry at the end so that we can do size
              * computations on the last element without special case */
-            seekEntry_t* entries =
-                    (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
-            const BYTE* tableBase = base + ZSTD_skippableHeaderSize;
+            seekEntry_t* entries = (seekEntry_t*)malloc(sizeof(seekEntry_t) * (numFrames + 1));
+            const BYTE* tableBase = zs->inBuff + ZSTD_skippableHeaderSize;
+
+            U32 idx = 0;
+            U32 pos = 8;
 
-            U32 idx;
-            size_t pos;
 
             U64 cOffset = 0;
             U64 dOffset = 0;
@@ -171,202 +308,153 @@ size_t ZSTD_seekable_loadSeekTable(ZSTD_seekable_DStream* zds, const void* src,
             }
 
             /* compute cumulative positions */
-            for (idx = 0, pos = 0; idx < numFrames; idx++) {
+            for (; idx < numFrames; idx++) {
+                if (pos + sizePerEntry > SEEKABLE_BUFF_SIZE) {
+                    U32 const toRead = MIN(remaining, SEEKABLE_BUFF_SIZE);
+                    U32 const offset = SEEKABLE_BUFF_SIZE - pos;
+                    memmove(zs->inBuff, zs->inBuff + pos, offset); /* move any data we haven't read yet */
+                    CHECK_IO(src.read(src.opaque, zs->inBuff+offset, toRead));
+                    remaining -= toRead;
+                    pos = 0;
+                }
                 entries[idx].cOffset = cOffset;
                 entries[idx].dOffset = dOffset;
 
-                cOffset += MEM_readLE32(tableBase + pos);
+                cOffset += MEM_readLE32(zs->inBuff + pos);
                 pos += 4;
-                dOffset += MEM_readLE32(tableBase + pos);
+                dOffset += MEM_readLE32(zs->inBuff + pos);
                 pos += 4;
                 if (checksumFlag) {
-                    entries[idx].checksum = MEM_readLE32(tableBase + pos);
+                    entries[idx].checksum = MEM_readLE32(zs->inBuff + pos);
                     pos += 4;
                 }
             }
             entries[numFrames].cOffset = cOffset;
             entries[numFrames].dOffset = dOffset;
 
-            zds->seekTable.entries = entries;
-            zds->seekTable.tableLen = numFrames;
-            zds->seekTable.checksumFlag = checksumFlag;
+            zs->seekTable.entries = entries;
+            zs->seekTable.tableLen = numFrames;
+            zs->seekTable.checksumFlag = checksumFlag;
             return 0;
         }
     }
 }
 
-size_t ZSTD_seekable_initDStream(ZSTD_seekable_DStream* zds, U64 rangeStart, U64 rangeEnd)
+size_t ZSTD_seekable_initBuff(ZSTD_seekable* zs, const void* src, size_t srcSize)
 {
-    /* restrict range to the end of the file, of non-negative size */
-    rangeEnd = MIN(rangeEnd, zds->seekTable.entries[zds->seekTable.tableLen].dOffset);
-    rangeStart = MIN(rangeStart, rangeEnd);
-
-    zds->targetStart = rangeStart;
-    zds->targetEnd = rangeEnd;
-    zds->stage = zsds_seek;
-
-    /* force a seek first */
-    zds->curFrame = (U32)-1;
-    zds->compressedOffset = (U64)-1;
-    zds->decompressedOffset = (U64)-1;
-
-    if (zds->seekTable.checksumFlag) {
-        XXH64_reset(&zds->xxhState, 0);
-    }
-
-    if (rangeStart == rangeEnd) zds->stage = zsds_done;
-
-    {   const size_t ret = ZSTD_initDStream(zds->dstream);
-        if (ZSTD_isError(ret)) return ret; }
-    return 0;
+    zs->buffWrapper = (buffWrapper_t){src, srcSize, 0};
+    {   ZSTD_seekable_customFile srcFile = {&zs->buffWrapper,
+                                            &ZSTD_seekable_read_buff,
+                                            &ZSTD_seekable_seek_buff};
+        return ZSTD_seekable_initAdvanced(zs, srcFile); }
 }
 
-U64 ZSTD_seekable_getSeekOffset(ZSTD_seekable_DStream* zds)
+size_t ZSTD_seekable_initFile(ZSTD_seekable* zs, FILE* src)
 {
-    return zds->nextSeek;
+    ZSTD_seekable_customFile srcFile = {src, &ZSTD_seekable_read_FILE,
+                                        &ZSTD_seekable_seek_FILE};
+    return ZSTD_seekable_initAdvanced(zs, srcFile);
 }
 
-size_t ZSTD_seekable_updateOffset(ZSTD_seekable_DStream* zds, U64 offset)
+size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile src)
 {
-    if (zds->stage != zsds_seek) {
-        return ERROR(stage_wrong);
-    }
-    if (offset != zds->nextSeek) {
-        return ERROR(needSeek);
-    }
+    zs->src = src;
 
-    zds->stage = zsds_decompress;
-    zds->compressedOffset = offset;
+    {   const size_t seekTableInit = ZSTD_seekable_loadSeekTable(zs);
+        if (ZSTD_isError(seekTableInit)) return seekTableInit; }
+
+    zs->decompressedOffset = (U64)-1;
+    zs->curFrame = (U32)-1;
+
+    {   const size_t dstreamInit = ZSTD_initDStream(zs->dstream);
+        if (ZSTD_isError(dstreamInit)) return dstreamInit; }
     return 0;
 }
 
-size_t ZSTD_seekable_decompressStream(ZSTD_seekable_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
+size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, U64 offset)
 {
-    const seekTable_t* const jt = &zds->seekTable;
-    while (1) {
-        switch (zds->stage) {
-        case zsds_init:
-            return ERROR(init_missing); /* ZSTD_seekable_initDStream should be called first */
-        case zsds_decompress: {
-            BYTE* const outBase = (BYTE*)output->dst + output->pos;
-            size_t const outLen = output->size - output->pos;
-            while (zds->decompressedOffset < zds->targetStart) {
-                U64 const toDecompress =
-                        zds->targetStart - zds->decompressedOffset;
-                size_t const prevInputPos = input->pos;
-
-                ZSTD_outBuffer outTmp = {
-                        outBase, (size_t)MIN((U64)outLen, toDecompress), 0};
-
-                size_t const ret =
-                        ZSTD_decompressStream(zds->dstream, &outTmp, input);
-
-                if (ZSTD_isError(ret)) return ret;
-                if (ret == 0) {
-                    /* should not happen at this stage */
-                    return ERROR(corruption_detected);
-                }
-
-                zds->compressedOffset += input->pos - prevInputPos;
-                zds->decompressedOffset += outTmp.pos;
-
-                if (jt->checksumFlag) {
-                    XXH64_update(&zds->xxhState, outTmp.dst, outTmp.pos);
-                }
+    U32 targetFrame = ZSTD_seekable_offsetToFrame(zs, offset);
+    do {
+        /* check if we can continue from a previous decompress job */
+        if (targetFrame != zs->curFrame || offset != zs->decompressedOffset) {
+            zs->decompressedOffset = zs->seekTable.entries[targetFrame].dOffset;
+            zs->curFrame = targetFrame;
+
+            CHECK_IO(zs->src.seek(zs->src.opaque,
+                                  zs->seekTable.entries[targetFrame].cOffset,
+                                  SEEK_SET));
+            zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
+            XXH64_reset(&zs->xxhState, 0);
+            ZSTD_resetDStream(zs->dstream);
+        }
 
-                if (input->pos == input->size) {
-                    /* need more input */
-                    return MIN(
-                            ZSTD_DStreamInSize(),
-                            (size_t)(jt->entries[zds->curFrame + 1]
-                                             .cOffset -
-                                     zds->compressedOffset));
-                }
+        while (zs->decompressedOffset < offset + len) {
+            size_t toRead;
+            ZSTD_outBuffer outTmp;
+            size_t prevOutPos;
+            if (zs->decompressedOffset < offset) {
+                /* dummy decompressions until we get to the target offset */
+                outTmp = (ZSTD_outBuffer){zs->outBuff, MIN(SEEKABLE_BUFF_SIZE, offset - zs->decompressedOffset), 0};
+            } else {
+                outTmp = (ZSTD_outBuffer){dst, len, zs->decompressedOffset - offset};
             }
 
-            /* do actual decompression */
-            {
-                U64 const toDecompress =
-                        MIN(zds->targetEnd,
-                            jt->entries[zds->curFrame + 1].dOffset) -
-                        zds->decompressedOffset;
-                size_t const prevInputPos = input->pos;
-
-                ZSTD_outBuffer outTmp = {
-                        outBase, (size_t)MIN((U64)outLen, toDecompress), 0};
-
-                size_t const ret =
-                        ZSTD_decompressStream(zds->dstream, &outTmp, input);
-
-                if (ZSTD_isError(ret)) return ret;
-
-                zds->compressedOffset += input->pos - prevInputPos;
-                zds->decompressedOffset += outTmp.pos;
-
-                output->pos += outTmp.pos;
+            prevOutPos = outTmp.pos;
+            toRead = ZSTD_decompressStream(zs->dstream, &outTmp, &zs->in);
+            if (ZSTD_isError(toRead)) {
+                return toRead;
+            }
 
-                if (jt->checksumFlag) {
-                    XXH64_update(&zds->xxhState, outTmp.dst, outTmp.pos);
-                    if (ret == 0) {
-                        /* verify the checksum */
-                        U32 const digest = XXH64_digest(&zds->xxhState) & 0xFFFFFFFFU;
-                        if (digest != jt->entries[zds->curFrame].checksum) {
-                            return ERROR(checksum_wrong);
-                        }
+            if (zs->seekTable.checksumFlag) {
+                XXH64_update(&zs->xxhState, outTmp.dst, outTmp.pos);
+            }
+            zs->decompressedOffset += outTmp.pos - prevOutPos;
 
-                        XXH64_reset(&zds->xxhState, 0);
-                    }
-                }
+            if (toRead == 0) {
+                /* frame complete */
 
-                if (zds->decompressedOffset == zds->targetEnd) {
-                    /* done */
-                    zds->stage = zsds_done;
-                    return 0;
+                /* verify checksum */
+                if (zs->seekTable.checksumFlag &&
+                    (XXH64_digest(&zs->xxhState) & 0xFFFFFFFFU) !=
+                            zs->seekTable.entries[targetFrame].checksum) {
+                    return ERROR(corruption_detected);
                 }
 
-                if (ret == 0) {
-                    /* frame is done */
-                    /* make sure this lines up with the expected frame border */
-                    if (zds->decompressedOffset !=
-                                jt->entries[zds->curFrame + 1].dOffset ||
-                        zds->compressedOffset !=
-                                jt->entries[zds->curFrame + 1].cOffset)
-                        return ERROR(corruption_detected);
-                    ZSTD_resetDStream(zds->dstream);
-                    zds->stage = zsds_seek;
-                    break;
+                if (zs->decompressedOffset < offset + len) {
+                    /* go back to the start and force a reset of the stream */
+                    targetFrame = ZSTD_seekable_offsetToFrame(zs, zs->decompressedOffset);
                 }
+                break;
+            }
 
-                /* need more input */
-                return MIN(ZSTD_DStreamInSize(), (size_t)(
-                        jt->entries[zds->curFrame + 1].cOffset -
-                        zds->compressedOffset));
+            /* read in more data if we're done with this buffer */
+            if (zs->in.pos == zs->in.size) {
+                toRead = MIN(toRead, SEEKABLE_BUFF_SIZE);
+                CHECK_IO(zs->src.read(zs->src.opaque, zs->inBuff, toRead));
+                zs->in.size = toRead;
+                zs->in.pos = 0;
             }
         }
-        case zsds_seek: {
-            U32 targetFrame;
-            if (zds->decompressedOffset < zds->targetStart ||
-                    zds->decompressedOffset >= zds->targetEnd) {
-                /* haven't started yet */
-                targetFrame = ZSTD_seekable_offsetToFrame(jt, zds->targetStart);
-            } else {
-                targetFrame = ZSTD_seekable_offsetToFrame(jt, zds->decompressedOffset);
-            }
+    } while (zs->decompressedOffset != offset + len);
 
-            zds->curFrame = targetFrame;
+    return len;
+}
 
-            if (zds->compressedOffset == jt->entries[targetFrame].cOffset) {
-                zds->stage = zsds_decompress;
-                break;
-            }
+size_t ZSTD_seekable_decompressFrame(ZSTD_seekable* zs, void* dst, size_t dstSize, U32 frameIndex)
+{   /* Decompresses the single frame numbered `frameIndex` into `dst`, whose capacity is `dstSize`.
+     * Returns ERROR(frameIndex_tooLarge) or ERROR(dstSize_tooSmall) when the checks below fail;
+     * otherwise returns whatever ZSTD_seekable_decompress() returns for that frame's byte range. */
+    if (frameIndex >= zs->seekTable.tableLen) {   /* index must be below the seek table's tableLen */
+        return ERROR(frameIndex_tooLarge);
+    }
 
-            zds->nextSeek = jt->entries[targetFrame].cOffset;
-            zds->decompressedOffset = jt->entries[targetFrame].dOffset;
-            /* signal to user that a seek is required */
-            return ERROR(needSeek);
-        }
-        case zsds_done:
-            return 0;
+    {
+        size_t const decompressedSize =   /* frame length = difference between consecutive dOffset entries */
+                zs->seekTable.entries[frameIndex + 1].dOffset -   /* NOTE(review): presumably entries[] holds tableLen + 1 elements -- confirm */
+                zs->seekTable.entries[frameIndex].dOffset;
+        if (dstSize < decompressedSize) {   /* reject before decompressing when dst cannot hold the whole frame */
+            return ERROR(dstSize_tooSmall);
         }
+        return ZSTD_seekable_decompress(   /* delegate: request decompressedSize bytes starting at this frame's dOffset */
+                zs, dst, zs->seekTable.entries[frameIndex].dOffset,
+                decompressedSize);
     }
 }
index f32c6abdaac8ab3766ef5576ad39ee1a8d9ba7e8..c94ea181c14d910db7e3cdf18aa621496c3dff9c 100644 (file)
@@ -39,7 +39,7 @@ const char* ERR_getErrorString(ERR_enum code)
     case PREFIX(dictionary_wrong): return "Dictionary mismatch";
     case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples";
     case PREFIX(frameIndex_tooLarge): return "Frame index is too large";
-    case PREFIX(needSeek): return "Wrong file position, a seek is required to continue";
+    case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking";
     case PREFIX(maxCode):
     default: return notErrorCode;
     }
index d11c1ba2160958f4b1d0765febd720a9e347590a..de0fc8984fd10a50e736d0d0eeea6987a5192982 100644 (file)
@@ -59,7 +59,7 @@ typedef enum {
   ZSTD_error_dictionary_wrong,
   ZSTD_error_dictionaryCreation_failed,
   ZSTD_error_frameIndex_tooLarge,
-  ZSTD_error_needSeek,
+  ZSTD_error_seekableIO,
   ZSTD_error_maxCode
 } ZSTD_ErrorCode;