git.ipfire.org Git - thirdparty/zstd.git/commitdiff
seekable decompression fixes (#2594)
author: Azat Khuzhin <a.khuzhin@semrush.com>
Wed, 5 May 2021 14:05:41 +0000 (17:05 +0300)
committer: GitHub <noreply@github.com>
Wed, 5 May 2021 14:05:41 +0000 (10:05 -0400)
* seekable_format: fix from-file reading (not in-memory)

It tries to check the buffer boundary, but there is no buffer for
from-file reading.

* seekable_decompression: break when ZSTD_seekable_decompress() returns zero

* seekable_decompression_mem: break when ZSTD_seekable_decompress() returns zero

* seekable_format: cap the offset+len up to the last dOffset

This allows reading the whole file without getting a corruption error when
the requested range (offset + len) extends past the data left in the file, e.g.:

    $ ./seekable_compression seekable_compression.c 8192 | head
    $ zstd -cdq seekable_compression.c.zst | wc -c
    4737

Before this patch:

    $ ./seekable_decompression seekable_compression.c.zst 0 10000000 | wc -c
    ZSTD_seekable_decompress() error : Corrupted block detected
    0

After:

    $ ./seekable_decompression seekable_compression.c.zst 0 10000000 | wc -c
    4737

contrib/seekable_format/examples/seekable_decompression.c
contrib/seekable_format/examples/seekable_decompression_mem.c
contrib/seekable_format/zstdseek_decompress.c

index 7050e0fa5c6417c1179d0de5e4e32730cfbed0b6..e9e2013331e58288559f35c4640ccc68e692fc9f 100644 (file)
@@ -99,6 +99,9 @@ static void decompressFile_orDie(const char* fname, off_t startOffset, off_t end
 
     while (startOffset < endOffset) {
         size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
+        if (!result) {
+            break;
+        }
 
         if (ZSTD_isError(result)) {
             fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
index c36d2221f97e7ce60b894875b3b30c5d24280c3e..e7b1c65059c1881057825c13d940d62c2e2f208c 100644 (file)
@@ -104,6 +104,9 @@ static void decompressFile_orDie(const char* fname, off_t startOffset, off_t end
 
     while (startOffset < endOffset) {
         size_t const result = ZSTD_seekable_decompress(seekable, buffOut, MIN(endOffset - startOffset, buffOutSize), startOffset);
+        if (!result) {
+            break;
+        }
 
         if (ZSTD_isError(result)) {
             fprintf(stderr, "ZSTD_seekable_decompress() error : %s \n",
index b78c6341ad9b137e82cd69d58cef07807bcf3f23..ecf816c172f1770b6dcb62b2c5c2bdaea197a0b1 100644 (file)
@@ -433,6 +433,11 @@ size_t ZSTD_seekable_initAdvanced(ZSTD_seekable* zs, ZSTD_seekable_customFile sr
 
 size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsigned long long offset)
 {
+    unsigned long long const eos = zs->seekTable.entries[zs->seekTable.tableLen].dOffset;
+    if (offset + len > eos) {
+        len = eos - offset;
+    }
+
     U32 targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, offset);
     U32 noOutputProgressCount = 0;
     size_t srcBytesRead = 0;
@@ -449,7 +454,7 @@ size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsign
             zs->in = (ZSTD_inBuffer){zs->inBuff, 0, 0};
             XXH64_reset(&zs->xxhState, 0);
             ZSTD_DCtx_reset(zs->dstream, ZSTD_reset_session_only);
-            if (srcBytesRead > zs->buffWrapper.size) {
+            if (zs->buffWrapper.size && srcBytesRead > zs->buffWrapper.size) {
                 return ERROR(seekableIO);
             }
         }
@@ -502,6 +507,8 @@ size_t ZSTD_seekable_decompress(ZSTD_seekable* zs, void* dst, size_t len, unsign
                 if (zs->decompressedOffset < offset + len) {
                     /* go back to the start and force a reset of the stream */
                     targetFrame = ZSTD_seekable_offsetToFrameIndex(zs, zs->decompressedOffset);
+                    /* in this case it will fail later with corruption_detected, since last block does not have checksum */
+                    assert(targetFrame != zs->seekTable.tableLen);
                 }
                 break;
             }