git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Add a nbSeq argument to compressSequences()
author senhuang42 <senhuang96@fb.com>
Mon, 12 Oct 2020 22:33:11 +0000 (18:33 -0400)
committer Sen Huang <senhuang96@fb.com>
Tue, 16 Mar 2021 21:04:22 +0000 (14:04 -0700)
Refactor ZSTD_compressBlock_internal() to do the block header write within and add nbSeq argument to compressSequences()

lib/compress/zstd_compress.c
lib/decompress/zstd_decompress.c
lib/decompress/zstd_decompress_block.c

index 2f4d5b4c3de43f89394a40bf49f77b3610dd917f..d19f4a2a6362e33740a1cd45e9c29f78500a754c 100644 (file)
@@ -2196,13 +2196,13 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
     FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
     U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
     const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
     const BYTE* const llCodeTable = seqStorePtr->llCode;
     const BYTE* const mlCodeTable = seqStorePtr->mlCode;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
-    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
     BYTE* seqHead;
     BYTE* lastNCount = NULL;
 
@@ -2472,6 +2472,16 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
 
 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
 
+/* countLiteralsBytes2() :
+ * Sums the litLength field of every sequence currently held in seqStore.
+ * Unlike countLiteralsBytes(), no 0x10000 long-length correction is applied.
+ * @return : the plain litLength total, accumulated in a U32. */
+static U32 countLiteralsBytes2(const seqStore_t* seqStore) {
+    /* number of sequences = distance between the write cursor and the start of the array */
+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    U32 literalsBytes = 0;
+    size_t i;   /* unsigned index : no signed/unsigned comparison against nbSeqs */
+    for (i = 0; i < nbSeqs; ++i) {
+        literalsBytes += seqStore->sequencesStart[i].litLength;
+    }
+    return literalsBytes;
+}
+
 static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
 {
     ZSTD_matchState_t* const ms = &zc->blockState.matchState;
@@ -2695,12 +2705,194 @@ static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
 
 /* Writes the block header */
 static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
+    DEBUGLOG(3, "writeBlockHeader: cSize: %u blockSize: %u lastBlock: %u", cSize, blockSize, lastBlock);
     U32 const cBlockHeader = cSize == 1 ?
                         lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
                         lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
     MEM_writeLE24(op, cBlockHeader);
 }
 
+/* countLiteralsBytes() :
+ * Total number of literal bytes referenced by the sequences in seqStore.
+ * Sums each sequence's litLength, and adds 0x10000 for the sequence located
+ * at longLengthPos when longLengthID == 1.
+ * Only literals attached to a sequence are counted.
+ * @return : the total, accumulated in a U32. */
+static U32 countLiteralsBytes(const seqStore_t* seqStore) {
+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    U32 literalsBytes = 0;
+    size_t i;
+    for (i = 0; i < nbSeqs; ++i) {
+        literalsBytes += seqStore->sequencesStart[i].litLength;
+    }
+    /* a single position carries the long-length flag : apply its correction once,
+     * and only when that position belongs to this store */
+    if (seqStore->longLengthID == 1 && seqStore->longLengthPos < nbSeqs) {
+        literalsBytes += 0x10000;
+    }
+    return literalsBytes;
+}
+
+/* countMatchBytes() :
+ * Total number of match bytes produced by the sequences in seqStore.
+ * Each sequence contributes matchLength + MINMATCH ; 0x10000 more is added
+ * for the sequence located at longLengthPos when longLengthID == 2.
+ * @return : the total, accumulated in a U32. */
+static U32 countMatchBytes(const seqStore_t* seqStore) {
+    size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart);
+    U32 matchBytes = 0;
+    size_t i;
+    for (i = 0; i < nbSeqs; ++i) {
+        matchBytes += seqStore->sequencesStart[i].matchLength + MINMATCH;
+    }
+    /* a single position carries the long-length flag : apply its correction once,
+     * and only when that position belongs to this store */
+    if (seqStore->longLengthID == 2 && seqStore->longLengthPos < nbSeqs) {
+        matchBytes += 0x10000;
+    }
+    return matchBytes;
+}
+
+/* setUpSeqStores() :
+ * Turns two descriptions of the same sequence store into two consecutive halves.
+ * Both arguments are expected to arrive describing the same, complete store
+ * (the caller passes two copies of zc->seqStore). On return :
+ * - firstSeqStore covers the first nbSeq/2 sequences and the literals they reference ;
+ * - secondSeqStore covers the remaining sequences and every remaining literal,
+ *   which includes any literals located after the final sequence.
+ * The long-length marker, if present, is kept only by the half that holds the flagged
+ * sequence ; its position is rebased when it lands in the second half.
+ * Care is needed with the literal end pointers : the original end of the literals must be
+ * recorded before the first store is truncated.
+ * srcSize is currently unused. */
+static void setUpSeqStores(seqStore_t* firstSeqStore, seqStore_t* secondSeqStore, U32 nbSeq, U32 srcSize) {
+    size_t const nbSeqFirstHalf = nbSeq/2;
+    size_t const nbSeqSecondHalf = nbSeq - nbSeqFirstHalf;   /* gets the extra sequence when nbSeq is odd */
+    U32 literalsBytesFirstHalf;
+    (void)srcSize;
+    (void)nbSeqSecondHalf;   /* only read by the trace below */
+    DEBUGLOG(2, "first half nbseq: %u second half nbseq: %u", (unsigned)nbSeqFirstHalf, (unsigned)nbSeqSecondHalf);
+
+    if (firstSeqStore->longLengthID != 0) {
+        DEBUGLOG(2, "long lenght ID present");
+        if (firstSeqStore->longLengthPos < nbSeqFirstHalf) {
+            /* flagged sequence stays in the first half */
+            secondSeqStore->longLengthID = 0;
+        } else {
+            /* flagged sequence moves to the second half : rebase its index */
+            firstSeqStore->longLengthID = 0;
+            secondSeqStore->longLengthPos -= (U32)nbSeqFirstHalf;
+        }
+    }
+
+    /* The second half ends where the complete store ended.
+     * Copy both end pointers field-to-field, before the first store is truncated :
+     * no intermediate local, hence no pointer type or const mismatch. */
+    secondSeqStore->sequences = firstSeqStore->sequences;
+    secondSeqStore->lit = firstSeqStore->lit;
+
+    /* Truncate the first store, then measure the literals it still references */
+    firstSeqStore->sequences = firstSeqStore->sequencesStart + nbSeqFirstHalf;
+    literalsBytesFirstHalf = countLiteralsBytes(firstSeqStore);
+    firstSeqStore->lit = firstSeqStore->litStart + literalsBytesFirstHalf;
+
+    /* The second store starts right after everything the first one consumes */
+    secondSeqStore->sequencesStart += nbSeqFirstHalf;
+    secondSeqStore->litStart += literalsBytesFirstHalf;
+    secondSeqStore->llCode += nbSeqFirstHalf;
+    secondSeqStore->mlCode += nbSeqFirstHalf;
+    secondSeqStore->ofCode += nbSeqFirstHalf;
+}
+
+/* ZSTD_compressBlock_splitBlock() :
+ * Emits the sequences already stored in zc->seqStore as two consecutive blocks :
+ * the first nbSeq/2 sequences form the first block, the rest of the input forms the second.
+ * Each half is entropy-compressed on its own, then written either as
+ * - a raw block, when the sequence compressor returns 0,
+ * - an RLE block, when the result is forced to 1,
+ * - a compressed block preceded by its own header, otherwise.
+ * Only the second block carries lastBlock.
+ * `frame` is currently unused.
+ * @return : total size written into dst (both blocks, headers included),
+ *           or an error code forwarded from one of the stages. */
+static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+                                        void* dst, size_t dstCapacity,
+                                        const void* src, size_t srcSize, U32 frame, U32 lastBlock, U32 nbSeq) {
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    seqStore_t firstHalfSeqStore = zc->seqStore;
+    seqStore_t secondHalfSeqStore = zc->seqStore;
+    size_t cSizeFirstHalf;
+    size_t cSizeSecondHalf;
+    size_t literalsBytesFirstHalf;
+    size_t srcBytesFirstHalf;
+    size_t srcBytesSecondHalf;
+    (void)frame;
+    DEBUGLOG(5, "ZSTD_compressBlock_splitBlock (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    /* Attempt block splitting here */
+    DEBUGLOG(3, "Block size pre-split is: %u - lastBlock: %u, dst ptr: %p op: %p", (unsigned)srcSize, lastBlock, (const void*)dst, (const void*)op);
+    DEBUGLOG(3, "srcSize: %u seq store size: %u", (unsigned)srcSize, countLiteralsBytes(&zc->seqStore) + countMatchBytes(&zc->seqStore));
+
+    setUpSeqStores(&firstHalfSeqStore, &secondHalfSeqStore, nbSeq, (U32)srcSize);
+
+    /* the two halves must add up to the complete store */
+    assert((U32)(firstHalfSeqStore.lit - firstHalfSeqStore.litStart) + (U32)(secondHalfSeqStore.lit - secondHalfSeqStore.litStart) == (U32)(zc->seqStore.lit - zc->seqStore.litStart));
+    assert((U32)(firstHalfSeqStore.sequences - firstHalfSeqStore.sequencesStart) + (U32)(secondHalfSeqStore.sequences - secondHalfSeqStore.sequencesStart) == (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart));
+
+    /* input bytes covered by the first half = its literals + its matches ;
+     * the second half takes whatever remains of the input */
+    literalsBytesFirstHalf = countLiteralsBytes(&firstHalfSeqStore);
+    srcBytesFirstHalf = literalsBytesFirstHalf + countMatchBytes(&firstHalfSeqStore);
+    assert(srcBytesFirstHalf <= srcSize);
+    srcBytesSecondHalf = srcSize - srcBytesFirstHalf;
+    DEBUGLOG(3, "literals bytes first half: %zu literals bytes second half: %zu, orig: %zu", literalsBytesFirstHalf, (size_t)countLiteralsBytes(&secondHalfSeqStore), (size_t)countLiteralsBytes(&zc->seqStore));
+    DEBUGLOG(3, "match bytes first half: %zu match bytes second half: %zu, orig: %zu", (size_t)countMatchBytes(&firstHalfSeqStore), (size_t)countMatchBytes(&secondHalfSeqStore), (size_t)countMatchBytes(&zc->seqStore));
+    DEBUGLOG(2, "Src bytes first half: %zu src bytes second half: %zu", srcBytesFirstHalf, srcBytesSecondHalf);
+
+    /* `dstCapacity - ZSTD_blockHeaderSize` below must not wrap around */
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "no room for first block header");
+
+    /* === first block === */
+    {   /* kept as size_t so that an error code is not truncated */
+        size_t cSeqsSizeFirstHalf = ZSTD_compressSequences(&firstHalfSeqStore,
+                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+                &zc->appliedParams,
+                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
+                srcBytesFirstHalf,
+                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                zc->bmi2);
+        FORWARD_IF_ERROR(cSeqsSizeFirstHalf, "ZSTD_compressSequences failed");
+
+        if (!zc->isFirstBlock &&
+            ZSTD_maybeRLE(&firstHalfSeqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize)) {
+            /* We don't want to emit our first block as a RLE even if it qualifies because
+            * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+            * This is only an issue for zstd <= v1.4.3
+            */
+            cSeqsSizeFirstHalf = 1;
+        }
+
+        if (cSeqsSizeFirstHalf == 0) {
+            cSizeFirstHalf = ZSTD_noCompressBlock(op, dstCapacity, ip, srcBytesFirstHalf, 0);
+            FORWARD_IF_ERROR(cSizeFirstHalf, "Nocompress block failed");
+            DEBUGLOG(2, "1: Writing out nocompress block, size: %zu", cSizeFirstHalf);
+        } else if (cSeqsSizeFirstHalf == 1) {
+            cSizeFirstHalf = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcBytesFirstHalf, 0);
+            FORWARD_IF_ERROR(cSizeFirstHalf, "RLE compress block failed");
+            DEBUGLOG(2, "1: Writing out RLE block, size: %zu", cSizeFirstHalf);
+        } else {
+            /* Error checking and repcodes update */
+            ZSTD_confirmRepcodesAndEntropyTables(zc);
+            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+            writeBlockHeader(op, cSeqsSizeFirstHalf, srcBytesFirstHalf, 0);
+            cSizeFirstHalf = ZSTD_blockHeaderSize + cSeqsSizeFirstHalf;
+            DEBUGLOG(3, "1: Writing out compressed block, size: %zu", cSizeFirstHalf);
+        }
+    }
+
+    /* copy the repcodes of prevCBlock into nextCBlock before compressing the second half */
+    {   int i;
+        for (i = 0; i < ZSTD_REP_NUM; ++i)
+            zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
+    }
+
+    /* advance past the first block ; the remaining capacity must actually shrink */
+    assert(cSizeFirstHalf <= dstCapacity);
+    ip += srcBytesFirstHalf;
+    op += cSizeFirstHalf;
+    dstCapacity -= cSizeFirstHalf;
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "no room for second block header");
+
+    /* === second block === */
+    {   size_t cSeqsSizeSecondHalf = ZSTD_compressSequences(&secondHalfSeqStore,
+                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+                &zc->appliedParams,
+                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
+                srcBytesSecondHalf,
+                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                zc->bmi2);
+        FORWARD_IF_ERROR(cSeqsSizeSecondHalf, "ZSTD_compressSequences failed");
+
+        /* NOTE(review): this tests the FIRST half's store and the whole input block,
+         * which looks like a copy-paste from the first block. Left unchanged : when the
+         * whole input passes ZSTD_isRLE(), emitting the second half as RLE is still valid.
+         * Confirm whether secondHalfSeqStore / (ip, srcBytesSecondHalf) was intended. */
+        if (ZSTD_maybeRLE(&firstHalfSeqStore) &&
+            ZSTD_isRLE((BYTE const*)src, srcSize)) {
+            cSeqsSizeSecondHalf = 1;
+        }
+
+        if (cSeqsSizeSecondHalf == 0) {
+            cSizeSecondHalf = ZSTD_noCompressBlock(op, dstCapacity, ip, srcBytesSecondHalf, lastBlock);
+            FORWARD_IF_ERROR(cSizeSecondHalf, "Nocompress block failed");
+            DEBUGLOG(2, "2: Writing out nocompress block, size: %zu", cSizeSecondHalf);
+        } else if (cSeqsSizeSecondHalf == 1) {
+            cSizeSecondHalf = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcBytesSecondHalf, lastBlock);
+            FORWARD_IF_ERROR(cSizeSecondHalf, "RLE compress block failed");
+            DEBUGLOG(2, "2: Writing out RLE block, size: %zu", cSizeSecondHalf);
+        } else {
+            /* Error checking and repcodes update */
+            ZSTD_confirmRepcodesAndEntropyTables(zc);
+            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+            writeBlockHeader(op, cSeqsSizeSecondHalf,  srcBytesSecondHalf, lastBlock);
+            cSizeSecondHalf = ZSTD_blockHeaderSize + cSeqsSizeSecondHalf;
+            DEBUGLOG(3, "2: Writing out compressed block, size: %zu", cSizeSecondHalf);
+        }
+    }
+
+    DEBUGLOG(2, "cSizeFirstHalf: %u cSizeSecondHalf: %u", (unsigned)cSizeFirstHalf, (unsigned)cSizeSecondHalf);
+    return cSizeFirstHalf + cSizeSecondHalf;
+}
+
 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize, U32 frame, U32 lastBlock)
@@ -2711,8 +2903,9 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
      */
     const U32 rleMaxLength = 25;
     size_t cSize;
+    size_t nbSeq;
     const BYTE* ip = (const BYTE*)src;
-    BYTE* op = (BYTE*)(dst + ZSTD_blockHeaderSize);
+    BYTE* op = (BYTE*)dst;
     DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
                 (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
                 (unsigned)zc->blockState.matchState.nextToUpdate);
@@ -2720,6 +2913,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
     {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
         FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
         if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+        nbSeq = (size_t)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
     }
 
     if (zc->seqCollector.collectSequences) {
@@ -2728,11 +2922,15 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
         return 0;
     }
 
+    if (nbSeq >= 2) {
+        return ZSTD_compressBlock_splitBlock(zc, dst, dstCapacity, src, srcSize, frame, lastBlock, nbSeq);
+    }
+    
     /* encode sequences and literals */
     cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
             &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
             &zc->appliedParams,
-            op, dstCapacity,
+            op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
             srcSize,
             zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
             zc->bmi2);
@@ -2770,7 +2968,7 @@ out:
         cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
         FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
     } else {
-        writeBlockHeader(dst, cSize, srcSize, lastBlock);
+        writeBlockHeader(op, cSize, srcSize, lastBlock);
         cSize += ZSTD_blockHeaderSize;
     }
     return cSize;
@@ -2936,7 +3134,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
             assert(dstCapacity >= cSize);
             dstCapacity -= cSize;
             cctx->isFirstBlock = 0;
-            DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u",
+            DEBUGLOG(2, "ZSTD_compress_frameChunk: adding a block of size %u",
                         (unsigned)cSize);
     }   }
 
index 15139501bea196fbe770ffdc1c824341ae0ef9ec..9f753911d0da241fa4457ff30dc3bbeff1161031 100644 (file)
@@ -763,7 +763,7 @@ size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSiz
 static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
                           const void* src, size_t srcSize)
 {
-    DEBUGLOG(5, "ZSTD_copyRawBlock");
+    DEBUGLOG(2, "ZSTD_copyRawBlock: %u", srcSize);
     RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, "");
     if (dst == NULL) {
         if (srcSize == 0) return 0;
@@ -847,6 +847,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
 
     /* Loop on each block */
     while (1) {
+        DEBUGLOG(2, "Remaining dstCap: %u", (size_t)(oend-op));
         size_t decodedSize;
         blockProperties_t blockProperties;
         size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
@@ -875,8 +876,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         if (ZSTD_isError(decodedSize)) return decodedSize;
         if (dctx->validateChecksum)
             XXH64_update(&dctx->xxhState, op, decodedSize);
-        if (decodedSize != 0)
+        if (decodedSize != 0) {
+            DEBUGLOG(2, "Decoded: %u", decodedSize);
             op += decodedSize;
+        }
         assert(ip != NULL);
         ip += cBlockSize;
         remainingSrcSize -= cBlockSize;
@@ -1189,7 +1192,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
             }
             FORWARD_IF_ERROR(rSize, "");
             RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum");
-            DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
+            DEBUGLOG(2, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize);
             dctx->decodedSize += rSize;
             if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize);
             dctx->previousDstEnd = (char*)dst + rSize;
index b71bc20d65b4e8e72dca056329cef42151a35088..56e4b5ef87ae8beffd7e507342f60f62575b9fcc 100644 (file)
@@ -775,6 +775,9 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
 
     /* bounds checks : careful of address space overflow in 32-bit mode */
     RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+    DEBUGLOG(2, "sequence length: %u", sequenceLength);
+    DEBUGLOG(2, "oLitEnd: %u iLitEnd: %u match: %u", oLitEnd, iLitEnd, match);
+    DEBUGLOG(2, "seq ll: %u, condition: %u", sequence.litLength, (size_t)(litLimit - *litPtr));
     RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
     assert(op < op + sequenceLength);
     assert(oLitEnd < op + sequenceLength);
@@ -850,8 +853,13 @@ size_t ZSTD_execSequence(BYTE* op,
     op = oLitEnd;
     *litPtr = iLitEnd;   /* update for next sequence */
 
+    
     /* Copy Match */
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /*DEBUGLOG(2, "oLitEnd: %u, oMatchEnd: %u iLitEnd: %u matchPos: %u", oLitEnd, oMatchEnd, iLitEnd, match);
+        DEBUGLOG(2, "off: %u ml: %u ll: %u", sequence.offset, sequence.matchLength, sequence.litLength);
+        DEBUGLOG(2, "first condition: %u", (size_t)(oLitEnd - prefixStart));
+        DEBUGLOG(2, "break condition: %u", (size_t)(oLitEnd - virtualStart));*/
         /* offset beyond prefix -> go into extDict */
         RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
         match = dictEnd + (match - prefixStart);
@@ -1210,6 +1218,9 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
 
     /* last literal segment */
     {   size_t const lastLLSize = litEnd - litPtr;
+        if (lastLLSize > (size_t)(oend-op)) {
+            DEBUGLOG(2, "too small lastll");
+        }
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
             ZSTD_memcpy(op, litPtr, lastLLSize);
@@ -1458,7 +1469,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
      * (note: but it could be evaluated from current-lowLimit)
      */
     ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
-    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+    DEBUGLOG(2, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
 
     RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");