void* dst, size_t dstCapacity,
size_t srcSize,
void* entropyWorkspace, size_t entropyWkspSize,
- int bmi2, U32 const canEmitUncompressed)
+ int bmi2)
{
size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
seqStorePtr, prevEntropy, nextEntropy, cctxParams,
/* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
* Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
*/
- if (canEmitUncompressed) {
- if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
- return 0; /* block not compressed */
- FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
-
- /* Check compressibility */
- { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
- if (cSize >= maxCSize) return 0; /* block not compressed */
- }
+ if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
+ return 0; /* block not compressed */
+ FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
+
+ /* Check compressibility */
+ { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
+ if (cSize >= maxCSize) return 0; /* block not compressed */
}
DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
return cSize;
return nbSeqs < 4 && nbLits < 10;
}
-static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
{
- ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
- zc->blockState.prevCBlock = zc->blockState.nextCBlock;
- zc->blockState.nextCBlock = tmp;
+ ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
+ bs->prevCBlock = bs->nextCBlock;
+ bs->nextCBlock = tmp;
}
/* Writes the block header */
resultSeqStore->ofCode += startIdx;
}
+/**
+ * ZSTD_seqStore_updateRepcodes(): Starting from an array of initial repcodes and a seqStore,
+ * construct the final repcodes at the conclusion of compressing the seqStore, stored in dstRep.
+ */
+static void ZSTD_seqStore_updateRepcodes(U32 dstRep[ZSTD_REP_NUM],
+ const U32 initialRep[ZSTD_REP_NUM],
+ const seqStore_t* const seqStore, U32 const nbSeq) {
+ repcodes_t updatedRepcodes;
+ U32 idx = 0;
+ ZSTD_memcpy(updatedRepcodes.rep, initialRep, sizeof(repcodes_t));
+ for (; idx < nbSeq; ++idx) {
+ seqDef const seq = seqStore->sequencesStart[idx];
+ U32 const ll0 = (seq.litLength == 0);
+ U32 const offCode = seq.offset - 1;
+ assert(seq.offset >= 1); /* seqDef::offset == offCode+1, and ZSTD_updateRep() expects an offCode */
+ updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+ }
+ ZSTD_memcpy(dstRep, updatedRepcodes.rep, sizeof(repcodes_t));
+}
+
/* ZSTD_compressSeqStore_singleBlock():
* Compresses a seqStore into a block with a block header, into the buffer dst.
*
static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* seqStore,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
- U32 lastBlock, U32 canEmitRLEorNoCompress) {
+ U32 lastBlock, U32 isPartition) {
const U32 rleMaxLength = 25;
BYTE* op = (BYTE*)dst;
const BYTE* ip = (const BYTE*)src;
op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
srcSize,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
- zc->bmi2, canEmitRLEorNoCompress);
+ zc->bmi2);
FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
if (!zc->isFirstBlock &&
cSeqsSize < rleMaxLength &&
- ZSTD_isRLE((BYTE const*)src, srcSize)&&
- canEmitRLEorNoCompress) {
+ ZSTD_isRLE((BYTE const*)src, srcSize)) {
/* We don't want to emit our first block as a RLE even if it qualifies because
* doing so will cause the decoder (cli only) to throw a "should consume all input error."
* This is only an issue for zstd <= v1.4.3
if (zc->seqCollector.collectSequences) {
ZSTD_copyBlockSequences(zc);
- ZSTD_confirmRepcodesAndEntropyTables(zc);
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
return 0;
}
if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
- if (cSeqsSize == 0 && canEmitRLEorNoCompress) {
+ if (cSeqsSize == 0) {
cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
FORWARD_IF_ERROR(cSize, "Nocompress block failed");
DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
- } else if (cSeqsSize == 1 && canEmitRLEorNoCompress) {
+ } else if (cSeqsSize == 1) {
cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
FORWARD_IF_ERROR(cSize, "RLE compress block failed");
DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
} else {
- /* Error checking and repcodes update */
- ZSTD_confirmRepcodesAndEntropyTables(zc);
+ if (isPartition) {
+ /* We manually update repcodes if we are currently compressing a partition. Otherwise,
+ * for non-split blocks, the repcodes are already correct as-is.
+ */
+ ZSTD_seqStore_updateRepcodes(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep,
+ seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
+ }
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
cSize = ZSTD_blockHeaderSize + cSeqsSize;
DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
return splits.idx;
}
-/* Return 1 if if the first three sequences of seqstore/block use repcodes */
-static U32 ZSTD_seqStore_firstThreeContainRepcodes(const seqStore_t* const seqStore) {
- U32 const seqLimit = MIN((U32)(seqStore->sequences - seqStore->sequencesStart), ZSTD_REP_NUM);
- U32 seqIdx = 0;
- for (; seqIdx < seqLimit; ++seqIdx) {
- if (seqStore->sequencesStart[seqIdx].offset <= ZSTD_REP_MOVE) {
- return 1;
- }
- }
- return 0;
-}
-
/* ZSTD_compressBlock_splitBlock():
* Attempts to split a given block into multiple blocks to improve compression ratio.
*
size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
seqStore_t nextSeqStore;
seqStore_t currSeqStore;
- U32 canEmitRLEorNoCompress = 1;
- const size_t dstCapacityInitial = dstCapacity;
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
(unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
(unsigned)zc->blockState.matchState.nextToUpdate);
if (numSplits == 0) {
- size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, op, dstCapacity, ip, blockSize, lastBlock, 1);
+ size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
+ op, dstCapacity,
+ ip, blockSize,
+ lastBlock, 0 /* isPartition */);
FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
+ assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
return cSizeSingleBlock;
}
for (i = 0; i <= numSplits; ++i) {
size_t srcBytes;
size_t cSizeChunk;
- U32 lastBlockActual;
+ U32 const lastPartition = (i == numSplits);
+ U32 lastBlockEntireSrc = 0;
srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore);
- lastBlockActual = lastBlock && (i == numSplits);
srcBytesTotal += srcBytes;
- if (i == numSplits) {
+ if (lastPartition) {
/* This is the final partition, need to account for possible last literals */
srcBytes += blockSize - srcBytesTotal;
+ lastBlockEntireSrc = lastBlock;
} else {
ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
- if (ZSTD_seqStore_firstThreeContainRepcodes(&nextSeqStore)) {
- DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: Next block contains rep in first three seqs!");
- canEmitRLEorNoCompress = 0;
- }
}
- cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore, op, dstCapacity, ip, srcBytes, lastBlockActual, canEmitRLEorNoCompress);
+ cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore,
+ op, dstCapacity,
+ ip, srcBytes,
+ lastBlockEntireSrc, 1 /* isPartition */);
DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk);
FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
- ZSTD_memcpy(zc->blockState.nextCBlock->rep, zc->blockState.prevCBlock->rep, sizeof(U32)*ZSTD_REP_NUM);
ip += srcBytes;
op += cSizeChunk;
dstCapacity -= cSizeChunk;
cSize += cSizeChunk;
currSeqStore = nextSeqStore;
+ assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
}
-
- if (cSize > ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize) {
- /* If too large, recompress the original block to avoid any chance of a single block exceeding ZSTD_BLOCKSIZE_MAX */
- cSize = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, (BYTE*)dst, dstCapacityInitial, (const BYTE*)src, blockSize, lastBlock, 1);
- FORWARD_IF_ERROR(cSize, "Compressing single block from splitBlock_internal() fallback failed!");
- DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: Compressed split block too large, recompressed");
- }
- assert(cSize <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
return cSize;
}
if (zc->seqCollector.collectSequences) {
ZSTD_copyBlockSequences(zc);
- ZSTD_confirmRepcodesAndEntropyTables(zc);
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
return 0;
}
dst, dstCapacity,
srcSize,
zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
- zc->bmi2, 1 /* Can emit uncompressed blocks */);
+ zc->bmi2);
if (zc->seqCollector.collectSequences) {
ZSTD_copyBlockSequences(zc);
out:
if (!ZSTD_isError(cSize) && cSize > 1) {
- ZSTD_confirmRepcodesAndEntropyTables(zc);
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
}
/* We check that dictionaries have offset codes available for the first
* block. After the first block, the offcode table might not have large
size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
- ZSTD_confirmRepcodesAndEntropyTables(zc);
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
return cSize;
}
}
op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
blockSize,
cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
- cctx->bmi2, 1 /* Can emit uncompressed blocks */);
+ cctx->bmi2);
FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed");
DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize);
} else {
U32 cBlockHeader;
/* Error checking and repcodes update */
- ZSTD_confirmRepcodesAndEntropyTables(cctx);
+ ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;