#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
-typedef enum { bt_raw, bt_rle, bt_compressed, bt_end } blockType_e;
+typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; /* bt_reserved is not a usable block type : the decoder answers corruption_detected when it meets it */
#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
size_t outBuffContentSize;
size_t outBuffFlushedSize;
ZBUFF_cStage stage;
+ U32 checksum;
ZSTD_customMem customMem;
}; /* typedef'd to ZBUFF_CCtx within "zstd_buffered.h" */
zbc->inBuffTarget = zbc->blockSize;
zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
zbc->stage = ZBUFFcs_load;
+ zbc->checksum = params.fParams.checksumFlag > 0;
return 0; /* ready to go */
}
op += outSize;
if (remainingToFlush) {
*dstCapacityPtr = op-ostart;
- return remainingToFlush + ZBUFF_endFrameSize;
+ return remainingToFlush + ZBUFF_endFrameSize + (zbc->checksum * 4);
}
/* create epilogue */
zbc->stage = ZBUFFcs_final;
- zbc->outBuffContentSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize); /* epilogue into outBuff */
+ zbc->outBuffContentSize = ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize); /* epilogue into outBuff */
}
/* flush epilogue */
}
+/*! ZSTD_compress_generic() :
+* Compress a chunk of data into one or multiple blocks.
+* All blocks will be terminated, all input will be consumed.
+* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
+* The frame is assumed to be already started (header already produced).
+* @return : compressed size, or an error code
+*/
static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity,
- const void* src, size_t srcSize)
+ const void* src, size_t srcSize,
+ U32 lastFrameChunk)
{
size_t blockSize = cctx->blockSize;
size_t remaining = srcSize;
XXH64_update(&cctx->xxhState, src, srcSize);
while (remaining) {
+ U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
size_t cSize;
ZSTD_statsResetFreqs(stats); /* debug only */
if (ZSTD_isError(cSize)) return cSize;
if (cSize == 0) { /* block is not compressible */
- cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize);
- if (ZSTD_isError(cSize)) return cSize;
+ U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
+ if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
+ MEM_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */
+ memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
+ cSize = ZSTD_blockHeaderSize+blockSize;
} else {
- U32 const cBlockHeader24 = (U32)bt_compressed + (U32)(cSize << 2);
+ U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
MEM_writeLE24(op, cBlockHeader24);
- cSize += 3;
+ cSize += ZSTD_blockHeaderSize;
}
remaining -= blockSize;
op += cSize;
}
+ if (lastFrameChunk && (op > ostart)) cctx->stage = ZSTDcs_ending; /* only when a block flagged "last" was really emitted : with no input the loop writes nothing, and ZSTD_compressEnd() must still produce the last block */
ZSTD_statsPrint(stats, cctx->params.cParams.searchLength); /* debug only */
return op-ostart;
}
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize,
- U32 frame)
+ U32 frame, U32 lastFrameChunk)
{
const BYTE* const ip = (const BYTE*) src;
size_t fhSize = 0;
zc->nextSrc = ip + srcSize;
{ size_t const cSize = frame ?
- ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize) :
+ ZSTD_compress_generic (zc, dst, dstCapacity, src, srcSize, lastFrameChunk) :
ZSTD_compressBlock_internal (zc, dst, dstCapacity, src, srcSize);
if (ZSTD_isError(cSize)) return cSize;
return cSize + fhSize;
void* dst, size_t dstCapacity,
const void* src, size_t srcSize)
{
- return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1);
+ return ZSTD_compressContinue_internal(zc, dst, dstCapacity, src, srcSize, 1, 0);
}
size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
ZSTD_LOG_BLOCK("%p: ZSTD_compressBlock searchLength=%d\n", cctx->base, cctx->params.cParams.searchLength);
- return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0);
+ return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
}
* @return : nb of bytes written into dst (or an error code) */
size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
{
- BYTE* op = (BYTE*)dst;
+ BYTE* const ostart = (BYTE*)dst;
+ BYTE* op = ostart;
size_t fhSize = 0;
- if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /*< not even init ! */
+ if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /*< not even init ! */
/* special case : empty frame */
- if (cctx->stage==ZSTDcs_init) {
+ if (cctx->stage == ZSTDcs_init) {
fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
if (ZSTD_isError(fhSize)) return fhSize;
dstCapacity -= fhSize;
+ op += fhSize; /* move the write cursor past the frame header : otherwise the epilogue overwrites it and `op-ostart` leaves fhSize out of the returned size */
cctx->stage = ZSTDcs_ongoing;
}
- /* frame epilogue */
- if (dstCapacity < ZSTD_blockHeaderSize) return ERROR(dstSize_tooSmall);
- { U32 const checksum = cctx->params.fParams.checksumFlag ?
- (U32)(XXH64_digest(&cctx->xxhState) >> 11) :
- 0;
- MEM_writeLE24(op, (U32)bt_end + (checksum << 2));
+ if (cctx->stage != ZSTDcs_ending) {
+ /* write one last empty block, make it the "last" block */
+ U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
+ if (dstCapacity<4) return ERROR(dstSize_tooSmall); /* MEM_writeLE32() stores 4 bytes, even though only ZSTD_blockHeaderSize of them are kept */
+ MEM_writeLE32(op, cBlockHeader24);
+ op += ZSTD_blockHeaderSize;
+ dstCapacity -= ZSTD_blockHeaderSize;
+ }
+
+ if (cctx->params.fParams.checksumFlag) {
+ U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); /* low 32 bits of the XXH64 digest */
+ if (dstCapacity<4) return ERROR(dstSize_tooSmall);
+ MEM_writeLE32(op, checksum);
+ op += 4;
}
cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
- return ZSTD_blockHeaderSize+fhSize;
+ return op-ostart; /* frame header (if written here) + last block header (if written here) + checksum (if enabled) */
}
if(ZSTD_isError(errorCode)) return errorCode; }
/* body (compression) */
- { size_t const oSize = ZSTD_compressContinue (ctx, op, dstCapacity, src, srcSize);
+ { size_t const oSize = ZSTD_compressContinue_internal(ctx, op, dstCapacity, src, srcSize, 1, 1);
if(ZSTD_isError(oSize)) return oSize;
op += oSize;
dstCapacity -= oSize; }
***************************************************************/
typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader,
ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock,
+ ZSTDds_decompressLastBlock, ZSTDds_checkChecksum,
ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage;
struct ZSTD_DCtx_s
ZSTD_customMem customMem;
size_t litBufSize;
size_t litSize;
+ size_t rleSize;
BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH];
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
}; /* typedef'd to ZSTD_DCtx within "zstd_static.h" */
/* Fields decoded from a block header by ZSTD_getcBlockSize() */
typedef struct
{
blockType_e blockType; /* bits 1-2 of the block header */
+ U32 lastBlock; /* bit 0 of the block header : non-zero when this block is the last one of the frame */
U32 origSize; /* size field of the block header (header >> 3) ; only useful for RLE */
} blockProperties_t;
{
if (srcSize < ZSTD_blockHeaderSize) return ERROR(srcSize_wrong);
{ U32 const cBlockHeader = MEM_readLE24(src);
- U32 const cSize = cBlockHeader >> 2;
- bpPtr->blockType = (blockType_e)(cBlockHeader & 3);
+ U32 const cSize = cBlockHeader >> 3;
+ bpPtr->lastBlock = cBlockHeader & 1;
+ bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
bpPtr->origSize = cSize; /* only useful for RLE */
- if (bpPtr->blockType == bt_end) return 0;
if (bpPtr->blockType == bt_rle) return 1;
+ if (bpPtr->blockType == bt_reserved) return ERROR(corruption_detected);
return cSize;
}
}
}
+/*! ZSTD_setRleBlock() :
+ * Regenerates an RLE block : fills `dst` with `regenSize` copies of the single byte found in `src`.
+ * @return : regenSize, or an error code
+ * (srcSize_wrong when srcSize != 1, dstSize_tooSmall when regenSize > dstCapacity) */
+static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize, size_t regenSize)
+{
+ const BYTE* const rleSrc = (const BYTE*)src;
+ if (srcSize != 1) return ERROR(srcSize_wrong); /* block content must be exactly the byte to repeat */
+ if (dstCapacity < regenSize) return ERROR(dstSize_tooSmall);
+ memset(dst, rleSrc[0], regenSize); /* src is only read once both checks have passed */
+ return regenSize;
+}
+
/*! ZSTD_decodeLiteralsBlock() :
@return : nb of bytes read from src (< srcSize ) */
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
case bt_rle :
decodedSize = ZSTD_generateNxBytes(op, oend-op, *ip, blockProperties.origSize);
break;
- case bt_end :
- /* end of frame */
- if (remainingSize) return ERROR(srcSize_wrong);
- if (dctx->fParams.checksumFlag) {
- U64 const h64 = XXH64_digest(&dctx->xxhState);
- U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
- U32 const check32 = MEM_readLE24(src) >> 2;
- if (check32 != h32) return ERROR(checksum_wrong);
- }
- decodedSize = 0;
- break;
+ case bt_reserved :
default:
- return ERROR(GENERIC); /* impossible */
+ return ERROR(corruption_detected);
}
- if (blockProperties.blockType == bt_end) break; /* bt_end */
if (ZSTD_isError(decodedSize)) return decodedSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, op, decodedSize);
op += decodedSize;
ip += cBlockSize;
remainingSize -= cBlockSize;
+ if (blockProperties.lastBlock) break;
}
+ if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */
+ U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState);
+ U32 checkRead;
+ if (remainingSize<4) return ERROR(checksum_wrong);
+ checkRead = MEM_readLE32(ip);
+ if (checkRead != checkCalc) return ERROR(checksum_wrong);
+ remainingSize -= 4;
+ }
+
+ if (remainingSize) return ERROR(srcSize_wrong);
return op-ostart;
}
{ blockProperties_t bp;
size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
if (ZSTD_isError(cBlockSize)) return cBlockSize;
- if (bp.blockType == bt_end) {
+ dctx->expected = cBlockSize;
+ dctx->bType = bp.blockType;
+ dctx->rleSize = bp.origSize;
+ if (cBlockSize) {
+ dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock;
+ return 0;
+ }
+ /* empty block */
+ if (bp.lastBlock) {
if (dctx->fParams.checksumFlag) {
- U64 const h64 = XXH64_digest(&dctx->xxhState);
- U32 const h32 = (U32)(h64>>11) & ((1<<22)-1);
- U32 const check32 = MEM_readLE24(src) >> 2;
- if (check32 != h32) return ERROR(checksum_wrong);
+ dctx->expected = 4;
+ dctx->stage = ZSTDds_checkChecksum;
+ } else {
+ dctx->expected = 0; /* end of frame */
+ dctx->stage = ZSTDds_getFrameHeaderSize;
}
- dctx->expected = 0;
- dctx->stage = ZSTDds_getFrameHeaderSize;
} else {
- dctx->expected = cBlockSize;
- dctx->bType = bp.blockType;
- dctx->stage = ZSTDds_decompressBlock;
+ dctx->expected = 3; /* go directly to next header */
+ dctx->stage = ZSTDds_decodeBlockHeader;
}
return 0;
}
+ case ZSTDds_decompressLastBlock:
case ZSTDds_decompressBlock:
{ size_t rSize;
switch(dctx->bType)
rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize);
break;
case bt_rle :
- return ERROR(GENERIC); /* not yet handled */
- break;
- case bt_end : /* should never happen (filtered at phase 1) */
- rSize = 0;
+ rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize);
break;
+ case bt_reserved : /* should never happen */
default:
- return ERROR(GENERIC); /* impossible */
+ return ERROR(corruption_detected);
}
- dctx->stage = ZSTDds_decodeBlockHeader;
- dctx->expected = ZSTD_blockHeaderSize;
- dctx->previousDstEnd = (char*)dst + rSize;
if (ZSTD_isError(rSize)) return rSize;
if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize);
+
+ if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */
+ if (dctx->fParams.checksumFlag) { /* another round for frame checksum */
+ dctx->expected = 4; /* the ZSTDds_checkChecksum stage reads a 32-bit value with MEM_readLE32() */
+ dctx->stage = ZSTDds_checkChecksum;
+ } else { /* without this `else`, the checksum stage was always overwritten and never ran */
+ dctx->expected = 0; /* ends here */
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ }
+ } else {
+ dctx->stage = ZSTDds_decodeBlockHeader;
+ dctx->expected = ZSTD_blockHeaderSize;
+ dctx->previousDstEnd = (char*)dst + rSize;
+ }
return rSize;
}
+ case ZSTDds_checkChecksum:
+ { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState);
+ U32 const check32 = MEM_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */
+ if (check32 != h32) return ERROR(checksum_wrong);
+ dctx->expected = 0;
+ dctx->stage = ZSTDds_getFrameHeaderSize;
+ return 0;
+ }
case ZSTDds_decodeSkippableHeader:
{ memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_min, src, dctx->expected);
dctx->expected = MEM_readLE32(dctx->headerBuffer + 4);
DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : decompress %u bytes : ", testNb++, (U32)CNBuffSize);
- CHECKPLUS( r , ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize),
- if (r != CNBuffSize) goto _output_error);
+ { size_t const r = ZSTD_decompress(decodedBuffer, CNBuffSize, compressedBuffer, cSize);
+ if (r != CNBuffSize) goto _output_error; }
DISPLAYLEVEL(4, "OK \n");
DISPLAYLEVEL(4, "test%3i : check decompressed result : ", testNb++);
-------------------------------------------
The structure of a single Zstandard frame is as follows:
-| `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] |`End_Marker`|
-|:--------------:|:--------------:|:----------:| ------------------ |:----------:|
-| 4 bytes | 2-14 bytes | n bytes | | 3 bytes |
+| `Magic_Number` | `Frame_Header` |`Data_Block`| [More data blocks] | [`Content_Checksum`] |
+|:--------------:|:--------------:|:----------:| ------------------ |:--------------------:|
+| 4 bytes | 2-14 bytes | n bytes | | 0-4 bytes |
__`Magic_Number`__
Detailed in [next chapter](#the-structure-of-data_block).
That’s where compressed data is stored.
-__`End_Marker`__
+__`Content_Checksum`__
-The flow of blocks ends when the last block header brings an _end signal_.
-This last block header may optionally host a `Content_Checksum`.
-
-##### __`Content_Checksum`__
-
-`Content_Checksum` allow to verify that frame content has been regenerated correctly.
+An optional 32-bit checksum, only present if `Content_Checksum_flag` is set.
The content checksum is the result
of [xxh64() hash function](https://www.xxHash.com)
digesting the original (decoded) data as input, and a seed of zero.
-Bits from 11 to 32 (included) are extracted to form a 22 bits checksum
-stored within `End_Marker`.
-```
-mask22bits = (1<<22)-1;
-contentChecksum = (XXH64(content, size, 0) >> 11) & mask22bits;
-```
-`Content_Checksum` is only present when its associated flag
-is set in the frame descriptor.
-Its usage is optional.
-
+The low 4 bytes of the checksum are stored in little-endian format.
The structure of `Frame_Header`
This is a 2-bit flag (`= Frame_Header_Descriptor >> 6`),
specifying if decompressed data size is provided within the header.
-The `Value` can be converted to `Field_Size` that is number of bytes used by `Frame_Content_Size` according to the following table:
+The `Flag_Value` can be converted into `Field_Size`,
+which is the number of bytes used by `Frame_Content_Size`
+according to the following table:
-| `Value` | 0 | 1 | 2 | 3 |
+|`Flag_Value`| 0 | 1 | 2 | 3 |
| ---------- | --- | --- | --- | --- |
|`Field_Size`| 0-1 | 2 | 4 | 8 |
-The meaning of `Value` equal to `0` depends on `Single_Segment_flag` :
-it either means `0` (size not provided) _if_ the `Window_Descriptor` byte is present,
-or `1` (frame content size <= 255 bytes) otherwise.
+When `Flag_Value` is `0`, `Field_Size` depends on `Single_Segment_flag` :
+if `Single_Segment_flag` is set, `Field_Size` is 1.
+Otherwise, `Field_Size` is 0 (content size not provided).
__`Single_Segment_flag`__
If this flag is set,
-data shall be regenerated within a single continuous memory segment.
+data must be regenerated within a single continuous memory segment.
-In this case, `Window_Descriptor` byte __is not present__,
-but `Frame_Content_Size_flag` field necessarily is.
+In this case, `Frame_Content_Size` is necessarily present,
+but `Window_Descriptor` byte is skipped.
As a consequence, the decoder must allocate a memory segment
of size equal or bigger than `Frame_Content_Size`.
__`Unused_bit`__
The value of this bit should be set to zero.
-A decoder compliant with this specification version should not interpret it.
+A decoder compliant with this specification version shall not interpret it.
It might be used in a future version,
to signal a property which is not mandatory to properly decode the frame.
Its value _must be zero_.
A decoder compliant with this specification version must ensure it is not set.
This bit may be used in a future revision,
-to signal a feature that must be interpreted in order to decode the frame.
+to signal a feature that must be interpreted to decode the frame correctly.
__`Content_Checksum_flag`__
-If this flag is set, a content checksum will be present within `End_Marker`.
-The checksum is a 22 bits value extracted from the XXH64() of data,
-and stored within `End_Marker`. See [`Content_Checksum`](#content_checksum) .
+If this flag is set, a 32-bit `Content_Checksum` will be present at the end of the frame.
+See `Content_Checksum` paragraph.
__`Dictionary_ID_flag`__
### `Window_Descriptor`
Provides guarantees on maximum back-reference distance
-that will be present within compressed data.
-This information is useful for decoders to allocate enough memory.
+that will be used within compressed data.
+This information is important for decoders to allocate enough memory.
-The `Window_Descriptor` byte is optional. It should be absent if `Single_Segment_flag` is set.
+The `Window_Descriptor` byte is optional. It is absent when `Single_Segment_flag` is set.
In this case, the maximum back-reference distance is the content size itself,
which can be any value from 1 to 2^64-1 bytes (16 EB).
which requests a memory size beyond decoder's authorized range.
For improved interoperability,
-decoders are recommended to be compatible with window sizes of 8 MB.
-Encoders are recommended to not request more than 8 MB.
+decoders are recommended to be compatible with window sizes of 8 MB,
+and encoders are recommended to not request more than 8 MB.
It's merely a recommendation though:
decoders are free to support higher or lower limits,
depending on local limitations.
When `Field_Size` is 2, _the offset of 256 is added_.
It's allowed to represent a small size (for example `18`) using any compatible variant.
-In order to preserve decoder from unreasonable memory requirement,
-a decoder can refuse a compressed frame
-which requests a memory size beyond decoder's authorized range.
-
The structure of `Data_Block`
-----------------------------
The structure of `Data_Block` is as follows:
-| `Block_Type` | `Block_Size` | `Block_Content` |
-|:------------:|:------------:|:---------------:|
-| 2 bits | 22 bits | n bytes |
+| `Last_Block` | `Block_Type` | `Block_Size` | `Block_Content` |
+|:------------:|:------------:|:------------:|:---------------:|
+| 1 bit | 2 bits | 21 bits | n bytes |
+
+The block header uses 3 bytes.
+
+__`Last_Block`__
+
+The lowest bit signals whether this block is the last one.
+The frame ends right after this block.
+It may be followed by an optional `Content_Checksum`.
__`Block_Type` and `Block_Size`__
-The block header uses 3-bytes, format is __little-endian__.
-The 2 highest bits represent the `Block_Type`,
-while the remaining 22 bits represent the (compressed) `Block_Size`.
+The next 2 bits represent the `Block_Type`,
+while the remaining 21 bits represent the `Block_Size`.
+Format is __little-endian__.
There are 4 block types :
| Value | 0 | 1 | 2 | 3 |
| ------------ | ----------- | ----------- | ------------------ | --------- |
-| `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `EndMark` |
+| `Block_Type` | `Raw_Block` | `RLE_Block` | `Compressed_Block` | `Reserved`|
- `Raw_Block` - this is an uncompressed block.
`Block_Size` is the number of bytes to read and copy.
`Block_Size` is the compressed size.
Decompressed size is unknown,
but its maximum possible value is guaranteed (see below)
-- `EndMark` - this is not a block. It signals the end of the frame.
- The rest of the field may be optionally filled by a checksum
- (see [`Content_Checksum`](#content_checksum)).
+- `Reserved` - this is not a block.
+ This value cannot be used with the current version of this specification.
Block sizes must respect a few rules :
- In compressed mode, compressed size is always strictly `< decompressed size`.