From: Yann Collet Date: Fri, 8 Jul 2016 17:16:57 +0000 (+0200) Subject: updated spec X-Git-Tag: v0.7.3^2~2 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bd106070632a8b5fb40147f73e10d344be0ad86a;p=thirdparty%2Fzstd.git updated spec --- diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index a72a244ff..42acd0d6a 100644 --- a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1284,8 +1284,8 @@ size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t struct ZSTD_DDict_s { - void* dictContent; - size_t dictContentSize; + void* dict; + size_t dictSize; ZSTD_DCtx* refContext; }; /* typedef'd tp ZSTD_CDict within zstd.h */ @@ -1317,8 +1317,8 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_cu return NULL; } } - ddict->dictContent = dictContent; - ddict->dictContentSize = dictSize; + ddict->dict = dictContent; + ddict->dictSize = dictSize; ddict->refContext = dctx; return ddict; } @@ -1338,7 +1338,7 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict) ZSTD_freeFunction const cFree = ddict->refContext->customMem.customFree; void* const opaque = ddict->refContext->customMem.opaque; ZSTD_freeDCtx(ddict->refContext); - cFree(opaque, ddict->dictContent); + cFree(opaque, ddict->dict); cFree(opaque, ddict); return 0; } diff --git a/zstd_compression_format.md b/zstd_compression_format.md index bb5d71be9..0db184cc3 100644 --- a/zstd_compression_format.md +++ b/zstd_compression_format.md @@ -16,7 +16,7 @@ Distribution of this document is unlimited. ### Version -0.0.2 (July 2016 - Work in progress - unfinished) +0.1.0 (08/07/16) Introduction @@ -1119,6 +1119,37 @@ with the new offset taking first spot. pushing the other ones by one position. +Dictionary format +----------------- + +`zstd` is compatible with "pure content" dictionaries, free of any format restriction. +But dictionaries created by `zstd --train` follow a format, described here. 
+ +__Pre-requisites__ : a dictionary has a known length, + defined either by a buffer limit, or a file size. + +| Header | DictID | Stats | Content | +| ------ | ------ | ----- | ------- | + +__Header__ : 4 bytes ID, value 0xEC30A437, Little Endian format + +__DictID__ : 4 bytes, stored in Little Endian format. + DictID can be any value, except 0 (which means no DictID). + It's used by decoders to check they use the correct dictionary. + +__Stats__ : Entropy tables, following the same format as [compressed blocks]. + They are stored in the following order : + Huffman tables for literals, FSE table for offset, + FSE table for matchLength, and finally FSE table for litLength. + It's then followed by 3 offset values, populating recent offsets, + stored in order, 4 bytes little endian each, for a total of 12 bytes. + +__Content__ : Where the actual dictionary content is. + Content depends on Dictionary size. + +[compressed blocks]: #compressed-block-format + Version changes --------------- +0.1.0 initial release