-/***************************
- * Multiple DDicts Hashset *
- ***************************/
+/*************************************
+ * Multiple DDicts Hashset internals *
+ *************************************/
#define DDICT_HASHSET_MAX_LOAD_FACTOR 0.75
#define DDICT_HASHSET_TABLE_BASE_SIZE 64
* Returns an index between [0, hashSet->ddictPtrTableSize]
*/
static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) {
- U32 hash = XXH32(&dictID, sizeof(U32), 0);
+ U32 hash = XXH64(&dictID, sizeof(U32), 0);
/* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */
return hash & (hashSet->ddictPtrTableSize - 1);
}
* Returns 0 if successful (ddict may or may not have actually been added), or a zstd error code if something went wrong
*/
static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, U32 dictID) {
- RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID);
+ RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!");
DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx);
while (hashSet->ddictPtrTable[idx] != NULL) {
/* Replace existing ddict if inserting ddict with same dictID */
const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem);
const ZSTD_DDict** oldTable = hashSet->ddictPtrTable;
size_t oldTableSize = hashSet->ddictPtrTableSize;
- size_t i = 0;
+ size_t i;
DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize);
RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!");
size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]);
if (currDictID == dictID || currDictID == 0) {
/* currDictID == 0 implies a NULL ddict entry */
- DEBUGLOG(4, "Dict ID not found");
break;
} else {
if (idx == hashSet->ddictPtrTableSize - 1) {
*/
static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) {
ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem);
+ DEBUGLOG(4, "Allocating new hash set");
ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem);
ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE;
ret->ddictPtrCount = 0;
}
/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself.
+ * Note: The ZSTD_DDict* within the table are NOT freed.
*/
static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) {
- if (hashSet->ddictPtrTable) ZSTD_customFree(hashSet->ddictPtrTable, customMem);
- ZSTD_customFree(hashSet, customMem);
+ DEBUGLOG(4, "Freeing ddict hash set");
+ if (hashSet && hashSet->ddictPtrTable) {
+ ZSTD_customFree(hashSet->ddictPtrTable, customMem);
+ }
+ if (hashSet) {
+ ZSTD_customFree(hashSet, customMem);
+ }
}
/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set.
dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
dctx->outBufferMode = ZSTD_bm_buffered;
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
+ dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
}
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
dctx->noForwardProgress = 0;
dctx->oversizedDuration = 0;
dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
- ZSTD_DCtx_resetParameters(dctx);
- dctx->validateChecksum = 1;
- dctx->refMultipleDDicts = 0;
dctx->ddictSet = NULL;
+ ZSTD_DCtx_resetParameters(dctx);
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
dctx->dictContentEndForFuzzing = NULL;
#endif
ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */
}
-/* Public function: Given a dctx with a digested frame params, re-selects the correct DDict based on
- * the requested dict ID from the frame. ZSTD_d_refMultipleDDicts must be enabled for this function
- * to be called.
+/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on
+ * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then
+ * accordingly sets the ddict to be used to decompress the frame.
+ *
+ * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is.
+ *
+ * ZSTD_d_refMultipleDDicts must be enabled for this function to be called.
*/
static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) {
assert(dctx->refMultipleDDicts && dctx->ddictSet);
DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame");
- { const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
+ if (dctx->ddict) {
+ const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID);
if (frameDDict) {
DEBUGLOG(4, "DDict found!");
ZSTD_clearDict(dctx);
RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small");
/* Reference DDict requested by frame if dctx references multiple ddicts */
- if (dctx->refMultipleDDicts && dctx->ddictSet) {
+ if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) {
ZSTD_DCtx_selectFrameDDict(dctx);
}
return bound;
}
+
/*-*************************************************************
* Frame decoding
***************************************************************/
if (ddict) {
dctx->ddict = ddict;
dctx->dictUses = ZSTD_use_indefinitely;
- if (dctx->refMultipleDDicts && dctx->ddictSet) {
+ if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) {
+ if (dctx->ddictSet == NULL) {
+ dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
+ if (!dctx->ddictSet) {
+ RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
+ }
+ }
assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */
FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict,
ZSTD_getDictID_fromDDict(ddict), dctx->customMem), "");
bounds.upperBound = (int)ZSTD_d_ignoreChecksum;
return bounds;
case ZSTD_d_refMultipleDDicts:
- bounds.lowerBound = (int)ZSTD_d_refSingleDict;
- bounds.upperBound = (int)ZSTD_d_refMultipleDicts;
+ bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict;
+ bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts;
return bounds;
default:;
}
return 0;
case ZSTD_d_refMultipleDDicts:
CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value);
- DEBUGLOG(4, "Referencing multiple ddicts param enabled");
if (dctx->staticSize != 0) {
RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!");
}
dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value;
- if (dctx->refMultipleDDicts == ZSTD_d_refMultipleDicts && dctx->ddictSet == NULL) {
- DEBUGLOG(4, "Allocating new hash set");
- dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem);
- if (!dctx->ddictSet) {
- RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!");
- }
- } else {
- if (dctx->ddictSet) {
- ZSTD_freeDDictHashSet(dctx->ddictSet, dctx->customMem);
- dctx->ddictSet = NULL;
- }
- }
return 0;
default:;
}
/*! ZSTD_DCtx_refDDict() :
* Reference a prepared dictionary, to be used to decompress next frames.
* The dictionary remains active for decompression of future frames using same DCtx.
+ *
+ * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function
+ * will store the DDict references in a table, and the DDict used for decompression
+ * will be determined at decompression time, as per the dict ID in the frame.
+ * The memory for the table is allocated on the first call to refDDict, and can be
+ * freed with ZSTD_freeDCtx().
+ *
* @result : 0, or an error code (which can be tested with ZSTD_isError()).
* Note 1 : Currently, only one dictionary can be managed.
* Referencing a new dictionary effectively "discards" any previous one.
typedef enum {
/* Note: this enum controls ZSTD_d_refMultipleDDicts */
- ZSTD_d_refSingleDict = 0,
- ZSTD_d_refMultipleDicts = 1,
+ ZSTD_rmd_refSingleDDict = 0,
+ ZSTD_rmd_refMultipleDDicts = 1
} ZSTD_refMultipleDDicts_e;
typedef enum {
*
* If enabled and dctx is allocated on the heap, then additional memory will be allocated
* to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict()
- * using a given ZSTD_DCtx, rather than overwriting the previous DCtx referenced, will
- * store all references, and at decompression time, the appropriate dictID is selected
- * from the set of DDicts based on the dictID in the frame.
+ * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead
+ * store all references. At decompression time, the appropriate dictID is selected
+ * from the set of DDicts based on the dictID in the frame.
+ *
+ * Usage is simply calling ZSTD_refDDict() on multiple dict buffers.
+ *
+ * Param has values of byte ZSTD_refMultipleDDicts_e
+ *
+ * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory
+ * allocation for the hash table. ZSTD_freeDCtx() also frees this memory.
+ * Memory is allocated as per ZSTD_DCtx::customMem.
*
- * WARNING: Enabling this parameter will trigger memory allocation for the hash table, and disabling
- * this parameter will free the memory allocated for the hashtable. Memory is allocated as per
- * ZSTD_DCtx::customMem.
+ * Although this function allocates memory for the table, the user is still responsible for
+ * memory management of the underlying ZSTD_DDict* themselves.
*/
#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4
size_t dictSize;
U32 dictID;
size_t dictHeaderSize;
+ size_t dictBufferFixedSize = 144;
+ unsigned char const dictBufferFixed[144] = {0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
+ 0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
+ 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
+ 0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
+ 0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
+ 0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
+ 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
+ 0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
+ 0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69};
if (dictBuffer==NULL || samplesSizes==NULL) {
free(dictBuffer);
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
DISPLAYLEVEL(3, "test%3i : check dict header size correctness : ", testNb++);
- { unsigned char const dictBufferFixed[144] = { 0x37, 0xa4, 0x30, 0xec, 0x63, 0x00, 0x00, 0x00, 0x08, 0x10, 0x00, 0x1f,
- 0x0f, 0x00, 0x28, 0xe5, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x80, 0x0f, 0x9e, 0x0f, 0x00, 0x00, 0x24, 0x40, 0x80, 0x00, 0x01,
- 0x02, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0xde, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
- 0x08, 0x08, 0x08, 0x08, 0xbc, 0xe1, 0x4b, 0x92, 0x0e, 0xb4, 0x7b, 0x18,
- 0x86, 0x61, 0x18, 0xc6, 0x18, 0x63, 0x8c, 0x31, 0xc6, 0x18, 0x63, 0x8c,
- 0x31, 0x66, 0x66, 0x66, 0x66, 0xb6, 0x6d, 0x01, 0x00, 0x00, 0x00, 0x04,
- 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x73, 0x6f, 0x64, 0x61,
- 0x6c, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x72, 0x74, 0x6f, 0x72, 0x20, 0x65,
- 0x6c, 0x65, 0x69, 0x66, 0x65, 0x6e, 0x64, 0x2e, 0x20, 0x41, 0x6c, 0x69 };
- dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, 144);
+ { dictHeaderSize = ZDICT_getDictHeaderSize(dictBufferFixed, dictBufferFixedSize);
if (dictHeaderSize != 115) goto _output_error;
}
DISPLAYLEVEL(3, "OK : %u \n", (unsigned)dictHeaderSize);
}
DISPLAYLEVEL(3, "OK \n");
+ DISPLAYLEVEL(3, "test%3i : ZSTD_decompressDCtx() with multiple ddicts : ", testNb++);
+ {
+ const size_t numDicts = 128;
+ const size_t numFrames = 4;
+ size_t i;
+ ZSTD_DCtx* dctx = ZSTD_createDCtx();
+ ZSTD_DDict** ddictTable = (ZSTD_DDict**)malloc(sizeof(ZSTD_DDict*)*numDicts);
+ ZSTD_CDict** cdictTable = (ZSTD_CDict**)malloc(sizeof(ZSTD_CDict*)*numDicts);
+ U32 dictIDSeed = seed;
+ /* Create new compressed buffer that will hold frames with differing dictIDs */
+ char* dictBufferMulti = (char*)malloc(sizeof(char) * dictBufferFixedSize); /* Modifiable copy of fixed full dict buffer */
+
+ ZSTD_memcpy(dictBufferMulti, dictBufferFixed, dictBufferFixedSize);
+ cSize = 0;
+ /* Create a bunch of DDicts with random dict IDs */
+ for (i = 0; i < numDicts; ++i) {
+ U32 currDictID = FUZ_rand(&dictIDSeed);
+ MEM_writeLE32(dictBufferMulti+ZSTD_FRAMEIDSIZE, currDictID);
+ ddictTable[i] = ZSTD_createDDict(dictBufferMulti, dictBufferFixedSize);
+ cdictTable[i] = ZSTD_createCDict(dictBufferMulti, dictBufferFixedSize, 3);
+ if (!ddictTable[i] || !cdictTable[i] || ZSTD_getDictID_fromCDict(cdictTable[i]) != ZSTD_getDictID_fromDDict(ddictTable[i])) {
+ goto _output_error;
+ }
+ }
+ /* Compress a few frames using random CDicts */
+ {
+ size_t off = 0;
+ /* only use the first half so we don't push against size limit of compressedBuffer */
+ size_t const segSize = (CNBuffSize / 2) / numFrames;
+ for (i = 0; i < numFrames; i++) {
+ size_t dictIdx = FUZ_rand(&dictIDSeed) % numDicts;
+ ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
+ { CHECK_NEWV(r, ZSTD_compress_usingCDict(cctx,
+ (BYTE*)compressedBuffer + off, CNBuffSize - off,
+ (BYTE*)CNBuffer + segSize * (size_t)i, segSize,
+ cdictTable[dictIdx]));
+ off += r;
+ }
+ }
+ cSize = off;
+ }
+
+ /* We should succeed to decompression even though different dicts were used on different frames */
+ ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
+ ZSTD_DCtx_setParameter(dctx, ZSTD_d_refMultipleDDicts, ZSTD_rmd_refMultipleDDicts);
+ /* Reference every single ddict we made */
+ for (i = 0; i < numDicts; ++i) {
+ CHECK_Z( ZSTD_DCtx_refDDict(dctx, ddictTable[i]));
+ }
+ CHECK_Z( ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize) );
+ ZSTD_freeDCtx(dctx);
+ for (i = 0; i < numDicts; ++i) {
+ ZSTD_freeCDict(cdictTable[i]);
+ ZSTD_freeDDict(ddictTable[i]);
+ }
+ free(dictBufferMulti);
+ free(ddictTable);
+ free(cdictTable);
+ }
+ DISPLAYLEVEL(3, "OK \n");
+
ZSTD_freeCCtx(cctx);
free(dictBuffer);
free(samplesSizes);