static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
{
/* note : magic number already checked */
- size_t const dictSizeStart = dictSize;
+ const BYTE* dictPtr = (const BYTE*)dict;
+ const BYTE* const dictEnd = dictPtr + dictSize;
{ size_t const hufHeaderSize = HUF_readCTable(zc->hufTable, 255, dict, dictSize);
if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
- dict = (const char*)dict + hufHeaderSize;
- dictSize -= hufHeaderSize;
+ dictPtr += hufHeaderSize;
}
{ short offcodeNCount[MaxOff+1];
unsigned offcodeMaxValue = MaxOff, offcodeLog = OffFSELog;
- size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dict, dictSize);
+ size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
{ size_t const errorCode = FSE_buildCTable(zc->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
- dict = (const char*)dict + offcodeHeaderSize;
- dictSize -= offcodeHeaderSize;
+ dictPtr += offcodeHeaderSize;
}
{ short matchlengthNCount[MaxML+1];
unsigned matchlengthMaxValue = MaxML, matchlengthLog = MLFSELog;
- size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dict, dictSize);
+ size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
{ size_t const errorCode = FSE_buildCTable(zc->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
- dict = (const char*)dict + matchlengthHeaderSize;
- dictSize -= matchlengthHeaderSize;
+ dictPtr += matchlengthHeaderSize;
}
{ short litlengthNCount[MaxLL+1];
unsigned litlengthMaxValue = MaxLL, litlengthLog = LLFSELog;
- size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dict, dictSize);
+ size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
{ size_t const errorCode = FSE_buildCTable(zc->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog);
if (FSE_isError(errorCode)) return ERROR(dictionary_corrupted); }
- dictSize -= litlengthHeaderSize;
+ dictPtr += litlengthHeaderSize;
}
+ if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
+ zc->rep[0] = MEM_readLE32(dictPtr+0); if (zc->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
+ zc->rep[1] = MEM_readLE32(dictPtr+4); if (zc->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
+ zc->rep[2] = MEM_readLE32(dictPtr+8); if (zc->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
+ dictPtr += 12;
+
zc->flagStaticTables = 1;
- return (dictSizeStart-dictSize);
+ return dictPtr - (const BYTE*)dict;
}
/** ZSTD_compress_insertDictionary() :
void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
} EStats_ress_t;
+#define MAXREPOFFSET 1024   /* exclusive upper bound of the offsets tracked individually in repOffsets[]; larger values are counted in slot 0 */
static void ZDICT_countEStats(EStats_ress_t esr,
- U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
+ U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount, U32* repOffsets,   /* repOffsets : histogram indexed by offset value, indices used are < MAXREPOFFSET */
const void* src, size_t srcSize)
{
const seqStore_t* seqStorePtr;
size_t u;
for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
} }
+
+ /* rep offsets : credit the first two entries found at seqStorePtr->offsetStart,
+  * with weight 3 for the first one and weight 1 for the second one */
+ { const U32* const offsetPtr = seqStorePtr->offsetStart;
+ U32 offset1 = offsetPtr[0] - 3;   /* stored value minus 3 (presumably undoes a bias applied when storing -- TODO confirm); values < 3 wrap to a huge U32 and are caught by the range test below */
+ U32 offset2 = offsetPtr[1] - 3;   /* NOTE(review): entries [0] and [1] are read unconditionally; nothing in the visible lines shows that two sequences were stored -- confirm or guard on the sequence count */
+ if (offset1 >= MAXREPOFFSET) offset1 = 0;   /* out-of-range values are folded into slot 0 */
+ if (offset2 >= MAXREPOFFSET) offset2 = 0;
+ repOffsets[offset1] += 3;
+ repOffsets[offset2] += 1;
+ }
+
}
/*
static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
{
- size_t total;
+ size_t total=0;   /* sum of the first nbFiles entries of fileSizes[], returned to the caller; now zeroed at its declaration */
unsigned u;
- for (u=0, total=0; u<nbFiles; u++) total += fileSizes[u];
+ for (u=0; u<nbFiles; u++) total += fileSizes[u];   /* accumulator no longer reset in the for-init clause */
return total;
}
+typedef struct { U32 offset; U32 count; } offsetCount_t;   /* an offset value paired with the count accumulated for it */
+ /* ZDICT_insertSortCount() :
+  * Writes (val, count) into the spare last slot table[ZSTD_REP_NUM],
+  * then moves it toward index 0 for as long as the entry just before it has a strictly smaller count.
+  * When the first ZSTD_REP_NUM entries are already in non-increasing count order on entry,
+  * they are again in that order on return and hold the largest counts seen so far.
+  * On equal counts the entry already in the table keeps its place.
+  */
+static void ZDICT_insertSortCount(offsetCount_t table[ZSTD_REP_NUM+1], U32 val, U32 count)
+{
+ U32 u;
+ table[ZSTD_REP_NUM].offset = val;   /* candidate starts in the extra slot at the end */
+ table[ZSTD_REP_NUM].count = count;
+ for (u=ZSTD_REP_NUM; u>0; u--) {
+ offsetCount_t tmp;
+ if (table[u-1].count >= table[u].count) break;   /* predecessor is not smaller : candidate is in place */
+ tmp = table[u-1];   /* otherwise swap the candidate one position up */
+ table[u-1] = table[u];
+ table[u] = tmp;
+ }
+}
+
+
#define OFFCODE_MAX 18 /* only applicable to first block */
static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize,
unsigned compressionLevel,
short matchLengthNCount[MaxML+1];
U32 litLengthCount[MaxLL+1];
short litLengthNCount[MaxLL+1];
+ U32 repOffset[MAXREPOFFSET] = { 0 };
+ offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
EStats_ress_t esr;
ZSTD_parameters params;
U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
size_t eSize = 0;
size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles);
size_t const averageSampleSize = totalSrcSize / nbFiles;
+ BYTE* dstPtr = (BYTE*)dstBuffer;
/* init */
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
+ repOffset[1] = repOffset[4] = repOffset[8] = 1;
+ memset(bestRepOffset, 0, sizeof(bestRepOffset));
esr.ref = ZSTD_createCCtx();
esr.zc = ZSTD_createCCtx();
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
/* collect stats on all files */
for (u=0; u<nbFiles; u++) {
ZDICT_countEStats(esr,
- countLit, offcodeCount, matchLengthCount, litLengthCount,
+ countLit, offcodeCount, matchLengthCount, litLengthCount, repOffset,
(const char*)srcBuffer + pos, fileSizes[u]);
pos += fileSizes[u];
}
}
llLog = (U32)errorCode;
+ { U32 offset;
+ for (offset=1; offset<MAXREPOFFSET; offset++)
+ ZDICT_insertSortCount(bestRepOffset, offset, repOffset[offset]);
+ }
+
+
/* write result to buffer */
- errorCode = HUF_writeCTable(dstBuffer, maxDstSize, hufTable, 255, huffLog);
- if (HUF_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "HUF_writeCTable error");
- goto _cleanup;
+ { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
+ if (HUF_isError(hhSize)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "HUF_writeCTable error");
+ goto _cleanup;
+ }
+ dstPtr += hhSize;
+ maxDstSize -= hhSize;
+ eSize += hhSize;
}
- dstBuffer = (char*)dstBuffer + errorCode;
- maxDstSize -= errorCode;
- eSize += errorCode;
- errorCode = FSE_writeNCount(dstBuffer, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
- if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
- goto _cleanup;
+ { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
+ if (FSE_isError(ohSize)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount");
+ goto _cleanup;
+ }
+ dstPtr += ohSize;
+ maxDstSize -= ohSize;
+ eSize += ohSize;
}
- dstBuffer = (char*)dstBuffer + errorCode;
- maxDstSize -= errorCode;
- eSize += errorCode;
- errorCode = FSE_writeNCount(dstBuffer, maxDstSize, matchLengthNCount, MaxML, mlLog);
- if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
- goto _cleanup;
+ { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
+ if (FSE_isError(mhSize)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount");
+ goto _cleanup;
+ }
+ dstPtr += mhSize;
+ maxDstSize -= mhSize;
+ eSize += mhSize;
}
- dstBuffer = (char*)dstBuffer + errorCode;
- maxDstSize -= errorCode;
- eSize += errorCode;
- errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litLengthNCount, MaxLL, llLog);
- if (FSE_isError(errorCode)) {
+ { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
+ if (FSE_isError(lhSize)) {
+ eSize = ERROR(GENERIC);
+ DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
+ goto _cleanup;
+ }
+ dstPtr += lhSize;
+ maxDstSize -= lhSize;
+ eSize += lhSize;
+ }
+
+ if (maxDstSize<12) {
eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
+ DISPLAYLEVEL(1, "not enough space to write RepOffsets");
goto _cleanup;
}
- dstBuffer = (char*)dstBuffer + errorCode;
- maxDstSize -= errorCode;
- eSize += errorCode;
+ MEM_writeLE32(dstPtr+0, bestRepOffset[0].offset);
+ MEM_writeLE32(dstPtr+4, bestRepOffset[1].offset);
+ MEM_writeLE32(dstPtr+8, bestRepOffset[2].offset);
+ //MEM_writeLE32(dstPtr+0, 1);
+ //MEM_writeLE32(dstPtr+4, 4);
+ //MEM_writeLE32(dstPtr+8, 8);
+ dstPtr += 12;
+ eSize += 12;
_cleanup:
ZSTD_freeCCtx(esr.ref);