{
unsigned acc = PRIME1;
size_t p=0;;
-
for (p=0; p<length; p++) {
acc *= PRIME2;
((unsigned char*)buffer)[p] = (unsigned char)(acc >> 21);
U32* countLit, U32* offsetcodeCount, U32* matchlengthCount, U32* litlengthCount,
const void* src, size_t srcSize)
{
- const BYTE* bytePtr;
- const U32* u32Ptr;
- seqStore_t seqStore;
+ const seqStore_t* seqStorePtr;
if (srcSize > ZSTD_BLOCKSIZE_MAX) srcSize = ZSTD_BLOCKSIZE_MAX; /* protection vs large samples */
ZSTD_copyCCtx(esr.zc, esr.ref);
ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
- seqStore = ZSTD_copySeqStore(esr.zc);
-
- /* count stats */
- for(bytePtr = seqStore.litStart; bytePtr < seqStore.lit; bytePtr++)
- countLit[*bytePtr]++;
- for(u32Ptr = seqStore.offsetStart; u32Ptr < seqStore.offset; u32Ptr++) {
- BYTE offcode = (BYTE)ZSTD_highbit(*u32Ptr) + 1;
- if (*u32Ptr==0) offcode=0;
- offsetcodeCount[offcode]++;
+ seqStorePtr = ZSTD_getSeqStore(esr.zc);
+
+ /* literals stats */
+ { const BYTE* bytePtr;
+ for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++)
+ countLit[*bytePtr]++;
}
- (void)matchlengthCount; (void)litlengthCount;
- /*
- for(bytePtr = seqStore.matchLengthStart; bytePtr < seqStore.matchLength; bytePtr++)
- matchlengthCount[*bytePtr]++;
- for(bytePtr = seqStore.litLengthStart; bytePtr < seqStore.litLength; bytePtr++)
- litlengthCount[*bytePtr]++;
- */
+
+ /* seqStats */
+ { size_t const nbSeq = (size_t)(seqStorePtr->offset - seqStorePtr->offsetStart);
+ ZSTD_seqToCodes(seqStorePtr, nbSeq);
+
+ { const BYTE* codePtr = seqStorePtr->offCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) offsetcodeCount[codePtr[u]]++;
+ }
+
+ { const BYTE* codePtr = seqStorePtr->mlCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) matchlengthCount[codePtr[u]]++;
+ }
+
+ { const BYTE* codePtr = seqStorePtr->llCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) litlengthCount[codePtr[u]]++;
+ } }
}
static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
const void* dictBuffer, size_t dictBufferSize)
{
U32 countLit[256];
- U32 offcodeCount[MaxOff+1];
HUF_CREATE_STATIC_CTABLE(hufTable, 255);
- short offcodeNCount[MaxOff+1];
+ U32 offcodeCount[OFFCODE_MAX+1];
+ short offcodeNCount[OFFCODE_MAX+1];
U32 matchLengthCount[MaxML+1];
short matchLengthNCount[MaxML+1];
- U32 litlengthCount[MaxLL+1];
- short litlengthNCount[MaxLL+1];
+ U32 litLengthCount[MaxLL+1];
+ short litLengthNCount[MaxLL+1];
EStats_ress_t esr;
ZSTD_parameters params;
U32 u, huffLog = 12, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
for (u=0; u<256; u++) countLit[u]=1; /* any character must be described */
for (u=0; u<=OFFCODE_MAX; u++) offcodeCount[u]=1;
for (u=0; u<=MaxML; u++) matchLengthCount[u]=1;
- for (u=0; u<=MaxLL; u++) litlengthCount[u]=1;
+ for (u=0; u<=MaxLL; u++) litLengthCount[u]=1;
esr.ref = ZSTD_createCCtx();
esr.zc = ZSTD_createCCtx();
esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
/* collect stats on all files */
for (u=0; u<nbFiles; u++) {
ZDICT_countEStats(esr,
- countLit, offcodeCount, matchLengthCount, litlengthCount,
+ countLit, offcodeCount, matchLengthCount, litLengthCount,
(const char*)srcBuffer + pos, fileSizes[u]);
pos += fileSizes[u];
}
}
mlLog = (U32)errorCode;
- total=0; for (u=0; u<=MaxLL; u++) total+=litlengthCount[u];
- errorCode = FSE_normalizeCount(litlengthNCount, llLog, litlengthCount, total, MaxLL);
+ total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
+ errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_normalizeCount error with litlengthCount");
+ DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount");
goto _cleanup;
}
llLog = (U32)errorCode;
maxDstSize -= errorCode;
eSize += errorCode;
- errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litlengthNCount, MaxLL, llLog);
+ errorCode = FSE_writeNCount(dstBuffer, maxDstSize, litLengthNCount, MaxLL, llLog);
if (FSE_isError(errorCode)) {
eSize = ERROR(GENERIC);
- DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount");
+ DISPLAYLEVEL(1, "FSE_writeNCount error with litLengthNCount"); /* message now matches the renamed litLengthNCount table */
}
+#define DIB_MINSAMPLESSIZE (DIB_FASTSEGMENTSIZE*3)
+/*! ZDICT_trainFromBuffer_unsafe() :
+* `samplesBuffer` must be followed by a noisy guard band.
+* @return : size of the dictionary,
+* 0 when the total size of the samples is below DIB_MINSAMPLESSIZE,
+* or an error code.
+*/
size_t ZDICT_trainFromBuffer_unsafe(
void* dictBuffer, size_t maxDictSize,
const void* samplesBuffer, const size_t* sampleSizes, unsigned nbSamples,
ZDICT_params_t params)
{
- const U32 dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
+ U32 const dictListSize = MAX( MAX(DICTLISTSIZE, nbSamples), (U32)(maxDictSize/16));
dictItem* dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
unsigned selectivity = params.selectivityLevel;
unsigned compressionLevel = params.compressionLevel;
/* checks */
- if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) return ERROR(dstSize_tooSmall);
+ /* dictList is allocated at its declaration : every early exit below must release it (free(NULL) is a no-op) */
+ if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); }
+ if (!dictList) return ERROR(memory_allocation);
/* init */
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += sampleSizes[u]; }
- if (!dictList) return ERROR(memory_allocation);
+ if (sBuffSize < DIB_MINSAMPLESSIZE) { free(dictList); return 0; } /* not enough source to create dictionary */
ZDICT_initDictItem(dictList);
g_displayLevel = params.notificationLevel;
if (selectivity==0) selectivity = g_selectivity_default;
/* display best matches */
if (g_displayLevel>= 3) {
- const U32 nb = 25;
+ U32 const nb = 25;
+ U32 const dictContentSize = ZDICT_dictSize(dictList);
U32 u;
- U32 dictContentSize = ZDICT_dictSize(dictList);
DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize);
DISPLAYLEVEL(3, "list %u best segments \n", nb);
for (u=1; u<=nb; u++) {
} } }
/* create dictionary */
- {
- U32 dictContentSize = ZDICT_dictSize(dictList);
+ { U32 dictContentSize = ZDICT_dictSize(dictList);
size_t hSize;
BYTE* ptr;
U32 u;
}
+/* issue : samplesBuffer needs to be followed by a noisy guard band.
+* workaround : duplicate the samples into a larger buffer, and append the noise.
+* @return : 0 when the samples are empty, an error code when the copy cannot be allocated,
+* otherwise whatever ZDICT_trainFromBuffer_unsafe() returns. */
size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t params)
{
- size_t sBuffSize;
void* newBuff;
- size_t result;
+ size_t sBuffSize; /* total size of all samples */
{ unsigned u; for (u=0, sBuffSize=0; u<nbSamples; u++) sBuffSize += samplesSizes[u]; }
+ if (sBuffSize==0) return 0; /* empty content => no dictionary */
newBuff = malloc(sBuffSize + NOISELENGTH);
if (!newBuff) return ERROR(memory_allocation);
memcpy(newBuff, samplesBuffer, sBuffSize);
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
- result = ZDICT_trainFromBuffer_unsafe(dictBuffer, dictBufferCapacity,
+ { size_t const result = ZDICT_trainFromBuffer_unsafe( /* trains on the guarded copy, not on samplesBuffer */
+ dictBuffer, dictBufferCapacity,
newBuff, samplesSizes, nbSamples,
params);
- free(newBuff);
- return result;
+ free(newBuff); /* the copy is no longer needed once training is done */
+ return result; }
}
-/* issue : samplesBuffer need to be followed by a noisy guard band.
-* work around : duplicate the buffer, and add the noise ? */
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
{
return 0; /* reserved as a potential error code in the future */
}
-seqStore_t ZSTD_copySeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
+const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface : read-only access to the seqStore embedded in `ctx` */
{
- return ctx->seqStore;
+ return &(ctx->seqStore); /* address of the member itself : nothing is copied */
}
ostart[4] = (BYTE)(cLitSize);
break;
}
-
return lhSize+cLitSize;
}
+void ZSTD_seqToCodes(const seqStore_t* seqStorePtr, size_t const nbSeq)
+{ /* Fills the three code tables of `seqStorePtr` (llCodeStart, offCodeStart, mlCodeStart)
+ * for its first `nbSeq` sequences, reading litLengthStart, offsetStart and matchLengthStart.
+ * Although `seqStorePtr` is const, the tables it points to are written :
+ * the code tables, and any length entry equal to 65535 (see below).
+ * NOTE(review) : a 65535 entry is overwritten with (U16)longLength, so a second call on the
+ * same seqStore reads that truncated value instead of the escape marker - confirm that
+ * callers running this twice on one seqStore are fine with it.
+ * NOTE(review) : literal lengths and match lengths both read the single `longLength` field. */
+ /* LL codes */
+ { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 16, 17, 17, 18, 18, 19, 19,
+ 20, 20, 20, 20, 21, 21, 21, 21,
+ 22, 22, 22, 22, 22, 22, 22, 22,
+ 23, 23, 23, 23, 23, 23, 23, 23,
+ 24, 24, 24, 24, 24, 24, 24, 24,
+ 24, 24, 24, 24, 24, 24, 24, 24 };
+ const BYTE LL_deltaCode = 19;
+ U16* const llTable = seqStorePtr->litLengthStart;
+ BYTE* const llCodeTable = seqStorePtr->llCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) {
+ U32 ll = llTable[u];
+ if (llTable[u] == 65535) { ll = seqStorePtr->longLength; llTable[u] = (U16)ll; } /* 65535 is an escape value : the length is taken from longLength, and the entry keeps only its low 16 bits */
+ llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll]; /* table lookup up to 63, ZSTD_highbit(ll) + 19 above */
+ } }
+
+ /* Offset codes */
+ { const U32* const offsetTable = seqStorePtr->offsetStart;
+ BYTE* const ofCodeTable = seqStorePtr->offCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) ofCodeTable[u] = (BYTE)ZSTD_highbit(offsetTable[u]); /* no lookup table here : the code is ZSTD_highbit() of the offset */
+ }
+
+ /* ML codes */
+ { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
+ 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
+ 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
+ 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
+ 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
+ const BYTE ML_deltaCode = 36;
+ U16* const mlTable = seqStorePtr->matchLengthStart;
+ BYTE* const mlCodeTable = seqStorePtr->mlCodeStart;
+ size_t u;
+ for (u=0; u<nbSeq; u++) {
+ U32 ml = mlTable[u];
+ if (mlTable[u] == 65535) { ml = seqStorePtr->longLength; mlTable[u] = (U16)ml; } /* same 65535 escape handling as for literal lengths */
+ mlCodeTable[u] = (ml>127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml]; /* table lookup up to 127, ZSTD_highbit(ml) + 36 above */
+ } }
+}
+
+
size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
void* dst, size_t dstCapacity,
size_t srcSize)
#define MIN_SEQ_FOR_DYNAMIC_FSE 64
#define MAX_SEQ_FOR_STATIC_FSE 1000
- /* LL codes */
- { static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 16, 17, 17, 18, 18, 19, 19,
- 20, 20, 20, 20, 21, 21, 21, 21,
- 22, 22, 22, 22, 22, 22, 22, 22,
- 23, 23, 23, 23, 23, 23, 23, 23,
- 24, 24, 24, 24, 24, 24, 24, 24,
- 24, 24, 24, 24, 24, 24, 24, 24 };
- const BYTE LL_deltaCode = 19;
- size_t u;
- for (u=0; u<nbSeq; u++) {
- U32 ll = llTable[u];
- if (llTable[u] == 65535) { ll = seqStorePtr->longLength; llTable[u] = (U16)ll; }
- llCodeTable[u] = (ll>63) ? (BYTE)ZSTD_highbit(ll) + LL_deltaCode : LL_Code[ll];
- } }
+ /* convert length/distances into codes */
+ ZSTD_seqToCodes(seqStorePtr, nbSeq);
/* CTable for Literal Lengths */
{ U32 max = MaxLL;
LLtype = FSE_ENCODING_DYNAMIC;
} }
- /* Offset codes */
- { size_t i; for (i=0; i<nbSeq; i++) ofCodeTable[i] = (BYTE)ZSTD_highbit(offsetTable[i]); }
-
+ /* CTable for Offsets */
{ U32 max = MaxOff;
size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
Offtype = FSE_ENCODING_DYNAMIC;
} }
- /* ML codes */
- { static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
- 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
- 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
- 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
- 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
- const BYTE ML_deltaCode = 36;
- size_t u;
- for (u=0; u<nbSeq; u++) {
- U32 ml = mlTable[u];
- if (mlTable[u] == 65535) { ml = seqStorePtr->longLength; mlTable[u] = (U16)ml; }
- mlCodeTable[u] = (ml>127) ? (BYTE)ZSTD_highbit(ml) + ML_deltaCode : ML_Code[ml];
- } }
-
/* CTable for MatchLengths */
{ U32 max = MaxML;
size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
{
FILE* srcFile = ress.srcFile;
FILE* dstFile = ress.dstFile;
- U64 filesize = 0;
+ U64 readsize = 0;
U64 compressedfilesize = 0;
size_t dictSize = ress.dictBufferSize;
size_t sizeCheck, errorCode;
ZSTD_parameters params;
/* init */
- filesize = MAX(FIO_getFileSize(srcFileName),dictSize);
- params = ZSTD_getParams(cLevel, filesize);
- params.srcSize = filesize;
+ { U64 const filesize = FIO_getFileSize(srcFileName);
+ U64 const levelsize = MAX(filesize, dictSize); /* reuse filesize : MAX() on the call itself would query the file again */
+ params = ZSTD_getParams(cLevel, levelsize); /* parameters are selected from the larger of file size and dictionary size */
+ params.srcSize = filesize; /* srcSize is set from the file size alone, not from levelsize */
+ }
if (g_maxWLog) if (params.windowLog > g_maxWLog) params.windowLog = g_maxWLog;
errorCode = ZBUFF_compressInit_advanced(ress.ctx, ress.dictBuffer, ress.dictBufferSize, params);
if (ZBUFF_isError(errorCode)) EXM_THROW(21, "Error initializing compression : %s", ZBUFF_getErrorName(errorCode));
/* Main compression loop */
- filesize = 0;
+ readsize = 0;
while (1) {
/* Fill input Buffer */
- size_t inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
+ size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
if (inSize==0) break;
- filesize += inSize;
- DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(filesize>>20));
+ readsize += inSize;
+ DISPLAYUPDATE(2, "\rRead : %u MB ", (U32)(readsize>>20));
{ /* Compress using buffered streaming */
size_t usedInSize = inSize;
if (sizeCheck!=cSize) EXM_THROW(25, "Write error : cannot write compressed block into %s", dstFileName);
compressedfilesize += cSize;
}
- DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(filesize>>20), (double)compressedfilesize/filesize*100);
+ DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", (U32)(readsize>>20), (double)compressedfilesize/readsize*100);
}
/* End of Frame */
- {
- size_t cSize = ress.dstBufferSize;
- size_t result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize);
+ { size_t cSize = ress.dstBufferSize;
+ size_t const result = ZBUFF_compressEnd(ress.ctx, ress.dstBuffer, &cSize);
if (result!=0) EXM_THROW(26, "Compression error : cannot create frame end");
sizeCheck = fwrite(ress.dstBuffer, 1, cSize, dstFile);
/* Status */
DISPLAYLEVEL(2, "\r%79s\r", "");
DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n",
- (unsigned long long) filesize, (unsigned long long) compressedfilesize, (double)compressedfilesize/filesize*100);
+ (unsigned long long)readsize, (unsigned long long) compressedfilesize, (double)compressedfilesize/readsize*100);
return 0;
}