return rand32 >> 5;
}
+typedef U32 fixedPoint_24_8;   /* unsigned fixed-point number stored in a U32 : real value scaled by 256 (low 8 bits hold the fractional part); callers convert with `x * 256`, consumers drop the fraction with `>> 8` */
-static void RDG_fillLiteralDistrib(BYTE* ldt, double ld)
+static void RDG_fillLiteralDistrib(BYTE* ldt, fixedPoint_24_8 ld)
{
BYTE const firstChar = (ld<=0.0) ? 0 : '(';
BYTE const lastChar = (ld<=0.0) ? 255 : '}';
if (ld<=0.0) ld = 0.0;
for (u=0; u<LTSIZE; ) {
- U32 const weight = (U32)((double)(LTSIZE - u) * ld) + 1;
+ U32 const weight = (((LTSIZE - u) * ld) >> 8) + 1;
U32 const end = MIN ( u + weight , LTSIZE);
while (u < end) ldt[u++] = character;
character++;
return (RDG_rand(seedPtr) & 0x1FF) + 0xF;
}
-static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, const BYTE* ldt, U32* seedPtr)
+static void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize,
+ double matchProba, const BYTE* ldt, U32* seedPtr)
{
BYTE* const buffPtr = (BYTE*)buffer;
U32 const matchProba32 = (U32)(32768 * matchProba);
U32 const randOffset = RDG_rand15Bits(seedPtr) + 1;
U32 const offset = repeatOffset ? prevOffset : (U32) MIN(randOffset , pos);
size_t match = pos - offset;
- while (pos < d) buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */
+ while (pos < d) { buffPtr[pos++] = buffPtr[match++]; /* correctly manages overlaps */ }
prevOffset = offset;
} else {
/* Literal (noise) */
U32 const length = RDG_randLength(seedPtr);
U32 const d = (U32) MIN(pos + length, buffSize);
- while (pos < d) buffPtr[pos++] = RDG_genChar(seedPtr, ldt);
+ while (pos < d) { buffPtr[pos++] = RDG_genChar(seedPtr, ldt); }
} }
}
BYTE ldt[LTSIZE];
memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */
if (litProba<=0.0) litProba = matchProba / 4.5;
- RDG_fillLiteralDistrib(ldt, litProba);
+ RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001));
RDG_genBlock(buffer, size, 0, matchProba, ldt, &seed32);
}
if (buff==NULL) { perror("datagen"); exit(1); }
if (litProba<=0.0) litProba = matchProba / 4.5;
memset(ldt, '0', sizeof(ldt)); /* yes, character '0', this is intentional */
- RDG_fillLiteralDistrib(ldt, litProba);
+ RDG_fillLiteralDistrib(ldt, (fixedPoint_24_8)(litProba * 256 + 0.001));
SET_BINARY_MODE(stdout);
/* Generate initial dict */
* Unit tests
=============================================*/
-static int basicUnitTests(U32 seed, double compressibility)
+static int basicUnitTests(U32 const seed, double compressibility)
{
size_t const CNBuffSize = 5 MB;
void* const CNBuffer = malloc(CNBuffSize);
size_t const contentSize = 9 KB;
const void* const dict = (const char*)CNBuffer;
const void* const contentStart = (const char*)dict + flatdictSize;
- size_t const target_nodict_cSize[22+1] = { 3840, 3740, 3840, 3810, 3750,
- 3750, 3740, 3740, 3740, 3740,
- 3740, 3670, 3660, 3660, 3660,
- 3650, 3650, 3650, 3650, 3650,
- 3650, 3650, 3650 };
- size_t const target_wdict_cSize[22+1] = { 2820, 2850, 2860, 2820, 2940,
- 2930, 2930, 2920, 2890, 2890,
- 2890, 2900, 2900, 2770, 2760,
+ size_t const target_nodict_cSize[22+1] = { 3840, 3770, 3870, 3830, 3770,
+ 3770, 3770, 3770, 3750, 3750,
+ 3740, 3670, 3670, 3660, 3660,
+ 3660, 3660, 3660, 3660, 3660,
+ 3660, 3660, 3660 };
+ size_t const target_wdict_cSize[22+1] = { 2830, 2890, 2890, 2820, 2940,
+ 2950, 2950, 2920, 2900, 2890,
+ 2910, 2910, 2910, 2770, 2760,
2750, 2750, 2750, 2750, 2750,
2750, 2750, 2750 };
int l = 1;
DISPLAYLEVEL(3, "test%3i : flat-dictionary efficiency test : \n", testNb++);
assert(maxLevel == 22);
RDG_genBuffer(CNBuffer, flatdictSize + contentSize, compressibility, 0., seed);
+ DISPLAYLEVEL(4, "content hash : %016llx; dict hash : %016llx \n", XXH64(contentStart, contentSize, 0), XXH64(dict, flatdictSize, 0));
for ( ; l <= maxLevel; l++) {
size_t const nodict_cSize = ZSTD_compress(compressedBuffer, compressedBufferSize,
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
+ U32 seed32 = seed;
ZDICT_cover_params_t params;
U32 dictID;
DISPLAYLEVEL(3, "test%3i : ZDICT_trainFromBuffer_cover : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
memset(&params, 0, sizeof(params));
- params.d = 1 + (FUZ_rand(&seed) % 16);
- params.k = params.d + (FUZ_rand(&seed) % 256);
+ params.d = 1 + (FUZ_rand(&seed32) % 16);
+ params.k = params.d + (FUZ_rand(&seed32) % 256);
dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, dictSize,
CNBuffer, samplesSizes, nbSamples,
params);