From: Paul Cruz Date: Fri, 9 Jun 2017 00:06:30 +0000 (-0700) Subject: added code for generating dictionary/test files randomly. Still need to make sure... X-Git-Tag: v1.3.0~1^2~15^2~72 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c2d909e3965e3b6eb5d342847b20ba78cbee5f8b;p=thirdparty%2Fzstd.git added code for generating dictionary/test files randomly. Still need to make sure dictionary ID matches --- diff --git a/tests/decodecorpus.c b/tests/decodecorpus.c index f7b3c854f..8f2e652cd 100644 --- a/tests/decodecorpus.c +++ b/tests/decodecorpus.c @@ -1121,13 +1121,12 @@ static void initFrame(frame_t* fr) } /* Return the final seed */ -static U32 generateFrame(U32 seed, frame_t* fr) +static U32 generateFrame(U32 seed, frame_t* fr, int genDict, size_t dictSize) { /* generate a complete frame */ DISPLAYLEVEL(1, "frame seed: %u\n", seed); - initFrame(fr); - + writeFrameHeader(&seed, fr); writeBlocks(&seed, fr); writeChecksum(fr); @@ -1135,6 +1134,7 @@ static U32 generateFrame(U32 seed, frame_t* fr) return seed; } + /*-******************************************************* * Test Mode *********************************************************/ @@ -1215,7 +1215,7 @@ static int runTestMode(U32 seed, unsigned numFiles, unsigned const testDurationS else DISPLAYUPDATE("\r%u ", fnum); - seed = generateFrame(seed, &fr); + seed = generateFrame(seed, &fr, 0, 0); { size_t const r = testDecodeSimple(&fr); if (ZSTD_isError(r)) { @@ -1250,7 +1250,7 @@ static int generateFile(U32 seed, const char* const path, DISPLAY("seed: %u\n", seed); - generateFrame(seed, &fr); + generateFrame(seed, &fr, 0, 0); outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, path); if (origPath) { @@ -1272,7 +1272,7 @@ static int generateCorpus(U32 seed, unsigned numFiles, const char* const path, DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); - seed = generateFrame(seed, &fr); + seed = generateFrame(seed, &fr, 0, 0); if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { DISPLAY("Error: path too long\n"); @@ -1294,6 +1294,81 @@ static int generateCorpus(U32 seed, unsigned numFiles, const char* const path, return 0; } +static int generateCorpusWithDict(U32 seed, unsigned numFiles, const char* const path, + const char* const origPath, const size_t dictSize) +{ + const size_t minDictSize = 8; + char outPath[MAX_PATH]; + U32 dictID; + BYTE* dictStart; + unsigned fnum; + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + if(snprintf(outPath, MAX_PATH, "%s/dictionary", path) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + + /* Generate the dictionary randomly first */ + if(dictSize < minDictSize){ + DISPLAY("Error: dictionary size (%zu) is too small\n", dictSize); + } + else{ + /* variable declaration */ + dictStart = malloc(dictSize); + size_t pos = 0; + dictID = RAND(&seed) + 1; + + /* write dictionary magic number */ + MEM_writeLE32(dictStart + pos, ZSTD_DICT_MAGIC); + pos += 4; + + /* write random dictionary ID */ + MEM_writeLE32(dictStart + pos, dictID); + pos += 4; + + /* randomly generate the rest of the dictionary */ + RAND_buffer(&seed, dictStart + pos, dictSize-8); + outputBuffer(dictStart, dictSize, outPath); + } + + /* generate random compressed/decompressed files */ + for (fnum = 0; fnum < numFiles; fnum++) { + frame_t fr; + size_t returnValue; + BYTE* decompressedPtr = malloc(MAX_DECOMPRESSED_SIZE); + + DISPLAYUPDATE("\r%u/%u ", fnum, numFiles); + + seed = generateFrame(seed, &fr, 1, dictSize); + + if (snprintf(outPath, MAX_PATH, "%s/z%06u.zst", path, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + outputBuffer(fr.dataStart, (BYTE*)fr.data - (BYTE*)fr.dataStart, outPath); + + if (origPath) { + if (snprintf(outPath, MAX_PATH, "%s/z%06u", origPath, fnum) + 1 > MAX_PATH) { + DISPLAY("Error: path too long\n"); + return 1; + } + outputBuffer(fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, outPath); + } + + /* if asked, supply the decompressed version */ + + returnValue = ZSTD_decompress_usingDict(dctx, decompressedPtr, MAX_DECOMPRESSED_SIZE, + fr.srcStart, (BYTE*)fr.src - (BYTE*)fr.srcStart, + dictStart,dictSize); + + } + + + /* write uncompressed versions of files */ + DISPLAY("This is origPath: %s\nAnd this is numFiles: %d\n", origPath, numFiles); + return 0; +} + /*_******************************************************* * Command line @@ -1350,6 +1425,8 @@ int main(int argc, char** argv) int testMode = 0; const char* path = NULL; const char* origPath = NULL; + int genDict = 0; + unsigned dictSize = (10 << 10); /* 10 kB default */ int argNb; @@ -1410,6 +1487,10 @@ int main(int argc, char** argv) argument++; if (strcmp(argument, "content-size") == 0) { opts.contentSize = 1; + } else if(strcmp(argument, "train-dict") == 0){ + argument += 11; + dictSize = readInt(&argument); + genDict = 1; } else { advancedUsage(argv[0]); return 1; @@ -1441,9 +1522,16 @@ int main(int argc, char** argv) return 1; } - if (numFiles == 0) { + if (numFiles == 0 && genDict == 0) { return generateFile(seed, path, origPath); - } else { + } else if (genDict == 0){ return generateCorpus(seed, numFiles, path, origPath); + } else if (numFiles == 0){ + /* should generate a single file with a dictionary */ + return generateCorpusWithDict(seed, 1, path, origPath, dictSize); + } else{ + /* should generate multiple files with a dictionary */ + return generateCorpusWithDict(seed, numFiles, path, origPath, dictSize); } + }