}sampleInfo;
+
+/*! getSampleInfo():
+ * Load from input files and add samples to buffer
+ * @return: a pointer to a sampleInfo struct containing information about the buffer where samples are stored,
+ * the size of each sample, and the total number of samples
+ */
sampleInfo* getSampleInfo(const char** fileNamesTable, unsigned nbFiles, size_t chunkSize,
unsigned maxDictSize, const unsigned displayLevel);
+
+/*! freeSampleInfo():
+ * Free memory allocated for info
+ */
+void freeSampleInfo(sampleInfo *info);
+
+
+
+/*! saveDict():
+ * Save the data stored in buff to the file dictFileName
+ */
void saveDict(const char* dictFileName, const void* buff, size_t buffSize);
***************************************/
static const unsigned g_defaultMaxDictSize = 110 KB;
#define DEFAULT_CLEVEL 3
-#define DEFAULT_INPUTFILE ""
#define DEFAULT_k 200
#define DEFAULT_OUTPUTFILE "defaultDict"
#define DEFAULT_DICTID 0
const char* programName = argv[0];
int operationResult = 0;
- char* inputFile = DEFAULT_INPUTFILE;
+ /* Initialize arguments to default values */
unsigned k = DEFAULT_k;
- char* outputFile = DEFAULT_OUTPUTFILE;
+ const char* outputFile = DEFAULT_OUTPUTFILE;
unsigned dictID = DEFAULT_DICTID;
unsigned maxDictSize = g_defaultMaxDictSize;
+ /* Initialize table to store input files */
const char** filenameTable = (const char**)malloc(argCount * sizeof(const char*));
unsigned filenameIdx = 0;
+ /* Parse arguments */
for (int i = 1; i < argCount; i++) {
const char* argument = argv[i];
if (longCommandWArg(&argument, "k=")) { k = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "dictID=")) { dictID = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "maxdict=")) { maxDictSize = readU32FromChar(&argument); continue; }
if (longCommandWArg(&argument, "in=")) {
- inputFile = malloc(strlen(argument) + 1);
- strcpy(inputFile, argument);
- filenameTable[filenameIdx] = inputFile;
+ filenameTable[filenameIdx] = argument;
filenameIdx++;
continue;
}
if (longCommandWArg(&argument, "out=")) {
- outputFile = malloc(strlen(argument) + 1);
- strcpy(outputFile, argument);
+ outputFile = argument;
continue;
}
DISPLAYLEVEL(1, "Incorrect parameters\n");
char* fileNamesBuf = NULL;
unsigned fileNamesNb = filenameIdx;
- int followLinks = 0;
+ int followLinks = 0; /* 0: do not follow symbolic links when expanding the file list */
const char** extendedFileList = NULL;
extendedFileList = UTIL_createFileList(filenameTable, filenameIdx, &fileNamesBuf,
&fileNamesNb, followLinks);
filenameIdx, blockSize, maxDictSize, zParams.notificationLevel);
operationResult = RANDOM_trainFromFiles(outputFile, info, maxDictSize, ¶ms);
+ /* Free allocated memory */
+ UTIL_freeFileList(extendedFileList, fileNamesBuf);
+ freeSampleInfo(info);
+
return operationResult;
}
}
-/*! ZDICT_trainFromBuffer_random():
- * Train a dictionary from an array of samples using the RANDOM algorithm.
- * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- * supplied with an array of sizes `samplesSizes`, providing the size of each
- * sample, in order.
- * The resulting dictionary will be saved into `dictBuffer`.
- * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- * or an error code, which can be tested with ZDICT_isError().
- */
+
+
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random(
void *dictBuffer, size_t dictBufferCapacity,
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
} ZDICT_random_params_t;
-
+/*! ZDICT_trainFromBuffer_random():
+ * Train a dictionary from an array of samples.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ * or an error code, which can be tested with ZDICT_isError().
+ */
ZDICTLIB_API size_t ZDICT_trainFromBuffer_random( void *dictBuffer, size_t dictBufferCapacity,
const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
ZDICT_random_params_t parameters);