From: Yann Collet Date: Tue, 20 Feb 2024 23:24:25 +0000 (-0800) Subject: datagen generates lorem ipsum by default X-Git-Tag: v1.5.6^2~64^2~8 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=83598aa106ba0edaa8b449b2fe5d63773eeebc4e;p=thirdparty%2Fzstd.git datagen generates lorem ipsum by default --- diff --git a/programs/lorem.c b/programs/lorem.c index 2fb977b07..56e229058 100644 --- a/programs/lorem.c +++ b/programs/lorem.c @@ -23,11 +23,11 @@ * and lacks a regularity more representative of text. * * The compression ratio achievable on the generated lorem ipsum - * is still a bit too good, presumably because the dictionary is too small. - * It would be possible to create some more complex scheme, - * notably by enlarging the dictionary with a word generator, - * and adding grammatical rules (composition) and syntax rules. - * But that's probably overkill for the intended goal. + * is still a bit too good, presumably because the dictionary is a bit too + * small. It would be possible to create some more complex scheme, notably by + * enlarging the dictionary with a word generator, and adding grammatical rules + * (composition) and syntax rules. But that's probably overkill for the intended + * goal. */ #include "lorem.h" diff --git a/tests/Makefile b/tests/Makefile index 35be1039b..ed7638b74 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -206,7 +206,7 @@ paramgrill : LDLIBS += -lm paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/timefn.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c $(PRGDIR)/lorem.c paramgrill.c CLEAN += datagen -datagen : $(PRGDIR)/datagen.c datagencli.c +datagen : $(PRGDIR)/datagen.c $(PRGDIR)/lorem.c loremOut.c datagencli.c $(LINK.c) $^ -o $@$(EXT) CLEAN += roundTripCrash diff --git a/tests/datagencli.c b/tests/datagencli.c index 09ec5e9ae..b30209953 100644 --- a/tests/datagencli.c +++ b/tests/datagencli.c @@ -8,122 +8,142 @@ * You may select, at your option, one of the above-listed licenses. 
 */ - /*-************************************ -* Dependencies -**************************************/ -#include "util.h" /* Compiler options */ -#include <stdio.h> /* fprintf, stderr */ -#include "datagen.h" /* RDG_generate */ - + * Dependencies + **************************************/ +#include <stdio.h> /* fprintf, stderr */ +#include "datagen.h" /* RDG_generate */ +#include "loremOut.h" /* LOREM_genOut */ +#include "util.h" /* Compiler options */ /*-************************************ -* Constants -**************************************/ -#define KB *(1 <<10) -#define MB *(1 <<20) -#define GB *(1U<<30) + * Constants + **************************************/ +#define KB *(1 << 10) +#define MB *(1 << 20) +#define GB *(1U << 30) #define SIZE_DEFAULT ((64 KB) + 1) #define SEED_DEFAULT 0 -#define COMPRESSIBILITY_DEFAULT 50 - +#define COMPRESSIBILITY_DEFAULT 9999 /*-************************************ -* Macros -**************************************/ -#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) -#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } + * Macros + **************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
\ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } static unsigned displayLevel = 2; - /*-******************************************************* -* Command line -*********************************************************/ + * Command line + *********************************************************/ static int usage(const char* programName) { - DISPLAY( "Compressible data generator\n"); - DISPLAY( "Usage :\n"); - DISPLAY( " %s [args]\n", programName); - DISPLAY( "\n"); - DISPLAY( "Arguments :\n"); - DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT); - DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT); - DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", - COMPRESSIBILITY_DEFAULT); - DISPLAY( " -h : display help and exit\n"); + DISPLAY("Compressible data generator\n"); + DISPLAY("Usage :\n"); + DISPLAY(" %s [args]\n", programName); + DISPLAY("\n"); + DISPLAY("Arguments :\n"); + DISPLAY(" -g# : generate # data (default:%i)\n", SIZE_DEFAULT); + DISPLAY(" -s# : Select seed (default:%i)\n", SEED_DEFAULT); + DISPLAY(" -P# : Select compressibility in %% (range [0-100])\n"); + DISPLAY(" -h : display help and exit\n"); return 0; } - int main(int argc, const char** argv) { - unsigned probaU32 = COMPRESSIBILITY_DEFAULT; - double litProba = 0.0; - U64 size = SIZE_DEFAULT; - U32 seed = SEED_DEFAULT; + unsigned probaU32 = COMPRESSIBILITY_DEFAULT; + double litProba = 0.0; + U64 size = SIZE_DEFAULT; + U32 seed = SEED_DEFAULT; const char* const programName = argv[0]; int argNb; - for(argNb=1; argNb='0') && (*argument<='9')) - size *= 10, size += *argument++ - '0'; - if (*argument=='K') { size <<= 10; argument++; } - if (*argument=='M') { size <<= 20; argument++; } - if (*argument=='G') { size <<= 30; argument++; } - if (*argument=='B') { argument++; } - break; - case 's': - argument++; - seed=0; - while ((*argument>='0') && (*argument<='9')) - seed *= 10, seed += *argument++ - '0'; - break; - case 'P': - argument++; - probaU32 = 0; - 
while ((*argument>='0') && (*argument<='9')) - probaU32 *= 10, probaU32 += *argument++ - '0'; - if (probaU32>100) probaU32 = 100; - break; - case 'L': /* hidden argument : Literal distribution probability */ - argument++; - litProba=0.; - while ((*argument>='0') && (*argument<='9')) - litProba *= 10, litProba += *argument++ - '0'; - if (litProba>100.) litProba=100.; - litProba /= 100.; - break; - case 'v': - displayLevel = 4; - argument++; - break; - default: - return usage(programName); + while (*argument != 0) { + switch (*argument) { + case 'h': + return usage(programName); + case 'g': + argument++; + size = 0; + while ((*argument >= '0') && (*argument <= '9')) + size *= 10, size += (U64)(*argument++ - '0'); + if (*argument == 'K') { + size <<= 10; + argument++; + } + if (*argument == 'M') { + size <<= 20; + argument++; + } + if (*argument == 'G') { + size <<= 30; + argument++; + } + if (*argument == 'B') { + argument++; + } + break; + case 's': + argument++; + seed = 0; + while ((*argument >= '0') && (*argument <= '9')) + seed *= 10, seed += (U32)(*argument++ - '0'); + break; + case 'P': + argument++; + probaU32 = 0; + while ((*argument >= '0') && (*argument <= '9')) + probaU32 *= 10, + probaU32 += (U32)(*argument++ - '0'); + if (probaU32 > 100) + probaU32 = 100; + break; + case 'L': /* hidden argument : Literal distribution + probability */ + argument++; + litProba = 0.; + while ((*argument >= '0') && (*argument <= '9')) + litProba *= 10, litProba += *argument++ - '0'; + if (litProba > 100.) + litProba = 100.; + litProba /= 100.; + break; + case 'v': + displayLevel = 4; + argument++; + break; + default: + return usage(programName); } - } } } /* for(argNb=1; argNb 4 GB). + * Note that, beyond 1 paragraph, this generator produces + * a different content than LOREM_genBuffer (even when using same seed). 
+ */ + +#include "loremOut.h" +#include <assert.h> +#include <stdio.h> +#include "lorem.h" /* LOREM_genBlock */ +#include "platform.h" /* Compiler options, SET_BINARY_MODE */ + +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#define LOREM_BLOCKSIZE (1 << 10) +void LOREM_genOut(unsigned long long size, unsigned seed) +{ + char buff[LOREM_BLOCKSIZE] = { 0 }; + unsigned long long total = 0; + size_t genBlockSize = (size_t)MIN(size, LOREM_BLOCKSIZE); + + /* init */ + SET_BINARY_MODE(stdout); + + /* Generate Ipsum text, one paragraph at a time */ + while (total < size) { + size_t generated = + LOREM_genBlock(buff, genBlockSize, seed++, total == 0, 0); + assert(generated <= genBlockSize); + total += generated; + assert(total <= size); + fwrite(buff, + 1, + generated, + stdout); /* note: should check potential write error */ + if (size - total < genBlockSize) + genBlockSize = (size_t)(size - total); + } + assert(total == size); +} diff --git a/tests/loremOut.h b/tests/loremOut.h new file mode 100644 index 000000000..3a32e1161 --- /dev/null +++ b/tests/loremOut.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* LOREM_genOut(): + * Generate @size bytes of compressible data using lorem ipsum generator into + * stdout. + */ +void LOREM_genOut(unsigned long long size, unsigned seed);