#define WORD_MAX_SIZE 20
/* Define the word pool */
-static const char* words[] = {
+static const char* kWords[] = {
"lorem", "ipsum", "dolor", "sit", "amet",
"consectetur", "adipiscing", "elit", "sed", "do",
"eiusmod", "tempor", "incididunt", "ut", "labore",
"quam", "nihil", "molestiae", "illum", "fugiat",
"quo", "pariatur"
};
+static const unsigned kNbWords = sizeof(kWords) / sizeof(kWords[0]);
-/* simple 1-dimension distribution that favors small words :
- * 1 letter : weight 3
- * 2-3 letters : weight 2
- * 4+ letters : weight 1
+/* simple 1-dimension distribution, based on word's length, favors small words
*/
-static const int distrib[] = {
- 0, 1, 2, 3, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 11,
- 12, 13, 13, 14, 15, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23,
- 24, 25, 26, 26, 26, 27, 28, 29, 30, 31, 32, 33, 34, 34, 35,
- 36, 37, 38, 39, 40, 41, 41, 42, 43, 43, 44, 45, 45, 46, 47,
- 48, 49, 50, 51, 52, 53, 54, 55, 55, 56, 57, 58, 58, 59, 60,
- 60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72, 72,
- 73, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 84, 85,
- 86, 87, 88, 89, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
- 100, 101, 101, 102, 103, 104, 105, 106, 106, 107, 108, 109, 110, 111, 112,
- 113, 114, 115, 116, 117, 118, 119, 129, 121, 122, 123, 124, 125, 126, 127,
- 128, 128, 129, 129, 130, 131, 132, 133, 134, 135, 136, 136, 137, 138, 139,
- 140, 141, 142, 143, 144, 145, 146, 146, 147, 148, 149, 150, 151, 152, 153,
- 154, 155, 156, 156, 157, 157, 158, 159, 160, 161, 161, 162, 163, 164, 165,
- 166, 167, 168, 169, 170, 170, 171,
-};
-static const unsigned distribCount = sizeof(distrib) / sizeof(distrib[0]);
+static const int kWeights[] = { 0, 8, 6, 4, 3, 2 };
+static const unsigned kNbWeights = sizeof(kWeights) / sizeof(kWeights[0]);
+
+#define DISTRIB_SIZE_MAX 500
+static int g_distrib[DISTRIB_SIZE_MAX] = { 0 };
+static unsigned g_distribCount = 0;
+
+static void countFreqs(
+ const char* words[],
+ size_t nbWords,
+ const int* weights,
+ unsigned long nbWeights)
+{
+ unsigned total = 0;
+ size_t w;
+ for (w = 0; w < nbWords; w++) {
+ unsigned long len = strlen(words[w]);
+ int lmax;
+ if (len >= nbWeights)
+ len = nbWeights - 1;
+ lmax = weights[len];
+ total += (unsigned)lmax;
+ }
+ g_distribCount = total;
+ assert(g_distribCount <= DISTRIB_SIZE_MAX);
+}
+
+static void init_word_distrib(
+ const char* words[],
+ size_t nbWords,
+ const int* weights,
+ unsigned long nbWeights)
+{
+ size_t w, d = 0;
+ countFreqs(words, nbWords, weights, nbWeights);
+ for (w = 0; w < nbWords; w++) {
+ unsigned long len = strlen(words[w]);
+ int l, lmax;
+ if (len >= nbWeights)
+ len = nbWeights - 1;
+ lmax = weights[len];
+ for (l = 0; l < lmax; l++) {
+ g_distrib[d++] = (int)w;
+ }
+ }
+}
/* Note: this unit only works when invoked sequentially.
* No concurrent access is allowed */
const char* endSep = qmark ? "? " : ". ";
int i;
for (i = 0; i < nbWords; i++) {
- int const wordID = distrib[LOREM_rand(distribCount)];
- const char* const word = words[wordID];
+ int const wordID = g_distrib[LOREM_rand(g_distribCount)];
+ const char* const word = kWords[wordID];
const char* sep = " ";
if (i == commaPos)
sep = ", ";
{
int i;
for (i = 0; i < 18; i++) {
- const char* word = words[i];
+ const char* word = kWords[i];
const char* separator = " ";
if (i == 4)
separator = ", ";
separator = ", ";
generateWord(word, separator, i == 0);
}
- generateWord(words[18], ". ", 0);
+ generateWord(kWords[18], ". ", 0);
}
size_t
g_maxChars = size;
g_nbChars = 0;
g_randRoot = seed;
+ if (g_distribCount == 0) {
+ init_word_distrib(kWords, kNbWords, kWeights, kNbWeights);
+ }
+
if (first) {
generateFirstSentence();
}