]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/journal/lookup3.c
resolved: add enablers for DNS-SD
[thirdparty/systemd.git] / src / journal / lookup3.c
CommitLineData
87d2c1ff
LP
1/* Slightly modified by Lennart Poettering, to avoid name clashes, and
2 * unexport a few functions. */
3
4#include "lookup3.h"
5
6/*
7-------------------------------------------------------------------------------
8lookup3.c, by Bob Jenkins, May 2006, Public Domain.
9
10These are functions for producing 32-bit hashes for hash table lookup.
11hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
12are externally useful functions. Routines to test the hash are included
13if SELF_TEST is defined. You can use this free for any purpose. It's in
14the public domain. It has no warranty.
15
16You probably want to use hashlittle(). hashlittle() and hashbig()
c5315881 17hash byte arrays. hashlittle() is faster than hashbig() on
87d2c1ff
LP
18little-endian machines. Intel and AMD are little-endian machines.
19On second thought, you probably want hashlittle2(), which is identical to
20hashlittle() except it returns two 32-bit hashes for the price of one.
21You could implement hashbig2() if you wanted but I haven't bothered here.
22
23If you want to find a hash of, say, exactly 7 integers, do
24 a = i1; b = i2; c = i3;
25 mix(a,b,c);
26 a += i4; b += i5; c += i6;
27 mix(a,b,c);
28 a += i7;
29 final(a,b,c);
30then use c as the hash value. If you have a variable length array of
314-byte integers to hash, use hashword(). If you have a byte array (like
32a character string), use hashlittle(). If you have several byte arrays, or
33a mix of things, see the comments above hashlittle().
34
35Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
36then mix those integers. This is fast (you can do a lot more thorough
37mixing with 12*3 instructions on 3 integers than you can with 3 instructions
38on 1 byte), but shoehorning those bytes into integers efficiently is messy.
39-------------------------------------------------------------------------------
40*/
41/* #define SELF_TEST 1 */
42
87d2c1ff 43#include <stdint.h> /* defines uint32_t etc */
cf0fbc49 44#include <stdio.h> /* defines printf for tests */
87d2c1ff 45#include <sys/param.h> /* attempt to define endianness */
cf0fbc49 46#include <time.h> /* defines time_t for timings in the test */
87d2c1ff
LP
47#ifdef linux
48# include <endian.h> /* attempt to define endianness */
49#endif
50
ae50101a
ZJS
51#if __GNUC__ >= 7
52_Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"")
53#endif
54
87d2c1ff
LP
55/*
56 * My best guess at if you are big-endian or little-endian. This may
57 * need adjustment.
58 */
59#if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
60 __BYTE_ORDER == __LITTLE_ENDIAN) || \
61 (defined(i386) || defined(__i386__) || defined(__i486__) || \
62 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
63# define HASH_LITTLE_ENDIAN 1
64# define HASH_BIG_ENDIAN 0
65#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
66 __BYTE_ORDER == __BIG_ENDIAN) || \
67 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
68# define HASH_LITTLE_ENDIAN 0
69# define HASH_BIG_ENDIAN 1
70#else
71# define HASH_LITTLE_ENDIAN 0
72# define HASH_BIG_ENDIAN 0
73#endif
74
75#define hashsize(n) ((uint32_t)1<<(n))
76#define hashmask(n) (hashsize(n)-1)
77#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
78
79/*
80-------------------------------------------------------------------------------
81mix -- mix 3 32-bit values reversibly.
82
83This is reversible, so any information in (a,b,c) before mix() is
84still in (a,b,c) after mix().
85
86If four pairs of (a,b,c) inputs are run through mix(), or through
87mix() in reverse, there are at least 32 bits of the output that
88are sometimes the same for one pair and different for another pair.
89This was tested for:
90* pairs that differed by one bit, by two bits, in any combination
91 of top bits of (a,b,c), or in any combination of bottom bits of
92 (a,b,c).
93* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
94 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
95 is commonly produced by subtraction) look like a single 1-bit
96 difference.
97* the base values were pseudorandom, all zero but one bit set, or
98 all zero plus a counter that starts at zero.
99
100Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
101satisfy this are
102 4 6 8 16 19 4
103 9 15 3 18 27 15
104 14 9 3 7 17 3
105Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
106for "differ" defined as + with a one-bit base and a two-bit delta. I
107used http://burtleburtle.net/bob/hash/avalanche.html to choose
108the operations, constants, and arrangements of the variables.
109
110This does not achieve avalanche. There are input bits of (a,b,c)
111that fail to affect some output bits of (a,b,c), especially of a. The
112most thoroughly mixed value is c, but it doesn't really even achieve
113avalanche in c.
114
115This allows some parallelism. Read-after-writes are good at doubling
116the number of bits affected, so the goal of mixing pulls in the opposite
117direction as the goal of parallelism. I did what I could. Rotates
118seem to cost as much as shifts on every machine I could lay my hands
119on, and rotates are much kinder to the top and bottom bits, so I used
120rotates.
121-------------------------------------------------------------------------------
122*/
123#define mix(a,b,c) \
124{ \
125 a -= c; a ^= rot(c, 4); c += b; \
126 b -= a; b ^= rot(a, 6); a += c; \
127 c -= b; c ^= rot(b, 8); b += a; \
128 a -= c; a ^= rot(c,16); c += b; \
129 b -= a; b ^= rot(a,19); a += c; \
130 c -= b; c ^= rot(b, 4); b += a; \
131}
132
133/*
134-------------------------------------------------------------------------------
135final -- final mixing of 3 32-bit values (a,b,c) into c
136
137Pairs of (a,b,c) values differing in only a few bits will usually
138produce values of c that look totally different. This was tested for
139* pairs that differed by one bit, by two bits, in any combination
140 of top bits of (a,b,c), or in any combination of bottom bits of
141 (a,b,c).
142* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
143 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
144 is commonly produced by subtraction) look like a single 1-bit
145 difference.
146* the base values were pseudorandom, all zero but one bit set, or
147 all zero plus a counter that starts at zero.
148
149These constants passed:
150 14 11 25 16 4 14 24
151 12 14 25 16 4 14 24
152and these came close:
153 4 8 15 26 3 22 24
154 10 8 15 26 3 22 24
155 11 8 15 26 3 22 24
156-------------------------------------------------------------------------------
157*/
158#define final(a,b,c) \
159{ \
160 c ^= b; c -= rot(b,14); \
161 a ^= c; a -= rot(c,11); \
162 b ^= a; b -= rot(a,25); \
163 c ^= b; c -= rot(b,16); \
164 a ^= c; a -= rot(c,4); \
165 b ^= a; b -= rot(a,14); \
166 c ^= b; c -= rot(b,24); \
167}
168
169/*
170--------------------------------------------------------------------
171 This works on all machines. To be useful, it requires
172 -- that the key be an array of uint32_t's, and
173 -- that the length be the number of uint32_t's in the key
174
175 The function hashword() is identical to hashlittle() on little-endian
176 machines, and identical to hashbig() on big-endian machines,
177 except that the length has to be measured in uint32_ts rather than in
178 bytes. hashlittle() is more complicated than hashword() only because
179 hashlittle() has to dance around fitting the key bytes into registers.
180--------------------------------------------------------------------
181*/
182uint32_t jenkins_hashword(
183const uint32_t *k, /* the key, an array of uint32_t values */
184size_t length, /* the length of the key, in uint32_ts */
185uint32_t initval) /* the previous hash, or an arbitrary value */
186{
187 uint32_t a,b,c;
188
189 /* Set up the internal state */
190 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
191
192 /*------------------------------------------------- handle most of the key */
193 while (length > 3)
194 {
195 a += k[0];
196 b += k[1];
197 c += k[2];
198 mix(a,b,c);
199 length -= 3;
200 k += 3;
201 }
202
203 /*------------------------------------------- handle the last 3 uint32_t's */
204 switch(length) /* all the case statements fall through */
205 {
206 case 3 : c+=k[2];
207 case 2 : b+=k[1];
208 case 1 : a+=k[0];
209 final(a,b,c);
210 case 0: /* case 0: nothing left to add */
211 break;
212 }
213 /*------------------------------------------------------ report the result */
214 return c;
215}
216
217
218/*
219--------------------------------------------------------------------
220hashword2() -- same as hashword(), but take two seeds and return two
22132-bit values. pc and pb must both be nonnull, and *pc and *pb must
222both be initialized with seeds. If you pass in (*pb)==0, the output
223(*pc) will be the same as the return value from hashword().
224--------------------------------------------------------------------
225*/
226void jenkins_hashword2 (
227const uint32_t *k, /* the key, an array of uint32_t values */
228size_t length, /* the length of the key, in uint32_ts */
229uint32_t *pc, /* IN: seed OUT: primary hash value */
230uint32_t *pb) /* IN: more seed OUT: secondary hash value */
231{
232 uint32_t a,b,c;
233
234 /* Set up the internal state */
235 a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
236 c += *pb;
237
238 /*------------------------------------------------- handle most of the key */
239 while (length > 3)
240 {
241 a += k[0];
242 b += k[1];
243 c += k[2];
244 mix(a,b,c);
245 length -= 3;
246 k += 3;
247 }
248
249 /*------------------------------------------- handle the last 3 uint32_t's */
250 switch(length) /* all the case statements fall through */
251 {
252 case 3 : c+=k[2];
253 case 2 : b+=k[1];
254 case 1 : a+=k[0];
255 final(a,b,c);
256 case 0: /* case 0: nothing left to add */
257 break;
258 }
259 /*------------------------------------------------------ report the result */
260 *pc=c; *pb=b;
261}
262
263
264/*
265-------------------------------------------------------------------------------
266hashlittle() -- hash a variable-length key into a 32-bit value
267 k : the key (the unaligned variable-length array of bytes)
268 length : the length of the key, counting by bytes
269 initval : can be any 4-byte value
270Returns a 32-bit value. Every bit of the key affects every bit of
271the return value. Two keys differing by one or two bits will have
272totally different hash values.
273
274The best hash table sizes are powers of 2. There is no need to do
275mod a prime (mod is sooo slow!). If you need less than 32 bits,
276use a bitmask. For example, if you need only 10 bits, do
277 h = (h & hashmask(10));
278In which case, the hash table should have hashsize(10) elements.
279
280If you are hashing n strings (uint8_t **)k, do it like this:
281 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
282
283By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
284code any way you wish, private, educational, or commercial. It's free.
285
286Use for hash table lookup, or anything where one collision in 2^^32 is
287acceptable. Do NOT use for cryptographic purposes.
288-------------------------------------------------------------------------------
289*/
290
291uint32_t jenkins_hashlittle( const void *key, size_t length, uint32_t initval)
292{
293 uint32_t a,b,c; /* internal state */
294 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
295
296 /* Set up the internal state */
297 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
298
299 u.ptr = key;
300 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
301 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
302
303 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
304 while (length > 12)
305 {
306 a += k[0];
307 b += k[1];
308 c += k[2];
309 mix(a,b,c);
310 length -= 12;
311 k += 3;
312 }
313
314 /*----------------------------- handle the last (probably partial) block */
315 /*
316 * "k[2]&0xffffff" actually reads beyond the end of the string, but
317 * then masks off the part it's not allowed to read. Because the
318 * string is aligned, the masked-off tail is in the same word as the
319 * rest of the string. Every machine with memory protection I've seen
320 * does it on word boundaries, so is OK with this. But VALGRIND will
321 * still catch it and complain. The masking trick does make the hash
49f43d5f 322 * noticeably faster for short strings (like English words).
87d2c1ff 323 */
7dbe0b72 324#if !defined(VALGRIND) && !defined(__SANITIZE_ADDRESS__)
87d2c1ff
LP
325
326 switch(length)
327 {
328 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
329 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
330 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
331 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
332 case 8 : b+=k[1]; a+=k[0]; break;
333 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
334 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
335 case 5 : b+=k[1]&0xff; a+=k[0]; break;
336 case 4 : a+=k[0]; break;
337 case 3 : a+=k[0]&0xffffff; break;
338 case 2 : a+=k[0]&0xffff; break;
339 case 1 : a+=k[0]&0xff; break;
340 case 0 : return c; /* zero length strings require no mixing */
341 }
342
343#else /* make valgrind happy */
87d2c1ff 344 {
1b4bb4fd
ZJS
345 const uint8_t *k8 = (const uint8_t *) k;
346
347 switch(length)
348 {
349 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
350 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
351 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
352 case 9 : c+=k8[8]; /* fall through */
353 case 8 : b+=k[1]; a+=k[0]; break;
354 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
355 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
356 case 5 : b+=k8[4]; /* fall through */
357 case 4 : a+=k[0]; break;
358 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
359 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
360 case 1 : a+=k8[0]; break;
361 case 0 : return c;
362 }
87d2c1ff
LP
363 }
364
365#endif /* !valgrind */
366
367 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
368 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
369 const uint8_t *k8;
370
371 /*--------------- all but last block: aligned reads and different mixing */
372 while (length > 12)
373 {
374 a += k[0] + (((uint32_t)k[1])<<16);
375 b += k[2] + (((uint32_t)k[3])<<16);
376 c += k[4] + (((uint32_t)k[5])<<16);
377 mix(a,b,c);
378 length -= 12;
379 k += 6;
380 }
381
382 /*----------------------------- handle the last (probably partial) block */
383 k8 = (const uint8_t *)k;
384 switch(length)
385 {
386 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
387 b+=k[2]+(((uint32_t)k[3])<<16);
388 a+=k[0]+(((uint32_t)k[1])<<16);
389 break;
390 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
391 case 10: c+=k[4];
392 b+=k[2]+(((uint32_t)k[3])<<16);
393 a+=k[0]+(((uint32_t)k[1])<<16);
394 break;
395 case 9 : c+=k8[8]; /* fall through */
396 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
397 a+=k[0]+(((uint32_t)k[1])<<16);
398 break;
399 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
400 case 6 : b+=k[2];
401 a+=k[0]+(((uint32_t)k[1])<<16);
402 break;
403 case 5 : b+=k8[4]; /* fall through */
404 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
405 break;
406 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
407 case 2 : a+=k[0];
408 break;
409 case 1 : a+=k8[0];
410 break;
411 case 0 : return c; /* zero length requires no mixing */
412 }
413
414 } else { /* need to read the key one byte at a time */
415 const uint8_t *k = (const uint8_t *)key;
416
417 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
418 while (length > 12)
419 {
420 a += k[0];
421 a += ((uint32_t)k[1])<<8;
422 a += ((uint32_t)k[2])<<16;
423 a += ((uint32_t)k[3])<<24;
424 b += k[4];
425 b += ((uint32_t)k[5])<<8;
426 b += ((uint32_t)k[6])<<16;
427 b += ((uint32_t)k[7])<<24;
428 c += k[8];
429 c += ((uint32_t)k[9])<<8;
430 c += ((uint32_t)k[10])<<16;
431 c += ((uint32_t)k[11])<<24;
432 mix(a,b,c);
433 length -= 12;
434 k += 12;
435 }
436
437 /*-------------------------------- last block: affect all 32 bits of (c) */
438 switch(length) /* all the case statements fall through */
439 {
440 case 12: c+=((uint32_t)k[11])<<24;
441 case 11: c+=((uint32_t)k[10])<<16;
442 case 10: c+=((uint32_t)k[9])<<8;
443 case 9 : c+=k[8];
444 case 8 : b+=((uint32_t)k[7])<<24;
445 case 7 : b+=((uint32_t)k[6])<<16;
446 case 6 : b+=((uint32_t)k[5])<<8;
447 case 5 : b+=k[4];
448 case 4 : a+=((uint32_t)k[3])<<24;
449 case 3 : a+=((uint32_t)k[2])<<16;
450 case 2 : a+=((uint32_t)k[1])<<8;
451 case 1 : a+=k[0];
452 break;
453 case 0 : return c;
454 }
455 }
456
457 final(a,b,c);
458 return c;
459}
460
461
462/*
463 * hashlittle2: return 2 32-bit hash values
464 *
465 * This is identical to hashlittle(), except it returns two 32-bit hash
466 * values instead of just one. This is good enough for hash table
467 * lookup with 2^^64 buckets, or if you want a second hash if you're not
468 * happy with the first, or if you want a probably-unique 64-bit ID for
469 * the key. *pc is better mixed than *pb, so use *pc first. If you want
470 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
471 */
472void jenkins_hashlittle2(
473 const void *key, /* the key to hash */
474 size_t length, /* length of the key */
475 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
476 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
477{
478 uint32_t a,b,c; /* internal state */
479 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
480
481 /* Set up the internal state */
482 a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
483 c += *pb;
484
485 u.ptr = key;
486 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
487 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
488
489 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
490 while (length > 12)
491 {
492 a += k[0];
493 b += k[1];
494 c += k[2];
495 mix(a,b,c);
496 length -= 12;
497 k += 3;
498 }
499
500 /*----------------------------- handle the last (probably partial) block */
501 /*
502 * "k[2]&0xffffff" actually reads beyond the end of the string, but
503 * then masks off the part it's not allowed to read. Because the
504 * string is aligned, the masked-off tail is in the same word as the
505 * rest of the string. Every machine with memory protection I've seen
506 * does it on word boundaries, so is OK with this. But VALGRIND will
507 * still catch it and complain. The masking trick does make the hash
49f43d5f 508 * noticeably faster for short strings (like English words).
87d2c1ff 509 */
7dbe0b72 510#if !defined(VALGRIND) && !defined(__SANITIZE_ADDRESS__)
87d2c1ff
LP
511
512 switch(length)
513 {
514 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
515 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
516 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
517 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
518 case 8 : b+=k[1]; a+=k[0]; break;
519 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
520 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
521 case 5 : b+=k[1]&0xff; a+=k[0]; break;
522 case 4 : a+=k[0]; break;
523 case 3 : a+=k[0]&0xffffff; break;
524 case 2 : a+=k[0]&0xffff; break;
525 case 1 : a+=k[0]&0xff; break;
526 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
527 }
528
529#else /* make valgrind happy */
530
87d2c1ff 531 {
1b4bb4fd
ZJS
532 const uint8_t *k8 = (const uint8_t *)k;
533 switch(length)
534 {
535 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
536 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
537 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
538 case 9 : c+=k8[8]; /* fall through */
539 case 8 : b+=k[1]; a+=k[0]; break;
540 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
541 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
542 case 5 : b+=k8[4]; /* fall through */
543 case 4 : a+=k[0]; break;
544 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
545 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
546 case 1 : a+=k8[0]; break;
547 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
548 }
87d2c1ff
LP
549 }
550
551#endif /* !valgrind */
552
553 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
554 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
555 const uint8_t *k8;
556
557 /*--------------- all but last block: aligned reads and different mixing */
558 while (length > 12)
559 {
560 a += k[0] + (((uint32_t)k[1])<<16);
561 b += k[2] + (((uint32_t)k[3])<<16);
562 c += k[4] + (((uint32_t)k[5])<<16);
563 mix(a,b,c);
564 length -= 12;
565 k += 6;
566 }
567
568 /*----------------------------- handle the last (probably partial) block */
569 k8 = (const uint8_t *)k;
570 switch(length)
571 {
572 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
573 b+=k[2]+(((uint32_t)k[3])<<16);
574 a+=k[0]+(((uint32_t)k[1])<<16);
575 break;
576 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
577 case 10: c+=k[4];
578 b+=k[2]+(((uint32_t)k[3])<<16);
579 a+=k[0]+(((uint32_t)k[1])<<16);
580 break;
581 case 9 : c+=k8[8]; /* fall through */
582 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
583 a+=k[0]+(((uint32_t)k[1])<<16);
584 break;
585 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
586 case 6 : b+=k[2];
587 a+=k[0]+(((uint32_t)k[1])<<16);
588 break;
589 case 5 : b+=k8[4]; /* fall through */
590 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
591 break;
592 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
593 case 2 : a+=k[0];
594 break;
595 case 1 : a+=k8[0];
596 break;
597 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
598 }
599
600 } else { /* need to read the key one byte at a time */
601 const uint8_t *k = (const uint8_t *)key;
602
603 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
604 while (length > 12)
605 {
606 a += k[0];
607 a += ((uint32_t)k[1])<<8;
608 a += ((uint32_t)k[2])<<16;
609 a += ((uint32_t)k[3])<<24;
610 b += k[4];
611 b += ((uint32_t)k[5])<<8;
612 b += ((uint32_t)k[6])<<16;
613 b += ((uint32_t)k[7])<<24;
614 c += k[8];
615 c += ((uint32_t)k[9])<<8;
616 c += ((uint32_t)k[10])<<16;
617 c += ((uint32_t)k[11])<<24;
618 mix(a,b,c);
619 length -= 12;
620 k += 12;
621 }
622
623 /*-------------------------------- last block: affect all 32 bits of (c) */
624 switch(length) /* all the case statements fall through */
625 {
626 case 12: c+=((uint32_t)k[11])<<24;
627 case 11: c+=((uint32_t)k[10])<<16;
628 case 10: c+=((uint32_t)k[9])<<8;
629 case 9 : c+=k[8];
630 case 8 : b+=((uint32_t)k[7])<<24;
631 case 7 : b+=((uint32_t)k[6])<<16;
632 case 6 : b+=((uint32_t)k[5])<<8;
633 case 5 : b+=k[4];
634 case 4 : a+=((uint32_t)k[3])<<24;
635 case 3 : a+=((uint32_t)k[2])<<16;
636 case 2 : a+=((uint32_t)k[1])<<8;
637 case 1 : a+=k[0];
638 break;
639 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
640 }
641 }
642
643 final(a,b,c);
644 *pc=c; *pb=b;
645}
646
647
648
649/*
650 * hashbig():
651 * This is the same as hashword() on big-endian machines. It is different
652 * from hashlittle() on all machines. hashbig() takes advantage of
653 * big-endian byte ordering.
654 */
655uint32_t jenkins_hashbig( const void *key, size_t length, uint32_t initval)
656{
657 uint32_t a,b,c;
658 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
659
660 /* Set up the internal state */
661 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
662
663 u.ptr = key;
664 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
665 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
666
667 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
668 while (length > 12)
669 {
670 a += k[0];
671 b += k[1];
672 c += k[2];
673 mix(a,b,c);
674 length -= 12;
675 k += 3;
676 }
677
678 /*----------------------------- handle the last (probably partial) block */
679 /*
680 * "k[2]<<8" actually reads beyond the end of the string, but
681 * then shifts out the part it's not allowed to read. Because the
682 * string is aligned, the illegal read is in the same word as the
683 * rest of the string. Every machine with memory protection I've seen
684 * does it on word boundaries, so is OK with this. But VALGRIND will
685 * still catch it and complain. The masking trick does make the hash
49f43d5f 686 * noticeably faster for short strings (like English words).
87d2c1ff 687 */
7dbe0b72 688#if !defined(VALGRIND) && !defined(__SANITIZE_ADDRESS__)
87d2c1ff
LP
689
690 switch(length)
691 {
692 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
693 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
694 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
695 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
696 case 8 : b+=k[1]; a+=k[0]; break;
697 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
698 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
699 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
700 case 4 : a+=k[0]; break;
701 case 3 : a+=k[0]&0xffffff00; break;
702 case 2 : a+=k[0]&0xffff0000; break;
703 case 1 : a+=k[0]&0xff000000; break;
704 case 0 : return c; /* zero length strings require no mixing */
705 }
706
707#else /* make valgrind happy */
708
87d2c1ff 709 {
1b4bb4fd
ZJS
710 const uint8_t *k8 = (const uint8_t *)k;
711 switch(length) /* all the case statements fall through */
712 {
713 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
714 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
715 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
716 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
717 case 8 : b+=k[1]; a+=k[0]; break;
718 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
719 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
720 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
721 case 4 : a+=k[0]; break;
722 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
723 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
724 case 1 : a+=((uint32_t)k8[0])<<24; break;
725 case 0 : return c;
726 }
87d2c1ff
LP
727 }
728
729#endif /* !VALGRIND */
730
731 } else { /* need to read the key one byte at a time */
732 const uint8_t *k = (const uint8_t *)key;
733
734 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
735 while (length > 12)
736 {
737 a += ((uint32_t)k[0])<<24;
738 a += ((uint32_t)k[1])<<16;
739 a += ((uint32_t)k[2])<<8;
740 a += ((uint32_t)k[3]);
741 b += ((uint32_t)k[4])<<24;
742 b += ((uint32_t)k[5])<<16;
743 b += ((uint32_t)k[6])<<8;
744 b += ((uint32_t)k[7]);
745 c += ((uint32_t)k[8])<<24;
746 c += ((uint32_t)k[9])<<16;
747 c += ((uint32_t)k[10])<<8;
748 c += ((uint32_t)k[11]);
749 mix(a,b,c);
750 length -= 12;
751 k += 12;
752 }
753
754 /*-------------------------------- last block: affect all 32 bits of (c) */
755 switch(length) /* all the case statements fall through */
756 {
757 case 12: c+=k[11];
758 case 11: c+=((uint32_t)k[10])<<8;
759 case 10: c+=((uint32_t)k[9])<<16;
760 case 9 : c+=((uint32_t)k[8])<<24;
761 case 8 : b+=k[7];
762 case 7 : b+=((uint32_t)k[6])<<8;
763 case 6 : b+=((uint32_t)k[5])<<16;
764 case 5 : b+=((uint32_t)k[4])<<24;
765 case 4 : a+=k[3];
766 case 3 : a+=((uint32_t)k[2])<<8;
767 case 2 : a+=((uint32_t)k[1])<<16;
768 case 1 : a+=((uint32_t)k[0])<<24;
769 break;
770 case 0 : return c;
771 }
772 }
773
774 final(a,b,c);
775 return c;
776}
777
778
779#ifdef SELF_TEST
780
781/* used for timings */
782void driver1()
783{
784 uint8_t buf[256];
785 uint32_t i;
786 uint32_t h=0;
787 time_t a,z;
788
789 time(&a);
790 for (i=0; i<256; ++i) buf[i] = 'x';
791 for (i=0; i<1; ++i)
792 {
793 h = hashlittle(&buf[0],1,h);
794 }
795 time(&z);
796 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
797}
798
799/* check that every input bit changes every output bit half the time */
800#define HASHSTATE 1
801#define HASHLEN 1
802#define MAXPAIR 60
803#define MAXLEN 70
804void driver2()
805{
806 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
807 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
808 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
809 uint32_t x[HASHSTATE],y[HASHSTATE];
810 uint32_t hlen;
811
812 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
813 for (hlen=0; hlen < MAXLEN; ++hlen)
814 {
815 z=0;
816 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
817 {
818 for (j=0; j<8; ++j) /*------------------------ for each input bit, */
819 {
820 for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
821 {
822 for (l=0; l<HASHSTATE; ++l)
823 e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
824
825 /*---- check that every output bit is affected by that input bit */
826 for (k=0; k<MAXPAIR; k+=2)
827 {
828 uint32_t finished=1;
829 /* keys have one bit different */
830 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
831 /* have a and b be two keys differing in only one bit */
832 a[i] ^= (k<<j);
833 a[i] ^= (k>>(8-j));
834 c[0] = hashlittle(a, hlen, m);
835 b[i] ^= ((k+1)<<j);
836 b[i] ^= ((k+1)>>(8-j));
837 d[0] = hashlittle(b, hlen, m);
838 /* check every bit is 1, 0, set, and not set at least once */
839 for (l=0; l<HASHSTATE; ++l)
840 {
841 e[l] &= (c[l]^d[l]);
842 f[l] &= ~(c[l]^d[l]);
843 g[l] &= c[l];
844 h[l] &= ~c[l];
845 x[l] &= d[l];
846 y[l] &= ~d[l];
847 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
848 }
849 if (finished) break;
850 }
851 if (k>z) z=k;
852 if (k==MAXPAIR)
853 {
854 printf("Some bit didn't change: ");
855 printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
856 e[0],f[0],g[0],h[0],x[0],y[0]);
857 printf("i %d j %d m %d len %d\n", i, j, m, hlen);
858 }
859 if (z==MAXPAIR) goto done;
860 }
861 }
862 }
863 done:
864 if (z < MAXPAIR)
865 {
866 printf("Mix success %2d bytes %2d initvals ",i,m);
867 printf("required %d trials\n", z/2);
868 }
869 }
870 printf("\n");
871}
872
873/* Check for reading beyond the end of the buffer and alignment problems */
874void driver3()
875{
876 uint8_t buf[MAXLEN+20], *b;
877 uint32_t len;
878 uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
879 uint32_t h;
880 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
881 uint32_t i;
882 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
883 uint32_t j;
884 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
885 uint32_t ref,x,y;
886 uint8_t *p;
887
888 printf("Endianness. These lines should all be the same (for values filled in):\n");
889 printf("%.8x %.8x %.8x\n",
890 hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
891 hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
892 hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
893 p = q;
894 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
895 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
896 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
897 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
898 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
899 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
900 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
901 p = &qq[1];
902 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
903 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
904 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
905 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
906 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
907 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
908 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
909 p = &qqq[2];
910 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
911 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
912 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
913 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
914 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
915 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
916 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
917 p = &qqqq[3];
918 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
919 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
920 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
921 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
922 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
923 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
924 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
925 printf("\n");
926
927 /* check that hashlittle2 and hashlittle produce the same results */
928 i=47; j=0;
929 hashlittle2(q, sizeof(q), &i, &j);
930 if (hashlittle(q, sizeof(q), 47) != i)
931 printf("hashlittle2 and hashlittle mismatch\n");
932
933 /* check that hashword2 and hashword produce the same results */
934 len = 0xdeadbeef;
935 i=47, j=0;
936 hashword2(&len, 1, &i, &j);
937 if (hashword(&len, 1, 47) != i)
938 printf("hashword2 and hashword mismatch %x %x\n",
939 i, hashword(&len, 1, 47));
940
941 /* check hashlittle doesn't read before or after the ends of the string */
942 for (h=0, b=buf+1; h<8; ++h, ++b)
943 {
944 for (i=0; i<MAXLEN; ++i)
945 {
946 len = i;
947 for (j=0; j<i; ++j) *(b+j)=0;
948
949 /* these should all be equal */
950 ref = hashlittle(b, len, (uint32_t)1);
951 *(b+i)=(uint8_t)~0;
952 *(b-1)=(uint8_t)~0;
953 x = hashlittle(b, len, (uint32_t)1);
954 y = hashlittle(b, len, (uint32_t)1);
955 if ((ref != x) || (ref != y))
956 {
957 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
958 h, i);
959 }
960 }
961 }
962}
963
964/* check for problems with nulls */
965 void driver4()
966{
967 uint8_t buf[1];
968 uint32_t h,i,state[HASHSTATE];
969
970
971 buf[0] = ~0;
972 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
973 printf("These should all be different\n");
974 for (i=0, h=0; i<8; ++i)
975 {
976 h = hashlittle(buf, 0, h);
977 printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
978 }
979}
980
981void driver5()
982{
983 uint32_t b,c;
984 b=0, c=0, hashlittle2("", 0, &c, &b);
985 printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
986 b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
987 printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
988 b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
989 printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
990 b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
991 printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
992 b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
993 printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
994 b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
995 printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
996 c = hashlittle("Four score and seven years ago", 30, 0);
997 printf("hash is %.8lx\n", c); /* 17770551 */
998 c = hashlittle("Four score and seven years ago", 30, 1);
999 printf("hash is %.8lx\n", c); /* cd628161 */
1000}
1001
1002
1003int main()
1004{
1005 driver1(); /* test that the key is hashed: used for timings */
1006 driver2(); /* test that whole key is hashed thoroughly */
1007 driver3(); /* test that nothing but the key is hashed */
1008 driver4(); /* test hashing multiple buffers (all buffers are null) */
1009 driver5(); /* test the hash against known vectors */
1010 return 1;
1011}
1012
1013#endif /* SELF_TEST */