]> git.ipfire.org Git - people/ms/u-boot.git/blame - lib/hashtable.c
Add hash table support as base for new environment code
[people/ms/u-boot.git] / lib / hashtable.c
CommitLineData
a6826fbc
WD
1/*
2 * This implementation is based on code from uClibc-0.9.30.3 but was
3 * modified and extended for use within U-Boot.
4 *
5 * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
6 *
7 * Original license header:
8 *
9 * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
10 * This file is part of the GNU C Library.
11 * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
12 *
13 * The GNU C Library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
17 *
18 * The GNU C Library is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
22 *
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with the GNU C Library; if not, write to the Free
25 * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 * 02111-1307 USA.
27 */
28
29#include <errno.h>
30#include <malloc.h>
31
32#ifdef USE_HOSTCC /* HOST build */
33# include <string.h>
34# include <assert.h>
35
36# ifndef debug
37# ifdef DEBUG
38# define debug(fmt,args...) printf(fmt ,##args)
39# else
40# define debug(fmt,args...)
41# endif
42# endif
43#else /* U-Boot build */
44# include <common.h>
45# include <linux/string.h>
46#endif
47
48#include "search.h"
49
50/*
51 * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
52 * [Knuth] The Art of Computer Programming, part 3 (6.4)
53 */
54
55/*
56 * The non-reentrant version use a global space for storing the hash table.
57 */
58static struct hsearch_data htab;
59
60/*
61 * The reentrant version has no static variables to maintain the state.
62 * Instead the interface of all functions is extended to take an argument
63 * which describes the current status.
64 */
65typedef struct _ENTRY {
66 unsigned int used;
67 ENTRY entry;
68} _ENTRY;
69
70
71/*
72 * hcreate()
73 */
74
/*
 * The double hashing scheme needs a prime table size, so the size asked
 * for by the user is rounded up with the help of this test. Plain trial
 * division is adequate because
 * a) the code is called only a few times per program run and
 * b) the numbers are small, as the table must fit into memory.
 *
 * Only odd numbers are passed in, hence only odd divisors are tried.
 *
 * Returns 0 if an odd divisor d with 3 <= d and d * d <= number divides
 * number, and 1 otherwise. For odd numbers >= 3 that is exactly the
 * primality test.
 */
static int isprime(unsigned int number)
{
	unsigned int div;

	/*
	 * "div <= number / div" means "div * div <= number" but cannot
	 * wrap around for large numbers. The loop also never reaches
	 * number itself, so 3 is correctly reported as prime.
	 */
	for (div = 3; div <= number / div; div += 2) {
		if (number % div == 0)
			return 0;
	}

	return 1;
}
92
93int hcreate(size_t nel)
94{
95 return hcreate_r(nel, &htab);
96}
97
98/*
99 * Before using the hash table we must allocate memory for it.
100 * Test for an existing table are done. We allocate one element
101 * more as the found prime number says. This is done for more effective
102 * indexing as explained in the comment for the hsearch function.
103 * The contents of the table is zeroed, especially the field used
104 * becomes zero.
105 */
106int hcreate_r(size_t nel, struct hsearch_data *htab)
107{
108 /* Test for correct arguments. */
109 if (htab == NULL) {
110 __set_errno(EINVAL);
111 return 0;
112 }
113
114 /* There is still another table active. Return with error. */
115 if (htab->table != NULL)
116 return 0;
117
118 /* Change nel to the first prime number not smaller as nel. */
119 nel |= 1; /* make odd */
120 while (!isprime(nel))
121 nel += 2;
122
123 htab->size = nel;
124 htab->filled = 0;
125
126 /* allocate memory and zero out */
127 htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
128 if (htab->table == NULL)
129 return 0;
130
131 /* everything went alright */
132 return 1;
133}
134
135
136/*
137 * hdestroy()
138 */
139void hdestroy(void)
140{
141 hdestroy_r(&htab);
142}
143
144/*
145 * After using the hash table it has to be destroyed. The used memory can
146 * be freed and the local static variable can be marked as not used.
147 */
148void hdestroy_r(struct hsearch_data *htab)
149{
150 int i;
151
152 /* Test for correct arguments. */
153 if (htab == NULL) {
154 __set_errno(EINVAL);
155 return;
156 }
157
158 /* free used memory */
159 for (i = 1; i <= htab->size; ++i) {
160 if (htab->table[i].used) {
161 ENTRY *ep = &htab->table[i].entry;
162
163 free(ep->key);
164 free(ep->data);
165 }
166 }
167 free(htab->table);
168
169 /* the sign for an existing table is an value != NULL in htable */
170 htab->table = NULL;
171}
172
173/*
174 * hsearch()
175 */
176
177/*
178 * This is the search function. It uses double hashing with open addressing.
179 * The argument item.key has to be a pointer to a zero-terminated
180 * string of chars. The function for generating a number of the
181 * strings is simple but fast. It can be replaced by a more complex function
182 * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
183 *
184 * We use a trick to speed up the lookup. The table is created by hcreate
185 * with one more element available. This enables us to use the index zero
186 * special. This index will never be used because we store the first hash
187 * index in the field used where zero means not used. Every other value
188 * means used. The used field can be used as a first fast comparison for
189 * equality of the stored and the parameter value. This helps to prevent
190 * unnecessary expensive calls of strcmp.
191 *
192 * This implementation differs from the standard library version of
193 * this function in a number of ways:
194 *
195 * - While the standard version does not make any assumptions about
196 * the type of the stored data objects at all, this implementation
197 * works with NUL terminated strings only.
198 * - Instead of storing just pointers to the original objects, we
199 * create local copies so the caller does not need to care about the
200 * data any more.
201 * - The standard implementation does not provide a way to update an
202 * existing entry. This version will create a new entry or update an
203 * existing one when both "action == ENTER" and "item.data != NULL".
204 * - Instead of returning 1 on success, we return the index into the
205 * internal hash table, which is also guaranteed to be positive.
206 * This allows us direct access to the found hash table slot for
207 * example for functions like hdelete().
208 */
209
210ENTRY *hsearch(ENTRY item, ACTION action)
211{
212 ENTRY *result;
213
214 (void) hsearch_r(item, action, &result, &htab);
215
216 return result;
217}
218
219int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
220 struct hsearch_data *htab)
221{
222 unsigned int hval;
223 unsigned int count;
224 unsigned int len = strlen(item.key);
225 unsigned int idx;
226
227 /* Compute an value for the given string. Perhaps use a better method. */
228 hval = len;
229 count = len;
230 while (count-- > 0) {
231 hval <<= 4;
232 hval += item.key[count];
233 }
234
235 /*
236 * First hash function:
237 * simply take the modul but prevent zero.
238 */
239 hval %= htab->size;
240 if (hval == 0)
241 ++hval;
242
243 /* The first index tried. */
244 idx = hval;
245
246 if (htab->table[idx].used) {
247 /*
248 * Further action might be required according to the
249 * action value.
250 */
251 unsigned hval2;
252
253 if (htab->table[idx].used == hval
254 && strcmp(item.key, htab->table[idx].entry.key) == 0) {
255 /* Overwrite existing value? */
256 if ((action == ENTER) && (item.data != NULL)) {
257 free(htab->table[idx].entry.data);
258 htab->table[idx].entry.data =
259 strdup(item.data);
260 if (!htab->table[idx].entry.data) {
261 __set_errno(ENOMEM);
262 *retval = NULL;
263 return 0;
264 }
265 }
266 /* return found entry */
267 *retval = &htab->table[idx].entry;
268 return idx;
269 }
270
271 /*
272 * Second hash function:
273 * as suggested in [Knuth]
274 */
275 hval2 = 1 + hval % (htab->size - 2);
276
277 do {
278 /*
279 * Because SIZE is prime this guarantees to
280 * step through all available indices.
281 */
282 if (idx <= hval2)
283 idx = htab->size + idx - hval2;
284 else
285 idx -= hval2;
286
287 /*
288 * If we visited all entries leave the loop
289 * unsuccessfully.
290 */
291 if (idx == hval)
292 break;
293
294 /* If entry is found use it. */
295 if ((htab->table[idx].used == hval)
296 && strcmp(item.key, htab->table[idx].entry.key) == 0) {
297 /* Overwrite existing value? */
298 if ((action == ENTER) && (item.data != NULL)) {
299 free(htab->table[idx].entry.data);
300 htab->table[idx].entry.data =
301 strdup(item.data);
302 if (!htab->table[idx].entry.data) {
303 __set_errno(ENOMEM);
304 *retval = NULL;
305 return 0;
306 }
307 }
308 /* return found entry */
309 *retval = &htab->table[idx].entry;
310 return idx;
311 }
312 }
313 while (htab->table[idx].used);
314 }
315
316 /* An empty bucket has been found. */
317 if (action == ENTER) {
318 /*
319 * If table is full and another entry should be
320 * entered return with error.
321 */
322 if (htab->filled == htab->size) {
323 __set_errno(ENOMEM);
324 *retval = NULL;
325 return 0;
326 }
327
328 /*
329 * Create new entry;
330 * create copies of item.key and item.data
331 */
332 htab->table[idx].used = hval;
333 htab->table[idx].entry.key = strdup(item.key);
334 htab->table[idx].entry.data = strdup(item.data);
335 if (!htab->table[idx].entry.key ||
336 !htab->table[idx].entry.data) {
337 __set_errno(ENOMEM);
338 *retval = NULL;
339 return 0;
340 }
341
342 ++htab->filled;
343
344 /* return new entry */
345 *retval = &htab->table[idx].entry;
346 return 1;
347 }
348
349 __set_errno(ESRCH);
350 *retval = NULL;
351 return 0;
352}
353
354
355/*
356 * hdelete()
357 */
358
359/*
360 * The standard implementation of hsearch(3) does not provide any way
361 * to delete any entries from the hash table. We extend the code to
362 * do that.
363 */
364
365int hdelete(const char *key)
366{
367 return hdelete_r(key, &htab);
368}
369
370int hdelete_r(const char *key, struct hsearch_data *htab)
371{
372 ENTRY e, *ep;
373 int idx;
374
375 debug("hdelete: DELETE key \"%s\"\n", key);
376
377 e.key = (char *)key;
378
379 if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
380 __set_errno(ESRCH);
381 return 0; /* not found */
382 }
383
384 /* free used ENTRY */
385 debug("hdelete: DELETING key \"%s\"\n", key);
386
387 free(ep->key);
388 free(ep->data);
389 htab->table[idx].used = 0;
390
391 --htab->filled;
392
393 return 1;
394}
395
396/*
397 * hexport()
398 */
399
400/*
401 * Export the data stored in the hash table in linearized form.
402 *
403 * Entries are exported as "name=value" strings, separated by an
404 * arbitrary (non-NUL, of course) separator character. This allows to
405 * use this function both when formatting the U-Boot environment for
406 * external storage (using '\0' as separator), but also when using it
407 * for the "printenv" command to print all variables, simply by using
408 * '\n' as separator. This can also be used for new features like
409 * exporting the environment data as text file, including the option
410 * for later re-import.
411 *
412 * The entries in the result list will be sorted by ascending key
413 * values.
414 *
415 * If the separator character is different from NUL, then any
416 * separator characters and backslash characters in the values will
417 * be escaped by a preceding backslash in output. This is needed for
418 * example to enable multi-line values, especially when the output
419 * shall later be parsed (for example, for re-import).
420 *
421 * There are several options how the result buffer is handled:
422 *
423 * *resp size
424 * -----------
425 * NULL 0 A string of sufficient length will be allocated.
426 * NULL >0 A string of the size given will be
427 * allocated. An error will be returned if the size is
428 * not sufficient. Any unused bytes in the string will
429 * be '\0'-padded.
430 * !NULL 0 The user-supplied buffer will be used. No length
431 * checking will be performed, i. e. it is assumed that
432 * the buffer size will always be big enough. DANGEROUS.
433 * !NULL >0 The user-supplied buffer will be used. An error will
434 * be returned if the size is not sufficient. Any unused
435 * bytes in the string will be '\0'-padded.
436 */
437
438ssize_t hexport(const char sep, char **resp, size_t size)
439{
440 return hexport_r(&htab, sep, resp, size);
441}
442
443static int cmpkey(const void *p1, const void *p2)
444{
445 ENTRY *e1 = *(ENTRY **) p1;
446 ENTRY *e2 = *(ENTRY **) p2;
447
448 return (strcmp(e1->key, e2->key));
449}
450
451ssize_t hexport_r(struct hsearch_data *htab, const char sep,
452 char **resp, size_t size)
453{
454 ENTRY *list[htab->size];
455 char *res, *p;
456 size_t totlen;
457 int i, n;
458
459 /* Test for correct arguments. */
460 if ((resp == NULL) || (htab == NULL)) {
461 __set_errno(EINVAL);
462 return (-1);
463 }
464
465 debug("EXPORT table = %p, htab.size = %d, htab.filled = %d, size = %d\n",
466 htab, htab->size, htab->filled, size);
467 /*
468 * Pass 1:
469 * search used entries,
470 * save addresses and compute total length
471 */
472 for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
473
474 if (htab->table[i].used) {
475 ENTRY *ep = &htab->table[i].entry;
476
477 list[n++] = ep;
478
479 totlen += strlen(ep->key) + 2;
480
481 if (sep == '\0') {
482 totlen += strlen(ep->data);
483 } else { /* check if escapes are needed */
484 char *s = ep->data;
485
486 while (*s) {
487 ++totlen;
488 /* add room for needed escape chars */
489 if ((*s == sep) || (*s == '\\'))
490 ++totlen;
491 ++s;
492 }
493 }
494 totlen += 2; /* for '=' and 'sep' char */
495 }
496 }
497
498#ifdef DEBUG
499 /* Pass 1a: print unsorted list */
500 printf("Unsorted: n=%d\n", n);
501 for (i = 0; i < n; ++i) {
502 printf("\t%3d: %p ==> %-10s => %s\n",
503 i, list[i], list[i]->key, list[i]->data);
504 }
505#endif
506
507 /* Sort list by keys */
508 qsort(list, n, sizeof(ENTRY *), cmpkey);
509
510 /* Check if the user supplied buffer size is sufficient */
511 if (size) {
512 if (size < totlen + 1) { /* provided buffer too small */
513 debug("### buffer too small: %d, but need %d\n",
514 size, totlen + 1);
515 __set_errno(ENOMEM);
516 return (-1);
517 }
518 } else {
519 size = totlen + 1;
520 }
521
522 /* Check if the user provided a buffer */
523 if (*resp) {
524 /* yes; clear it */
525 res = *resp;
526 memset(res, '\0', size);
527 } else {
528 /* no, allocate and clear one */
529 *resp = res = calloc(1, size);
530 if (res == NULL) {
531 __set_errno(ENOMEM);
532 return (-1);
533 }
534 }
535 /*
536 * Pass 2:
537 * export sorted list of result data
538 */
539 for (i = 0, p = res; i < n; ++i) {
540 char *s;
541
542 s = list[i]->key;
543 while (*s)
544 *p++ = *s++;
545 *p++ = '=';
546
547 s = list[i]->data;
548
549 while (*s) {
550 if ((*s == sep) || (*s == '\\'))
551 *p++ = '\\'; /* escape */
552 *p++ = *s++;
553 }
554 *p++ = sep;
555 }
556 *p = '\0'; /* terminate result */
557
558 return size;
559}
560
561
562/*
563 * himport()
564 */
565
566/*
567 * Import linearized data into hash table.
568 *
569 * This is the inverse function to hexport(): it takes a linear list
570 * of "name=value" pairs and creates hash table entries from it.
571 *
572 * Entries without "value", i. e. consisting of only "name" or
573 * "name=", will cause this entry to be deleted from the hash table.
574 *
575 * The "flag" argument can be used to control the behaviour: when the
576 * H_NOCLEAR bit is set, then an existing hash table will be kept, i. e.
577 * new data will be added to an existing hash table; otherwise, old
578 * data will be discarded and a new hash table will be created.
579 *
580 * The separator character for the "name=value" pairs can be selected,
581 * so we both support importing from externally stored environment
582 * data (separated by NUL characters) and from plain text files
583 * (entries separated by newline characters).
584 *
585 * To allow for nicely formatted text input, leading white space
586 * (sequences of SPACE and TAB chars) is ignored, and entries starting
587 * (after removal of any leading white space) with a '#' character are
588 * considered comments and ignored.
589 *
590 * [NOTE: this means that a variable name cannot start with a '#'
591 * character.]
592 *
593 * When using a non-NUL separator character, backslash is used as
594 * escape character in the value part, allowing for example for
595 * multi-line values.
596 *
597 * In theory, arbitrary separator characters can be used, but only
598 * '\0' and '\n' have really been tested.
599 */
600
601int himport(const char *env, size_t size, const char sep, int flag)
602{
603 return himport_r(&htab, env, size, sep, flag);
604}
605
606int himport_r(struct hsearch_data *htab,
607 const char *env, size_t size, const char sep, int flag)
608{
609 char *data, *sp, *dp, *name, *value;
610
611 /* Test for correct arguments. */
612 if (htab == NULL) {
613 __set_errno(EINVAL);
614 return 0;
615 }
616
617 /* we allocate new space to make sure we can write to the array */
618 if ((data = malloc(size)) == NULL) {
619 debug("himport_r: can't malloc %d bytes\n", size);
620 __set_errno(ENOMEM);
621 return 0;
622 }
623 memcpy(data, env, size);
624 dp = data;
625
626 if ((flag & H_NOCLEAR) == 0) {
627 /* Destroy old hash table if one exists */
628 debug("Destroy Hash Table: %p table = %p\n", htab,
629 htab->table);
630 if (htab->table)
631 hdestroy_r(htab);
632 }
633
634 /*
635 * Create new hash table (if needed). The computation of the hash
636 * table size is based on heuristics: in a sample of some 70+
637 * existing systems we found an average size of 39+ bytes per entry
638 * in the environment (for the whole key=value pair). Assuming a
639 * size of 7 per entry (= safety factor of >5) should provide enough
640 * safety margin for any existing environment definitons and still
641 * allow for more than enough dynamic additions. Note that the
642 * "size" argument is supposed to give the maximum enviroment size
643 * (CONFIG_ENV_SIZE).
644 */
645
646 if (!htab->table) {
647 int nent = size / 7;
648
649 debug("Create Hash Table: N=%d\n", nent);
650
651 if (hcreate_r(nent, htab) == 0) {
652 free(data);
653 return 0;
654 }
655 }
656
657 /* Parse environment; allow for '\0' and 'sep' as separators */
658 do {
659 ENTRY e, *rv;
660
661 /* skip leading white space */
662 while ((*dp == ' ') || (*dp == '\t'))
663 ++dp;
664
665 /* skip comment lines */
666 if (*dp == '#') {
667 while (*dp && (*dp != sep))
668 ++dp;
669 ++dp;
670 continue;
671 }
672
673 /* parse name */
674 for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
675 ;
676
677 /* deal with "name" and "name=" entries (delete var) */
678 if (*dp == '\0' || *(dp + 1) == '\0' ||
679 *dp == sep || *(dp + 1) == sep) {
680 if (*dp == '=')
681 *dp++ = '\0';
682 *dp++ = '\0'; /* terminate name */
683
684 debug("DELETE CANDIDATE: \"%s\"\n", name);
685
686 if (hdelete_r(name, htab) == 0)
687 debug("DELETE ERROR ##############################\n");
688
689 continue;
690 }
691 *dp++ = '\0'; /* terminate name */
692
693 /* parse value; deal with escapes */
694 for (value = sp = dp; *dp && (*dp != sep); ++dp) {
695 if ((*dp == '\\') && *(dp + 1))
696 ++dp;
697 *sp++ = *dp;
698 }
699 *sp++ = '\0'; /* terminate value */
700 ++dp;
701
702 /* enter into hash table */
703 e.key = name;
704 e.data = value;
705
706 hsearch_r(e, ENTER, &rv, htab);
707 if (rv == NULL) {
708 printf("himport_r: can't insert \"%s=%s\" into hash table\n", name, value);
709 return 0;
710 }
711
712 debug("INSERT: %p ==> name=\"%s\" value=\"%s\"\n", rv, name,
713 value);
714 debug(" table = %p, size = %d, filled = %d\n", htab,
715 htab->size, htab->filled);
716 } while ((dp < data + size) && *dp); /* size check needed for text */
717 /* without '\0' termination */
718 free(data);
719
720 return 1; /* everything OK */
721}