lib/hashtable.c

   1 /*
   2  * This implementation is based on code from uClibc-0.9.30.3 but was
   3  * modified and extended for use within U-Boot.
   4  *
   5  * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
   6  *
   7  * Original license header:
   8  *
   9  * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
  10  * This file is part of the GNU C Library.
  11  * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
  12  *
  13  * The GNU C Library is free software; you can redistribute it and/or
  14  * modify it under the terms of the GNU Lesser General Public
  15  * License as published by the Free Software Foundation; either
  16  * version 2.1 of the License, or (at your option) any later version.
  17  *
  18  * The GNU C Library is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21  * Lesser General Public License for more details.
  22  *
  23  * You should have received a copy of the GNU Lesser General Public
  24  * License along with the GNU C Library; if not, write to the Free
  25  * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  26  * 02111-1307 USA.
  27  */
  28
  29 #include <errno.h>
  30 #include <malloc.h>
  31
  32 #ifdef USE_HOSTCC               /* HOST build */
  33 # include <string.h>
  34 # include <assert.h>
  35
  36 # ifndef debug
  37 #  ifdef DEBUG
  38 #   define debug(fmt,args...)   printf(fmt ,##args)
  39 #  else
  40 #   define debug(fmt,args...)
  41 #  endif
  42 # endif
  43 #else                           /* U-Boot build */
  44 # include <common.h>
  45 # include <linux/string.h>
  46 #endif
  47
  48 #include "search.h"
  49
  50 /*
  51  * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
  52  * [Knuth]            The Art of Computer Programming, part 3 (6.4)
  53  */
  54
  55 /*
  56  * The non-reentrant version use a global space for storing the hash table.
  57  */
  58 static struct hsearch_data htab;
  59
  60 /*
  61  * The reentrant version has no static variables to maintain the state.
  62  * Instead the interface of all functions is extended to take an argument
  63  * which describes the current status.
  64  */
  65 typedef struct _ENTRY {
  66         unsigned int used;
  67         ENTRY entry;
  68 } _ENTRY;
  69
  70
  71 /*
  72  * hcreate()
  73  */
  74
  75 /*
  76  * For the used double hash method the table size has to be a prime. To
  77  * correct the user given table size we need a prime test.  This trivial
  78  * algorithm is adequate because
  79  * a)  the code is (most probably) called a few times per program run and
  80  * b)  the number is small because the table must fit in the core
  81  * */
  82 static int isprime(unsigned int number)
  83 {
  84         /* no even number will be passed */
  85         unsigned int div = 3;
  86
  87         while (div * div < number && number % div != 0)
  88                 div += 2;
  89
  90         return number % div != 0;
  91 }
  92
  93 int hcreate(size_t nel)
  94 {
  95         return hcreate_r(nel, &htab);
  96 }
  97
  98 /*
  99  * Before using the hash table we must allocate memory for it.
 100  * Test for an existing table are done. We allocate one element
 101  * more as the found prime number says. This is done for more effective
 102  * indexing as explained in the comment for the hsearch function.
 103  * The contents of the table is zeroed, especially the field used
 104  * becomes zero.
 105  */
 106 int hcreate_r(size_t nel, struct hsearch_data *htab)
 107 {
 108         /* Test for correct arguments.  */
 109         if (htab == NULL) {
 110                 __set_errno(EINVAL);
 111                 return 0;
 112         }
 113
 114         /* There is still another table active. Return with error. */
 115         if (htab->table != NULL)
 116                 return 0;
 117
 118         /* Change nel to the first prime number not smaller as nel. */
 119         nel |= 1;               /* make odd */
 120         while (!isprime(nel))
 121                 nel += 2;
 122
 123         htab->size = nel;
 124         htab->filled = 0;
 125
 126         /* allocate memory and zero out */
 127         htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
 128         if (htab->table == NULL)
 129                 return 0;
 130
 131         /* everything went alright */
 132         return 1;
 133 }
 134
 135
 136 /*
 137  * hdestroy()
 138  */
 139 void hdestroy(void)
 140 {
 141         hdestroy_r(&htab);
 142 }
 143
 144 /*
 145  * After using the hash table it has to be destroyed. The used memory can
 146  * be freed and the local static variable can be marked as not used.
 147  */
 148 void hdestroy_r(struct hsearch_data *htab)
 149 {
 150         int i;
 151
 152         /* Test for correct arguments.  */
 153         if (htab == NULL) {
 154                 __set_errno(EINVAL);
 155                 return;
 156         }
 157
 158         /* free used memory */
 159         for (i = 1; i <= htab->size; ++i) {
 160                 if (htab->table[i].used) {
 161                         ENTRY *ep = &htab->table[i].entry;
 162
 163                         free(ep->key);
 164                         free(ep->data);
 165                 }
 166         }
 167         free(htab->table);
 168
 169         /* the sign for an existing table is an value != NULL in htable */
 170         htab->table = NULL;
 171 }
 172
 173 /*
 174  * hsearch()
 175  */
 176
 177 /*
 178  * This is the search function. It uses double hashing with open addressing.
 179  * The argument item.key has to be a pointer to an zero terminated, most
 180  * probably strings of chars. The function for generating a number of the
 181  * strings is simple but fast. It can be replaced by a more complex function
 182  * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
 183  *
 184  * We use an trick to speed up the lookup. The table is created by hcreate
 185  * with one more element available. This enables us to use the index zero
 186  * special. This index will never be used because we store the first hash
 187  * index in the field used where zero means not used. Every other value
 188  * means used. The used field can be used as a first fast comparison for
 189  * equality of the stored and the parameter value. This helps to prevent
 190  * unnecessary expensive calls of strcmp.
 191  *
 192  * This implementation differs from the standard library version of
 193  * this function in a number of ways:
 194  *
 195  * - While the standard version does not make any assumptions about
 196  *   the type of the stored data objects at all, this implementation
 197  *   works with NUL terminated strings only.
 198  * - Instead of storing just pointers to the original objects, we
 199  *   create local copies so the caller does not need to care about the
 200  *   data any more.
 201  * - The standard implementation does not provide a way to update an
 202  *   existing entry.  This version will create a new entry or update an
 203  *   existing one when both "action == ENTER" and "item.data != NULL".
 204  * - Instead of returning 1 on success, we return the index into the
 205  *   internal hash table, which is also guaranteed to be positive.
 206  *   This allows us direct access to the found hash table slot for
 207  *   example for functions like hdelete().
 208  */
 209
 210 ENTRY *hsearch(ENTRY item, ACTION action)
 211 {
 212         ENTRY *result;
 213
 214         (void) hsearch_r(item, action, &result, &htab);
 215
 216         return result;
 217 }
 218
 219 int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
 220               struct hsearch_data *htab)
 221 {
 222         unsigned int hval;
 223         unsigned int count;
 224         unsigned int len = strlen(item.key);
 225         unsigned int idx;
 226
 227         /* Compute an value for the given string. Perhaps use a better method. */
 228         hval = len;
 229         count = len;
 230         while (count-- > 0) {
 231                 hval <<= 4;
 232                 hval += item.key[count];
 233         }
 234
 235         /*
 236          * First hash function:
 237          * simply take the modul but prevent zero.
 238          */
 239         hval %= htab->size;
 240         if (hval == 0)
 241                 ++hval;
 242
 243         /* The first index tried. */
 244         idx = hval;
 245
 246         if (htab->table[idx].used) {
 247                 /*
 248                  * Further action might be required according to the
 249                  * action value.
 250                  */
 251                 unsigned hval2;
 252
 253                 if (htab->table[idx].used == hval
 254                     && strcmp(item.key, htab->table[idx].entry.key) == 0) {
 255                         /* Overwrite existing value? */
 256                         if ((action == ENTER) && (item.data != NULL)) {
 257                                 free(htab->table[idx].entry.data);
 258                                 htab->table[idx].entry.data =
 259                                         strdup(item.data);
 260                                 if (!htab->table[idx].entry.data) {
 261                                         __set_errno(ENOMEM);
 262                                         *retval = NULL;
 263                                         return 0;
 264                                 }
 265                         }
 266                         /* return found entry */
 267                         *retval = &htab->table[idx].entry;
 268                         return idx;
 269                 }
 270
 271                 /*
 272                  * Second hash function:
 273                  * as suggested in [Knuth]
 274                  */
 275                 hval2 = 1 + hval % (htab->size - 2);
 276
 277                 do {
 278                         /*
 279                          * Because SIZE is prime this guarantees to
 280                          * step through all available indices.
 281                          */
 282                         if (idx <= hval2)
 283                                 idx = htab->size + idx - hval2;
 284                         else
 285                                 idx -= hval2;
 286
 287                         /*
 288                          * If we visited all entries leave the loop
 289                          * unsuccessfully.
 290                          */
 291                         if (idx == hval)
 292                                 break;
 293
 294                         /* If entry is found use it. */
 295                         if ((htab->table[idx].used == hval)
 296                             && strcmp(item.key, htab->table[idx].entry.key) == 0) {
 297                                 /* Overwrite existing value? */
 298                                 if ((action == ENTER) && (item.data != NULL)) {
 299                                         free(htab->table[idx].entry.data);
 300                                         htab->table[idx].entry.data =
 301                                                 strdup(item.data);
 302                                         if (!htab->table[idx].entry.data) {
 303                                                 __set_errno(ENOMEM);
 304                                                 *retval = NULL;
 305                                                 return 0;
 306                                         }
 307                                 }
 308                                 /* return found entry */
 309                                 *retval = &htab->table[idx].entry;
 310                                 return idx;
 311                         }
 312                 }
 313                 while (htab->table[idx].used);
 314         }
 315
 316         /* An empty bucket has been found. */
 317         if (action == ENTER) {
 318                 /*
 319                  * If table is full and another entry should be
 320                  * entered return with error.
 321                  */
 322                 if (htab->filled == htab->size) {
 323                         __set_errno(ENOMEM);
 324                         *retval = NULL;
 325                         return 0;
 326                 }
 327
 328                 /*
 329                  * Create new entry;
 330                  * create copies of item.key and item.data
 331                  */
 332                 htab->table[idx].used = hval;
 333                 htab->table[idx].entry.key = strdup(item.key);
 334                 htab->table[idx].entry.data = strdup(item.data);
 335                 if (!htab->table[idx].entry.key ||
 336                     !htab->table[idx].entry.data) {
 337                         __set_errno(ENOMEM);
 338                         *retval = NULL;
 339                         return 0;
 340                 }
 341
 342                 ++htab->filled;
 343
 344                 /* return new entry */
 345                 *retval = &htab->table[idx].entry;
 346                 return 1;
 347         }
 348
 349         __set_errno(ESRCH);
 350         *retval = NULL;
 351         return 0;
 352 }
 353
 354
 355 /*
 356  * hdelete()
 357  */
 358
 359 /*
 360  * The standard implementation of hsearch(3) does not provide any way
 361  * to delete any entries from the hash table.  We extend the code to
 362  * do that.
 363  */
 364
 365 int hdelete(const char *key)
 366 {
 367         return hdelete_r(key, &htab);
 368 }
 369
 370 int hdelete_r(const char *key, struct hsearch_data *htab)
 371 {
 372         ENTRY e, *ep;
 373         int idx;
 374
 375         debug("hdelete: DELETE key \"%s\"\n", key);
 376
 377         e.key = (char *)key;
 378
 379         if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
 380                 __set_errno(ESRCH);
 381                 return 0;       /* not found */
 382         }
 383
 384         /* free used ENTRY */
 385         debug("hdelete: DELETING key \"%s\"\n", key);
 386
 387         free(ep->key);
 388         free(ep->data);
 389         htab->table[idx].used = 0;
 390
 391         --htab->filled;
 392
 393         return 1;
 394 }
 395
 396 /*
 397  * hexport()
 398  */
 399
 400 /*
 401  * Export the data stored in the hash table in linearized form.
 402  *
 403  * Entries are exported as "name=value" strings, separated by an
 404  * arbitrary (non-NUL, of course) separator character. This allows to
 405  * use this function both when formatting the U-Boot environment for
 406  * external storage (using '\0' as separator), but also when using it
 407  * for the "printenv" command to print all variables, simply by using
 408  * as '\n" as separator. This can also be used for new features like
 409  * exporting the environment data as text file, including the option
 410  * for later re-import.
 411  *
 412  * The entries in the result list will be sorted by ascending key
 413  * values.
 414  *
 415  * If the separator character is different from NUL, then any
 416  * separator characters and backslash characters in the values will
 417  * be escaped by a preceeding backslash in output. This is needed for
 418  * example to enable multi-line values, especially when the output
 419  * shall later be parsed (for example, for re-import).
 420  *
 421  * There are several options how the result buffer is handled:
 422  *
 423  * *resp  size
 424  * -----------
 425  *  NULL    0   A string of sufficient length will be allocated.
 426  *  NULL   >0   A string of the size given will be
 427  *              allocated. An error will be returned if the size is
 428  *              not sufficient.  Any unused bytes in the string will
 429  *              be '\0'-padded.
 430  * !NULL    0   The user-supplied buffer will be used. No length
 431  *              checking will be performed, i. e. it is assumed that
 432  *              the buffer size will always be big enough. DANGEROUS.
 433  * !NULL   >0   The user-supplied buffer will be used. An error will
 434  *              be returned if the size is not sufficient.  Any unused
 435  *              bytes in the string will be '\0'-padded.
 436  */
 437
 438 ssize_t hexport(const char sep, char **resp, size_t size)
 439 {
 440         return hexport_r(&htab, sep, resp, size);
 441 }
 442
 443 static int cmpkey(const void *p1, const void *p2)
 444 {
 445         ENTRY *e1 = *(ENTRY **) p1;
 446         ENTRY *e2 = *(ENTRY **) p2;
 447
 448         return (strcmp(e1->key, e2->key));
 449 }
 450
 451 ssize_t hexport_r(struct hsearch_data *htab, const char sep,
 452                  char **resp, size_t size)
 453 {
 454         ENTRY *list[htab->size];
 455         char *res, *p;
 456         size_t totlen;
 457         int i, n;
 458
 459         /* Test for correct arguments.  */
 460         if ((resp == NULL) || (htab == NULL)) {
 461                 __set_errno(EINVAL);
 462                 return (-1);
 463         }
 464
 465         debug("EXPORT  table = %p, htab.size = %d, htab.filled = %d, size = %d\n",
 466                 htab, htab->size, htab->filled, size);
 467         /*
 468          * Pass 1:
 469          * search used entries,
 470          * save addresses and compute total length
 471          */
 472         for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
 473
 474                 if (htab->table[i].used) {
 475                         ENTRY *ep = &htab->table[i].entry;
 476
 477                         list[n++] = ep;
 478
 479                         totlen += strlen(ep->key) + 2;
 480
 481                         if (sep == '\0') {
 482                                 totlen += strlen(ep->data);
 483                         } else {        /* check if escapes are needed */
 484                                 char *s = ep->data;
 485
 486                                 while (*s) {
 487                                         ++totlen;
 488                                         /* add room for needed escape chars */
 489                                         if ((*s == sep) || (*s == '\\'))
 490                                                 ++totlen;
 491                                         ++s;
 492                                 }
 493                         }
 494                         totlen += 2;    /* for '=' and 'sep' char */
 495                 }
 496         }
 497
 498 #ifdef DEBUG
 499         /* Pass 1a: print unsorted list */
 500         printf("Unsorted: n=%d\n", n);
 501         for (i = 0; i < n; ++i) {
 502                 printf("\t%3d: %p ==> %-10s => %s\n",
 503                        i, list[i], list[i]->key, list[i]->data);
 504         }
 505 #endif
 506
 507         /* Sort list by keys */
 508         qsort(list, n, sizeof(ENTRY *), cmpkey);
 509
 510         /* Check if the user supplied buffer size is sufficient */
 511         if (size) {
 512                 if (size < totlen + 1) {        /* provided buffer too small */
 513                         debug("### buffer too small: %d, but need %d\n",
 514                                 size, totlen + 1);
 515                         __set_errno(ENOMEM);
 516                         return (-1);
 517                 }
 518         } else {
 519                 size = totlen + 1;
 520         }
 521
 522         /* Check if the user provided a buffer */
 523         if (*resp) {
 524                 /* yes; clear it */
 525                 res = *resp;
 526                 memset(res, '\0', size);
 527         } else {
 528                 /* no, allocate and clear one */
 529                 *resp = res = calloc(1, size);
 530                 if (res == NULL) {
 531                         __set_errno(ENOMEM);
 532                         return (-1);
 533                 }
 534         }
 535         /*
 536          * Pass 2:
 537          * export sorted list of result data
 538          */
 539         for (i = 0, p = res; i < n; ++i) {
 540                 char *s;
 541
 542                 s = list[i]->key;
 543                 while (*s)
 544                         *p++ = *s++;
 545                 *p++ = '=';
 546
 547                 s = list[i]->data;
 548
 549                 while (*s) {
 550                         if ((*s == sep) || (*s == '\\'))
 551                                 *p++ = '\\';    /* escape */
 552                         *p++ = *s++;
 553                 }
 554                 *p++ = sep;
 555         }
 556         *p = '\0';              /* terminate result */
 557
 558         return size;
 559 }
 560
 561
 562 /*
 563  * himport()
 564  */
 565
 566 /*
 567  * Import linearized data into hash table.
 568  *
 569  * This is the inverse function to hexport(): it takes a linear list
 570  * of "name=value" pairs and creates hash table entries from it.
 571  *
 572  * Entries without "value", i. e. consisting of only "name" or
 573  * "name=", will cause this entry to be deleted from the hash table.
 574  *
 575  * The "flag" argument can be used to control the behaviour: when the
 576  * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
 577  * new data will be added to an existing hash table; otherwise, old
 578  * data will be discarded and a new hash table will be created.
 579  *
 580  * The separator character for the "name=value" pairs can be selected,
 581  * so we both support importing from externally stored environment
 582  * data (separated by NUL characters) and from plain text files
 583  * (entries separated by newline characters).
 584  *
 585  * To allow for nicely formatted text input, leading white space
 586  * (sequences of SPACE and TAB chars) is ignored, and entries starting
 587  * (after removal of any leading white space) with a '#' character are
 588  * considered comments and ignored.
 589  *
 590  * [NOTE: this means that a variable name cannot start with a '#'
 591  * character.]
 592  *
 593  * When using a non-NUL separator character, backslash is used as
 594  * escape character in the value part, allowing for example for
 595  * multi-line values.
 596  *
 597  * In theory, arbitrary separator characters can be used, but only
 598  * '\0' and '\n' have really been tested.
 599  */
 600
 601 int himport(const char *env, size_t size, const char sep, int flag)
 602 {
 603         return himport_r(&htab, env, size, sep, flag);
 604 }
 605
 606 int himport_r(struct hsearch_data *htab,
 607               const char *env, size_t size, const char sep, int flag)
 608 {
 609         char *data, *sp, *dp, *name, *value;
 610
 611         /* Test for correct arguments.  */
 612         if (htab == NULL) {
 613                 __set_errno(EINVAL);
 614                 return 0;
 615         }
 616
 617         /* we allocate new space to make sure we can write to the array */
 618         if ((data = malloc(size)) == NULL) {
 619                 debug("himport_r: can't malloc %d bytes\n", size);
 620                 __set_errno(ENOMEM);
 621                 return 0;
 622         }
 623         memcpy(data, env, size);
 624         dp = data;
 625
 626         if ((flag & H_NOCLEAR) == 0) {
 627                 /* Destroy old hash table if one exists */
 628                 debug("Destroy Hash Table: %p table = %p\n", htab,
 629                        htab->table);
 630                 if (htab->table)
 631                         hdestroy_r(htab);
 632         }
 633
 634         /*
 635          * Create new hash table (if needed).  The computation of the hash
 636          * table size is based on heuristics: in a sample of some 70+
 637          * existing systems we found an average size of 39+ bytes per entry
 638          * in the environment (for the whole key=value pair). Assuming a
 639          * size of 7 per entry (= safety factor of >5) should provide enough
 640          * safety margin for any existing environment definitons and still
 641          * allow for more than enough dynamic additions. Note that the
 642          * "size" argument is supposed to give the maximum enviroment size
 643          * (CONFIG_ENV_SIZE).
 644          */
 645
 646         if (!htab->table) {
 647                 int nent = size / 7;
 648
 649                 debug("Create Hash Table: N=%d\n", nent);
 650
 651                 if (hcreate_r(nent, htab) == 0) {
 652                         free(data);
 653                         return 0;
 654                 }
 655         }
 656
 657         /* Parse environment; allow for '\0' and 'sep' as separators */
 658         do {
 659                 ENTRY e, *rv;
 660
 661                 /* skip leading white space */
 662                 while ((*dp == ' ') || (*dp == '\t'))
 663                         ++dp;
 664
 665                 /* skip comment lines */
 666                 if (*dp == '#') {
 667                         while (*dp && (*dp != sep))
 668                                 ++dp;
 669                         ++dp;
 670                         continue;
 671                 }
 672
 673                 /* parse name */
 674                 for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
 675                         ;
 676
 677                 /* deal with "name" and "name=" entries (delete var) */
 678                 if (*dp == '\0' || *(dp + 1) == '\0' ||
 679                     *dp == sep || *(dp + 1) == sep) {
 680                         if (*dp == '=')
 681                                 *dp++ = '\0';
 682                         *dp++ = '\0';   /* terminate name */
 683
 684                         debug("DELETE CANDIDATE: \"%s\"\n", name);
 685
 686                         if (hdelete_r(name, htab) == 0)
 687                                 debug("DELETE ERROR ##############################\n");
 688
 689                         continue;
 690                 }
 691                 *dp++ = '\0';   /* terminate name */
 692
 693                 /* parse value; deal with escapes */
 694                 for (value = sp = dp; *dp && (*dp != sep); ++dp) {
 695                         if ((*dp == '\\') && *(dp + 1))
 696                                 ++dp;
 697                         *sp++ = *dp;
 698                 }
 699                 *sp++ = '\0';   /* terminate value */
 700                 ++dp;
 701
 702                 /* enter into hash table */
 703                 e.key = name;
 704                 e.data = value;
 705
 706                 hsearch_r(e, ENTER, &rv, htab);
 707                 if (rv == NULL) {
 708                         printf("himport_r: can't insert \"%s=%s\" into hash table\n", name, value);
 709                         return 0;
 710                 }
 711
 712                 debug("INSERT: %p ==> name=\"%s\" value=\"%s\"\n", rv, name,
 713                        value);
 714                 debug("        table = %p, size = %d, filled = %d\n", htab,
 715                        htab->size, htab->filled);
 716         } while ((dp < data + size) && *dp);    /* size check needed for text */
 717                                                 /* without '\0' termination */
 718         free(data);
 719
 720         return 1;               /* everything OK */
 721 }