lib/hashtable.c

   1 /*
   2  * This implementation is based on code from uClibc-0.9.30.3 but was
   3  * modified and extended for use within U-Boot.
   4  *
   5  * Copyright (C) 2010 Wolfgang Denk <wd@denx.de>
   6  *
   7  * Original license header:
   8  *
   9  * Copyright (C) 1993, 1995, 1996, 1997, 2002 Free Software Foundation, Inc.
  10  * This file is part of the GNU C Library.
  11  * Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1993.
  12  *
  13  * The GNU C Library is free software; you can redistribute it and/or
  14  * modify it under the terms of the GNU Lesser General Public
  15  * License as published by the Free Software Foundation; either
  16  * version 2.1 of the License, or (at your option) any later version.
  17  *
  18  * The GNU C Library is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21  * Lesser General Public License for more details.
  22  *
  23  * You should have received a copy of the GNU Lesser General Public
  24  * License along with the GNU C Library; if not, write to the Free
  25  * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  26  * 02111-1307 USA.
  27  */
  28
  29 #include <errno.h>
  30 #include <malloc.h>
  31
  32 #ifdef USE_HOSTCC               /* HOST build */
  33 # include <string.h>
  34 # include <assert.h>
  35
  36 # ifndef debug
  37 #  ifdef DEBUG
  38 #   define debug(fmt,args...)   printf(fmt ,##args)
  39 #  else
  40 #   define debug(fmt,args...)
  41 #  endif
  42 # endif
  43 #else                           /* U-Boot build */
  44 # include <common.h>
  45 # include <linux/string.h>
  46 #endif
  47
  48 #ifndef CONFIG_ENV_MIN_ENTRIES  /* minimum number of entries */
  49 #define CONFIG_ENV_MIN_ENTRIES 64
  50 #endif
  51 #ifndef CONFIG_ENV_MAX_ENTRIES  /* maximum number of entries */
  52 #define CONFIG_ENV_MAX_ENTRIES 512
  53 #endif
  54
  55 #include "search.h"
  56
  57 /*
  58  * [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
  59  * [Knuth]            The Art of Computer Programming, part 3 (6.4)
  60  */
  61
  62 /*
  63  * The reentrant version has no static variables to maintain the state.
  64  * Instead the interface of all functions is extended to take an argument
  65  * which describes the current status.
  66  */
  67 typedef struct _ENTRY {
  68         unsigned int used;
  69         ENTRY entry;
  70 } _ENTRY;
  71
  72
  73 /*
  74  * hcreate()
  75  */
  76
  77 /*
  78  * For the used double hash method the table size has to be a prime. To
  79  * correct the user given table size we need a prime test.  This trivial
  80  * algorithm is adequate because
  81  * a)  the code is (most probably) called a few times per program run and
  82  * b)  the number is small because the table must fit in the core
  83  * */
  84 static int isprime(unsigned int number)
  85 {
  86         /* no even number will be passed */
  87         unsigned int div = 3;
  88
  89         while (div * div < number && number % div != 0)
  90                 div += 2;
  91
  92         return number % div != 0;
  93 }
  94
  95 /*
  96  * Before using the hash table we must allocate memory for it.
  97  * Test for an existing table are done. We allocate one element
  98  * more as the found prime number says. This is done for more effective
  99  * indexing as explained in the comment for the hsearch function.
 100  * The contents of the table is zeroed, especially the field used
 101  * becomes zero.
 102  */
 103
 104 int hcreate_r(size_t nel, struct hsearch_data *htab)
 105 {
 106         /* Test for correct arguments.  */
 107         if (htab == NULL) {
 108                 __set_errno(EINVAL);
 109                 return 0;
 110         }
 111
 112         /* There is still another table active. Return with error. */
 113         if (htab->table != NULL)
 114                 return 0;
 115
 116         /* Change nel to the first prime number not smaller as nel. */
 117         nel |= 1;               /* make odd */
 118         while (!isprime(nel))
 119                 nel += 2;
 120
 121         htab->size = nel;
 122         htab->filled = 0;
 123
 124         /* allocate memory and zero out */
 125         htab->table = (_ENTRY *) calloc(htab->size + 1, sizeof(_ENTRY));
 126         if (htab->table == NULL)
 127                 return 0;
 128
 129         /* everything went alright */
 130         return 1;
 131 }
 132
 133
 134 /*
 135  * hdestroy()
 136  */
 137
 138 /*
 139  * After using the hash table it has to be destroyed. The used memory can
 140  * be freed and the local static variable can be marked as not used.
 141  */
 142
 143 void hdestroy_r(struct hsearch_data *htab)
 144 {
 145         int i;
 146
 147         /* Test for correct arguments.  */
 148         if (htab == NULL) {
 149                 __set_errno(EINVAL);
 150                 return;
 151         }
 152
 153         /* free used memory */
 154         for (i = 1; i <= htab->size; ++i) {
 155                 if (htab->table[i].used) {
 156                         ENTRY *ep = &htab->table[i].entry;
 157
 158                         free(ep->key);
 159                         free(ep->data);
 160                 }
 161         }
 162         free(htab->table);
 163
 164         /* the sign for an existing table is an value != NULL in htable */
 165         htab->table = NULL;
 166 }
 167
 168 /*
 169  * hsearch()
 170  */
 171
 172 /*
 173  * This is the search function. It uses double hashing with open addressing.
 174  * The argument item.key has to be a pointer to an zero terminated, most
 175  * probably strings of chars. The function for generating a number of the
 176  * strings is simple but fast. It can be replaced by a more complex function
 177  * like ajw (see [Aho,Sethi,Ullman]) if the needs are shown.
 178  *
 179  * We use an trick to speed up the lookup. The table is created by hcreate
 180  * with one more element available. This enables us to use the index zero
 181  * special. This index will never be used because we store the first hash
 182  * index in the field used where zero means not used. Every other value
 183  * means used. The used field can be used as a first fast comparison for
 184  * equality of the stored and the parameter value. This helps to prevent
 185  * unnecessary expensive calls of strcmp.
 186  *
 187  * This implementation differs from the standard library version of
 188  * this function in a number of ways:
 189  *
 190  * - While the standard version does not make any assumptions about
 191  *   the type of the stored data objects at all, this implementation
 192  *   works with NUL terminated strings only.
 193  * - Instead of storing just pointers to the original objects, we
 194  *   create local copies so the caller does not need to care about the
 195  *   data any more.
 196  * - The standard implementation does not provide a way to update an
 197  *   existing entry.  This version will create a new entry or update an
 198  *   existing one when both "action == ENTER" and "item.data != NULL".
 199  * - Instead of returning 1 on success, we return the index into the
 200  *   internal hash table, which is also guaranteed to be positive.
 201  *   This allows us direct access to the found hash table slot for
 202  *   example for functions like hdelete().
 203  */
 204
 205 int hsearch_r(ENTRY item, ACTION action, ENTRY ** retval,
 206               struct hsearch_data *htab)
 207 {
 208         unsigned int hval;
 209         unsigned int count;
 210         unsigned int len = strlen(item.key);
 211         unsigned int idx;
 212
 213         /* Compute an value for the given string. Perhaps use a better method. */
 214         hval = len;
 215         count = len;
 216         while (count-- > 0) {
 217                 hval <<= 4;
 218                 hval += item.key[count];
 219         }
 220
 221         /*
 222          * First hash function:
 223          * simply take the modul but prevent zero.
 224          */
 225         hval %= htab->size;
 226         if (hval == 0)
 227                 ++hval;
 228
 229         /* The first index tried. */
 230         idx = hval;
 231
 232         if (htab->table[idx].used) {
 233                 /*
 234                  * Further action might be required according to the
 235                  * action value.
 236                  */
 237                 unsigned hval2;
 238
 239                 if (htab->table[idx].used == hval
 240                     && strcmp(item.key, htab->table[idx].entry.key) == 0) {
 241                         /* Overwrite existing value? */
 242                         if ((action == ENTER) && (item.data != NULL)) {
 243                                 free(htab->table[idx].entry.data);
 244                                 htab->table[idx].entry.data =
 245                                         strdup(item.data);
 246                                 if (!htab->table[idx].entry.data) {
 247                                         __set_errno(ENOMEM);
 248                                         *retval = NULL;
 249                                         return 0;
 250                                 }
 251                         }
 252                         /* return found entry */
 253                         *retval = &htab->table[idx].entry;
 254                         return idx;
 255                 }
 256
 257                 /*
 258                  * Second hash function:
 259                  * as suggested in [Knuth]
 260                  */
 261                 hval2 = 1 + hval % (htab->size - 2);
 262
 263                 do {
 264                         /*
 265                          * Because SIZE is prime this guarantees to
 266                          * step through all available indices.
 267                          */
 268                         if (idx <= hval2)
 269                                 idx = htab->size + idx - hval2;
 270                         else
 271                                 idx -= hval2;
 272
 273                         /*
 274                          * If we visited all entries leave the loop
 275                          * unsuccessfully.
 276                          */
 277                         if (idx == hval)
 278                                 break;
 279
 280                         /* If entry is found use it. */
 281                         if ((htab->table[idx].used == hval)
 282                             && strcmp(item.key, htab->table[idx].entry.key) == 0) {
 283                                 /* Overwrite existing value? */
 284                                 if ((action == ENTER) && (item.data != NULL)) {
 285                                         free(htab->table[idx].entry.data);
 286                                         htab->table[idx].entry.data =
 287                                                 strdup(item.data);
 288                                         if (!htab->table[idx].entry.data) {
 289                                                 __set_errno(ENOMEM);
 290                                                 *retval = NULL;
 291                                                 return 0;
 292                                         }
 293                                 }
 294                                 /* return found entry */
 295                                 *retval = &htab->table[idx].entry;
 296                                 return idx;
 297                         }
 298                 }
 299                 while (htab->table[idx].used);
 300         }
 301
 302         /* An empty bucket has been found. */
 303         if (action == ENTER) {
 304                 /*
 305                  * If table is full and another entry should be
 306                  * entered return with error.
 307                  */
 308                 if (htab->filled == htab->size) {
 309                         __set_errno(ENOMEM);
 310                         *retval = NULL;
 311                         return 0;
 312                 }
 313
 314                 /*
 315                  * Create new entry;
 316                  * create copies of item.key and item.data
 317                  */
 318                 htab->table[idx].used = hval;
 319                 htab->table[idx].entry.key = strdup(item.key);
 320                 htab->table[idx].entry.data = strdup(item.data);
 321                 if (!htab->table[idx].entry.key ||
 322                     !htab->table[idx].entry.data) {
 323                         __set_errno(ENOMEM);
 324                         *retval = NULL;
 325                         return 0;
 326                 }
 327
 328                 ++htab->filled;
 329
 330                 /* return new entry */
 331                 *retval = &htab->table[idx].entry;
 332                 return 1;
 333         }
 334
 335         __set_errno(ESRCH);
 336         *retval = NULL;
 337         return 0;
 338 }
 339
 340
 341 /*
 342  * hdelete()
 343  */
 344
 345 /*
 346  * The standard implementation of hsearch(3) does not provide any way
 347  * to delete any entries from the hash table.  We extend the code to
 348  * do that.
 349  */
 350
 351 int hdelete_r(const char *key, struct hsearch_data *htab)
 352 {
 353         ENTRY e, *ep;
 354         int idx;
 355
 356         debug("hdelete: DELETE key \"%s\"\n", key);
 357
 358         e.key = (char *)key;
 359
 360         if ((idx = hsearch_r(e, FIND, &ep, htab)) == 0) {
 361                 __set_errno(ESRCH);
 362                 return 0;       /* not found */
 363         }
 364
 365         /* free used ENTRY */
 366         debug("hdelete: DELETING key \"%s\"\n", key);
 367
 368         free(ep->key);
 369         free(ep->data);
 370         htab->table[idx].used = 0;
 371
 372         --htab->filled;
 373
 374         return 1;
 375 }
 376
 377 /*
 378  * hexport()
 379  */
 380
 381 /*
 382  * Export the data stored in the hash table in linearized form.
 383  *
 384  * Entries are exported as "name=value" strings, separated by an
 385  * arbitrary (non-NUL, of course) separator character. This allows to
 386  * use this function both when formatting the U-Boot environment for
 387  * external storage (using '\0' as separator), but also when using it
 * for the "printenv" command to print all variables, simply by using
 * '\n' as separator. This can also be used for new features like
 390  * exporting the environment data as text file, including the option
 391  * for later re-import.
 392  *
 393  * The entries in the result list will be sorted by ascending key
 394  * values.
 395  *
 396  * If the separator character is different from NUL, then any
 397  * separator characters and backslash characters in the values will
 * be escaped by a preceding backslash in output. This is needed for
 399  * example to enable multi-line values, especially when the output
 400  * shall later be parsed (for example, for re-import).
 401  *
 402  * There are several options how the result buffer is handled:
 403  *
 404  * *resp  size
 405  * -----------
 406  *  NULL    0   A string of sufficient length will be allocated.
 407  *  NULL   >0   A string of the size given will be
 408  *              allocated. An error will be returned if the size is
 409  *              not sufficient.  Any unused bytes in the string will
 410  *              be '\0'-padded.
 411  * !NULL    0   The user-supplied buffer will be used. No length
 412  *              checking will be performed, i. e. it is assumed that
 413  *              the buffer size will always be big enough. DANGEROUS.
 414  * !NULL   >0   The user-supplied buffer will be used. An error will
 415  *              be returned if the size is not sufficient.  Any unused
 416  *              bytes in the string will be '\0'-padded.
 417  */
 418
 419 static int cmpkey(const void *p1, const void *p2)
 420 {
 421         ENTRY *e1 = *(ENTRY **) p1;
 422         ENTRY *e2 = *(ENTRY **) p2;
 423
 424         return (strcmp(e1->key, e2->key));
 425 }
 426
 427 ssize_t hexport_r(struct hsearch_data *htab, const char sep,
 428                  char **resp, size_t size)
 429 {
 430         ENTRY *list[htab->size];
 431         char *res, *p;
 432         size_t totlen;
 433         int i, n;
 434
 435         /* Test for correct arguments.  */
 436         if ((resp == NULL) || (htab == NULL)) {
 437                 __set_errno(EINVAL);
 438                 return (-1);
 439         }
 440
 441         debug("EXPORT  table = %p, htab.size = %d, htab.filled = %d, size = %d\n",
 442                 htab, htab->size, htab->filled, size);
 443         /*
 444          * Pass 1:
 445          * search used entries,
 446          * save addresses and compute total length
 447          */
 448         for (i = 1, n = 0, totlen = 0; i <= htab->size; ++i) {
 449
 450                 if (htab->table[i].used) {
 451                         ENTRY *ep = &htab->table[i].entry;
 452
 453                         list[n++] = ep;
 454
 455                         totlen += strlen(ep->key) + 2;
 456
 457                         if (sep == '\0') {
 458                                 totlen += strlen(ep->data);
 459                         } else {        /* check if escapes are needed */
 460                                 char *s = ep->data;
 461
 462                                 while (*s) {
 463                                         ++totlen;
 464                                         /* add room for needed escape chars */
 465                                         if ((*s == sep) || (*s == '\\'))
 466                                                 ++totlen;
 467                                         ++s;
 468                                 }
 469                         }
 470                         totlen += 2;    /* for '=' and 'sep' char */
 471                 }
 472         }
 473
 474 #ifdef DEBUG
 475         /* Pass 1a: print unsorted list */
 476         printf("Unsorted: n=%d\n", n);
 477         for (i = 0; i < n; ++i) {
 478                 printf("\t%3d: %p ==> %-10s => %s\n",
 479                        i, list[i], list[i]->key, list[i]->data);
 480         }
 481 #endif
 482
 483         /* Sort list by keys */
 484         qsort(list, n, sizeof(ENTRY *), cmpkey);
 485
 486         /* Check if the user supplied buffer size is sufficient */
 487         if (size) {
 488                 if (size < totlen + 1) {        /* provided buffer too small */
 489                         debug("### buffer too small: %d, but need %d\n",
 490                                 size, totlen + 1);
 491                         __set_errno(ENOMEM);
 492                         return (-1);
 493                 }
 494         } else {
 495                 size = totlen + 1;
 496         }
 497
 498         /* Check if the user provided a buffer */
 499         if (*resp) {
 500                 /* yes; clear it */
 501                 res = *resp;
 502                 memset(res, '\0', size);
 503         } else {
 504                 /* no, allocate and clear one */
 505                 *resp = res = calloc(1, size);
 506                 if (res == NULL) {
 507                         __set_errno(ENOMEM);
 508                         return (-1);
 509                 }
 510         }
 511         /*
 512          * Pass 2:
 513          * export sorted list of result data
 514          */
 515         for (i = 0, p = res; i < n; ++i) {
 516                 char *s;
 517
 518                 s = list[i]->key;
 519                 while (*s)
 520                         *p++ = *s++;
 521                 *p++ = '=';
 522
 523                 s = list[i]->data;
 524
 525                 while (*s) {
 526                         if ((*s == sep) || (*s == '\\'))
 527                                 *p++ = '\\';    /* escape */
 528                         *p++ = *s++;
 529                 }
 530                 *p++ = sep;
 531         }
 532         *p = '\0';              /* terminate result */
 533
 534         return size;
 535 }
 536
 537
 538 /*
 539  * himport()
 540  */
 541
 542 /*
 543  * Import linearized data into hash table.
 544  *
 545  * This is the inverse function to hexport(): it takes a linear list
 546  * of "name=value" pairs and creates hash table entries from it.
 547  *
 548  * Entries without "value", i. e. consisting of only "name" or
 549  * "name=", will cause this entry to be deleted from the hash table.
 550  *
 551  * The "flag" argument can be used to control the behaviour: when the
 552  * H_NOCLEAR bit is set, then an existing hash table will kept, i. e.
 553  * new data will be added to an existing hash table; otherwise, old
 554  * data will be discarded and a new hash table will be created.
 555  *
 556  * The separator character for the "name=value" pairs can be selected,
 557  * so we both support importing from externally stored environment
 558  * data (separated by NUL characters) and from plain text files
 559  * (entries separated by newline characters).
 560  *
 561  * To allow for nicely formatted text input, leading white space
 562  * (sequences of SPACE and TAB chars) is ignored, and entries starting
 563  * (after removal of any leading white space) with a '#' character are
 564  * considered comments and ignored.
 565  *
 566  * [NOTE: this means that a variable name cannot start with a '#'
 567  * character.]
 568  *
 569  * When using a non-NUL separator character, backslash is used as
 570  * escape character in the value part, allowing for example for
 571  * multi-line values.
 572  *
 573  * In theory, arbitrary separator characters can be used, but only
 574  * '\0' and '\n' have really been tested.
 575  */
 576
 577 int himport_r(struct hsearch_data *htab,
 578               const char *env, size_t size, const char sep, int flag)
 579 {
 580         char *data, *sp, *dp, *name, *value;
 581
 582         /* Test for correct arguments.  */
 583         if (htab == NULL) {
 584                 __set_errno(EINVAL);
 585                 return 0;
 586         }
 587
 588         /* we allocate new space to make sure we can write to the array */
 589         if ((data = malloc(size)) == NULL) {
 590                 debug("himport_r: can't malloc %d bytes\n", size);
 591                 __set_errno(ENOMEM);
 592                 return 0;
 593         }
 594         memcpy(data, env, size);
 595         dp = data;
 596
 597         if ((flag & H_NOCLEAR) == 0) {
 598                 /* Destroy old hash table if one exists */
 599                 debug("Destroy Hash Table: %p table = %p\n", htab,
 600                        htab->table);
 601                 if (htab->table)
 602                         hdestroy_r(htab);
 603         }
 604
 605         /*
 606          * Create new hash table (if needed).  The computation of the hash
 607          * table size is based on heuristics: in a sample of some 70+
 608          * existing systems we found an average size of 39+ bytes per entry
 609          * in the environment (for the whole key=value pair). Assuming a
 610          * size of 8 per entry (= safety factor of ~5) should provide enough
 611          * safety margin for any existing environment definitions and still
 612          * allow for more than enough dynamic additions. Note that the
 613          * "size" argument is supposed to give the maximum enviroment size
 614          * (CONFIG_ENV_SIZE).  This heuristics will result in
 615          * unreasonably large numbers (and thus memory footprint) for
 616          * big flash environments (>8,000 entries for 64 KB
 617          * envrionment size), so we clip it to a reasonable value.
 618          * On the other hand we need to add some more entries for free
 619          * space when importing very small buffers. Both boundaries can
 620          * be overwritten in the board config file if needed.
 621          */
 622
 623         if (!htab->table) {
 624                 int nent = CONFIG_ENV_MIN_ENTRIES + size / 8;
 625
 626                 if (nent > CONFIG_ENV_MAX_ENTRIES)
 627                         nent = CONFIG_ENV_MAX_ENTRIES;
 628
 629                 debug("Create Hash Table: N=%d\n", nent);
 630
 631                 if (hcreate_r(nent, htab) == 0) {
 632                         free(data);
 633                         return 0;
 634                 }
 635         }
 636
 637         /* Parse environment; allow for '\0' and 'sep' as separators */
 638         do {
 639                 ENTRY e, *rv;
 640
 641                 /* skip leading white space */
 642                 while ((*dp == ' ') || (*dp == '\t'))
 643                         ++dp;
 644
 645                 /* skip comment lines */
 646                 if (*dp == '#') {
 647                         while (*dp && (*dp != sep))
 648                                 ++dp;
 649                         ++dp;
 650                         continue;
 651                 }
 652
 653                 /* parse name */
 654                 for (name = dp; *dp != '=' && *dp && *dp != sep; ++dp)
 655                         ;
 656
 657                 /* deal with "name" and "name=" entries (delete var) */
 658                 if (*dp == '\0' || *(dp + 1) == '\0' ||
 659                     *dp == sep || *(dp + 1) == sep) {
 660                         if (*dp == '=')
 661                                 *dp++ = '\0';
 662                         *dp++ = '\0';   /* terminate name */
 663
 664                         debug("DELETE CANDIDATE: \"%s\"\n", name);
 665
 666                         if (hdelete_r(name, htab) == 0)
 667                                 debug("DELETE ERROR ##############################\n");
 668
 669                         continue;
 670                 }
 671                 *dp++ = '\0';   /* terminate name */
 672
 673                 /* parse value; deal with escapes */
 674                 for (value = sp = dp; *dp && (*dp != sep); ++dp) {
 675                         if ((*dp == '\\') && *(dp + 1))
 676                                 ++dp;
 677                         *sp++ = *dp;
 678                 }
 679                 *sp++ = '\0';   /* terminate value */
 680                 ++dp;
 681
 682                 /* enter into hash table */
 683                 e.key = name;
 684                 e.data = value;
 685
 686                 hsearch_r(e, ENTER, &rv, htab);
 687                 if (rv == NULL) {
 688                         printf("himport_r: can't insert \"%s=%s\" into hash table\n",
 689                                 name, value);
 690                         return 0;
 691                 }
 692
 693                 debug("INSERT: table %p, filled %d/%d rv %p ==> name=\"%s\" value=\"%s\"\n",
 694                         htab, htab->filled, htab->size,
 695                         rv, name, value);
 696         } while ((dp < data + size) && *dp);    /* size check needed for text */
 697                                                 /* without '\0' termination */
 698         debug("INSERT: free(data = %p)\n", data);
 699         free(data);
 700
 701         debug("INSERT: done\n");
 702         return 1;               /* everything OK */
 703 }