mtree: use hash table for duplicate entry search

author Andrew Gregory <andrew.gregory.8@gmail.com>

Tue, 1 Nov 2016 16:42:14 +0000 (12:42 -0400)

committer Andrew Gregory <andrew.gregory.8@gmail.com>

Wed, 2 Nov 2016 00:40:26 +0000 (20:40 -0400)
author Andrew Gregory <andrew.gregory.8@gmail.com>
Tue, 1 Nov 2016 16:42:14 +0000 (12:42 -0400)
committer Andrew Gregory <andrew.gregory.8@gmail.com>
Wed, 2 Nov 2016 00:40:26 +0000 (20:40 -0400)
diff --git a/libarchive/archive_read_support_format_mtree.c b/libarchive/archive_read_support_format_mtree.c

index ae58e87505345ec3c103f7f63ddf08b42907c193..85c655fbbbc66107633cc81e31be09b8946186ab 100644 (file)
--- a/libarchive/archive_read_support_format_mtree.c
+++ b/libarchive/archive_read_support_format_mtree.c
@@ -75,6 +75,8 @@ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_mtree.c 2011
  #define        MTREE_HAS_OPTIONAL      0x0800
  #define        MTREE_HAS_NOCHANGE      0x1000 /* FreeBSD specific */
  
+#define        MTREE_HASHTABLE_SIZE 1024 /* bucket count of mtree.entry_hashtable */
+
  struct mtree_option {
         struct mtree_option *next;
         char *value;
@@ -86,6 +88,8 @@ struct mtree_entry {
         char *name;
         char full;
         char used;
+       unsigned int name_hash;
+       struct mtree_entry *hashtable_next;
  };
  
  struct mtree {
@@ -98,6 +102,7 @@ struct mtree {
         const char              *archive_format_name;
         struct mtree_entry      *entries;
         struct mtree_entry      *this_entry;
+       struct mtree_entry      *entry_hashtable[MTREE_HASHTABLE_SIZE];
         struct archive_string    current_dir;
         struct archive_string    contents_name;
  
@@ -110,6 +115,7 @@ struct mtree {
  static int     bid_keycmp(const char *, const char *, ssize_t);
  static int     cleanup(struct archive_read *);
  static int     detect_form(struct archive_read *, int *);
+static unsigned int    hash(const char *);
  static int     mtree_bid(struct archive_read *, int);
  static int     parse_file(struct archive_read *, struct archive_entry *,
                     struct mtree *, struct mtree_entry *, int *);
@@ -862,11 +868,12 @@ process_add_entry(struct archive_read *a, struct mtree *mtree,
      struct mtree_option **global, const char *line, ssize_t line_len,
      struct mtree_entry **last_entry, int is_form_d)
  {
-       struct mtree_entry *entry;
+       struct mtree_entry *entry, *ht_iter;
         struct mtree_option *iter;
         const char *next, *eq, *name, *end;
         size_t name_len, len;
         int r, i;
+       unsigned int ht_idx;
  
         if ((entry = malloc(sizeof(*entry))) == NULL) {
                 archive_set_error(&a->archive, errno, "Can't allocate memory");
@@ -877,6 +884,8 @@ process_add_entry(struct archive_read *a, struct mtree *mtree,
         entry->name = NULL;
         entry->used = 0;
         entry->full = 0;
+       entry->name_hash = 0;
+       entry->hashtable_next = NULL;
  
         /* Add this entry to list. */
         if (*last_entry == NULL)
@@ -929,6 +938,16 @@ process_add_entry(struct archive_read *a, struct mtree *mtree,
         memcpy(entry->name, name, name_len);
         entry->name[name_len] = '\0';
         parse_escapes(entry->name, entry);
+       entry->name_hash = hash(entry->name);
+
+       ht_idx = entry->name_hash % MTREE_HASHTABLE_SIZE;
+       if ((ht_iter = mtree->entry_hashtable[ht_idx]) != NULL) {
+               while (ht_iter->hashtable_next)
+                       ht_iter = ht_iter->hashtable_next;
+               ht_iter->hashtable_next = entry;
+       } else {
+               mtree->entry_hashtable[ht_idx] = entry;
+       }
  
         for (iter = *global; iter != NULL; iter = iter->next) {
                 r = add_option(a, &entry->options, iter->value,
@@ -1122,9 +1141,10 @@ parse_file(struct archive_read *a, struct archive_entry *entry,
                  * with pathname canonicalization, which is a very
                  * tricky subject.)
                  */
-               for (mp = mentry->next; mp != NULL; mp = mp->next) {
+               for (mp = mentry->hashtable_next; mp != NULL; mp = mp->hashtable_next) {
                         if (mp->full && !mp->used
-                           && strcmp(mentry->name, mp->name) == 0) {
+                                       && mentry->name_hash == mp->name_hash
+                                       && strcmp(mentry->name, mp->name) == 0) {
                                 /* Later lines override earlier ones. */
                                 mp->used = 1;
                                 r1 = parse_line(a, entry, mtree, mp,
@@ -2000,3 +2020,19 @@ readline(struct archive_read *a, struct mtree *mtree, char **start,
                 find_off = u - mtree->line.s;
         }
  }
+
+static unsigned int
+hash(const char *p)
+{
+       /* A 32-bit version of Peter Weinberger's (PJW) hash algorithm,
+          as used by ELF for hashing function names.
+
+          p is a NUL-terminated string; the return value is its hash.
+          An empty string hashes to 0.  In this file the result is kept
+          in mtree_entry.name_hash, reduced modulo MTREE_HASHTABLE_SIZE
+          to pick a bucket, and compared before falling back to strcmp().
+
+          NOTE(review): *p is a plain char, so where char is signed a
+          byte >= 0x80 is added as a negative value, whereas the
+          reference ELF hash reads unsigned char.  This looks harmless
+          here because hashes are only compared against other results
+          of this same function -- confirm before reusing it elsewhere. */
+       unsigned g, h = 0;
+       while (*p != '\0') {
+               h = (h << 4) + *p++;    /* shift up four bits, add next byte */
+               if ((g = h & 0xF0000000) != 0) {        /* bits 28-31 occupied? */
+                       h ^= g >> 24;   /* fold them back into bits 4-7 */
+                       h &= 0x0FFFFFFF;        /* keep only the low 28 bits */
+               }
+       }
+       return h;
+}
author	Andrew Gregory <andrew.gregory.8@gmail.com>
	Tue, 1 Nov 2016 16:42:14 +0000 (12:42 -0400)
committer	Andrew Gregory <andrew.gregory.8@gmail.com>
	Wed, 2 Nov 2016 00:40:26 +0000 (20:40 -0400)