From: Michihiro NAKAJIMA Date: Wed, 28 Apr 2010 09:42:48 +0000 (-0400) Subject: Rewrite the code building ISO9660/joliet identifiers to use red black tree. X-Git-Tag: v3.0.0a~1066 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e1aceecdd4c4fc0985bd02010d4363cb7d5430b9;p=thirdparty%2Flibarchive.git Rewrite the code building ISO9660/joliet identifiers to use red black tree. This change gives a very good performance to ISO writer. SVN-Revision: 2326 --- diff --git a/libarchive/archive_write_set_format_iso9660.c b/libarchive/archive_write_set_format_iso9660.c index 02985f1d3..0d9a1056d 100644 --- a/libarchive/archive_write_set_format_iso9660.c +++ b/libarchive/archive_write_set_format_iso9660.c @@ -185,19 +185,29 @@ static const unsigned char zisofs_magic[8] = { RB_AUGMENT(RB_PARENT(tmp)); \ } while (/*CONSTCOND*/ 0) - -#if 0 -#define RB_FOREACH(x, name, head) \ - for ((x) = RB_MIN(name, head); \ +#define RB_NEGINF -1 +#define RB_FOREACH(x, head) \ + for ((x) = rb_MINMAX(head, RB_NEGINF); \ (x) != NULL; \ - (x) = name##_RB_NEXT(x)) - + (x) = rb_NEXT(x)) +#if 0 #define RB_FOREACH_REVERSE(x, name, head) \ for ((x) = RB_MAX(name, head); \ (x) != NULL; \ (x) = name##_RB_PREV(x)) #endif +struct rb_node { + struct rb_node *rbe_left; /* left element */ + struct rb_node *rbe_right; /* right element */ + struct rb_node *rbe_parent; /* parent element */ + int rbe_color; /* node color */ +}; + +struct rb_head { + struct rb_node *rbh_root; /* root of the tree */ +}; + typedef int (*rb_cmp_node)(const struct rb_node *, const struct rb_node *); typedef int (*rb_cmp_key)(const void *, const struct rb_node *); @@ -292,17 +302,6 @@ struct isofile { } zisofs; }; -struct rb_node { - struct rb_node *rbe_left; /* left element */ - struct rb_node *rbe_right; /* right element */ - struct rb_node *rbe_parent; /* parent element */ - int rbe_color; /* node color */ -}; - -struct rb_head { - struct rb_node *rbh_root; /* root of the tree */ -}; - struct isoent { /* Keep `rbnode' at the first member of struct isoent. */ struct rb_node rbnode; @@ -956,39 +955,31 @@ enum vdc { * Used for resolving duplicated filenames. */ struct idr { - struct wlist { - /* Chain wlist in flist. */ - struct wlist *next; - /* Chain wlist in llist. */ - struct { - struct wlist *next; - struct wlist **prev; - } elqueue; + struct idrent { + struct rb_node rbnode; + /* Used in wait_list. */ + struct idrent *wnext; + struct idrent *avail; + struct isoent *isoent; int weight; int noff; - } *wlistpool; + int rename_num; + } *idrent_pool; - /* A list sorted by a filename of identifier. */ - struct flist { - struct isoent *isoent; - int noff; - struct flist_link { - struct wlist *first; - struct wlist **last; - } pri, low; - } *flistpool; + struct rb_head head; + rb_cmp_node cmp_node; + rb_cmp_key cmp_key; - int pool_size; - int wlistidx; - int flistidx; + struct { + struct idrent *first; + struct idrent **last; + } wait_list; - /* A list sorted by a length of identifier. */ -#define IDR_LLIST_MAX 207 - struct llist { - struct wlist *first; - struct wlist **last; - } llist[IDR_LLIST_MAX]; + int pool_size; + int pool_idx; + int num_size; + int null_size; char char_map[0x80]; }; @@ -1092,29 +1083,32 @@ static struct isoent *isoent_find_entry(struct isoent *, const char *); static void idr_relaxed_filenames(char *); static void idr_init(struct iso9660 *, struct vdd *, struct idr *); static void idr_cleanup(struct idr *); -static inline void idr_llist_insert(struct llist *, struct wlist *); -static inline void idr_llist_remove(struct llist *, struct wlist *); static int idr_ensure_poolsize(struct archive_write *, struct idr *, int); static int idr_start(struct archive_write *, struct idr *, - int, int); -static struct wlist * idr_flist_add(struct idr *, struct flist *, - struct isoent *, int, int); + int, int, int, int, rb_cmp_node, rb_cmp_key); static void idr_register(struct idr *, struct isoent *, int, - int, int); -static struct llist * idr_extend_identifier(struct llist *, - struct wlist *, int, int); -static int idr_has_same_identifier(struct llist *, struct wlist *); -static void idr_resolve(struct idr *, int, int, - void (*)(unsigned char *, int)); + int); +static void idr_extend_identifier(struct idrent *, int, int); +static void idr_resolve(struct idr *, void (*)(unsigned char *, int)); static void idr_set_num(unsigned char *, int); static void idr_set_num_beutf16(unsigned char *, int); static int isoent_gen_iso9660_identifier(struct archive_write *, struct isoent *, struct idr *); static int isoent_gen_joliet_identifier(struct archive_write *, struct isoent *, struct idr *); -static int _compare_iso9660_identifier(const void *, const void *); -static int _compare_joliet_identifier(const void *, const void *); +static int isoent_cmp_iso9660_identifier(const struct isoent *, + const struct isoent *); +static int isoent_cmp_node_iso9660(const struct rb_node *, + const struct rb_node *); +static int isoent_cmp_key_iso9660(const void *, + const struct rb_node *); +static int isoent_cmp_joliet_identifier(const struct isoent *, + const struct isoent *); +static int isoent_cmp_node_joliet(const struct rb_node *, + const struct rb_node *); +static int isoent_cmp_key_joliet(const void *, + const struct rb_node *); static inline void path_table_add_entry(struct path_table *, struct isoent *); static inline struct isoent * path_table_last_entry(struct path_table *); static int isoent_make_path_table(struct archive_write *); @@ -1145,10 +1139,12 @@ static struct rb_node *rb_INSERT(struct rb_head *, struct rb_node *, rb_cmp_node static struct rb_node *rb_FIND(struct rb_head *, const void *, rb_cmp_key); #if 0 static struct rb_node *rb_NFIND(struct rb_head *, struct rb_node *, rb_cmp_node); +#endif static struct rb_node *rb_NEXT(struct rb_node *); +#if 0 static struct rb_node *rb_PREV(struct rb_node *); -static struct rb_node *rb_MINMAX(struct rb_head *, int); #endif +static struct rb_node *rb_MINMAX(struct rb_head *, int); /****************************************************************/ @@ -5574,8 +5570,7 @@ static void idr_init(struct iso9660 *iso9660, struct vdd *vdd, struct idr *idr) { - idr->wlistpool = NULL; - idr->flistpool = NULL; + idr->idrent_pool = NULL; idr->pool_size = 0; if (vdd->vdd_type != VDD_JOLIET) { if (iso9660->opt.iso_level <= 3) { @@ -5592,29 +5587,7 @@ idr_init(struct iso9660 *iso9660, struct vdd *vdd, struct idr *idr) static void idr_cleanup(struct idr *idr) { - free(idr->wlistpool); - free(idr->flistpool); -} - -static inline void -idr_llist_insert(struct llist *llist, struct wlist *wnp) -{ - if ((wnp->elqueue.next = llist->first) != NULL) - llist->first->elqueue.prev = &(wnp->elqueue.next); - else - llist->last = &(wnp->elqueue.next); - llist->first = wnp; - wnp->elqueue.prev = &(llist->first); -} - -static inline void -idr_llist_remove(struct llist *llist, struct wlist *wnp) -{ - if (wnp->elqueue.next != NULL) - wnp->elqueue.next->elqueue.prev = wnp->elqueue.prev; - else - llist->last = wnp->elqueue.prev; - *wnp->elqueue.prev = wnp->elqueue.next; + free(idr->idrent_pool); } static int @@ -5627,16 +5600,9 @@ idr_ensure_poolsize(struct archive_write *a, struct idr *idr, int psize; psize = (cnt + bk) & ~bk; - free(idr->wlistpool); - idr->wlistpool = malloc(sizeof(struct wlist) * psize); - if (idr->wlistpool == NULL) { - archive_set_error(&a->archive, ENOMEM, - "Can't allocate memory"); - return (ARCHIVE_FATAL); - } - free(idr->flistpool); - idr->flistpool = malloc(sizeof(struct flist) * psize); - if (idr->flistpool == NULL) { + idr->idrent_pool = realloc(idr->idrent_pool, + sizeof(struct idrent) * psize); + if (idr->idrent_pool == NULL) { archive_set_error(&a->archive, ENOMEM, "Can't allocate memory"); return (ARCHIVE_FATAL); @@ -5647,110 +5613,50 @@ idr_ensure_poolsize(struct archive_write *a, struct idr *idr, } static int -idr_start(struct archive_write *a, struct idr *idr, int cnt, int ffmax) +idr_start(struct archive_write *a, struct idr *idr, int cnt, int ffmax, + int num_size, int null_size, rb_cmp_node cmp_node, rb_cmp_key cmp_key) { - int i, r; + int r; r = idr_ensure_poolsize(a, idr, cnt); if (r != ARCHIVE_OK) return (r); - idr->wlistidx = 0; - idr->flistidx = 0; - if (ffmax > IDR_LLIST_MAX) - ffmax = IDR_LLIST_MAX; - for (i = 0; i < ffmax; i++) { - idr->llist[i].first = NULL; - idr->llist[i].last = &(idr->llist[i].first); - } + idr->head.rbh_root = NULL; + idr->wait_list.first = NULL; + idr->wait_list.last = &(idr->wait_list.first); + idr->pool_idx = 0; + idr->num_size = num_size; + idr->null_size = null_size; + idr->cmp_node = cmp_node; + idr->cmp_key = cmp_key; return (ARCHIVE_OK); } -static inline void -idr_flist_link_add(struct flist_link *fll, struct wlist *wl) +static void +idr_register(struct idr *idr, struct isoent *isoent, int weight, int noff) { - if ((wl->next = fll->first) == NULL) - fll->last = &(wl->next); - fll->first = wl; -} + struct idrent *idrent, *n; -static struct wlist * -idr_flist_add(struct idr *idr, struct flist *fl, struct isoent *ent, - int weight, int noff) -{ - struct wlist *wlist; + idrent = &(idr->idrent_pool[idr->pool_idx++]); + idrent->wnext = idrent->avail = NULL; + idrent->isoent = isoent; + idrent->weight = weight; + idrent->noff = noff; + idrent->rename_num = 0; - if (fl == NULL) { - fl = &idr->flistpool[idr->flistidx++]; - fl->pri.first = NULL; - fl->pri.last = &(fl->pri.first); - fl->low.first = NULL; - fl->low.last = &(fl->low.first); - fl->isoent = ent; - fl->noff = noff; + n = (struct idrent *)rb_FIND(&(idr->head), isoent, idr->cmp_key); + if (n == NULL) + rb_INSERT(&(idr->head), &(idrent->rbnode), idr->cmp_node); + else { + /* this `idrent' needs to rename. */ + idrent->avail = n; + *idr->wait_list.last = idrent; + idr->wait_list.last = &(idrent->wnext); } - - wlist = &idr->wlistpool[idr->wlistidx++]; - wlist->isoent = ent; - wlist->weight = weight; - wlist->noff = noff; - if (weight < 0) - idr_flist_link_add(&(fl->pri), wlist); - else - idr_flist_link_add(&(fl->low), wlist); - return (wlist); } static void -idr_register(struct idr *idr, struct isoent *ent, int weight, - int noff, int nsize) -{ - struct wlist *wlist; - struct flist *fl; - const char *id; - int cmp, cmplen, ext_off; - int i; - - wlist = NULL; - ext_off = ent->ext_off; - for (i = 0; i < idr->flistidx; i++) { - fl = &(idr->flistpool[i]); - if (fl->noff != noff) - continue; - id = fl->isoent->identifier; - if (noff < 0) { - cmplen = ent->id_len - nsize; - if (fl->isoent->id_len < cmplen) - cmp = 1; - else - cmp = memcmp(id, ent->identifier, - cmplen); - } else { - cmp = memcmp(id, ent->identifier, noff); - if (cmp == 0) { - cmplen = fl->isoent->ext_len; - if (cmplen != ent->ext_len) - cmp = 1; - else - cmp = memcmp(id+ext_off, - &ent->identifier[ext_off], - cmplen); - } - } - if (cmp == 0) { - wlist = idr_flist_add( - idr, fl, ent, weight, noff); - break; - } - } - if (wlist == NULL) - wlist = idr_flist_add(idr, NULL, ent, weight, noff); - if (ext_off >= nsize && noff + nsize != ext_off) - idr_llist_insert(&idr->llist[ext_off-nsize], wlist); -} - -static struct llist * -idr_extend_identifier(struct llist *llist, struct wlist *wnp, int numsize, - int nullsize) +idr_extend_identifier(struct idrent *wnp, int numsize, int nullsize) { unsigned char *p; int wnp_ext_off; @@ -5759,79 +5665,27 @@ idr_extend_identifier(struct llist *llist, struct wlist *wnp, int numsize, if (wnp->noff + numsize != wnp_ext_off) { p = (unsigned char *)wnp->isoent->identifier; /* Extend the filename; foo.c --> foo___.c */ - if (wnp_ext_off >= numsize) - idr_llist_remove(&llist[wnp_ext_off - numsize], wnp); memmove(p + wnp->noff + numsize, p + wnp_ext_off, wnp->isoent->ext_len + nullsize); wnp->isoent->ext_off = wnp_ext_off = wnp->noff + numsize; wnp->isoent->id_len = wnp_ext_off + wnp->isoent->ext_len; } - return (&llist[wnp_ext_off - numsize]); -} - -static int -idr_has_same_identifier(struct llist *llist, struct wlist *wnp) -{ - unsigned char *p; - struct wlist *wnptmp, *w2; - int p_len; - - p = (unsigned char *)wnp->isoent->identifier; - p_len = wnp->isoent->id_len; - - for (wnptmp = wnp->next; wnptmp != NULL; wnptmp = wnptmp->next) { - if (p_len == wnptmp->isoent->id_len && - memcmp(p, wnptmp->isoent->identifier, p_len) == 0) - return (1); /* Found in following entries */ - } - if (llist == NULL) - return (0);/* Not Found */ - - for (w2 = llist->first; w2 != NULL; w2 = w2->elqueue.next) { - if (p_len == w2->isoent->id_len && - memcmp(p, w2->isoent->identifier, p_len) == 0) - return (1); /* Found */ - } - idr_llist_insert(llist, wnp); - - return (0); } static void -idr_resolve(struct idr *idr, int numsize, int nullsize, - void (*fsetnum)(unsigned char *p, int num)) +idr_resolve(struct idr *idr, void (*fsetnum)(unsigned char *p, int num)) { - struct flist *fl; + struct idrent *n; unsigned char *p; - int i; - - /* Find out the same identifier. */ - for (i = 0; i < idr->flistidx; i++) { - struct wlist *wnp; - int num = 0; - - fl = &(idr->flistpool[i]); - if (fl->pri.first != NULL) { - *fl->low.last = fl->pri.first; - fl->low.last = fl->pri.last; - } - if (fl->low.first == NULL) - break; - - for (wnp = fl->low.first; wnp->next != NULL; - wnp = wnp->next) { - struct llist *ep; - if (!idr_has_same_identifier(NULL, wnp)) - continue; - ep = idr_extend_identifier( - idr->llist, wnp, numsize, nullsize); - p = (unsigned char *)wnp->isoent->identifier - + wnp->noff; - do { - fsetnum(p, num++); - } while (idr_has_same_identifier(ep, wnp)); - } + for (n = idr->wait_list.first; n != NULL; n = n->wnext) { + idr_extend_identifier(n, idr->num_size, idr->null_size); + p = (unsigned char *)n->isoent->identifier + n->noff; + do { + fsetnum(p, n->avail->rename_num++); + } while (rb_FIND(&(idr->head), n->isoent, + idr->cmp_key) != NULL); + rb_INSERT(&(idr->head), &(n->rbnode), idr->cmp_node); } } @@ -5929,7 +5783,8 @@ isoent_gen_iso9660_identifier(struct archive_write *a, struct isoent *isoent, fnmax = ffmax = dnmax = 207; } - r = idr_start(a, idr, isoent->children.cnt, ffmax); + r = idr_start(a, idr, isoent->children.cnt, ffmax, 3, 1, + isoent_cmp_node_iso9660, isoent_cmp_key_iso9660); if (r < 0) return (r); @@ -6090,11 +5945,11 @@ isoent_gen_iso9660_identifier(struct archive_write *a, struct isoent *isoent, noff = ext_off; } /* Register entry to the identifier resolver. */ - idr_register(idr, np, weight, noff, 3); + idr_register(idr, np, weight, noff); } /* Resolve duplicate identifier. */ - idr_resolve(idr, 3, 1, idr_set_num); + idr_resolve(idr, idr_set_num); /* Add a period and a version number to identifiers. */ for (np = isoent->children.first; np != NULL; np = np->chnext) { @@ -6119,7 +5974,6 @@ isoent_gen_iso9660_identifier(struct archive_write *a, struct isoent *isoent, np->id_len = np->ext_off + np->ext_len; np->mb_len = np->id_len; } - return (ARCHIVE_OK); } @@ -6146,7 +6000,8 @@ isoent_gen_joliet_identifier(struct archive_write *a, struct isoent *isoent, else ffmax = 128; - r = idr_start(a, idr, isoent->children.cnt, ffmax); + r = idr_start(a, idr, isoent->children.cnt, ffmax, 6, 2, + isoent_cmp_node_joliet, isoent_cmp_key_joliet); if (r < 0) return (r); @@ -6220,11 +6075,11 @@ isoent_gen_joliet_identifier(struct archive_write *a, struct isoent *isoent, else noff = ext_off; /* Register entry to the identifier resolver. */ - idr_register(idr, np, weight, noff, 6); + idr_register(idr, np, weight, noff); } /* Resolve duplicate identifier with Joliet Volume. */ - idr_resolve(idr, 6, 2, idr_set_num_beutf16); + idr_resolve(idr, idr_set_num_beutf16); return (ARCHIVE_OK); } @@ -6233,15 +6088,12 @@ isoent_gen_joliet_identifier(struct archive_write *a, struct isoent *isoent, * This comparing rule is acording to ISO9660 Standard 9.3 */ static int -_compare_iso9660_identifier(const void *v1, const void *v2) +isoent_cmp_iso9660_identifier(const struct isoent *p1, const struct isoent *p2) { - const struct isoent *p1, *p2; const char *s1, *s2; int cmp; int l; - p1 = *((const struct isoent **)(uintptr_t)v1); - p2 = *((const struct isoent **)(uintptr_t)v2); s1 = p1->identifier; s2 = p2->identifier; @@ -6268,6 +6120,10 @@ _compare_iso9660_identifier(const void *v1, const void *v2) - 0x20); } /* Compare File Name Extension */ + if (p1->ext_len == 0 && p2->ext_len == 0) + return (0); + if (p1->ext_len == 1 && p2->ext_len == 1) + return (0); if (p1->ext_len <= 1) return (-1); if (p2->ext_len <= 1) @@ -6304,15 +6160,30 @@ _compare_iso9660_identifier(const void *v1, const void *v2) } static int -_compare_joliet_identifier(const void *v1, const void *v2) +isoent_cmp_node_iso9660(const struct rb_node *n1, const struct rb_node *n2) +{ + struct idrent *e1 = (struct idrent *)n1; + struct idrent *e2 = (struct idrent *)n2; + + return (isoent_cmp_iso9660_identifier(e1->isoent, e2->isoent)); +} + +static int +isoent_cmp_key_iso9660(const void *key, const struct rb_node *node) +{ + struct isoent *isoent = (struct isoent *)key; + struct idrent *idrent = (struct idrent *)node; + + return (isoent_cmp_iso9660_identifier(isoent, idrent->isoent)); +} + +static int +isoent_cmp_joliet_identifier(const struct isoent *p1, const struct isoent *p2) { - const struct isoent *p1, *p2; const unsigned char *s1, *s2; int cmp; int l; - p1 = *((const struct isoent **)(uintptr_t)v1); - p2 = *((const struct isoent **)(uintptr_t)v2); s1 = (const unsigned char *)p1->identifier; s2 = (const unsigned char *)p2->identifier; @@ -6337,6 +6208,10 @@ _compare_joliet_identifier(const void *v1, const void *v2) return (*(const unsigned char *)(s1 - 1)); } /* Compare File Name Extension */ + if (p1->ext_len == 0 && p2->ext_len == 0) + return (0); + if (p1->ext_len == 2 && p2->ext_len == 2) + return (0); if (p1->ext_len <= 2) return (-1); if (p2->ext_len <= 2) @@ -6371,15 +6246,29 @@ _compare_joliet_identifier(const void *v1, const void *v2) } static int -isoent_make_sorted_files(struct archive_write *a, struct vdd *vdd, - struct isoent *isoent) +isoent_cmp_node_joliet(const struct rb_node *n1, const struct rb_node *n2) { - struct isoent **children; - struct isoent *np; - int (*compare)(const void *v1, const void *v2); + struct idrent *e1 = (struct idrent *)n1; + struct idrent *e2 = (struct idrent *)n2; - if (isoent->children.cnt == 0) - return (ARCHIVE_OK); + return (isoent_cmp_joliet_identifier(e1->isoent, e2->isoent)); +} + +static int +isoent_cmp_key_joliet(const void *key, const struct rb_node *node) +{ + struct isoent *isoent = (struct isoent *)key; + struct idrent *idrent = (struct idrent *)node; + + return (isoent_cmp_joliet_identifier(isoent, idrent->isoent)); +} + +static int +isoent_make_sorted_files(struct archive_write *a, struct isoent *isoent, + struct idr *idr) +{ + struct rb_node *rn; + struct isoent **children; children = malloc(isoent->children.cnt * sizeof(struct isoent *)); if (children == NULL) { @@ -6387,24 +6276,12 @@ isoent_make_sorted_files(struct archive_write *a, struct vdd *vdd, "Can't allocate memory"); return (ARCHIVE_FATAL); } - compare = NULL;/* gcc complain it's uninitialized. */ isoent->children_sorted = children; - switch (vdd->vdd_type) { - case VDD_PRIMARY: - case VDD_ENHANCED: - compare = _compare_iso9660_identifier; - break; - case VDD_JOLIET: - compare = _compare_joliet_identifier; - break; - } - - for (np = isoent->children.first; np != NULL; np = np->chnext) - *children ++ = np; - children = isoent->children_sorted; - qsort(children, isoent->children.cnt, - sizeof(struct isoent *), compare); + RB_FOREACH(rn, &(idr->head)) { + struct idrent *idrent = (struct idrent *)rn; + *children ++ = idrent->isoent; + } return (ARCHIVE_OK); } @@ -6451,7 +6328,7 @@ isoent_traverse_tree(struct archive_write *a, struct vdd* vdd) r = genid(a, np, &idr); if (r < 0) goto exit_traverse_tree; - r = isoent_make_sorted_files(a, vdd, np); + r = isoent_make_sorted_files(a, np, &idr); if (r < 0) goto exit_traverse_tree; @@ -8230,6 +8107,7 @@ rb_NFIND(struct rb_head *head, struct rb_node *elm, rb_cmp_node rb_cmp) } return (res); } +#endif /* ARGSUSED */ static struct rb_node * @@ -8253,6 +8131,7 @@ rb_NEXT(struct rb_node *elm) return (elm); } +#if 0 /* ARGSUSED */ static struct rb_node * rb_PREV(struct rb_node *elm) @@ -8274,6 +8153,7 @@ rb_PREV(struct rb_node *elm) } return (elm); } +#endif static struct rb_node * rb_MINMAX(struct rb_head *head, int val) @@ -8289,4 +8169,3 @@ rb_MINMAX(struct rb_head *head, int val) } return (parent); } -#endif