From: Michael Schroeder Date: Wed, 22 Feb 2017 13:20:35 +0000 (+0100) Subject: Refactor disk usage generation code X-Git-Tag: 0.6.27~26 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2de6819b4f2764f4b7857a087c269a1f4946f688;p=thirdparty%2Flibsolv.git Refactor disk usage generation code --- diff --git a/ext/CMakeLists.txt b/ext/CMakeLists.txt index 586eda84..702dda49 100644 --- a/ext/CMakeLists.txt +++ b/ext/CMakeLists.txt @@ -116,6 +116,11 @@ IF (ENABLE_APPDATA) repo_appdata.h) ENDIF (ENABLE_APPDATA) +IF (ENABLE_RPMMD OR ENABLE_SUSEREPO) + SET (libsolvext_SRCS ${libsolvext_SRCS} + repodata_diskusage.c) +ENDIF (ENABLE_RPMMD OR ENABLE_SUSEREPO) + SET (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") IF (HAVE_LINKER_VERSION_SCRIPT) SET (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${LINK_FLAGS} -Wl,--version-script=${CMAKE_SOURCE_DIR}/ext/libsolvext.ver") diff --git a/ext/repo_rpmmd.c b/ext/repo_rpmmd.c index ff41906e..42f40cce 100644 --- a/ext/repo_rpmmd.c +++ b/ext/repo_rpmmd.c @@ -22,6 +22,7 @@ #ifdef ENABLE_COMPLEX_DEPS #include "pool_parserpmrichdep.h" #endif +#include "repodata_diskusage.h" enum state { STATE_START, @@ -247,8 +248,7 @@ struct parsedata { Id chksumtype; Id handle; XML_Parser *parser; - Id (*dirs)[3]; /* dirid, size, nfiles */ - int ndirs; + Queue diskusageq; const char *language; /* default language */ Id langcache[ID_NUM_INTERNAL]; /* cache for the default language */ @@ -287,74 +287,6 @@ langtag(struct parsedata *pd, Id tag, const char *language) return pd->langcache[tag]; } -static int -id3_cmp (const void *v1, const void *v2, void *dp) -{ - Id *i1 = (Id*)v1; - Id *i2 = (Id*)v2; - return i1[0] - i2[0]; -} - -static void -commit_diskusage (struct parsedata *pd, Id handle) -{ - int i; - Dirpool *dp = &pd->data->dirpool; - /* Now sort in dirid order. This ensures that parents come before - their children. 
*/ - if (pd->ndirs > 1) - solv_sort(pd->dirs, pd->ndirs, sizeof (pd->dirs[0]), id3_cmp, 0); - /* Substract leaf numbers from all parents to make the numbers - non-cumulative. This must be done post-order (i.e. all leafs - adjusted before parents). We ensure this by starting at the end of - the array moving to the start, hence seeing leafs before parents. */ - for (i = pd->ndirs; i--;) - { - Id p = dirpool_parent(dp, pd->dirs[i][0]); - int j = i; - for (; p; p = dirpool_parent(dp, p)) - { - for (; j--;) - if (pd->dirs[j][0] == p) - break; - if (j >= 0) - { - if (pd->dirs[j][1] < pd->dirs[i][1]) - pd->dirs[j][1] = 0; - else - pd->dirs[j][1] -= pd->dirs[i][1]; - if (pd->dirs[j][2] < pd->dirs[i][2]) - pd->dirs[j][2] = 0; - else - pd->dirs[j][2] -= pd->dirs[i][2]; - } - else - /* Haven't found this parent in the list, look further if - we maybe find the parents parent. */ - j = i; - } - } -#if 0 - char sbuf[1024]; - char *buf = sbuf; - unsigned slen = sizeof (sbuf); - for (i = 0; i < pd->ndirs; i++) - { - dir2str (attr, pd->dirs[i][0], &buf, &slen); - fprintf (stderr, "have dir %d %d %d %s\n", pd->dirs[i][0], pd->dirs[i][1], pd->dirs[i][2], buf); - } - if (buf != sbuf) - free (buf); -#endif - for (i = 0; i < pd->ndirs; i++) - if (pd->dirs[i][1] || pd->dirs[i][2]) - { - repodata_add_dirnumnum(pd->data, handle, SOLVABLE_DISKUSAGE, pd->dirs[i][0], pd->dirs[i][1], pd->dirs[i][2]); - } - pd->ndirs = 0; -} - - /* * makeevr_atts * parse 'epoch', 'ver' and 'rel', return evr Id @@ -1013,7 +945,8 @@ startElement(void *userData, const char *name, const char **atts) case STATE_DIR: { long filesz = 0, filenum = 0; - Id dirid; + Id did; + if ((str = find_attr("name", atts)) == 0) { pd->ret = pool_error(pool, -1, "<dir> tag without 'name' attribute"); @@ -1036,16 +969,16 @@ startElement(void *userData, const char *name, const char **atts) str = pd->content; } } - dirid = repodata_str2dir(pd->data, str, 1); + did = repodata_str2dir(pd->data, str, 1); if ((str = find_attr("size", atts)) !=
0) filesz = strtol(str, 0, 0); if ((str = find_attr("count", atts)) != 0) filenum = strtol(str, 0, 0); - pd->dirs = solv_extend(pd->dirs, pd->ndirs, 1, sizeof(pd->dirs[0]), 31); - pd->dirs[pd->ndirs][0] = dirid; - pd->dirs[pd->ndirs][1] = filesz; - pd->dirs[pd->ndirs][2] = filenum; - pd->ndirs++; + if (filesz || filenum) + { + queue_push(&pd->diskusageq, did); + queue_push2(&pd->diskusageq, filesz, filenum); + } break; } case STATE_CHANGELOG: @@ -1258,8 +1191,8 @@ endElement(void *userData, const char *name) repodata_add_poolstr_array(pd->data, handle, SOLVABLE_KEYWORDS, pd->content); break; case STATE_DISKUSAGE: - if (pd->ndirs) - commit_diskusage(pd, handle); + if (pd->diskusageq.count) + repodata_add_diskusage(pd->data, handle, &pd->diskusageq); break; case STATE_ORDER: if (pd->content[0]) @@ -1350,6 +1283,7 @@ repo_add_rpmmd(Repo *repo, FILE *fp, const char *language, int flags) pd.lcontent = 0; pd.kind = 0; pd.language = language && *language && strcmp(language, "en") != 0 ? language : 0; + queue_init(&pd.diskusageq); init_cshash(&pd); if ((flags & REPO_EXTEND_SOLVABLES) != 0) @@ -1381,6 +1315,7 @@ repo_add_rpmmd(Repo *repo, FILE *fp, const char *language, int flags) join_freemem(&pd.jd); free_cshash(&pd); repodata_free_dircache(data); + queue_free(&pd.diskusageq); if (!(flags & REPO_NO_INTERNALIZE)) repodata_internalize(data); diff --git a/ext/repo_susetags.c b/ext/repo_susetags.c index 448b06c0..45a7f5bd 100644 --- a/ext/repo_susetags.c +++ b/ext/repo_susetags.c @@ -21,6 +21,7 @@ #ifdef ENABLE_COMPLEX_DEPS #include "pool_parserpmrichdep.h" #endif +#include "repodata_diskusage.h" struct datashare { Id name; @@ -38,8 +39,7 @@ struct parsedata { int last_found_source; struct datashare *share_with; int nshare; - Id (*dirs)[3]; /* dirid, size, nfiles */ - int ndirs; + Queue diskusageq; struct joindata jd; char *language; /* the default language */ Id langcache[ID_NUM_INTERNAL]; /* cache for the default language */ @@ -180,39 +180,6 @@ add_source(struct parsedata 
*pd, char *line, Solvable *s, Id handle) repodata_set_constantid(pd->data, handle, SOLVABLE_SOURCEARCH, arch); } -/* - * add_dirline - * add a line with directory information - * - */ - -static void -add_dirline(struct parsedata *pd, char *line) -{ - char *sp[6]; - long filesz; - long filenum; - Id dirid; - if (split(line, sp, 6) != 5) - return; - pd->dirs = solv_extend(pd->dirs, pd->ndirs, 1, sizeof(pd->dirs[0]), 31); - filesz = strtol(sp[1], 0, 0); - filesz += strtol(sp[2], 0, 0); - filenum = strtol(sp[3], 0, 0); - filenum += strtol(sp[4], 0, 0); - /* hack: we know that there's room for a / */ - if (*sp[0] != '/') - *--sp[0] = '/'; - dirid = repodata_str2dir(pd->data, sp[0], 1); -#if 0 -fprintf(stderr, "%s -> %d\n", sp[0], dirid); -#endif - pd->dirs[pd->ndirs][0] = dirid; - pd->dirs[pd->ndirs][1] = filesz; - pd->dirs[pd->ndirs][2] = filenum; - pd->ndirs++; -} - static void set_checksum(struct parsedata *pd, Repodata *data, Id handle, Id keyname, char *line) { @@ -238,86 +205,6 @@ set_checksum(struct parsedata *pd, Repodata *data, Id handle, Id keyname, char * } -/* - * id3_cmp - * compare - * - */ - -static int -id3_cmp(const void *v1, const void *v2, void *dp) -{ - Id *i1 = (Id*)v1; - Id *i2 = (Id*)v2; - return i1[0] - i2[0]; -} - - -/* - * commit_diskusage - * - */ - -static void -commit_diskusage(struct parsedata *pd, Id handle) -{ - int i; - Dirpool *dp = &pd->data->dirpool; - /* Now sort in dirid order. This ensures that parents come before - their children. */ - if (pd->ndirs > 1) - solv_sort(pd->dirs, pd->ndirs, sizeof(pd->dirs[0]), id3_cmp, 0); - /* Substract leaf numbers from all parents to make the numbers - non-cumulative. This must be done post-order (i.e. all leafs - adjusted before parents). We ensure this by starting at the end of - the array moving to the start, hence seeing leafs before parents. 
*/ - for (i = pd->ndirs; i--;) - { - Id p = dirpool_parent(dp, pd->dirs[i][0]); - int j = i; - for (; p; p = dirpool_parent(dp, p)) - { - for (; j--;) - if (pd->dirs[j][0] == p) - break; - if (j >= 0) - { - if (pd->dirs[j][1] < pd->dirs[i][1]) - pd->dirs[j][1] = 0; - else - pd->dirs[j][1] -= pd->dirs[i][1]; - if (pd->dirs[j][2] < pd->dirs[i][2]) - pd->dirs[j][2] = 0; - else - pd->dirs[j][2] -= pd->dirs[i][2]; - } - else - /* Haven't found this parent in the list, look further if - we maybe find the parents parent. */ - j = i; - } - } -#if 0 - char sbuf[1024]; - char *buf = sbuf; - unsigned slen = sizeof(sbuf); - for (i = 0; i < pd->ndirs; i++) - { - dir2str(attr, pd->dirs[i][0], &buf, &slen); - fprintf(stderr, "have dir %d %d %d %s\n", pd->dirs[i][0], pd->dirs[i][1], pd->dirs[i][2], buf); - } - if (buf != sbuf) - free (buf); -#endif - for (i = 0; i < pd->ndirs; i++) - if (pd->dirs[i][1] || pd->dirs[i][2]) - { - repodata_add_dirnumnum(pd->data, handle, SOLVABLE_DISKUSAGE, pd->dirs[i][0], pd->dirs[i][1], pd->dirs[i][2]); - } - pd->ndirs = 0; -} - - /* Unfortunately "a"[0] is no constant expression in the C languages, so we need to pass the four characters individually :-/ */ #define CTAG(a,b,c,d) ((unsigned)(((unsigned char)a) << 24) \ @@ -385,13 +272,13 @@ finish_solvable(struct parsedata *pd, Solvable *s, Offset freshens) } pd->nfilelist = 0; } - /* A self provide, except for source packages. This is harmless + /* Add self provide, except for source packages. This is harmless to do twice (in case we see the same package twice). 
*/ if (s->name && s->arch != ARCH_SRC && s->arch != ARCH_NOSRC) s->provides = repo_addid_dep(pd->repo, s->provides, pool_rel2id(pool, s->name, s->evr, REL_EQ, 1), 0); repo_rewrite_suse_deps(s, freshens); - if (pd->ndirs) - commit_diskusage(pd, handle); + if (pd->diskusageq.count) + repodata_add_diskusage(pd->data, handle, &pd->diskusageq); } static Hashtable @@ -484,7 +371,7 @@ repo_add_susetags(Repo *repo, FILE *fp, Id defvendor, const char *language, int int indelta = 0; int last_found_pack = 0; Id first_new_pkg = 0; - char *sp[5]; + char *sp[6]; struct parsedata pd; Repodata *data = 0; Id handle = 0; @@ -509,6 +396,7 @@ repo_add_susetags(Repo *repo, FILE *fp, Id defvendor, const char *language, int pd.data = data; pd.flags = flags; pd.language = language && *language ? solv_strdup(language) : 0; + queue_init(&pd.diskusageq); linep = line; s = 0; @@ -1034,8 +922,22 @@ repo_add_susetags(Repo *repo, FILE *fp, Id defvendor, const char *language, int continue; } case CTAG('=', 'D', 'i', 'r'): - add_dirline(&pd, line + 6); - continue; + if (split(line + 6, sp, 6) == 5) + { + long filesz, filenum; + Id did; + + filesz = strtol(sp[1], 0, 0); + filesz += strtol(sp[2], 0, 0); + filenum = strtol(sp[3], 0, 0); + filenum += strtol(sp[4], 0, 0); + if (*sp[0] != '/') + *--sp[0] = '/'; /* hack: we know that there's room for a / */ + did = repodata_str2dir(data, sp[0], 1); + queue_push(&pd.diskusageq, did); + queue_push2(&pd.diskusageq, (Id)filesz, (Id)filenum); + } + break; case CTAG('=', 'C', 'a', 't'): repodata_set_poolstr(data, handle, langtag(&pd, SOLVABLE_CATEGORY, line_lang), line + 3 + keylen); break; @@ -1173,5 +1075,6 @@ repo_add_susetags(Repo *repo, FILE *fp, Id defvendor, const char *language, int solv_free(pd.language); solv_free(line); join_freemem(&pd.jd); + queue_free(&pd.diskusageq); return pd.ret; } diff --git a/ext/repodata_diskusage.c b/ext/repodata_diskusage.c new file mode 100644 index 00000000..fd9c5cc2 --- /dev/null +++ b/ext/repodata_diskusage.c @@ -0,0 
+1,89 @@
+/*
+ * repodata_diskusage.c
+ *
+ * Small helper to convert diskusage data from susetags or rpmmd
+ *
+ * Copyright (c) 2017, Novell Inc.
+ *
+ * This program is licensed under the BSD license, read LICENSE.BSD
+ * for further information
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "pool.h"
+#include "repo.h"
+#include "util.h"
+#include "repodata_diskusage.h"
+
+/* The queue contains (dirid, kbytes, inodes) triplets */
+
+/* solv_sort callback: order the triplets by their first element, the dirid */
+static int
+add_diskusage_sortfn(const void *ap, const void *bp, void *dp)
+{
+  return *(const Id *)ap - *(const Id *)bp;
+}
+
+/*
+ * repodata_add_diskusage
+ * turn the cumulative (dirid, kbytes, inodes) triplets in q into
+ * per-directory numbers and add every non-zero triplet to handle as
+ * SOLVABLE_DISKUSAGE. The queue is modified in place and emptied.
+ */
+void
+repodata_add_diskusage(Repodata *data, Id handle, Queue *q)
+{
+  int i, j;
+  Dirpool *dp = &data->dirpool;
+
+  /* Sort in dirid order. This ensures that parents come before
+   * their children. */
+  if (q->count > 3)
+    solv_sort(q->elements, q->count / 3, 3 * sizeof(Id), add_diskusage_sortfn, 0);
+  /* start at the first triplet so that a duplicate of the lowest
+   * dirid is zeroed as well instead of being committed twice */
+  for (i = 0; i < q->count; i += 3)
+    {
+      Id did = q->elements[i];
+      if (i + 3 < q->count && q->elements[i + 3] == did)
+        {
+          /* identical directory entry! zero this one */
+          q->elements[i + 1] = 0;
+          q->elements[i + 2] = 0;
+          continue;
+        }
+      /* subtract data from the nearest parent that precedes us;
+       * the first triplet has no predecessor, so its search is empty */
+      while (did)
+        {
+          did = dirpool_parent(dp, did);
+          for (j = i - 3; j >= 0; j -= 3)
+            if (q->elements[j] == did)
+              break;
+          if (j >= 0)
+            {
+              /* clamp at zero instead of wrapping around */
+              if ((unsigned int)q->elements[j + 1] > (unsigned int)q->elements[i + 1])
+                q->elements[j + 1] -= q->elements[i + 1];
+              else
+                q->elements[j + 1] = 0;
+              if ((unsigned int)q->elements[j + 2] > (unsigned int)q->elements[i + 2])
+                q->elements[j + 2] -= q->elements[i + 2];
+              else
+                q->elements[j + 2] = 0;
+              break;
+            }
+        }
+    }
+  /* now commit data */
+  for (i = 0; i < q->count; i += 3)
+    if (q->elements[i + 1] || q->elements[i + 2])
+      repodata_add_dirnumnum(data, handle, SOLVABLE_DISKUSAGE, q->elements[i], q->elements[i + 1], q->elements[i + 2]);
+  /* empty queue */
+  queue_empty(q);
+}
+
diff --git a/ext/repodata_diskusage.h b/ext/repodata_diskusage.h
new file mode 100644
index 00000000..1beafea7
--- /dev/null
+++ b/ext/repodata_diskusage.h
@@ -0,0 +1,10 @@
+/*
+ * Copyright (c) 2017, SUSE Inc.
+ *
+ * This program is licensed under the BSD license, read LICENSE.BSD
+ * for further information
+ */
+
+extern void repodata_add_diskusage(Repodata *data, Id handle, Queue *q);
+
+