From 6f2fdbc28eac5f1aa8819242c6506bc3bffb7ebe Mon Sep 17 00:00:00 2001 From: Michael Schroeder Date: Wed, 6 Mar 2013 15:42:26 +0100 Subject: [PATCH] fix dataiterator returning random data in some cases Fixed two issues: 1) iterating over attributes in with vertical storage could return junk when some other operation paged in some other data. 2) the returned string could also be in the paged area, or also in tmpspace if stringification was done. We can't simply alloc memory as this makes things slower, but in most cases the storage does not matter. So we offer a new function, dataiterator_strdup(), that makes the kv.str pointer persistent. WARNING: this commit is an ABI change as it changes the dataiterator structure. Sorry. --- bindings/solv.i | 1 + src/dataiterator.h | 11 ++++ src/libsolv.ver | 1 + src/repodata.c | 126 ++++++++++++++++++++++++++++++++++++++++++--- src/repodata.h | 1 + src/repopack.h | 4 ++ 6 files changed, 137 insertions(+), 7 deletions(-) diff --git a/bindings/solv.i b/bindings/solv.i index 16564ff1..cbb8a20e 100644 --- a/bindings/solv.i +++ b/bindings/solv.i @@ -1649,6 +1649,7 @@ rb_eval_string( } ndi = solv_calloc(1, sizeof(*ndi)); dataiterator_init_clone(ndi, $self); + dataiterator_strdup(ndi); return ndi; } #ifdef SWIGRUBY diff --git a/src/dataiterator.h b/src/dataiterator.h index 6f8fd322..a77d902f 100644 --- a/src/dataiterator.h +++ b/src/dataiterator.h @@ -132,6 +132,16 @@ typedef struct _Dataiterator } parents[3]; int nparents; + /* vertical data */ + unsigned char *vert_ddp; + Id vert_off; + Id vert_len; + Id vert_storestate; + + /* strdup data */ + char *dupstr; + int dupstrn; + } Dataiterator; @@ -165,6 +175,7 @@ void dataiterator_jump_to_repo(Dataiterator *di, struct _Repo *repo); void dataiterator_entersub(Dataiterator *di); void dataiterator_clonepos(Dataiterator *di, Dataiterator *from); void dataiterator_seek(Dataiterator *di, int whence); +void dataiterator_strdup(Dataiterator *di); #define DI_SEEK_STAY (1 << 16) #define DI_SEEK_CHILD 1 diff --git a/src/libsolv.ver b/src/libsolv.ver index bc487cef..aa23112a 100644 --- a/src/libsolv.ver +++ b/src/libsolv.ver @@ -19,6 +19,7 @@ SOLV_1.0 { dataiterator_skip_repo; dataiterator_skip_solvable; dataiterator_step; + dataiterator_strdup; datamatcher_free; datamatcher_init; datamatcher_match; diff --git a/src/repodata.c b/src/repodata.c index 9d3bb6b3..2462c10c 100644 --- a/src/repodata.c +++ b/src/repodata.c @@ -474,7 +474,7 @@ static unsigned char * get_vertical_data(Repodata *data, Repokey *key, Id off, Id len) { unsigned char *dp; - if (!len) + if (len <= 0) return 0; if (off >= data->lastverticaloffset) { @@ -489,6 +489,7 @@ get_vertical_data(Repodata *data, Repokey *key, Id off, Id len) off += data->verticaloffset[key - data->keys]; /* fprintf(stderr, "key %d page %d\n", key->name, off / REPOPAGE_BLOBSIZE); */ dp = repopagestore_load_page_range(&data->store, off / REPOPAGE_BLOBSIZE, (off + len - 1) / REPOPAGE_BLOBSIZE); + data->storestate++; if (dp) dp += off % REPOPAGE_BLOBSIZE; return dp; @@ -834,19 +835,21 @@ repodata_stringify(Pool *pool, Repodata *data, Repokey *key, KeyValue *kv, int f case REPOKEY_TYPE_DIRSTRARRAY: if (!(flags & SEARCH_FILES)) return 1; /* match just the basename */ + if (kv->num) + return 1; /* already stringified */ /* Put the full filename into kv->str. */ kv->str = repodata_dir2str(data, kv->id, kv->str); - /* And to compensate for that put the "empty" directory into - kv->id, so that later calls to repodata_dir2str on this data - come up with the same filename again. */ - kv->id = 0; + kv->num = 1; /* mark stringification */ return 1; case REPOKEY_TYPE_MD5: case REPOKEY_TYPE_SHA1: case REPOKEY_TYPE_SHA256: if (!(flags & SEARCH_CHECKSUMS)) return 0; /* skip em */ + if (kv->num) + return 1; /* already stringified */ kv->str = repodata_chk2str(data, key->type, (const unsigned char *)kv->str); + kv->num = 1; /* mark stringification */ return 1; default: return 0; @@ -1235,6 +1238,19 @@ void dataiterator_init_clone(Dataiterator *di, Dataiterator *from) { *di = *from; + if (di->dupstr) + { + if (di->dupstr == di->kv.str) + { + di->dupstr = solv_malloc(di->dupstrn); + memcpy(di->dupstr, from->dupstr, di->dupstrn); + } + else + { + di->dupstr = 0; + di->dupstrn = 0; + } + } memset(&di->matcher, 0, sizeof(di->matcher)); if (from->matcher.match) datamatcher_init(&di->matcher, from->matcher.match, from->matcher.flags); @@ -1319,6 +1335,8 @@ dataiterator_free(Dataiterator *di) { if (di->matcher.match) datamatcher_free(&di->matcher); + if (di->dupstr) + solv_free(di->dupstr); } static inline unsigned char * @@ -1366,6 +1384,15 @@ dataiterator_step(Dataiterator *di) { Id schema; + if (di->state == di_nextattr && di->key->storage == KEY_STORAGE_VERTICAL_OFFSET && di->vert_ddp && di->vert_storestate != di->data->storestate) { + unsigned int ddpoff = di->ddp - di->vert_ddp; + di->vert_off += ddpoff; + di->vert_len -= ddpoff; + di->ddp = di->vert_ddp = get_vertical_data(di->data, di->key, di->vert_off, di->vert_len); + di->vert_storestate = di->data->storestate; + if (!di->ddp) + di->state = di_nextkey; + } for (;;) { switch (di->state) @@ -1429,7 +1456,27 @@ dataiterator_step(Dataiterator *di) case di_enterkey: di_enterkey: di->kv.entry = -1; di->key = di->data->keys + *di->keyp; - di->ddp = get_data(di->data, di->key, &di->dp, di->keyp[1] && (!di->keyname || (di->flags & SEARCH_SUB) != 0) ? 1 : 0); + if (!di->dp) + goto di_nextkey; + /* this is get_data() modified to store vert_ data */ + if (di->key->storage == KEY_STORAGE_VERTICAL_OFFSET) + { + Id off, len; + di->dp = data_read_id(di->dp, &off); + di->dp = data_read_id(di->dp, &len); + di->vert_ddp = di->ddp = get_vertical_data(di->data, di->key, off, len); + di->vert_off = off; + di->vert_len = len; + di->vert_storestate = di->data->storestate; + } + else if (di->key->storage == KEY_STORAGE_INCORE) + { + di->ddp = di->dp; + if (di->keyp[1] && (!di->keyname || (di->flags & SEARCH_SUB) != 0)) + di->dp = data_skip_key(di->data, di->dp, di->key); + } + else + di->ddp = 0; if (!di->ddp) goto di_nextkey; if (di->key->type == REPOKEY_TYPE_DELETED) @@ -1683,6 +1730,14 @@ dataiterator_clonepos(Dataiterator *di, Dataiterator *from) di->parents[i].kv.parent = &di->parents[i - 1].kv; di->kv.parent = &di->parents[di->nparents - 1].kv; } + di->dupstr = 0; + di->dupstrn = 0; + if (from->dupstr && from->dupstr == from->kv.str) + { + di->dupstrn = from->dupstrn; + di->dupstr = solv_malloc(from->dupstrn); + memcpy(di->dupstr, from->dupstr, di->dupstrn); + } } void @@ -1829,6 +1884,60 @@ dataiterator_match(Dataiterator *di, Datamatcher *ma) return datamatcher_match(ma, di->kv.str); } +void +dataiterator_strdup(Dataiterator *di) +{ + int l = -1; + + if (!di->kv.str || di->kv.str == di->dupstr) + return; + switch (di->key->type) + { + case REPOKEY_TYPE_MD5: + case REPOKEY_TYPE_SHA1: + case REPOKEY_TYPE_SHA256: + case REPOKEY_TYPE_DIRSTRARRAY: + if (di->kv.num) /* was it stringified into tmp space? */ + l = strlen(di->kv.str) + 1; + break; + default: + break; + } + if (l < 0 && di->key->storage == KEY_STORAGE_VERTICAL_OFFSET) + { + switch (di->key->type) + { + case REPOKEY_TYPE_STR: + case REPOKEY_TYPE_DIRSTRARRAY: + l = strlen(di->kv.str) + 1; + break; + case REPOKEY_TYPE_MD5: + l = SIZEOF_MD5; + break; + case REPOKEY_TYPE_SHA1: + l = SIZEOF_SHA1; + break; + case REPOKEY_TYPE_SHA256: + l = SIZEOF_SHA256; + break; + case REPOKEY_TYPE_BINARY: + l = di->kv.num; + break; + } + } + if (l >= 0) + { + if (!di->dupstrn || di->dupstrn < l) + { + di->dupstrn = l + 16; + di->dupstr = solv_realloc(di->dupstr, di->dupstrn); + } + if (l) + memcpy(di->dupstr, di->kv.str, l); + di->kv.str = di->dupstr; + } +} + /************************************************************************ * data modify functions */ @@ -3071,7 +3180,10 @@ void repodata_disable_paging(Repodata *data) { if (maybe_load_repodata(data, 0)) - repopagestore_disable_paging(&data->store); + { + repopagestore_disable_paging(&data->store); + data->storestate++; + } } static void diff --git a/src/repodata.h b/src/repodata.h index 7ce415bd..54fb3df0 100644 --- a/src/repodata.h +++ b/src/repodata.h @@ -96,6 +96,7 @@ typedef struct _Repodata { Id lastverticaloffset; /* end of verticals */ Repopagestore store; /* our page store */ + Id storestate; /* incremented every time the store might change */ unsigned char *vincore; /* internal vertical data */ unsigned int vincorelen; /* data size */ diff --git a/src/repopack.h b/src/repopack.h index 81b7b88a..d75e61a3 100644 --- a/src/repopack.h +++ b/src/repopack.h @@ -164,12 +164,15 @@ data_fetch(unsigned char *dp, KeyValue *kv, Repokey *key) kv->num2 = 0; return data_read_u32(dp, &kv->num); case REPOKEY_TYPE_MD5: + kv->num = 0; /* not stringified yet */ kv->str = (const char *)dp; return dp + SIZEOF_MD5; case REPOKEY_TYPE_SHA1: + kv->num = 0; /* not stringified yet */ kv->str = (const char *)dp; return dp + SIZEOF_SHA1; case REPOKEY_TYPE_SHA256: + kv->num = 0; /* not stringified yet */ kv->str = (const char *)dp; return dp + SIZEOF_SHA256; case REPOKEY_TYPE_BINARY: @@ -180,6 +183,7 @@ data_fetch(unsigned char *dp, KeyValue *kv, Repokey *key) return data_read_ideof(dp, &kv->id, &kv->eof); case REPOKEY_TYPE_DIRSTRARRAY: dp = data_read_ideof(dp, &kv->id, &kv->eof); + kv->num = 0; /* not stringified yet */ kv->str = (const char *)dp; return dp + strlen(kv->str) + 1; case REPOKEY_TYPE_DIRNUMNUMARRAY: -- 2.47.2