From: Michael Schroeder Date: Thu, 10 Dec 2015 14:35:41 +0000 (+0100) Subject: repodata_internalize: implement support for key type changed X-Git-Tag: 0.6.15~10 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9a0b869c0bbe0a7ab434fa34be9bfce23aba0d11;p=thirdparty%2Flibsolv.git repodata_internalize: implement support for key type changed The old code just added both types to the schema. For quite some time now, we only want one type for each keyname. --- diff --git a/src/repodata.c b/src/repodata.c index 905ca471..0660684e 100644 --- a/src/repodata.c +++ b/src/repodata.c @@ -3192,18 +3192,57 @@ repodata_serialize_key(Repodata *data, struct extdata *newincore, } } +/* create a circular linked list of all keys that share + * the same keyname; link[i] is the next key id with the same name as key i, or 0 if no other key shares that name */ +static Id * +calculate_keylink(Repodata *data) +{ + int i, j; + Id *link; + Id maxkeyname = 0, *keytable = 0; + link = solv_calloc(data->nkeys, sizeof(Id)); + if (data->nkeys <= 2) /* the loop below starts at key 1, so there is at most one key: nothing to link */ + return link; + for (i = 1; i < data->nkeys; i++) + { + Id n = data->keys[i].name; + if (n >= maxkeyname) /* grow the keyname -> key id table and zero the newly added part */ + { + keytable = solv_realloc2(keytable, n + 128, sizeof(Id)); + memset(keytable + maxkeyname, 0, (n + 128 - maxkeyname) * sizeof(Id)); + maxkeyname = n + 128; + } + j = keytable[n]; /* most recent key id seen with this name, 0 if none yet */ + if (j) + link[i] = link[j]; + else + j = i; + link[j] = i; + keytable[n] = i; + } + /* remove links that just point to themselves */ + for (i = 1; i < data->nkeys; i++) + if (link[i] == i) + link[i] = 0; + solv_free(keytable); + return link; +} + void repodata_internalize(Repodata *data) { Repokey *key, solvkey; Id entry, nentry; - Id schemaid, keyid, *schema, *sp, oldschema, *keyp, *seen; + Id schemaid, keyid, *schema, *sp, oldschemaid, *keyp, *seen; + Offset *oldincoreoffs = 0; int schemaidx; unsigned char *dp, *ndp; - int newschema, oldcount; + int neednewschema; struct extdata newincore; struct extdata newvincore; Id solvkeyid; + Id *keylink; + int haveoldkl; if (!data->attrs && !data->xattrs) return; @@ -3236,140 +3275,181 @@ 
repodata_internalize(Repodata *data) data->mainschema = 0; data->mainschemaoffsets = solv_free(data->mainschemaoffsets); + keylink = calculate_keylink(data); /* join entry data */ /* we start with the meta data, entry -1 */ for (entry = -1; entry < nentry; entry++) { - memset(seen, 0, data->nkeys * sizeof(Id)); - oldschema = 0; + oldschemaid = 0; dp = data->incoredata; if (dp) { dp += entry >= 0 ? data->incoreoffset[entry] : 1; - dp = data_read_id(dp, &oldschema); + dp = data_read_id(dp, &oldschemaid); } + memset(seen, 0, data->nkeys * sizeof(Id)); #if 0 -fprintf(stderr, "oldschema %d\n", oldschema); -fprintf(stderr, "schemata %d\n", data->schemata[oldschema]); +fprintf(stderr, "oldschemaid %d\n", oldschemaid); +fprintf(stderr, "schemata %d\n", data->schemata[oldschemaid]); fprintf(stderr, "schemadata %p\n", data->schemadata); #endif - /* seen: -1: old data 0: skipped >0: id + 1 */ - newschema = 0; - oldcount = 0; + + /* seen: -1: old data, 0: skipped, >0: id + 1 */ + neednewschema = 0; sp = schema; - for (keyp = data->schemadata + data->schemata[oldschema]; *keyp; keyp++) + haveoldkl = 0; + for (keyp = data->schemadata + data->schemata[oldschemaid]; *keyp; keyp++) { if (seen[*keyp]) { - pool_debug(data->repo->pool, SOLV_FATAL, "repodata_internalize: inconsistent old data (key occured twice).\n"); - exit(1); + /* oops, should not happen */ + neednewschema = 1; + continue; } - seen[*keyp] = -1; + seen[*keyp] = -1; /* use old marker */ *sp++ = *keyp; - oldcount++; + if (keylink[*keyp]) + haveoldkl = 1; /* potential keylink conflict */ } - if (entry >= 0) - keyp = data->attrs ? data->attrs[entry] : 0; - else + + /* strip solvables key */ + if (entry < 0 && solvkeyid && seen[solvkeyid]) { - /* strip solvables key */ *sp = 0; for (sp = keyp = schema; *sp; sp++) if (*sp != solvkeyid) *keyp++ = *sp; - else - oldcount--; sp = keyp; seen[solvkeyid] = 0; - keyp = data->xattrs ? 
data->xattrs[1] : 0; + neednewschema = 1; } + + /* add new entries */ + if (entry >= 0) + keyp = data->attrs ? data->attrs[entry] : 0; + else + keyp = data->xattrs ? data->xattrs[1] : 0; if (keyp) for (; *keyp; keyp += 2) { if (!seen[*keyp]) { - newschema = 1; + neednewschema = 1; *sp++ = *keyp; + if (haveoldkl && keylink[*keyp]) /* this should be pretty rare */ + { + Id kl; + for (kl = keylink[*keyp]; kl != *keyp; kl = keylink[kl]) + if (seen[kl] == -1) + { + /* replacing old key kl, remove from schema and seen */ + Id *osp; + for (osp = schema; osp < sp; osp++) + if (*osp == kl) + { + memmove(osp, osp + 1, (sp - osp) * sizeof(Id)); + sp--; + seen[kl] = 0; + break; + } + } + } } seen[*keyp] = keyp[1] + 1; } + + /* add solvables key if needed */ if (entry < 0 && data->end != data->start) { - *sp++ = solvkeyid; - newschema = 1; + *sp++ = solvkeyid; /* always last in schema */ + neednewschema = 1; } + + /* commit schema */ *sp = 0; - if (newschema) + if (neednewschema) /* Ideally we'd like to sort the new schema here, to ensure - schema equality independend of the ordering. We can't do that - yet. For once see below (old ids need to come before new ids). - An additional difficulty is that we also need to move - the values with the keys. */ + schema equality independend of the ordering. */ schemaid = repodata_schema2id(data, schema, 1); else - schemaid = oldschema; + schemaid = oldschemaid; + + if (entry < 0) + { + data->mainschemaoffsets = solv_calloc(sp - schema, sizeof(Id)); + data->mainschema = schemaid; + } + /* find offsets in old incore data */ + if (oldschemaid) + { + Id *lastneeded = 0; + for (sp = data->schemadata + data->schemata[oldschemaid]; *sp; sp++) + if (seen[*sp] == -1) + lastneeded = sp + 1; + if (lastneeded) + { + if (!oldincoreoffs) + oldincoreoffs = solv_malloc2(data->nkeys, 2 * sizeof(Offset)); + for (sp = data->schemadata + data->schemata[oldschemaid]; sp != lastneeded; sp++) + { + /* Skip the data associated with this old key. 
*/ + key = data->keys + *sp; + ndp = dp; + if (key->storage == KEY_STORAGE_VERTICAL_OFFSET) + { + ndp = data_skip(ndp, REPOKEY_TYPE_ID); + ndp = data_skip(ndp, REPOKEY_TYPE_ID); + } + else if (key->storage == KEY_STORAGE_INCORE) + ndp = data_skip_key(data, ndp, key); + oldincoreoffs[*sp * 2] = dp - data->incoredata; + oldincoreoffs[*sp * 2 + 1] = ndp - dp; + dp = ndp; + } + } + } /* Now create data blob. We walk through the (possibly new) schema and either copy over old data, or insert the new. */ - /* XXX Here we rely on the fact that the (new) schema has the form - o1 o2 o3 o4 ... | n1 n2 n3 ... - (oX being the old keyids (possibly overwritten), and nX being - the new keyids). This rules out sorting the keyids in order - to ensure a small schema count. */ if (entry >= 0) data->incoreoffset[entry] = newincore.len; - data_addid(&newincore, schemaid); - if (entry == -1) + + if (entry >= 0 && !neednewschema && oldschemaid && (!data->attrs || !data->attrs[entry]) && dp) { - data->mainschema = schemaid; - data->mainschemaoffsets = solv_calloc(sp - schema, sizeof(Id)); + /* just copy over the complete old entry (including the schemaid) */ + ndp = data->incoredata + data->incoreoffset[entry]; + data_addblob(&newincore, ndp, dp - ndp); + goto entrydone; } + + data_addid(&newincore, schemaid); + /* we don't use a pointer to the schemadata here as repodata_serialize_key * may call repodata_schema2id() which might realloc our schemadata */ for (schemaidx = data->schemata[schemaid]; (keyid = data->schemadata[schemaidx]) != 0; schemaidx++) { - if (entry == -1) - data->mainschemaoffsets[schemaidx - data->schemata[schemaid]] = newincore.len; - if (keyid == solvkeyid) + if (entry < 0) { - /* add flexarray entry count */ - data_addid(&newincore, data->end - data->start); - break; - } - key = data->keys + keyid; -#if 0 - fprintf(stderr, "internalize %d(%d):%s:%s\n", entry, entry + data->start, pool_id2str(data->repo->pool, key->name), pool_id2str(data->repo->pool, key->type)); 
-#endif - ndp = dp; - if (oldcount) - { - /* Skip the data associated with this old key. */ - if (key->storage == KEY_STORAGE_VERTICAL_OFFSET) + data->mainschemaoffsets[schemaidx - data->schemata[schemaid]] = newincore.len; + if (keyid == solvkeyid) { - ndp = data_skip(dp, REPOKEY_TYPE_ID); - ndp = data_skip(ndp, REPOKEY_TYPE_ID); + /* add flexarray entry count */ + data_addid(&newincore, data->end - data->start); + break; /* always the last entry */ } - else if (key->storage == KEY_STORAGE_INCORE) - ndp = data_skip_key(data, dp, key); - oldcount--; } if (seen[keyid] == -1) { - /* If this key was an old one _and_ was not overwritten with - a different value copy over the old value (we skipped it - above). */ - if (dp != ndp) - data_addblob(&newincore, dp, ndp - dp); - seen[keyid] = 0; + if (oldincoreoffs[keyid * 2 + 1]) /* old value has data: copy it from its recorded (offset, length) pair */ + data_addblob(&newincore, data->incoredata + oldincoreoffs[keyid * 2], oldincoreoffs[keyid * 2 + 1]); } else if (seen[keyid]) - { - /* Otherwise we have a new value. Parse it into the internal form. */ - repodata_serialize_key(data, &newincore, &newvincore, schema, key, seen[keyid] - 1); - } - dp = ndp; + repodata_serialize_key(data, &newincore, &newvincore, schema, data->keys + keyid, seen[keyid] - 1); } + +entrydone: + /* free memory */ if (entry >= 0 && data->attrs) { if (data->attrs[entry]) @@ -3399,6 +3479,8 @@ fprintf(stderr, "schemadata %p\n", data->schemadata); data->lastdatalen = 0; solv_free(schema); solv_free(seen); + solv_free(keylink); + solv_free(oldincoreoffs); repodata_free_schemahash(data); solv_free(data->incoredata);