From: Michael Schroeder Date: Fri, 25 Feb 2022 16:21:17 +0000 (+0100) Subject: solv format: support storing of package dependencies in a compressed block X-Git-Tag: 0.7.21~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=85ced9742b10cd2ce9c43dc8722dbd6aa84662ac;p=thirdparty%2Flibsolv.git solv format: support storing of package dependencies in a compressed block Using a block enables us to do decent compression, which makes it possible to retain the original dependency order. I.e. no more REPOKEY_TYPE_REL_IDARRAY hacks. Writing this format is currently turned off but will be made the default after the next release. --- diff --git a/src/pooltypes.h b/src/pooltypes.h index 3bde155a..c69f3755 100644 --- a/src/pooltypes.h +++ b/src/pooltypes.h @@ -28,6 +28,7 @@ #define SOLV_FLAG_PREFIX_POOL 4 #define SOLV_FLAG_SIZE_BYTES 8 #define SOLV_FLAG_USERDATA 16 +#define SOLV_FLAG_IDARRAYBLOCK 32 struct s_Stringpool; typedef struct s_Stringpool Stringpool; diff --git a/src/repo_solv.c b/src/repo_solv.c index 2ba602b2..629ac683 100644 --- a/src/repo_solv.c +++ b/src/repo_solv.c @@ -178,6 +178,67 @@ read_idarray(Repodata *data, Id max, Id *map, Id *store, Id *end) } } +static void +read_idarray_block(Repodata *data, Id *block, int size) +{ + unsigned char buf[65536 + 5 + 1], *bp = buf, *oldbp; + unsigned char cbuf[65536 + 4]; /* can overshoot 4 bytes */ + int left = 0; + int eof = 0; + int clen, flags; + Id x; + for (;;) + { + if (left < 5 && !eof) + { + if (left) + memmove(buf, bp, left); + bp = buf + left; + flags = read_u8(data); + clen = read_u8(data); + clen = (clen << 8) | read_u8(data); + if (data->error) + return; + if (!clen) + clen = 65536; + eof = flags & 0x80; + if (fread(flags & 0x40 ? 
cbuf : bp, clen, 1, data->fp) != 1) + { + data->error = pool_error(data->repo->pool, SOLV_ERROR_EOF, "unexpected EOF"); + return; + } + if (flags & 0x40) /* compressed block */ + clen = repopagestore_decompress_page(cbuf, clen, bp, 65536); + bp = buf; + left += clen; + bp[left] = 0; /* make data_read_id return */ + continue; + } + if (size < 2) + { + data->error = pool_error(data->repo->pool, SOLV_ERROR_EOF, "idarray data overrun in block decompression"); + return; + } + oldbp = bp; + bp = data_read_id(bp, &x); + left -= bp - oldbp; + if (left < 0) + { + data->error = pool_error(data->repo->pool, SOLV_ERROR_EOF, "compression buffer underrun"); + return; + } + size--; + *block++ = (x & 63) + (((unsigned int)x & ~127) >> 1) + 1; + if ((x & 64) == 0) + { + *block++ = 0; + if (--size == 0) + break; + } + } + if (left || !eof) + data->error = pool_error(data->repo->pool, SOLV_ERROR_EOF, "idarray size overrun in block decompression"); +} /******************************************************************************* * functions to extract data from memory @@ -234,13 +295,18 @@ data_read_idarray(unsigned char *dp, Id **storep, Id *map, int max, Repodata *da } static unsigned char * -data_read_rel_idarray(unsigned char *dp, Id **storep, Id *map, int max, Repodata *data, Id marker) +data_read_rel_idarray(unsigned char *dp, Id **storep, Id *map, int max, Repodata *data, Id keyid) { + Id marker = 0; Id *store = *storep; Id old = 0; unsigned int x = 0; int c; + if (keyid == SOLVABLE_REQUIRES) + marker = SOLVABLE_PREREQMARKER; + if (keyid == SOLVABLE_PROVIDES) + marker = SOLVABLE_FILEMARKER; for (;;) { c = *dp++; @@ -386,13 +452,45 @@ incore_map_idarray(Repodata *data, unsigned char *dp, Id *map, Id max) data->error = pool_error(data->repo->pool, SOLV_ERROR_ID_RANGE, "incore_map_idarray: id too large (%u/%u)", id, max); break; } - id = map[id]; + if (map) + id = map[id]; incore_add_ideof(data, id, eof); if (eof) break; } } +static int +convert_idarray_block(Repodata *data, Id 
*block, Id *map, Id max) +{ + int cnt = 0; + int old = 0; + for (;;) + { + Id id = *block; + cnt++; + if (!id) + return cnt; + id--; /* idarray_block unpacking added 1 */ + if (id < 2 * old) + { + if (id & 1) + id = old - (id >> 1) - 1; + else + id = old + (id >> 1); + } + old = id; + if (id < 0 || (max && id >= max)) + { + data->error = pool_error(data->repo->pool, SOLV_ERROR_ID_RANGE, "convert_idarray_block: id too large (%u/%u)", id, max); + return cnt; + } + if (map) + id = map[id]; + *block++ = id; + } +} + #if 0 static void incore_add_u32(Repodata *data, unsigned int x) @@ -484,6 +582,8 @@ repo_add_solv(Repo *repo, FILE *fp, int flags) Repodata data; int extendstart = 0, extendend = 0; /* set in case we're extending */ + int idarray_block_offset = 0; + int idarray_block_end = 0; now = solv_timems(0); @@ -862,6 +962,9 @@ repo_add_solv(Repo *repo, FILE *fp, int flags) /* keys start at 1 */ for (i = 1; i < numkeys; i++) { + Repokey *key; + if (data.error) + break; id = read_id(&data, numid); if (idmap) id = idmap[id]; @@ -877,42 +980,51 @@ repo_add_solv(Repo *repo, FILE *fp, int flags) data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "unsupported data type '%s'", pool_id2str(pool, type)); type = REPOKEY_TYPE_VOID; } - keys[i].name = id; - keys[i].type = type; - keys[i].size = read_id(&data, keys[i].type == REPOKEY_TYPE_CONSTANTID ? numid + numrel : 0); - keys[i].storage = read_id(&data, 0); + key = keys + i; + key->name = id; + key->type = type; + key->size = read_id(&data, type == REPOKEY_TYPE_CONSTANTID ? 
numid + numrel : 0); + key->storage = read_id(&data, 0); /* old versions used SOLVABLE for main solvable data */ - if (keys[i].storage == KEY_STORAGE_SOLVABLE) - keys[i].storage = KEY_STORAGE_INCORE; - if (keys[i].storage != KEY_STORAGE_INCORE && keys[i].storage != KEY_STORAGE_VERTICAL_OFFSET) - data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "unsupported storage type %d", keys[i].storage); + if (key->storage != KEY_STORAGE_INCORE && key->storage != KEY_STORAGE_VERTICAL_OFFSET && key->storage != KEY_STORAGE_SOLVABLE && key->storage != KEY_STORAGE_IDARRAYBLOCK) + data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "unsupported storage type %d", key->storage); + /* change KEY_STORAGE_SOLVABLE to KEY_STORAGE_INCORE */ + if (key->storage == KEY_STORAGE_SOLVABLE) + key->storage = KEY_STORAGE_INCORE; + if (key->storage == KEY_STORAGE_IDARRAYBLOCK && type != REPOKEY_TYPE_IDARRAY) + data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "typr %d does not support idarrayblock storage\n", type); if (id >= SOLVABLE_NAME && id <= RPM_RPMDBID) { - if (keys[i].storage != KEY_STORAGE_INCORE) - data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "main solvable data must use incore storage %d", keys[i].storage); - keys[i].storage = KEY_STORAGE_SOLVABLE; + /* we will put those directly into the storable */ + if (key->storage != KEY_STORAGE_INCORE && key->storage != KEY_STORAGE_IDARRAYBLOCK) + data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "main solvable data must use incore storage, not %d", key->storage); } - if ((type == REPOKEY_TYPE_FIXARRAY || type == REPOKEY_TYPE_FLEXARRAY) && keys[i].storage != KEY_STORAGE_INCORE) + if ((type == REPOKEY_TYPE_FIXARRAY || type == REPOKEY_TYPE_FLEXARRAY) && key->storage != KEY_STORAGE_INCORE) data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "flex/fixarrays must use incore storage\n"); /* cannot handle rel idarrays in incore/vertical */ - if (type == REPOKEY_TYPE_REL_IDARRAY && keys[i].storage != KEY_STORAGE_SOLVABLE) - 
data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "type REL_IDARRAY is only supported for STORAGE_SOLVABLE"); + if (type == REPOKEY_TYPE_REL_IDARRAY && keys[i].storage != KEY_STORAGE_INCORE) + data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "type REL_IDARRAY is only supported for STORAGE_INCORE"); /* cannot handle mapped ids in vertical */ - if (!(flags & REPO_LOCALPOOL) && keys[i].storage == KEY_STORAGE_VERTICAL_OFFSET && (type == REPOKEY_TYPE_ID || type == REPOKEY_TYPE_IDARRAY)) + if (!(flags & REPO_LOCALPOOL) && key->storage == KEY_STORAGE_VERTICAL_OFFSET && (type == REPOKEY_TYPE_ID || type == REPOKEY_TYPE_IDARRAY)) data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "mapped ids are not supported for STORAGE_VERTICAL_OFFSET"); - if (keys[i].type == REPOKEY_TYPE_CONSTANTID && idmap) - keys[i].size = idmap[keys[i].size]; + if (type == REPOKEY_TYPE_CONSTANTID && idmap) + key->size = idmap[key->size]; #if 0 - fprintf(stderr, "key %d %s %s %d %d\n", i, pool_id2str(pool,id), pool_id2str(pool, keys[i].type), - keys[i].size, keys[i].storage); + fprintf(stderr, "key %d %s %s %d %d\n", i, pool_id2str(pool, id), pool_id2str(pool, type), key->size, key->storage); #endif } have_incoredata = 0; for (i = 1; i < numkeys; i++) - if (keys[i].storage == KEY_STORAGE_INCORE || keys[i].storage == KEY_STORAGE_VERTICAL_OFFSET) + { + id = keys[i].name; + if (id == REPOSITORY_SOLVABLES && keys[i].type == REPOKEY_TYPE_FLEXARRAY) + continue; + if (id >= SOLVABLE_NAME && id <= RPM_RPMDBID) + continue; have_incoredata = 1; + } data.keys = keys; data.nkeys = numkeys; @@ -946,7 +1058,20 @@ repo_add_solv(Repo *repo, FILE *fp, int flags) data.schemadata = schemadata; data.schemadatalen = schemadataend - data.schemadata; - /******* Part 6: Data ********************************************/ + /******* Part 6: Idarray block ***********************************/ + if ((solvflags & SOLV_FLAG_IDARRAYBLOCK) != 0) + { + unsigned int idarray_block_size = read_id(&data, 0x30000000); + 
repo_reserve_ids(repo, 0, idarray_block_size + 1); + idarray_block_offset = repo->idarraysize; + repo->idarraysize += idarray_block_size; + idarray_block_end = repo->idarraysize; + repo->idarraydata[repo->idarraysize++] = 0; + if (idarray_block_size) + read_idarray_block(&data, repo->idarraydata + idarray_block_offset, idarray_block_size); + } + + /******* Part 7: Data ********************************************/ idarraydatap = idarraydataend = 0; size_idarray = 0; @@ -968,7 +1093,8 @@ repo_add_solv(Repo *repo, FILE *fp, int flags) l = allsize; if (!l || fread(buf, l, 1, data.fp) != 1) { - data.error = pool_error(pool, SOLV_ERROR_EOF, "unexpected EOF"); + if (!data.error) + data.error = pool_error(pool, SOLV_ERROR_EOF, "unexpected EOF"); id = 0; } else @@ -1103,31 +1229,49 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key break; case REPOKEY_TYPE_IDARRAY: case REPOKEY_TYPE_REL_IDARRAY: - if (!s || id < INTERESTED_START || id > INTERESTED_END) + if (keys[key].storage == KEY_STORAGE_IDARRAYBLOCK) { - dps = dp; - dp = data_skip(dp, REPOKEY_TYPE_IDARRAY); - if (keys[key].storage != KEY_STORAGE_INCORE) - break; - if (idmap) - incore_map_idarray(&data, dps, idmap, numid + numrel); - else - incore_add_blob(&data, dps, dp - dps); - break; + int cnt = convert_idarray_block(&data, repo->idarraydata + idarray_block_offset, idmap, numid + numrel); + ido = idarray_block_offset; + idarray_block_offset += cnt; + if (idarray_block_offset > idarray_block_end) + { + data.error = pool_error(pool, SOLV_ERROR_OVERFLOW, "idarray block underflow"); + idarray_block_offset = idarray_block_end; + break; + } + if (!s || id < INTERESTED_START || id > INTERESTED_END) + { + do + incore_add_ideof(&data, repo->idarraydata[ido++], --cnt > 1 ? 
0 : 1); + while (cnt > 1); + break; + } } - ido = idarraydatap - repo->idarraydata; - if (keys[key].type == REPOKEY_TYPE_IDARRAY) - dp = data_read_idarray(dp, &idarraydatap, idmap, numid + numrel, &data); - else if (id == SOLVABLE_REQUIRES) - dp = data_read_rel_idarray(dp, &idarraydatap, idmap, numid + numrel, &data, SOLVABLE_PREREQMARKER); - else if (id == SOLVABLE_PROVIDES) - dp = data_read_rel_idarray(dp, &idarraydatap, idmap, numid + numrel, &data, SOLVABLE_FILEMARKER); else - dp = data_read_rel_idarray(dp, &idarraydatap, idmap, numid + numrel, &data, 0); - if (idarraydatap > idarraydataend) { - data.error = pool_error(pool, SOLV_ERROR_OVERFLOW, "idarray overflow"); - break; + if (!s || id < INTERESTED_START || id > INTERESTED_END) + { + dps = dp; + dp = data_skip(dp, REPOKEY_TYPE_IDARRAY); + if (keys[key].storage != KEY_STORAGE_INCORE) + break; + if (idmap) + incore_map_idarray(&data, dps, idmap, numid + numrel); + else + incore_add_blob(&data, dps, dp - dps); + break; + } + ido = idarraydatap - repo->idarraydata; + if (keys[key].type == REPOKEY_TYPE_IDARRAY) + dp = data_read_idarray(dp, &idarraydatap, idmap, numid + numrel, &data); + else + dp = data_read_rel_idarray(dp, &idarraydatap, idmap, numid + numrel, &data, id); + if (idarraydatap > idarraydataend) + { + data.error = pool_error(pool, SOLV_ERROR_OVERFLOW, "idarray overflow"); + break; + } } if (id == SOLVABLE_PROVIDES) s->provides = ido; @@ -1229,9 +1373,7 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key incore_add_sizek(&data, (unsigned int)id); break; } - /* FALLTHROUGH */ - default: - if (id == RPM_RPMDBID && s && keys[key].type == REPOKEY_TYPE_NUM) + if (s && id == RPM_RPMDBID) { dp = data_read_id(dp, &id); if (!repo->rpmdbid) @@ -1239,6 +1381,8 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key repo->rpmdbid[(s - pool->solvables) - repo->start] = id; break; } + /* FALLTHROUGH */ + default: dps = dp; dp = data_skip(dp, 
keys[key].type); if (keys[key].storage == KEY_STORAGE_INCORE) @@ -1254,6 +1398,8 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key { if (dp > bufend) data.error = pool_error(pool, SOLV_ERROR_EOF, "buffer overrun"); + else if (idarray_block_offset != idarray_block_end) + data.error = pool_error(pool, SOLV_ERROR_EOF, "unconsumed idarray block entries"); } solv_free(buf); @@ -1276,10 +1422,16 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key } solv_free(idmap); - /* fixup the special idarray type */ + /* fixup key data */ for (i = 1; i < numkeys; i++) - if (keys[i].type == REPOKEY_TYPE_REL_IDARRAY) - keys[i].type = REPOKEY_TYPE_IDARRAY; + { + if (keys[i].type == REPOKEY_TYPE_REL_IDARRAY) + keys[i].type = REPOKEY_TYPE_IDARRAY; + if (keys[i].storage == KEY_STORAGE_IDARRAYBLOCK) + keys[i].storage = KEY_STORAGE_INCORE; + if (keys[i].name >= SOLVABLE_NAME && keys[i].name <= RPM_RPMDBID) + keys[i].storage = KEY_STORAGE_SOLVABLE; + } for (i = 1; i < numkeys; i++) if (keys[i].storage == KEY_STORAGE_VERTICAL_OFFSET && keys[i].size) diff --git a/src/repo_write.c b/src/repo_write.c index a11de002..b3a6bbcb 100644 --- a/src/repo_write.c +++ b/src/repo_write.c @@ -29,6 +29,9 @@ #include "repo_write.h" #include "repopage.h" +#undef USE_IDARRAYBLOCK +#define USE_REL_IDARRAY + /*------------------------------------------------------------------*/ /* Id map optimizations */ @@ -160,6 +163,36 @@ write_blob(Repodata *data, void *blob, int len) } } +static void +write_compressed_blob(Repodata *data, void *blob, int len) +{ + unsigned char cpage[65536]; + if (data->error) + return; + while (len > 0) + { + int chunk = len > sizeof(cpage) ? sizeof(cpage) : len; + int flag = (chunk == len ? 
0x80 : 0x00); + int clen = repopagestore_compress_page(blob, chunk, cpage, sizeof(cpage) - 1); + if (!clen) + { + write_u8(data, flag); + write_u8(data, chunk >> 8); + write_u8(data, chunk); + write_blob(data, blob, chunk); + } + else + { + write_u8(data, flag | 0x40); + write_u8(data, clen >> 8); + write_u8(data, clen); + write_blob(data, cpage, clen); + } + blob += chunk; + len -= chunk; + } +} + /* * Id */ @@ -350,7 +383,6 @@ data_addid64(struct extdata *xd, unsigned int x, unsigned int hx) data_addid(xd, (Id)x); } -#define USE_REL_IDARRAY #ifdef USE_REL_IDARRAY static int @@ -368,11 +400,9 @@ data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marke Id lids[64], *sids; Id id, old; - if (!ids) - return; - if (!*ids) + if (!ids || !*ids) { - data_addid(xd, 0); + data_addideof(xd, 0, 1); return; } for (len = 0; len < 64 && ids[len]; len++) @@ -449,13 +479,41 @@ data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marke #else +#ifdef USE_IDARRAYBLOCK + +static void +data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marker) +{ + Id id; + Id last = 0, tmp; + if (!ids || !*ids) + { + data_addideof(xd, 0, 1); + return; + } + while ((id = *ids++) != 0) + { + if (needid) + id = needid[NEEDIDOFF(id)].need; + tmp = id; + if (id < last) + id = (last - id) * 2 - 1; /* [1, 2 * last - 1] odd */ + else if (id < 2 * last) + id = (id - last) * 2; /* [0, 2 * last - 2] even */ + last = tmp; + data_addideof(xd, id, *ids ? 
0 : 1); + } +} + +#else + static void data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marker) { Id id; if (!ids || !*ids) { - data_addid(xd, 0); + data_addideof(xd, 0, 1); return; } while ((id = *ids++) != 0) @@ -468,6 +526,8 @@ data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marke #endif +#endif + static inline void data_addblob(struct extdata *xd, unsigned char *blob, int len) { @@ -845,6 +905,12 @@ collect_data_solvable(struct cbdata *cbdata, Solvable *s, Id *keymap) Repo *repo = s->repo; Pool *pool = repo->pool; struct extdata *xd = cbdata->extdata; +#ifdef USE_IDARRAYBLOCK + struct extdata *xda = xd + cbdata->target->nkeys; /* idarray block */ +#else + struct extdata *xda = xd; +#endif + NeedId *needid = cbdata->needid; Id *idarraydata = repo->idarraydata; @@ -857,21 +923,21 @@ collect_data_solvable(struct cbdata *cbdata, Solvable *s, Id *keymap) if (s->vendor && keymap[SOLVABLE_VENDOR]) data_addid(xd, needid[s->vendor].need); if (s->provides && keymap[SOLVABLE_PROVIDES]) - data_adddepids(xd, pool, needid, idarraydata + s->provides, SOLVABLE_FILEMARKER); + data_adddepids(xda, pool, needid, idarraydata + s->provides, SOLVABLE_FILEMARKER); if (s->obsoletes && keymap[SOLVABLE_OBSOLETES]) - data_adddepids(xd, pool, needid, idarraydata + s->obsoletes, 0); + data_adddepids(xda, pool, needid, idarraydata + s->obsoletes, 0); if (s->conflicts && keymap[SOLVABLE_CONFLICTS]) - data_adddepids(xd, pool, needid, idarraydata + s->conflicts, 0); + data_adddepids(xda, pool, needid, idarraydata + s->conflicts, 0); if (s->requires && keymap[SOLVABLE_REQUIRES]) - data_adddepids(xd, pool, needid, idarraydata + s->requires, SOLVABLE_PREREQMARKER); + data_adddepids(xda, pool, needid, idarraydata + s->requires, SOLVABLE_PREREQMARKER); if (s->recommends && keymap[SOLVABLE_RECOMMENDS]) - data_adddepids(xd, pool, needid, idarraydata + s->recommends, 0); + data_adddepids(xda, pool, needid, idarraydata + s->recommends, 0); if 
(s->suggests && keymap[SOLVABLE_SUGGESTS]) - data_adddepids(xd, pool, needid, idarraydata + s->suggests, 0); + data_adddepids(xda, pool, needid, idarraydata + s->suggests, 0); if (s->supplements && keymap[SOLVABLE_SUPPLEMENTS]) - data_adddepids(xd, pool, needid, idarraydata + s->supplements, 0); + data_adddepids(xda, pool, needid, idarraydata + s->supplements, 0); if (s->enhances && keymap[SOLVABLE_ENHANCES]) - data_adddepids(xd, pool, needid, idarraydata + s->enhances, 0); + data_adddepids(xda, pool, needid, idarraydata + s->enhances, 0); if (repo->rpmdbid && keymap[RPM_RPMDBID]) data_addid(xd, repo->rpmdbid[(s - pool->solvables) - repo->start]); } @@ -1217,13 +1283,13 @@ repowriter_write(Repowriter *writer, FILE *fp) if (i < SOLVABLE_PROVIDES) keyd.type = REPOKEY_TYPE_ID; else if (i < RPM_RPMDBID) -#ifdef USE_REL_IDARRAY - keyd.type = REPOKEY_TYPE_REL_IDARRAY; -#else keyd.type = REPOKEY_TYPE_IDARRAY; -#endif else keyd.type = REPOKEY_TYPE_NUM; +#ifdef USE_REL_IDARRAY + if (keyd.type == REPOKEY_TYPE_IDARRAY) + keyd.type = REPOKEY_TYPE_REL_IDARRAY; +#endif keyd.size = 0; keyd.storage = KEY_STORAGE_SOLVABLE; if (writer->keyfilter) @@ -1233,6 +1299,10 @@ repowriter_write(Repowriter *writer, FILE *fp) continue; keyd.storage = KEY_STORAGE_SOLVABLE; } +#ifdef USE_IDARRAYBLOCK + if (keyd.type == REPOKEY_TYPE_IDARRAY) + keyd.storage = KEY_STORAGE_IDARRAYBLOCK; +#endif poolusage = 1; clonepool = 1; keymap[keyd.name] = repodata_key2id(&target, &keyd, 1); @@ -1313,6 +1383,8 @@ repowriter_write(Repowriter *writer, FILE *fp) keymap[n] = 0; continue; } + if (keyd.storage != KEY_STORAGE_VERTICAL_OFFSET) + keyd.storage = KEY_STORAGE_INCORE; /* do not mess with us */ } if (data->state != REPODATA_STUB) id = repodata_key2id(&target, &keyd, 1); @@ -1841,11 +1913,12 @@ for (i = 1; i < target.nkeys; i++) /* collect all data * we use extdata[0] for incore data and extdata[keyid] for vertical data + * we use extdata[nkeys] for the idarray_block data * * this must match the code above 
that creates the schema data! */ - cbdata.extdata = solv_calloc(target.nkeys, sizeof(struct extdata)); + cbdata.extdata = solv_calloc(target.nkeys + 1, sizeof(struct extdata)); xd = cbdata.extdata; cbdata.current_sub = 0; @@ -1907,15 +1980,21 @@ for (i = 1; i < target.nkeys; i++) target.fp = fp; /* write header */ + solv_flags = 0; + solv_flags |= SOLV_FLAG_PREFIX_POOL; + solv_flags |= SOLV_FLAG_SIZE_BYTES; + if (writer->userdatalen) + solv_flags |= SOLV_FLAG_USERDATA; + if (cbdata.extdata[target.nkeys].len) + solv_flags |= SOLV_FLAG_IDARRAYBLOCK; /* write file header */ write_u32(&target, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V'); - if (writer->userdatalen) + if ((solv_flags & (SOLV_FLAG_USERDATA | SOLV_FLAG_IDARRAYBLOCK)) != 0) write_u32(&target, SOLV_VERSION_9); else write_u32(&target, SOLV_VERSION_8); - /* write counts */ write_u32(&target, nstrings); write_u32(&target, nrels); @@ -1923,13 +2002,10 @@ for (i = 1; i < target.nkeys; i++) write_u32(&target, anysolvableused ? nsolvables : 0); write_u32(&target, target.nkeys); write_u32(&target, target.nschemata); - solv_flags = 0; - solv_flags |= SOLV_FLAG_PREFIX_POOL; - solv_flags |= SOLV_FLAG_SIZE_BYTES; - if (writer->userdatalen) - solv_flags |= SOLV_FLAG_USERDATA; write_u32(&target, solv_flags); - if (writer->userdatalen) + + /* write userdata */ + if ((solv_flags & SOLV_FLAG_USERDATA) != 0) { write_u32(&target, writer->userdatalen); write_blob(&target, writer->userdata, writer->userdatalen); @@ -1973,8 +2049,8 @@ for (i = 1; i < target.nkeys; i++) } else { - write_u32(&target, 0); - write_u32(&target, 0); + write_u32(&target, 0); /* unpacked size */ + write_u32(&target, 0); /* compressed size */ } /* @@ -2026,14 +2102,36 @@ for (i = 1; i < target.nkeys; i++) for (i = 1; i < target.nschemata; i++) write_idarray(&target, pool, 0, repodata_id2schema(&target, i)); + /* write idarray_block data if not empty */ + if (cbdata.extdata[target.nkeys].len) + { + unsigned int cnt = 0; + unsigned char *b; + unsigned int l; + 
+ xd = cbdata.extdata + target.nkeys; + /* calculate number of entries */ + for (l = xd->len, b = xd->buf; l--;) + { + unsigned char x = *b++; + if ((x & 0x80) == 0) + cnt += (x & 0x40) ? 1 : 2; + } + write_id(&target, cnt); + if (cnt) + write_compressed_blob(&target, xd->buf, xd->len); + solv_free(xd->buf); + } + /* * write incore data */ + xd = cbdata.extdata; write_id(&target, cbdata.maxdata); - write_id(&target, cbdata.extdata[0].len); - if (cbdata.extdata[0].len) - write_blob(&target, cbdata.extdata[0].buf, cbdata.extdata[0].len); - solv_free(cbdata.extdata[0].buf); + write_id(&target, xd->len); + if (xd->len) + write_blob(&target, xd->buf, xd->len); + solv_free(xd->buf); /* * write vertical data if we have any diff --git a/src/repodata.h b/src/repodata.h index 7dd5259d..2504f2ae 100644 --- a/src/repodata.h +++ b/src/repodata.h @@ -48,6 +48,7 @@ typedef struct s_Repokey { #define KEY_STORAGE_SOLVABLE 1 #define KEY_STORAGE_INCORE 2 #define KEY_STORAGE_VERTICAL_OFFSET 3 +#define KEY_STORAGE_IDARRAYBLOCK 4 #ifdef LIBSOLV_INTERNAL struct dircache; diff --git a/src/repopage.c b/src/repopage.c index 9e9694f4..77c7fcc7 100644 --- a/src/repopage.c +++ b/src/repopage.c @@ -105,6 +105,8 @@ compress_buf(const unsigned char *in, unsigned int in_len, unsigned int litofs = 0; memset(htab, -1, sizeof (htab)); memset(hnext, -1, sizeof (hnext)); + if (in_len > BLOCK_SIZE) + return 0; /* Hey! 
*/ while (io + 2 < in_len) { /* Search for a match of the string starting at IN, we have at @@ -119,84 +121,53 @@ compress_buf(const unsigned char *in, unsigned int in_len, mlen = 0; mofs = 0; - for (tries = 0; try != -1 && tries < 12; tries++) + for (tries = 0; try != (Ref)-1 && tries < 12; tries++, try = hnext[try]) { - if (try < io - && in[try] == in[io] && in[try + 1] == in[io + 1]) + if (in[try] == in[io] && in[try + 1] == in[io + 1]) { mlen = 2; mofs = (io - try) - 1; break; } - try = hnext[try]; } - for (; try != -1 && tries < 12; tries++) + for (; try != (Ref)-1 && tries < 12; tries++, try = hnext[try]) { - /* assert(mlen >= 2); */ /* assert(io + mlen < in_len); */ /* Try a match starting from [io] with the strings at [try]. - That's only sensible if TRY actually is before IO (can happen - with uninit hash table). If we have a previous match already - we're only going to take the new one if it's longer, hence - check the potentially last character. */ - if (try < io && in[try + mlen] == in[io + mlen]) + If we have a previous match already we're only going to take + the new one if it's longer, hence check the potentially last + character first. */ + if (in[try + mlen] == in[io + mlen] && !memcmp(in + try, in + io, mlen)) { - unsigned int this_len, this_ofs; - if (memcmp(in + try, in + io, mlen)) - goto no_match; - this_len = mlen + 1; + /* Found a longer match */ + mlen++; /* Now try extending the match by more characters. 
*/ - for (; - io + this_len < in_len - && in[try + this_len] == in[io + this_len]; this_len++) - ; -#if 0 - unsigned int testi; - for (testi = 0; testi < this_len; testi++) - assert(in[try + testi] == in[io + testi]); -#endif - this_ofs = (io - try) - 1; - /*if (this_ofs > 65535) - goto no_match; */ -#if 0 - assert(this_len >= 2); - assert(this_len >= mlen); - assert(this_len > mlen || (this_len == mlen && this_ofs > mofs)); -#endif - mlen = this_len, mofs = this_ofs; + while (io + mlen < in_len && in[try + mlen] == in[io + mlen]) + mlen++; + mofs = (io - try) - 1; /* If our match extends up to the end of input, no next match can become better. This is not just an - optimization, it establishes a loop invariant + optimization, it establishes the loop invariant (io + mlen < in_len). */ if (io + mlen >= in_len) - goto match_done; + break; } - no_match: - try = hnext[try]; - /*if (io - try - 1 >= 65536) - break;*/ } - -match_done: + if (mlen < 3) + mlen = 0; if (mlen) { /*fprintf(stderr, "%d %d\n", mlen, mofs);*/ - if (mlen == 2 && (litofs || mofs >= 1024)) - mlen = 0; - /*else if (mofs >= 65536) - mlen = 0;*/ - else if (mofs >= 65536) +#if BLOCK_SIZE > 65536 + if (mofs >= 65536) { if (mlen >= 2048 + 5) mlen = 2047 + 5; else if (mlen < 5) mlen = 0; } - else if (mlen < 3) - mlen = 0; - /*else if (mlen >= 4096 + 19) - mlen = 4095 + 19;*/ - else if (mlen >= 2048 + 19) +#endif + if (mlen >= 2048 + 19) mlen = 2047 + 19; /* Skip this match if the next character would deliver a better one, but only do this if we have the chance to really extend the @@ -210,16 +181,11 @@ match_done: hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5; hval = hval & (HS - 1); try = htab[hval]; - if (try < io + 1 - && in[try] == in[io + 1] && in[try + 1] == in[io + 2]) + if (try != (Ref)-1 && in[try] == in[io + 1] && in[try + 1] == in[io + 2]) { - unsigned int this_len; - this_len = 2; - for (; - io + 1 + this_len < in_len - && in[try + this_len] == in[io + 1 + this_len]; - this_len++) - ; + 
unsigned int this_len = 2; + while (io + 1 + this_len < in_len && in[try + this_len] == in[io + 1 + this_len]) + this_len++; if (this_len >= mlen) mlen = 0; } @@ -227,12 +193,14 @@ match_done: } if (!mlen) { + /* Found no match, start/extend literal */ if (!litofs) litofs = io + 1; io++; } else { + /* Found a match. First dump literals */ if (litofs) { unsigned litlen; @@ -303,6 +271,9 @@ match_done: } else if (mofs >= 65536) { +#if BLOCK_SIZE <= 65536 + return 0; +#else assert(mlen >= 5 && mlen < 2048 + 5); if (oo + 5 >= out_len) return 0; @@ -311,6 +282,7 @@ match_done: out[oo++] = mofs & 0xff; out[oo++] = (mofs >> 8) & 0xff; out[oo++] = mofs >> 16; +#endif } else if (mlen >= 3 && mlen <= 18) { @@ -350,7 +322,7 @@ match_done: htab[hval] = io; } io++; - }; + } } } /* We might have some characters left. */ @@ -466,14 +438,12 @@ unchecked_decompress_buf(const unsigned char *in, unsigned int in_len, { o = in[0] | (in[1] << 8); in += 2; - first = first & 31; - first += 3; + first = (first & 15) + 3; break; } case 15: - /* f1 1111llll <8o> <8o> <8l> */ - /* f2 11110lll <8o> <8o> <8l> */ - /* g 11111lll <8o> <8o> <8o> <8l> */ + /* f2 11110lll <8l> <8o> <8o> */ + /* g 11111lll <8l> <8o> <8o> <8o> */ { first = first & 15; if (first >= 8) @@ -557,6 +527,77 @@ unchecked_decompress_buf(const unsigned char *in, unsigned int in_len, return out - orig_out; } +static unsigned int +check_decompress_buf(const unsigned char *in, unsigned int in_len) +{ + unsigned int out_len = 0; + const unsigned char *in_end = in + in_len; + while (in < in_end) + { + unsigned int first = *in++; + int o; + switch (first >> 4) + { + default: + /* This default case can't happen, but GCCs VRP is not strong + enough to see this, so make this explicitely not fall to + the end of the switch, so that we don't have to initialize + o above. 
*/ + continue; + case 0: case 1: + case 2: case 3: + case 4: case 5: + case 6: case 7: + out_len++; + continue; + case 8: case 9: + /* b 100lllll */ + first = (first & 31) + 1; + in += first; + out_len += first; + continue; + case 10: case 11: + /* c 101oolll <8o> */ + o = (first & (3 << 3)) << 5 | *in++; + first = (first & 7) + 2; + break; + case 12: case 13: + /* d 110lllll <8o> */ + o = *in++; + first = (first & 31) + 10; + break; + case 14: + /* e 1110llll <8o> <8o> */ + o = in[0] | (in[1] << 8); + in += 2; + first = (first & 15) + 3; + break; + case 15: + /* f1 1111llll <8l> <8o> <8o> */ + /* g 11111lll <8l> <8o> <8o> <8o> */ + first = first & 15; + if (first >= 8) + { + first = (((first - 8) << 8) | in[0]) + 5; + o = in[1] | (in[2] << 8) | (in[3] << 16); + in += 4; + } + else + { + first = ((first << 8) | in[0]) + 19; + o = in[1] | (in[2] << 8); + in += 3; + } + break; + } + /* fprintf(stderr, "ref: %d @ %d\n", first, o); */ + if (o >= out_len) + return 0; + out_len += first; + } + return out_len; +} + /**********************************************************************/ void repopagestore_init(Repopagestore *store) @@ -757,6 +798,16 @@ repopagestore_compress_page(unsigned char *page, unsigned int len, unsigned char return compress_buf(page, len, cpage, max); } +unsigned int +repopagestore_decompress_page(const unsigned char *cpage, unsigned int len, unsigned char *page, unsigned int max) +{ + unsigned int l = check_decompress_buf(cpage, len); + if (l == 0 || l > max) + return 0; + return unchecked_decompress_buf(cpage, len, page, max); +} + + #define SOLV_ERROR_EOF 3 #define SOLV_ERROR_CORRUPT 6 diff --git a/src/repopage.h b/src/repopage.h index b5f2eee9..9fb84f0f 100644 --- a/src/repopage.h +++ b/src/repopage.h @@ -53,6 +53,8 @@ unsigned char *repopagestore_load_page_range(Repopagestore *store, unsigned int /* compress a page, return compressed len */ unsigned int repopagestore_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, 
unsigned int max); +/* uncompress a page, return uncompressed len */ +unsigned int repopagestore_decompress_page(const unsigned char *cpage, unsigned int len, unsigned char *page, unsigned int max); /* setup page data for repodata_load_page_range */ int repopagestore_read_or_setup_pages(Repopagestore *store, FILE *fp, unsigned int pagesz, unsigned int blobsz);