From: Michael Schroeder <mls@suse.de>
Date: Fri, 25 Feb 2022 16:21:17 +0000 (+0100)
Subject: solv format: support storing of package dependencies in a compressed block
X-Git-Tag: 0.7.21~1
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=85ced9742b10cd2ce9c43dc8722dbd6aa84662ac;p=thirdparty%2Flibsolv.git

solv format: support storing of package dependencies in a compressed block

Using a block enables us to do decent compression, which makes it
possible to retain the original dependency order.

I.e. no more REPOKEY_TYPE_REL_IDARRAY hacks.

Writing this format is currently turned off but will be made the default
after the next release.
---

diff --git a/src/pooltypes.h b/src/pooltypes.h
index 3bde155a..c69f3755 100644
--- a/src/pooltypes.h
+++ b/src/pooltypes.h
@@ -28,6 +28,7 @@
 #define SOLV_FLAG_PREFIX_POOL	4
 #define SOLV_FLAG_SIZE_BYTES	8
 #define SOLV_FLAG_USERDATA	16
+#define SOLV_FLAG_IDARRAYBLOCK	32
 
 struct s_Stringpool;
 typedef struct s_Stringpool Stringpool;
diff --git a/src/repo_solv.c b/src/repo_solv.c
index 2ba602b2..629ac683 100644
--- a/src/repo_solv.c
+++ b/src/repo_solv.c
@@ -178,6 +178,67 @@ read_idarray(Repodata *data, Id max, Id *map, Id *store, Id *end)
     }
 }
 
+static void
+read_idarray_block(Repodata *data, Id *block, int size)	/* fill block[0..size) with ids decoded from the chunked idarray block read from data->fp; sets data->error on failure */
+{
+  unsigned char buf[65536 + 5 + 1], *bp = buf, *oldbp;	/* room for one chunk, the carried-over tail and the sentinel byte */
+  unsigned char cbuf[65536 + 4];	/* can overshoot 4 bytes */
+  int left = 0;	/* number of undecoded bytes available at bp */
+  int eof = 0;	/* set once the chunk flagged with 0x80 has been read */
+  int clen, flags;
+  Id x;
+  for (;;)
+    {
+      if (left < 5 && !eof)	/* running low on data: fetch the next chunk; NOTE(review): 5 is presumably the longest encoded id - confirm against data_read_id */
+	{
+	  if (left)
+	    memmove(buf, bp, left);	/* keep the unread tail at the start of buf */
+	  bp = buf + left;	/* the new chunk is placed right after the tail */
+	  flags = read_u8(data);	/* chunk header: a flags byte ... */
+          clen = read_u8(data);	/* ... followed by a 16 bit big-endian length */
+          clen = (clen << 8) | read_u8(data);
+	  if (data->error)
+	    return;
+	  if (!clen)
+	    clen = 65536;	/* a stored length of 0 stands for 65536 bytes */
+	  eof = flags & 0x80;	/* 0x80 marks the last chunk */
+	  if (fread(flags & 0x40 ? cbuf : bp, clen, 1, data->fp) != 1)	/* compressed chunks are staged in cbuf, raw ones go straight into buf */
+	    {
+	      data->error = pool_error(data->repo->pool, SOLV_ERROR_EOF, "unexpected EOF");
+	      return;
+	    }
+	  if (flags & 0x40)	/* compressed block */
+	    clen = repopagestore_decompress_page(cbuf, clen, bp, 65536);	/* clen is now the unpacked length; NOTE(review): a 0 result (decompression failure) is not reported here */
+	  bp = buf;
+	  left += clen;
+	  bp[left] = 0;		/* make data_read_id return */
+	  continue;	/* re-evaluate, the chunk may have been very short */
+	}
+      if (size < 2)	/* an array needs room for at least one id plus its terminating 0 */
+	{
+	  data->error = pool_error(data->repo->pool, SOLV_ERROR_EOF, "idarray data overrun in block decompression");
+	  return;
+	}
+      oldbp = bp;
+      bp = data_read_id(bp, &x);
+      left -= bp - oldbp;	/* account for the bytes data_read_id consumed */
+      if (left < 0)	/* the decoder ran past the data we actually have */
+	{
+	  data->error = pool_error(data->repo->pool, SOLV_ERROR_EOF, "compression buffer underrun");
+	  return;
+	}
+      size--;
+      *block++ = (x & 63) + (((unsigned int)x & ~127) >> 1) + 1;	/* squeeze out flag bit 6; the + 1 keeps 0 free as array terminator */
+      if ((x & 64) == 0)	/* bit 6 clear: this was the last id of the current array */
+	{
+          *block++ = 0;	/* terminate the array */
+	  if (--size == 0)	/* block completely filled */
+	    break;
+	}
+    }
+  if (left || !eof)	/* all input must be used up and the final chunk must have been seen */
+    data->error = pool_error(data->repo->pool, SOLV_ERROR_EOF, "idarray size overrun in block decompression");
+}
 
 /*******************************************************************************
  * functions to extract data from memory
@@ -234,13 +295,18 @@ data_read_idarray(unsigned char *dp, Id **storep, Id *map, int max, Repodata *da
 }
 
 static unsigned char *
-data_read_rel_idarray(unsigned char *dp, Id **storep, Id *map, int max, Repodata *data, Id marker)
+data_read_rel_idarray(unsigned char *dp, Id **storep, Id *map, int max, Repodata *data, Id keyid)
 {
+  Id marker = 0;
   Id *store = *storep;
   Id old = 0;
   unsigned int x = 0;
   int c;
 
+  if (keyid == SOLVABLE_REQUIRES)
+    marker = SOLVABLE_PREREQMARKER;
+  if (keyid == SOLVABLE_PROVIDES)
+    marker = SOLVABLE_FILEMARKER;
   for (;;)
     {
       c = *dp++;
@@ -386,13 +452,45 @@ incore_map_idarray(Repodata *data, unsigned char *dp, Id *map, Id max)
 	  data->error = pool_error(data->repo->pool, SOLV_ERROR_ID_RANGE, "incore_map_idarray: id too large (%u/%u)", id, max);
 	  break;
 	}
-      id = map[id];
+      if (map)
+        id = map[id];
       incore_add_ideof(data, id, eof);
       if (eof)
 	break;
     }
 }
 
+static int
+convert_idarray_block(Repodata *data, Id *block, Id *map, Id max)	/* decode one 0-terminated array in place; returns the number of entries visited, terminator included */
+{
+  int cnt = 0;
+  int old = 0;	/* previously decoded (still unmapped) id, base of the delta encoding */
+  for (;;)
+    {
+      Id id = *block;
+      cnt++;
+      if (!id)	/* reached the array terminator */
+	return cnt;
+      id--;	/* idarray_block unpacking added 1 */
+      if (id < 2 * old)	/* small values are deltas relative to old, larger ones are taken as is */
+	{
+	  if (id & 1)
+	    id = old - (id >> 1) - 1;	/* odd: step backwards from old */
+	  else
+	    id = old + (id >> 1);	/* even: step forwards from old */
+	}
+      old = id;
+      if (id < 0 || (max && id >= max))	/* max == 0 disables the upper bound check */
+	{
+	  data->error = pool_error(data->repo->pool, SOLV_ERROR_ID_RANGE, "convert_idarray_block: id too large (%u/%u)", id, max);
+	  return cnt;	/* the offending entry is counted but not rewritten */
+	}
+      if (map)
+        id = map[id];	/* apply the optional id mapping */
+      *block++ = id;
+    }
+}
+
 #if 0
 static void
 incore_add_u32(Repodata *data, unsigned int x)
@@ -484,6 +582,8 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
   Repodata data;
 
   int extendstart = 0, extendend = 0;	/* set in case we're extending */
+  int idarray_block_offset = 0;
+  int idarray_block_end = 0;
 
   now = solv_timems(0);
 
@@ -862,6 +962,9 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
   /* keys start at 1 */
   for (i = 1; i < numkeys; i++)
     {
+      Repokey *key;
+      if (data.error)
+	break;
       id = read_id(&data, numid);
       if (idmap)
 	id = idmap[id];
@@ -877,42 +980,51 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
 	  data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "unsupported data type '%s'", pool_id2str(pool, type));
 	  type = REPOKEY_TYPE_VOID;
 	}
-      keys[i].name = id;
-      keys[i].type = type;
-      keys[i].size = read_id(&data, keys[i].type == REPOKEY_TYPE_CONSTANTID ? numid + numrel : 0);
-      keys[i].storage = read_id(&data, 0);
+      key = keys + i;
+      key->name = id;
+      key->type = type;
+      key->size = read_id(&data, type == REPOKEY_TYPE_CONSTANTID ? numid + numrel : 0);
+      key->storage = read_id(&data, 0);
       /* old versions used SOLVABLE for main solvable data */
-      if (keys[i].storage == KEY_STORAGE_SOLVABLE)
-	keys[i].storage = KEY_STORAGE_INCORE;
-      if (keys[i].storage != KEY_STORAGE_INCORE && keys[i].storage != KEY_STORAGE_VERTICAL_OFFSET)
-	data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "unsupported storage type %d", keys[i].storage);
+      if (key->storage != KEY_STORAGE_INCORE && key->storage != KEY_STORAGE_VERTICAL_OFFSET && key->storage != KEY_STORAGE_SOLVABLE && key->storage != KEY_STORAGE_IDARRAYBLOCK)
+	data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "unsupported storage type %d", key->storage);
+      /* change KEY_STORAGE_SOLVABLE to KEY_STORAGE_INCORE */
+      if (key->storage == KEY_STORAGE_SOLVABLE)
+	key->storage = KEY_STORAGE_INCORE;
+      if (key->storage == KEY_STORAGE_IDARRAYBLOCK && type != REPOKEY_TYPE_IDARRAY)
+	data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "typr %d does not support idarrayblock storage\n", type);
       if (id >= SOLVABLE_NAME && id <= RPM_RPMDBID)
 	{
-	  if (keys[i].storage != KEY_STORAGE_INCORE)
-	    data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "main solvable data must use incore storage %d", keys[i].storage);
-	  keys[i].storage = KEY_STORAGE_SOLVABLE;
+	  /* we will put those directly into the storable */
+	  if (key->storage != KEY_STORAGE_INCORE && key->storage != KEY_STORAGE_IDARRAYBLOCK)
+	    data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "main solvable data must use incore storage, not %d", key->storage);
 	}
-      if ((type == REPOKEY_TYPE_FIXARRAY || type == REPOKEY_TYPE_FLEXARRAY) && keys[i].storage != KEY_STORAGE_INCORE)
+      if ((type == REPOKEY_TYPE_FIXARRAY || type == REPOKEY_TYPE_FLEXARRAY) && key->storage != KEY_STORAGE_INCORE)
 	data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "flex/fixarrays must use incore storage\n");
       /* cannot handle rel idarrays in incore/vertical */
-      if (type == REPOKEY_TYPE_REL_IDARRAY && keys[i].storage != KEY_STORAGE_SOLVABLE)
-	data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "type REL_IDARRAY is only supported for STORAGE_SOLVABLE");
+      if (type == REPOKEY_TYPE_REL_IDARRAY && keys[i].storage != KEY_STORAGE_INCORE)
+	data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "type REL_IDARRAY is only supported for STORAGE_INCORE");
       /* cannot handle mapped ids in vertical */
-      if (!(flags & REPO_LOCALPOOL) && keys[i].storage == KEY_STORAGE_VERTICAL_OFFSET && (type == REPOKEY_TYPE_ID || type == REPOKEY_TYPE_IDARRAY))
+      if (!(flags & REPO_LOCALPOOL) && key->storage == KEY_STORAGE_VERTICAL_OFFSET && (type == REPOKEY_TYPE_ID || type == REPOKEY_TYPE_IDARRAY))
 	data.error = pool_error(pool, SOLV_ERROR_UNSUPPORTED, "mapped ids are not supported for STORAGE_VERTICAL_OFFSET");
 
-      if (keys[i].type == REPOKEY_TYPE_CONSTANTID && idmap)
-	keys[i].size = idmap[keys[i].size];
+      if (type == REPOKEY_TYPE_CONSTANTID && idmap)
+	key->size = idmap[key->size];
 #if 0
-      fprintf(stderr, "key %d %s %s %d %d\n", i, pool_id2str(pool,id), pool_id2str(pool, keys[i].type),
-               keys[i].size, keys[i].storage);
+      fprintf(stderr, "key %d %s %s %d %d\n", i, pool_id2str(pool, id), pool_id2str(pool, type), key->size, key->storage);
 #endif
     }
 
   have_incoredata = 0;
   for (i = 1; i < numkeys; i++)
-    if (keys[i].storage == KEY_STORAGE_INCORE || keys[i].storage == KEY_STORAGE_VERTICAL_OFFSET)
+    {
+      id = keys[i].name;
+      if (id == REPOSITORY_SOLVABLES && keys[i].type == REPOKEY_TYPE_FLEXARRAY)
+	continue;
+      if (id >= SOLVABLE_NAME && id <= RPM_RPMDBID)
+	continue;
       have_incoredata = 1;
+    }
 
   data.keys = keys;
   data.nkeys = numkeys;
@@ -946,7 +1058,20 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
   data.schemadata = schemadata;
   data.schemadatalen = schemadataend - data.schemadata;
 
-  /*******  Part 6: Data ********************************************/
+  /*******  Part 6: Idarray block ***********************************/
+  if ((solvflags & SOLV_FLAG_IDARRAYBLOCK) != 0)
+    {
+      unsigned int idarray_block_size = read_id(&data, 0x30000000);
+      repo_reserve_ids(repo, 0, idarray_block_size + 1);
+      idarray_block_offset = repo->idarraysize;
+      repo->idarraysize += idarray_block_size;
+      idarray_block_end = repo->idarraysize;
+      repo->idarraydata[repo->idarraysize++] = 0;
+      if (idarray_block_size)
+        read_idarray_block(&data, repo->idarraydata + idarray_block_offset, idarray_block_size);
+    }
+
+  /*******  Part 7: Data ********************************************/
 
   idarraydatap = idarraydataend = 0;
   size_idarray = 0;
@@ -968,7 +1093,8 @@ repo_add_solv(Repo *repo, FILE *fp, int flags)
     l = allsize;
   if (!l || fread(buf, l, 1, data.fp) != 1)
     {
-      data.error = pool_error(pool, SOLV_ERROR_EOF, "unexpected EOF");
+      if (!data.error)
+        data.error = pool_error(pool, SOLV_ERROR_EOF, "unexpected EOF");
       id = 0;
     }
   else
@@ -1103,31 +1229,49 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key
 	  break;
 	case REPOKEY_TYPE_IDARRAY:
 	case REPOKEY_TYPE_REL_IDARRAY:
-	  if (!s || id < INTERESTED_START || id > INTERESTED_END)
+	  if (keys[key].storage == KEY_STORAGE_IDARRAYBLOCK)
 	    {
-	      dps = dp;
-	      dp = data_skip(dp, REPOKEY_TYPE_IDARRAY);
-	      if (keys[key].storage != KEY_STORAGE_INCORE)
-		break;
-	      if (idmap)
-		incore_map_idarray(&data, dps, idmap, numid + numrel);
-	      else
-		incore_add_blob(&data, dps, dp - dps);
-	      break;
+	      int cnt = convert_idarray_block(&data, repo->idarraydata + idarray_block_offset, idmap, numid + numrel);
+	      ido = idarray_block_offset;
+	      idarray_block_offset += cnt;
+	      if (idarray_block_offset > idarray_block_end)
+		{
+		  data.error = pool_error(pool, SOLV_ERROR_OVERFLOW, "idarray block underflow");
+		  idarray_block_offset = idarray_block_end;
+		  break;
+		}
+	      if (!s || id < INTERESTED_START || id > INTERESTED_END)
+		{
+		  do
+		    incore_add_ideof(&data, repo->idarraydata[ido++], --cnt > 1 ? 0 : 1);
+		  while (cnt > 1);
+		  break;
+		}
 	    }
-	  ido = idarraydatap - repo->idarraydata;
-	  if (keys[key].type == REPOKEY_TYPE_IDARRAY)
-	    dp = data_read_idarray(dp, &idarraydatap, idmap, numid + numrel, &data);
-	  else if (id == SOLVABLE_REQUIRES)
-	    dp = data_read_rel_idarray(dp, &idarraydatap, idmap, numid + numrel, &data, SOLVABLE_PREREQMARKER);
-	  else if (id == SOLVABLE_PROVIDES)
-	    dp = data_read_rel_idarray(dp, &idarraydatap, idmap, numid + numrel, &data, SOLVABLE_FILEMARKER);
 	  else
-	    dp = data_read_rel_idarray(dp, &idarraydatap, idmap, numid + numrel, &data, 0);
-	  if (idarraydatap > idarraydataend)
 	    {
-	      data.error = pool_error(pool, SOLV_ERROR_OVERFLOW, "idarray overflow");
-	      break;
+	      if (!s || id < INTERESTED_START || id > INTERESTED_END)
+		{
+		  dps = dp;
+		  dp = data_skip(dp, REPOKEY_TYPE_IDARRAY);
+		  if (keys[key].storage != KEY_STORAGE_INCORE)
+		    break;
+		  if (idmap)
+		    incore_map_idarray(&data, dps, idmap, numid + numrel);
+		  else
+		    incore_add_blob(&data, dps, dp - dps);
+		  break;
+		}
+	      ido = idarraydatap - repo->idarraydata;
+	      if (keys[key].type == REPOKEY_TYPE_IDARRAY)
+		dp = data_read_idarray(dp, &idarraydatap, idmap, numid + numrel, &data);
+	      else
+		dp = data_read_rel_idarray(dp, &idarraydatap, idmap, numid + numrel, &data, id);
+	      if (idarraydatap > idarraydataend)
+		{
+		  data.error = pool_error(pool, SOLV_ERROR_OVERFLOW, "idarray overflow");
+		  break;
+		}
 	    }
 	  if (id == SOLVABLE_PROVIDES)
 	    s->provides = ido;
@@ -1229,9 +1373,7 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key
 	      incore_add_sizek(&data, (unsigned int)id);
 	      break;
 	    }
-	  /* FALLTHROUGH */
-	default:
-	  if (id == RPM_RPMDBID && s && keys[key].type == REPOKEY_TYPE_NUM)
+	  if (s && id == RPM_RPMDBID)
 	    {
 	      dp = data_read_id(dp, &id);
 	      if (!repo->rpmdbid)
@@ -1239,6 +1381,8 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key
 	      repo->rpmdbid[(s - pool->solvables) - repo->start] = id;
 	      break;
 	    }
+	  /* FALLTHROUGH */
+	default:
 	  dps = dp;
 	  dp = data_skip(dp, keys[key].type);
 	  if (keys[key].storage == KEY_STORAGE_INCORE)
@@ -1254,6 +1398,8 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key
     {
       if (dp > bufend)
 	data.error = pool_error(pool, SOLV_ERROR_EOF, "buffer overrun");
+      else if (idarray_block_offset != idarray_block_end)
+	data.error = pool_error(pool, SOLV_ERROR_EOF, "unconsumed idarray block entries");
     }
   solv_free(buf);
 
@@ -1276,10 +1422,16 @@ printf("=> %s %s %p\n", pool_id2str(pool, keys[key].name), pool_id2str(pool, key
     }
   solv_free(idmap);
 
-  /* fixup the special idarray type */
+  /* fixup key data */
   for (i = 1; i < numkeys; i++)
-    if (keys[i].type == REPOKEY_TYPE_REL_IDARRAY)
-      keys[i].type = REPOKEY_TYPE_IDARRAY;
+    {
+      if (keys[i].type == REPOKEY_TYPE_REL_IDARRAY)
+        keys[i].type = REPOKEY_TYPE_IDARRAY;
+      if (keys[i].storage == KEY_STORAGE_IDARRAYBLOCK)
+        keys[i].storage = KEY_STORAGE_INCORE;
+      if (keys[i].name >= SOLVABLE_NAME && keys[i].name <= RPM_RPMDBID)
+        keys[i].storage = KEY_STORAGE_SOLVABLE;
+    }
 
   for (i = 1; i < numkeys; i++)
     if (keys[i].storage == KEY_STORAGE_VERTICAL_OFFSET && keys[i].size)
diff --git a/src/repo_write.c b/src/repo_write.c
index a11de002..b3a6bbcb 100644
--- a/src/repo_write.c
+++ b/src/repo_write.c
@@ -29,6 +29,9 @@
 #include "repo_write.h"
 #include "repopage.h"
 
+#undef USE_IDARRAYBLOCK
+#define USE_REL_IDARRAY
+
 /*------------------------------------------------------------------*/
 /* Id map optimizations */
 
@@ -160,6 +163,36 @@ write_blob(Repodata *data, void *blob, int len)
     }
 }
 
+static void
+write_compressed_blob(Repodata *data, void *blob, int len)	/* write blob as chunks of at most 65536 bytes, each with a 3 byte header (flags, 16 bit big-endian length) */
+{
+  unsigned char cpage[65536];	/* output buffer for the compressor */
+  if (data->error)
+    return;
+  while (len > 0)
+    {
+      int chunk = len > (int)sizeof(cpage) ? (int)sizeof(cpage) : len;	/* compare as int instead of mixing int and size_t */
+      int flag = (chunk == len ? 0x80 : 0x00);	/* 0x80 marks the last chunk */
+      int clen = repopagestore_compress_page(blob, chunk, cpage, sizeof(cpage) - 1);	/* a result of 0 means: store the chunk uncompressed */
+      if (!clen)
+	{
+	  write_u8(data, flag);
+	  write_u8(data, chunk >> 8);	/* NOTE(review): a full 65536 byte chunk relies on write_u8 keeping only the low 8 bits, so it is stored as length 0 */
+	  write_u8(data, chunk);
+	  write_blob(data, blob, chunk);
+	}
+      else
+	{
+	  write_u8(data, flag | 0x40);	/* 0x40 marks a compressed chunk */
+	  write_u8(data, clen >> 8);
+	  write_u8(data, clen);
+	  write_blob(data, cpage, clen);
+	}
+      blob = (unsigned char *)blob + chunk;	/* arithmetic on void pointers is a GNU extension, not ISO C */
+      len -= chunk;
+    }
+}
+
 /*
  * Id
  */
@@ -350,7 +383,6 @@ data_addid64(struct extdata *xd, unsigned int x, unsigned int hx)
     data_addid(xd, (Id)x);
 }
 
-#define USE_REL_IDARRAY
 #ifdef USE_REL_IDARRAY
 
 static int
@@ -368,11 +400,9 @@ data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marke
   Id lids[64], *sids;
   Id id, old;
 
-  if (!ids)
-    return;
-  if (!*ids)
+  if (!ids || !*ids)
     {
-      data_addid(xd, 0);
+      data_addideof(xd, 0, 1);
       return;
     }
   for (len = 0; len < 64 && ids[len]; len++)
@@ -449,13 +479,41 @@ data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marke
 
 #else
 
+#ifdef USE_IDARRAYBLOCK
+
+static void
+data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marker)	/* append the 0-terminated array ids to xd, delta-encoded against the previous id; pool and marker are not used by this variant */
+{
+  Id id;
+  Id last = 0, tmp;	/* last: previous id as it was before delta encoding */
+  if (!ids || !*ids)
+    {
+      data_addideof(xd, 0, 1);	/* empty array: a lone 0, third argument presumably being the "last element" flag */
+      return;
+    }
+  while ((id = *ids++) != 0)
+    {
+      if (needid)
+        id = needid[NEEDIDOFF(id)].need;	/* renumber through the needid table */
+      tmp = id;
+      if (id < last)
+	id = (last - id) * 2 - 1;	/* [1, 2 * last - 1] odd */
+      else if (id < 2 * last)
+	id = (id - last) * 2;		/* [0, 2 * last - 2] even */
+      last = tmp;	/* ids >= 2 * last were left unchanged above */
+      data_addideof(xd, id, *ids ? 0 : 1);	/* flag set on the final element of the array */
+    }
+}
+
+#else
+
 static void
 data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marker)
 {
   Id id;
   if (!ids || !*ids)
     {
-      data_addid(xd, 0);
+      data_addideof(xd, 0, 1);
       return;
     }
   while ((id = *ids++) != 0)
@@ -468,6 +526,8 @@ data_adddepids(struct extdata *xd, Pool *pool, NeedId *needid, Id *ids, Id marke
 
 #endif
 
+#endif
+
 static inline void
 data_addblob(struct extdata *xd, unsigned char *blob, int len)
 {
@@ -845,6 +905,12 @@ collect_data_solvable(struct cbdata *cbdata, Solvable *s, Id *keymap)
   Repo *repo = s->repo;
   Pool *pool = repo->pool;
   struct extdata *xd = cbdata->extdata;
+#ifdef USE_IDARRAYBLOCK
+  struct extdata *xda = xd + cbdata->target->nkeys;	/* idarray block */
+#else
+  struct extdata *xda = xd;
+#endif
+
   NeedId *needid = cbdata->needid;
   Id *idarraydata = repo->idarraydata;
 
@@ -857,21 +923,21 @@ collect_data_solvable(struct cbdata *cbdata, Solvable *s, Id *keymap)
   if (s->vendor && keymap[SOLVABLE_VENDOR])
     data_addid(xd, needid[s->vendor].need);
   if (s->provides && keymap[SOLVABLE_PROVIDES])
-    data_adddepids(xd, pool, needid, idarraydata + s->provides, SOLVABLE_FILEMARKER);
+    data_adddepids(xda, pool, needid, idarraydata + s->provides, SOLVABLE_FILEMARKER);
   if (s->obsoletes && keymap[SOLVABLE_OBSOLETES])
-    data_adddepids(xd, pool, needid, idarraydata + s->obsoletes, 0);
+    data_adddepids(xda, pool, needid, idarraydata + s->obsoletes, 0);
   if (s->conflicts && keymap[SOLVABLE_CONFLICTS])
-    data_adddepids(xd, pool, needid, idarraydata + s->conflicts, 0);
+    data_adddepids(xda, pool, needid, idarraydata + s->conflicts, 0);
   if (s->requires && keymap[SOLVABLE_REQUIRES])
-    data_adddepids(xd, pool, needid, idarraydata + s->requires, SOLVABLE_PREREQMARKER);
+    data_adddepids(xda, pool, needid, idarraydata + s->requires, SOLVABLE_PREREQMARKER);
   if (s->recommends && keymap[SOLVABLE_RECOMMENDS])
-    data_adddepids(xd, pool, needid, idarraydata + s->recommends, 0);
+    data_adddepids(xda, pool, needid, idarraydata + s->recommends, 0);
   if (s->suggests && keymap[SOLVABLE_SUGGESTS])
-    data_adddepids(xd, pool, needid, idarraydata + s->suggests, 0);
+    data_adddepids(xda, pool, needid, idarraydata + s->suggests, 0);
   if (s->supplements && keymap[SOLVABLE_SUPPLEMENTS])
-    data_adddepids(xd, pool, needid, idarraydata + s->supplements, 0);
+    data_adddepids(xda, pool, needid, idarraydata + s->supplements, 0);
   if (s->enhances && keymap[SOLVABLE_ENHANCES])
-    data_adddepids(xd, pool, needid, idarraydata + s->enhances, 0);
+    data_adddepids(xda, pool, needid, idarraydata + s->enhances, 0);
   if (repo->rpmdbid && keymap[RPM_RPMDBID])
     data_addid(xd, repo->rpmdbid[(s - pool->solvables) - repo->start]);
 }
@@ -1217,13 +1283,13 @@ repowriter_write(Repowriter *writer, FILE *fp)
 	  if (i < SOLVABLE_PROVIDES)
 	    keyd.type = REPOKEY_TYPE_ID;
 	  else if (i < RPM_RPMDBID)
-#ifdef USE_REL_IDARRAY
-	    keyd.type = REPOKEY_TYPE_REL_IDARRAY;
-#else
 	    keyd.type = REPOKEY_TYPE_IDARRAY;
-#endif
 	  else
 	    keyd.type = REPOKEY_TYPE_NUM;
+#ifdef USE_REL_IDARRAY
+	  if (keyd.type == REPOKEY_TYPE_IDARRAY)
+	    keyd.type = REPOKEY_TYPE_REL_IDARRAY;
+#endif
 	  keyd.size = 0;
 	  keyd.storage = KEY_STORAGE_SOLVABLE;
 	  if (writer->keyfilter)
@@ -1233,6 +1299,10 @@ repowriter_write(Repowriter *writer, FILE *fp)
 		continue;
 	      keyd.storage = KEY_STORAGE_SOLVABLE;
 	    }
+#ifdef USE_IDARRAYBLOCK
+	  if (keyd.type == REPOKEY_TYPE_IDARRAY)
+	    keyd.storage = KEY_STORAGE_IDARRAYBLOCK;
+#endif
 	  poolusage = 1;
 	  clonepool = 1;
 	  keymap[keyd.name] = repodata_key2id(&target, &keyd, 1);
@@ -1313,6 +1383,8 @@ repowriter_write(Repowriter *writer, FILE *fp)
 		      keymap[n] = 0;
 		      continue;
 		    }
+		  if (keyd.storage != KEY_STORAGE_VERTICAL_OFFSET)
+		    keyd.storage = KEY_STORAGE_INCORE;		/* do not mess with us */
 		}
 	      if (data->state != REPODATA_STUB)
 	        id = repodata_key2id(&target, &keyd, 1);
@@ -1841,11 +1913,12 @@ for (i = 1; i < target.nkeys; i++)
 
   /* collect all data
    * we use extdata[0] for incore data and extdata[keyid] for vertical data
+   * we use extdata[nkeys] for the idarray_block data
    *
    * this must match the code above that creates the schema data!
    */
 
-  cbdata.extdata = solv_calloc(target.nkeys, sizeof(struct extdata));
+  cbdata.extdata = solv_calloc(target.nkeys + 1, sizeof(struct extdata));
 
   xd = cbdata.extdata;
   cbdata.current_sub = 0;
@@ -1907,15 +1980,21 @@ for (i = 1; i < target.nkeys; i++)
   target.fp = fp;
 
   /* write header */
+  solv_flags = 0;
+  solv_flags |= SOLV_FLAG_PREFIX_POOL;
+  solv_flags |= SOLV_FLAG_SIZE_BYTES;
+  if (writer->userdatalen)
+    solv_flags |= SOLV_FLAG_USERDATA;
+  if (cbdata.extdata[target.nkeys].len)
+    solv_flags |= SOLV_FLAG_IDARRAYBLOCK;
 
   /* write file header */
   write_u32(&target, 'S' << 24 | 'O' << 16 | 'L' << 8 | 'V');
-  if (writer->userdatalen)
+  if ((solv_flags & (SOLV_FLAG_USERDATA | SOLV_FLAG_IDARRAYBLOCK)) != 0)
     write_u32(&target, SOLV_VERSION_9);
   else
     write_u32(&target, SOLV_VERSION_8);
 
-
   /* write counts */
   write_u32(&target, nstrings);
   write_u32(&target, nrels);
@@ -1923,13 +2002,10 @@ for (i = 1; i < target.nkeys; i++)
   write_u32(&target, anysolvableused ? nsolvables : 0);
   write_u32(&target, target.nkeys);
   write_u32(&target, target.nschemata);
-  solv_flags = 0;
-  solv_flags |= SOLV_FLAG_PREFIX_POOL;
-  solv_flags |= SOLV_FLAG_SIZE_BYTES;
-  if (writer->userdatalen)
-    solv_flags |= SOLV_FLAG_USERDATA;
   write_u32(&target, solv_flags);
-  if (writer->userdatalen)
+
+  /* write userdata */
+  if ((solv_flags & SOLV_FLAG_USERDATA) != 0)
     {
       write_u32(&target, writer->userdatalen);
       write_blob(&target, writer->userdata, writer->userdatalen);
@@ -1973,8 +2049,8 @@ for (i = 1; i < target.nkeys; i++)
     }
   else
     {
-      write_u32(&target, 0);
-      write_u32(&target, 0);
+      write_u32(&target, 0);	/* unpacked size */
+      write_u32(&target, 0);	/* compressed size */
     }
 
   /*
@@ -2026,14 +2102,36 @@ for (i = 1; i < target.nkeys; i++)
   for (i = 1; i < target.nschemata; i++)
     write_idarray(&target, pool, 0, repodata_id2schema(&target, i));
 
+  /* write idarray_block data if not empty */
+  if (cbdata.extdata[target.nkeys].len)
+    {
+      unsigned int cnt = 0;
+      unsigned char *b;
+      unsigned int l;
+	
+      xd = cbdata.extdata + target.nkeys;
+      /* calculate number of entries */
+      for (l = xd->len, b = xd->buf; l--;)
+	{
+	  unsigned char x = *b++;
+	  if ((x & 0x80) == 0)
+	    cnt += (x & 0x40) ? 1 : 2;
+	}
+      write_id(&target, cnt);
+      if (cnt)
+        write_compressed_blob(&target, xd->buf, xd->len);
+      solv_free(xd->buf);
+    }
+
   /*
    * write incore data
    */
+  xd = cbdata.extdata;
   write_id(&target, cbdata.maxdata);
-  write_id(&target, cbdata.extdata[0].len);
-  if (cbdata.extdata[0].len)
-    write_blob(&target, cbdata.extdata[0].buf, cbdata.extdata[0].len);
-  solv_free(cbdata.extdata[0].buf);
+  write_id(&target, xd->len);
+  if (xd->len)
+    write_blob(&target, xd->buf, xd->len);
+  solv_free(xd->buf);
 
   /*
    * write vertical data if we have any
diff --git a/src/repodata.h b/src/repodata.h
index 7dd5259d..2504f2ae 100644
--- a/src/repodata.h
+++ b/src/repodata.h
@@ -48,6 +48,7 @@ typedef struct s_Repokey {
 #define KEY_STORAGE_SOLVABLE            1
 #define KEY_STORAGE_INCORE              2
 #define KEY_STORAGE_VERTICAL_OFFSET     3
+#define KEY_STORAGE_IDARRAYBLOCK	4
 
 #ifdef LIBSOLV_INTERNAL
 struct dircache;
diff --git a/src/repopage.c b/src/repopage.c
index 9e9694f4..77c7fcc7 100644
--- a/src/repopage.c
+++ b/src/repopage.c
@@ -105,6 +105,8 @@ compress_buf(const unsigned char *in, unsigned int in_len,
   unsigned int litofs = 0;
   memset(htab, -1, sizeof (htab));
   memset(hnext, -1, sizeof (hnext));
+  if (in_len > BLOCK_SIZE)
+    return 0;			/* Hey! */
   while (io + 2 < in_len)
     {
       /* Search for a match of the string starting at IN, we have at
@@ -119,84 +121,53 @@ compress_buf(const unsigned char *in, unsigned int in_len,
       mlen = 0;
       mofs = 0;
 
-      for (tries = 0; try != -1 && tries < 12; tries++)
+      for (tries = 0; try != (Ref)-1 && tries < 12; tries++, try = hnext[try])
         {
-	  if (try < io
-	      && in[try] == in[io] && in[try + 1] == in[io + 1])
+	  if (in[try] == in[io] && in[try + 1] == in[io + 1])
 	    {
 	      mlen = 2;
 	      mofs = (io - try) - 1;
 	      break;
 	    }
-	  try = hnext[try];
 	}
-      for (; try != -1 && tries < 12; tries++)
+      for (; try != (Ref)-1 && tries < 12; tries++, try = hnext[try])
 	{
-	  /* assert(mlen >= 2); */
 	  /* assert(io + mlen < in_len); */
 	  /* Try a match starting from [io] with the strings at [try].
-	     That's only sensible if TRY actually is before IO (can happen
-	     with uninit hash table).  If we have a previous match already
-	     we're only going to take the new one if it's longer, hence
-	     check the potentially last character.  */
-	  if (try < io && in[try + mlen] == in[io + mlen])
+	     If we have a previous match already we're only going to take
+             the new one if it's longer, hence check the potentially last
+             character first.  */
+	  if (in[try + mlen] == in[io + mlen] && !memcmp(in + try, in + io, mlen))
 	    {
-	      unsigned int this_len, this_ofs;
-	      if (memcmp(in + try, in + io, mlen))
-		goto no_match;
-	      this_len = mlen + 1;
+	      /* Found a longer match */
+	      mlen++;
 	      /* Now try extending the match by more characters.  */
-	      for (;
-		   io + this_len < in_len
-		   && in[try + this_len] == in[io + this_len]; this_len++)
-		;
-#if 0
-	      unsigned int testi;
-	      for (testi = 0; testi < this_len; testi++)
-		assert(in[try + testi] == in[io + testi]);
-#endif
-	      this_ofs = (io - try) - 1;
-	      /*if (this_ofs > 65535)
-		 goto no_match; */
-#if 0
-	      assert(this_len >= 2);
-	      assert(this_len >= mlen);
-	      assert(this_len > mlen || (this_len == mlen && this_ofs > mofs));
-#endif
-	      mlen = this_len, mofs = this_ofs;
+	      while (io + mlen < in_len && in[try + mlen] == in[io + mlen])
+		mlen++;
+	      mofs = (io - try) - 1;
 	      /* If our match extends up to the end of input, no next
 		 match can become better.  This is not just an
-		 optimization, it establishes a loop invariant
+		 optimization, it establishes the loop invariant
 		 (io + mlen < in_len).  */
 	      if (io + mlen >= in_len)
-		goto match_done;
+		break;
 	    }
-	no_match:
-	  try = hnext[try];
-	  /*if (io - try - 1 >= 65536)
-	    break;*/
 	}
-
-match_done:
+      if (mlen < 3)
+	mlen = 0;
       if (mlen)
 	{
 	  /*fprintf(stderr, "%d %d\n", mlen, mofs);*/
-	  if (mlen == 2 && (litofs || mofs >= 1024))
-	    mlen = 0;
-	  /*else if (mofs >= 65536)
-	    mlen = 0;*/
-	  else if (mofs >= 65536)
+#if BLOCK_SIZE > 65536
+	  if (mofs >= 65536)
 	    {
 	      if (mlen >= 2048 + 5)
 	        mlen = 2047 + 5;
 	      else if (mlen < 5)
 	        mlen = 0;
 	    }
-	  else if (mlen < 3)
-	    mlen = 0;
-	  /*else if (mlen >= 4096 + 19)
-	    mlen = 4095 + 19;*/
-	  else if (mlen >= 2048 + 19)
+#endif
+	  if (mlen >= 2048 + 19)
 	    mlen = 2047 + 19;
 	  /* Skip this match if the next character would deliver a better one,
 	     but only do this if we have the chance to really extend the
@@ -210,16 +181,11 @@ match_done:
 	      hval = (hval ^ (hval << 5) ^ (hval >> 5)) - hval * 5;
 	      hval = hval & (HS - 1);
 	      try = htab[hval];
-	      if (try < io + 1
-		  && in[try] == in[io + 1] && in[try + 1] == in[io + 2])
+	      if (try != (Ref)-1 && in[try] == in[io + 1] && in[try + 1] == in[io + 2])
 		{
-		  unsigned int this_len;
-		  this_len = 2;
-		  for (;
-		       io + 1 + this_len < in_len
-		       && in[try + this_len] == in[io + 1 + this_len];
-		       this_len++)
-		    ;
+		  unsigned int this_len = 2;
+		  while (io + 1 + this_len < in_len && in[try + this_len] == in[io + 1 + this_len])
+		    this_len++;
 		  if (this_len >= mlen)
 		    mlen = 0;
 		}
@@ -227,12 +193,14 @@ match_done:
 	}
       if (!mlen)
 	{
+	  /* Found no match, start/extend literal */
 	  if (!litofs)
 	    litofs = io + 1;
 	  io++;
 	}
       else
 	{
+	  /* Found a match. First dump literals */
 	  if (litofs)
 	    {
 	      unsigned litlen;
@@ -303,6 +271,9 @@ match_done:
 	    }
 	  else if (mofs >= 65536)
 	    {
+#if BLOCK_SIZE <= 65536
+	      return 0;
+#else
 	      assert(mlen >= 5 && mlen < 2048 + 5);
 	      if (oo + 5 >= out_len)
 	        return 0;
@@ -311,6 +282,7 @@ match_done:
 	      out[oo++] = mofs & 0xff;
 	      out[oo++] = (mofs >> 8) & 0xff;
 	      out[oo++] = mofs >> 16;
+#endif
 	    }
 	  else if (mlen >= 3 && mlen <= 18)
 	    {
@@ -350,7 +322,7 @@ match_done:
 		  htab[hval] = io;
 		}
 	      io++;
-	    };
+	    }
 	}
     }
   /* We might have some characters left.  */
@@ -466,14 +438,12 @@ unchecked_decompress_buf(const unsigned char *in, unsigned int in_len,
 	  {
 	    o = in[0] | (in[1] << 8);
 	    in += 2;
-	    first = first & 31;
-	    first += 3;
+	    first = (first & 15) + 3;
 	    break;
 	  }
 	case 15:
-	  /* f1 1111llll <8o> <8o> <8l> */
-	  /* f2 11110lll <8o> <8o> <8l> */
-	  /* g 11111lll <8o> <8o> <8o> <8l> */
+	  /* f2 11110lll <8l> <8o> <8o> */
+	  /* g  11111lll <8l> <8o> <8o> <8o> */
 	  {
 	    first = first & 15;
 	    if (first >= 8)
@@ -557,6 +527,77 @@ unchecked_decompress_buf(const unsigned char *in, unsigned int in_len,
   return out - orig_out;
 }
 
+static unsigned int
+check_decompress_buf(const unsigned char *in, unsigned int in_len)	/* dry-run decoder: returns the unpacked size of in[0..in_len), or 0 if the stream is corrupt or truncated */
+{
+  unsigned int out_len = 0;
+  const unsigned char *in_end = in + in_len;
+  while (in < in_end)
+    {
+      unsigned int first = *in++;
+      int o;
+      switch (first >> 4)
+	{
+	default:
+	  /* This default case can't happen, but GCCs VRP is not strong
+	     enough to see this, so make this explicitly not fall to
+	     the end of the switch, so that we don't have to initialize
+	     o above.  */
+	  continue;
+	case 0: case 1:
+	case 2: case 3:
+	case 4: case 5:
+	case 6: case 7:
+	  out_len++;
+	  continue;
+	case 8: case 9:
+	  /* b 100lllll <l+1 bytes> */
+	  first = (first & 31) + 1;
+	  in += first;
+	  out_len += first;
+	  continue;
+	case 10: case 11:
+	  /* c 101oolll <8o> */
+	  o = (first & (3 << 3)) << 5 | *in++;
+	  first = (first & 7) + 2;
+	  break;
+	case 12: case 13:
+	  /* d 110lllll <8o> */
+	  o = *in++;
+	  first = (first & 31) + 10;
+	  break;
+	case 14:
+	  /* e 1110llll <8o> <8o> */
+	  o = in[0] | (in[1] << 8);
+	  in += 2;
+	  first = (first & 15) + 3;
+	  break;
+	case 15:
+	  /* f2 11110lll <8l> <8o> <8o> */
+	  /* g  11111lll <8l> <8o> <8o> <8o> */
+	  first = first & 15;
+	  if (first >= 8)
+	    {
+	      first = (((first - 8) << 8) | in[0]) + 5;
+	      o = in[1] | (in[2] << 8) | (in[3] << 16);
+	      in += 4;
+	    }
+	  else
+	    {
+	      first = ((first << 8) | in[0]) + 19;
+	      o = in[1] | (in[2] << 8);
+	      in += 3;
+	    }
+	  break;
+	}
+      /* fprintf(stderr, "ref: %d @ %d\n", first, o); */
+      if (o >= out_len)	/* a back reference must point into output already produced */
+	return 0;
+      out_len += first;
+    }
+  return in > in_end ? 0 : out_len;	/* the last token ran past the end of the input: reject the truncated stream */
+}
+
 /**********************************************************************/
 
 void repopagestore_init(Repopagestore *store)
@@ -757,6 +798,16 @@ repopagestore_compress_page(unsigned char *page, unsigned int len, unsigned char
   return compress_buf(page, len, cpage, max);
 }
 
+unsigned int
+repopagestore_decompress_page(const unsigned char *cpage, unsigned int len, unsigned char *page, unsigned int max)	/* validated decompression of cpage[0..len) into page; returns the unpacked length or 0 on error */
+{
+  unsigned int l = check_decompress_buf(cpage, len);	/* dry run: computes the output size and checks the back references */
+  if (l == 0 || l > max)	/* rejected by the check (or empty), or the result would not fit into page */
+    return 0;
+  return unchecked_decompress_buf(cpage, len, page, max);
+}
+
+
 #define SOLV_ERROR_EOF		3
 #define SOLV_ERROR_CORRUPT	6
 
diff --git a/src/repopage.h b/src/repopage.h
index b5f2eee9..9fb84f0f 100644
--- a/src/repopage.h
+++ b/src/repopage.h
@@ -53,6 +53,8 @@ unsigned char *repopagestore_load_page_range(Repopagestore *store, unsigned int
 
 /* compress a page, return compressed len */
 unsigned int repopagestore_compress_page(unsigned char *page, unsigned int len, unsigned char *cpage, unsigned int max);
+/* uncompress a page, return uncompressed len */
+unsigned int repopagestore_decompress_page(const unsigned char *cpage, unsigned int len, unsigned char *page, unsigned int max);
 
 /* setup page data for repodata_load_page_range */
 int repopagestore_read_or_setup_pages(Repopagestore *store, FILE *fp, unsigned int pagesz, unsigned int blobsz);