[thirdparty/glibc.git] / nscd / mem.c

/* Cache memory handling.
   Copyright (C) 2004 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <assert.h>
#include <errno.h>
#include <error.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"


/* Maximum alignment requirement we will encounter.  */
#define BLOCK_ALIGN_LOG 3
#define BLOCK_ALIGN (1 << BLOCK_ALIGN_LOG)
#define BLOCK_ALIGN_M1 (BLOCK_ALIGN - 1)


static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1 < h2)
    return -1;
  if (h1 > h2)
    return 1;
  return 0;
}


static int
sort_he_data (const void *p1, const void *p2)
{
  struct hashentry *h1 = *(struct hashentry **) p1;
  struct hashentry *h2 = *(struct hashentry **) p2;

  if (h1->packet < h2->packet)
    return -1;
  if (h1->packet > h2->packet)
    return 1;
  return 0;
}


/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))


static void
markrange (BITMAP_T *mark, ref_t start, size_t len)
{
  /* Adjust parameters for block alignment.  */
  start /= BLOCK_ALIGN;
  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;

  size_t elem = start / BITS;

  if (start % BITS != 0)
    {
      if (start % BITS + len <= BITS)
	{
	  /* All fits in the partial byte.  */
	  mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
	  return;
	}

      mark[elem++] |= 0xff << (start % BITS);
      len -= BITS - (start % BITS);
    }

  while (len >= BITS)
    {
      mark[elem++] = ALLBITS;
      len -= BITS;
    }

  if (len > 0)
    mark[elem] |= ALLBITS >> (BITS - len);
}


void
gc (struct database_dyn *db)
{
  /* We need write access.  */
  pthread_rwlock_wrlock (&db->lock);

  /* And the memory handling lock.  */
  pthread_mutex_lock (&db->memlock);

  /* We need an array representing the data area.  All memory
     allocation is BLOCK_ALIGN aligned so this is the level at which
     we have to look at the memory.  We use a mark and sweep algorithm
     where the marks are placed in this array.  */
  assert (db->head->first_free % BLOCK_ALIGN == 0);
  BITMAP_T mark[(db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS];
  memset (mark, '\0', sizeof (mark));

  /* Create an array which can hold pointer to all the entries in hash
     entries.  */
  struct hashentry *he[db->head->nentries];
  struct hashentry *he_data[db->head->nentries];

  size_t cnt = 0;
  for (size_t idx = 0; idx < db->head->module; ++idx)
    {
      ref_t *prevp = &db->head->array[idx];
      ref_t run = *prevp;

      while (run != ENDREF)
	{
	  assert (cnt < db->head->nentries);
	  he[cnt] = (struct hashentry *) (db->data + run);

	  he[cnt]->prevp = prevp;
	  prevp = &he[cnt]->next;

	  /* This is the hash entry itself.  */
	  markrange (mark, run, sizeof (struct hashentry));

	  /* Add the information for the data itself.  We do this
	     only for the one special entry marked with FIRST.  */
	  if (he[cnt]->first)
	    {
	      struct datahead *dh
		= (struct datahead *) (db->data + he[cnt]->packet);
	      markrange (mark, he[cnt]->packet, dh->allocsize);
	    }

	  run = he[cnt]->next;

	  ++cnt;
	}
    }
  assert (cnt == db->head->nentries);

  /* Sort the entries by the addresses of the referenced data.  All
     the entries pointing to the same DATAHEAD object will have the
     same key.  Stability of the sorting is unimportant.  */
  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);

  /* Sort the entries by their address.  */
  qsort (he, cnt, sizeof (struct hashentry *), sort_he);

  /* Determine the highest used address.  */
  size_t high = sizeof (mark);
  while (high > 0 && mark[high - 1] == 0)
    --high;

  /* No memory used.  */
  if (high == 0)
    {
      db->head->first_free = 0;
      goto out;
    }

  /* Determine the highest offset.  */
  BITMAP_T mask = HIGHBIT;
  ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
  while ((mark[high - 1] & mask) == 0)
    {
      mask >>= 1;
      highref -= BLOCK_ALIGN;
    }

  /* No we can iterate over the MARK array and find bits which are not
     set.  These represent memory which can be recovered.  */
  size_t byte = 0;
  /* Find the first gap.  */
  while (byte < high && mark[byte] == ALLBITS)
    ++byte;

  if (byte == high
      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
    /* No gap.  */
    goto out;

  mask = 1;
  cnt = 0;
  while ((mark[byte] & mask) != 0)
    {
      ++cnt;
      mask <<= 1;
    }
  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
  assert (off_free <= db->head->first_free);

  struct hashentry **next_hash = he;
  struct hashentry **next_data = he_data;

  /* Skip over the hash entries in the first block which does not get
     moved.  */
  while (next_hash < &he[db->head->nentries]
	 && *next_hash < (struct hashentry *) (db->data + off_free))
    ++next_hash;

  while (next_data < &he_data[db->head->nentries]
	 && (*next_data)->packet < off_free)
    ++next_data;


  /* We do not perform the move operations right away since the
     he_data array is not sorted by the address of the data.  */
  struct moveinfo
  {
    void *from;
    void *to;
    size_t size;
    struct moveinfo *next;
  } *moves = NULL;

  while (byte < high)
    {
      /* Search for the next filled block.  BYTE is the index of the
	 entry in MARK, MASK is the bit, and CNT is the bit number.
	 OFF_FILLED is the corresponding offset.  */
      if ((mark[byte] & ~(mask - 1)) == 0)
	{
	  /* No other bit set in the same element of MARK.  Search in the
	     following memory.  */
	  do
	    ++byte;
	  while (byte < high && mark[byte] == 0);

	  if (byte == high)
	    /* That was it.  */
	    break;

	  mask = 1;
	  cnt = 0;
	}
      /* Find the exact bit.  */
      while ((mark[byte] & mask) == 0)
	{
	  ++cnt;
	  mask <<= 1;
	}

      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_alloc <= db->head->first_free);

      /* Find the end of the used area.  */
      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
	{
	  /* All other bits set.  Search the next bytes in MARK.  */
	  do
	    ++byte;
	  while (byte < high && mark[byte] == ALLBITS);

	  mask = 1;
	  cnt = 0;
	}
      if (byte < high)
	{
	  /* Find the exact bit.  */
	  while ((mark[byte] & mask) != 0)
	    {
	      ++cnt;
	      mask <<= 1;
	    }
	}

      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
      assert (off_allocend <= db->head->first_free);
      /* Now we know that we can copy the area from OFF_ALLOC to
	 OFF_ALLOCEND (not included) to the memory starting at
	 OFF_FREE.  First fix up all the entries for the
	 displacement.  */
      ref_t disp = off_alloc - off_free;

      struct moveinfo *new_move
	= (struct moveinfo *) alloca (sizeof (*new_move));
      new_move->from = db->data + off_alloc;
      new_move->to = db->data + off_free;
      new_move->size = off_allocend - off_alloc;
      /* Create a circular list to be always able to append at the end.  */
      if (moves == NULL)
	moves = new_move->next = new_move;
      else
	{
	  new_move->next = moves->next;
	  moves = moves->next = new_move;
	}

      /* The following loop will prepare to move this much data.  */
      off_free += off_allocend - off_alloc;

      while (off_alloc < off_allocend)
	{
	  /* Determine whether the next entry is for a hash entry or
	     the data.  */
	  if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
	    {
	      /* Just correct the forward reference.  */
	      *(*next_hash++)->prevp -= disp;

	      off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
			    & ~BLOCK_ALIGN_M1);
	    }
	  else
	    {
	      assert (next_data < &he_data[db->head->nentries]);
	      assert ((*next_data)->packet == off_alloc);

	      struct datahead *dh = (struct datahead *) (db->data + off_alloc);
	      do
		{
		  assert ((*next_data)->key >= (*next_data)->packet);
		  assert ((*next_data)->key + (*next_data)->len
			  <= (*next_data)->packet + dh->allocsize);

		  (*next_data)->packet -= disp;
		  (*next_data)->key -= disp;
		  ++next_data;
		}
	      while (next_data < &he_data[db->head->nentries]
		     && (*next_data)->packet == off_alloc);

	      off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
	    }
	}
      assert (off_alloc == off_allocend);

      assert (off_alloc <= db->head->first_free);
      if (off_alloc == db->head->first_free)
	/* We are done, that was the last block.  */
	break;
    }
  assert (next_hash == &he[db->head->nentries]);
  assert (next_data == &he_data[db->head->nentries]);

  /* Now perform the actual moves.  */
  if (moves != NULL)
    {
      struct moveinfo *runp = moves->next;
      do
	{
	  assert ((char *) runp->to >= db->data);
	  assert ((char *) runp->to + runp->size
		  <= db->data  + db->head->first_free);
	  assert ((char *) runp->from >= db->data);
	  assert ((char *) runp->from + runp->size
		  <= db->data  + db->head->first_free);

	  /* The regions may overlap.  */
	  memmove (runp->to, runp->from, runp->size);
	  runp = runp->next;
	}
      while (runp != moves->next);

      if (__builtin_expect (debug_level >= 3, 0))
	dbg_log (_("freed %zu bytes in %s cache"),
		 db->head->first_free
		 - ((char *) moves->to + moves->size - db->data),
		 dbnames[db - dbs]);

      /* The byte past the end of the last copied block is the next
	 available byte.  */
      db->head->first_free = (char *) moves->to + moves->size - db->data;

      /* Consistency check.  */
      if (__builtin_expect (debug_level >= 3, 0))
	{
	  for (size_t idx = 0; idx < db->head->module; ++idx)
	    {
	      ref_t run = db->head->array[idx];
	      size_t cnt = 0;

	      while (run != ENDREF)
		{
		  if (run + sizeof (struct hashentry) > db->head->first_free)
		    {
		      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
			       "%" PRIu32 "+%zu > %zu\n",
			       cnt, idx, run, sizeof (struct hashentry),
			       db->head->first_free);
		      break;
		    }

		  struct hashentry *he = (struct hashentry *) (db->data + run);

		  if (he->key + he->len > db->head->first_free)
		    dbg_log ("key of entry %zu in hash bucket %zu out of "
			     "bounds: %" PRIu32 "+%zu > %zu\n",
			     cnt, idx, he->key, he->len, db->head->first_free);

		  if (he->packet + sizeof (struct datahead)
		      > db->head->first_free)
		    dbg_log ("packet of entry %zu in hash bucket %zu out of "
			     "bounds: %" PRIu32 "+%zu > %zu\n",
			     cnt, idx, he->packet, sizeof (struct datahead),
			     db->head->first_free);
		  else
		    {
		      struct datahead *dh = (struct datahead *) (db->data
								 + he->packet);
		      if (he->packet + dh->allocsize
			  > db->head->first_free)
			dbg_log ("full key of entry %zu in hash bucket %zu "
				 "out of bounds: %" PRIu32 "+%zu > %zu",
				 cnt, idx, he->packet, dh->allocsize,
				 db->head->first_free);
		    }

		  run = he->next;
		  ++cnt;
		}
	    }
	}
    }

  /* Make sure the data on disk is updated.  */
  if (db->persistent)
    msync (db->head, db->data + db->head->first_free - (char *) db->head,
	   MS_ASYNC);

  /* We are done.  */
 out:
  pthread_mutex_unlock (&db->memlock);
  pthread_rwlock_unlock (&db->lock);
}


void *
mempool_alloc (struct database_dyn *db, size_t len)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory is multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);

  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
    {
      if (! tried_resize)
	{
	  /* Try to resize the database.  Grow size of 1/8th.  */
	  size_t new_data_size = db->head->data_size + db->head->data_size / 8;
	  size_t oldtotal = (sizeof (struct database_pers_head)
			     + db->head->module * sizeof (ref_t)
			     + db->head->data_size);
	  size_t newtotal = (sizeof (struct database_pers_head)
			     + db->head->module * sizeof (ref_t)
			     + new_data_size);

	  if ((!db->mmap_used || ftruncate (db->wr_fd, newtotal) != 0)
	      /* Try to resize the mapping.  Note: no MREMAP_MAYMOVE.  */
	      && mremap (db->head, oldtotal, newtotal, 0) == 0)
	    {
	      db->head->data_size = new_data_size;
	      tried_resize = true;
	      goto retry;
	    }
	}

      if (! db->last_alloc_failed)
	{
	  dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

	  db->last_alloc_failed = true;
	}

      /* No luck.  */
      res = NULL;
    }
  else
    {
      db->head->first_free += len;

      db->last_alloc_failed = false;
    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}
Commit	Line	Data
a95a08b4 UD	1	/* Cache memory handling.
	2	Copyright (C) 2004 Free Software Foundation, Inc.
	3	This file is part of the GNU C Library.
	4	Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.
	5
	6	The GNU C Library is free software; you can redistribute it and/or
	7	modify it under the terms of the GNU Lesser General Public
	8	License as published by the Free Software Foundation; either
	9	version 2.1 of the License, or (at your option) any later version.
	10
	11	The GNU C Library is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	Lesser General Public License for more details.
	15
	16	You should have received a copy of the GNU Lesser General Public
	17	License along with the GNU C Library; if not, write to the Free
	18	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
	19	02111-1307 USA. */
	20
	21	#include <assert.h>
	22	#include <errno.h>
	23	#include <error.h>
	24	#include <inttypes.h>
	25	#include <libintl.h>
	26	#include <limits.h>
	27	#include <stdlib.h>
	28	#include <string.h>
	29	#include <unistd.h>
	30	#include <sys/mman.h>
	31	#include <sys/param.h>
	32
	33	#include "dbg_log.h"
	34	#include "nscd.h"
	35
	36
	37	/* Maximum alignment requirement we will encounter. */
	38	#define BLOCK_ALIGN_LOG 3
	39	#define BLOCK_ALIGN (1 << BLOCK_ALIGN_LOG)
	40	#define BLOCK_ALIGN_M1 (BLOCK_ALIGN - 1)
	41
	42
	43	static int
	44	sort_he (const void p1, const void p2)
	45	{
	46	struct hashentry h1 = (struct hashentry **) p1;
	47	struct hashentry h2 = (struct hashentry **) p2;
	48
	49	if (h1 < h2)
	50	return -1;
	51	if (h1 > h2)
	52	return 1;
	53	return 0;
	54	}
	55
	56
	57	static int
	58	sort_he_data (const void p1, const void p2)
	59	{
	60	struct hashentry h1 = (struct hashentry **) p1;
	61	struct hashentry h2 = (struct hashentry **) p2;
	62
	63	if (h1->packet < h2->packet)
	64	return -1;
65	if (h1->packet > h2->packet)
66	return 1;
67	return 0;
68	}
69
70
71	/* Basic definitions for the bitmap implementation. Only BITMAP_T
72	needs to be changed to choose a different word size. */
73	#define BITMAP_T uint8_t
74	#define BITS (CHAR_BIT * sizeof (BITMAP_T))
75	#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
76	#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
77
78
79	static void
80	markrange (BITMAP_T *mark, ref_t start, size_t len)
81	{
82	/* Adjust parameters for block alignment. */
83	start /= BLOCK_ALIGN;
84	len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
85
86	size_t elem = start / BITS;
87
88	if (start % BITS != 0)
89	{
90	if (start % BITS + len <= BITS)
91	{
92	/* All fits in the partial byte. */
93	mark[elem] \|= (ALLBITS >> (BITS - len)) << (start % BITS);
94	return;
95	}
96
97	mark[elem++] \|= 0xff << (start % BITS);
98	len -= BITS - (start % BITS);
99	}
100
101	while (len >= BITS)
102	{
103	mark[elem++] = ALLBITS;
104	len -= BITS;
105	}
106
107	if (len > 0)
108	mark[elem] \|= ALLBITS >> (BITS - len);
109	}
110
111
112	void
113	gc (struct database_dyn *db)
114	{
115	/* We need write access. */
116	pthread_rwlock_wrlock (&db->lock);
117
118	/* And the memory handling lock. */
119	pthread_mutex_lock (&db->memlock);
120
121	/* We need an array representing the data area. All memory
122	allocation is BLOCK_ALIGN aligned so this is the level at which
123	we have to look at the memory. We use a mark and sweep algorithm
124	where the marks are placed in this array. */
125	assert (db->head->first_free % BLOCK_ALIGN == 0);
126	BITMAP_T mark[(db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS];
127	memset (mark, '\0', sizeof (mark));
128
129	/* Create an array which can hold pointer to all the entries in hash
130	entries. */
131	struct hashentry *he[db->head->nentries];
132	struct hashentry *he_data[db->head->nentries];
133
134	size_t cnt = 0;
135	for (size_t idx = 0; idx < db->head->module; ++idx)
136	{
137	ref_t *prevp = &db->head->array[idx];
138	ref_t run = *prevp;
139
140	while (run != ENDREF)
141	{
142	assert (cnt < db->head->nentries);
143	he[cnt] = (struct hashentry *) (db->data + run);
144
145	he[cnt]->prevp = prevp;
146	prevp = &he[cnt]->next;
147
148	/* This is the hash entry itself. */
149	markrange (mark, run, sizeof (struct hashentry));
150
151	/* Add the information for the data itself. We do this
152	only for the one special entry marked with FIRST. */
153	if (he[cnt]->first)
154	{
155	struct datahead *dh
156	= (struct datahead *) (db->data + he[cnt]->packet);
157	markrange (mark, he[cnt]->packet, dh->allocsize);
158	}
159
160	run = he[cnt]->next;
161
162	++cnt;
163	}
164	}
165	assert (cnt == db->head->nentries);
166
167	/* Sort the entries by the addresses of the referenced data. All
168	the entries pointing to the same DATAHEAD object will have the
169	same key. Stability of the sorting is unimportant. */
170	memcpy (he_data, he, cnt * sizeof (struct hashentry *));
171	qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
172
173	/* Sort the entries by their address. */
174	qsort (he, cnt, sizeof (struct hashentry *), sort_he);
175
176	/* Determine the highest used address. */
177	size_t high = sizeof (mark);
178	while (high > 0 && mark[high - 1] == 0)
179	--high;
180
181	/* No memory used. */
182	if (high == 0)
183	{
184	db->head->first_free = 0;
185	goto out;
186	}
187
188	/* Determine the highest offset. */
189	BITMAP_T mask = HIGHBIT;
190	ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
191	while ((mark[high - 1] & mask) == 0)
192	{
193	mask >>= 1;
194	highref -= BLOCK_ALIGN;
195	}
196
197	/* No we can iterate over the MARK array and find bits which are not
198	set. These represent memory which can be recovered. */
199	size_t byte = 0;
200	/* Find the first gap. */
201	while (byte < high && mark[byte] == ALLBITS)
202	++byte;
203
204	if (byte == high
205	\|\| (byte == high - 1 && (mark[byte] & ~(mask \| (mask - 1))) == 0))
206	/* No gap. */
207	goto out;
208
209	mask = 1;
210	cnt = 0;
211	while ((mark[byte] & mask) != 0)
212	{
213	++cnt;
214	mask <<= 1;
215	}
216	ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
217	assert (off_free <= db->head->first_free);
218
219	struct hashentry **next_hash = he;
220	struct hashentry **next_data = he_data;
221
222	/* Skip over the hash entries in the first block which does not get
223	moved. */
224	while (next_hash < &he[db->head->nentries]
225	&& next_hash < (struct hashentry ) (db->data + off_free))
226	++next_hash;
227
228	while (next_data < &he_data[db->head->nentries]
229	&& (*next_data)->packet < off_free)
230	++next_data;
231
232
233	/* We do not perform the move operations right away since the
234	he_data array is not sorted by the address of the data. */
235	struct moveinfo
236	{
237	void *from;
238	void *to;
239	size_t size;
240	struct moveinfo *next;
241	} *moves = NULL;
242
243	while (byte < high)
244	{
245	/* Search for the next filled block. BYTE is the index of the
246	entry in MARK, MASK is the bit, and CNT is the bit number.
247	OFF_FILLED is the corresponding offset. */
248	if ((mark[byte] & ~(mask - 1)) == 0)
249	{
250	/* No other bit set in the same element of MARK. Search in the
251	following memory. */
252	do
253	++byte;
254	while (byte < high && mark[byte] == 0);
255
256	if (byte == high)
257	/* That was it. */
258	break;
259
260	mask = 1;
261	cnt = 0;
262	}
263	/* Find the exact bit. */
264	while ((mark[byte] & mask) == 0)
265	{
266	++cnt;
267	mask <<= 1;
268	}
269
270	ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
271	assert (off_alloc <= db->head->first_free);
272
273	/* Find the end of the used area. */
274	if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
275	{
276	/* All other bits set. Search the next bytes in MARK. */
277	do
278	++byte;
279	while (byte < high && mark[byte] == ALLBITS);
280
281	mask = 1;
282	cnt = 0;
283	}
284	if (byte < high)
285	{
286	/* Find the exact bit. */
287	while ((mark[byte] & mask) != 0)
288	{
289	++cnt;
290	mask <<= 1;
291	}
292	}
293
294	ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
295	assert (off_allocend <= db->head->first_free);
296	/* Now we know that we can copy the area from OFF_ALLOC to
297	OFF_ALLOCEND (not included) to the memory starting at
298	OFF_FREE. First fix up all the entries for the
299	displacement. */
300	ref_t disp = off_alloc - off_free;
301
302	struct moveinfo *new_move
303	= (struct moveinfo ) alloca (sizeof (new_move));
304	new_move->from = db->data + off_alloc;
305	new_move->to = db->data + off_free;
306	new_move->size = off_allocend - off_alloc;
307	/* Create a circular list to be always able to append at the end. */
308	if (moves == NULL)
309	moves = new_move->next = new_move;
310	else
311	{
312	new_move->next = moves->next;
313	moves = moves->next = new_move;
314	}
315
316	/* The following loop will prepare to move this much data. */
317	off_free += off_allocend - off_alloc;
318
319	while (off_alloc < off_allocend)
320	{
321	/* Determine whether the next entry is for a hash entry or
322	the data. */
323	if ((struct hashentry ) (db->data + off_alloc) == next_hash)
324	{
325	/* Just correct the forward reference. */
326	(next_hash++)->prevp -= disp;
327
328	off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
329	& ~BLOCK_ALIGN_M1);
330	}
331	else
332	{
333	assert (next_data < &he_data[db->head->nentries]);
334	assert ((*next_data)->packet == off_alloc);
335
336	struct datahead dh = (struct datahead ) (db->data + off_alloc);
337	do
338	{
339	assert ((next_data)->key >= (next_data)->packet);
340	assert ((next_data)->key + (next_data)->len
341	<= (*next_data)->packet + dh->allocsize);
342
343	(*next_data)->packet -= disp;
344	(*next_data)->key -= disp;
345	++next_data;
346	}
347	while (next_data < &he_data[db->head->nentries]
348	&& (*next_data)->packet == off_alloc);
349
350	off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
351	}
352	}
353	assert (off_alloc == off_allocend);
354
355	assert (off_alloc <= db->head->first_free);
356	if (off_alloc == db->head->first_free)
357	/* We are done, that was the last block. */
358	break;
359	}
360	assert (next_hash == &he[db->head->nentries]);
361	assert (next_data == &he_data[db->head->nentries]);
362
363	/* Now perform the actual moves. */
364	if (moves != NULL)
365	{
366	struct moveinfo *runp = moves->next;
367	do
368	{
369	assert ((char *) runp->to >= db->data);
370	assert ((char *) runp->to + runp->size
371	<= db->data + db->head->first_free);
372	assert ((char *) runp->from >= db->data);
373	assert ((char *) runp->from + runp->size
374	<= db->data + db->head->first_free);
375
376	/* The regions may overlap. */
377	memmove (runp->to, runp->from, runp->size);
378	runp = runp->next;
379	}
380	while (runp != moves->next);
381
382	if (__builtin_expect (debug_level >= 3, 0))
383	dbg_log (_("freed %zu bytes in %s cache"),
384	db->head->first_free
385	- ((char *) moves->to + moves->size - db->data),
386	dbnames[db - dbs]);
387
388	/* The byte past the end of the last copied block is the next
389	available byte. */
390	db->head->first_free = (char *) moves->to + moves->size - db->data;
391
392	/* Consistency check. */
393	if (__builtin_expect (debug_level >= 3, 0))
394	{
395	for (size_t idx = 0; idx < db->head->module; ++idx)
396	{
397	ref_t run = db->head->array[idx];
398	size_t cnt = 0;
399
400	while (run != ENDREF)
401	{
402	if (run + sizeof (struct hashentry) > db->head->first_free)
403	{
404	dbg_log ("entry %zu in hash bucket %zu out of bounds: "
405	"%" PRIu32 "+%zu > %zu\n",
406	cnt, idx, run, sizeof (struct hashentry),
407	db->head->first_free);
408	break;
409	}
410
411	struct hashentry he = (struct hashentry ) (db->data + run);
412
413	if (he->key + he->len > db->head->first_free)
414	dbg_log ("key of entry %zu in hash bucket %zu out of "
415	"bounds: %" PRIu32 "+%zu > %zu\n",
416	cnt, idx, he->key, he->len, db->head->first_free);
417
418	if (he->packet + sizeof (struct datahead)
419	> db->head->first_free)
420	dbg_log ("packet of entry %zu in hash bucket %zu out of "
421	"bounds: %" PRIu32 "+%zu > %zu\n",
422	cnt, idx, he->packet, sizeof (struct datahead),
423	db->head->first_free);
424	else
425	{
426	struct datahead dh = (struct datahead ) (db->data
427	+ he->packet);
428	if (he->packet + dh->allocsize
429	> db->head->first_free)
430	dbg_log ("full key of entry %zu in hash bucket %zu "
431	"out of bounds: %" PRIu32 "+%zu > %zu",
432	cnt, idx, he->packet, dh->allocsize,
433	db->head->first_free);
434	}
435
436	run = he->next;
437	++cnt;
438	}
439	}
440	}
441	}
442
443	/* Make sure the data on disk is updated. */
444	if (db->persistent)
445	msync (db->head, db->data + db->head->first_free - (char *) db->head,
446	MS_ASYNC);
447
448	/* We are done. */
449	out:
450	pthread_mutex_unlock (&db->memlock);
451	pthread_rwlock_unlock (&db->lock);
452	}
453
454
455	void *
456	mempool_alloc (struct database_dyn *db, size_t len)
457	{
458	/* Make sure LEN is a multiple of our maximum alignment so we can
459	keep track of used memory is multiples of this alignment value. */
460	if ((len & BLOCK_ALIGN_M1) != 0)
461	len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
462
463	pthread_mutex_lock (&db->memlock);
464
465	assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
466
467	bool tried_resize = false;
468	void *res;
469	retry:
470	res = db->data + db->head->first_free;
471
472	if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
473	{
474	if (! tried_resize)
475	{
476	/* Try to resize the database. Grow size of 1/8th. */
477	size_t new_data_size = db->head->data_size + db->head->data_size / 8;
478	size_t oldtotal = (sizeof (struct database_pers_head)
479	+ db->head->module * sizeof (ref_t)
480	+ db->head->data_size);
481	size_t newtotal = (sizeof (struct database_pers_head)
482	+ db->head->module * sizeof (ref_t)
483	+ new_data_size);
484
485	if ((!db->mmap_used \|\| ftruncate (db->wr_fd, newtotal) != 0)
486	/* Try to resize the mapping. Note: no MREMAP_MAYMOVE. */
487	&& mremap (db->head, oldtotal, newtotal, 0) == 0)
488	{
489	db->head->data_size = new_data_size;
490	tried_resize = true;
491	goto retry;
492	}
493	}
494
495	if (! db->last_alloc_failed)
496	{
497	dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
498
499	db->last_alloc_failed = true;
500	}
501
502	/* No luck. */
503	res = NULL;
504	}
505	else
506	{
507	db->head->first_free += len;
508
509	db->last_alloc_failed = false;
510	}
511
512	pthread_mutex_unlock (&db->memlock);
513
514	return res;
515	}