/* Cache memory handling.
   Copyright (C) 2004, 2005, 2006, 2008, 2009 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2004.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

#include <assert.h>
#include <errno.h>
#include <error.h>
#include <fcntl.h>
#include <inttypes.h>
#include <libintl.h>
#include <limits.h>
#include <obstack.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/param.h>

#include "dbg_log.h"
#include "nscd.h"


/* Wrapper functions with error checking for standard functions.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);

/* qsort comparator: order hash entries by their address inside the
   mapped data area.  The pointers themselves are the sort key.  */
static int
sort_he (const void *p1, const void *p2)
{
  struct hashentry *left = *(struct hashentry **) p1;
  struct hashentry *right = *(struct hashentry **) p2;

  return (left > right) - (left < right);
}
55
56
57static int
58sort_he_data (const void *p1, const void *p2)
59{
60 struct hashentry *h1 = *(struct hashentry **) p1;
61 struct hashentry *h2 = *(struct hashentry **) p2;
62
63 if (h1->packet < h2->packet)
64 return -1;
65 if (h1->packet > h2->packet)
66 return 1;
67 return 0;
68}
69
70
/* Basic definitions for the bitmap implementation.  Only BITMAP_T
   needs to be changed to choose a different word size.  */
#define BITMAP_T uint8_t
#define BITS (CHAR_BIT * sizeof (BITMAP_T))
#define ALLBITS ((((BITMAP_T) 1) << BITS) - 1)
#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
78
79static void
80markrange (BITMAP_T *mark, ref_t start, size_t len)
81{
82 /* Adjust parameters for block alignment. */
77d40f10 83 assert ((start & BLOCK_ALIGN_M1) == 0);
a95a08b4
UD
84 start /= BLOCK_ALIGN;
85 len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
86
87 size_t elem = start / BITS;
88
89 if (start % BITS != 0)
90 {
91 if (start % BITS + len <= BITS)
92 {
93 /* All fits in the partial byte. */
94 mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
95 return;
96 }
97
77d40f10 98 mark[elem++] |= ALLBITS << (start % BITS);
a95a08b4
UD
99 len -= BITS - (start % BITS);
100 }
101
102 while (len >= BITS)
103 {
104 mark[elem++] = ALLBITS;
105 len -= BITS;
106 }
107
108 if (len > 0)
109 mark[elem] |= ALLBITS >> (BITS - len);
110}
111
112
113void
114gc (struct database_dyn *db)
115{
116 /* We need write access. */
117 pthread_rwlock_wrlock (&db->lock);
118
119 /* And the memory handling lock. */
120 pthread_mutex_lock (&db->memlock);
121
122 /* We need an array representing the data area. All memory
123 allocation is BLOCK_ALIGN aligned so this is the level at which
124 we have to look at the memory. We use a mark and sweep algorithm
125 where the marks are placed in this array. */
126 assert (db->head->first_free % BLOCK_ALIGN == 0);
7ea8eb02
UD
127
128 BITMAP_T *mark;
129 bool mark_use_malloc;
30294ea4
UD
130 /* In prune_cache we are also using a dynamically allocated array.
131 If the array in the caller is too large we have malloc'ed it. */
132 size_t stack_used = sizeof (bool) * db->head->module;
133 if (__builtin_expect (stack_used > MAX_STACK_USE, 0))
134 stack_used = 0;
fa526148
UD
135 size_t nmark = (db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS;
136 size_t memory_needed = nmark * sizeof (BITMAP_T);
fd537e53 137 if (__builtin_expect (stack_used + memory_needed <= MAX_STACK_USE, 1))
7ea8eb02 138 {
fd537e53 139 mark = (BITMAP_T *) alloca_account (memory_needed, stack_used);
7ea8eb02
UD
140 mark_use_malloc = false;
141 memset (mark, '\0', memory_needed);
7ea8eb02
UD
142 }
143 else
144 {
145 mark = (BITMAP_T *) xcalloc (1, memory_needed);
146 mark_use_malloc = true;
147 }
a95a08b4
UD
148
149 /* Create an array which can hold pointer to all the entries in hash
150 entries. */
7ea8eb02
UD
151 memory_needed = 2 * db->head->nentries * sizeof (struct hashentry *);
152 struct hashentry **he;
153 struct hashentry **he_data;
154 bool he_use_malloc;
fd537e53 155 if (__builtin_expect (stack_used + memory_needed <= MAX_STACK_USE, 1))
7ea8eb02 156 {
fd537e53 157 he = alloca_account (memory_needed, stack_used);
7ea8eb02
UD
158 he_use_malloc = false;
159 }
160 else
161 {
162 he = xmalloc (memory_needed);
7ea8eb02
UD
163 he_use_malloc = true;
164 }
fd537e53 165 he_data = &he[db->head->nentries];
a95a08b4
UD
166
167 size_t cnt = 0;
168 for (size_t idx = 0; idx < db->head->module; ++idx)
169 {
170 ref_t *prevp = &db->head->array[idx];
171 ref_t run = *prevp;
172
173 while (run != ENDREF)
174 {
175 assert (cnt < db->head->nentries);
176 he[cnt] = (struct hashentry *) (db->data + run);
177
178 he[cnt]->prevp = prevp;
179 prevp = &he[cnt]->next;
180
181 /* This is the hash entry itself. */
182 markrange (mark, run, sizeof (struct hashentry));
183
184 /* Add the information for the data itself. We do this
185 only for the one special entry marked with FIRST. */
186 if (he[cnt]->first)
187 {
188 struct datahead *dh
189 = (struct datahead *) (db->data + he[cnt]->packet);
190 markrange (mark, he[cnt]->packet, dh->allocsize);
191 }
192
193 run = he[cnt]->next;
194
195 ++cnt;
196 }
197 }
198 assert (cnt == db->head->nentries);
199
c52137d3
UD
200 /* Go through the list of in-flight memory blocks. */
201 struct mem_in_flight *mrunp = mem_in_flight_list;
202 while (mrunp != NULL)
203 {
204 /* NB: There can be no race between this test and another thread
205 setting the field to the index we are looking for because
206 this would require the other thread to also have the memlock
207 for the database.
208
209 NB2: we do not have to look at latter blocks (higher indices) if
210 earlier blocks are not in flight. They are always allocated in
211 sequence. */
212 for (enum in_flight idx = IDX_result_data;
213 idx < IDX_last && mrunp->block[idx].dbidx == db - dbs; ++idx)
214 {
8884028c
UD
215 assert (mrunp->block[idx].blockoff >= 0);
216 assert (mrunp->block[idx].blocklen < db->memsize);
217 assert (mrunp->block[idx].blockoff
218 + mrunp->block[0].blocklen <= db->memsize);
219 markrange (mark, mrunp->block[idx].blockoff,
220 mrunp->block[idx].blocklen);
c52137d3
UD
221 }
222
223 mrunp = mrunp->next;
224 }
225
a95a08b4
UD
226 /* Sort the entries by the addresses of the referenced data. All
227 the entries pointing to the same DATAHEAD object will have the
228 same key. Stability of the sorting is unimportant. */
229 memcpy (he_data, he, cnt * sizeof (struct hashentry *));
230 qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
231
232 /* Sort the entries by their address. */
233 qsort (he, cnt, sizeof (struct hashentry *), sort_he);
234
9ad58cc3
UD
235#define obstack_chunk_alloc xmalloc
236#define obstack_chunk_free free
237 struct obstack ob;
238 obstack_init (&ob);
239
a95a08b4 240 /* Determine the highest used address. */
fa526148 241 size_t high = nmark;
a95a08b4
UD
242 while (high > 0 && mark[high - 1] == 0)
243 --high;
244
245 /* No memory used. */
246 if (high == 0)
247 {
248 db->head->first_free = 0;
249 goto out;
250 }
251
252 /* Determine the highest offset. */
253 BITMAP_T mask = HIGHBIT;
254 ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
255 while ((mark[high - 1] & mask) == 0)
256 {
257 mask >>= 1;
258 highref -= BLOCK_ALIGN;
259 }
260
dc4bb1c2 261 /* Now we can iterate over the MARK array and find bits which are not
a95a08b4
UD
262 set. These represent memory which can be recovered. */
263 size_t byte = 0;
264 /* Find the first gap. */
265 while (byte < high && mark[byte] == ALLBITS)
266 ++byte;
267
268 if (byte == high
269 || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
270 /* No gap. */
271 goto out;
272
273 mask = 1;
274 cnt = 0;
275 while ((mark[byte] & mask) != 0)
276 {
277 ++cnt;
278 mask <<= 1;
279 }
280 ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
281 assert (off_free <= db->head->first_free);
282
283 struct hashentry **next_hash = he;
284 struct hashentry **next_data = he_data;
285
286 /* Skip over the hash entries in the first block which does not get
287 moved. */
288 while (next_hash < &he[db->head->nentries]
289 && *next_hash < (struct hashentry *) (db->data + off_free))
290 ++next_hash;
291
292 while (next_data < &he_data[db->head->nentries]
293 && (*next_data)->packet < off_free)
294 ++next_data;
295
296
c207f23b
UD
297 /* Now we start modifying the data. Make sure all readers of the
298 data are aware of this and temporarily don't use the data. */
299 ++db->head->gc_cycle;
300 assert ((db->head->gc_cycle & 1) == 1);
301
302
a95a08b4
UD
303 /* We do not perform the move operations right away since the
304 he_data array is not sorted by the address of the data. */
305 struct moveinfo
306 {
307 void *from;
308 void *to;
309 size_t size;
310 struct moveinfo *next;
311 } *moves = NULL;
312
313 while (byte < high)
314 {
315 /* Search for the next filled block. BYTE is the index of the
316 entry in MARK, MASK is the bit, and CNT is the bit number.
317 OFF_FILLED is the corresponding offset. */
318 if ((mark[byte] & ~(mask - 1)) == 0)
319 {
320 /* No other bit set in the same element of MARK. Search in the
321 following memory. */
322 do
323 ++byte;
324 while (byte < high && mark[byte] == 0);
325
326 if (byte == high)
327 /* That was it. */
328 break;
329
330 mask = 1;
331 cnt = 0;
332 }
333 /* Find the exact bit. */
334 while ((mark[byte] & mask) == 0)
335 {
336 ++cnt;
337 mask <<= 1;
338 }
339
340 ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
341 assert (off_alloc <= db->head->first_free);
342
343 /* Find the end of the used area. */
344 if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
345 {
346 /* All other bits set. Search the next bytes in MARK. */
347 do
348 ++byte;
349 while (byte < high && mark[byte] == ALLBITS);
350
351 mask = 1;
352 cnt = 0;
353 }
354 if (byte < high)
355 {
356 /* Find the exact bit. */
357 while ((mark[byte] & mask) != 0)
358 {
359 ++cnt;
360 mask <<= 1;
361 }
362 }
363
364 ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
365 assert (off_allocend <= db->head->first_free);
366 /* Now we know that we can copy the area from OFF_ALLOC to
367 OFF_ALLOCEND (not included) to the memory starting at
368 OFF_FREE. First fix up all the entries for the
369 displacement. */
370 ref_t disp = off_alloc - off_free;
371
5811d72b 372 struct moveinfo *new_move;
fd537e53
UD
373 if (__builtin_expect (stack_used + sizeof (*new_move) <= MAX_STACK_USE,
374 1))
375 new_move = alloca_account (sizeof (*new_move), stack_used);
5811d72b
UD
376 else
377 new_move = obstack_alloc (&ob, sizeof (*new_move));
a95a08b4
UD
378 new_move->from = db->data + off_alloc;
379 new_move->to = db->data + off_free;
380 new_move->size = off_allocend - off_alloc;
381 /* Create a circular list to be always able to append at the end. */
382 if (moves == NULL)
383 moves = new_move->next = new_move;
384 else
385 {
386 new_move->next = moves->next;
387 moves = moves->next = new_move;
388 }
389
390 /* The following loop will prepare to move this much data. */
391 off_free += off_allocend - off_alloc;
392
393 while (off_alloc < off_allocend)
394 {
395 /* Determine whether the next entry is for a hash entry or
396 the data. */
397 if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
398 {
399 /* Just correct the forward reference. */
400 *(*next_hash++)->prevp -= disp;
401
402 off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
403 & ~BLOCK_ALIGN_M1);
404 }
405 else
406 {
407 assert (next_data < &he_data[db->head->nentries]);
408 assert ((*next_data)->packet == off_alloc);
409
410 struct datahead *dh = (struct datahead *) (db->data + off_alloc);
411 do
412 {
413 assert ((*next_data)->key >= (*next_data)->packet);
414 assert ((*next_data)->key + (*next_data)->len
415 <= (*next_data)->packet + dh->allocsize);
416
417 (*next_data)->packet -= disp;
418 (*next_data)->key -= disp;
419 ++next_data;
420 }
421 while (next_data < &he_data[db->head->nentries]
422 && (*next_data)->packet == off_alloc);
423
424 off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
425 }
426 }
427 assert (off_alloc == off_allocend);
428
429 assert (off_alloc <= db->head->first_free);
430 if (off_alloc == db->head->first_free)
431 /* We are done, that was the last block. */
432 break;
433 }
434 assert (next_hash == &he[db->head->nentries]);
435 assert (next_data == &he_data[db->head->nentries]);
436
437 /* Now perform the actual moves. */
438 if (moves != NULL)
439 {
440 struct moveinfo *runp = moves->next;
441 do
442 {
443 assert ((char *) runp->to >= db->data);
444 assert ((char *) runp->to + runp->size
445 <= db->data + db->head->first_free);
446 assert ((char *) runp->from >= db->data);
447 assert ((char *) runp->from + runp->size
448 <= db->data + db->head->first_free);
449
450 /* The regions may overlap. */
451 memmove (runp->to, runp->from, runp->size);
452 runp = runp->next;
453 }
454 while (runp != moves->next);
455
456 if (__builtin_expect (debug_level >= 3, 0))
457 dbg_log (_("freed %zu bytes in %s cache"),
458 db->head->first_free
459 - ((char *) moves->to + moves->size - db->data),
460 dbnames[db - dbs]);
461
462 /* The byte past the end of the last copied block is the next
463 available byte. */
464 db->head->first_free = (char *) moves->to + moves->size - db->data;
465
466 /* Consistency check. */
467 if (__builtin_expect (debug_level >= 3, 0))
468 {
469 for (size_t idx = 0; idx < db->head->module; ++idx)
470 {
471 ref_t run = db->head->array[idx];
472 size_t cnt = 0;
473
474 while (run != ENDREF)
475 {
476 if (run + sizeof (struct hashentry) > db->head->first_free)
477 {
478 dbg_log ("entry %zu in hash bucket %zu out of bounds: "
479 "%" PRIu32 "+%zu > %zu\n",
480 cnt, idx, run, sizeof (struct hashentry),
568470bb 481 (size_t) db->head->first_free);
a95a08b4
UD
482 break;
483 }
484
485 struct hashentry *he = (struct hashentry *) (db->data + run);
486
487 if (he->key + he->len > db->head->first_free)
488 dbg_log ("key of entry %zu in hash bucket %zu out of "
489 "bounds: %" PRIu32 "+%zu > %zu\n",
568470bb
UD
490 cnt, idx, he->key, (size_t) he->len,
491 (size_t) db->head->first_free);
a95a08b4
UD
492
493 if (he->packet + sizeof (struct datahead)
494 > db->head->first_free)
495 dbg_log ("packet of entry %zu in hash bucket %zu out of "
496 "bounds: %" PRIu32 "+%zu > %zu\n",
497 cnt, idx, he->packet, sizeof (struct datahead),
568470bb 498 (size_t) db->head->first_free);
a95a08b4
UD
499 else
500 {
501 struct datahead *dh = (struct datahead *) (db->data
502 + he->packet);
503 if (he->packet + dh->allocsize
504 > db->head->first_free)
505 dbg_log ("full key of entry %zu in hash bucket %zu "
506 "out of bounds: %" PRIu32 "+%zu > %zu",
568470bb
UD
507 cnt, idx, he->packet, (size_t) dh->allocsize,
508 (size_t) db->head->first_free);
a95a08b4
UD
509 }
510
511 run = he->next;
512 ++cnt;
513 }
514 }
515 }
516 }
517
518 /* Make sure the data on disk is updated. */
519 if (db->persistent)
520 msync (db->head, db->data + db->head->first_free - (char *) db->head,
521 MS_ASYNC);
522
c207f23b
UD
523
524 /* Now we are done modifying the data. */
525 ++db->head->gc_cycle;
526 assert ((db->head->gc_cycle & 1) == 0);
527
a95a08b4
UD
528 /* We are done. */
529 out:
530 pthread_mutex_unlock (&db->memlock);
531 pthread_rwlock_unlock (&db->lock);
7ea8eb02
UD
532
533 if (he_use_malloc)
534 free (he);
535 if (mark_use_malloc)
536 free (mark);
5811d72b
UD
537
538 obstack_free (&ob, NULL);
a95a08b4
UD
539}
540
541
/* Allocate LEN bytes (rounded up to BLOCK_ALIGN) from DB's data area.
   Returns a pointer into the mapped region, or NULL if the area is
   full and could not be grown.  The allocation is recorded in slot
   IDX of MEM_IN_FLIGHT — presumably a per-thread record that gc()
   reads via mem_in_flight_list so the block is not recycled before
   the caller links it into the hash table; confirm in nscd.h.  */
void *
mempool_alloc (struct database_dyn *db, size_t len, enum in_flight idx)
{
  /* Make sure LEN is a multiple of our maximum alignment so we can
     keep track of used memory in multiples of this alignment value.  */
  if ((len & BLOCK_ALIGN_M1) != 0)
    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);

  pthread_mutex_lock (&db->memlock);

  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);

  /* At most one resize attempt per call; on success we jump back to
     RETRY and the bump allocation below is re-evaluated.  */
  bool tried_resize = false;
  void *res;
 retry:
  res = db->data + db->head->first_free;

  if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
    {
      if (! tried_resize)
	{
	  /* Try to resize the database.  Grow size of 1/8th.  */
	  size_t oldtotal = (sizeof (struct database_pers_head)
			     + roundup (db->head->module * sizeof (ref_t),
					ALIGN)
			     + db->head->data_size);
	  /* Grow by at least 2*LEN so the pending request fits.  */
	  size_t new_data_size = (db->head->data_size
				  + MAX (2 * len, db->head->data_size / 8));
	  size_t newtotal = (sizeof (struct database_pers_head)
			     + roundup (db->head->module * sizeof (ref_t), ALIGN)
			     + new_data_size);
	  /* Clamp the growth to the configured maximum file size.  */
	  if (newtotal > db->max_db_size)
	    {
	      new_data_size -= newtotal - db->max_db_size;
	      newtotal = db->max_db_size;
	    }

	  if (db->mmap_used && newtotal > oldtotal
	      /* We only have to adjust the file size.  The new pages
		 become magically available.  */
	      && TEMP_FAILURE_RETRY_VAL (posix_fallocate (db->wr_fd, oldtotal,
							  newtotal
							  - oldtotal)) == 0)
	    {
	      db->head->data_size = new_data_size;
	      tried_resize = true;
	      goto retry;
	    }
	}

      /* Log the failure only on the first failing allocation so a
	 full database does not flood the log.  */
      if (! db->last_alloc_failed)
	{
	  dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);

	  db->last_alloc_failed = true;
	}

      /* No luck.  */
      res = NULL;
    }
  else
    {
      /* Remember that we have allocated this memory.  This must be
	 recorded before releasing memlock so a concurrent gc() sees
	 the block as in flight.  */
      assert (idx >= 0 && idx < IDX_last);
      mem_in_flight.block[idx].dbidx = db - dbs;
      mem_in_flight.block[idx].blocklen = len;
      mem_in_flight.block[idx].blockoff = db->head->first_free;

      db->head->first_free += len;

      db->last_alloc_failed = false;
    }

  pthread_mutex_unlock (&db->memlock);

  return res;
}