+2937. [bug] Worked around an apparent race condition in over
+ memory conditions. Without this fix a DNS cache DB or
+ ADB could incorrectly stay in an over memory state,
+ effectively refusing further caching, which
+ subsequently made a BIND 9 caching server unworkable.
+ This fix prevents this problem from happening by
+ polling the state of the memory context, rather than
+ making a copy of the state, which appeared to cause
+ a race. This is a "workaround" in that it doesn't
+ solve the possible race per se, but several experiments
+ proved this change solves the symptom. Also, the
+ polling overhead hasn't been reported to be an issue.
+ This bug should only affect a caching server that
+ specifies a finite max-cache-size. It's also quite
+ likely that the bug happens only when enabling threads,
+ but it's not confirmed yet. [RT #21818]
--- 9.5.3b1 released ---
* PERFORMANCE OF THIS SOFTWARE.
*/
-/* $Id: adb.c,v 1.233.36.14 2009/02/03 22:35:00 jinmei Exp $ */
+/* $Id: adb.c,v 1.233.36.15 2010/08/11 23:09:18 jinmei Exp $ */
/*! \file
*
isc_taskmgr_t *taskmgr;
isc_task_t *task;
- isc_boolean_t overmem;
isc_interval_t tick_interval;
int next_cleanbucket;
static inline void inc_adb_erefcnt(dns_adb_t *);
static inline void inc_entry_refcnt(dns_adb_t *, dns_adbentry_t *,
isc_boolean_t);
-static inline isc_boolean_t dec_entry_refcnt(dns_adb_t *, dns_adbentry_t *,
- isc_boolean_t);
+static inline isc_boolean_t dec_entry_refcnt(dns_adb_t *, isc_boolean_t,
+ dns_adbentry_t *, isc_boolean_t);
static inline void violate_locking_hierarchy(isc_mutex_t *, isc_mutex_t *);
static isc_boolean_t clean_namehooks(dns_adb_t *, dns_adbnamehooklist_t *);
static void clean_target(dns_adb_t *, dns_name_t *);
int i;
dns_adbentry_t *e;
- if (adb->overmem) {
+ if (isc_mem_isovermem(adb->mctx)) {
for (i = 0; i < 2; i++) {
e = ISC_LIST_TAIL(adb->entries[bucket]);
if (e == NULL)
dns_adbnamehook_t *namehook;
int addr_bucket;
isc_boolean_t result = ISC_FALSE;
+ isc_boolean_t overmem = isc_mem_isovermem(adb->mctx);
addr_bucket = DNS_ADB_INVALIDBUCKET;
namehook = ISC_LIST_HEAD(*namehooks);
LOCK(&adb->entrylocks[addr_bucket]);
}
- result = dec_entry_refcnt(adb, entry, ISC_FALSE);
+ result = dec_entry_refcnt(adb, overmem, entry,
+ ISC_FALSE);
}
/*
}
static inline isc_boolean_t
-dec_entry_refcnt(dns_adb_t *adb, dns_adbentry_t *entry, isc_boolean_t lock) {
+dec_entry_refcnt(dns_adb_t *adb, isc_boolean_t overmem, dns_adbentry_t *entry,
+ isc_boolean_t lock)
+{
int bucket;
isc_boolean_t destroy_entry;
isc_boolean_t result = ISC_FALSE;
destroy_entry = ISC_FALSE;
if (entry->refcnt == 0 &&
- (adb->entry_sd[bucket] || entry->expires == 0 || adb->overmem ||
+ (adb->entry_sd[bucket] || entry->expires == 0 || overmem ||
(entry->flags & ENTRY_IS_DEAD) != 0)) {
destroy_entry = ISC_TRUE;
result = unlink_entry(adb, entry);
int victims, max_victims;
isc_boolean_t result;
dns_adbname_t *victim, *next_victim;
- isc_boolean_t overmem = adb->overmem;
+ isc_boolean_t overmem = isc_mem_isovermem(adb->mctx);
int scans = 0;
INSIST(bucket != DNS_ADB_INVALIDBUCKET);
adb, NULL, NULL);
adb->cevent_sent = ISC_FALSE;
adb->shutting_down = ISC_FALSE;
- adb->overmem = ISC_FALSE;
ISC_LIST_INIT(adb->whenshutdown);
isc_mem_attach(mem, &adb->mctx);
dns_adbaddrinfo_t *ai;
int bucket;
dns_adb_t *adb;
+ isc_boolean_t overmem;
REQUIRE(findp != NULL && DNS_ADBFIND_VALID(*findp));
find = *findp;
* Return the find to the memory pool, and decrement the adb's
* reference count.
*/
+ overmem = isc_mem_isovermem(adb->mctx);
ai = ISC_LIST_HEAD(find->list);
while (ai != NULL) {
ISC_LIST_UNLINK(find->list, ai, publink);
entry = ai->entry;
ai->entry = NULL;
INSIST(DNS_ADBENTRY_VALID(entry));
- RUNTIME_CHECK(dec_entry_refcnt(adb, entry, ISC_TRUE) ==
+ RUNTIME_CHECK(dec_entry_refcnt(adb, overmem, entry, ISC_TRUE) ==
ISC_FALSE);
free_adbaddrinfo(adb, &ai);
ai = ISC_LIST_HEAD(find->list);
int bucket;
isc_stdtime_t now;
isc_boolean_t want_check_exit = ISC_FALSE;
+ isc_boolean_t overmem;
REQUIRE(DNS_ADB_VALID(adb));
REQUIRE(addrp != NULL);
isc_stdtime_get(&now);
*addrp = NULL;
+ overmem = isc_mem_isovermem(adb->mctx);
bucket = addr->entry->lock_bucket;
LOCK(&adb->entrylocks[bucket]);
entry->expires = now + ADB_ENTRY_WINDOW;
- want_check_exit = dec_entry_refcnt(adb, entry, ISC_FALSE);
+ want_check_exit = dec_entry_refcnt(adb, overmem, entry, ISC_FALSE);
UNLOCK(&adb->entrylocks[bucket]);
static void
water(void *arg, int mark) {
+ /*
+ * We're going to change the way to handle overmem condition: use
+ * isc_mem_isovermem() instead of storing the state via this callback,
+ * since the latter way tends to cause race conditions.
+ * To minimize the change, and in case we re-enable the callback
+ * approach, however, keep this function at the moment.
+ */
+
dns_adb_t *adb = arg;
isc_boolean_t overmem = ISC_TF(mark == ISC_MEM_HIWATER);
DP(ISC_LOG_DEBUG(1),
"adb reached %s water mark", overmem ? "high" : "low");
-
- /*
- * We can't use adb->lock as there is potential for water
- * to be called when adb->lock is held.
- */
- LOCK(&adb->overmemlock);
- if (adb->overmem != overmem) {
- adb->overmem = overmem;
- isc_mem_waterack(adb->mctx, mark);
- }
- UNLOCK(&adb->overmemlock);
}
void
* PERFORMANCE OF THIS SOFTWARE.
*/
-/* $Id: rbtdb.c,v 1.248.12.27 2010/05/10 01:46:05 marka Exp $ */
+/* $Id: rbtdb.c,v 1.248.12.28 2010/08/11 23:09:18 jinmei Exp $ */
/*! \file */
rbtdb_version_t * current_version;
rbtdb_version_t * future_version;
rbtdb_versionlist_t open_versions;
- isc_boolean_t overmem;
isc_task_t * task;
dns_dbnode_t *soanode;
dns_dbnode_t *nsnode;
if (now == 0)
isc_stdtime_get(&now);
- if (rbtdb->overmem) {
+ if (isc_mem_isovermem(rbtdb->common.mctx)) {
isc_uint32_t val;
isc_random_get(&val);
force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
/*
- * Note that 'log' can be true IFF rbtdb->overmem is also true.
- * rbtdb->overmem can currently only be true for cache
+ * Note that 'log' can be true IFF overmem is also true.
+ * overmem can currently only be true for cache
* databases -- hence all of the "overmem cache" log strings.
*/
log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
"reprieve by RETAIN() %s",
printname);
}
- } else if (rbtdb->overmem && log)
+ } else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
isc_log_write(dns_lctx, category, module, level,
"overmem cache: saved %s", printname);
static void
overmem(dns_db_t *db, isc_boolean_t overmem) {
- dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
+ /* This is an empty callback. See adb.c:water() */
- if (IS_CACHE(rbtdb))
- rbtdb->overmem = overmem;
+ UNUSED(db);
+ UNUSED(overmem);
+
+ return;
}
static void
isc_result_t result;
isc_boolean_t delegating;
isc_boolean_t tree_locked = ISC_FALSE;
+ isc_boolean_t cache_is_overmem = ISC_FALSE;
REQUIRE(VALID_RBTDB(rbtdb));
* the lock does not necessarily have to be acquired but it will help
* purge stale entries more effectively.
*/
- if (delegating || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
+ if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
+ cache_is_overmem = ISC_TRUE;
+ if (delegating || cache_is_overmem) {
tree_locked = ISC_TRUE;
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
}
- if (IS_CACHE(rbtdb) && rbtdb->overmem)
+ if (cache_is_overmem)
overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
}
rbtdb->attributes = 0;
rbtdb->secure = ISC_FALSE;
- rbtdb->overmem = ISC_FALSE;
rbtdb->task = NULL;
/*
* PERFORMANCE OF THIS SOFTWARE.
*/
-/* $Id: mem.h,v 1.72.128.8 2010/03/04 23:46:49 tbox Exp $ */
+/* $Id: mem.h,v 1.72.128.9 2010/08/11 23:09:18 jinmei Exp $ */
#ifndef ISC_MEM_H
#define ISC_MEM_H 1
* allocated from the system but not yet used.
*/
+isc_boolean_t
+isc_mem_isovermem(isc_mem_t *mctx);
+/*%<
+ * Return true iff the memory context is in "over memory" state, i.e.,
+ * a hiwater mark has been set and the used amount of memory has exceeds
+ * the mark.
+ */
+
void
isc_mem_setwater(isc_mem_t *mctx, isc_mem_water_t water, void *water_arg,
size_t hiwater, size_t lowater);
* PERFORMANCE OF THIS SOFTWARE.
*/
-/* $Id: mem.c,v 1.137.16.11 2010/05/12 00:52:36 marka Exp $ */
+/* $Id: mem.c,v 1.137.16.12 2010/08/11 23:09:18 jinmei Exp $ */
/*! \file */
size_t hi_water;
size_t lo_water;
isc_boolean_t hi_called;
+ isc_boolean_t is_overmem;
isc_mem_water_t water;
void * water_arg;
ISC_LIST(isc_mempool_t) pools;
ctx->hi_water = 0;
ctx->lo_water = 0;
ctx->hi_called = ISC_FALSE;
+ ctx->is_overmem = ISC_FALSE;
ctx->water = NULL;
ctx->water_arg = NULL;
ctx->magic = MEM_MAGIC;
}
ADD_TRACE(ctx, ptr, size, file, line);
+ if (ctx->hi_water != 0U && ctx->inuse > ctx->hi_water &&
+ !ctx->is_overmem) {
+ ctx->is_overmem = ISC_TRUE;
+ }
if (ctx->hi_water != 0U && !ctx->hi_called &&
ctx->inuse > ctx->hi_water) {
call_water = ISC_TRUE;
* when the context was pushed over hi_water but then had
* isc_mem_setwater() called with 0 for hi_water and lo_water.
*/
+ if (ctx->is_overmem &&
+ (ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) {
+ ctx->is_overmem = ISC_FALSE;
+ }
if (ctx->hi_called &&
(ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) {
if (ctx->water != NULL)
#if ISC_MEM_TRACKLINES
ADD_TRACE(ctx, si, si[-1].u.size, file, line);
#endif
+ if (ctx->hi_water != 0U && ctx->inuse > ctx->hi_water &&
+ !ctx->is_overmem) {
+ ctx->is_overmem = ISC_TRUE;
+ }
+
if (ctx->hi_water != 0U && !ctx->hi_called &&
ctx->inuse > ctx->hi_water) {
ctx->hi_called = ISC_TRUE;
* when the context was pushed over hi_water but then had
* isc_mem_setwater() called with 0 for hi_water and lo_water.
*/
+ if (ctx->is_overmem &&
+ (ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) {
+ ctx->is_overmem = ISC_FALSE;
+ }
+
if (ctx->hi_called &&
(ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) {
ctx->hi_called = ISC_FALSE;
(oldwater)(oldwater_arg, ISC_MEM_LOWATER);
}
+isc_boolean_t
+isc_mem_isovermem(isc_mem_t *ctx) {
+ REQUIRE(VALID_CONTEXT(ctx));
+
+ /*
+ * We don't bother to lock the context because 100% accuracy isn't
+ * necessary (and even if we locked the context the returned value
+ * could be different from the actual state when it's used anyway)
+ */
+ return (ctx->is_overmem);
+}
+
void
isc_mem_setname(isc_mem_t *ctx, const char *name, void *tag) {
REQUIRE(VALID_CONTEXT(ctx));