From: Ondřej Surý Date: Wed, 17 Sep 2025 13:08:10 +0000 (+0200) Subject: Use lock-free hashtable for storing resolver fetch contexts X-Git-Tag: v9.21.14~30^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6011fb5484ca3693f453ec7dacb0c8761fc2395f;p=thirdparty%2Fbind9.git Use lock-free hashtable for storing resolver fetch contexts Previously, the fetch contexts were stored inside rwlocked hashmap table. This was one of the most contended places for the resolver, especially in the cold cache situation. Replace the locked hashmap with the lock-free hashtable from the RCU library and protect the fetch contexts against reuse by replacing the libisc reference counting with urcu_ref that can soft-fail in situation where the reference count is already zero. This allows us to easily skip re-using the fetch context if it is already in process of being destroyed. --- diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index fe0c36fd081..cf4f465c01a 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -246,6 +247,8 @@ STATIC_ASSERT(NS_PROCESSING_LIMIT > NS_RR_LIMIT, #define RES_DOMAIN_HASH_BITS 12 #endif /* ifndef RES_DOMAIN_HASH_BITS */ +#define RES_DOMAIN_HASH_SIZE (1 << RES_DOMAIN_HASH_BITS) + /*% * Maximum EDNS0 input packet size. */ @@ -356,7 +359,7 @@ struct fetchctx { dns_edectx_t edectx; /* Atomic */ - isc_refcount_t references; + struct urcu_ref ref; /*% Locked by lock. */ isc_mutex_t lock; @@ -482,6 +485,9 @@ struct fetchctx { isc_counter_t *nvalidations; isc_counter_t *nfails; + + struct cds_lfht_node ht_node; + struct rcu_head rcu_head; }; #define FCTX_MAGIC ISC_MAGIC('F', '!', '!', '!') @@ -560,8 +566,7 @@ struct dns_resolver { dns_dispatchset_t *dispatches4; dns_dispatchset_t *dispatches6; - isc_hashmap_t *fctxs; - isc_rwlock_t fctxs_lock; + struct cds_lfht *fctxs_ht; isc_hashmap_t *counters; isc_rwlock_t counters_lock; @@ -679,37 +684,66 @@ findnoqname(fetchctx_t *fctx, dns_message_t *message, dns_name_t *name, #define fctx_failure_detach(fctxp, result) \ REQUIRE(result != ISC_R_SUCCESS); \ + rcu_read_lock(); \ if (fctx__done(*fctxp, result, __func__, __FILE__, __LINE__)) { \ fetchctx_detach(fctxp); \ - } + }; \ + rcu_read_unlock() #define fctx_failure_unref(fctx, result) \ REQUIRE(result != ISC_R_SUCCESS); \ + rcu_read_lock(); \ if (fctx__done(fctx, result, __func__, __FILE__, __LINE__)) { \ fetchctx_unref(fctx); \ - } + }; \ + rcu_read_unlock() #define fctx_success_detach(fctxp) \ + rcu_read_lock(); \ if (fctx__done(*fctxp, ISC_R_SUCCESS, __func__, __FILE__, __LINE__)) { \ fetchctx_detach(fctxp); \ - } + }; \ + rcu_read_unlock() #define fctx_success_unref(fctx) \ + rcu_read_lock(); \ if (fctx__done(fctx, ISC_R_SUCCESS, __func__, __FILE__, __LINE__)) { \ fetchctx_unref(fctx); \ + }; \ + rcu_read_unlock() + +static bool +fetchctx_attach(fetchctx_t *fctx, fetchctx_t **fctxp) { + if (urcu_ref_get_unless_zero(&fctx->ref)) { + *fctxp = fctx; + return true; } + return false; +} -#if DNS_RESOLVER_TRACE -#define fetchctx_ref(ptr) fetchctx__ref(ptr, __func__, __FILE__, __LINE__) -#define fetchctx_unref(ptr) fetchctx__unref(ptr, __func__, __FILE__, __LINE__) -#define fetchctx_attach(ptr, ptrp) \ - fetchctx__attach(ptr, ptrp, __func__, __FILE__, __LINE__) -#define fetchctx_detach(ptrp) \ - fetchctx__detach(ptrp, __func__, __FILE__, __LINE__) -ISC_REFCOUNT_TRACE_DECL(fetchctx); -#else -ISC_REFCOUNT_DECL(fetchctx); -#endif +static bool +fetchctx_ref(fetchctx_t *fctx) { + return urcu_ref_get_unless_zero(&fctx->ref); +} + +static void +fetchctx_destroy(struct urcu_ref *ref) { + fetchctx_t *fctx = caa_container_of(ref, fetchctx_t, ref); + fctx_destroy(fctx); +} + +static void +fetchctx_detach(fetchctx_t **fctxp) { + fetchctx_t *fctx = *fctxp; + *fctxp = NULL; + + urcu_ref_put(&fctx->ref, fetchctx_destroy); +} + +static void +fetchctx_unref(fetchctx_t *fctx) { + urcu_ref_put(&fctx->ref, fetchctx_destroy); +} static bool fctx__done(fetchctx_t *fctx, isc_result_t result, const char *func, @@ -1635,9 +1669,10 @@ fctx_hash(fetchctx_t *fctx) { return isc_hash32_finalize(&hash32); } -static bool -fctx_match(void *node, const void *key) { - const fetchctx_t *fctx0 = node; +static int +fctx_match(struct cds_lfht_node *ht_node, const void *key) { + const fetchctx_t *fctx0 = caa_container_of(ht_node, fetchctx_t, + ht_node); const fetchctx_t *fctx1 = key; return fctx0->options == fctx1->options && fctx0->type == fctx1->type && @@ -1674,13 +1709,11 @@ fctx__done(fetchctx_t *fctx, isc_result_t result, const char *func, } fctx->state = fetchstate_done; FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT); - UNLOCK(&fctx->lock); /* The fctx will get deleted either here or in get_attached_fctx() */ - RWLOCK(&fctx->res->fctxs_lock, isc_rwlocktype_write); - (void)isc_hashmap_delete(fctx->res->fctxs, fctx_hash(fctx), match_ptr, - fctx); - RWUNLOCK(&fctx->res->fctxs_lock, isc_rwlocktype_write); + cds_lfht_del(fctx->res->fctxs_ht, &fctx->ht_node); + + UNLOCK(&fctx->lock); if (result == ISC_R_SUCCESS) { if (fctx->qmin_warning != ISC_R_SUCCESS) { @@ -4377,6 +4410,15 @@ cleanup: fetchctx_detach(&fctx); } +static void +fctx_destroy_rcu(struct rcu_head *rcu_head) { + fetchctx_t *fctx = caa_container_of(rcu_head, fetchctx_t, rcu_head); + + isc_mutex_destroy(&fctx->lock); + + isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx)); +} + static void fctx_destroy(fetchctx_t *fctx) { dns_resolver_t *res = NULL; @@ -4431,10 +4473,9 @@ fctx_destroy(fetchctx_t *fctx) { dns_ede_invalidate(&fctx->edectx); - isc_mutex_destroy(&fctx->lock); - isc_mem_free(fctx->mctx, fctx->info); - isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx)); + + call_rcu(&fctx->rcu_head, fctx_destroy_rcu); } static void @@ -4659,7 +4700,7 @@ fctx_create(dns_resolver_t *res, isc_loop_t *loop, const dns_name_t *name, fprintf(stderr, "fetchctx__init:%s:%s:%d:%p:%p->references = 1\n", __func__, __FILE__, __LINE__, fctx, fctx); #endif - isc_refcount_init(&fctx->references, 1); + urcu_ref_set(&fctx->ref, 1); ISC_LIST_INIT(fctx->queries); ISC_LIST_INIT(fctx->finds); @@ -6703,12 +6744,6 @@ validinanswer(dns_rdataset_t *rdataset, fetchctx_t *fctx) { return true; } -#if DNS_RESOLVER_TRACE -ISC_REFCOUNT_TRACE_IMPL(fetchctx, fctx_destroy); -#else -ISC_REFCOUNT_IMPL(fetchctx, fctx_destroy); -#endif - static void resume_dslookup(void *arg) { dns_fetchresponse_t *resp = (dns_fetchresponse_t *)arg; @@ -9486,9 +9521,7 @@ dns_resolver__destroy(dns_resolver_t *res) { isc_mutex_destroy(&res->primelock); isc_mutex_destroy(&res->lock); - INSIST(isc_hashmap_count(res->fctxs) == 0); - isc_hashmap_destroy(&res->fctxs); - isc_rwlock_destroy(&res->fctxs_lock); + RUNTIME_CHECK(cds_lfht_destroy(res->fctxs_ht, NULL) == 0); INSIST(isc_hashmap_count(res->counters) == 0); isc_hashmap_destroy(&res->counters); @@ -9598,8 +9631,10 @@ dns_resolver_create(dns_view_t *view, unsigned int options, #endif isc_refcount_init(&res->references, 1); - isc_hashmap_create(view->mctx, RES_DOMAIN_HASH_BITS, &res->fctxs); - isc_rwlock_init(&res->fctxs_lock); + res->fctxs_ht = + cds_lfht_new(RES_DOMAIN_HASH_SIZE, RES_DOMAIN_HASH_SIZE, 0, + CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL); + RUNTIME_CHECK(res->fctxs_ht != NULL); isc_hashmap_create(view->mctx, RES_DOMAIN_HASH_BITS, &res->counters); isc_rwlock_init(&res->counters_lock); @@ -9748,7 +9783,6 @@ dns_resolver_freeze(dns_resolver_t *res) { void dns_resolver_shutdown(dns_resolver_t *res) { - isc_result_t result; bool is_false = false; REQUIRE(VALID_RESOLVER(res)); @@ -9756,26 +9790,14 @@ dns_resolver_shutdown(dns_resolver_t *res) { RTRACE("shutdown"); if (atomic_compare_exchange_strong(&res->exiting, &is_false, true)) { - isc_hashmap_iter_t *it = NULL; - RTRACE("exiting"); - RWLOCK(&res->fctxs_lock, isc_rwlocktype_write); - isc_hashmap_iter_create(res->fctxs, &it); - for (result = isc_hashmap_iter_first(it); - result == ISC_R_SUCCESS; - result = isc_hashmap_iter_next(it)) - { - fetchctx_t *fctx = NULL; - - isc_hashmap_iter_current(it, (void **)&fctx); - INSIST(fctx != NULL); - + fetchctx_t *fctx = NULL; + struct cds_lfht_iter iter; + cds_lfht_for_each_entry(res->fctxs_ht, &iter, fctx, ht_node) { fetchctx_ref(fctx); isc_async_run(fctx->loop, fctx_shutdown, fctx); } - isc_hashmap_iter_destroy(&it); - RWUNLOCK(&res->fctxs_lock, isc_rwlocktype_write); LOCK(&res->lock); if (res->spillattimer != NULL) { @@ -9937,32 +9959,36 @@ get_attached_fctx(dns_resolver_t *res, isc_loop_t *loop, const dns_name_t *name, .type = type, }; fetchctx_t *fctx = NULL; - isc_rwlocktype_t locktype = isc_rwlocktype_read; uint32_t hashval = fctx_hash(&key); -again: - RWLOCK(&res->fctxs_lock, locktype); - result = isc_hashmap_find(res->fctxs, hashval, fctx_match, &key, - (void **)&fctx); - switch (result) { - case ISC_R_SUCCESS: - break; - case ISC_R_NOTFOUND: + rcu_read_lock(); + + struct cds_lfht_iter iter; + cds_lfht_lookup(res->fctxs_ht, hashval, fctx_match, &key, &iter); + + fctx = cds_lfht_entry(cds_lfht_iter_get_node(&iter), fetchctx_t, + ht_node); + + if (fctx == NULL) { + create: result = fctx_create(res, loop, name, type, domain, nameservers, client, options, depth, qc, gqc, &fctx); if (result != ISC_R_SUCCESS) { - RWUNLOCK(&res->fctxs_lock, locktype); + rcu_read_unlock(); return result; } - UPGRADELOCK(&res->fctxs_lock, locktype); + LOCK(&fctx->lock); - void *found = NULL; - result = isc_hashmap_add(res->fctxs, hashval, fctx_match, fctx, - fctx, &found); - if (result == ISC_R_SUCCESS) { + struct cds_lfht_node *ht_node = + cds_lfht_add_unique(res->fctxs_ht, hashval, fctx_match, + &key, &fctx->ht_node); + + if (ht_node == &fctx->ht_node) { + /* Success */ *new_fctx = true; } else { + UNLOCK(&fctx->lock); /* * The fctx_done() tries to acquire the fctxs_lock. * Destroy the newly created fetchctx directly. @@ -9971,25 +9997,29 @@ again: isc_timer_destroy(&fctx->timer); fetchctx_detach(&fctx); - fctx = found; - result = ISC_R_SUCCESS; + fctx = caa_container_of(ht_node, fetchctx_t, ht_node); + LOCK(&fctx->lock); } - break; - default: - UNREACHABLE(); + } else { + LOCK(&fctx->lock); } - INSIST(result == ISC_R_SUCCESS); - fetchctx_ref(fctx); - /* - * We need to lock the fetch context before unlocking the hash table to - * prevent other threads from looking up this thread before it has been - * properly initialized and started. - */ - LOCK(&fctx->lock); - RWUNLOCK(&res->fctxs_lock, locktype); + if (!fetchctx_ref(fctx)) { + UNLOCK(&fctx->lock); + fctx = NULL; + goto create; + } + + if (cds_lfht_is_node_deleted(&fctx->ht_node)) { + UNLOCK(&fctx->lock); + fetchctx_detach(&fctx); + goto create; + } if (SHUTTINGDOWN(fctx) || fctx->cloned) { + /* The fctx will get deleted either here or in fctx__done() */ + cds_lfht_del(res->fctxs_ht, &fctx->ht_node); + /* * This is the single place where fctx might get * accesses from a different thread, so we need to @@ -9997,15 +10027,8 @@ again: * help with the release if the fctx has been cloned. */ UNLOCK(&fctx->lock); - - /* The fctx will get deleted either here or in fctx__done() */ - RWLOCK(&res->fctxs_lock, isc_rwlocktype_write); - (void)isc_hashmap_delete(res->fctxs, fctx_hash(fctx), match_ptr, - fctx); - RWUNLOCK(&res->fctxs_lock, isc_rwlocktype_write); - fetchctx_detach(&fctx); - goto again; + goto create; } /* @@ -10013,7 +10036,9 @@ again: */ *fctxp = fctx; - return result; + rcu_read_unlock(); + + return ISC_R_SUCCESS; } isc_result_t @@ -10501,9 +10526,6 @@ dns_resolver_getmaxqueries(dns_resolver_t *resolver) { void dns_resolver_dumpfetches(dns_resolver_t *res, isc_statsformat_t format, FILE *fp) { - isc_result_t result; - isc_hashmap_iter_t *it = NULL; - REQUIRE(VALID_RESOLVER(res)); REQUIRE(fp != NULL); REQUIRE(format == isc_statsformat_file); @@ -10513,18 +10535,15 @@ dns_resolver_dumpfetches(dns_resolver_t *res, isc_statsformat_t format, res->spillat, res->spillatmax); UNLOCK(&res->lock); - RWLOCK(&res->fctxs_lock, isc_rwlocktype_read); - isc_hashmap_iter_create(res->fctxs, &it); - for (result = isc_hashmap_iter_first(it); result == ISC_R_SUCCESS; - result = isc_hashmap_iter_next(it)) - { + rcu_read_lock(); + + fetchctx_t *fctx = NULL; + struct cds_lfht_iter iter; + cds_lfht_for_each_entry(res->fctxs_ht, &iter, fctx, ht_node) { char typebuf[DNS_RDATATYPE_FORMATSIZE]; char timebuf[1024]; - fetchctx_t *fctx = NULL; unsigned int resp_count = 0, query_count = 0; - isc_hashmap_iter_current(it, (void **)&fctx); - LOCK(&fctx->lock); dns_name_print(fctx->name, fp); @@ -10562,8 +10581,8 @@ dns_resolver_dumpfetches(dns_resolver_t *res, isc_statsformat_t format, UNLOCK(&fctx->lock); } - isc_hashmap_iter_destroy(&it); - RWUNLOCK(&res->fctxs_lock, isc_rwlocktype_read); + + rcu_read_unlock(); } isc_result_t diff --git a/lib/isc/include/isc/urcu.h b/lib/isc/include/isc/urcu.h index 88e68bf80ce..e2ed8557590 100644 --- a/lib/isc/include/isc/urcu.h +++ b/lib/isc/include/isc/urcu.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #pragma GCC diagnostic pop