]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Use lock-free hashtable for storing resolver fetch contexts
authorOndřej Surý <ondrej@isc.org>
Wed, 17 Sep 2025 13:08:10 +0000 (15:08 +0200)
committerOndřej Surý <ondrej@isc.org>
Tue, 23 Sep 2025 22:08:21 +0000 (00:08 +0200)
Previously, the fetch contexts were stored inside rwlocked hashmap
table.  This was one of the most contended places for the resolver,
especially in the cold cache situation.

Replace the locked hashmap with the lock-free hashtable from the RCU
library and protect the fetch contexts against reuse by replacing the
libisc reference counting with urcu_ref that can soft-fail in situation
where the reference count is already zero.  This allows us to easily
skip re-using the fetch context if it is already in process of being
destroyed.

lib/dns/resolver.c
lib/isc/include/isc/urcu.h

index fe0c36fd081559d611ca068fd2d448c2d4331eee..cf4f465c01afa5911673ce1c3ff273d53bdb9d2f 100644 (file)
@@ -38,6 +38,7 @@
 #include <isc/tid.h>
 #include <isc/time.h>
 #include <isc/timer.h>
+#include <isc/urcu.h>
 #include <isc/util.h>
 
 #include <dns/acl.h>
@@ -246,6 +247,8 @@ STATIC_ASSERT(NS_PROCESSING_LIMIT > NS_RR_LIMIT,
 #define RES_DOMAIN_HASH_BITS 12
 #endif /* ifndef RES_DOMAIN_HASH_BITS */
 
+#define RES_DOMAIN_HASH_SIZE (1 << RES_DOMAIN_HASH_BITS)
+
 /*%
  * Maximum EDNS0 input packet size.
  */
@@ -356,7 +359,7 @@ struct fetchctx {
        dns_edectx_t edectx;
 
        /* Atomic */
-       isc_refcount_t references;
+       struct urcu_ref ref;
 
        /*% Locked by lock. */
        isc_mutex_t lock;
@@ -482,6 +485,9 @@ struct fetchctx {
 
        isc_counter_t *nvalidations;
        isc_counter_t *nfails;
+
+       struct cds_lfht_node ht_node;
+       struct rcu_head rcu_head;
 };
 
 #define FCTX_MAGIC      ISC_MAGIC('F', '!', '!', '!')
@@ -560,8 +566,7 @@ struct dns_resolver {
        dns_dispatchset_t *dispatches4;
        dns_dispatchset_t *dispatches6;
 
-       isc_hashmap_t *fctxs;
-       isc_rwlock_t fctxs_lock;
+       struct cds_lfht *fctxs_ht;
 
        isc_hashmap_t *counters;
        isc_rwlock_t counters_lock;
@@ -679,37 +684,66 @@ findnoqname(fetchctx_t *fctx, dns_message_t *message, dns_name_t *name,
 
 #define fctx_failure_detach(fctxp, result)                              \
        REQUIRE(result != ISC_R_SUCCESS);                               \
+       rcu_read_lock();                                                \
        if (fctx__done(*fctxp, result, __func__, __FILE__, __LINE__)) { \
                fetchctx_detach(fctxp);                                 \
-       }
+       };                                                              \
+       rcu_read_unlock()
 
 #define fctx_failure_unref(fctx, result)                              \
        REQUIRE(result != ISC_R_SUCCESS);                             \
+       rcu_read_lock();                                              \
        if (fctx__done(fctx, result, __func__, __FILE__, __LINE__)) { \
                fetchctx_unref(fctx);                                 \
-       }
+       };                                                            \
+       rcu_read_unlock()
 
 #define fctx_success_detach(fctxp)                                             \
+       rcu_read_lock();                                                       \
        if (fctx__done(*fctxp, ISC_R_SUCCESS, __func__, __FILE__, __LINE__)) { \
                fetchctx_detach(fctxp);                                        \
-       }
+       };                                                                     \
+       rcu_read_unlock()
 
 #define fctx_success_unref(fctx)                                             \
+       rcu_read_lock();                                                     \
        if (fctx__done(fctx, ISC_R_SUCCESS, __func__, __FILE__, __LINE__)) { \
                fetchctx_unref(fctx);                                        \
+       };                                                                   \
+       rcu_read_unlock()
+
+static bool
+fetchctx_attach(fetchctx_t *fctx, fetchctx_t **fctxp) {
+       if (urcu_ref_get_unless_zero(&fctx->ref)) {
+               *fctxp = fctx;
+               return true;
        }
+       return false;
+}
 
-#if DNS_RESOLVER_TRACE
-#define fetchctx_ref(ptr)   fetchctx__ref(ptr, __func__, __FILE__, __LINE__)
-#define fetchctx_unref(ptr) fetchctx__unref(ptr, __func__, __FILE__, __LINE__)
-#define fetchctx_attach(ptr, ptrp) \
-       fetchctx__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
-#define fetchctx_detach(ptrp) \
-       fetchctx__detach(ptrp, __func__, __FILE__, __LINE__)
-ISC_REFCOUNT_TRACE_DECL(fetchctx);
-#else
-ISC_REFCOUNT_DECL(fetchctx);
-#endif
+static bool
+fetchctx_ref(fetchctx_t *fctx) {
+       return urcu_ref_get_unless_zero(&fctx->ref);
+}
+
+static void
+fetchctx_destroy(struct urcu_ref *ref) {
+       fetchctx_t *fctx = caa_container_of(ref, fetchctx_t, ref);
+       fctx_destroy(fctx);
+}
+
+static void
+fetchctx_detach(fetchctx_t **fctxp) {
+       fetchctx_t *fctx = *fctxp;
+       *fctxp = NULL;
+
+       urcu_ref_put(&fctx->ref, fetchctx_destroy);
+}
+
+static void
+fetchctx_unref(fetchctx_t *fctx) {
+       urcu_ref_put(&fctx->ref, fetchctx_destroy);
+}
 
 static bool
 fctx__done(fetchctx_t *fctx, isc_result_t result, const char *func,
@@ -1635,9 +1669,10 @@ fctx_hash(fetchctx_t *fctx) {
        return isc_hash32_finalize(&hash32);
 }
 
-static bool
-fctx_match(void *node, const void *key) {
-       const fetchctx_t *fctx0 = node;
+static int
+fctx_match(struct cds_lfht_node *ht_node, const void *key) {
+       const fetchctx_t *fctx0 = caa_container_of(ht_node, fetchctx_t,
+                                                  ht_node);
        const fetchctx_t *fctx1 = key;
 
        return fctx0->options == fctx1->options && fctx0->type == fctx1->type &&
@@ -1674,13 +1709,11 @@ fctx__done(fetchctx_t *fctx, isc_result_t result, const char *func,
        }
        fctx->state = fetchstate_done;
        FCTX_ATTR_CLR(fctx, FCTX_ATTR_ADDRWAIT);
-       UNLOCK(&fctx->lock);
 
        /* The fctx will get deleted either here or in get_attached_fctx() */
-       RWLOCK(&fctx->res->fctxs_lock, isc_rwlocktype_write);
-       (void)isc_hashmap_delete(fctx->res->fctxs, fctx_hash(fctx), match_ptr,
-                                fctx);
-       RWUNLOCK(&fctx->res->fctxs_lock, isc_rwlocktype_write);
+       cds_lfht_del(fctx->res->fctxs_ht, &fctx->ht_node);
+
+       UNLOCK(&fctx->lock);
 
        if (result == ISC_R_SUCCESS) {
                if (fctx->qmin_warning != ISC_R_SUCCESS) {
@@ -4377,6 +4410,15 @@ cleanup:
        fetchctx_detach(&fctx);
 }
 
+static void
+fctx_destroy_rcu(struct rcu_head *rcu_head) {
+       fetchctx_t *fctx = caa_container_of(rcu_head, fetchctx_t, rcu_head);
+
+       isc_mutex_destroy(&fctx->lock);
+
+       isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx));
+}
+
 static void
 fctx_destroy(fetchctx_t *fctx) {
        dns_resolver_t *res = NULL;
@@ -4431,10 +4473,9 @@ fctx_destroy(fetchctx_t *fctx) {
 
        dns_ede_invalidate(&fctx->edectx);
 
-       isc_mutex_destroy(&fctx->lock);
-
        isc_mem_free(fctx->mctx, fctx->info);
-       isc_mem_putanddetach(&fctx->mctx, fctx, sizeof(*fctx));
+
+       call_rcu(&fctx->rcu_head, fctx_destroy_rcu);
 }
 
 static void
@@ -4659,7 +4700,7 @@ fctx_create(dns_resolver_t *res, isc_loop_t *loop, const dns_name_t *name,
        fprintf(stderr, "fetchctx__init:%s:%s:%d:%p:%p->references = 1\n",
                __func__, __FILE__, __LINE__, fctx, fctx);
 #endif
-       isc_refcount_init(&fctx->references, 1);
+       urcu_ref_set(&fctx->ref, 1);
 
        ISC_LIST_INIT(fctx->queries);
        ISC_LIST_INIT(fctx->finds);
@@ -6703,12 +6744,6 @@ validinanswer(dns_rdataset_t *rdataset, fetchctx_t *fctx) {
        return true;
 }
 
-#if DNS_RESOLVER_TRACE
-ISC_REFCOUNT_TRACE_IMPL(fetchctx, fctx_destroy);
-#else
-ISC_REFCOUNT_IMPL(fetchctx, fctx_destroy);
-#endif
-
 static void
 resume_dslookup(void *arg) {
        dns_fetchresponse_t *resp = (dns_fetchresponse_t *)arg;
@@ -9486,9 +9521,7 @@ dns_resolver__destroy(dns_resolver_t *res) {
        isc_mutex_destroy(&res->primelock);
        isc_mutex_destroy(&res->lock);
 
-       INSIST(isc_hashmap_count(res->fctxs) == 0);
-       isc_hashmap_destroy(&res->fctxs);
-       isc_rwlock_destroy(&res->fctxs_lock);
+       RUNTIME_CHECK(cds_lfht_destroy(res->fctxs_ht, NULL) == 0);
 
        INSIST(isc_hashmap_count(res->counters) == 0);
        isc_hashmap_destroy(&res->counters);
@@ -9598,8 +9631,10 @@ dns_resolver_create(dns_view_t *view, unsigned int options,
 #endif
        isc_refcount_init(&res->references, 1);
 
-       isc_hashmap_create(view->mctx, RES_DOMAIN_HASH_BITS, &res->fctxs);
-       isc_rwlock_init(&res->fctxs_lock);
+       res->fctxs_ht =
+               cds_lfht_new(RES_DOMAIN_HASH_SIZE, RES_DOMAIN_HASH_SIZE, 0,
+                            CDS_LFHT_AUTO_RESIZE | CDS_LFHT_ACCOUNTING, NULL);
+       RUNTIME_CHECK(res->fctxs_ht != NULL);
 
        isc_hashmap_create(view->mctx, RES_DOMAIN_HASH_BITS, &res->counters);
        isc_rwlock_init(&res->counters_lock);
@@ -9748,7 +9783,6 @@ dns_resolver_freeze(dns_resolver_t *res) {
 
 void
 dns_resolver_shutdown(dns_resolver_t *res) {
-       isc_result_t result;
        bool is_false = false;
 
        REQUIRE(VALID_RESOLVER(res));
@@ -9756,26 +9790,14 @@ dns_resolver_shutdown(dns_resolver_t *res) {
        RTRACE("shutdown");
 
        if (atomic_compare_exchange_strong(&res->exiting, &is_false, true)) {
-               isc_hashmap_iter_t *it = NULL;
-
                RTRACE("exiting");
 
-               RWLOCK(&res->fctxs_lock, isc_rwlocktype_write);
-               isc_hashmap_iter_create(res->fctxs, &it);
-               for (result = isc_hashmap_iter_first(it);
-                    result == ISC_R_SUCCESS;
-                    result = isc_hashmap_iter_next(it))
-               {
-                       fetchctx_t *fctx = NULL;
-
-                       isc_hashmap_iter_current(it, (void **)&fctx);
-                       INSIST(fctx != NULL);
-
+               fetchctx_t *fctx = NULL;
+               struct cds_lfht_iter iter;
+               cds_lfht_for_each_entry(res->fctxs_ht, &iter, fctx, ht_node) {
                        fetchctx_ref(fctx);
                        isc_async_run(fctx->loop, fctx_shutdown, fctx);
                }
-               isc_hashmap_iter_destroy(&it);
-               RWUNLOCK(&res->fctxs_lock, isc_rwlocktype_write);
 
                LOCK(&res->lock);
                if (res->spillattimer != NULL) {
@@ -9937,32 +9959,36 @@ get_attached_fctx(dns_resolver_t *res, isc_loop_t *loop, const dns_name_t *name,
                .type = type,
        };
        fetchctx_t *fctx = NULL;
-       isc_rwlocktype_t locktype = isc_rwlocktype_read;
        uint32_t hashval = fctx_hash(&key);
 
-again:
-       RWLOCK(&res->fctxs_lock, locktype);
-       result = isc_hashmap_find(res->fctxs, hashval, fctx_match, &key,
-                                 (void **)&fctx);
-       switch (result) {
-       case ISC_R_SUCCESS:
-               break;
-       case ISC_R_NOTFOUND:
+       rcu_read_lock();
+
+       struct cds_lfht_iter iter;
+       cds_lfht_lookup(res->fctxs_ht, hashval, fctx_match, &key, &iter);
+
+       fctx = cds_lfht_entry(cds_lfht_iter_get_node(&iter), fetchctx_t,
+                             ht_node);
+
+       if (fctx == NULL) {
+       create:
                result = fctx_create(res, loop, name, type, domain, nameservers,
                                     client, options, depth, qc, gqc, &fctx);
                if (result != ISC_R_SUCCESS) {
-                       RWUNLOCK(&res->fctxs_lock, locktype);
+                       rcu_read_unlock();
                        return result;
                }
 
-               UPGRADELOCK(&res->fctxs_lock, locktype);
+               LOCK(&fctx->lock);
 
-               void *found = NULL;
-               result = isc_hashmap_add(res->fctxs, hashval, fctx_match, fctx,
-                                        fctx, &found);
-               if (result == ISC_R_SUCCESS) {
+               struct cds_lfht_node *ht_node =
+                       cds_lfht_add_unique(res->fctxs_ht, hashval, fctx_match,
+                                           &key, &fctx->ht_node);
+
+               if (ht_node == &fctx->ht_node) {
+                       /* Success */
                        *new_fctx = true;
                } else {
+                       UNLOCK(&fctx->lock);
                        /*
                         * The fctx_done() tries to acquire the fctxs_lock.
                         * Destroy the newly created fetchctx directly.
@@ -9971,25 +9997,29 @@ again:
                        isc_timer_destroy(&fctx->timer);
 
                        fetchctx_detach(&fctx);
-                       fctx = found;
-                       result = ISC_R_SUCCESS;
+                       fctx = caa_container_of(ht_node, fetchctx_t, ht_node);
+                       LOCK(&fctx->lock);
                }
-               break;
-       default:
-               UNREACHABLE();
+       } else {
+               LOCK(&fctx->lock);
        }
-       INSIST(result == ISC_R_SUCCESS);
-       fetchctx_ref(fctx);
 
-       /*
-        * We need to lock the fetch context before unlocking the hash table to
-        * prevent other threads from looking up this thread before it has been
-        * properly initialized and started.
-        */
-       LOCK(&fctx->lock);
-       RWUNLOCK(&res->fctxs_lock, locktype);
+       if (!fetchctx_ref(fctx)) {
+               UNLOCK(&fctx->lock);
+               fctx = NULL;
+               goto create;
+       }
+
+       if (cds_lfht_is_node_deleted(&fctx->ht_node)) {
+               UNLOCK(&fctx->lock);
+               fetchctx_detach(&fctx);
+               goto create;
+       }
 
        if (SHUTTINGDOWN(fctx) || fctx->cloned) {
+               /* The fctx will get deleted either here or in fctx__done() */
+               cds_lfht_del(res->fctxs_ht, &fctx->ht_node);
+
                /*
                 * This is the single place where fctx might get
                 * accesses from a different thread, so we need to
@@ -9997,15 +10027,8 @@ again:
                 * help with the release if the fctx has been cloned.
                 */
                UNLOCK(&fctx->lock);
-
-               /* The fctx will get deleted either here or in fctx__done() */
-               RWLOCK(&res->fctxs_lock, isc_rwlocktype_write);
-               (void)isc_hashmap_delete(res->fctxs, fctx_hash(fctx), match_ptr,
-                                        fctx);
-               RWUNLOCK(&res->fctxs_lock, isc_rwlocktype_write);
-
                fetchctx_detach(&fctx);
-               goto again;
+               goto create;
        }
 
        /*
@@ -10013,7 +10036,9 @@ again:
         */
        *fctxp = fctx;
 
-       return result;
+       rcu_read_unlock();
+
+       return ISC_R_SUCCESS;
 }
 
 isc_result_t
@@ -10501,9 +10526,6 @@ dns_resolver_getmaxqueries(dns_resolver_t *resolver) {
 void
 dns_resolver_dumpfetches(dns_resolver_t *res, isc_statsformat_t format,
                         FILE *fp) {
-       isc_result_t result;
-       isc_hashmap_iter_t *it = NULL;
-
        REQUIRE(VALID_RESOLVER(res));
        REQUIRE(fp != NULL);
        REQUIRE(format == isc_statsformat_file);
@@ -10513,18 +10535,15 @@ dns_resolver_dumpfetches(dns_resolver_t *res, isc_statsformat_t format,
                res->spillat, res->spillatmax);
        UNLOCK(&res->lock);
 
-       RWLOCK(&res->fctxs_lock, isc_rwlocktype_read);
-       isc_hashmap_iter_create(res->fctxs, &it);
-       for (result = isc_hashmap_iter_first(it); result == ISC_R_SUCCESS;
-            result = isc_hashmap_iter_next(it))
-       {
+       rcu_read_lock();
+
+       fetchctx_t *fctx = NULL;
+       struct cds_lfht_iter iter;
+       cds_lfht_for_each_entry(res->fctxs_ht, &iter, fctx, ht_node) {
                char typebuf[DNS_RDATATYPE_FORMATSIZE];
                char timebuf[1024];
-               fetchctx_t *fctx = NULL;
                unsigned int resp_count = 0, query_count = 0;
 
-               isc_hashmap_iter_current(it, (void **)&fctx);
-
                LOCK(&fctx->lock);
                dns_name_print(fctx->name, fp);
 
@@ -10562,8 +10581,8 @@ dns_resolver_dumpfetches(dns_resolver_t *res, isc_statsformat_t format,
 
                UNLOCK(&fctx->lock);
        }
-       isc_hashmap_iter_destroy(&it);
-       RWUNLOCK(&res->fctxs_lock, isc_rwlocktype_read);
+
+       rcu_read_unlock();
 }
 
 isc_result_t
index 88e68bf80ceb25a93a91b6d30137ac7a3e0b4745..e2ed8557590809dec6d90145952eebde0e613369 100644 (file)
@@ -33,6 +33,7 @@
 #include <urcu/list.h>
 #include <urcu/rculfhash.h>
 #include <urcu/rculist.h>
+#include <urcu/ref.h>
 #include <urcu/wfstack.h>
 
 #pragma GCC diagnostic pop