From: Ondřej Surý Date: Wed, 3 Jun 2026 17:58:03 +0000 (+0200) Subject: Simplify the delegation database LRU to a single shared SIEVE X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=c50e96f8d60a10e549667f1c4ae39850fbfc3979;p=thirdparty%2Fbind9.git Simplify the delegation database LRU to a single shared SIEVE The delegation database kept one SIEVE LRU list per loop so that node eviction could run lock-free on each node's owning loop; this required every node to hold a loop reference and to defer its own destruction to that loop via isc_async_run(). Move the SIEVE unlink into the QP write transaction, taking the evicted node directly from dns_qp_deletename(), which serialises every list mutation under the qpmulti writer lock and lets a single shared list replace the per-loop arrays. Node and database teardown are now synchronous. The QP trie and the SIEVE list are wrapped in a reference-counted holder. Each node keeps a reference to the holder so it (and its memory context) stays valid until the node is destroyed, while shutdown drains the SIEVE and destroys the trie from an RCU callback and frees the holder once the last node drops its reference. Reuse across a reconfiguration now moves ownership of the holder to the new view instead of sharing it through a separate owners counter, so dns_delegdb_reuse() is removed. --- diff --git a/bin/named/server.c b/bin/named/server.c index ef02634b6b7..0a1992d658a 100644 --- a/bin/named/server.c +++ b/bin/named/server.c @@ -4330,7 +4330,7 @@ configure_view(dns_view_t *view, dns_viewlist_t *viewlist, cfg_obj_t *config, * server. */ if (pview != NULL) { - dns_delegdb_reuse(pview, view); + dns_delegdb_attach(pview->deleg, &view->deleg); } else { dns_delegdb_create(&view->deleg); } @@ -11263,9 +11263,10 @@ cleanup: static void flush_delegdb(dns_view_t *view) { + REQUIRE(view->deleg != NULL); + dns_delegdb_config_t config = dns_delegdb_getconfig(view->deleg); - dns_delegdb_shutdown(view->deleg); dns_delegdb_detach(&view->deleg); dns_delegdb_create(&view->deleg); dns_delegdb_setconfig(view->deleg, &config); diff --git a/lib/dns/deleg.c b/lib/dns/deleg.c index 0566f2fe062..d0ae712a7ff 100644 --- a/lib/dns/deleg.c +++ b/lib/dns/deleg.c @@ -10,7 +10,6 @@ * See the COPYRIGHT file distributed with this work for additional * information regarding copyright ownership. */ -#include #include #include #include @@ -36,6 +35,30 @@ typedef struct delegdb_node delegdb_node_t; +typedef struct qplru { + isc_mem_t *mctx; + isc_refcount_t references; + dns_qpmulti_t *nodes; + ISC_SIEVE(delegdb_node_t) lru; + struct rcu_head rcu_head; +} qplru_t; + +static void +qplru_destroy(qplru_t *qplru); + +#ifdef DNS_DELEGDB_NODETRACE +#define qplru_ref(ptr) qplru__ref(ptr, __func__, __FILE__, __LINE__) +#define qplru_unref(ptr) qplru__unref(ptr, __func__, __FILE__, __LINE__) +#define qplru_attach(ptr, ptrp) \ + qplru__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define qplru_detach(ptrp) qplru__detach(ptrp, __func__, __FILE__, __LINE__) +ISC_REFCOUNT_STATIC_TRACE_DECL(qplru); +ISC_REFCOUNT_STATIC_TRACE_IMPL(qplru, qplru_destroy); +#else +ISC_REFCOUNT_STATIC_DECL(qplru); +ISC_REFCOUNT_STATIC_IMPL(qplru, qplru_destroy); +#endif + struct dns_delegdb { unsigned int magic; @@ -46,42 +69,30 @@ struct dns_delegdb { isc_mem_t *mctx; isc_refcount_t references; - size_t nloops; - ISC_SIEVE(delegdb_node_t) * lru; - - dns_qpmulti_t *nodes; - - /* - * Keep track of now many owners are actually using the delegdb. For - * instance: - * - * - During a server reload, the new view will (by default) - * start owning the existing delegdb from the previous instance of the - * same view using `dns_delegdb_reuse()`. This will increase `owners` - * by one. - * - * - Later on, either the old instance of the view (or the new one, - * in case of reload failure) will call `dns_delegdb_shutdown()` on - * the delegdb. This will decrement `owners` by one. - * - * If `owners` is bigger than 1 when `dns_delegdb_shutdown()` is called, - * it means the delegdb must not be shutdown because there are other - * owners using it, so `dns_delegdb_shutdown()` bails off in this case. - * (After decrementing `owners`.) - */ - isc_refcount_t owners; + qplru_t *qplru; dns_delegdb_config_t config; }; +static void +qplru_shutdown_rcu(struct rcu_head *rcu_head); + static void delegdb_destroy(dns_delegdb_t *delegdb) { REQUIRE(VALID_DELEGDB(delegdb)); - REQUIRE(delegdb->nodes == NULL); delegdb->magic = 0; - isc_mem_cput(delegdb->mctx, delegdb->lru, delegdb->nloops, - sizeof(delegdb->lru[0])); + + qplru_t *qplru = rcu_xchg_pointer(&delegdb->qplru, NULL); + INSIST(qplru != NULL); + + /* + * Offload the LRU list node deletion to RCU thread (as well as qptrie + * deletion). + */ + call_rcu(&qplru->rcu_head, qplru_shutdown_rcu); + + LIBDNS_DELEGDB_SHUTDOWN(delegdb); isc_mem_putanddetach(&delegdb->mctx, delegdb, sizeof(*delegdb)); } @@ -90,11 +101,12 @@ ISC_REFCOUNT_IMPL(dns_delegdb, delegdb_destroy); struct delegdb_node { unsigned int magic; - dns_delegdb_t *delegdb; + + qplru_t *qplru; + isc_refcount_t references; /* LRU */ - isc_loop_t *loop; ISC_LINK(delegdb_node_t) link; bool visited; @@ -112,48 +124,21 @@ struct delegdb_node { dns_delegset_t *delegset; }; -/* - * All node cleanup is done on the node's owning loop so that the node - * remains fully valid (name, delegset, SIEVE link) until it is actually - * destroyed. This is important because after a node is removed from the - * QP trie, it may still be linked in the owning loop's SIEVE list; if - * another thread's eviction could encounter a half-destroyed node, we - * would get a use-after-free. By deferring everything to the owning - * loop, the node is intact until the SIEVE unlink happens. - */ static void -delegdb_node_destroy_async(void *arg) { - delegdb_node_t *node = arg; - isc_mem_t *mctx = NULL; - +delegdb_node_destroy(delegdb_node_t *node) { REQUIRE(VALID_DELEGDB_NODE(node)); REQUIRE(DNS_DELEGSET_VALID(node->delegset)); - node->magic = 0; - - isc_mem_attach(node->delegdb->mctx, &mctx); + qplru_t *qplru = node->qplru; - if (ISC_SIEVE_LINKED(node, link)) { - ISC_SIEVE_UNLINK(node->delegdb->lru[isc_tid()], node, link); - } + node->magic = 0; - dns_name_free(&node->zonecut, mctx); + dns_name_free(&node->zonecut, qplru->mctx); dns_delegset_detach(&node->delegset); - dns_delegdb_detach(&node->delegdb); - isc_loop_unref(node->loop); - isc_mem_putanddetach(&mctx, node, sizeof(*node)); -} - -static void -delegdb_node_destroy(delegdb_node_t *node) { - REQUIRE(VALID_DELEGDB_NODE(node)); + isc_mem_put(qplru->mctx, node, sizeof(*node)); - if (node->loop == isc_loop()) { - delegdb_node_destroy_async(node); - } else { - isc_async_run(node->loop, delegdb_node_destroy_async, node); - } + qplru_detach(&qplru); } #ifdef DNS_DELEGDB_NODETRACE @@ -161,6 +146,10 @@ delegdb_node_destroy(delegdb_node_t *node) { delegdb_node__ref(ptr, __func__, __FILE__, __LINE__) #define delegdb_node_unref(ptr) \ delegdb_node__unref(ptr, __func__, __FILE__, __LINE__) +#define delegdb_node_attach(ptr, ptrp) \ + delegdb_node__attach(ptr, ptrp, __func__, __FILE__, __LINE__) +#define delegdb_node_detach(ptrp) \ + delegdb_node__detach(ptrp, __func__, __FILE__, __LINE__) ISC_REFCOUNT_STATIC_TRACE_DECL(delegdb_node); ISC_REFCOUNT_STATIC_TRACE_IMPL(delegdb_node, delegdb_node_destroy); #else @@ -218,49 +207,27 @@ dns_delegdb_create(dns_delegdb_t **delegdbp) { *delegdb = (dns_delegdb_t){ .magic = DELEGDB_MAGIC, .mctx = mctx, .references = ISC_REFCOUNT_INITIALIZER(1), - .nloops = isc_loopmgr_nloops(), - .owners = ISC_REFCOUNT_INITIALIZER(1), .config = {} }; - dns_qpmulti_create(mctx, &qpmethods, &delegdb->nodes, &delegdb->nodes); + qplru_t *qplru = isc_mem_get(mctx, sizeof(*qplru)); + *qplru = (qplru_t){ + .mctx = isc_mem_ref(mctx), + .references = ISC_REFCOUNT_INITIALIZER(1), + }; + + dns_qpmulti_create(mctx, &qpmethods, &qplru->nodes, &qplru->nodes); + ISC_SIEVE_INIT(qplru->lru); - delegdb->lru = isc_mem_cget(mctx, delegdb->nloops, - sizeof(delegdb->lru[0])); - for (size_t i = 0; i < delegdb->nloops; i++) { - ISC_SIEVE_INIT(delegdb->lru[i]); - } + delegdb->qplru = MOVE_OWNERSHIP(qplru); LIBDNS_DELEGDB_CREATE(delegdb); *delegdbp = delegdb; } -void -dns_delegdb_reuse(dns_view_t *oldview, dns_view_t *newview) { - REQUIRE(isc_loop_get(isc_tid()) == isc_loop_main()); - REQUIRE(DNS_VIEW_VALID(oldview)); - REQUIRE(DNS_VIEW_VALID(newview)); - - dns_delegdb_attach(oldview->deleg, &newview->deleg); - isc_refcount_increment(&oldview->deleg->owners); - - LIBDNS_DELEGDB_REUSE(newview->deleg); -} - -typedef struct nodes_rcu_head { - isc_mem_t *mctx; - dns_qpmulti_t *nodes; - struct rcu_head rcu_head; -} nodes_rcu_head_t; - static void -deleg_destroy_qpmulti(struct rcu_head *rcu_head) { - nodes_rcu_head_t *nrh = caa_container_of(rcu_head, nodes_rcu_head_t, - rcu_head); - - dns_qpmulti_destroy(&nrh->nodes); - - isc_mem_putanddetach(&nrh->mctx, nrh, sizeof(*nrh)); +qplru_destroy(qplru_t *qplru) { + isc_mem_putanddetach(&qplru->mctx, qplru, sizeof(*qplru)); } inline static bool @@ -362,7 +329,6 @@ dns_delegdb_lookup(dns_delegdb_t *delegdb, const dns_name_t *name, isc_stdtime_t now, unsigned int options, dns_name_t *zonecut, dns_name_t *deepestzonecut, dns_delegset_t **delegsetp) { isc_result_t result = ISC_R_SHUTTINGDOWN; - dns_qpmulti_t *nodes = NULL; dns_qpread_t qpr = {}; char namebuf[DNS_NAME_FORMATSIZE]; @@ -373,16 +339,10 @@ dns_delegdb_lookup(dns_delegdb_t *delegdb, const dns_name_t *name, } LIBDNS_DELEGDB_LOOKUP_START(delegdb, namebuf); - rcu_read_lock(); - nodes = rcu_dereference(delegdb->nodes); - if (nodes != NULL) { - dns_qpmulti_query(nodes, &qpr); - - result = dns__deleg_lookup(delegdb, &qpr, name, now, options, - zonecut, deepestzonecut, delegsetp); - dns_qpread_destroy(nodes, &qpr); - } - rcu_read_unlock(); + dns_qpmulti_query(delegdb->qplru->nodes, &qpr); + result = dns__deleg_lookup(delegdb, &qpr, name, now, options, zonecut, + deepestzonecut, delegsetp); + dns_qpread_destroy(delegdb->qplru->nodes, &qpr); LIBDNS_DELEGDB_LOOKUP_DONE(delegdb, namebuf, result); @@ -491,7 +451,7 @@ dns_delegset_addns(dns_delegset_t *delegset, dns_deleg_t *deleg, } static void -delegdb_cleanup(dns_qp_t *qp, dns_delegdb_t *delegdb, size_t requested) { +delegdb_cleanup(dns_delegdb_t *delegdb, dns_qp_t *qp, size_t requested) { delegdb_node_t *node = NULL; size_t reclaimed = 0; @@ -502,7 +462,7 @@ delegdb_cleanup(dns_qp_t *qp, dns_delegdb_t *delegdb, size_t requested) { LIBDNS_DELEGDB_CLEANUP_START(delegdb, (int)requested); while (reclaimed < requested) { - node = ISC_SIEVE_NEXT(delegdb->lru[isc_tid()], visited, link); + node = ISC_SIEVE_NEXT(delegdb->qplru->lru, visited, link); if (node == NULL) { break; @@ -516,9 +476,14 @@ delegdb_cleanup(dns_qp_t *qp, dns_delegdb_t *delegdb, size_t requested) { LIBDNS_DELEGDB_EVICT(delegdb, node, namebuf); } - ISC_SIEVE_UNLINK(delegdb->lru[isc_tid()], node, link); - (void)dns_qp_deletename(qp, &node->zonecut, - DNS_DBNAMESPACE_NORMAL, NULL, NULL); + delegdb_node_t *old_node = NULL; + isc_result_t result = dns_qp_deletename( + qp, &node->zonecut, DNS_DBNAMESPACE_NORMAL, + (void *)&old_node, NULL); + if (result == ISC_R_SUCCESS) { + ISC_SIEVE_UNLINK(delegdb->qplru->lru, old_node, link); + delegdb_node_detach(&old_node); + } } LIBDNS_DELEGDB_CLEANUP_DONE(delegdb, (int)reclaimed); @@ -554,7 +519,7 @@ delegdb_node_size(const dns_name_t *zonecut, dns_delegset_t *delegset) { } static size_t -delegdb_node_prepare(dns_delegdb_t *delegdb, isc_stdtime_t now, dns_ttl_t ttl, +delegdb_node_prepare(qplru_t *qplru, isc_stdtime_t now, dns_ttl_t ttl, const dns_name_t *zonecut, dns_delegset_t *delegset, delegdb_node_t **nodep) { if (ttl == 0) { @@ -562,19 +527,19 @@ delegdb_node_prepare(dns_delegdb_t *delegdb, isc_stdtime_t now, dns_ttl_t ttl, } delegset->expires = ttl + now; - *nodep = isc_mem_get(delegdb->mctx, sizeof(**nodep)); - **nodep = - (delegdb_node_t){ .magic = DELEGDB_NODE_MAGIC, - .references = ISC_REFCOUNT_INITIALIZER(1), - .zonecut = DNS_NAME_INITEMPTY, - .link = ISC_LINK_INITIALIZER, - .deadlink = ISC_LINK_INITIALIZER, - .size = delegdb_node_size(zonecut, delegset), - .loop = isc_loop_ref(isc_loop()) }; - - dns_delegdb_attach(delegdb, &(*nodep)->delegdb); + *nodep = isc_mem_get(qplru->mctx, sizeof(**nodep)); + **nodep = (delegdb_node_t){ + .magic = DELEGDB_NODE_MAGIC, + .references = ISC_REFCOUNT_INITIALIZER(1), + .zonecut = DNS_NAME_INITEMPTY, + .link = ISC_LINK_INITIALIZER, + .deadlink = ISC_LINK_INITIALIZER, + .size = delegdb_node_size(zonecut, delegset), + .qplru = qplru_ref(qplru), + }; + dns_delegset_attach(delegset, &(*nodep)->delegset); - dns_name_dup(zonecut, delegdb->mctx, &(*nodep)->zonecut); + dns_name_dup(zonecut, qplru->mctx, &(*nodep)->zonecut); return sizeof(**nodep) + (*nodep)->size; } @@ -587,7 +552,6 @@ dns_delegset_insert(dns_delegdb_t *delegdb, const dns_name_t *zonecut, dns_qp_t *qp = NULL; dns_qpread_t qpr = {}; isc_stdtime_t now = isc_stdtime_now(); - dns_qpmulti_t *nodes = NULL; char zonecutbuf[DNS_NAME_FORMATSIZE]; REQUIRE(VALID_DELEGDB(delegdb)); @@ -608,51 +572,45 @@ dns_delegset_insert(dns_delegdb_t *delegdb, const dns_name_t *zonecut, } LIBDNS_DELEGDB_INSERT_START(delegdb, zonecutbuf); - rcu_read_lock(); - nodes = rcu_dereference(delegdb->nodes); - if (nodes == NULL) { - CLEANUP(ISC_R_SHUTTINGDOWN); - } - /* * First, check (without write txn) if the node already exists and is * still valid. */ - dns_qpmulti_query(nodes, &qpr); + dns_qpmulti_query(delegdb->qplru->nodes, &qpr); result = dns_qp_lookup(&qpr, zonecut, DNS_DBNAMESPACE_NORMAL, NULL, NULL, (void **)&node, NULL); if (result == ISC_R_SUCCESS) { INSIST(VALID_DELEGDB_NODE(node)); if (node->delegset->expires > now) { - dns_qpread_destroy(nodes, &qpr); + dns_qpread_destroy(delegdb->qplru->nodes, &qpr); CLEANUP(ISC_R_EXISTS); } } - dns_qpread_destroy(nodes, &qpr); + dns_qpread_destroy(delegdb->qplru->nodes, &qpr); /* * We're about to add a new delegation, check for state of overmem, and * clean up expired/least recently used delegation, then allocate and * initialize a new node. */ - size_t requested = delegdb_node_prepare(delegdb, now, ttl, zonecut, - delegset, &node); + size_t requested = delegdb_node_prepare(delegdb->qplru, now, ttl, + zonecut, delegset, &node); /* * Add the node in the DB */ - dns_qpmulti_write(nodes, &qp); + dns_qpmulti_write(delegdb->qplru->nodes, &qp); - delegdb_cleanup(qp, delegdb, requested); + delegdb_cleanup(delegdb, qp, requested); if (result == ISC_R_SUCCESS) { - /* - * A node at the same zonecut exists, and it is expired. Ignore - * the return value, in case the overridden node would be - * removed in meantime by someone else. - */ - (void)dns_qp_deletename(qp, zonecut, DNS_DBNAMESPACE_NORMAL, - NULL, NULL); + delegdb_node_t *old_node = NULL; + result = dns_qp_deletename(qp, zonecut, DNS_DBNAMESPACE_NORMAL, + (void *)&old_node, NULL); + if (result == ISC_R_SUCCESS) { + ISC_SIEVE_UNLINK(delegdb->qplru->lru, old_node, link); + delegdb_node_detach(&old_node); + } } result = dns_qp_insert(qp, node, 0); @@ -668,22 +626,21 @@ dns_delegset_insert(dns_delegdb_t *delegdb, const dns_name_t *zonecut, * Since not using an update (but write) transaction, * _rollback() won't work here. */ - dns_qpmulti_commit(nodes, &qp); + dns_qpmulti_commit(delegdb->qplru->nodes, &qp); CLEANUP(ISC_R_EXISTS); } /* * The new delegation is added, and can be referenced by SIEVE */ - ISC_SIEVE_INSERT(delegdb->lru[isc_tid()], node, link); + delegdb_node_ref(node); + ISC_SIEVE_INSERT(delegdb->qplru->lru, node, link); delegdb_node_unref(node); dns_qp_compact(qp, DNS_QPGC_MAYBE); - dns_qpmulti_commit(nodes, &qp); + dns_qpmulti_commit(delegdb->qplru->nodes, &qp); cleanup: - rcu_read_unlock(); - LIBDNS_DELEGDB_INSERT_DONE(delegdb, zonecutbuf, result); return result; @@ -841,16 +798,8 @@ dns_delegdb_dump(dns_delegdb_t *delegdb, bool expired, FILE *fp) { dns_qpread_t qpr = {}; delegdb_node_t *node = NULL; isc_stdtime_t now = isc_stdtime_now(); - dns_qpmulti_t *nodes = NULL; - - rcu_read_lock(); - nodes = rcu_dereference(delegdb->nodes); - if (nodes == NULL) { - rcu_read_unlock(); - return; - } - dns_qpmulti_query(nodes, &qpr); + dns_qpmulti_query(delegdb->qplru->nodes, &qpr); dns_qpiter_init(&qpr, &it); while (dns_qpiter_next(&it, (void **)&node, NULL) == ISC_R_SUCCESS) { @@ -862,9 +811,7 @@ dns_delegdb_dump(dns_delegdb_t *delegdb, bool expired, FILE *fp) { fp); } - dns_qpread_destroy(nodes, &qpr); - - rcu_read_unlock(); + dns_qpread_destroy(delegdb->qplru->nodes, &qpr); } void @@ -911,7 +858,7 @@ dns_delegset_fromnsrdataset(isc_mem_t *mctx, dns_rdataset_t *rdataset, } static isc_result_t -deleg_deletetree(dns_qp_t *qp, const dns_name_t *name) { +deleg_deletetree(qplru_t *qplru, dns_qp_t *qp, const dns_name_t *name) { isc_result_t result; delegdb_node_t *node = NULL; dns_qpiter_t it; @@ -954,10 +901,14 @@ out: * Let's actually delete the deadnodes! */ ISC_LIST_FOREACH(deadnodes, deadnode, deadlink) { + delegdb_node_t *old_node = NULL; result = dns_qp_deletename(qp, &deadnode->zonecut, - DNS_DBNAMESPACE_NORMAL, NULL, - NULL); + DNS_DBNAMESPACE_NORMAL, + (void *)&old_node, NULL); INSIST(result == ISC_R_SUCCESS); + INSIST(old_node == deadnode); + ISC_SIEVE_UNLINK(qplru->lru, old_node, link); + delegdb_node_detach(&old_node); } } @@ -965,8 +916,16 @@ out: } static isc_result_t -deleg_deletenode(dns_qp_t *qp, const dns_name_t *name) { - return dns_qp_deletename(qp, name, DNS_DBNAMESPACE_NORMAL, NULL, NULL); +deleg_deletenode(qplru_t *qplru, dns_qp_t *qp, const dns_name_t *name) { + delegdb_node_t *old_node = NULL; + isc_result_t result = dns_qp_deletename( + qp, name, DNS_DBNAMESPACE_NORMAL, (void *)&old_node, NULL); + if (result == ISC_R_SUCCESS) { + ISC_SIEVE_UNLINK(qplru->lru, old_node, link); + delegdb_node_detach(&old_node); + } + + return result; } isc_result_t @@ -974,7 +933,6 @@ dns_delegdb_delete(dns_delegdb_t *delegdb, const dns_name_t *name, bool tree) { REQUIRE(VALID_DELEGDB(delegdb)); REQUIRE(DNS_NAME_VALID(name)); - dns_qpmulti_t *nodes = NULL; dns_qp_t *qp = NULL; isc_result_t result = ISC_R_SHUTTINGDOWN; char namebuf[DNS_NAME_FORMATSIZE]; @@ -983,21 +941,16 @@ dns_delegdb_delete(dns_delegdb_t *delegdb, const dns_name_t *name, bool tree) { dns_name_format(name, namebuf, sizeof(namebuf)); } - rcu_read_lock(); - nodes = rcu_dereference(delegdb->nodes); - if (nodes != NULL) { - dns_qpmulti_write(nodes, &qp); - if (tree) { - result = deleg_deletetree(qp, name); - } else { - result = deleg_deletenode(qp, name); - } - if (result == ISC_R_SUCCESS) { - dns_qp_compact(qp, DNS_QPGC_MAYBE); - } - dns_qpmulti_commit(nodes, &qp); + dns_qpmulti_write(delegdb->qplru->nodes, &qp); + if (tree) { + result = deleg_deletetree(delegdb->qplru, qp, name); + } else { + result = deleg_deletenode(delegdb->qplru, qp, name); } - rcu_read_unlock(); + if (result == ISC_R_SUCCESS) { + dns_qp_compact(qp, DNS_QPGC_MAYBE); + } + dns_qpmulti_commit(delegdb->qplru->nodes, &qp); LIBDNS_DELEGDB_DELETE(delegdb, namebuf, (int)tree, result); @@ -1005,35 +958,17 @@ dns_delegdb_delete(dns_delegdb_t *delegdb, const dns_name_t *name, bool tree) { } static void -delegdb_shutdown_async(void *arg) { - dns_delegdb_t *delegdb = arg; +qplru_shutdown_rcu(struct rcu_head *rcu_head) { + qplru_t *qplru = caa_container_of(rcu_head, qplru_t, rcu_head); - REQUIRE(isc_loop_get(isc_tid()) == isc_loop_main()); - REQUIRE(delegdb != NULL && VALID_DELEGDB(delegdb)); - - if (isc_refcount_decrement(&delegdb->owners) == 1) { - dns_qpmulti_t *nodes = rcu_xchg_pointer(&delegdb->nodes, NULL); - - if (nodes != NULL) { - nodes_rcu_head_t *nrh = isc_mem_get(delegdb->mctx, - sizeof(*nrh)); - *nrh = (nodes_rcu_head_t){ - .mctx = isc_mem_ref(delegdb->mctx), - .nodes = nodes, - }; - call_rcu(&nrh->rcu_head, deleg_destroy_qpmulti); - } - LIBDNS_DELEGDB_SHUTDOWN(delegdb); + ISC_SIEVE_FOREACH(qplru->lru, node, link) { + ISC_SIEVE_UNLINK(qplru->lru, node, link); + delegdb_node_detach(&node); } -} -void -dns_delegdb_shutdown(dns_delegdb_t *delegdb) { - if (isc_loop_get(isc_tid()) == isc_loop_main()) { - delegdb_shutdown_async(delegdb); - } else { - isc_async_run(isc_loop_main(), delegdb_shutdown_async, delegdb); - } + dns_qpmulti_destroy(&qplru->nodes); + + qplru_detach(&qplru); } static void diff --git a/lib/dns/include/dns/deleg.h b/lib/dns/include/dns/deleg.h index 8e85e2182f1..4b621345212 100644 --- a/lib/dns/include/dns/deleg.h +++ b/lib/dns/include/dns/deleg.h @@ -122,20 +122,6 @@ dns_delegdb_setconfig(dns_delegdb_t *delegdb, dns_delegdb_config_t dns_delegdb_getconfig(dns_delegdb_t *delegdb); -/* - * Attach a delegation DB from an existing view to another view. Used when - * reloading the server and the delegation DB is reused. - */ -void -dns_delegdb_reuse(dns_view_t *oldview, dns_view_t *newview); - -/* - * Shutdown the delegation database. Must be called from any view shutting down - * which either created a delegdb or reused a delegdb. - */ -void -dns_delegdb_shutdown(dns_delegdb_t *delegdb); - /* * Lookup for delegations of a given name in the DB. If found, the zonecut is * written and the delegation set is attached to the caller, so it must be diff --git a/lib/dns/view.c b/lib/dns/view.c index 2dfd15c2a16..f4f6daf9fe4 100644 --- a/lib/dns/view.c +++ b/lib/dns/view.c @@ -424,10 +424,6 @@ shutdown_view(dns_view_t *view) { dns_resolver_shutdown(view->resolver); } - if (view->deleg != NULL) { - dns_delegdb_shutdown(view->deleg); - } - rcu_read_lock(); adb = rcu_dereference(view->adb); if (adb != NULL) { diff --git a/tests/dns/deleg_test.c b/tests/dns/deleg_test.c index 953aaeb8431..28b789a2f1c 100644 --- a/tests/dns/deleg_test.c +++ b/tests/dns/deleg_test.c @@ -37,8 +37,10 @@ isc_stdtime_now(void) { #include #include +#include #include #include +#include #include #include @@ -68,7 +70,6 @@ shutdownloop(ISC_ATTR_UNUSED void *arg) { static void shutdowntest(dns_delegdb_t **dbp) { - dns_delegdb_shutdown(*dbp); dns_delegdb_detach(dbp); shutdownloop(NULL); } @@ -574,90 +575,13 @@ deletetests(ISC_ATTR_UNUSED void *arg) { shutdowntest(&db); } -/* - * The cleanup test is split into phases because node destruction is now - * fully deferred to the node's owning loop via isc_async_run(). After - * rcu_barrier() completes, the QP reclamation has fired (calling - * delegdb_node_destroy which schedules the async callback), but the - * actual memory free hasn't happened yet — it's pending on the loop's - * event queue. We must return to the loop between phases so it can - * process the pending node destroys before we check memory usage. - */ -typedef struct { - dns_delegdb_t *db; - isc_stdtime_t now; -} cleanup_ctx_t; - -static void -cleanuptests_phase3(void *arg) { - cleanup_ctx_t *ctx = arg; - dns_delegdb_t *db = ctx->db; - isc_stdtime_t now = ctx->now; - dns_delegset_t *delegset = NULL; - isc_result_t result; - - assert_int_in_range(isc_mem_inuse(db->mctx), ENTRIES_MEM(2 * NENTRIES), - ENTRIES_MEM(2 * NENTRIES) + 100000); - - /* - * baz. is there, but bar. is gone, as it has been - * removed (even if it wasn't expired.) - */ - result = lookupdb(db, "baz.", now, 0, "baz.", &delegset); - assert_int_equal(result, ISC_R_SUCCESS); - dns_delegset_detach(&delegset); - - result = lookupdb(db, "bar.", now, 0, "bar.", &delegset); - assert_int_equal(result, ISC_R_NOTFOUND); - - shutdowntest(&db); -} - -static void -cleanuptests_phase2(void *arg) { - cleanup_ctx_t *ctx = arg; - dns_delegdb_t *db = ctx->db; - isc_stdtime_t now = ctx->now; - dns_deleg_t *deleg = NULL; - dns_delegset_t *delegset = NULL; - isc_result_t result; - - assert_int_in_range(isc_mem_inuse(db->mctx), ENTRIES_MEM(NENTRIES), - ENTRIES_MEM(NENTRIES) + 100000); - - /* - * bar. is there - */ - result = lookupdb(db, "bar.", now, 0, "bar.", &delegset); - assert_int_equal(result, ISC_R_SUCCESS); - dns_delegset_detach(&delegset); - - /* - * Add yet another non expired record. But LRU will have to get - * rid of it because we're hitting the hiwater mark again. - */ - dns_delegset_allocset(db, &delegset); - dns_delegset_allocdeleg(delegset, DNS_DELEGTYPE_DELEG_ADDRESSES, - &deleg); - - for (size_t i = 0; i < NENTRIES; i++) { - addipdeleg(AF_INET6, "1111::2222", delegset, deleg); - } - assert_int_in_range(isc_mem_inuse(db->mctx), ENTRIES_MEM(2 * NENTRIES), - ENTRIES_MEM(2 * NENTRIES) + 100000); - writedb(db, "baz.", 30, &delegset, true); - deleg = NULL; - - rcu_barrier(); - isc_async_run(isc_loop(), cleanuptests_phase3, ctx); -} - static void cleanuptests(ISC_ATTR_UNUSED void *arg) { - static cleanup_ctx_t ctx; dns_delegdb_t *db = NULL; dns_deleg_t *deleg = NULL; dns_delegset_t *delegset = NULL; + isc_stdtime_t now; + isc_result_t result; /* * hiwater is 4375000 = 5000000 - (5000000 >> 3) @@ -668,7 +592,7 @@ cleanuptests(ISC_ATTR_UNUSED void *arg) { dns_delegdb_create(&db); assert_non_null(db); - ctx = (cleanup_ctx_t){ .db = db, .now = isc_stdtime_now() }; + now = isc_stdtime_now(); dns_delegdb_setconfig(db, &config); @@ -724,14 +648,60 @@ cleanuptests(ISC_ATTR_UNUSED void *arg) { deleg = NULL; /* - * stuff. internal node (and delegset) is now removed. rcu_barrier() - * is needed to kick off QP reclamation flow (and run the detaching - * functions from the DB nodes). The actual memory free is deferred - * to the loop via isc_async_run(), so we continue in phase2 to let - * the loop process the pending node destroys. + * stuff. internal node (and delegset) is now removed. Node + * destruction runs synchronously inside the QP-trie chunk reclamation, + * so rcu_barrier() is enough: once it returns, the evicted nodes have + * been detached and freed. */ rcu_barrier(); - isc_async_run(isc_loop(), cleanuptests_phase2, &ctx); + + assert_int_in_range(isc_mem_inuse(db->mctx), ENTRIES_MEM(NENTRIES), + ENTRIES_MEM(NENTRIES) + 100000); + + /* + * bar. is there + */ + result = lookupdb(db, "bar.", now, 0, "bar.", &delegset); + assert_int_equal(result, ISC_R_SUCCESS); + dns_delegset_detach(&delegset); + + /* + * Add yet another non expired record. But LRU will have to get + * rid of it because we're hitting the hiwater mark again. + */ + dns_delegset_allocset(db, &delegset); + dns_delegset_allocdeleg(delegset, DNS_DELEGTYPE_DELEG_ADDRESSES, + &deleg); + + for (size_t i = 0; i < NENTRIES; i++) { + addipdeleg(AF_INET6, "1111::2222", delegset, deleg); + } + assert_int_in_range(isc_mem_inuse(db->mctx), ENTRIES_MEM(2 * NENTRIES), + ENTRIES_MEM(2 * NENTRIES) + 100000); + writedb(db, "baz.", 30, &delegset, true); + deleg = NULL; + + /* + * Re-adding baz. hit the hiwater mark and evicted bar.; wait for the + * reclamation to free it before checking memory and final state. + */ + rcu_barrier(); + + assert_int_in_range(isc_mem_inuse(db->mctx), ENTRIES_MEM(2 * NENTRIES), + ENTRIES_MEM(2 * NENTRIES) + 100000); + + /* + * baz. is there, but bar. is gone, as it has been + * removed (even if it wasn't expired.) + */ + result = lookupdb(db, "baz.", now, 0, "baz.", &delegset); + assert_int_equal(result, ISC_R_SUCCESS); + dns_delegset_detach(&delegset); + + result = lookupdb(db, "bar.", now, 0, "bar.", &delegset); + assert_int_equal(result, ISC_R_NOTFOUND); + + shutdowntest(&db); } ISC_RUN_TEST_IMPL(dns_deleg_basictests) { rundelegtest(basictests); }