]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Replace per-zone lock buckets with global buckets
authorAlessio Podda <alessio@isc.org>
Wed, 7 May 2025 12:52:11 +0000 (14:52 +0200)
committerAlessio Podda <alessio@isc.org>
Wed, 9 Jul 2025 13:27:38 +0000 (15:27 +0200)
Qpzone employs a locking strategy where rwlocks are grouped into
buckets, and each zone gets 17 buckets.
This strategy is suboptimal in two ways:
 - If named is serving a single zone or a zone is the majority of the
   traffic, this strategy pretty much guarantees contention when using
   more than a dozen threads.
 - If named is serving many small zones, it causes substantial memory
   usage.

This commit switches the locking to a global table initialized at start
time. This should have three effects:
 - Performance should improve in the single zone case, since now we are
   selecting from a bigger pool of locks.
 - Memory consumption should go down significantly in the many zone
   cases.
 - Performance should not degrade substantially in the many zone cases.
   The reason for this is that, while we could have substantially more
   zones than locks, we can query/edit only O(num threads) at the same
   time. So by making the global table much bigger than the expected
   number of threads, we can limit contention.

lib/dns/lib.c
lib/dns/qpzone.c
lib/dns/qpzone_p.h
tests/dns/qpzone_test.c

index 4825e374873fdabd6284ebf633676da137745956..03e204f1ab73f3157b52c89bbd697b308b5e43f6 100644 (file)
@@ -22,6 +22,7 @@
 #include "dst_internal.h"
 #include "dyndb_p.h"
 #include "qp_p.h"
+#include "qpzone_p.h"
 
 /***
  *** Functions
@@ -46,6 +47,7 @@ dns__lib_initialize(void) {
        dns__db_initialize();
        dns__dyndb_initialize();
        dns__qp_initialize();
+       dns__qpzone_initialize();
 }
 
 void
@@ -54,6 +56,7 @@ dns__lib_shutdown(void) {
                return;
        }
 
+       dns__qpzone_shutdown();
        dns__qp_shutdown();
        dns__dyndb_shutdown();
        dns__db_shutdown();
index 51f30ddd39c8edd23197642fcd44d67332ab1285..13fa4e7e2072b802318b341fcc5c3067ffad1a9e 100644 (file)
 #define QPDB_ATTR_LOADED  0x01
 #define QPDB_ATTR_LOADING 0x02
 
-#ifndef DEFAULT_BUCKETS_COUNT
-#define DEFAULT_BUCKETS_COUNT 17 /*%< Should be prime. */
-#endif
-
 #define QPDBITER_NSEC3_ORIGIN_NODE(qpdb, iterator)        \
        ((iterator)->current == &(iterator)->nsec3iter && \
         (iterator)->node == (qpdb)->nsec3_origin)
@@ -119,6 +115,8 @@ typedef struct qpzone_bucket {
                          (sizeof(isc_rwlock_t)) % ISC_OS_CACHELINE_SIZE];
 } qpzone_bucket_t;
 
+static qpzone_bucket_t qpzone_buckets_g[1024];
+
 typedef struct qpz_changed {
        qpznode_t *node;
        bool dirty;
@@ -253,8 +251,6 @@ struct qpzonedb {
        dns_qpmulti_t *tree;  /* Main QP trie for data storage */
        dns_qpmulti_t *nsec;  /* NSEC nodes only */
        dns_qpmulti_t *nsec3; /* NSEC3 nodes only */
-
-       qpzone_bucket_t buckets[DEFAULT_BUCKETS_COUNT];
 };
 
 #ifdef DNS_DB_NODETRACE
@@ -418,14 +414,28 @@ static atomic_uint_fast16_t init_count = 0;
  * Failure to follow this hierarchy can result in deadlock.
  */
 
+void
+dns__qpzone_initialize(void) {
+       for (size_t idx = 0; idx < ARRAY_SIZE(qpzone_buckets_g); ++idx) {
+               NODE_INITLOCK(&qpzone_buckets_g[idx].lock);
+       }
+}
+
+void
+dns__qpzone_shutdown(void) {
+       for (size_t idx = 0; idx < ARRAY_SIZE(qpzone_buckets_g); ++idx) {
+               NODE_DESTROYLOCK(&qpzone_buckets_g[idx].lock);
+       }
+}
+
 static isc_rwlock_t *
-qpzone_get_lock(qpzonedb_t *qpdb, qpznode_t *node) {
-       return &qpdb->buckets[node->locknum].lock;
+qpzone_get_lock(qpznode_t *node) {
+       return &qpzone_buckets_g[node->locknum].lock;
 }
 
 static uint16_t
 qpzone_get_locknum(void) {
-       return isc_random_uniform(DEFAULT_BUCKETS_COUNT);
+       return isc_random_uniform(ARRAY_SIZE(qpzone_buckets_g));
 }
 
 /*%
@@ -532,10 +542,6 @@ free_db_rcu(struct rcu_head *rcu_head) {
                dns_name_free(&qpdb->common.origin, qpdb->common.mctx);
        }
 
-       for (size_t i = 0; i < DEFAULT_BUCKETS_COUNT; i++) {
-               NODE_DESTROYLOCK(&qpdb->buckets[i].lock);
-       }
-
        qpz_heap_detach(&qpdb->heap);
 
        if (qpdb->gluecachestats != NULL) {
@@ -730,10 +736,6 @@ dns__qpzone_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
 
        qpdb->heap = new_qpz_heap(mctx);
 
-       for (size_t i = 0; i < DEFAULT_BUCKETS_COUNT; i++) {
-               NODE_INITLOCK(&qpdb->buckets[i].lock);
-       }
-
        /*
         * Attach to the mctx.  The database will persist so long as there
         * are references to it, and attaching to the mctx ensures that our
@@ -1012,7 +1014,7 @@ qpznode_release(qpzonedb_t *qpdb, qpznode_t *node, uint32_t least_serial,
                 * erefs_increment.  If another thread acquires reference it
                 * will be larger than 0, if it doesn't it is going to be 0.
                 */
-               isc_rwlock_t *nlock = qpzone_get_lock(qpdb, node);
+               isc_rwlock_t *nlock = qpzone_get_lock(node);
                qpznode_erefs_increment(qpdb, node DNS__DB_FLARG_PASS);
                NODE_FORCEUPGRADE(nlock, nlocktypep);
                if (!qpznode_erefs_decrement(qpdb, node DNS__DB_FLARG_PASS)) {
@@ -1106,7 +1108,7 @@ setnsec3parameters(dns_db_t *db, qpz_version_t *version) {
 
        version->havensec3 = false;
        node = qpdb->origin;
-       nlock = qpzone_get_lock(qpdb, node);
+       nlock = qpzone_get_lock(node);
        NODE_RDLOCK(nlock, &nlocktype);
        for (header = node->data; header != NULL; header = header_next) {
                header_next = header->next;
@@ -1564,7 +1566,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp,
 
                ISC_LIST_UNLINK(resigned_list, header, link);
 
-               nlock = qpzone_get_lock(qpdb, HEADERNODE(header));
+               nlock = qpzone_get_lock(HEADERNODE(header));
                NODE_WRLOCK(nlock, &nlocktype);
                if (rollback && !IGNORE(header)) {
                        resigninsert(header);
@@ -1584,7 +1586,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp,
                isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
 
                node = changed->node;
-               nlock = qpzone_get_lock(qpdb, node);
+               nlock = qpzone_get_lock(node);
 
                NODE_WRLOCK(nlock, &nlocktype);
                if (rollback) {
@@ -1628,7 +1630,7 @@ qpzone_findrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
        }
        serial = version->serial;
 
-       nlock = qpzone_get_lock(qpdb, node);
+       nlock = qpzone_get_lock(node);
        NODE_RDLOCK(nlock, &nlocktype);
 
        matchtype = DNS_TYPEPAIR_VALUE(type, covers);
@@ -2234,7 +2236,7 @@ loading_addrdataset(void *arg, const dns_name_t *name,
                newheader->resign_lsb = rdataset->resign & 0x1;
        }
 
-       nlock = qpzone_get_lock(qpdb, node);
+       nlock = qpzone_get_lock(node);
        NODE_WRLOCK(nlock, &nlocktype);
        result = add(qpdb, node, name, qpdb->current_version, newheader,
                     DNS_DBADD_MERGE, true, NULL, 0 DNS__DB_FLARG_PASS);
@@ -2443,7 +2445,7 @@ setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
 
        header = dns_rdataset_getheader(rdataset);
 
-       nlock = qpzone_get_lock(qpdb, HEADERNODE(header));
+       nlock = qpzone_get_lock(HEADERNODE(header));
        NODE_WRLOCK(nlock, &nlocktype);
 
        oldheader = *header;
@@ -2501,7 +2503,7 @@ getsigningtime(dns_db_t *db, isc_stdtime_t *resign, dns_name_t *foundname,
                UNLOCK(&qpdb->heap->lock);
                return ISC_R_NOTFOUND;
        }
-       nlock = qpzone_get_lock(qpdb, HEADERNODE(header));
+       nlock = qpzone_get_lock(HEADERNODE(header));
        UNLOCK(&qpdb->heap->lock);
 
 again:
@@ -2510,13 +2512,11 @@ again:
        LOCK(&qpdb->heap->lock);
        header = isc_heap_element(qpdb->heap->heap, 1);
 
-       if (header != NULL &&
-           qpzone_get_lock(qpdb, HEADERNODE(header)) != nlock)
-       {
+       if (header != NULL && qpzone_get_lock(HEADERNODE(header)) != nlock) {
                UNLOCK(&qpdb->heap->lock);
                NODE_UNLOCK(nlock, &nlocktype);
 
-               nlock = qpzone_get_lock(qpdb, HEADERNODE(header));
+               nlock = qpzone_get_lock(HEADERNODE(header));
                goto again;
        }
 
@@ -2701,7 +2701,7 @@ qpzone_setup_delegation(qpz_search_t *search, dns_dbnode_t **nodep,
        }
        if (rdataset != NULL) {
                isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
-               isc_rwlock_t *nlock = qpzone_get_lock(search->qpdb, node);
+               isc_rwlock_t *nlock = qpzone_get_lock(node);
                NODE_RDLOCK(nlock, &nlocktype);
                bindrdataset(search->qpdb, node, search->zonecut_header,
                             rdataset DNS__DB_FLARG_PASS);
@@ -2732,16 +2732,13 @@ step(qpz_search_t *search, dns_qpiter_t *it, direction_t direction,
      dns_name_t *nextname) {
        dns_fixedname_t fnodename;
        dns_name_t *nodename = dns_fixedname_initname(&fnodename);
-       qpzonedb_t *qpdb = NULL;
        qpznode_t *node = NULL;
        isc_result_t result = ISC_R_SUCCESS;
        dns_slabheader_t *header = NULL;
 
-       qpdb = search->qpdb;
-
        result = dns_qpiter_current(it, nodename, (void **)&node, NULL);
        while (result == ISC_R_SUCCESS) {
-               isc_rwlock_t *nlock = qpzone_get_lock(qpdb, node);
+               isc_rwlock_t *nlock = qpzone_get_lock(node);
                isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
                dns_slabheader_t *header_next = NULL;
 
@@ -2887,7 +2884,6 @@ find_wildcard(qpz_search_t *search, qpznode_t **nodep,
              const dns_name_t *qname) {
        dns_slabheader_t *header = NULL;
        isc_result_t result = ISC_R_NOTFOUND;
-       qpzonedb_t *qpdb = search->qpdb;
 
        /*
         * Examine each ancestor level.  If the level's wild bit
@@ -2906,7 +2902,7 @@ find_wildcard(qpz_search_t *search, qpznode_t **nodep,
 
                dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL);
 
-               nlock = qpzone_get_lock(qpdb, node);
+               nlock = qpzone_get_lock(node);
                NODE_RDLOCK(nlock, &nlocktype);
                /*
                 * First we try to figure out if this node is active in
@@ -2950,7 +2946,7 @@ find_wildcard(qpz_search_t *search, qpznode_t **nodep,
                                 * is active in the search's version, we're
                                 * done.
                                 */
-                               nlock = qpzone_get_lock(qpdb, wnode);
+                               nlock = qpzone_get_lock(wnode);
                                NODE_RDLOCK(nlock, &nlocktype);
                                for (header = wnode->data; header != NULL;
                                     header = header->next)
@@ -3133,7 +3129,7 @@ again:
        do {
                dns_slabheader_t *found = NULL, *foundsig = NULL;
                isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
-               isc_rwlock_t *nlock = qpzone_get_lock(search->qpdb, node);
+               isc_rwlock_t *nlock = qpzone_get_lock(node);
                NODE_RDLOCK(nlock, &nlocktype);
                empty_node = true;
                for (header = node->data; header != NULL; header = header_next)
@@ -3276,7 +3272,7 @@ qpzone_check_zonecut(qpznode_t *node, void *arg DNS__DB_FLARG) {
        dns_slabheader_t *found = NULL;
        isc_result_t result = DNS_R_CONTINUE;
        isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
-       isc_rwlock_t *nlock = qpzone_get_lock(search->qpdb, node);
+       isc_rwlock_t *nlock = qpzone_get_lock(node);
 
        NODE_RDLOCK(nlock, &nlocktype);
 
@@ -3565,7 +3561,7 @@ found:
         * have matched a wildcard.
         */
 
-       nlock = qpzone_get_lock(search.qpdb, node);
+       nlock = qpzone_get_lock(node);
        NODE_RDLOCK(nlock, &nlocktype);
 
        if (search.zonecut != NULL) {
@@ -3927,7 +3923,7 @@ tree_exit:
        if (search.need_cleanup) {
                node = search.zonecut;
                INSIST(node != NULL);
-               nlock = qpzone_get_lock(search.qpdb, node);
+               nlock = qpzone_get_lock(node);
 
                NODE_RDLOCK(nlock, &nlocktype);
                qpznode_release(search.qpdb, node, 0,
@@ -4003,7 +3999,7 @@ qpzone_detachnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) {
 
        node = (qpznode_t *)(*nodep);
        *nodep = NULL;
-       nlock = qpzone_get_lock(qpdb, node);
+       nlock = qpzone_get_lock(node);
 
        /*
         * qpzone_destroy() uses call_rcu() API to destroy the node locks, so it
@@ -4083,19 +4079,19 @@ getoriginnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) {
 }
 
 static void
-locknode(dns_db_t *db, dns_dbnode_t *dbnode, isc_rwlocktype_t type) {
-       qpzonedb_t *qpdb = (qpzonedb_t *)db;
+locknode(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *dbnode,
+        isc_rwlocktype_t type) {
        qpznode_t *node = (qpznode_t *)dbnode;
 
-       RWLOCK(qpzone_get_lock(qpdb, node), type);
+       RWLOCK(qpzone_get_lock(node), type);
 }
 
 static void
-unlocknode(dns_db_t *db, dns_dbnode_t *dbnode, isc_rwlocktype_t type) {
-       qpzonedb_t *qpdb = (qpzonedb_t *)db;
+unlocknode(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *dbnode,
+          isc_rwlocktype_t type) {
        qpznode_t *node = (qpznode_t *)dbnode;
 
-       RWUNLOCK(qpzone_get_lock(qpdb, node), type);
+       RWUNLOCK(qpzone_get_lock(node), type);
 }
 
 static void
@@ -4136,12 +4132,11 @@ rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) {
 static isc_result_t
 rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG) {
        qpdb_rdatasetiter_t *qrditer = (qpdb_rdatasetiter_t *)iterator;
-       qpzonedb_t *qpdb = (qpzonedb_t *)(qrditer->common.db);
        qpznode_t *node = (qpznode_t *)qrditer->common.node;
        qpz_version_t *version = (qpz_version_t *)qrditer->common.version;
        dns_slabheader_t *header = NULL, *top_next = NULL;
        isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
-       isc_rwlock_t *nlock = qpzone_get_lock(qpdb, node);
+       isc_rwlock_t *nlock = qpzone_get_lock(node);
 
        NODE_RDLOCK(nlock, &nlocktype);
 
@@ -4178,13 +4173,12 @@ rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG) {
 static isc_result_t
 rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG) {
        qpdb_rdatasetiter_t *qrditer = (qpdb_rdatasetiter_t *)iterator;
-       qpzonedb_t *qpdb = (qpzonedb_t *)(qrditer->common.db);
        qpznode_t *node = (qpznode_t *)qrditer->common.node;
        qpz_version_t *version = (qpz_version_t *)qrditer->common.version;
        dns_slabheader_t *header = NULL;
        dns_slabheader_t *topheader, *topheader_next = NULL;
        isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
-       isc_rwlock_t *nlock = qpzone_get_lock(qpdb, node);
+       isc_rwlock_t *nlock = qpzone_get_lock(node);
 
        header = qrditer->current;
        if (header == NULL) {
@@ -4242,7 +4236,7 @@ rdatasetiter_current(dns_rdatasetiter_t *iterator,
        qpznode_t *node = (qpznode_t *)qrditer->common.node;
        dns_slabheader_t *header = NULL;
        isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
-       isc_rwlock_t *nlock = qpzone_get_lock(qpdb, node);
+       isc_rwlock_t *nlock = qpzone_get_lock(node);
 
        header = qrditer->current;
        REQUIRE(header != NULL);
@@ -4281,7 +4275,7 @@ dereference_iter_node(qpdb_dbiterator_t *iter DNS__DB_FLARG) {
        }
 
        iter->node = NULL;
-       nlock = qpzone_get_lock(qpdb, node);
+       nlock = qpzone_get_lock(node);
 
        NODE_RDLOCK(nlock, &nlocktype);
        qpznode_release(qpdb, node, 0, &nlocktype DNS__DB_FLARG_PASS);
@@ -4777,7 +4771,7 @@ qpzone_addrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
         * (Note: node lock must be acquired after starting
         * the QPDB transaction and released before committing.)
         */
-       nlock = qpzone_get_lock(qpdb, node);
+       nlock = qpzone_get_lock(node);
 
        NODE_WRLOCK(nlock, &nlocktype);
 
@@ -4871,7 +4865,7 @@ qpzone_subtractrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
                newheader->resign_lsb = rdataset->resign & 0x1;
        }
 
-       nlock = qpzone_get_lock(qpdb, node);
+       nlock = qpzone_get_lock(node);
        NODE_WRLOCK(nlock, &nlocktype);
 
        changed = add_changed(newheader, version DNS__DB_FLARG_PASS);
@@ -5033,7 +5027,7 @@ qpzone_deleterdataset(dns_db_t *db, dns_dbnode_t *dbnode,
 
        dns_name_copy(&node->name, nodename);
 
-       nlock = qpzone_get_lock(qpdb, node);
+       nlock = qpzone_get_lock(node);
        NODE_WRLOCK(nlock, &nlocktype);
        result = add(qpdb, node, nodename, version, newheader, DNS_DBADD_FORCE,
                     false, NULL, 0 DNS__DB_FLARG_PASS);
@@ -5052,7 +5046,7 @@ nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) {
        REQUIRE(node != NULL);
        REQUIRE(name != NULL);
 
-       nlock = qpzone_get_lock(qpdb, qpnode);
+       nlock = qpzone_get_lock(qpnode);
 
        NODE_RDLOCK(nlock, &nlocktype);
        dns_name_copy(&qpnode->name, name);
index 45502f95a26e29fc12044262e6d2a0e04361f725..93d0782fef05d633d89867602ecb6e0ba43b5655 100644 (file)
@@ -14,6 +14,7 @@
 #pragma once
 
 #include <isc/heap.h>
+#include <isc/rwlock.h>
 #include <isc/urcu.h>
 
 #include <dns/nsec3.h>
@@ -45,3 +46,8 @@ dns__qpzone_create(isc_mem_t *mctx, const dns_name_t *base, dns_dbtype_t type,
  *
  * \li argc == 0 or argv[0] is a valid memory context.
  */
+
+void
+dns__qpzone_initialize(void);
+void
+dns__qpzone_shutdown(void);
index c464a9e2c1fc7eddc1b70a503fe3d069ded92914..1e0a3c2c160ca0ac109b53bc7e342f7eec7d1162 100644 (file)
@@ -39,7 +39,6 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wshadow"
 #undef CHECK
-#define DEFAULT_BUCKETS_COUNT 1
 #include "qpzone.c"
 #pragma GCC diagnostic pop
 
@@ -130,8 +129,6 @@ ownercase_test_one(const char *str1, const char *str2) {
        dns_name_t *name2 = dns_fixedname_initname(&fname2);
 
        /* Minimal initialization of the mock objects */
-       NODE_INITLOCK(&qpdb->buckets[0].lock);
-
        isc_buffer_constinit(&b, str1, strlen(str1));
        isc_buffer_add(&b, strlen(str1));
        result = dns_name_fromtext(name1, &b, dns_rootname, 0);
@@ -150,8 +147,6 @@ ownercase_test_one(const char *str1, const char *str2) {
        /* Retrieve the case to name2 */
        dns_rdataset_getownercase(&rdataset, name2);
 
-       NODE_DESTROYLOCK(&qpdb->buckets[0].lock);
-
        return dns_name_caseequal(name1, name2);
 }
 
@@ -201,8 +196,6 @@ ISC_RUN_TEST_IMPL(setownercase) {
        UNUSED(state);
 
        /* Minimal initialization of the mock objects */
-       NODE_INITLOCK(&qpdb->buckets[0].lock);
-
        isc_buffer_constinit(&b, str1, strlen(str1));
        isc_buffer_add(&b, strlen(str1));
        result = dns_name_fromtext(name1, &b, dns_rootname, 0);
@@ -218,8 +211,6 @@ ISC_RUN_TEST_IMPL(setownercase) {
        /* Retrieve the case to name2 */
        dns_rdataset_getownercase(&rdataset, name2);
 
-       NODE_DESTROYLOCK(&qpdb->buckets[0].lock);
-
        assert_true(dns_name_caseequal(name1, name2));
 }