]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Implement qpzone specific update path
authorAlessio Podda <alessio@isc.org>
Sat, 25 Oct 2025 09:01:35 +0000 (11:01 +0200)
committerAlessio Podda <alessio@isc.org>
Tue, 9 Dec 2025 11:55:30 +0000 (12:55 +0100)
This commit implements a batch update function for qpzone. The main
reason for this is speed: using addrdataset would cause a qp transaction
per rrdataset added, leading to a substantial slowdown compared to
RBTDB. The new API results in a qp transaction per applied diff.

lib/dns/db.c
lib/dns/include/dns/db.h
lib/dns/qpzone.c
lib/dns/xfrin.c
tests/dns/master_test.c

index 85758bcf2f5dae8b8888a9356a1f3e28734b55cc..36416e402a9c53b9d86d090c1142697002f792f3 100644 (file)
@@ -301,7 +301,7 @@ dns_db_endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
 }
 
 isc_result_t
-dns_db_beginupdate(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
+dns_db_beginupdate(dns_db_t *db, dns_dbversion_t *ver, dns_rdatacallbacks_t *callbacks) {
        /*
         * Begin updating 'db'.
         */
@@ -311,7 +311,7 @@ dns_db_beginupdate(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
        REQUIRE(DNS_CALLBACK_VALID(callbacks));
 
        if (db->methods->beginupdate != NULL) {
-               return (db->methods->beginupdate)(db, callbacks);
+               return (db->methods->beginupdate)(db, ver, callbacks);
        }
        return ISC_R_NOTIMPLEMENTED;
 }
index 975b0d91329b2ea55c822b58edabc55414147c9e..92b81496014e853e8ac2b3771ff7bfe56ad95cf1 100644 (file)
@@ -90,6 +90,7 @@ typedef struct dns_db_methods {
                                  dns_rdatacallbacks_t *callbacks);
        isc_result_t (*endload)(dns_db_t *db, dns_rdatacallbacks_t *callbacks);
        isc_result_t (*beginupdate)(dns_db_t           *db,
+                                   dns_dbversion_t            *ver,
                                    dns_rdatacallbacks_t *callbacks);
        isc_result_t (*commitupdate)(dns_db_t          *db,
                                     dns_rdatacallbacks_t *callbacks);
@@ -537,7 +538,7 @@ dns_db_endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks);
  */
 
 isc_result_t
-dns_db_beginupdate(dns_db_t *db, dns_rdatacallbacks_t *callbacks);
+dns_db_beginupdate(dns_db_t *db, dns_dbversion_t *ver, dns_rdatacallbacks_t *callbacks);
 /*%<
  * Begin updating 'db'.
  *
index f2e1d86cfc17fefaafbd5b1099ccc82dc43e7756..ae3f4a43ce683e5d42105f3782aad57ec2d82769 100644 (file)
@@ -42,6 +42,7 @@
 #include <dns/callbacks.h>
 #include <dns/db.h>
 #include <dns/dbiterator.h>
+#include <dns/diff.h>
 #include <dns/dnssec.h>
 #include <dns/fixedname.h>
 #include <dns/masterdump.h>
@@ -96,6 +97,15 @@ typedef struct qpzone_bucket {
                          (sizeof(isc_rwlock_t)) % ISC_OS_CACHELINE_SIZE];
 } qpzone_bucket_t;
 
+/*
+ * Qpzone-specific update context that extends dns_updatectx_t, used in IXFR.
+ */
+typedef struct qpzone_updatectx {
+       dns_updatectx_t base;
+       dns_qp_t *qp;
+       dns_qpread_t qpr;
+} qpzone_updatectx_t;
+
 static qpzone_bucket_t qpzone_buckets_g[1024];
 
 typedef struct qpz_changed {
@@ -2430,57 +2440,99 @@ setgluecachestats(dns_db_t *db, isc_stats_t *stats) {
        return ISC_R_SUCCESS;
 }
 
-static isc_result_t
-findnodeintree(qpzonedb_t *qpdb, const dns_name_t *name, bool create,
-              bool nsec3, dns_dbnode_t **nodep DNS__DB_FLARG) {
-       isc_result_t result;
-       qpznode_t *node = NULL;
-       dns_namespace_t nspace = nsec3 ? DNS_DBNAMESPACE_NSEC3
-                                      : DNS_DBNAMESPACE_NORMAL;
-       dns_qpread_t qpr = { 0 };
+static dns_qp_t*
+begin_transaction(qpzonedb_t *qpdb, dns_qpread_t *qprp, bool create) {
        dns_qp_t *qp = NULL;
 
        if (create) {
                dns_qpmulti_write(qpdb->tree, &qp);
        } else {
-               dns_qpmulti_query(qpdb->tree, &qpr);
-               qp = (dns_qp_t *)&qpr;
+               dns_qpmulti_query(qpdb->tree, qprp);
+               qp = (dns_qp_t *)qprp;
        }
 
+       return qp;
+}
+
+static void
+end_transaction(qpzonedb_t *qpdb, dns_qp_t *qp, bool create) {
+       if (create) {
+               dns_qp_compact(qp, DNS_QPGC_MAYBE);
+               dns_qpmulti_commit(qpdb->tree, &qp);
+       } else {
+               dns_qpread_t *qprp = (dns_qpread_t*) qp;
+               dns_qpread_destroy(qpdb->tree, qprp);
+       }
+}
+
+static isc_result_t
+findnodeintree(qpzonedb_t *qpdb, dns_qp_t *qp, const dns_name_t *name, bool create,
+              bool nsec3, dns_dbnode_t **nodep DNS__DB_FLARG) {
+       isc_result_t result;
+       qpznode_t *node = NULL;
+       dns_namespace_t nspace = nsec3 ? DNS_DBNAMESPACE_NSEC3
+                                      : DNS_DBNAMESPACE_NORMAL;
+       /*
+        * findnodeintree is a wrapper around dns_qp_getname that does some
+        * qpzone-specific bookkeeping before returning the lookup result to the
+        * caller.
+        *
+        * First, we do a lookup ...
+        */
        result = dns_qp_getname(qp, name, nspace, (void **)&node, NULL);
-       if (result != ISC_R_SUCCESS) {
-               if (!create) {
-                       dns_qpread_destroy(qpdb->tree, &qpr);
-                       return result;
-               }
+       if (result == ISC_R_SUCCESS) {
+               /*
+                * ... if the lookup is successful, we need to increase both the
+                * internal and external reference count before returning to
+                * the caller. qpznode_acquire takes care of that.
+                */
+               qpznode_acquire(node DNS__DB_FLARG_PASS);
 
+               INSIST(node->nspace == DNS_DBNAMESPACE_NSEC3 || !nsec3);
+       } else if (result != ISC_R_SUCCESS && create) {
+               /*
+                * ... if the lookup is unsuccesful, but the caller asked us to
+                * create a new node, create one and insert it into the tree.
+                */
                node = new_qpznode(qpdb, name, nspace);
                result = dns_qp_insert(qp, node, 0);
                INSIST(result == ISC_R_SUCCESS);
-               qpznode_unref(node);
+
+               /*
+                * The new node now has two internal references:
+                *  - One from new_qpznode, that initializes references at 1.
+                *  - One from attach_leaf, that increases the reference by
+                *    one at insertion in the qp-tree.
+                * We want the node to have two internal and one external
+                * reference:
+                *  - One internal reference from the qp-tree.
+                *  - One internal and one external reference from the caller.
+                *
+                * So we increase the external reference count by one.
+                */
+               qpznode_erefs_increment(node DNS__DB_FLARG_PASS);
 
                if (!nsec3) {
+                       /*
+                        * Add empty non-terminal nodes to help with wildcards.
+                        */
                        addwildcards(qpdb, qp, name, nspace);
                        if (dns_name_iswildcard(name)) {
                                wildcardmagic(qpdb, qp, name, nspace);
                        }
                }
-       }
-
-       INSIST(node->nspace == DNS_DBNAMESPACE_NSEC3 || !nsec3);
-
-       qpznode_acquire(node DNS__DB_FLARG_PASS);
 
-       if (create) {
-               dns_qp_compact(qp, DNS_QPGC_MAYBE);
-               dns_qpmulti_commit(qpdb->tree, &qp);
-       } else {
-               dns_qpread_destroy(qpdb->tree, &qpr);
+               INSIST(node->nspace == DNS_DBNAMESPACE_NSEC3 || !nsec3);
        }
+       /*
+        * ... if the lookup is unsucessful, and the caller didn't ask us
+        * to create a new node, there is nothing to do. Return the result
+        * of the lookup to the caller, and set *nodep to NULL
+        */
 
        *nodep = (dns_dbnode_t *)node;
 
-       return ISC_R_SUCCESS;
+       return result;
 }
 
 static isc_result_t
@@ -2492,8 +2544,14 @@ qpzone_findnode(dns_db_t *db, const dns_name_t *name, bool create,
 
        REQUIRE(VALID_QPZONE(qpdb));
 
-       return findnodeintree(qpdb, name, create, false,
-                             nodep DNS__DB_FLARG_PASS);
+       dns_qpread_t qpr = { 0 };
+       dns_qp_t *qp = begin_transaction(qpdb, &qpr, create);
+
+       isc_result_t result =  findnodeintree(qpdb, qp, name, create, false, nodep DNS__DB_FLARG_PASS);
+
+       end_transaction(qpdb, qp, create);
+
+       return result;
 }
 
 static isc_result_t
@@ -2503,8 +2561,14 @@ qpzone_findnsec3node(dns_db_t *db, const dns_name_t *name, bool create,
 
        REQUIRE(VALID_QPZONE(qpdb));
 
-       return findnodeintree(qpdb, name, create, true,
-                             nodep DNS__DB_FLARG_PASS);
+       dns_qpread_t qpr = { 0 };
+       dns_qp_t *qp = begin_transaction(qpdb, &qpr, create);
+
+       isc_result_t result =  findnodeintree(qpdb, qp, name, create, true, nodep DNS__DB_FLARG_PASS);
+
+       end_transaction(qpdb, qp, create);
+
+       return result;
 }
 
 static bool
@@ -4592,12 +4656,22 @@ qpzone_createiterator(dns_db_t *db, unsigned int options,
        return ISC_R_SUCCESS;
 }
 
+/*
+ * Main logic to add a new rdataset to a node.
+ *
+ * The reason this is split from qpzone_addrdataset is to allow the reuse of
+ * the same qp transaction for multiple adds.
+ *
+ * qpzone_subtractrdataset doesn't have the same problem since it cannot delete
+ * nodes, only rdatasets.
+ */
 static isc_result_t
-qpzone_addrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
+qpzone_addrdataset_inner(dns_db_t *db, dns_dbnode_t *dbnode,
                   dns_dbversion_t *dbversion,
                   isc_stdtime_t now ISC_ATTR_UNUSED, dns_rdataset_t *rdataset,
                   unsigned int options,
-                  dns_rdataset_t *addedrdataset DNS__DB_FLARG) {
+                  dns_rdataset_t *addedrdataset,
+                  dns_qp_t *nsec) {
        isc_result_t result;
        qpzonedb_t *qpdb = (qpzonedb_t *)db;
        qpznode_t *node = (qpznode_t *)dbnode;
@@ -4608,7 +4682,6 @@ qpzone_addrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
        isc_rwlock_t *nlock = NULL;
        dns_fixedname_t fn;
        dns_name_t *name = dns_fixedname_initname(&fn);
-       dns_qp_t *nsec = NULL;
 
        REQUIRE(VALID_QPZONE(qpdb));
        REQUIRE(version != NULL && version->qpdb == qpdb);
@@ -4659,12 +4732,12 @@ qpzone_addrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
                newheader->resign_lsb = rdataset->resign & 0x1;
        }
 
+
        /*
         * Add to the auxiliary NSEC tree if we're adding an NSEC record.
         */
-       if (!node->havensec && rdataset->type == dns_rdatatype_nsec) {
-               dns_qpmulti_write(qpdb->tree, &nsec);
-       }
+       bool is_nsec = !node->havensec && rdataset->type == dns_rdatatype_nsec;
+       REQUIRE(!is_nsec || nsec != NULL);
 
        /*
         * If we're adding a delegation type or adding to the auxiliary NSEC
@@ -4680,7 +4753,7 @@ qpzone_addrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
        NODE_WRLOCK(nlock, &nlocktype);
 
        result = ISC_R_SUCCESS;
-       if (nsec != NULL) {
+       if (is_nsec) {
                node->havensec = true;
 
                /*
@@ -4690,6 +4763,10 @@ qpzone_addrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
                 */
                qpznode_t *nsecnode = new_qpznode(qpdb, name,
                                                  DNS_DBNAMESPACE_NSEC);
+               /*
+                * We don't need a separate transaction since the NSEC tree and
+                * the normal tree are part of the same qp-tree.
+                */
                (void)dns_qp_insert(nsec, nsecnode, 0);
                qpznode_detach(&nsecnode);
        }
@@ -4711,6 +4788,32 @@ qpzone_addrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
 
        NODE_UNLOCK(nlock, &nlocktype);
 
+       return result;
+}
+
+static isc_result_t
+qpzone_addrdataset(dns_db_t *db, dns_dbnode_t *dbnode,
+                  dns_dbversion_t *dbversion,
+                  isc_stdtime_t now ISC_ATTR_UNUSED, dns_rdataset_t *rdataset,
+                  unsigned int options,
+                  dns_rdataset_t *addedrdataset DNS__DB_FLARG) {
+       qpzonedb_t *qpdb = (qpzonedb_t *)db;
+       qpznode_t *node = (qpznode_t *)dbnode;
+       dns_qp_t *nsec = NULL;
+
+       REQUIRE(VALID_QPZONE(qpdb));
+
+       /*
+        * Add to the auxiliary NSEC tree if we're adding an NSEC record.
+        */
+       if (!node->havensec && rdataset->type == dns_rdatatype_nsec) {
+               dns_qpmulti_write(qpdb->tree, &nsec);
+       }
+
+       isc_result_t result = qpzone_addrdataset_inner(db, dbnode, dbversion,
+                                                      now, rdataset, options,
+                                                      addedrdataset, nsec);
+
        if (nsec != NULL) {
                dns_qpmulti_commit(qpdb->tree, &nsec);
        }
@@ -5270,10 +5373,164 @@ setmaxtypepername(dns_db_t *db, uint32_t value) {
        qpdb->maxtypepername = value;
 }
 
+/*
+ * Qpzone specialization of the update function from dns_rdatacallbacks_t,
+ * meant to reuse the same qp transaction for multiple operations.
+ */
+static isc_result_t
+qpzone_update_rdataset(qpzonedb_t *qpdb, qpz_version_t *version, dns_qp_t *qp,
+                      dns_name_t *name, dns_rdataset_t *rds, dns_diffop_t op) {
+       isc_result_t result;
+       unsigned int options;
+       dns_rdataset_t ardataset;
+       dns_dbnode_t *node = NULL;
+       bool is_nsec3;
+
+       dns_rdataset_init(&ardataset);
+
+       is_nsec3 = (rds->type == dns_rdatatype_nsec3 || rds->covers == dns_rdatatype_nsec3);
+
+       result = findnodeintree(qpdb, qp, name, true, is_nsec3, &node DNS__DB_FLARG_PASS);
+
+       if (result != ISC_R_SUCCESS) {
+               goto failure;
+       }
+
+       switch (op) {
+       case DNS_DIFFOP_ADD:
+       case DNS_DIFFOP_ADDRESIGN:
+               /*
+                * See the comment on qpzone_addrdataset_inner for why we
+                * cannot use qpzone_addrdataset directly.
+                */
+               options = DNS_DBADD_MERGE | DNS_DBADD_EXACT |
+                         DNS_DBADD_EXACTTTL;
+               result = qpzone_addrdataset_inner((dns_db_t *)qpdb, node,
+                                           (dns_dbversion_t *)version, 0,
+                                           rds, options,
+                                           &ardataset, qp DNS__DB_FLARG_PASS);
+               break;
+       case DNS_DIFFOP_DEL:
+       case DNS_DIFFOP_DELRESIGN:
+               options = DNS_DBSUB_EXACT | DNS_DBSUB_WANTOLD;
+               result = qpzone_subtractrdataset((dns_db_t *)qpdb, node,
+                                                (dns_dbversion_t *)version,
+                                                rds, options,
+                                                &ardataset DNS__DB_FLARG_PASS);
+               break;
+       default:
+               UNREACHABLE();
+       }
+
+       bool is_resign = rds->type == dns_rdatatype_rrsig &&
+                   (op == DNS_DIFFOP_DELRESIGN || op == DNS_DIFFOP_ADDRESIGN);
+
+       if (result == ISC_R_SUCCESS && is_resign) {
+               isc_stdtime_t resign;
+               resign = dns_rdataset_minresign(&ardataset);
+               dns_db_setsigningtime((dns_db_t *)qpdb, &ardataset, resign);
+       }
+
+failure:
+       if (node != NULL) {
+               dns_db_detachnode(&node);
+       }
+       if (dns_rdataset_isassociated(&ardataset)) {
+               dns_rdataset_disassociate(&ardataset);
+       }
+       return result;
+}
+
+static isc_result_t
+qpzone_update_callback(void *arg, const dns_name_t *name, dns_rdataset_t *rds,
+                      dns_diffop_t op DNS__DB_FLARG) {
+       qpzone_updatectx_t *ctx = arg;
+       qpzonedb_t *qpdb = (qpzonedb_t *)ctx->base.db;
+       qpz_version_t *version = (qpz_version_t *)ctx->base.ver;
+
+       return qpzone_update_rdataset(qpdb, version, ctx->qp, (dns_name_t *)name, rds, op);
+}
+
+static isc_result_t
+qpzone_beginupdate(dns_db_t *db, dns_dbversion_t *ver, dns_rdatacallbacks_t *callbacks) {
+       qpzonedb_t *qpdb = (qpzonedb_t *)db;
+
+       REQUIRE(VALID_QPZONE(qpdb));
+       REQUIRE(ver != NULL);
+       REQUIRE(DNS_CALLBACK_VALID(callbacks));
+
+       qpzone_updatectx_t* ctx = isc_mem_get(qpdb->common.mctx, sizeof(*ctx));
+       *ctx = (qpzone_updatectx_t) {
+               .base.db = db,
+               .base.ver = ver,
+               .base.warn = true,
+               .qpr = (dns_qpread_t){ 0 },
+       };
+       ctx->qp = begin_transaction(qpdb, &ctx->qpr, true);
+
+       callbacks->update = qpzone_update_callback;
+       callbacks->add_private = ctx;
+
+       return ISC_R_SUCCESS;
+}
+
+static isc_result_t
+qpzone_commitupdate(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
+       qpzonedb_t *qpdb = (qpzonedb_t *)db;
+       qpzone_updatectx_t *ctx;
+
+       REQUIRE(VALID_QPZONE(qpdb));
+       REQUIRE(DNS_CALLBACK_VALID(callbacks));
+
+       ctx = (qpzone_updatectx_t *)callbacks->add_private;
+       if (ctx != NULL) {
+               end_transaction(qpdb, ctx->qp, true);
+
+               isc_mem_put(qpdb->common.mctx, ctx, sizeof(*ctx));
+               /*
+                * We need to allow the context to be committed or aborted
+                * multiple times, so we set the callback data to NULL
+                * to signal that nothing needs to be done with this context.
+                */
+               callbacks->add_private = NULL;
+       }
+
+       return ISC_R_SUCCESS;
+}
+
+/*
+ * For now, abortupdate needs to *commit* the results, so that closeversion
+ * cleanup might work.
+ */
+static isc_result_t
+qpzone_abortupdate(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
+       qpzonedb_t *qpdb = (qpzonedb_t *)db;
+       qpzone_updatectx_t *ctx;
+
+       REQUIRE(VALID_QPZONE(qpdb));
+       REQUIRE(DNS_CALLBACK_VALID(callbacks));
+
+       ctx = (qpzone_updatectx_t *)callbacks->add_private;
+       if (ctx != NULL) {
+               end_transaction(qpdb, ctx->qp, true);
+
+               isc_mem_put(qpdb->common.mctx, ctx, sizeof(*ctx));
+               /*
+                * See qpzone_commitupdate.
+                */
+               callbacks->add_private = NULL;
+       }
+
+       return ISC_R_SUCCESS;
+}
+
 static dns_dbmethods_t qpdb_zonemethods = {
        .destroy = qpdb_destroy,
        .beginload = beginload,
        .endload = endload,
+       .beginupdate = qpzone_beginupdate,
+       .commitupdate = qpzone_commitupdate,
+       .abortupdate = qpzone_abortupdate,
        .currentversion = currentversion,
        .newversion = newversion,
        .attachversion = attachversion,
index 1464717f0dffbfdedd99d3dcdcbb01d1d0201640..783b7d0dd1ffcd7a219f4317fe764dacfb6bbb80 100644 (file)
@@ -518,12 +518,12 @@ ixfr_apply_one(dns_xfrin_t *xfr, ixfr_apply_data_t *data) {
 
        dns_rdatacallbacks_t callbacks;
        dns_rdatacallbacks_init(&callbacks);
-       dns_db_beginupdate(xfr->db, &callbacks);
-
 
        CHECK(ixfr_begin_transaction(&xfr->ixfr));
 
-       CHECK(dns_diff_apply(&data->diff, xfr->db, xfr->ver));
+       dns_db_beginupdate(xfr->db, xfr->ver, &callbacks);
+
+       CHECK(dns_diff_apply_with_callbacks(&data->diff, &callbacks));
        if (xfr->maxrecords != 0U) {
                result = dns_db_getsize(xfr->db, xfr->ver, &records, NULL);
                if (result == ISC_R_SUCCESS && records > xfr->maxrecords) {
@@ -533,6 +533,12 @@ ixfr_apply_one(dns_xfrin_t *xfr, ixfr_apply_data_t *data) {
        if (xfr->ixfr.journal != NULL) {
                CHECK(dns_journal_writediff(xfr->ixfr.journal, &data->diff));
        }
+       /*
+        * In the current implementation, we always commit the results, since
+        * dns_zone_verifydb doesn't know how to access the in-progress
+        * transaction.
+        */
+       dns_db_commitupdate(xfr->db, &callbacks);
 
        /*
         * At the moment, rdatacallbacks doesn't offer a way to inspect the
index 1d822fed8db8405820bd1fde8df7833a641e8343..931f7017cace4d8902b436ce60e93d5899ad5a1a 100644 (file)
@@ -66,12 +66,13 @@ rawdata_callback(dns_zone_t *zone, dns_masterrawheader_t *header);
 
 static isc_result_t
 add_callback(void *arg, const dns_name_t *owner,
-            dns_rdataset_t *dataset DNS__DB_FLARG) {
+            dns_rdataset_t *dataset, dns_diffop_t op DNS__DB_FLARG) {
        char buf[BIGBUFLEN];
        isc_buffer_t target;
        isc_result_t result;
 
        UNUSED(arg);
+       UNUSED(op);
 
        isc_buffer_init(&target, buf, BIGBUFLEN);
        result = dns_rdataset_totext(dataset, owner, false, false, &target);
@@ -102,7 +103,7 @@ setup_master(void (*warn)(struct dns_rdatacallbacks *, const char *, ...),
        RETERR(dns_name_fromtext(dns_origin, &source, dns_rootname, 0));
 
        dns_rdatacallbacks_init_stdio(&callbacks);
-       callbacks.add = add_callback;
+       callbacks.update = add_callback;
        callbacks.rawdata = rawdata_callback;
        callbacks.zone = NULL;
        if (warn != NULL) {
@@ -125,7 +126,7 @@ test_master(const char *workdir, const char *testfile,
        RETERR(setup_master(warn, error));
 
        dns_rdatacallbacks_init_stdio(&callbacks);
-       callbacks.add = add_callback;
+       callbacks.update = add_callback;
        callbacks.rawdata = rawdata_callback;
        callbacks.zone = NULL;
        if (warn != NULL) {