]> git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Rewrite the GLUE cache in QP zone database
authorOndřej Surý <ondrej@isc.org>
Thu, 5 Dec 2024 12:45:24 +0000 (13:45 +0100)
committerOndřej Surý <ondrej@isc.org>
Fri, 13 Dec 2024 20:48:11 +0000 (21:48 +0100)
This is a second attempt to rewrite the GLUE cache to not use per
database version hash table.  Instead of keeping a hash table indexed by
the node, use a directly linked list of GLUE records for each
slabheader.  This was attempted before, but there was a data race caused
by the fact that the thread cleaning the GLUE records could be slower
than accessing the slab headers again and reinitializing the wait-free
stack.

The improved design builds on the previous design, but adds a new
dns_gluelist structure that has a pointer to the database version.

If a dns_gluelist belonging to a different (old) version is detected, it
is just detached from the slabheader and left for the closeversion() to
clean it up later.

lib/dns/db_p.h
lib/dns/include/dns/rdataslab.h
lib/dns/include/dns/types.h
lib/dns/qpzone.c
lib/dns/rdataslab.c

index 1101a9ac3a7278d316dce5b879b9a81b8bc0dfaf..bfad169252ffb0299f52964d5dd3530b191e690f 100644 (file)
@@ -119,21 +119,31 @@ ISC_LANG_BEGINDECLS
 
 struct dns_glue {
        struct dns_glue *next;
-       dns_fixedname_t fixedname;
+       dns_name_t name;
        dns_rdataset_t rdataset_a;
        dns_rdataset_t sigrdataset_a;
        dns_rdataset_t rdataset_aaaa;
        dns_rdataset_t sigrdataset_aaaa;
+};
 
+struct dns_gluelist {
        isc_mem_t *mctx;
+
+       const dns_dbversion_t *version;
+       dns_slabheader_t *header;
+
+       struct dns_glue *glue;
+
        struct rcu_head rcu_head;
+       struct cds_wfs_node wfs_node;
 };
 
-typedef struct {
-       dns_glue_t *glue_list;
+typedef struct dns_glue_additionaldata_ctx {
        dns_db_t *db;
        dns_dbversion_t *version;
-       dns_name_t *nodename;
+       dns_dbnode_t *node;
+
+       dns_glue_t *glue;
 } dns_glue_additionaldata_ctx_t;
 
 typedef struct {
index 1a46d12d0f6e09250f5ad1422b9022e0f66fc34a..3061f0ece45a33441cdf74dc048725d7ba962c41 100644 (file)
@@ -132,9 +132,9 @@ struct dns_slabheader {
         */
        unsigned char upper[32];
 
-       isc_heap_t         *heap;
-       dns_glue_t         *glue_list;
-       struct cds_wfs_node wfs_node;
+       isc_heap_t *heap;
+
+       dns_gluelist_t *gluelist;
 };
 
 enum {
index fbb06ec4a383ac410bef8564240f732bd6c8fdc7..bc04362c23969638b4ea2c126d63fe816c8f2cef 100644 (file)
@@ -95,6 +95,7 @@ typedef struct dns_forwarder     dns_forwarder_t;
 typedef struct dns_fwdtable       dns_fwdtable_t;
 typedef struct dns_geoip_databases dns_geoip_databases_t;
 typedef struct dns_glue                   dns_glue_t;
+typedef struct dns_gluelist       dns_gluelist_t;
 typedef struct dns_iptable        dns_iptable_t;
 typedef uint32_t                  dns_iterations_t;
 typedef struct dns_kasp                   dns_kasp_t;
index 006e7e300cfda0406fab6f83933f8607b3e8a43f..481961d2f4c7af71fc5dd4c719fd2459b949b96f 100644 (file)
@@ -369,12 +369,7 @@ set_index(void *what, unsigned int idx) {
 }
 
 static void
-freeglue(dns_glue_t *glue_list) {
-       if (glue_list == (void *)-1) {
-               return;
-       }
-
-       dns_glue_t *glue = glue_list;
+free_glue(isc_mem_t *mctx, dns_glue_t *glue) {
        while (glue != NULL) {
                dns_glue_t *next = glue->next;
 
@@ -397,31 +392,49 @@ freeglue(dns_glue_t *glue_list) {
                dns_rdataset_invalidate(&glue->rdataset_aaaa);
                dns_rdataset_invalidate(&glue->sigrdataset_aaaa);
 
-               isc_mem_putanddetach(&glue->mctx, glue, sizeof(*glue));
+               dns_name_free(&glue->name, mctx);
+
+               isc_mem_put(mctx, glue, sizeof(*glue));
 
                glue = next;
        }
 }
 
 static void
-free_gluelist_rcu(struct rcu_head *rcu_head) {
-       dns_glue_t *glue = caa_container_of(rcu_head, dns_glue_t, rcu_head);
+destroy_gluelist(dns_gluelist_t **gluelistp) {
+       REQUIRE(gluelistp != NULL);
+       if (*gluelistp == NULL) {
+               return;
+       }
+
+       dns_gluelist_t *gluelist = *gluelistp;
+
+       free_glue(gluelist->mctx, gluelist->glue);
 
-       freeglue(glue);
+       isc_mem_putanddetach(&gluelist->mctx, gluelist, sizeof(*gluelist));
+}
+
+static void
+free_gluelist_rcu(struct rcu_head *rcu_head) {
+       dns_gluelist_t *gluelist = caa_container_of(rcu_head, dns_gluelist_t,
+                                                   rcu_head);
+       destroy_gluelist(&gluelist);
 }
 
 static void
-free_gluetable(struct cds_wfs_stack *glue_stack) {
+cleanup_gluelists(struct cds_wfs_stack *glue_stack) {
        struct cds_wfs_head *head = __cds_wfs_pop_all(glue_stack);
        struct cds_wfs_node *node = NULL, *next = NULL;
 
        rcu_read_lock();
        cds_wfs_for_each_blocking_safe(head, node, next) {
-               dns_slabheader_t *header =
-                       caa_container_of(node, dns_slabheader_t, wfs_node);
-               dns_glue_t *glue = rcu_xchg_pointer(&header->glue_list, NULL);
+               dns_gluelist_t *gluelist =
+                       caa_container_of(node, dns_gluelist_t, wfs_node);
+               dns_slabheader_t *header = rcu_xchg_pointer(&gluelist->header,
+                                                           NULL);
+               (void)rcu_cmpxchg_pointer(&header->gluelist, gluelist, NULL);
 
-               call_rcu(&glue->rcu_head, free_gluelist_rcu);
+               call_rcu(&gluelist->rcu_head, free_gluelist_rcu);
        }
        rcu_read_unlock();
 }
@@ -511,7 +524,7 @@ qpdb_destroy(dns_db_t *arg) {
         * node count below.
         */
        if (qpdb->current_version != NULL) {
-               free_gluetable(&qpdb->current_version->glue_stack);
+               cleanup_gluelists(&qpdb->current_version->glue_stack);
        }
 
        /*
@@ -1474,7 +1487,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp,
        if (cleanup_version != NULL) {
                isc_refcount_destroy(&cleanup_version->references);
                INSIST(EMPTY(cleanup_version->changed_list));
-               free_gluetable(&cleanup_version->glue_stack);
+               cleanup_gluelists(&cleanup_version->glue_stack);
                cds_wfs_destroy(&cleanup_version->glue_stack);
                isc_rwlock_destroy(&cleanup_version->rwlock);
                isc_mem_put(qpdb->common.mctx, cleanup_version,
@@ -4017,10 +4030,6 @@ deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED,
                RWUNLOCK(&qpdb->lock, isc_rwlocktype_write);
        }
        header->heap_index = 0;
-
-       if (header->glue_list) {
-               freeglue(header->glue_list);
-       }
 }
 
 /*
@@ -4986,25 +4995,42 @@ nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) {
 }
 
 static dns_glue_t *
-new_gluelist(isc_mem_t *mctx, dns_name_t *name) {
+new_glue(isc_mem_t *mctx, const dns_name_t *name) {
        dns_glue_t *glue = isc_mem_get(mctx, sizeof(*glue));
-       *glue = (dns_glue_t){ 0 };
-       dns_name_t *gluename = dns_fixedname_initname(&glue->fixedname);
+       *glue = (dns_glue_t){
+               .name = DNS_NAME_INITEMPTY,
+       };
 
-       isc_mem_attach(mctx, &glue->mctx);
-       dns_name_copy(name, gluename);
+       dns_name_dup(name, mctx, &glue->name);
 
        return glue;
 }
 
+static dns_gluelist_t *
+new_gluelist(dns_db_t *db, dns_slabheader_t *header,
+            const dns_dbversion_t *dbversion) {
+       dns_gluelist_t *gluelist = isc_mem_get(db->mctx, sizeof(*gluelist));
+       *gluelist = (dns_gluelist_t){
+               .version = dbversion,
+               .header = header,
+       };
+
+       isc_mem_attach(db->mctx, &gluelist->mctx);
+
+       cds_wfs_node_init(&gluelist->wfs_node);
+
+       return gluelist;
+}
+
 static isc_result_t
 glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
-               dns_rdataset_t *unused DNS__DB_FLARG) {
+               dns_rdataset_t *rdataset ISC_ATTR_UNUSED DNS__DB_FLARG) {
        dns_glue_additionaldata_ctx_t *ctx = NULL;
        isc_result_t result;
        dns_fixedname_t fixedname_a;
        dns_name_t *name_a = NULL;
        dns_rdataset_t rdataset_a, sigrdataset_a;
+       const qpznode_t *node = NULL;
        qpznode_t *node_a = NULL;
        dns_fixedname_t fixedname_aaaa;
        dns_name_t *name_aaaa = NULL;
@@ -5012,8 +5038,6 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
        qpznode_t *node_aaaa = NULL;
        dns_glue_t *glue = NULL;
 
-       UNUSED(unused);
-
        /*
         * NS records want addresses in additional records.
         */
@@ -5021,6 +5045,8 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
 
        ctx = (dns_glue_additionaldata_ctx_t *)arg;
 
+       node = (qpznode_t *)ctx->node;
+
        name_a = dns_fixedname_initname(&fixedname_a);
        dns_rdataset_init(&rdataset_a);
        dns_rdataset_init(&sigrdataset_a);
@@ -5033,7 +5059,7 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
                      DNS_DBFIND_GLUEOK, 0, (dns_dbnode_t **)&node_a, name_a,
                      &rdataset_a, &sigrdataset_a DNS__DB_FLARG_PASS);
        if (result == DNS_R_GLUE) {
-               glue = new_gluelist(ctx->db->mctx, name_a);
+               glue = new_glue(ctx->db->mctx, name_a);
 
                dns_rdataset_init(&glue->rdataset_a);
                dns_rdataset_init(&glue->sigrdataset_a);
@@ -5053,7 +5079,7 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
                      &sigrdataset_aaaa DNS__DB_FLARG_PASS);
        if (result == DNS_R_GLUE) {
                if (glue == NULL) {
-                       glue = new_gluelist(ctx->db->mctx, name_aaaa);
+                       glue = new_glue(ctx->db->mctx, name_aaaa);
 
                        dns_rdataset_init(&glue->rdataset_a);
                        dns_rdataset_init(&glue->sigrdataset_a);
@@ -5079,7 +5105,7 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
         * attributes for the first rdataset associated with the first name
         * added to the ADDITIONAL section.
         */
-       if (glue != NULL && dns_name_issubdomain(name, ctx->nodename)) {
+       if (glue != NULL && dns_name_issubdomain(name, &node->name)) {
                if (dns_rdataset_isassociated(&glue->rdataset_a)) {
                        glue->rdataset_a.attributes |=
                                DNS_RDATASETATTR_REQUIRED;
@@ -5091,8 +5117,8 @@ glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype,
        }
 
        if (glue != NULL) {
-               glue->next = ctx->glue_list;
-               ctx->glue_list = glue;
+               glue->next = ctx->glue;
+               ctx->glue = glue;
        }
 
        result = ISC_R_SUCCESS;
@@ -5134,12 +5160,11 @@ addglue_to_message(dns_glue_t *ge, dns_message_t *msg) {
                dns_rdataset_t *sigrdataset_a = NULL;
                dns_rdataset_t *rdataset_aaaa = NULL;
                dns_rdataset_t *sigrdataset_aaaa = NULL;
-               dns_name_t *gluename = dns_fixedname_name(&ge->fixedname);
                bool prepend_name = false;
 
                dns_message_gettempname(msg, &name);
 
-               dns_name_copy(gluename, name);
+               dns_name_copy(&ge->name, name);
 
                if (dns_rdataset_isassociated(&ge->rdataset_a)) {
                        dns_message_gettemprdataset(msg, &rdataset_a);
@@ -5204,15 +5229,16 @@ addglue_to_message(dns_glue_t *ge, dns_message_t *msg) {
        }
 }
 
-static dns_glue_t *
-newglue(qpzonedb_t *qpdb, qpz_version_t *version, qpznode_t *node,
-       dns_rdataset_t *rdataset) {
-       dns_fixedname_t nodename;
+static dns_gluelist_t *
+create_gluelist(qpzonedb_t *qpdb, qpz_version_t *version, qpznode_t *node,
+               dns_rdataset_t *rdataset) {
+       dns_slabheader_t *header = dns_slabheader_fromrdataset(rdataset);
        dns_glue_additionaldata_ctx_t ctx = {
                .db = (dns_db_t *)qpdb,
                .version = (dns_dbversion_t *)version,
-               .nodename = dns_fixedname_initname(&nodename),
+               .node = (dns_dbnode_t *)node,
        };
+       dns_gluelist_t *gluelist = new_gluelist(ctx.db, header, ctx.version);
 
        /*
         * Get the owner name of the NS RRset - it will be necessary for
@@ -5220,12 +5246,13 @@ newglue(qpzonedb_t *qpdb, qpz_version_t *version, qpznode_t *node,
         * determining which NS records in the delegation are
         * in-bailiwick).
         */
-       dns_name_copy(&node->name, ctx.nodename);
 
        (void)dns_rdataset_additionaldata(rdataset, dns_rootname,
                                          glue_nsdname_cb, &ctx);
 
-       return ctx.glue_list;
+       gluelist->glue = ctx.glue;
+
+       return gluelist;
 }
 
 static void
@@ -5235,6 +5262,8 @@ addglue(dns_db_t *db, dns_dbversion_t *dbversion, dns_rdataset_t *rdataset,
        qpz_version_t *version = (qpz_version_t *)dbversion;
        qpznode_t *node = (qpznode_t *)rdataset->slab.node;
        dns_slabheader_t *header = dns_slabheader_fromrdataset(rdataset);
+       dns_glue_t *glue = NULL;
+       isc_statscounter_t counter = dns_gluecachestatscounter_hits_absent;
 
        REQUIRE(rdataset->type == dns_rdatatype_ns);
        REQUIRE(qpdb == (qpzonedb_t *)rdataset->slab.db);
@@ -5243,42 +5272,48 @@ addglue(dns_db_t *db, dns_dbversion_t *dbversion, dns_rdataset_t *rdataset,
 
        rcu_read_lock();
 
-       dns_glue_t *glue = rcu_dereference(header->glue_list);
-       if (glue == NULL) {
-               /* No cached glue was found in the table. Get new glue. */
-               glue = newglue(qpdb, version, node, rdataset);
-
-               /* Cache the glue or (void *)-1 if no glue was found. */
-               dns_glue_t *old_glue = rcu_cmpxchg_pointer(
-                       &header->glue_list, NULL, (glue) ? glue : (void *)-1);
-               if (old_glue != NULL) {
-                       /* Somebody else was faster */
-                       freeglue(glue);
-                       glue = old_glue;
-               } else if (glue != NULL) {
-                       cds_wfs_push(&version->glue_stack, &header->wfs_node);
-               }
-       }
+       dns_gluelist_t *gluelist = rcu_dereference(header->gluelist);
+       if (gluelist == NULL || gluelist->version != dbversion) {
+               /* No or old glue list was found in the table. */
 
-       /* We have a cached result. Add it to the message and return. */
+               dns_gluelist_t *xchg_gluelist = gluelist;
+               dns_gluelist_t *old_gluelist = (void *)-1;
+               dns_gluelist_t *new_gluelist = create_gluelist(qpdb, version,
+                                                              node, rdataset);
 
-       if (qpdb->gluecachestats != NULL) {
-               isc_stats_increment(
-                       qpdb->gluecachestats,
-                       (glue == (void *)-1)
-                               ? dns_gluecachestatscounter_hits_absent
-                               : dns_gluecachestatscounter_hits_present);
+               while (old_gluelist != xchg_gluelist &&
+                      (xchg_gluelist == NULL ||
+                       xchg_gluelist->version != dbversion))
+               {
+                       old_gluelist = xchg_gluelist;
+                       xchg_gluelist = rcu_cmpxchg_pointer(
+                               &header->gluelist, old_gluelist, new_gluelist);
+               }
+
+               if (old_gluelist == xchg_gluelist) {
+                       /* CAS was successful */
+                       cds_wfs_push(&version->glue_stack,
+                                    &new_gluelist->wfs_node);
+                       gluelist = new_gluelist;
+               } else {
+                       destroy_gluelist(&new_gluelist);
+                       gluelist = xchg_gluelist;
+               }
        }
 
-       /*
-        * (void *)-1 is a special value that means no glue is present in the
-        * zone.
-        */
-       if (glue != (void *)-1) {
+       glue = gluelist->glue;
+
+       if (glue != NULL) {
                addglue_to_message(glue, msg);
+               counter = dns_gluecachestatscounter_hits_present;
        }
 
        rcu_read_unlock();
+
+       /* We have a cached result. Add it to the message and return. */
+       if (qpdb->gluecachestats != NULL) {
+               isc_stats_increment(qpdb->gluecachestats, counter);
+       }
 }
 
 static void
index bdc2a9e67f43652e07397675e5669e30d6289e24..c5e0ffbf9143a2616e7b1c8eb37ee9a073521d86 100644 (file)
@@ -825,15 +825,12 @@ dns_slabheader_reset(dns_slabheader_t *h, dns_db_t *db, dns_dbnode_t *node) {
        ISC_LINK_INIT(h, link);
        h->heap_index = 0;
        h->heap = NULL;
-       h->glue_list = NULL;
        h->db = db;
        h->node = node;
 
        atomic_init(&h->attributes, 0);
        atomic_init(&h->last_refresh_fail_ts, 0);
 
-       cds_wfs_node_init(&h->wfs_node);
-
        STATIC_ASSERT((sizeof(h->attributes) == 2),
                      "The .attributes field of dns_slabheader_t needs to be "
                      "16-bit int type exactly.");