git.ipfire.org Git - thirdparty/bind9.git/commitdiff
Fix the glue table in the QP and RBT zone databases
author Ondřej Surý <ondrej@isc.org>
Thu, 4 Jul 2024 08:07:18 +0000 (10:07 +0200)
committer Ondřej Surý <ondrej@isc.org>
Mon, 5 Aug 2024 13:36:54 +0000 (15:36 +0200)
When adding glue to the header, we add the header to the wait-free stack
to be cleaned up later, which sets wfs_node->next to a non-NULL value.
When the actual cleanup happens, we would only clean up the .glue_list,
but since the database isn't locked during that time, the headers could
be reused while the existing glue entries are being cleaned, which
creates a data race between database versions.

Revert the code to use a per-database-version hash table whose keys are
the node pointers.  This allows each database version to have an
independent glue cache table that doesn't affect nodes or headers that
could already "belong" to a future database version.

lib/dns/db.c
lib/dns/db_p.h
lib/dns/include/dns/db.h
lib/dns/include/dns/rdataslab.h
lib/dns/qpzone.c
lib/dns/rbt-zonedb.c
lib/dns/rbtdb.c
lib/dns/rbtdb_p.h
lib/dns/rdataslab.c

index ce27fce8c1a81f99d7f5fc12a381495abe5669a1..34086f713dbd97e2a986d06cb3bc7714f5034825 100644 (file)
@@ -1125,7 +1125,9 @@ dns_db_addglue(dns_db_t *db, dns_dbversion_t *version, dns_rdataset_t *rdataset,
        REQUIRE(rdataset->type == dns_rdatatype_ns);
 
        if (db->methods->addglue != NULL) {
-               return ((db->methods->addglue)(db, version, rdataset, msg));
+               (db->methods->addglue)(db, version, rdataset, msg);
+
+               return (ISC_R_SUCCESS);
        }
 
        return (ISC_R_NOTIMPLEMENTED);
index fa6fa5485a51a54aab72c1c1719a2d04681ef355..f566f9044210b3bc6bcb4db59facc946bf625582 100644 (file)
@@ -21,6 +21,9 @@
 #include <dns/rbt.h>
 #include <dns/types.h>
 
+#define GLUETABLE_INIT_SIZE 1 << 2
+#define GLUETABLE_MIN_SIZE  1 << 8
+
 #define RDATATYPE_NCACHEANY DNS_TYPEPAIR_VALUE(0, dns_rdatatype_any)
 
 #ifdef STRONG_RWLOCK_CHECK
@@ -125,9 +128,6 @@ struct dns_glue {
        dns_rdataset_t sigrdataset_a;
        dns_rdataset_t rdataset_aaaa;
        dns_rdataset_t sigrdataset_aaaa;
-
-       isc_mem_t *mctx;
-       struct rcu_head rcu_head;
 };
 
 typedef struct {
index 254b7d38e507ca6e62cd7d8d3bef8ab687ce9011..e38f4a3acac3c8f695d350600e2b3ebcc46f65a4 100644 (file)
@@ -177,8 +177,8 @@ typedef struct dns_dbmethods {
        void (*locknode)(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t t);
        void (*unlocknode)(dns_db_t *db, dns_dbnode_t *node,
                           isc_rwlocktype_t t);
-       isc_result_t (*addglue)(dns_db_t *db, dns_dbversion_t *version,
-                               dns_rdataset_t *rdataset, dns_message_t *msg);
+       void (*addglue)(dns_db_t *db, dns_dbversion_t *version,
+                       dns_rdataset_t *rdataset, dns_message_t *msg);
        void (*expiredata)(dns_db_t *db, dns_dbnode_t *node, void *data);
        void (*deletedata)(dns_db_t *db, dns_dbnode_t *node, void *data);
        isc_result_t (*nodefullname)(dns_db_t *db, dns_dbnode_t *node,
index 4227854669fd397ea6c877c293428c9cf976c29e..4a0d58a542e7c636007d7fcf7e2ef9d73c8c7c7b 100644 (file)
@@ -132,9 +132,7 @@ struct dns_slabheader {
         */
        unsigned char upper[32];
 
-       isc_heap_t         *heap;
-       dns_glue_t         *glue_list;
-       struct cds_wfs_node wfs_node;
+       isc_heap_t *heap;
 };
 
 enum {
index 64fbfc87948f2e0f48f63c58f1967983c62c191d..27d077a65aa9403fb4647538a4161e1aa97eebf9 100644 (file)
@@ -142,7 +142,7 @@ struct qpz_version {
        uint64_t records;
        uint64_t xfrsize;
 
-       struct cds_wfs_stack glue_stack;
+       struct cds_lfht *glue_table;
 };
 
 typedef ISC_LIST(qpz_version_t) qpz_versionlist_t;
@@ -213,6 +213,17 @@ typedef struct {
        isc_stdtime_t now;
 } qpz_search_t;
 
+typedef struct dns_gluenode_t {
+       isc_mem_t *mctx;
+
+       struct dns_glue *glue;
+
+       qpznode_t *node;
+
+       struct cds_lfht_node ht_node;
+       struct rcu_head rcu_head;
+} dns_gluenode_t;
+
 /*%
  * Load Context
  */
@@ -369,61 +380,21 @@ set_index(void *what, unsigned int idx) {
 }
 
 static void
-freeglue(dns_glue_t *glue_list) {
-       if (glue_list == (void *)-1) {
-               return;
-       }
-
-       dns_glue_t *glue = glue_list;
-       while (glue != NULL) {
-               dns_glue_t *next = glue->next;
-
-               if (dns_rdataset_isassociated(&glue->rdataset_a)) {
-                       dns_rdataset_disassociate(&glue->rdataset_a);
-               }
-               if (dns_rdataset_isassociated(&glue->sigrdataset_a)) {
-                       dns_rdataset_disassociate(&glue->sigrdataset_a);
-               }
-
-               if (dns_rdataset_isassociated(&glue->rdataset_aaaa)) {
-                       dns_rdataset_disassociate(&glue->rdataset_aaaa);
-               }
-               if (dns_rdataset_isassociated(&glue->sigrdataset_aaaa)) {
-                       dns_rdataset_disassociate(&glue->sigrdataset_aaaa);
-               }
-
-               dns_rdataset_invalidate(&glue->rdataset_a);
-               dns_rdataset_invalidate(&glue->sigrdataset_a);
-               dns_rdataset_invalidate(&glue->rdataset_aaaa);
-               dns_rdataset_invalidate(&glue->sigrdataset_aaaa);
-
-               isc_mem_putanddetach(&glue->mctx, glue, sizeof(*glue));
-
-               glue = next;
-       }
-}
-
-static void
-free_gluelist_rcu(struct rcu_head *rcu_head) {
-       dns_glue_t *glue = caa_container_of(rcu_head, dns_glue_t, rcu_head);
-
-       freeglue(glue);
-}
+free_gluenode(dns_gluenode_t *gluenode);
 
 static void
-free_gluetable(struct cds_wfs_stack *glue_stack) {
-       struct cds_wfs_head *head = __cds_wfs_pop_all(glue_stack);
-       struct cds_wfs_node *node = NULL, *next = NULL;
+free_gluetable(struct cds_lfht *glue_table) {
+       struct cds_lfht_iter iter;
+       dns_gluenode_t *gluenode = NULL;
 
        rcu_read_lock();
-       cds_wfs_for_each_blocking_safe(head, node, next) {
-               dns_slabheader_t *header =
-                       caa_container_of(node, dns_slabheader_t, wfs_node);
-               dns_glue_t *glue = rcu_xchg_pointer(&header->glue_list, NULL);
-
-               call_rcu(&glue->rcu_head, free_gluelist_rcu);
+       cds_lfht_for_each_entry(glue_table, &iter, gluenode, ht_node) {
+               INSIST(!cds_lfht_del(glue_table, &gluenode->ht_node));
+               free_gluenode(gluenode);
        }
        rcu_read_unlock();
+
+       cds_lfht_destroy(glue_table, NULL);
 }
 
 static void
@@ -470,7 +441,6 @@ free_qpdb(qpzonedb_t *qpdb, bool log) {
 
        isc_refcount_destroy(&qpdb->current_version->references);
        UNLINK(qpdb->open_versions, qpdb->current_version, link);
-       cds_wfs_destroy(&qpdb->current_version->glue_stack);
        isc_rwlock_destroy(&qpdb->current_version->rwlock);
        isc_mem_put(qpdb->common.mctx, qpdb->current_version,
                    sizeof(*qpdb->current_version));
@@ -512,7 +482,7 @@ qpdb_destroy(dns_db_t *arg) {
         * node count below.
         */
        if (qpdb->current_version != NULL) {
-               free_gluetable(&qpdb->current_version->glue_stack);
+               free_gluetable(qpdb->current_version->glue_table);
        }
 
        /*
@@ -584,9 +554,11 @@ allocate_version(isc_mem_t *mctx, uint32_t serial, unsigned int references,
                .changed_list = ISC_LIST_INITIALIZER,
                .resigned_list = ISC_LIST_INITIALIZER,
                .link = ISC_LINK_INITIALIZER,
+               .glue_table = cds_lfht_new(GLUETABLE_INIT_SIZE,
+                                          GLUETABLE_MIN_SIZE, 0,
+                                          CDS_LFHT_AUTO_RESIZE, NULL),
        };
 
-       cds_wfs_init(&version->glue_stack);
        isc_rwlock_init(&version->rwlock);
        isc_refcount_init(&version->references, references);
 
@@ -1474,8 +1446,7 @@ closeversion(dns_db_t *db, dns_dbversion_t **versionp,
        if (cleanup_version != NULL) {
                isc_refcount_destroy(&cleanup_version->references);
                INSIST(EMPTY(cleanup_version->changed_list));
-               free_gluetable(&cleanup_version->glue_stack);
-               cds_wfs_destroy(&cleanup_version->glue_stack);
+               free_gluetable(cleanup_version->glue_table);
                isc_rwlock_destroy(&cleanup_version->rwlock);
                isc_mem_put(qpdb->common.mctx, cleanup_version,
                            sizeof(*cleanup_version));
@@ -4008,10 +3979,6 @@ deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED,
                RWUNLOCK(&qpdb->lock, isc_rwlocktype_write);
        }
        header->heap_index = 0;
-
-       if (header->glue_list) {
-               freeglue(header->glue_list);
-       }
 }
 
 /*
@@ -4976,7 +4943,6 @@ new_gluelist(isc_mem_t *mctx, dns_name_t *name) {
        *glue = (dns_glue_t){ 0 };
        dns_name_t *gluename = dns_fixedname_initname(&glue->fixedname);
 
-       isc_mem_attach(mctx, &glue->mctx);
        dns_name_copy(name, gluename);
 
        return (glue);
@@ -5189,11 +5155,11 @@ addglue_to_message(dns_glue_t *ge, dns_message_t *msg) {
 }
 
 static dns_glue_t *
-newglue(qpzonedb_t *qpdb, qpz_version_t *version, qpznode_t *node,
+newglue(dns_db_t *db, qpz_version_t *version, qpznode_t *node,
        dns_rdataset_t *rdataset) {
        dns_fixedname_t nodename;
        dns_glue_additionaldata_ctx_t ctx = {
-               .db = (dns_db_t *)qpdb,
+               .db = db,
                .version = (dns_dbversion_t *)version,
                .nodename = dns_fixedname_initname(&nodename),
        };
@@ -5212,59 +5178,160 @@ newglue(qpzonedb_t *qpdb, qpz_version_t *version, qpznode_t *node,
        return (ctx.glue_list);
 }
 
-static isc_result_t
+static dns_gluenode_t *
+new_gluenode(dns_db_t *db, qpz_version_t *version, qpznode_t *node,
+            dns_rdataset_t *rdataset) {
+       dns_gluenode_t *gluenode = isc_mem_get(db->mctx, sizeof(*gluenode));
+       *gluenode = (dns_gluenode_t){
+               .glue = newglue(db, version, node, rdataset),
+       };
+
+       isc_mem_attach(db->mctx, &gluenode->mctx);
+       qpznode_attach(node, &gluenode->node);
+
+       return (gluenode);
+}
+
+static void
+freeglue(isc_mem_t *mctx, dns_glue_t *glue) {
+       while (glue != NULL) {
+               dns_glue_t *next = glue->next;
+
+               if (dns_rdataset_isassociated(&glue->rdataset_a)) {
+                       dns_rdataset_disassociate(&glue->rdataset_a);
+               }
+               if (dns_rdataset_isassociated(&glue->sigrdataset_a)) {
+                       dns_rdataset_disassociate(&glue->sigrdataset_a);
+               }
+
+               if (dns_rdataset_isassociated(&glue->rdataset_aaaa)) {
+                       dns_rdataset_disassociate(&glue->rdataset_aaaa);
+               }
+               if (dns_rdataset_isassociated(&glue->sigrdataset_aaaa)) {
+                       dns_rdataset_disassociate(&glue->sigrdataset_aaaa);
+               }
+
+               dns_rdataset_invalidate(&glue->rdataset_a);
+               dns_rdataset_invalidate(&glue->sigrdataset_a);
+               dns_rdataset_invalidate(&glue->rdataset_aaaa);
+               dns_rdataset_invalidate(&glue->sigrdataset_aaaa);
+
+               isc_mem_put(mctx, glue, sizeof(*glue));
+
+               glue = next;
+       }
+}
+
+static void
+free_gluenode_rcu(struct rcu_head *rcu_head) {
+       dns_gluenode_t *gluenode = caa_container_of(rcu_head, dns_gluenode_t,
+                                                   rcu_head);
+
+       freeglue(gluenode->mctx, gluenode->glue);
+
+       qpznode_detach(&gluenode->node);
+
+       isc_mem_putanddetach(&gluenode->mctx, gluenode, sizeof(*gluenode));
+}
+
+static void
+free_gluenode(dns_gluenode_t *gluenode) {
+       call_rcu(&gluenode->rcu_head, free_gluenode_rcu);
+}
+
+static uint32_t
+qpznode_hash(const qpznode_t *node) {
+       return (isc_hash32(&node, sizeof(node), true));
+}
+
+static int
+qpznode_match(struct cds_lfht_node *ht_node, const void *key) {
+       const dns_gluenode_t *gluenode =
+               caa_container_of(ht_node, dns_gluenode_t, ht_node);
+       const qpznode_t *node = key;
+
+       return (gluenode->node == node);
+}
+
+static uint32_t
+gluenode_hash(const dns_gluenode_t *gluenode) {
+       return (qpznode_hash(gluenode->node));
+}
+
+static int
+gluenode_match(struct cds_lfht_node *ht_node, const void *key) {
+       const dns_gluenode_t *gluenode = key;
+
+       return (qpznode_match(ht_node, gluenode->node));
+}
+
+static void
 addglue(dns_db_t *db, dns_dbversion_t *dbversion, dns_rdataset_t *rdataset,
        dns_message_t *msg) {
        qpzonedb_t *qpdb = (qpzonedb_t *)db;
        qpz_version_t *version = dbversion;
        qpznode_t *node = (qpznode_t *)rdataset->slab.node;
-       dns_slabheader_t *header = dns_slabheader_fromrdataset(rdataset);
+       dns_gluenode_t *gluenode = NULL;
 
        REQUIRE(rdataset->type == dns_rdatatype_ns);
        REQUIRE(qpdb == (qpzonedb_t *)rdataset->slab.db);
        REQUIRE(qpdb == version->qpdb);
        REQUIRE(!IS_STUB(qpdb));
 
+       /*
+        * The glue table cache that forms a part of the DB version
+        * structure is not explicitly bounded and there's no cache
+        * cleaning. The zone data size itself is an implicit bound.
+        *
+        * The key into the glue hashtable is the node pointer. This is
+        * because the glue hashtable is a property of the DB version,
+        * and the glue is keyed for the ownername/NS tuple. We don't
+        * bother with using an expensive dns_name_t comparison here as
+        * the node pointer is a fixed value that won't change for a DB
+        * version and can be compared directly.
+        */
+
        rcu_read_lock();
 
-       dns_glue_t *glue = rcu_dereference(header->glue_list);
-       if (glue == NULL) {
+       struct cds_lfht_iter iter;
+       cds_lfht_lookup(version->glue_table, qpznode_hash(node), qpznode_match,
+                       node, &iter);
+
+       gluenode = cds_lfht_entry(cds_lfht_iter_get_node(&iter), dns_gluenode_t,
+                                 ht_node);
+       if (gluenode == NULL) {
                /* No cached glue was found in the table. Get new glue. */
-               glue = newglue(qpdb, version, node, rdataset);
-
-               /* Cache the glue or (void *)-1 if no glue was found. */
-               dns_glue_t *old_glue = rcu_cmpxchg_pointer(
-                       &header->glue_list, NULL, (glue) ? glue : (void *)-1);
-               if (old_glue != NULL) {
-                       /* Somebody else was faster */
-                       freeglue(glue);
-                       glue = old_glue;
-               } else if (glue != NULL) {
-                       cds_wfs_push(&version->glue_stack, &header->wfs_node);
+               gluenode = new_gluenode(db, version, node, rdataset);
+
+               struct cds_lfht_node *ht_node = cds_lfht_add_unique(
+                       version->glue_table, gluenode_hash(gluenode),
+                       gluenode_match, gluenode, &gluenode->ht_node);
+
+               if (ht_node != &gluenode->ht_node) {
+                       free_gluenode_rcu(&gluenode->rcu_head);
+
+                       gluenode = cds_lfht_entry(ht_node, dns_gluenode_t,
+                                                 ht_node);
                }
        }
 
-       /* We have a cached result. Add it to the message and return. */
+       INSIST(gluenode != NULL);
 
-       if (qpdb->gluecachestats != NULL) {
-               isc_stats_increment(
-                       qpdb->gluecachestats,
-                       (glue == (void *)-1)
-                               ? dns_gluecachestatscounter_hits_absent
-                               : dns_gluecachestatscounter_hits_present);
-       }
+       dns_glue_t *glue = gluenode->glue;
+       isc_statscounter_t counter = dns_gluecachestatscounter_hits_present;
 
-       /*
-        * (void *)-1 is a special value that means no glue is present in the
-        * zone.
-        */
-       if (glue != (void *)-1) {
+       if (glue != NULL) {
+               /* We have a cached result. Add it to the message and return. */
                addglue_to_message(glue, msg);
+       } else {
+               counter = dns_gluecachestatscounter_hits_absent;
        }
 
        rcu_read_unlock();
 
-       return (ISC_R_SUCCESS);
+       if (qpdb->gluecachestats != NULL) {
+               isc_stats_increment(qpdb->gluecachestats, counter);
+       }
 }
 
 static void
index 5e8c2132c4130228f1b767db98a792aad12d0eed..8a34f76673771b9d8cdcde33f8481c496ede9f20 100644 (file)
@@ -2092,7 +2092,6 @@ new_gluelist(isc_mem_t *mctx, dns_name_t *name) {
        *glue = (dns_glue_t){ 0 };
        dns_name_t *gluename = dns_fixedname_initname(&glue->fixedname);
 
-       isc_mem_attach(mctx, &glue->mctx);
        dns_name_copy(name, gluename);
 
        return (glue);
@@ -2306,12 +2305,12 @@ addglue_to_message(dns_glue_t *ge, dns_message_t *msg) {
 }
 
 static dns_glue_t *
-newglue(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *rbtversion,
-       dns_rbtnode_t *node, dns_rdataset_t *rdataset) {
+newglue(dns_db_t *db, dns_dbversion_t *dbversion, dns_rbtnode_t *node,
+       dns_rdataset_t *rdataset) {
        dns_fixedname_t nodename;
        dns_glue_additionaldata_ctx_t ctx = {
-               .db = (dns_db_t *)rbtdb,
-               .version = (dns_dbversion_t *)rbtversion,
+               .db = db,
+               .version = dbversion,
                .nodename = dns_fixedname_initname(&nodename),
        };
 
@@ -2321,7 +2320,7 @@ newglue(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *rbtversion,
         * determining which NS records in the delegation are
         * in-bailiwick).
         */
-       dns__rbtdb_nodefullname((dns_db_t *)rbtdb, node, ctx.nodename);
+       dns__rbtdb_nodefullname(db, node, ctx.nodename);
 
        (void)dns_rdataset_additionaldata(rdataset, dns_rootname,
                                          glue_nsdname_cb, &ctx);
@@ -2329,60 +2328,117 @@ newglue(dns_rbtdb_t *rbtdb, dns_rbtdb_version_t *rbtversion,
        return (ctx.glue_list);
 }
 
-static isc_result_t
-addglue(dns_db_t *db, dns_dbversion_t *version, dns_rdataset_t *rdataset,
+/* FIXME: Perhaps we can squash dns_gluenode_t with
+ * dns_glue_additionaldata_ctx_t */
+
+static dns_gluenode_t *
+new_gluenode(dns_db_t *db, dns_dbversion_t *dbversion, dns_rbtnode_t *node,
+            dns_rdataset_t *rdataset) {
+       dns_gluenode_t *gluenode = isc_mem_get(db->mctx, sizeof(*gluenode));
+       *gluenode = (dns_gluenode_t){
+               .glue = newglue(db, dbversion, node, rdataset),
+               .db = db,
+       };
+
+       isc_mem_attach(db->mctx, &gluenode->mctx);
+       dns_db_attachnode(db, node, (dns_dbnode_t **)&gluenode->node);
+
+       return (gluenode);
+}
+
+static uint32_t
+rbtnode_hash(const dns_rbtnode_t *node) {
+       return (isc_hash32(&node, sizeof(node), true));
+}
+
+static int
+rbtnode_match(struct cds_lfht_node *ht_node, const void *key) {
+       const dns_gluenode_t *gluenode =
+               caa_container_of(ht_node, dns_gluenode_t, ht_node);
+       const dns_rbtnode_t *node = key;
+
+       return (gluenode->node == node);
+}
+
+static uint32_t
+gluenode_hash(const dns_gluenode_t *gluenode) {
+       return (rbtnode_hash(gluenode->node));
+}
+
+static int
+gluenode_match(struct cds_lfht_node *ht_node, const void *key) {
+       const dns_gluenode_t *gluenode = key;
+
+       return (rbtnode_match(ht_node, gluenode->node));
+}
+
+static void
+addglue(dns_db_t *db, dns_dbversion_t *dbversion, dns_rdataset_t *rdataset,
        dns_message_t *msg) {
        dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
-       dns_rbtdb_version_t *rbtversion = version;
+       dns_rbtdb_version_t *version = dbversion;
        dns_rbtnode_t *node = (dns_rbtnode_t *)rdataset->slab.node;
-       dns_slabheader_t *header = dns_slabheader_fromrdataset(rdataset);
+       dns_gluenode_t *gluenode = NULL;
 
        REQUIRE(rdataset->type == dns_rdatatype_ns);
        REQUIRE(rbtdb == (dns_rbtdb_t *)rdataset->slab.db);
-       REQUIRE(rbtdb == rbtversion->rbtdb);
+       REQUIRE(rbtdb == version->rbtdb);
        REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb));
 
+       /*
+        * The glue table cache that forms a part of the DB version
+        * structure is not explicitly bounded and there's no cache
+        * cleaning. The zone data size itself is an implicit bound.
+        *
+        * The key into the glue hashtable is the node pointer. This is
+        * because the glue hashtable is a property of the DB version,
+        * and the glue is keyed for the ownername/NS tuple. We don't
+        * bother with using an expensive dns_name_t comparison here as
+        * the node pointer is a fixed value that won't change for a DB
+        * version and can be compared directly.
+        */
+
        rcu_read_lock();
 
-       dns_glue_t *glue = rcu_dereference(header->glue_list);
-       if (glue == NULL) {
+       struct cds_lfht_iter iter;
+       cds_lfht_lookup(version->glue_table, rbtnode_hash(node), rbtnode_match,
+                       node, &iter);
+
+       gluenode = cds_lfht_entry(cds_lfht_iter_get_node(&iter), dns_gluenode_t,
+                                 ht_node);
+       if (gluenode == NULL) {
                /* No cached glue was found in the table. Get new glue. */
-               glue = newglue(rbtdb, rbtversion, node, rdataset);
+               gluenode = new_gluenode(db, version, node, rdataset);
+
+               struct cds_lfht_node *ht_node = cds_lfht_add_unique(
+                       version->glue_table, gluenode_hash(gluenode),
+                       gluenode_match, gluenode, &gluenode->ht_node);
 
-               /* Cache the glue or (void *)-1 if no glue was found. */
-               dns_glue_t *old_glue = rcu_cmpxchg_pointer(
-                       &header->glue_list, NULL, (glue) ? glue : (void *)-1);
-               if (old_glue != NULL) {
-                       /* Somebody else was faster */
-                       dns__rbtdb_freeglue(glue);
-                       glue = old_glue;
-               } else if (glue != NULL) {
-                       cds_wfs_push(&rbtversion->glue_stack,
-                                    &header->wfs_node);
+               if (ht_node != &gluenode->ht_node) {
+                       dns__rbtdb_free_gluenode_rcu(&gluenode->rcu_head);
+
+                       gluenode = cds_lfht_entry(ht_node, dns_gluenode_t,
+                                                 ht_node);
                }
        }
 
-       /* We have a cached result. Add it to the message and return. */
+       INSIST(gluenode != NULL);
 
-       if (rbtdb->gluecachestats != NULL) {
-               isc_stats_increment(
-                       rbtdb->gluecachestats,
-                       (glue == (void *)-1)
-                               ? dns_gluecachestatscounter_hits_absent
-                               : dns_gluecachestatscounter_hits_present);
-       }
+       dns_glue_t *glue = gluenode->glue;
+       isc_statscounter_t counter = dns_gluecachestatscounter_hits_present;
 
-       /*
-        * (void *)-1 is a special value that means no glue is present in the
-        * zone.
-        */
-       if (glue != (void *)-1) {
+       if (glue != NULL) {
+               /* We have a cached result. Add it to the message and return. */
                addglue_to_message(glue, msg);
+       } else {
+               counter = dns_gluecachestatscounter_hits_absent;
        }
 
        rcu_read_unlock();
 
-       return (ISC_R_SUCCESS);
+       if (rbtdb->gluecachestats != NULL) {
+               isc_stats_increment(rbtdb->gluecachestats, counter);
+       }
 }
 
 dns_dbmethods_t dns__rbtdb_zonemethods = {
index 705fa98028efcb9e45f45ddc43e5d2a21559dea2..314fb3facadcad745d45377070bc68c6c00de664 100644 (file)
@@ -166,7 +166,7 @@ delete_callback(void *data, void *arg);
 static void
 prune_tree(void *arg);
 static void
-free_gluetable(dns_rbtdb_version_t *version);
+free_gluetable(struct cds_lfht *glue_table);
 
 static void
 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG);
@@ -465,7 +465,6 @@ free_rbtdb(dns_rbtdb_t *rbtdb, bool log) {
 
                isc_refcount_destroy(&rbtdb->current_version->references);
                UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
-               cds_wfs_destroy(&rbtdb->current_version->glue_stack);
                isc_rwlock_destroy(&rbtdb->current_version->rwlock);
                isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
                            sizeof(*rbtdb->current_version));
@@ -622,7 +621,7 @@ dns__rbtdb_destroy(dns_db_t *arg) {
         * node count below.
         */
        if (rbtdb->current_version != NULL) {
-               free_gluetable(rbtdb->current_version);
+               free_gluetable(rbtdb->current_version->glue_table);
        }
 
        /*
@@ -688,10 +687,12 @@ allocate_version(isc_mem_t *mctx, uint32_t serial, unsigned int references,
                .changed_list = ISC_LIST_INITIALIZER,
                .resigned_list = ISC_LIST_INITIALIZER,
                .link = ISC_LINK_INITIALIZER,
+               .glue_table = cds_lfht_new(GLUETABLE_INIT_SIZE,
+                                          GLUETABLE_MIN_SIZE, 0,
+                                          CDS_LFHT_AUTO_RESIZE, NULL),
        };
 
-       cds_wfs_init(&version->glue_stack);
-
+       isc_rwlock_init(&version->rwlock);
        isc_refcount_init(&version->references, references);
 
        return (version);
@@ -728,7 +729,6 @@ dns__rbtdb_newversion(dns_db_t *db, dns_dbversion_t **versionp) {
                version->salt_length = 0;
                memset(version->salt, 0, sizeof(version->salt));
        }
-       isc_rwlock_init(&version->rwlock);
        RWLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read);
        version->records = rbtdb->current_version->records;
        version->xfrsize = rbtdb->current_version->xfrsize;
@@ -1946,8 +1946,7 @@ dns__rbtdb_closeversion(dns_db_t *db, dns_dbversion_t **versionp,
        if (cleanup_version != NULL) {
                isc_refcount_destroy(&cleanup_version->references);
                INSIST(EMPTY(cleanup_version->changed_list));
-               free_gluetable(cleanup_version);
-               cds_wfs_destroy(&cleanup_version->glue_stack);
+               free_gluetable(cleanup_version->glue_table);
                isc_rwlock_destroy(&cleanup_version->rwlock);
                isc_mem_put(rbtdb->common.mctx, cleanup_version,
                            sizeof(*cleanup_version));
@@ -4071,7 +4070,6 @@ dns__rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
         */
        rbtdb->current_version = allocate_version(mctx, 1, 1, false);
        rbtdb->current_version->rbtdb = rbtdb;
-       isc_rwlock_init(&rbtdb->current_version->rwlock);
 
        /*
         * Keep the current version in the open list so that list operation
@@ -4859,13 +4857,8 @@ dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
        return (ISC_R_SUCCESS);
 }
 
-void
-dns__rbtdb_freeglue(dns_glue_t *glue_list) {
-       if (glue_list == (void *)-1) {
-               return;
-       }
-
-       dns_glue_t *glue = glue_list;
+static void
+freeglue(isc_mem_t *mctx, dns_glue_t *glue) {
        while (glue != NULL) {
                dns_glue_t *next = glue->next;
 
@@ -4888,33 +4881,42 @@ dns__rbtdb_freeglue(dns_glue_t *glue_list) {
                dns_rdataset_invalidate(&glue->rdataset_aaaa);
                dns_rdataset_invalidate(&glue->sigrdataset_aaaa);
 
-               isc_mem_putanddetach(&glue->mctx, glue, sizeof(*glue));
+               isc_mem_put(mctx, glue, sizeof(*glue));
 
                glue = next;
        }
 }
 
-static void
-free_gluelist_rcu(struct rcu_head *rcu_head) {
-       dns_glue_t *glue = caa_container_of(rcu_head, dns_glue_t, rcu_head);
+void
+dns__rbtdb_free_gluenode_rcu(struct rcu_head *rcu_head) {
+       dns_gluenode_t *gluenode = caa_container_of(rcu_head, dns_gluenode_t,
+                                                   rcu_head);
 
-       dns__rbtdb_freeglue(glue);
+       freeglue(gluenode->mctx, gluenode->glue);
+
+       dns_db_detachnode(gluenode->db, (dns_dbnode_t **)&gluenode->node);
+
+       isc_mem_putanddetach(&gluenode->mctx, gluenode, sizeof(*gluenode));
+}
+
+void
+dns__rbtdb_free_gluenode(dns_gluenode_t *gluenode) {
+       call_rcu(&gluenode->rcu_head, dns__rbtdb_free_gluenode_rcu);
 }
 
 static void
-free_gluetable(dns_rbtdb_version_t *rbtversion) {
-       struct cds_wfs_head *head = __cds_wfs_pop_all(&rbtversion->glue_stack);
-       struct cds_wfs_node *node = NULL, *next = NULL;
+free_gluetable(struct cds_lfht *glue_table) {
+       struct cds_lfht_iter iter;
+       dns_gluenode_t *gluenode = NULL;
 
        rcu_read_lock();
-       cds_wfs_for_each_blocking_safe(head, node, next) {
-               dns_slabheader_t *header =
-                       caa_container_of(node, dns_slabheader_t, wfs_node);
-               dns_glue_t *glue = rcu_xchg_pointer(&header->glue_list, NULL);
-
-               call_rcu(&glue->rcu_head, free_gluelist_rcu);
+       cds_lfht_for_each_entry(glue_table, &iter, gluenode, ht_node) {
+               INSIST(!cds_lfht_del(glue_table, &gluenode->ht_node));
+               dns__rbtdb_free_gluenode(gluenode);
        }
        rcu_read_unlock();
+
+       cds_lfht_destroy(glue_table, NULL);
 }
 
 void
@@ -4944,10 +4946,6 @@ dns__rbtdb_deletedata(dns_db_t *db ISC_ATTR_UNUSED,
                if (header->closest != NULL) {
                        dns_slabheader_freeproof(db->mctx, &header->closest);
                }
-       } else {
-               if (header->glue_list) {
-                       dns__rbtdb_freeglue(header->glue_list);
-               }
        }
 }
 
index b28863eac7db36b474540af018a354903e70cebb..6552ee91439944adb03ac410a471a64c7ce7ef91 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <isc/heap.h>
 #include <isc/lang.h>
+#include <isc/rwlock.h>
 #include <isc/urcu.h>
 
 #include <dns/nsec3.h>
@@ -90,7 +91,7 @@ struct dns_rbtdb_version {
        uint64_t records;
        uint64_t xfrsize;
 
-       struct cds_wfs_stack glue_stack;
+       struct cds_lfht *glue_table;
 };
 
 typedef ISC_LIST(dns_rbtdb_version_t) rbtdb_versionlist_t;
@@ -213,6 +214,18 @@ typedef struct {
 extern dns_dbmethods_t dns__rbtdb_zonemethods;
 extern dns_dbmethods_t dns__rbtdb_cachemethods;
 
+typedef struct dns_gluenode_t {
+       isc_mem_t *mctx;
+
+       struct dns_glue *glue;
+
+       dns_db_t *db;
+       dns_rbtnode_t *node;
+
+       struct cds_lfht_node ht_node;
+       struct rcu_head rcu_head;
+} dns_gluenode_t;
+
 /*
  * Common DB implementation methods shared by both cache and zone RBT
  * databases:
@@ -370,7 +383,9 @@ isc_result_t
 dns__rbtdb_nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name);
 
 void
-dns__rbtdb_freeglue(dns_glue_t *glue_list);
+dns__rbtdb_free_gluenode_rcu(struct rcu_head *rcu_head);
+void
+dns__rbtdb_free_gluenode(dns_gluenode_t *gluenode);
 
 void
 dns__rbtdb_newref(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
index abbb902317d76f94537c253672497514d9e30419..67f51018063cd1fe9af45b844f1db71a2791218c 100644 (file)
@@ -1082,15 +1082,12 @@ dns_slabheader_reset(dns_slabheader_t *h, dns_db_t *db, dns_dbnode_t *node) {
        ISC_LINK_INIT(h, link);
        h->heap_index = 0;
        h->heap = NULL;
-       h->glue_list = NULL;
        h->db = db;
        h->node = node;
 
        atomic_init(&h->attributes, 0);
        atomic_init(&h->last_refresh_fail_ts, 0);
 
-       cds_wfs_node_init(&h->wfs_node);
-
        STATIC_ASSERT((sizeof(h->attributes) == 2),
                      "The .attributes field of dns_slabheader_t needs to be "
                      "16-bit int type exactly.");