From f187c948c4ade3e4e6a1c7afcfacbe0cb4359831 Mon Sep 17 00:00:00 2001 From: Evan Hunt Date: Thu, 3 Jun 2021 19:35:01 -0700 Subject: [PATCH] WIP: non-blocking zone lookup - add a DNS_DBFIND_NONBLOCK option, which causes dns_db_find/findext() to return ISC_R_LOCKBUSY if unable to acquire a read lock on the database. - use the asynchronous hook mechanism to postpone query_lookup() if this occurs. --- lib/dns/include/dns/db.h | 7 +++ lib/dns/rbtdb.c | 119 ++++++++++++++++++--------------------- lib/ns/query.c | 68 ++++++++++++++++++++-- 3 files changed, 125 insertions(+), 69 deletions(-) diff --git a/lib/dns/include/dns/db.h b/lib/dns/include/dns/db.h index 8664cf14582..3be368626ba 100644 --- a/lib/dns/include/dns/db.h +++ b/lib/dns/include/dns/db.h @@ -271,6 +271,13 @@ struct dns_dbonupdatelistener { * window. */ #define DNS_DBFIND_STALESTART 0x2000 + +/* + * DNS_DBFIND_NONBLOCK: If set, then if a call to dns_db_find() would block + * while waiting for a read lock, the implementation may return ISC_R_LOCKBUSY + * so the lookup can be rescheduled for later. + */ +#define DNS_DBFIND_NONBLOCK 0x4000 /*@}*/ /*@{*/ diff --git a/lib/dns/rbtdb.c b/lib/dns/rbtdb.c index 4d37a36ba32..5d4344c5f3e 100644 --- a/lib/dns/rbtdb.c +++ b/lib/dns/rbtdb.c @@ -3996,33 +3996,43 @@ zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) { - dns_rbtnode_t *node = NULL; isc_result_t result; + dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; + rbtdb_version_t *rbtversion = NULL; + dns_rbtnode_t *node = NULL; rbtdb_search_t search; bool cname_ok = true; bool close_version = false; bool maybe_zonecut = false; bool at_zonecut = false; - bool wild; - bool empty_node; - rdatasetheader_t *header, *header_next, *found, *nsecheader; - rdatasetheader_t *foundsig, *cnamesig, *nsecsig; + bool wild = false; + bool empty_node = true; + bool active = false; + rdatasetheader_t *header = NULL, *header_next = NULL; + rdatasetheader_t *found = NULL, *nsecheader = NULL; + rdatasetheader_t *foundsig = NULL, *cnamesig = NULL, *nsecsig = NULL; rbtdb_rdatatype_t sigtype; - bool active; - nodelock_t *lock; - dns_rbt_t *tree; - - search.rbtdb = (dns_rbtdb_t *)db; + nodelock_t *lock = NULL; + dns_rbt_t *tree = NULL; - REQUIRE(VALID_RBTDB(search.rbtdb)); - INSIST(version == NULL || - ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db); + REQUIRE(VALID_RBTDB(rbtdb)); + INSIST(version == NULL || ((rbtdb_version_t *)version)->rbtdb == rbtdb); /* * We don't care about 'now'. */ UNUSED(now); + if ((options & DNS_DBFIND_NONBLOCK) != 0) { + result = isc_rwlock_trylock(&rbtdb->tree_lock, + isc_rwlocktype_read); + if (result != ISC_R_SUCCESS) { + return (result); + } + } else { + RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); + } + /* * If the caller didn't supply a version, attach to the current * version. @@ -4032,31 +4042,21 @@ zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, close_version = true; } - search.rbtversion = version; - search.serial = search.rbtversion->serial; - search.options = options; - search.copy_name = false; - search.need_cleanup = false; - search.wild = false; - search.zonecut = NULL; - dns_fixedname_init(&search.zonecut_name); + rbtversion = (rbtdb_version_t *)version; + search = (rbtdb_search_t){ .rbtdb = rbtdb, + .options = options, + .rbtversion = rbtversion, + .serial = rbtversion->serial }; dns_rbtnodechain_init(&search.chain); - search.now = 0; - - /* - * 'wild' will be true iff. we've matched a wildcard. - */ - wild = false; - - RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); + dns_fixedname_init(&search.zonecut_name); /* * Search down from the root of the tree. If, while going down, we * encounter a callback node, zone_zonecut_callback() will search the * rdatasets at the zone cut for active DNAME or NS rdatasets. */ - tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 - : search.rbtdb->tree; + tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? rbtdb->nsec3 + : rbtdb->tree; result = dns_rbt_findnode(tree, name, foundname, &node, &search.chain, DNS_RBTFIND_EMPTYDATA, zone_zonecut_callback, &search); @@ -4086,7 +4086,6 @@ zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, } } - active = false; if ((options & DNS_DBFIND_FORCENSEC3) == 0) { /* * The NSEC3 tree won't have empty nodes, @@ -4100,14 +4099,14 @@ zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, * If we're here, then the name does not exist, is not * beneath a zonecut, and there's no matching wildcard. */ - if ((search.rbtversion->secure == dns_db_secure && - !search.rbtversion->havensec3) || + if ((rbtversion->secure == dns_db_secure && + !rbtversion->havensec3) || (search.options & DNS_DBFIND_FORCENSEC) != 0 || (search.options & DNS_DBFIND_FORCENSEC3) != 0) { result = find_closest_nsec(&search, nodep, foundname, rdataset, sigrdataset, tree, - search.rbtversion->secure); + rbtversion->secure); if (result == ISC_R_SUCCESS) { result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN; @@ -4144,9 +4143,8 @@ found: * we always return a referral. */ if (node->find_callback && - ((node != search.rbtdb->origin_node && - !dns_rdatatype_atparent(type)) || - IS_STUB(search.rbtdb))) + ((node != rbtdb->origin_node && + !dns_rdatatype_atparent(type)) || IS_STUB(rbtdb))) { maybe_zonecut = true; } @@ -4167,16 +4165,10 @@ found: * We now go looking for rdata... */ - lock = &search.rbtdb->node_locks[node->locknum].lock; + lock = &rbtdb->node_locks[node->locknum].lock; NODE_LOCK(lock, isc_rwlocktype_read); - found = NULL; - foundsig = NULL; sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type); - nsecheader = NULL; - nsecsig = NULL; - cnamesig = NULL; - empty_node = true; for (header = node->data; header != NULL; header = header_next) { header_next = header->next; /* @@ -4213,8 +4205,7 @@ found: * ensure that search->zonecut_rdataset will * still be valid later. */ - new_reference(search.rbtdb, node, - isc_rwlocktype_read); + new_reference(rbtdb, node, isc_rwlocktype_read); search.zonecut = node; search.zonecut_rdataset = header; search.zonecut_sigrdataset = NULL; @@ -4300,7 +4291,7 @@ found: break; } } else if (header->type == dns_rdatatype_nsec && - !search.rbtversion->havensec3) { + !rbtversion->havensec3) { /* * Remember a NSEC rdataset even if we're * not specifically looking for it, because @@ -4308,7 +4299,7 @@ found: */ nsecheader = header; } else if (header->type == RBTDB_RDATATYPE_SIGNSEC && - !search.rbtversion->havensec3) + !rbtversion->havensec3) { /* * If we need the NSEC rdataset, we'll also @@ -4359,8 +4350,8 @@ found: * The desired type doesn't exist. */ result = DNS_R_NXRRSET; - if (search.rbtversion->secure == dns_db_secure && - !search.rbtversion->havensec3 && + if (rbtversion->secure == dns_db_secure && + !rbtversion->havensec3 && (nsecheader == NULL || nsecsig == NULL)) { /* @@ -4375,8 +4366,8 @@ found: NODE_UNLOCK(lock, isc_rwlocktype_read); result = find_closest_nsec(&search, nodep, foundname, rdataset, sigrdataset, - search.rbtdb->tree, - search.rbtversion->secure); + rbtdb->tree, + rbtversion->secure); if (result == ISC_R_SUCCESS) { result = DNS_R_EMPTYWILD; } @@ -4392,17 +4383,17 @@ found: goto node_exit; } if (nodep != NULL) { - new_reference(search.rbtdb, node, isc_rwlocktype_read); + new_reference(rbtdb, node, isc_rwlocktype_read); *nodep = node; } - if ((search.rbtversion->secure == dns_db_secure && - !search.rbtversion->havensec3) || + if ((rbtversion->secure == dns_db_secure && + !rbtversion->havensec3) || (search.options & DNS_DBFIND_FORCENSEC) != 0) { - bind_rdataset(search.rbtdb, node, nsecheader, 0, + bind_rdataset(rbtdb, node, nsecheader, 0, isc_rwlocktype_read, rdataset); if (nsecsig != NULL) { - bind_rdataset(search.rbtdb, node, nsecsig, 0, + bind_rdataset(rbtdb, node, nsecsig, 0, isc_rwlocktype_read, sigrdataset); } } @@ -4476,7 +4467,7 @@ found: if (nodep != NULL) { if (!at_zonecut) { - new_reference(search.rbtdb, node, isc_rwlocktype_read); + new_reference(rbtdb, node, isc_rwlocktype_read); } else { search.need_cleanup = false; } @@ -4484,10 +4475,10 @@ found: } if (type != dns_rdatatype_any) { - bind_rdataset(search.rbtdb, node, found, 0, isc_rwlocktype_read, + bind_rdataset(rbtdb, node, found, 0, isc_rwlocktype_read, rdataset); if (foundsig != NULL) { - bind_rdataset(search.rbtdb, node, foundsig, 0, + bind_rdataset(rbtdb, node, foundsig, 0, isc_rwlocktype_read, sigrdataset); } } @@ -4500,7 +4491,7 @@ node_exit: NODE_UNLOCK(lock, isc_rwlocktype_read); tree_exit: - RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read); + RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read); /* * If we found a zonecut but aren't going to use it, we have to @@ -4509,10 +4500,10 @@ tree_exit: if (search.need_cleanup) { node = search.zonecut; INSIST(node != NULL); - lock = &(search.rbtdb->node_locks[node->locknum].lock); + lock = &(rbtdb->node_locks[node->locknum].lock); NODE_LOCK(lock, isc_rwlocktype_read); - decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read, + decrement_reference(rbtdb, node, 0, isc_rwlocktype_read, isc_rwlocktype_none, false); NODE_UNLOCK(lock, isc_rwlocktype_read); } diff --git a/lib/ns/query.c b/lib/ns/query.c index 749c748704b..1c520c6bb21 100644 --- a/lib/ns/query.c +++ b/lib/ns/query.c @@ -5177,7 +5177,7 @@ qctx_clean(query_ctx_t *qctx) { * Free any allocated memory associated with qctx. */ static void -qctx_freedata(query_ctx_t *qctx) { +qctx_free_buffers(query_ctx_t *qctx) { if (qctx->rdataset != NULL) { ns_client_putrdataset(qctx->client, &qctx->rdataset); } @@ -5189,6 +5189,11 @@ qctx_freedata(query_ctx_t *qctx) { if (qctx->fname != NULL) { ns_client_releasename(qctx->client, &qctx->fname); } +} + +static void +qctx_freedata(query_ctx_t *qctx) { + qctx_free_buffers(qctx); if (qctx->db != NULL) { INSIST(qctx->node == NULL); @@ -5743,6 +5748,49 @@ query_refresh_rrset(query_ctx_t *orig_qctx) { qctx_destroy(&qctx); } +/* + * Use the asynchronous hook mechanism to set up a callback in order to + * delay an authoritative query until later if the database lock is busy. + */ +static void +cancelhook(ns_hookasync_t *hctx) { + UNUSED(hctx); +} + +static void +destroyhook(ns_hookasync_t **ctxp) { + ns_hookasync_t *ctx = NULL; + + REQUIRE(ctxp != NULL); + + ctx = *ctxp; + isc_mem_putanddetach(&ctx->mctx, ctx, sizeof(*ctx)); +} + +static isc_result_t +delayquery(query_ctx_t *qctx, isc_mem_t *mctx, void *arg, isc_task_t *task, + isc_taskaction_t action, void *evarg, ns_hookasync_t **ctxp) { + ns_hook_resevent_t *rev = (ns_hook_resevent_t *)isc_event_allocate( + mctx, task, NS_EVENT_HOOKASYNCDONE, action, evarg, + sizeof(*rev)); + ns_hookasync_t *ctx = isc_mem_get(mctx, sizeof(*ctx)); + + UNUSED(arg); + + *ctx = (ns_hookasync_t){ .cancel = cancelhook, + .destroy = destroyhook }; + isc_mem_attach(mctx, &ctx->mctx); + + rev->hookpoint = NS_QUERY_LOOKUP_BEGIN; + rev->saved_qctx = qctx; + rev->ctx = ctx; + + isc_task_send(task, (isc_event_t **)&rev); + + *ctxp = ctx; + return (ISC_R_SUCCESS); +} + /*% * Perform a local database lookup, in either an authoritative or * cache database. If unable to answer, call ns_query_done(); otherwise @@ -5756,7 +5804,7 @@ query_lookup(query_ctx_t *qctx) { dns_clientinfo_t ci; dns_name_t *rpzqname = NULL; char namebuf[DNS_NAME_FORMATSIZE]; - unsigned int dboptions; + unsigned int dboptions = 0; dns_ttl_t stale_refresh = 0; bool dbfind_stale = false; bool stale_timeout = false; @@ -5800,6 +5848,8 @@ query_lookup(query_ctx_t *qctx) { } dboptions = qctx->client->query.dboptions; + dboptions |= DNS_DBFIND_NONBLOCK; + if (!qctx->is_zone && qctx->findcoveringnsec && (qctx->type != dns_rdatatype_null || !dns_name_istat(rpzqname))) { @@ -5817,6 +5867,11 @@ query_lookup(query_ctx_t *qctx) { dboptions, qctx->client->now, &qctx->node, qctx->fname, &cm, &ci, qctx->rdataset, qctx->sigrdataset); + if (result == ISC_R_LOCKBUSY) { + qctx_free_buffers(qctx); + ns_query_hookasync(qctx, delayquery, NULL); + return (ISC_R_COMPLETE); + } /* * Fixup fname and sigrdataset. @@ -6112,6 +6167,7 @@ fetch_callback(isc_task_t *task, isc_event_t *event) { isc_event_free(ISC_EVENT_PTR(&event)); return; } + /* * We are resuming from recursion. Reset any attributes, options * that a lookup due to stale-answer-client-timeout may have set. @@ -6847,9 +6903,11 @@ ns_query_hookasync(query_ctx_t *qctx, ns_query_starthookasync_t runasync, REQUIRE(client->query.hookactx == NULL); REQUIRE(client->query.fetch == NULL); - result = check_recursionquota(client); - if (result != ISC_R_SUCCESS) { - goto cleanup; + if (!qctx->is_zone) { + result = check_recursionquota(client); + if (result != ISC_R_SUCCESS) { + goto cleanup; + } } saved_qctx = isc_mem_get(client->mctx, sizeof(*saved_qctx)); -- 2.47.3