From: JINMEI Tatuya Date: Mon, 8 Nov 2021 21:39:13 +0000 (-0800) Subject: add keep-cache option to unbound-control reload to keep caches X-Git-Tag: release-1.17.1rc1~5^2~1^2~1 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5b2eda28e338ad45f4bb589951776feddc063795;p=thirdparty%2Funbound.git add keep-cache option to unbound-control reload to keep caches --- diff --git a/daemon/cachedump.c b/daemon/cachedump.c index b1ce53b59..517a6b1f3 100644 --- a/daemon/cachedump.c +++ b/daemon/cachedump.c @@ -385,7 +385,7 @@ move_into_cache(struct ub_packed_rrset_key* k, struct rrset_ref ref; uint8_t* p; - ak = alloc_special_obtain(&worker->alloc); + ak = alloc_special_obtain(worker->alloc); if(!ak) { log_warn("error out of memory"); return 0; @@ -396,7 +396,7 @@ move_into_cache(struct ub_packed_rrset_key* k, ak->rk.dname = (uint8_t*)memdup(k->rk.dname, k->rk.dname_len); if(!ak->rk.dname) { log_warn("error out of memory"); - ub_packed_rrset_parsedelete(ak, &worker->alloc); + ub_packed_rrset_parsedelete(ak, worker->alloc); return 0; } s = sizeof(*ad) + (sizeof(size_t) + sizeof(uint8_t*) + @@ -406,7 +406,7 @@ move_into_cache(struct ub_packed_rrset_key* k, ad = (struct packed_rrset_data*)malloc(s); if(!ad) { log_warn("error out of memory"); - ub_packed_rrset_parsedelete(ak, &worker->alloc); + ub_packed_rrset_parsedelete(ak, worker->alloc); return 0; } p = (uint8_t*)ad; @@ -429,7 +429,8 @@ move_into_cache(struct ub_packed_rrset_key* k, ref.key = ak; ref.id = ak->id; (void)rrset_cache_update(worker->env.rrset_cache, &ref, - &worker->alloc, *worker->env.now); + worker->alloc, *worker->env.now); + return 1; } diff --git a/daemon/daemon.c b/daemon/daemon.c index 0e3923b4e..e70ece168 100644 --- a/daemon/daemon.c +++ b/daemon/daemon.c @@ -433,6 +433,27 @@ static int daemon_get_shufport(struct daemon* daemon, int* shufport) return avail; } +/** + * Clear and delete per-worker alloc caches, and free memory maintained in + * superalloc. + * The rrset and message caches must be empty at the time of call. + * @param daemon: the daemon that maintains the alloc caches to be cleared. + */ +static void +daemon_clear_allocs(struct daemon* daemon) +{ + int i; + + for(i=0; inum; i++) { + alloc_clear(daemon->worker_allocs[i]); + free(daemon->worker_allocs[i]); + } + free(daemon->worker_allocs); + daemon->worker_allocs = NULL; + + alloc_clear_special(&daemon->superalloc); +} + /** * Allocate empty worker structures. With backptr and thread-number, * from 0..numthread initialised. Used as user arguments to new threads. @@ -485,6 +506,21 @@ daemon_create_workers(struct daemon* daemon) /* the above is not ports/numthr, due to rounding */ fatal_exit("could not create worker"); } + /* create per-worker alloc caches if not reusing existing ones. */ + if(!daemon->worker_allocs) { + daemon->worker_allocs = (struct alloc_cache**)calloc( + (size_t)daemon->num, sizeof(struct alloc_cache*)); + if(!daemon->worker_allocs) + fatal_exit("could not allocate worker allocs"); + for(i=0; inum; i++) { + struct alloc_cache* alloc = calloc(1, + sizeof(struct alloc_cache)); + if (!alloc) + fatal_exit("could not allocate worker alloc"); + alloc_init(alloc, &daemon->superalloc, i); + daemon->worker_allocs[i] = alloc; + } + } free(shufport); } @@ -713,6 +749,7 @@ daemon_fork(struct daemon* daemon) /* Shutdown SHM */ shm_main_shutdown(daemon); + daemon->reuse_cache = daemon->workers[0]->reuse_cache; daemon->need_to_exit = daemon->workers[0]->need_to_exit; } @@ -727,9 +764,16 @@ daemon_cleanup(struct daemon* daemon) log_thread_set(NULL); /* clean up caches because * a) RRset IDs will be recycled after a reload, causing collisions - * b) validation config can change, thus rrset, msg, keycache clear */ - slabhash_clear(&daemon->env->rrset_cache->table); - slabhash_clear(daemon->env->msg_cache); + * b) validation config can change, thus rrset, msg, keycache clear + * + * If we are trying to keep the cache as long as possible, we should + * defer the cleanup until we know whether the new configuration allows + * the reuse. (If we're exiting, cleanup should be done here). */ + if(!daemon->reuse_cache || daemon->need_to_exit) { + slabhash_clear(&daemon->env->rrset_cache->table); + slabhash_clear(daemon->env->msg_cache); + } + daemon->old_num = daemon->num; /* save the current num */ local_zones_delete(daemon->local_zones); daemon->local_zones = NULL; respip_set_delete(daemon->respip_set); @@ -745,7 +789,12 @@ daemon_cleanup(struct daemon* daemon) free(daemon->workers); daemon->workers = NULL; daemon->num = 0; - alloc_clear_special(&daemon->superalloc); + /* Unless we're trying to keep the cache, worker alloc_caches should be + * cleared and freed here. We do this after deleting workers to + * guarantee that the alloc caches are valid throughout the lifetime + * of workers. */ + if(!daemon->reuse_cache || daemon->need_to_exit) + daemon_clear_allocs(daemon); #ifdef USE_DNSTAP dt_delete(daemon->dtenv); daemon->dtenv = NULL; @@ -841,8 +890,42 @@ daemon_delete(struct daemon* daemon) void daemon_apply_cfg(struct daemon* daemon, struct config_file* cfg) { + int new_num = cfg->num_threads?cfg->num_threads:1; + daemon->cfg = cfg; config_apply(cfg); + + /* If this is a reload and we deferred the decision on whether to + * reuse the alloc, RRset, and message caches, then check to see if + * it's safe to keep the caches: + * - changing the number of threads is obviously incompatible with + * keeping the per-thread alloc caches. It also means we have to + * clear RRset and message caches. (note that 'new_num' may be + * adjusted in daemon_create_workers, but for our purpose we can + * simply compare it with 'old_num'; if they are equal here, + * 'new_num' won't be adjusted to a different value than 'old_num'). + * - changing RRset cache size effectively clears any remaining cache + * entries. We could keep their keys in alloc caches, but it would + * be more consistent with the sense of the change to clear allocs + * and free memory. To do so we also have to clear message cache. + * - only changing message cache size does not necessarily affect + * RRset or alloc cache. But almost all new subsequent queries will + * require recursive resolution anyway, so it doesn't help much to + * just keep RRset and alloc caches. For simplicity we clear/free + * the other two, too. */ + if(daemon->worker_allocs && + (new_num != daemon->old_num || + !slabhash_is_size(daemon->env->msg_cache, cfg->msg_cache_size, + cfg->msg_cache_slabs) || + !slabhash_is_size(&daemon->env->rrset_cache->table, + cfg->rrset_cache_size, cfg->rrset_cache_slabs))) + { + log_warn("cannot reuse caches due to critical config change"); + slabhash_clear(&daemon->env->rrset_cache->table); + slabhash_clear(daemon->env->msg_cache); + daemon_clear_allocs(daemon); + } + if(!slabhash_is_size(daemon->env->msg_cache, cfg->msg_cache_size, cfg->msg_cache_slabs)) { slabhash_delete(daemon->env->msg_cache); diff --git a/daemon/daemon.h b/daemon/daemon.h index 3effbafb7..58d78d6ff 100644 --- a/daemon/daemon.h +++ b/daemon/daemon.h @@ -99,8 +99,12 @@ struct daemon { void* listen_sslctx, *connect_sslctx; /** num threads allocated */ int num; + /** num threads allocated in the previous config or 0 at first */ + int old_num; /** the worker entries */ struct worker** workers; + /** per-worker allocation cache */ + struct alloc_cache **worker_allocs; /** do we need to exit unbound (or is it only a reload?) */ int need_to_exit; /** master random table ; used for port div between threads on reload*/ @@ -138,6 +142,8 @@ struct daemon { /** the dnscrypt environment */ struct dnsc_env* dnscenv; #endif + /** reuse existing cache on reload if other conditions allow it. */ + int reuse_cache; }; /** diff --git a/daemon/remote.c b/daemon/remote.c index adf038389..05e6b1a56 100644 --- a/daemon/remote.c +++ b/daemon/remote.c @@ -684,8 +684,10 @@ do_stop(RES* ssl, struct worker* worker) /** do the reload command */ static void -do_reload(RES* ssl, struct worker* worker) +do_reload(RES* ssl, struct worker* worker, char* arg) { + arg = skipwhite(arg); + worker->reuse_cache = (strcmp(arg, "+keep-cache") == 0); worker->need_to_exit = 0; comm_base_exit(worker->base); send_ok(ssl); @@ -3029,7 +3031,7 @@ execute_cmd(struct daemon_remote* rc, RES* ssl, char* cmd, do_stop(ssl, worker); return; } else if(cmdcmp(p, "reload", 6)) { - do_reload(ssl, worker); + do_reload(ssl, worker, skipwhite(p+6)); return; } else if(cmdcmp(p, "stats_noreset", 13)) { do_stats(ssl, worker, 0); diff --git a/daemon/worker.c b/daemon/worker.c index b438700af..16368aa77 100644 --- a/daemon/worker.c +++ b/daemon/worker.c @@ -133,7 +133,7 @@ worker_mem_report(struct worker* ATTR_UNUSED(worker), rrset = slabhash_get_mem(&worker->env.rrset_cache->table); infra = infra_get_mem(worker->env.infra_cache); mesh = mesh_get_mem(worker->env.mesh); - ac = alloc_get_mem(&worker->alloc); + ac = alloc_get_mem(worker->alloc); superac = alloc_get_mem(&worker->daemon->superalloc); anch = anchors_get_mem(worker->env.anchors); iter = 0; @@ -1834,15 +1834,14 @@ worker_init(struct worker* worker, struct config_file *cfg, } server_stats_init(&worker->stats, cfg); - alloc_init(&worker->alloc, &worker->daemon->superalloc, - worker->thread_num); - alloc_set_id_cleanup(&worker->alloc, &worker_alloc_cleanup, worker); + worker->alloc = worker->daemon->worker_allocs[worker->thread_num]; + alloc_set_id_cleanup(worker->alloc, &worker_alloc_cleanup, worker); worker->env = *worker->daemon->env; comm_base_timept(worker->base, &worker->env.now, &worker->env.now_tv); worker->env.worker = worker; worker->env.worker_base = worker->base; worker->env.send_query = &worker_send_query; - worker->env.alloc = &worker->alloc; + worker->env.alloc = worker->alloc; worker->env.outnet = worker->back; worker->env.rnd = worker->rndstate; /* If case prefetch is triggered, the corresponding mesh will clear @@ -1986,7 +1985,7 @@ worker_delete(struct worker* worker) #endif /* USE_DNSTAP */ comm_base_delete(worker->base); ub_randfree(worker->rndstate); - alloc_clear(&worker->alloc); + /* don't touch worker->alloc, as it's maintained in daemon */ regional_destroy(worker->env.scratch); regional_destroy(worker->scratchpad); free(worker); diff --git a/daemon/worker.h b/daemon/worker.h index 3887d0405..59b76e1e3 100644 --- a/daemon/worker.h +++ b/daemon/worker.h @@ -114,7 +114,7 @@ struct worker { /** do we need to restart or quit (on signal) */ int need_to_exit; /** allocation cache for this thread */ - struct alloc_cache alloc; + struct alloc_cache *alloc; /** per thread statistics */ struct ub_server_stats stats; /** thread scratch regional */ @@ -127,6 +127,8 @@ struct worker { /** dnstap environment, changed for this thread */ struct dt_env dtenv; #endif + /** reuse existing cache on reload if other conditions allow it. */ + int reuse_cache; }; /** diff --git a/smallapp/unbound-control.c b/smallapp/unbound-control.c index c7c38276f..d9ba63b88 100644 --- a/smallapp/unbound-control.c +++ b/smallapp/unbound-control.c @@ -102,6 +102,7 @@ usage(void) printf(" stop stops the server\n"); printf(" reload reloads the server\n"); printf(" (this flushes data, stats, requestlist)\n"); + printf(" reload +keep-cache ditto but keep RRset and message cache\n"); printf(" stats print statistics\n"); printf(" stats_noreset peek at statistics\n"); #ifdef HAVE_SHMGET