add keep-cache option to unbound-control reload to keep caches

author JINMEI Tatuya <jtatuya@infoblox.com>
Mon, 8 Nov 2021 21:39:13 +0000 (13:39 -0800)
committer JINMEI Tatuya <jtatuya@infoblox.com>
Thu, 11 Nov 2021 18:47:08 +0000 (10:47 -0800)
diff --git a/daemon/cachedump.c b/daemon/cachedump.c

index b1ce53b596b66095d18dec20964d683af73a98a1..517a6b1f36b07f1ce847406b7b0dfad30d568c2c 100644 (file)
--- a/daemon/cachedump.c
+++ b/daemon/cachedump.c
@@ -385,7 +385,7 @@ move_into_cache(struct ub_packed_rrset_key* k,
         struct rrset_ref ref;
         uint8_t* p;
  
-       ak = alloc_special_obtain(&worker->alloc);
+       ak = alloc_special_obtain(worker->alloc);
         if(!ak) {
                 log_warn("error out of memory");
                 return 0;
@@ -396,7 +396,7 @@ move_into_cache(struct ub_packed_rrset_key* k,
         ak->rk.dname = (uint8_t*)memdup(k->rk.dname, k->rk.dname_len);
         if(!ak->rk.dname) {
                 log_warn("error out of memory");
-               ub_packed_rrset_parsedelete(ak, &worker->alloc);
+               ub_packed_rrset_parsedelete(ak, worker->alloc);
                 return 0;
         }
         s = sizeof(*ad) + (sizeof(size_t) + sizeof(uint8_t*) + 
@@ -406,7 +406,7 @@ move_into_cache(struct ub_packed_rrset_key* k,
         ad = (struct packed_rrset_data*)malloc(s);
         if(!ad) {
                 log_warn("error out of memory");
-               ub_packed_rrset_parsedelete(ak, &worker->alloc);
+               ub_packed_rrset_parsedelete(ak, worker->alloc);
                 return 0;
         }
         p = (uint8_t*)ad;
@@ -429,7 +429,8 @@ move_into_cache(struct ub_packed_rrset_key* k,
         ref.key = ak;
         ref.id = ak->id;
         (void)rrset_cache_update(worker->env.rrset_cache, &ref,
-               &worker->alloc, *worker->env.now);
+               worker->alloc, *worker->env.now);
+
         return 1;
  }
  
diff --git a/daemon/daemon.c b/daemon/daemon.c

index 0e3923b4e9f2cbd287515cdfbb5128fdbcc2650a..e70ece168cc975f574e48fb4f794f12348348b3d 100644 (file)
--- a/daemon/daemon.c
+++ b/daemon/daemon.c
@@ -433,6 +433,27 @@ static int daemon_get_shufport(struct daemon* daemon, int* shufport)
         return avail;
  }
  
+/**
+ * Clear and free the per-worker alloc caches and the memory held in
+ * superalloc. The rrset and message caches must be empty at call time.
+ * Uses old_num as the count, since daemon_cleanup zeroes num beforehand.
+ * @param daemon: the daemon that maintains the alloc caches to be cleared.
+ */
+static void
+daemon_clear_allocs(struct daemon* daemon)
+{
+       int i, n = daemon->worker_allocs ? daemon->old_num : 0;
+
+       for(i=0; i<n; i++) {
+               alloc_clear(daemon->worker_allocs[i]);
+               free(daemon->worker_allocs[i]);
+       }
+       free(daemon->worker_allocs);
+       daemon->worker_allocs = NULL;
+
+       alloc_clear_special(&daemon->superalloc);
+}
+
  /**
   * Allocate empty worker structures. With backptr and thread-number,
   * from 0..numthread initialised. Used as user arguments to new threads.
@@ -485,6 +506,21 @@ daemon_create_workers(struct daemon* daemon)
                         /* the above is not ports/numthr, due to rounding */
                         fatal_exit("could not create worker");
         }
+       /* create per-worker alloc caches if not reusing existing ones. */
+       if(!daemon->worker_allocs) {
+               daemon->worker_allocs = (struct alloc_cache**)calloc(
+                       (size_t)daemon->num, sizeof(struct alloc_cache*));
+               if(!daemon->worker_allocs)
+                       fatal_exit("could not allocate worker allocs");
+               for(i=0; i<daemon->num; i++) {
+                       struct alloc_cache* alloc = calloc(1,
+                               sizeof(struct alloc_cache));
+                       if (!alloc)
+                               fatal_exit("could not allocate worker alloc");
+                       alloc_init(alloc, &daemon->superalloc, i);
+                       daemon->worker_allocs[i] = alloc;
+               }
+       }
         free(shufport);
  }
  
@@ -713,6 +749,7 @@ daemon_fork(struct daemon* daemon)
         /* Shutdown SHM */
         shm_main_shutdown(daemon);
  
+       daemon->reuse_cache = daemon->workers[0]->reuse_cache;
         daemon->need_to_exit = daemon->workers[0]->need_to_exit;
  }
  
@@ -727,9 +764,16 @@ daemon_cleanup(struct daemon* daemon)
         log_thread_set(NULL);
         /* clean up caches because
          * a) RRset IDs will be recycled after a reload, causing collisions
-        * b) validation config can change, thus rrset, msg, keycache clear */
-       slabhash_clear(&daemon->env->rrset_cache->table);
-       slabhash_clear(daemon->env->msg_cache);
+        * b) validation config can change, thus rrset, msg, keycache clear
+        *
+        * If we are trying to keep the cache as long as possible, we should
+        * defer the cleanup until we know whether the new configuration allows
+        * the reuse.  (If we're exiting, cleanup should be done here). */
+       if(!daemon->reuse_cache || daemon->need_to_exit) {
+               slabhash_clear(&daemon->env->rrset_cache->table);
+               slabhash_clear(daemon->env->msg_cache);
+       }
+       daemon->old_num = daemon->num; /* save the current num */
         local_zones_delete(daemon->local_zones);
         daemon->local_zones = NULL;
         respip_set_delete(daemon->respip_set);
@@ -745,7 +789,12 @@ daemon_cleanup(struct daemon* daemon)
         free(daemon->workers);
         daemon->workers = NULL;
         daemon->num = 0;
-       alloc_clear_special(&daemon->superalloc);
+       /* Unless we're trying to keep the cache, worker alloc_caches should be
+        * cleared and freed here. We do this after deleting workers to
+        * guarantee that the alloc caches are valid throughout the lifetime
+        * of workers. */
+       if(!daemon->reuse_cache || daemon->need_to_exit)
+               daemon_clear_allocs(daemon);
  #ifdef USE_DNSTAP
         dt_delete(daemon->dtenv);
         daemon->dtenv = NULL;
@@ -841,8 +890,42 @@ daemon_delete(struct daemon* daemon)
  
  void daemon_apply_cfg(struct daemon* daemon, struct config_file* cfg)
  {
+       int new_num = cfg->num_threads?cfg->num_threads:1;
+
          daemon->cfg = cfg;
         config_apply(cfg);
+
+       /* If this is a reload and we deferred the decision on whether to
+        * reuse the alloc, RRset, and message caches, then check to see if
+        * it's safe to keep the caches:
+        * - changing the number of threads is obviously incompatible with
+        *   keeping the per-thread alloc caches. It also means we have to
+        *   clear RRset and message caches. (note that 'new_num' may be
+        *   adjusted in daemon_create_workers, but for our purpose we can
+        *   simply compare it with 'old_num'; if they are equal here,
+        *   'new_num' won't be adjusted to a different value than 'old_num').
+        * - changing RRset cache size effectively clears any remaining cache
+        *   entries. We could keep their keys in alloc caches, but it would
+        *   be more consistent with the sense of the change to clear allocs
+        *   and free memory. To do so we also have to clear message cache.
+        * - only changing message cache size does not necessarily affect
+        *   RRset or alloc cache. But almost all new subsequent queries will
+        *   require recursive resolution anyway, so it doesn't help much to
+        *   just keep RRset and alloc caches. For simplicity we clear/free
+        *   the other two, too. */
+       if(daemon->worker_allocs &&
+               (new_num != daemon->old_num ||
+                !slabhash_is_size(daemon->env->msg_cache, cfg->msg_cache_size,
+                       cfg->msg_cache_slabs) ||
+                !slabhash_is_size(&daemon->env->rrset_cache->table,
+                       cfg->rrset_cache_size, cfg->rrset_cache_slabs)))
+       {
+               log_warn("cannot reuse caches due to critical config change");
+               slabhash_clear(&daemon->env->rrset_cache->table);
+               slabhash_clear(daemon->env->msg_cache);
+               daemon_clear_allocs(daemon);
+       }
+
         if(!slabhash_is_size(daemon->env->msg_cache, cfg->msg_cache_size,
                 cfg->msg_cache_slabs)) {
                 slabhash_delete(daemon->env->msg_cache);
diff --git a/daemon/daemon.h b/daemon/daemon.h

index 3effbafb79183a2da8312a082d50dec0b5e2d008..58d78d6ffa6bd788dfb007941456b2480b279dff 100644 (file)
--- a/daemon/daemon.h
+++ b/daemon/daemon.h
@@ -99,8 +99,12 @@ struct daemon {
         void* listen_sslctx, *connect_sslctx;
         /** num threads allocated */
         int num;
+       /** num threads allocated in the previous config or 0 at first */
+       int old_num;
         /** the worker entries */
         struct worker** workers;
+       /** per-worker allocation cache */
+       struct alloc_cache **worker_allocs;
         /** do we need to exit unbound (or is it only a reload?) */
         int need_to_exit;
         /** master random table ; used for port div between threads on reload*/
@@ -138,6 +142,8 @@ struct daemon {
         /** the dnscrypt environment */
         struct dnsc_env* dnscenv;
  #endif
+       /** reuse existing cache on reload if other conditions allow it. */
+       int reuse_cache;
  };
  
  /**
diff --git a/daemon/remote.c b/daemon/remote.c

index adf0383895d49a3ce54bbafb87beb0147da14629..05e6b1a56b5d9570971e32d10f646bdb1a63bc5c 100644 (file)
--- a/daemon/remote.c
+++ b/daemon/remote.c
@@ -684,8 +684,10 @@ do_stop(RES* ssl, struct worker* worker)
  
  /** do the reload command */
  static void
-do_reload(RES* ssl, struct worker* worker)
+do_reload(RES* ssl, struct worker* worker, char* arg)
  {
+       arg = skipwhite(arg);
+       worker->reuse_cache = (strcmp(arg, "+keep-cache") == 0);
         worker->need_to_exit = 0;
         comm_base_exit(worker->base);
         send_ok(ssl);
@@ -3029,7 +3031,7 @@ execute_cmd(struct daemon_remote* rc, RES* ssl, char* cmd,
                 do_stop(ssl, worker);
                 return;
         } else if(cmdcmp(p, "reload", 6)) {
-               do_reload(ssl, worker);
+               do_reload(ssl, worker, skipwhite(p+6));
                 return;
         } else if(cmdcmp(p, "stats_noreset", 13)) {
                 do_stats(ssl, worker, 0);
diff --git a/daemon/worker.c b/daemon/worker.c

index b438700af1f698a4644fe3ca6c390f48a004de62..16368aa77674a248331aae9b6451a139917173ca 100644 (file)
--- a/daemon/worker.c
+++ b/daemon/worker.c
@@ -133,7 +133,7 @@ worker_mem_report(struct worker* ATTR_UNUSED(worker),
         rrset = slabhash_get_mem(&worker->env.rrset_cache->table);
         infra = infra_get_mem(worker->env.infra_cache);
         mesh = mesh_get_mem(worker->env.mesh);
-       ac = alloc_get_mem(&worker->alloc);
+       ac = alloc_get_mem(worker->alloc);
         superac = alloc_get_mem(&worker->daemon->superalloc);
         anch = anchors_get_mem(worker->env.anchors);
         iter = 0;
@@ -1834,15 +1834,14 @@ worker_init(struct worker* worker, struct config_file *cfg,
         }
  
         server_stats_init(&worker->stats, cfg);
-       alloc_init(&worker->alloc, &worker->daemon->superalloc, 
-               worker->thread_num);
-       alloc_set_id_cleanup(&worker->alloc, &worker_alloc_cleanup, worker);
+       worker->alloc = worker->daemon->worker_allocs[worker->thread_num];
+       alloc_set_id_cleanup(worker->alloc, &worker_alloc_cleanup, worker);
         worker->env = *worker->daemon->env;
         comm_base_timept(worker->base, &worker->env.now, &worker->env.now_tv);
         worker->env.worker = worker;
         worker->env.worker_base = worker->base;
         worker->env.send_query = &worker_send_query;
-       worker->env.alloc = &worker->alloc;
+       worker->env.alloc = worker->alloc;
         worker->env.outnet = worker->back;
         worker->env.rnd = worker->rndstate;
         /* If case prefetch is triggered, the corresponding mesh will clear
@@ -1986,7 +1985,7 @@ worker_delete(struct worker* worker)
  #endif /* USE_DNSTAP */
         comm_base_delete(worker->base);
         ub_randfree(worker->rndstate);
-       alloc_clear(&worker->alloc);
+       /* don't touch worker->alloc, as it's maintained in daemon */
         regional_destroy(worker->env.scratch);
         regional_destroy(worker->scratchpad);
         free(worker);
diff --git a/daemon/worker.h b/daemon/worker.h

index 3887d0405ae6a62bf88f5a5c376832b09fdff74c..59b76e1e347337c38ec318c76a5b9d2c843aed7c 100644 (file)
--- a/daemon/worker.h
+++ b/daemon/worker.h
@@ -114,7 +114,7 @@ struct worker {
         /** do we need to restart or quit (on signal) */
         int need_to_exit;
         /** allocation cache for this thread */
-       struct alloc_cache alloc;
+       struct alloc_cache *alloc;
         /** per thread statistics */
         struct ub_server_stats stats;
         /** thread scratch regional */
@@ -127,6 +127,8 @@ struct worker {
         /** dnstap environment, changed for this thread */
         struct dt_env dtenv;
  #endif
+       /** reuse existing cache on reload if other conditions allow it. */
+       int reuse_cache;
  };
  
  /**
diff --git a/smallapp/unbound-control.c b/smallapp/unbound-control.c

index c7c38276f006985cae14df342d5da034a59c1001..d9ba63b88983bbcf25c5c0d79de5ea5801a7ac6b 100644 (file)
--- a/smallapp/unbound-control.c
+++ b/smallapp/unbound-control.c
@@ -102,6 +102,7 @@ usage(void)
         printf("  stop                          stops the server\n");
         printf("  reload                        reloads the server\n");
         printf("                                (this flushes data, stats, requestlist)\n");
+       printf("  reload +keep-cache            ditto but keep RRset and message cache\n");
         printf("  stats                         print statistics\n");
         printf("  stats_noreset                 peek at statistics\n");
  #ifdef HAVE_SHMGET
author	JINMEI Tatuya <jtatuya@infoblox.com>
	Mon, 8 Nov 2021 21:39:13 +0000 (13:39 -0800)
committer	JINMEI Tatuya <jtatuya@infoblox.com>
	Thu, 11 Nov 2021 18:47:08 +0000 (10:47 -0800)
daemon/cachedump.c		patch \| blob \| blame \| history
daemon/daemon.c		patch \| blob \| blame \| history
daemon/daemon.h		patch \| blob \| blame \| history
daemon/remote.c		patch \| blob \| blame \| history
daemon/worker.c		patch \| blob \| blame \| history
daemon/worker.h		patch \| blob \| blame \| history
smallapp/unbound-control.c		patch \| blob \| blame \| history