#include "daemon/remote.h"
#include "daemon/worker.h"
#include "daemon/daemon.h"
+#include "daemon/stats.h"
#include "util/log.h"
#include "util/config_file.h"
#include "util/net_help.h"
}
}
+/** divide sum of timers to get average */
+static void
+timeval_divide(struct timeval* avg, struct timeval* sum, size_t d)
+{
+#ifndef S_SPLINT_S
+ size_t leftover;
+ if(d == 0) {
+ avg->tv_sec = 0;
+ avg->tv_usec = 0;
+ return;
+ }
+ avg->tv_sec = sum->tv_sec / d;
+ avg->tv_usec = sum->tv_usec / d;
+ /* handle fraction from seconds divide */
+ leftover = sum->tv_sec - avg->tv_sec*d;
+ avg->tv_usec += (leftover*1000000)/d;
+#endif
+}
+
struct daemon_remote*
daemon_remote_create(struct worker* worker)
{
return 0;
}
+/** skip whitespace, return new pointer into string */
+static char*
+skipwhite(char* str)
+{
+ /* EOS \0 is not a space */
+ while( isspace(*str) )
+ str++;
+ return str;
+}
+
/** send the OK to the control client */
static void send_ok(SSL* ssl)
{
/** do the stop command */
static void
-do_stop(struct daemon_remote* rc, SSL* ssl)
+do_stop(SSL* ssl, struct daemon_remote* rc)
{
rc->worker->need_to_exit = 1;
comm_base_exit(rc->worker->base);
/** do the reload command */
static void
-do_reload(struct daemon_remote* rc, SSL* ssl)
+do_reload(SSL* ssl, struct daemon_remote* rc)
{
rc->worker->need_to_exit = 0;
comm_base_exit(rc->worker->base);
send_ok(ssl);
}
+/** do the verbosity command */
+static void
+do_verbosity(SSL* ssl, char* str)
+{
+ int val = atoi(str);
+ if(val == 0 && strcmp(str, "0") != 0) {
+ ssl_printf(ssl, "error in verbosity number syntax: %s\n", str);
+ return;
+ }
+ verbosity = val;
+ send_ok(ssl);
+}
+
+/** print stats from statinfo */
+static int
+print_stats(SSL* ssl, char* nm, struct stats_info* s)
+{
+ struct timeval avg;
+ if(!ssl_printf(ssl, "%s.num.queries: %u\n", nm,
+ (unsigned)s->svr.num_queries)) return 0;
+ if(!ssl_printf(ssl, "%s.num.cachehits: %u\n", nm,
+ (unsigned)(s->svr.num_queries
+ - s->svr.num_queries_missed_cache))) return 0;
+ if(!ssl_printf(ssl, "%s.num.cachemiss: %u\n", nm,
+ (unsigned)s->svr.num_queries_missed_cache)) return 0;
+ if(!ssl_printf(ssl, "%s.num.recursivereplies: %u\n", nm,
+ (unsigned)s->mesh_replies_sent)) return 0;
+ if(!ssl_printf(ssl, "%s.requestlist.avg: %g\n", nm,
+ s->svr.num_queries_missed_cache?
+ (double)s->svr.sum_query_list_size/
+ s->svr.num_queries_missed_cache : 0.0)) return 0;
+ if(!ssl_printf(ssl, "%s.requestlist.max: %u\n", nm,
+ (unsigned)s->svr.max_query_list_size)) return 0;
+ if(!ssl_printf(ssl, "%s.requestlist.overwritten: %u\n", nm,
+ (unsigned)s->mesh_jostled)) return 0;
+ if(!ssl_printf(ssl, "%s.requestlist.exceeded: %u\n", nm,
+ (unsigned)s->mesh_dropped)) return 0;
+ if(!ssl_printf(ssl, "%s.requestlist.current.all: %u\n", nm,
+ (unsigned)s->mesh_num_states)) return 0;
+ if(!ssl_printf(ssl, "%s.requestlist.current.user: %u\n", nm,
+ (unsigned)s->mesh_num_reply_states)) return 0;
+ timeval_divide(&avg, &s->mesh_replies_sum_wait, s->mesh_replies_sent);
+ if(!ssl_printf(ssl, "%s.recursion.time.avg: %d.%6.6d\n", nm,
+ (int)avg.tv_sec, (int)avg.tv_usec)) return 0;
+ if(!ssl_printf(ssl, "%s.recursion.time.median: %g\n", nm,
+ s->mesh_time_median)) return 0;
+ return 1;
+}
+
+/** print stats for one thread */
+static int
+print_thread_stats(SSL* ssl, int i, struct stats_info* s)
+{
+ char nm[16];
+ snprintf(nm, sizeof(nm), "thread%d", i);
+ nm[sizeof(nm)-1]=0;
+ return print_stats(ssl, nm, s);
+}
+
+/** do the stats command */
+static void
+do_stats(SSL* ssl, struct daemon_remote* rc)
+{
+ struct daemon* daemon = rc->worker->daemon;
+ struct stats_info total;
+ struct stats_info s;
+ int i;
+ /* gather all thread statistics in one place */
+ for(i=0; i<daemon->num; i++) {
+ server_stats_obtain(rc->worker, daemon->workers[i], &s);
+ if(!print_thread_stats(ssl, i, &s))
+ return;
+ if(i == 0)
+ total = s;
+ else server_stats_add(&total, &s);
+ }
+ /* print the thread statistics */
+ total.mesh_time_median /= (double)daemon->num;
+ print_stats(ssl, "total", &total);
+}
+
/** execute a remote control command */
static void
execute_cmd(struct daemon_remote* rc, SSL* ssl, char* cmd)
{
- char* p = cmd;
- /* skip whitespace */
- while( isspace(*p) ) p++;
- /* compare command - check longer strings first */
+ char* p = skipwhite(cmd);
+ /* compare command - check longer strings first in case of substrings*/
if(strncmp(p, "stop", 4) == 0) {
- do_stop(rc, ssl);
+ do_stop(ssl, rc);
} else if(strncmp(p, "reload", 6) == 0) {
- do_reload(rc, ssl);
+ do_reload(ssl, rc);
+ } else if(strncmp(p, "verbosity", 9) == 0) {
+ do_verbosity(ssl, skipwhite(p+9));
+ } else if(strncmp(p, "stats", 5) == 0) {
+ do_stats(ssl, rc);
} else {
(void)ssl_printf(ssl, "error unknown command '%s'\n", p);
}
#include "config.h"
#include "daemon/stats.h"
#include "daemon/worker.h"
+#include "daemon/daemon.h"
#include "services/mesh.h"
+#include "util/config_file.h"
+#include "util/tube.h"
+#include "util/timehist.h"
+
+/** add timers and the values do not overflow or become negative */
+static void
+timeval_add(struct timeval* d, struct timeval* add)
+{
+#ifndef S_SPLINT_S
+ d->tv_sec += add->tv_sec;
+ d->tv_usec += add->tv_usec;
+ while(d->tv_usec > 1000000 ) {
+ d->tv_usec -= 1000000;
+ d->tv_sec++;
+ }
+#endif
+}
void server_stats_init(struct server_stats* stats)
{
stats->max_query_list_size = worker->env.mesh->all.count;
}
-void server_stats_log(struct server_stats* stats, int threadnum)
+void server_stats_log(struct server_stats* stats, struct worker* worker,
+ int threadnum)
{
log_info("server stats for thread %d: %u queries, "
"%u answers from cache, %u recursions",
stats->num_queries_missed_cache?
(double)stats->sum_query_list_size/
stats->num_queries_missed_cache : 0.0,
- (unsigned)stats->num_query_list_exceeded);
+ (unsigned)worker->env.mesh->stats_dropped);
+}
+
+void
+server_stats_compile(struct worker* worker, struct stats_info* s)
+{
+ s->svr = worker->stats;
+ s->mesh_num_states = worker->env.mesh->all.count;
+ s->mesh_num_reply_states = worker->env.mesh->num_reply_states;
+ s->mesh_jostled = worker->env.mesh->stats_jostled;
+ s->mesh_dropped = worker->env.mesh->stats_dropped;
+ s->mesh_replies_sent = worker->env.mesh->replies_sent;
+ s->mesh_replies_sum_wait = worker->env.mesh->replies_sum_wait;
+ s->mesh_time_median = timehist_quartile(worker->env.mesh->histogram,
+ 0.50);
+ if(!worker->env.cfg->stat_cumulative) {
+ server_stats_init(&worker->stats);
+ mesh_stats_clear(worker->env.mesh);
+ }
+}
+
+void server_stats_obtain(struct worker* worker, struct worker* who,
+ struct stats_info* s)
+{
+ uint8_t *reply = NULL;
+ size_t len = 0;
+ if(worker == who) {
+ /* just fill it in */
+ server_stats_compile(worker, s);
+ return;
+ }
+ /* communicate over tube */
+ verbose(VERB_ALGO, "write stats cmd");
+ worker_send_cmd(who, worker_cmd_stats);
+ verbose(VERB_ALGO, "wait for stats reply");
+ if(!tube_read_msg(worker->cmd, &reply, &len, 0))
+ fatal_exit("failed to read stats over cmd channel");
+ if(len != sizeof(*s))
+ fatal_exit("stats on cmd channel wrong length %d %d",
+ (int)len, (int)sizeof(*s));
+ memcpy(s, reply, len);
+ free(reply);
+}
+
+void server_stats_reply(struct worker* worker)
+{
+ struct stats_info s;
+ server_stats_compile(worker, &s);
+ verbose(VERB_ALGO, "write stats replymsg");
+ if(!tube_write_msg(worker->daemon->workers[0]->cmd,
+ (uint8_t*)&s, sizeof(s), 0))
+ fatal_exit("could not write stat values over cmd channel");
+}
+
+void server_stats_add(struct stats_info* total, struct stats_info* a)
+{
+ total->svr.num_queries += a->svr.num_queries;
+ total->svr.num_queries_missed_cache += a->svr.num_queries_missed_cache;
+ total->svr.sum_query_list_size += a->svr.sum_query_list_size;
+ /* the max size reached is upped to higher of both */
+ if(a->svr.max_query_list_size > total->svr.max_query_list_size)
+ total->svr.max_query_list_size = a->svr.max_query_list_size;
+
+ total->mesh_num_states += a->mesh_num_states;
+ total->mesh_num_reply_states += a->mesh_num_reply_states;
+ total->mesh_jostled += a->mesh_jostled;
+ total->mesh_dropped += a->mesh_dropped;
+ total->mesh_replies_sent += a->mesh_replies_sent;
+ timeval_add(&total->mesh_replies_sum_wait, &a->mesh_replies_sum_wait);
+ /* the medians are averaged together, this is not as accurate as
+ * taking the median over all of the data, but is good and fast
+ * added up here, division later*/
+ total->mesh_time_median += a->mesh_time_median;
}
size_t sum_query_list_size;
/** max value of query list size reached. */
size_t max_query_list_size;
- /** number of times that the query_list_size was insufficient */
- size_t num_query_list_exceeded;
+};
+
+/**
+ * Statistics to send over the control pipe when asked
+ * This struct is made to be memcpied, sent in binary.
+ */
+struct stats_info {
+ /** the thread stats */
+ struct server_stats svr;
+
+ /** mesh stats: current number of states */
+ size_t mesh_num_states;
+ /** mesh stats: current number of reply (user) states */
+ size_t mesh_num_reply_states;
+ /** mesh stats: number of reply states overwritten with a new one */
+ size_t mesh_jostled;
+ /** mesh stats: number of incoming queries dropped */
+ size_t mesh_dropped;
+ /** mesh stats: replies sent */
+ size_t mesh_replies_sent;
+ /** mesh stats: sum of waiting times for the replies */
+ struct timeval mesh_replies_sum_wait;
+ /** mesh stats: median of waiting times for replies (in sec) */
+ double mesh_time_median;
};
/**
void server_stats_querymiss(struct server_stats* stats, struct worker* worker);
/** display the stats to the log */
-void server_stats_log(struct server_stats* stats, int threadnum);
+void server_stats_log(struct server_stats* stats, struct worker* worker,
+ int threadnum);
+
+/**
+ * Obtain the stats info for a given thread. Uses pipe to communicate.
+ * @param worker: the worker that is executing (the first worker).
+ * @param who: on who to get the statistics info.
+ * @param s: the stats block to fill in.
+ */
+void server_stats_obtain(struct worker* worker, struct worker* who,
+ struct stats_info* s);
+
+/**
+ * Compile stats into structure for this thread worker.
+ * Also clears the statistics counters (if that is set by config file).
+ * @param worker: the worker to compile stats for, also the executing worker.
+ * @param s: stats block.
+ */
+void server_stats_compile(struct worker* worker, struct stats_info* s);
+
+/**
+ * Send stats over comm tube in reply to query cmd
+ * @param worker: this worker.
+ */
+void server_stats_reply(struct worker* worker);
+
+/**
+ * Addup stat blocks.
+ * @param total: sum of the two entries.
+ * @param a: to add to it.
+ */
+void server_stats_add(struct stats_info* total, struct stats_info* a);
#endif /* DAEMON_STATS_H */
void
worker_send_cmd(struct worker* worker, enum worker_commands cmd)
{
- uint32_t c = (uint32_t)cmd;
+ uint32_t c = (uint32_t)htonl(cmd);
if(!tube_write_msg(worker->cmd, (uint8_t*)&c, sizeof(c), 0)) {
log_err("worker send cmd %d failed", (int)cmd);
}
verbose(VERB_ALGO, "got control cmd quit");
comm_base_exit(worker->base);
break;
+ case worker_cmd_stats:
+ verbose(VERB_ALGO, "got control cmd stats");
+ server_stats_reply(worker);
+ break;
default:
log_err("bad command %d", (int)cmd);
break;
/* protect our memory usage from storing reply addresses */
verbose(VERB_ALGO, "Too many requests queued. "
"dropping incoming query.");
- worker->stats.num_query_list_exceeded++;
+ worker->env.mesh->stats_dropped++;
comm_point_drop_reply(repinfo);
return 0;
}
void worker_stat_timer_cb(void* arg)
{
struct worker* worker = (struct worker*)arg;
- server_stats_log(&worker->stats, worker->thread_num);
+ server_stats_log(&worker->stats, worker, worker->thread_num);
mesh_stats(worker->env.mesh, "mesh has");
worker_mem_report(worker, NULL);
if(!worker->daemon->cfg->stat_cumulative) {
}
worker->daemon = daemon;
worker->thread_num = id;
- if(id != 0) {
- if(!(worker->cmd = tube_create())) {
- free(worker->ports);
- free(worker);
- return NULL;
- }
+ if(!(worker->cmd = tube_create())) {
+ free(worker->ports);
+ free(worker);
+ return NULL;
}
return worker;
}
worker_delete(worker);
return 0;
}
- if(worker->thread_num != 0) {
- /* start listening to commands */
- if(!tube_setup_bg_listen(worker->cmd, worker->base,
- &worker_handle_control_cmd, worker)) {
- log_err("could not create control compt.");
- worker_delete(worker);
- return 0;
- }
+ /* start listening to commands */
+ if(!tube_setup_bg_listen(worker->cmd, worker->base,
+ &worker_handle_control_cmd, worker)) {
+ log_err("could not create control compt.");
+ worker_delete(worker);
+ return 0;
}
worker->stat_timer = comm_timer_create(worker->base,
worker_stat_timer_cb, worker);
if(!worker)
return;
if(worker->env.mesh && verbosity >= VERB_OPS) {
- server_stats_log(&worker->stats, worker->thread_num);
+ server_stats_log(&worker->stats, worker, worker->thread_num);
mesh_stats(worker->env.mesh, "mesh has");
worker_mem_report(worker, NULL);
}
/** worker commands */
enum worker_commands {
/** make the worker quit */
- worker_cmd_quit
+ worker_cmd_quit,
+ /** obtain statistics */
+ worker_cmd_stats
};
/**
15 September 2008: Wouter
- working start, stop, reload commands for unbound-control.
- test for unbound-control working; better exit value for control.
+ - verbosity control via unbound-control.
+ - unbound-control stats.
12 September 2008: Wouter
- removed browser control mentions. Proto speccy.
stops the server.
reload
reloads the config file, and flushes the cache.
+verbosity <new value>
+ Change logging verbosity to new value.
stats
output is a list of [name]=[value] lines.
clears the counters.
local_data_add www.example.com. IN A 192.0.2.2
if no local_zone exists for it; a transparent zone with the same
name as the data is created.
-verbosity <new value>
- Change logging verbosity to new value.
.TP
.B reload
Reload the server. This flushes the cache and reads the config file fresh.
+.TP
+.B verbosity \fInumber
+Change verbosity value for logging. Same values as \fBverbosity\fR keyword in
+\fIunbound.conf\fR(5). This new setting lasts until the server is issued
+a reload (taken from config file again), or the next verbosity control command.
+.TP
+.B stats
+Print statistics. Resets the internal counters to zero, this can be
+controlled using the \fBstatistics\-cumulative\fR config statement.
+Statistics are printed with one [name]: [value] per line.
.SH "EXIT CODE"
The unbound-control program exits with status code 1 on error.
.SH "SET UP"
printf(" start start server; runs unbound(8)\n");
printf(" stop stops the server\n");
printf(" reload reloads the server\n");
+ printf(" stats print statistics\n");
+ printf(" verbosity [number] change logging detail\n");
printf("Version %s\n", PACKAGE_VERSION);
printf("BSD licensed, see LICENSE in source package for details.\n");
printf("Report bugs to %s\n", PACKAGE_BUGREPORT);
if(cfg->control_ifs)
svr = cfg->control_ifs->str;
else svr = "127.0.0.1";
+ /* config 0 addr (everything), means ask localhost */
+ if(strcmp(svr, "0.0.0.0") == 0)
+ svr = "127.0.0.1";
+ else if(strcmp(svr, "::0") == 0 ||
+ strcmp(svr, "0::0") == 0 ||
+ strcmp(svr, "0::") == 0 ||
+ strcmp(svr, "::") == 0)
+ svr = "::1";
}
if(strchr(svr, '@')) {
if(!extstrtoaddr(svr, &addr, &addrlen))