resources are not held by the threads and can be deleted.
return "reload_stop";
case fast_reload_notification_reload_ack:
return "reload_ack";
+ case fast_reload_notification_reload_nopause_poll:
+ return "reload_nopause_poll";
case fast_reload_notification_reload_start:
return "reload_start";
default:
log_err("gettimeofday: %s", strerror(errno));
/* Delete old. */
+ if(fr_poll_for_quit(fr)) {
+ config_delete(newcfg);
+ fr_construct_clear(&ct);
+ return 1;
+ }
+ if(fr->fr_nopause) {
+ /* Poll every thread, with a no-work poll item over the
+ * command pipe. This makes the worker thread surely move
+ * to deal with that event, and thus the thread is no longer
+ * holding, eg. a string item from the old config struct.
+ * And then the old config struct can safely be deleted.
+ * Only needed when nopause is used, because without that
+ * the worker threads are already waiting on a command pipe
+ * item. This nopause command pipe item does not take work,
+ * it returns immediately, so it does not delay the workers.
+ * They can be polled one at a time. But its processing causes
+ * the worker to have released data items from old config. */
+ fr_send_notification(fr,
+ fast_reload_notification_reload_nopause_poll);
+ fr_poll_for_ack(fr);
+ }
config_delete(newcfg);
fr_construct_clear(&ct);
return 1;
verbose(VERB_ALGO, "worker resume after reload");
}
+/** Fast reload, the main thread performs the nopause poll. It briefly polls
+ * every other worker thread over the command pipe ipc. The command takes
+ * no time for a worker; it acknowledges and returns immediately, but
+ * handling the event makes the worker release items it held from the old
+ * config. After every worker has acked, this sends an acknowledgement to
+ * the fast reload thread, so it can safely delete the old config. */
+static void
+fr_main_perform_reload_nopause_poll(struct fast_reload_thread* fr)
+{
+	struct daemon* daemon = fr->worker->daemon;
+	int i;
+
+	/* Send the reload_poll to other threads. They can respond
+	 * one at a time. */
+	for(i=0; i<daemon->num; i++) {
+		if(i == fr->worker->thread_num)
+			continue; /* Do not send to ourselves. */
+		worker_send_cmd(daemon->workers[i], worker_cmd_reload_poll);
+	}
+
+	/* Wait for the other threads to ack. */
+	fr_read_ack_from_workers(fr);
+
+	/* Send ack to fast reload thread. */
+	fr_send_cmd_to(fr, fast_reload_notification_reload_ack, 0, 1);
+}
+
/** Fast reload, perform the command received from the fast reload thread */
static void
fr_main_perform_cmd(struct fast_reload_thread* fr,
fr_main_perform_done(fr);
} else if(status == fast_reload_notification_reload_stop) {
fr_main_perform_reload_stop(fr);
+ } else if(status == fast_reload_notification_reload_nopause_poll) {
+ fr_main_perform_reload_nopause_poll(fr);
} else {
log_err("main received unknown status from fast reload: %d %s",
(int)status, fr_notification_to_string(status));
/** ack the stop as part of the reload */
fast_reload_notification_reload_ack = 7,
/** resume from stop as part of the reload */
- fast_reload_notification_reload_start = 8
+ fast_reload_notification_reload_start = 8,
+ /** the fast reload thread wants the mainthread to poll workers,
+ * after the reload, sent when nopause is used */
+ fast_reload_notification_reload_nopause_poll = 9
};
/**
return;
}
-/** stop and wait to resume the worker */
+/**
+ * Send fast-reload acknowledgement to the mainthread in one byte.
+ * This signals that this worker has received the previous command.
+ * The worker is waiting if that is after a reload_stop command.
+ * Or the worker has briefly processed the event itself, and in doing so
+ * released data pointers to old config, after a reload_poll command.
+ */
static void
-worker_stop_and_wait(struct worker* worker)
+worker_send_reload_ack(struct worker* worker)
{
+ /* If this is clipped to 8 bits because thread_num>255, then that
+ * is not a problem, the receiver counts the number of bytes received.
+ * The number is informative only. */
uint8_t c = (uint8_t)worker->thread_num;
ssize_t ret;
- uint8_t* buf = NULL;
- uint32_t len = 0, cmd;
while(1) {
ret = send(worker->daemon->fast_reload_thread->commreload[1],
&c, 1, 0);
}
break;
}
+}
+
+/** stop and wait to resume the worker */
+static void
+worker_stop_and_wait(struct worker* worker)
+{
+ uint8_t* buf = NULL;
+ uint32_t len = 0, cmd;
+ worker_send_reload_ack(worker);
/* wait for reload */
if(!tube_read_msg(worker->cmd, &buf, &len, 0)) {
log_err("worker reload read reply failed");
verbose(VERB_ALGO, "got control cmd reload_stop");
worker_stop_and_wait(worker);
break;
+ case worker_cmd_reload_poll:
+ verbose(VERB_ALGO, "got control cmd reload_poll");
+ worker_send_reload_ack(worker);
+ break;
default:
log_err("bad command %d", (int)cmd);
break;
/** for fast-reload, perform stop */
worker_cmd_reload_stop,
/** for fast-reload, start again */
- worker_cmd_reload_start
+ worker_cmd_reload_start,
+ /** for fast-reload, poll to make sure worker has released data */
+ worker_cmd_reload_poll
};
/**
or new values together. The option makes the reload time take eg. 3
microseconds instead of 0.3 milliseconds during which the worker threads are
interrupted. So, the interruption is much shorter, at the expense of some
-inconsistency.
+inconsistency. After the reload itself, every worker thread is briefly
+contacted to make it release resources; this makes the deletion take a
+little longer, and takes up time from the worker thread that services
+the remote control.
.IP
The '+d' option makes the reload drop queries that the worker threads are
working on. This is like flush_requestlist. Without it the queries are kept