From: Arran Cudbard-Bell Date: Thu, 13 Jul 2023 22:04:47 +0000 (-0600) Subject: Fix hangs/crashes on exit if thread instantiation fails X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=f724a53204bb1179a91b43669503f1651c69b751;p=thirdparty%2Ffreeradius-server.git Fix hangs/crashes on exit if thread instantiation fails --- diff --git a/src/bin/radiusd.c b/src/bin/radiusd.c index d64254710c2..872f1fc21a8 100644 --- a/src/bin/radiusd.c +++ b/src/bin/radiusd.c @@ -27,6 +27,7 @@ * @copyright 2000 Jeff Carneal (jeff@apex.net) * @copyright 2000 Chad Miller (cmiller@surfsouth.com) */ +#include "lib/util/strerror.h" RCSID("$Id$") #include @@ -849,6 +850,10 @@ int main(int argc, char *argv[]) el = main_loop_event_list(); } + /* + * Fix spurious messages + */ + fr_strerror_clear(); sc = fr_schedule_create(NULL, el, &default_log, fr_debug_lvl, thread_instantiate, thread_detach, schedule); if (!sc) { diff --git a/src/lib/io/network.c b/src/lib/io/network.c index 08a4fa0220a..529f731c7c9 100644 --- a/src/lib/io/network.c +++ b/src/lib/io/network.c @@ -1507,6 +1507,12 @@ static void fr_network_worker_started_callback(void *ctx, void const *data, size fr_channel_requestor_uctx_add(w->channel, w); fr_channel_set_recv_reply(w->channel, nr, fr_network_recv_reply); + /* + * FIXME: This creates a race in the network loop + * exit condition, because it can theoretically + * be signalled to exit before the workers have + * ACKd channel creation. + */ nr->num_workers++; /* @@ -1773,7 +1779,20 @@ static void _signal_pipe_read(UNUSED fr_event_list_t *el, int fd, UNUSED int fla */ void fr_network(fr_network_t *nr) { - while (likely((nr->num_workers > 0) || !nr->exiting)) { + /* + * Run until we're told to exit AND the number of + * workers has dropped to zero. + * + * This is important as if we exit too early we + * free the channels out from underneath the + * workers and they read uninitialised memory. + * + * Whenever a worker ACKs our close notification + * nr->num_workers is decremented, so when + * nr->num_workers == 0, all workers have ACKd + * our close and are no longer using the channel. + */ + while (likely(!(nr->exiting && (nr->num_workers == 0)))) { bool wait_for_event; int num_events; diff --git a/src/lib/io/schedule.c b/src/lib/io/schedule.c index 46072a21016..eb4ec47bf02 100644 --- a/src/lib/io/schedule.c +++ b/src/lib/io/schedule.c @@ -211,7 +211,7 @@ static void *fr_schedule_worker_thread(void *arg) if (!cs) cs = cf_section_find(sc->cs, "worker", NULL); if (sc->worker_thread_instantiate(sw->ctx, sw->el, cs) < 0) { - PERROR("%s - Failed calling thread instantiate", worker_name); + PERROR("%s - Worker thread instantiation failed", worker_name); goto fail; } } @@ -461,7 +461,7 @@ fr_schedule_t *fr_schedule_create(TALLOC_CTX *ctx, fr_event_list_t *el, if (!subcs) subcs = cf_section_find(sc->cs, "worker", NULL); if (sc->worker_thread_instantiate(sc->single_worker, el, subcs) < 0) { - PERROR("Failed calling thread instantiate"); + PERROR("Worker thread instantiation failed"); destroy_both: fr_network_destroy(sc->single_network); fr_worker_destroy(sc->single_worker); diff --git a/src/lib/io/worker.c b/src/lib/io/worker.c index f83c3300eaa..bc722fcf1df 100644 --- a/src/lib/io/worker.c +++ b/src/lib/io/worker.c @@ -331,6 +331,9 @@ static void worker_channel_callback(void *ctx, void const *data, size_t data_siz ms = fr_channel_responder_uctx_get(ch); + fr_assert_msg(fr_dlist_num_elements(&worker->channel[i].dlist) == 0, + "Network added messages to channel after sending FR_CHANNEL_CLOSE"); + fr_channel_responder_ack_close(ch); fr_assert(ms != NULL); fr_message_set_gc(ms); @@ -1046,6 +1049,11 @@ void fr_worker_destroy(fr_worker_t *worker) for (i = 0; i < worker->config.max_channels; i++) { if (!worker->channel[i].ch) continue; + worker_requests_cancel(&worker->channel[i]); + + fr_assert_msg(fr_dlist_num_elements(&worker->channel[i].dlist) == 0, + "Pending messages in channel after cancelling request"); + fr_channel_responder_ack_close(worker->channel[i].ch); } diff --git a/src/lib/server/module.c b/src/lib/server/module.c index ccf95e71068..1f159a395c1 100644 --- a/src/lib/server/module.c +++ b/src/lib/server/module.c @@ -35,6 +35,7 @@ RCSID("$Id$") #include #include #include +#include #include /** Heap of all lists/modules used to get a common index with module_thread_inst_list @@ -645,6 +646,11 @@ int modules_thread_instantiate(TALLOC_CTX *ctx, module_list_t const *ml, fr_even } } + /* + * So we don't get spurious errors + */ + fr_strerror_clear(); + DEBUG4("Worker alloced %s thread instance data (%p/%p)", ti->mi->module->name, ti, ti->data); if (mi->module->thread_instantiate && mi->module->thread_instantiate(MODULE_THREAD_INST_CTX(mi->dl_inst, ti->data, el)) < 0) {