]> git.ipfire.org Git - thirdparty/freeradius-server.git/commitdiff
Fix hangs/crashes on exit if thread instantiation fails
authorArran Cudbard-Bell <a.cudbardb@freeradius.org>
Thu, 13 Jul 2023 22:04:47 +0000 (16:04 -0600)
committerArran Cudbard-Bell <a.cudbardb@freeradius.org>
Thu, 13 Jul 2023 22:04:47 +0000 (16:04 -0600)
src/bin/radiusd.c
src/lib/io/network.c
src/lib/io/schedule.c
src/lib/io/worker.c
src/lib/server/module.c

index d64254710c28f9229111dcc381ca4e27a6839c1b..872f1fc21a84ae3cc1df85d63e8339eba4d53658 100644 (file)
@@ -27,6 +27,7 @@
  * @copyright 2000 Jeff Carneal (jeff@apex.net)
  * @copyright 2000 Chad Miller (cmiller@surfsouth.com)
  */
+#include "lib/util/strerror.h"
 RCSID("$Id$")
 
 #include <freeradius-devel/server/base.h>
@@ -849,6 +850,10 @@ int main(int argc, char *argv[])
                        el = main_loop_event_list();
                }
 
+               /*
+                *      Fix spurious messages
+                */
+               fr_strerror_clear();
                sc = fr_schedule_create(NULL, el, &default_log, fr_debug_lvl,
                                        thread_instantiate, thread_detach, schedule);
                if (!sc) {
index 08a4fa0220a91a98ea26270c21d4bf3cb233fa41..529f731c7c9c9f22dd7655b7f73437420afeda14 100644 (file)
@@ -1507,6 +1507,12 @@ static void fr_network_worker_started_callback(void *ctx, void const *data, size
        fr_channel_requestor_uctx_add(w->channel, w);
        fr_channel_set_recv_reply(w->channel, nr, fr_network_recv_reply);
 
+       /*
+        *      FIXME: This creates a race in the network loop
+        *      exit condition, because it can theoretically
+        *      be signalled to exit before the workers have
+        *      ACKd channel creation.
+        */
        nr->num_workers++;
 
        /*
@@ -1773,7 +1779,20 @@ static void _signal_pipe_read(UNUSED fr_event_list_t *el, int fd, UNUSED int fla
  */
 void fr_network(fr_network_t *nr)
 {
-       while (likely((nr->num_workers > 0) || !nr->exiting)) {
+       /*
+        *      Run until we're told to exit AND the number of
+        *      workers has dropped to zero.
+        *
+        *      This is important as if we exit too early we
+        *      free the channels out from underneath the
+        *      workers and they read uninitialised memory.
+        *
+        *      Whenever a worker ACKs our close notification
+        *      nr->num_workers is decremented, so when
+        *      nr->num_workers == 0, all workers have ACKd
+        *      our close and are no longer using the channel.
+        */
+       while (likely(!(nr->exiting && (nr->num_workers == 0)))) {
                bool wait_for_event;
                int num_events;
 
index 46072a21016f62d20e7d1a07e5b44960a0eddd0d..eb4ec47bf0228a87b36c1f8c69b6d8e8f919a280 100644 (file)
@@ -211,7 +211,7 @@ static void *fr_schedule_worker_thread(void *arg)
                if (!cs) cs = cf_section_find(sc->cs, "worker", NULL);
 
                if (sc->worker_thread_instantiate(sw->ctx, sw->el, cs) < 0) {
-                       PERROR("%s - Failed calling thread instantiate", worker_name);
+                       PERROR("%s - Worker thread instantiation failed", worker_name);
                        goto fail;
                }
        }
@@ -461,7 +461,7 @@ fr_schedule_t *fr_schedule_create(TALLOC_CTX *ctx, fr_event_list_t *el,
                        if (!subcs) subcs = cf_section_find(sc->cs, "worker", NULL);
 
                        if (sc->worker_thread_instantiate(sc->single_worker, el, subcs) < 0) {
-                               PERROR("Failed calling thread instantiate");
+                               PERROR("Worker thread instantiation failed");
                        destroy_both:
                                fr_network_destroy(sc->single_network);
                                fr_worker_destroy(sc->single_worker);
index f83c3300eaa0f94665d8ed66d5554ba446f9a019..bc722fcf1dfc2fcc187dda0dad6a0f2d526cac7d 100644 (file)
@@ -331,6 +331,9 @@ static void worker_channel_callback(void *ctx, void const *data, size_t data_siz
 
                        ms = fr_channel_responder_uctx_get(ch);
 
+                       fr_assert_msg(fr_dlist_num_elements(&worker->channel[i].dlist) == 0,
+                                     "Network added messages to channel after sending FR_CHANNEL_CLOSE");
+
                        fr_channel_responder_ack_close(ch);
                        fr_assert(ms != NULL);
                        fr_message_set_gc(ms);
@@ -1046,6 +1049,11 @@ void fr_worker_destroy(fr_worker_t *worker)
        for (i = 0; i < worker->config.max_channels; i++) {
                if (!worker->channel[i].ch) continue;
 
+               worker_requests_cancel(&worker->channel[i]);
+
+               fr_assert_msg(fr_dlist_num_elements(&worker->channel[i].dlist) == 0,
+                             "Pending messages in channel after cancelling request");
+
                fr_channel_responder_ack_close(worker->channel[i].ch);
        }
 
index ccf95e710681196d53067e64c89002c8c75c57d1..1f159a395c13bd2b140c5987ddd378356f642b90 100644 (file)
@@ -35,6 +35,7 @@ RCSID("$Id$")
 #include <freeradius-devel/server/module_rlm.h>
 #include <freeradius-devel/server/radmin.h>
 #include <freeradius-devel/server/request_data.h>
+#include <freeradius-devel/util/strerror.h>
 #include <freeradius-devel/unlang/xlat_func.h>
 
 /** Heap of all lists/modules used to get a common index with module_thread_inst_list
@@ -645,6 +646,11 @@ int modules_thread_instantiate(TALLOC_CTX *ctx, module_list_t const *ml, fr_even
                        }
                }
 
+               /*
+                *      So we don't get spurious errors
+                */
+               fr_strerror_clear();
+
                DEBUG4("Worker alloced %s thread instance data (%p/%p)", ti->mi->module->name, ti, ti->data);
                if (mi->module->thread_instantiate &&
                    mi->module->thread_instantiate(MODULE_THREAD_INST_CTX(mi->dl_inst, ti->data, el)) < 0) {