]> git.ipfire.org Git - pakfire.git/commitdiff
daemon: Connect the worker with the daemon
authorMichael Tremer <michael.tremer@ipfire.org>
Fri, 4 Oct 2024 12:41:30 +0000 (12:41 +0000)
committerMichael Tremer <michael.tremer@ipfire.org>
Fri, 4 Oct 2024 12:41:30 +0000 (12:41 +0000)
The daemon will now process when the worker exits and at least clean up.

Signed-off-by: Michael Tremer <michael.tremer@ipfire.org>
src/libpakfire/daemon.c
src/libpakfire/include/pakfire/daemon.h
src/libpakfire/include/pakfire/worker.h
src/libpakfire/worker.c

index d1367e0fd46a9ddfad28f1cfac5d0259eb560258..839051d7a5b9d7db398d3a97d20d4b1f13d7e558 100644 (file)
@@ -344,21 +344,13 @@ static int pakfire_daemon_job(struct pakfire_daemon* daemon, json_object* m) {
        }
 
        // Create a new worker
-       r = pakfire_worker_create(&worker, daemon->ctx, data);
+       r = pakfire_worker_create(&worker, daemon->ctx, daemon, data);
        if (r) {
                CTX_ERROR(daemon->ctx, "Could not create a new worker: %s\n", strerror(-r));
 
                goto ERROR;
        }
 
-       // Launch the worker
-       r = pakfire_worker_launch(worker, daemon->loop);
-       if (r < 0) {
-               CTX_ERROR(daemon->ctx, "Could not launch worker: %s\n", strerror(-r));
-
-               goto ERROR;
-       }
-
        // Store the worker
        for (unsigned int i = 0; i < MAX_WORKERS; i++) {
                if (!daemon->workers[i]) {
@@ -367,6 +359,14 @@ static int pakfire_daemon_job(struct pakfire_daemon* daemon, json_object* m) {
                }
        }
 
+       // Launch the worker
+       r = pakfire_worker_launch(worker);
+       if (r < 0) {
+               CTX_ERROR(daemon->ctx, "Could not launch worker: %s\n", strerror(-r));
+
+               goto ERROR;
+       }
+
        // Increment the number of running workers
        daemon->running_workers++;
 
@@ -535,6 +535,11 @@ ERROR:
        return r;
 }
 
+// Currently we are not doing anything here. We just need to block SIGCHLD.
+static int pakfire_daemon_SIGCHLD(sd_event_source* s, const struct signalfd_siginfo* si, void* data) {
+       return 0;
+}
+
 static int pakfire_daemon_setup_loop(struct pakfire_daemon* daemon) {
        int r;
 
@@ -568,6 +573,14 @@ static int pakfire_daemon_setup_loop(struct pakfire_daemon* daemon) {
                return r;
        }
 
+       // Listen for SIGCHLD
+       r = sd_event_add_signal(daemon->loop, NULL, SIGCHLD|SD_EVENT_SIGNAL_PROCMASK,
+               pakfire_daemon_SIGCHLD, daemon);
+       if (r < 0) {
+               CTX_ERROR(daemon->ctx, "Could not register handling SIGCHLD: %s\n", strerror(-r));
+               return r;
+       }
+
        // Setup the reconnection timer
        r = sd_event_add_time_relative(daemon->loop, &daemon->connect_timer,
                CLOCK_MONOTONIC, 0, 0, pakfire_daemon_connect, daemon);
@@ -710,3 +723,43 @@ ERROR:
 int pakfire_daemon_main(struct pakfire_daemon* daemon) {
        return pakfire_daemon_loop(daemon);
 }
+
+/*
+       Called after a new worker has been launched to register with the daemon's event loop.
+*/
+int pakfire_daemon_worker_launched(struct pakfire_daemon* daemon,
+               struct pakfire_worker* worker, int pidfd) {
+       int r;
+
+       CTX_DEBUG(daemon->ctx, "Registering worker %p with PIDFD %d\n", worker, pidfd);
+
+       r = sd_event_add_child_pidfd(daemon->loop, NULL, pidfd,
+               WEXITED, pakfire_worker_exited, worker);
+       if (r < 0) {
+               CTX_DEBUG(daemon->ctx, "Could not register the worker with the event loop: %s\n", strerror(-r));
+
+               return r;
+       }
+
+       return 0;
+}
+
+/*
+       Called after a worker has exited.
+*/
+int pakfire_daemon_worker_exited(struct pakfire_daemon* daemon, struct pakfire_worker* worker) {
+       CTX_DEBUG(daemon->ctx, "Deregistering worker %p\n", worker);
+
+       for (unsigned int i = 0; i < MAX_WORKERS; i++) {
+               if (daemon->workers[i] != worker)
+                       continue;
+
+               // Dereference the worker and clear the slot
+               pakfire_worker_unref(daemon->workers[i]);
+               daemon->workers[i] = NULL;
+
+               break;
+       }
+
+       return 0;
+}
index ee5bcbad5d9bbdf1c72bbb00c8f68ce22d49668c..b49d2bf065ce95c363586050b887f6d69bae95fd 100644 (file)
@@ -26,6 +26,7 @@
 struct pakfire_daemon;
 
 #include <pakfire/ctx.h>
+#include <pakfire/worker.h>
 
 int pakfire_daemon_create(struct pakfire_daemon** daemon, struct pakfire_ctx* ctx);
 
@@ -34,5 +35,10 @@ struct pakfire_daemon* pakfire_daemon_unref(struct pakfire_daemon* daemon);
 
 int pakfire_daemon_main(struct pakfire_daemon* daemon);
 
+int pakfire_daemon_worker_launched(
+       struct pakfire_daemon* daemon, struct pakfire_worker* worker, int pidfd);
+int pakfire_daemon_worker_exited(
+       struct pakfire_daemon* daemon, struct pakfire_worker* worker);
+
 #endif /* PAKFIRE_PRIVATE */
 #endif /* PAKFIRE_DAEMON_H */
index e6bd146748bbe0f2234e2b3c0c445b2cd12770e3..de0568d86d07108f15ca83da511d30bb65fef81a 100644 (file)
 
 #ifdef PAKFIRE_PRIVATE
 
+#include <pakfire/ctx.h>
+#include <pakfire/daemon.h>
+
 struct pakfire_worker;
 
 int pakfire_worker_create(struct pakfire_worker** worker,
-       struct pakfire_ctx* ctx, json_object* data);
+       struct pakfire_ctx* ctx, struct pakfire_daemon* daemon, json_object* data);
 
 struct pakfire_worker* pakfire_worker_ref(struct pakfire_worker* worker);
 struct pakfire_worker* pakfire_worker_unref(struct pakfire_worker* worker);
 
+// Launch
+int pakfire_worker_launch(struct pakfire_worker* worker);
+
+int pakfire_worker_exited(sd_event_source* s, const siginfo_t* si, void* data);
+
 #endif /* PAKFIRE_PRIVATE */
 
 #endif /* PAKFIRE_WORKER_H */
index b553769e913ec31aa26ca8f51e02101635682e49..7bf60b4d8ca39c8d16f3ece56f319a0bdcba315e 100644 (file)
 
 #include <errno.h>
 #include <limits.h>
+#include <linux/sched.h>
 
 #include <json.h>
 
 #include <uuid/uuid.h>
 
+#include <systemd/sd-event.h>
+
 #include <pakfire/constants.h>
 #include <pakfire/ctx.h>
+#include <pakfire/daemon.h>
+#include <pakfire/logging.h>
 #include <pakfire/string.h>
 #include <pakfire/worker.h>
+#include <pakfire/util.h>
 
 struct pakfire_worker {
        struct pakfire_ctx* ctx;
        int nrefs;
 
+       struct pakfire_daemon* daemon;
+
        uuid_t job_id;
 
        char name[NAME_MAX];
@@ -53,6 +61,12 @@ struct pakfire_worker {
 
        // Configuration
        char conf[4096];
+
+       // PID File Descriptor
+       int pidfd;
+
+       // Process Event
+       sd_event_source* process;
 };
 
 static int pakfire_parse_job(struct pakfire_worker* worker, json_object* data) {
@@ -180,12 +194,15 @@ static int pakfire_parse_job(struct pakfire_worker* worker, json_object* data) {
 }
 
 static void pakfire_worker_free(struct pakfire_worker* worker) {
+       if (worker->daemon)
+               pakfire_daemon_unref(worker->daemon);
        if (worker->ctx)
                pakfire_ctx_unref(worker->ctx);
        free(worker);
 }
 
-int pakfire_worker_create(struct pakfire_worker** worker, struct pakfire_ctx* ctx, json_object* data) {
+int pakfire_worker_create(struct pakfire_worker** worker, struct pakfire_ctx* ctx,
+               struct pakfire_daemon* daemon, json_object* data) {
        struct pakfire_worker* w = NULL;
        int r;
 
@@ -200,6 +217,9 @@ int pakfire_worker_create(struct pakfire_worker** worker, struct pakfire_ctx* ct
        // Initialize the reference counter
        w->nrefs = 1;
 
+       // Store a reference to the daemon
+       w->daemon = pakfire_daemon_ref(daemon);
+
        // Parse the job
        r = pakfire_parse_job(w, data);
        if (r)
@@ -229,3 +249,95 @@ struct pakfire_worker* pakfire_worker_unref(struct pakfire_worker* worker) {
        pakfire_worker_free(worker);
        return NULL;
 }
+
+/*
+
+*/
+static int pakfire_worker_parent(struct pakfire_worker* worker) {
+       int r;
+
+       // Tell the daemon process that the worker has been launched
+       r = pakfire_daemon_worker_launched(worker->daemon, worker, worker->pidfd);
+       if (r < 0)
+               return r;
+
+       return 0;
+}
+
+static int pakfire_worker_child(struct pakfire_worker* worker) {
+       // Fetch our PID
+       pid_t pid = getpid();
+
+       CTX_DEBUG(worker->ctx, "Launched worker child as PID %d\n", pid);
+
+       // XXX TODO
+
+       return 0;
+}
+
+/*
+       Launches the worker and returns the pidfd
+*/
+int pakfire_worker_launch(struct pakfire_worker* worker) {
+       char job_id[UUID_STR_LEN];
+       int pid;
+       int r;
+
+       // Format the job ID as string
+       uuid_unparse(worker->job_id, job_id);
+
+       CTX_DEBUG(worker->ctx, "Launching worker %s\n", job_id);
+
+       // Configure child process
+       struct clone_args args = {
+               .flags = CLONE_PIDFD,
+               .exit_signal = SIGCHLD,
+               .pidfd = (long long unsigned int)&worker->pidfd,
+       };
+
+       // Fork this process
+       pid = clone3(&args, sizeof(args));
+       if (pid < 0) {
+               CTX_ERROR(worker->ctx, "Could not clone: %m\n");
+
+               return -errno;
+
+       // Child process
+       } else if (pid == 0) {
+               r = pakfire_worker_child(worker);
+               _exit(r);
+       }
+
+       return pakfire_worker_parent(worker);
+}
+
+/*
+       This method is triggered by SIGCHLD whenever the worker exits.
+*/
+int pakfire_worker_exited(sd_event_source* s, const siginfo_t* si, void* data) {
+       struct pakfire_worker* worker = data;
+       char job_id[UUID_STR_LEN];
+       int r;
+
+       // Format the job ID as string
+       uuid_unparse(worker->job_id, job_id);
+
+       switch (si->si_code) {
+               case CLD_EXITED:
+                       CTX_DEBUG(worker->ctx, "Worker %s has exited with status code %d\n",
+                               job_id, si->si_status);
+                       break;
+
+               case CLD_KILLED:
+                       CTX_ERROR(worker->ctx, "Worker %s has been killed by signal %d\n",
+                               job_id, si->si_signo);
+                       break;
+       }
+
+       // Let the daemon know this worker has exited
+       r = pakfire_daemon_worker_exited(worker->daemon, worker);
+       if (r)
+               return r;
+
+       return 0;
+}