]> git.ipfire.org Git - thirdparty/libcgroup.git/commitdiff
systemd: Add function to create a systemd scope
authorTom Hromatka <tom.hromatka@oracle.com>
Wed, 26 Oct 2022 16:14:27 +0000 (10:14 -0600)
committerTom Hromatka <tom.hromatka@oracle.com>
Tue, 1 Nov 2022 21:20:26 +0000 (15:20 -0600)
Add a function, cgroup_create_scope(), to create a systemd
scope.  This scope can be delegated, in other words the cgroup
management of this scope can be delegated away from systemd.
This is the official way to create a cgroup that systemd will
not interfere with.

Signed-off-by: Tom Hromatka <tom.hromatka@oracle.com>
Reviewed-by: Kamalesh Babulal <kamalesh.babulal@oracle.com>
.github/actions/setup-libcgroup/action.yml
configure.ac
include/Makefile.am
include/libcgroup.h
include/libcgroup/systemd.h [new file with mode: 0644]
src/.gitignore
src/Makefile.am
src/libcgroup.map
src/libcgroup_systemd_idle_thread.c [new file with mode: 0644]
src/systemd.c [new file with mode: 0644]

index 6f6c7ab557bfb60443db17f5fac2e4ce32227387..29d3f85ebf4c14282819920295c2f9c7d2df065f 100644 (file)
@@ -13,7 +13,7 @@ runs:
   steps:
   - run: sudo apt-get update
     shell: bash
-  - run: sudo apt-get install libpam-dev lcov python3-pip python3-dev cmake bison flex byacc g++ autoconf automake libtool -y
+  - run: sudo apt-get install libpam-dev lcov python3-pip python3-dev cmake bison flex byacc g++ autoconf automake libtool libsystemd-dev -y
     shell: bash
   - run: sudo pip install cython
     shell: bash
index c31e7c85c8bffb72208c34cdfdd71f0ef3fe1276..2856a1747c448e9f48312e7e3f0da19a8e11c3e3 100644 (file)
@@ -84,6 +84,18 @@ AC_DEFINE_UNQUOTED([ENABLE_PYTHON],
        [$(test "$enable_python" = yes && echo 1 || echo 0)],
        [Python bindings build flag.])
 
+AC_ARG_ENABLE([systemd],
+       [AS_HELP_STRING([--enable-systemd],[enable systemd support [default=yes]])],
+       [
+               if test "x$enableval" = xno; then
+                       with_systemd=false
+               else
+                       with_systemd=true
+               fi
+       ],
+       [with_systemd=true])
+AM_CONDITIONAL([WITH_SYSTEMD], [test x$with_systemd = xtrue])
+
 AC_ARG_ENABLE([initscript-install],
        [AS_HELP_STRING([--enable-initscript-install],[install init scripts [default=no]])],
        [
index 24e5bac91d3e3ab4aba0dd51a11a11ad7c846d33..23cebaac7539648c7a293df2009a6c5ce20be0bf 100644 (file)
@@ -3,3 +3,7 @@ nobase_include_HEADERS = libcgroup.h libcgroup/error.h libcgroup/init.h \
                         libcgroup/groups.h libcgroup/tasks.h \
                         libcgroup/iterators.h libcgroup/config.h \
                         libcgroup/log.h libcgroup/tools.h
+
+if WITH_SYSTEMD
+nobase_include_HEADERS += libcgroup/systemd.h
+endif
index 3f7c759c3a9b7fad48532f2e5095c07166120dcc..eddb356f6787f9852624e8a40f652930831ed6b3 100644 (file)
@@ -18,6 +18,7 @@
 #include <libcgroup/config.h>
 #include <libcgroup/log.h>
 #include <libcgroup/tools.h>
+#include <libcgroup/systemd.h>
 
 #undef _LIBCGROUP_H_INSIDE
 
diff --git a/include/libcgroup/systemd.h b/include/libcgroup/systemd.h
new file mode 100644 (file)
index 0000000..f12772a
--- /dev/null
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: LGPL-2.1-only */
+#ifndef _LIBCGROUP_SYSTEMD_H
+#define _LIBCGROUP_SYSTEMD_H
+
+#ifndef _LIBCGROUP_H_INSIDE
+#error "Only <libcgroup.h> should be included directly."
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Job mode requested from systemd when a scope is created.
+ *
+ * Each value indexes the modes[] string table in src/systemd.c ("fail", "replace",
+ * "isolate", "ignore-dependencies", "ignore-requirements").  cgroup_create_scope()
+ * currently accepts only FAIL and REPLACE; ISOLATE, IGNORE_DEPS and IGNORE_REQS are
+ * rejected with ECGINVAL.
+ */
+enum cgroup_systemd_mode_t {
+       CGROUP_SYSTEMD_MODE_FAIL = 0,
+       CGROUP_SYSTEMD_MODE_REPLACE,
+       CGROUP_SYSTEMD_MODE_ISOLATE,
+       CGROUP_SYSTEMD_MODE_IGNORE_DEPS,
+       CGROUP_SYSTEMD_MODE_IGNORE_REQS,
+
+       /* Number of real modes above; must equal the length of modes[] in src/systemd.c */
+       CGROUP_SYSTEMD_MODE_CNT,
+       /*
+        * NOTE(review): cgroup_set_default_scope_opts() stores CGROUP_SYSTEMD_MODE_FAIL,
+        * not this alias - confirm which default is intended.
+        */
+       CGROUP_SYSTEMD_MODE_DFLT = CGROUP_SYSTEMD_MODE_REPLACE
+};
+
+/**
+ * Options associated with creating a systemd scope
+ *
+ * Use cgroup_set_default_scope_opts() to fill an instance with default values.
+ */
+struct cgroup_systemd_scope_opts {
+       /**
+        * should systemd delegate this cgroup or not.  1 == yes; any other value means
+        * the Delegate property is not sent to systemd
+        */
+       int delegated;
+       /** job mode passed to systemd; see enum cgroup_systemd_mode_t */
+       enum cgroup_systemd_mode_t mode;
+       /**
+        * pid to be placed in the cgroup.  if negative, libcgroup forks a placeholder
+        * process (libcgroup_systemd_idle_thread) and places that in the scope instead
+        */
+       pid_t pid;
+};
+
+/**
+ * Populate the scope options structure with default values
+ *
+ * The defaults written are: delegated = 1, mode = CGROUP_SYSTEMD_MODE_FAIL and
+ * pid = -1 (libcgroup creates a placeholder process for the scope).
+ *
+ * @param opts Scope creation options structure instance.  Must already be allocated
+ *
+ * @return 0 on success and ECGINVAL if opts is NULL
+ */
+int cgroup_set_default_scope_opts(struct cgroup_systemd_scope_opts * const opts);
+
+/**
+ * Create a systemd scope under the specified slice
+ *
+ * @param scope_name Name of the scope, expected to end in .scope (a warning is logged
+ *                   if it does not)
+ * @param slice_name Name of the slice, expected to end in .slice (a warning is logged
+ *                   if it does not)
+ * @param opts Scope creation options structure instance
+ *
+ * @return 0 on success and non-zero on error.  Argument and libcgroup failures return
+ *         an ECG* code; a failing sd-bus call returns the negative value reported by
+ *         sd-bus
+ */
+int cgroup_create_scope(const char * const scope_name, const char * const slice_name,
+                       const struct cgroup_systemd_scope_opts * const opts);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _LIBCGROUP_SYSTEMD_H */
index 149254627a41bdbcad12f40a74e19ba5e73f69ab..ed9b4ecf0f97f63b412e6e0787642f6b7954e8c8 100644 (file)
@@ -1,3 +1,4 @@
+libcgroup_systemd_idle_thread
 lex.c
 parse.c
 parse.h
index 6a838f1ea6f023b18a89a07f73ded5707a1c92f1..da02b013d174f4d26b26ee18d7b2dc64f74918d3 100644 (file)
@@ -18,22 +18,43 @@ AM_CPPFLAGS = -I$(top_srcdir)/include
 VERSION_NUMBER = $(LIBRARY_VERSION_MAJOR):$(LIBRARY_VERSION_MINOR):$(LIBRARY_VERSION_RELEASE)
 TESTING_MAP_FILE = $(top_srcdir)/tests/gunit/libcgroup_unittest.map
 
+if WITH_SYSTEMD
+libcgroup_systemd_idle_thread_SOURCES = libcgroup_systemd_idle_thread.c
+bin_PROGRAMS = libcgroup_systemd_idle_thread
+endif
+
 lib_LTLIBRARIES = libcgroup.la
 libcgroup_la_SOURCES = parse.h parse.y lex.l api.c config.c libcgroup-internal.h libcgroup.map \
                       wrapper.c log.c abstraction-common.c abstraction-common.h \
                       abstraction-map.c abstraction-map.h abstraction-cpu.c abstraction-cpuset.c \
                       tools/cgxget.c tools/cgxset.c
+if WITH_SYSTEMD
+libcgroup_la_SOURCES += systemd.c
+endif
+
 libcgroup_la_LIBADD = -lpthread $(CODE_COVERAGE_LIBS)
 libcgroup_la_CFLAGS = $(CODE_COVERAGE_CFLAGS) -DSTATIC=static -DLIBCG_LIB -fPIC
+
 libcgroup_la_LDFLAGS = -Wl,--version-script,$(srcdir)/libcgroup.map \
                       -version-number $(VERSION_NUMBER)
+if WITH_SYSTEMD
+libcgroup_la_LDFLAGS += -lsystemd
+endif
 
 noinst_LTLIBRARIES = libcgroupfortesting.la
 libcgroupfortesting_la_SOURCES = parse.h parse.y lex.l api.c config.c libcgroup-internal.h \
                                 libcgroup.map wrapper.c log.c abstraction-common.c \
                                 abstraction-common.h abstraction-map.c abstraction-map.h \
                                 abstraction-cpu.c abstraction-cpuset.c
+if WITH_SYSTEMD
+libcgroupfortesting_la_SOURCES += systemd.c
+endif
+
 libcgroupfortesting_la_LIBADD = -lpthread $(CODE_COVERAGE_LIBS)
 libcgroupfortesting_la_CFLAGS = $(CODE_COVERAGE_CFLAGS) -DSTATIC= -DUNIT_TEST
+
 libcgroupfortesting_la_LDFLAGS = -Wl,--version-script,$(TESTING_MAP_FILE) \
                                 -version-number $(VERSION_NUMBER)
+if WITH_SYSTEMD
+libcgroupfortesting_la_LDFLAGS += -lsystemd
+endif
index 0cecba2fd465d9d5a20503d33e5e5c443301dd82..0d582982f7d9d92d3b63415d8cfb07c3e02ac916 100644 (file)
@@ -149,4 +149,6 @@ CGROUP_3.0 {
 
        /* libcgroup 3.0.1 */
        cgroup_setup_mode;
+       cgroup_create_scope;
+       cgroup_set_default_scope_opts;
 } CGROUP_2.0;
diff --git a/src/libcgroup_systemd_idle_thread.c b/src/libcgroup_systemd_idle_thread.c
new file mode 100644 (file)
index 0000000..1b3b06e
--- /dev/null
@@ -0,0 +1,11 @@
+#include <unistd.h>
+
+#define SECS_PER_DAY   (60 * 60 *24)
+
+/*
+ * Placeholder process: does nothing but sleep, one day at a time, forever.
+ * cgroup_create_scope() executes this binary when the caller supplies no pid.
+ */
+int main(void)
+{
+       for (;;) {
+               sleep(SECS_PER_DAY);
+       }
+
+       return 0;
+}
diff --git a/src/systemd.c b/src/systemd.c
new file mode 100644 (file)
index 0000000..cbd36c2
--- /dev/null
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: LGPL-2.1-only */
+/**
+ * Copyright (c) 2022 Oracle and/or its affiliates.
+ * Author: Tom Hromatka <tom.hromatka@oracle.com>
+ * Author: Silvia Chapa <silvia.chapa@oracle.com>
+ */
+
+#include <libcgroup-internal.h>
+#include <systemd/sd-bus.h>
+#include <libcgroup.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+
+#define USEC_PER_SEC 1000000
+
+/*
+ * Job mode strings indexed by enum cgroup_systemd_mode_t.  The entry selected by
+ * opts->mode is appended as the second string of the StartTransientUnit message in
+ * cgroup_create_scope().
+ */
+static const char * const modes[] = {
+       "fail",                 /* CGROUP_SYSTEMD_MODE_FAIL */
+       "replace",              /* CGROUP_SYSTEMD_MODE_REPLACE */
+       "isolate",              /* CGROUP_SYSTEMD_MODE_ISOLATE */
+       "ignore-dependencies",  /* CGROUP_SYSTEMD_MODE_IGNORE_DEPS */
+       "ignore-requirements",  /* CGROUP_SYSTEMD_MODE_IGNORE_REQS */
+};
+static_assert((sizeof(modes) / sizeof(modes[0])) == CGROUP_SYSTEMD_MODE_CNT,
+             "modes[] array must be same length as CGROUP_SYSTEMD_MODE_CNT");
+
+/*
+ * Bus name, object path and interface of the systemd manager.  Used both for the
+ * JobRemoved signal match and for the StartTransientUnit method call.
+ */
+static const char * const sender = "org.freedesktop.systemd1";
+static const char * const path = "/org/freedesktop/systemd1";
+static const char * const interface = "org.freedesktop.systemd1.Manager";
+
+/*
+ * Fill @opts with the library defaults: a delegated scope, job mode "fail", and a
+ * negative pid so that cgroup_create_scope() forks its own placeholder process.
+ *
+ * Returns 0 on success, ECGINVAL when @opts is NULL.
+ */
+int cgroup_set_default_scope_opts(struct cgroup_systemd_scope_opts * const opts)
+{
+       if (opts == NULL)
+               return ECGINVAL;
+
+       opts->pid = -1;
+       opts->mode = CGROUP_SYSTEMD_MODE_FAIL;
+       opts->delegated = 1;
+
+       return 0;
+}
+
+/*
+ * Returns the time elapsed between start and end in usec
+ *
+ * The second and nanosecond fields are widened to 64 bits before any arithmetic so
+ * that scaling the seconds to microseconds cannot overflow a narrower time_t.
+ * Asserts that end is not earlier than start.
+ *
+ * Inspired-by: https://github.com/cockpit-project/cockpit/blob/main/src/tls/socket-io.c#L39
+ */
+static int64_t elapsed_time(const struct timespec * const start, const struct timespec * const end)
+{
+       int64_t secs = (int64_t)end->tv_sec - (int64_t)start->tv_sec;
+       int64_t nsecs = (int64_t)end->tv_nsec - (int64_t)start->tv_nsec;
+       int64_t elapsed = secs * USEC_PER_SEC + nsecs / 1000;
+
+       assert(elapsed >= 0);
+
+       return elapsed;
+}
+
+/*
+ * sd-bus match handler for the systemd manager's JobRemoved signal.
+ *
+ * @param message   The received signal; read with signature "uoss" (the leading "u"
+ *                  field is skipped)
+ * @param user_data Address of the caller's job path pointer (const char **)
+ * @param error     Unused
+ *
+ * When the job path carried by the signal equals the caller's job path, the caller's
+ * pointer is set to NULL to tell it the job has finished.  Signals for other jobs, or
+ * signals arriving before the caller knows its job path, are ignored.
+ *
+ * Always returns 0, including when the message cannot be parsed.
+ */
+static int job_removed_callback(sd_bus_message *message, void *user_data, sd_bus_error *error)
+{
+       const char *result, *msg_path, *scope_name;
+       const char **job_path = user_data;
+       int ret;
+
+       ret = sd_bus_message_read(message, "uoss", NULL, &msg_path, &scope_name, &result);
+       if (ret < 0) {
+               /* sd-bus reports failures through its return value, not through errno */
+               cgroup_err("callback message read failed: %d\n", ret);
+               return 0;
+       }
+
+       if (*job_path == NULL || strcmp(msg_path, *job_path) != 0) {
+               cgroup_dbg("Received a systemd signal, but it was not our message\n");
+               return 0;
+       }
+
+       cgroup_dbg("Received JobRemoved signal for scope %s.  Result: %s\n", scope_name, result);
+
+       /*
+        * Use the job_path pointer as a way to inform the original thread that the job has
+        * completed.
+        */
+       *job_path = NULL;
+       return 0;
+}
+
+/*
+ * Return nonzero when @name ends with @suffix.  Names shorter than the suffix never
+ * match, which keeps the comparison from indexing before the start of @name.
+ */
+static int cg_name_has_suffix(const char * const name, const char * const suffix)
+{
+       const size_t name_len = strlen(name);
+       const size_t suffix_len = strlen(suffix);
+
+       if (name_len < suffix_len)
+               return 0;
+
+       return strcmp(&name[name_len - suffix_len], suffix) == 0;
+}
+
+/*
+ * Create a transient systemd scope named @scope_name under @slice_name.
+ *
+ * Steps:
+ *   1. Validate the arguments.  A missing .scope/.slice suffix only logs a warning;
+ *      an out-of-range or unsupported mode fails with ECGINVAL.
+ *   2. If opts->pid is negative, fork and exec libcgroup_systemd_idle_thread so the
+ *      scope has a running process in it; otherwise use opts->pid.
+ *   3. Send StartTransientUnit to the systemd manager over the system bus with the
+ *      Description, PIDs, Slice and (when opts->delegated == 1) Delegate properties.
+ *   4. Process bus traffic until job_removed_callback() clears job_path, or fail with
+ *      ECGFAIL once more than USEC_PER_SEC microseconds have elapsed.
+ *
+ * On any failure after the fork, a placeholder process created here is sent SIGTERM.
+ *
+ * Returns 0 on success.  Returns ECGINVAL, ECGOTHER or ECGFAIL for argument, fork,
+ * clock and timeout failures, and the negative value returned by sd-bus when an
+ * sd-bus call fails.
+ */
+int cgroup_create_scope(const char * const scope_name, const char * const slice_name,
+                       const struct cgroup_systemd_scope_opts * const opts)
+{
+       sd_bus_message *msg = NULL, *reply = NULL;
+       sd_bus_error error = SD_BUS_ERROR_NULL;
+       const char *job_path = NULL;
+       struct timespec start, now;
+       sd_bus_slot *slot = NULL;
+       sd_bus *bus = NULL;
+       pid_t child_pid;
+       int ret = 0;
+
+       if (!scope_name || !slice_name || !opts)
+               return ECGINVAL;
+
+       if (!cg_name_has_suffix(scope_name, ".scope"))
+               cgroup_warn("scope doesn't have expected suffix\n");
+       if (!cg_name_has_suffix(slice_name, ".slice"))
+               cgroup_warn("slice doesn't have expected suffix\n");
+
+       if (opts->mode >= CGROUP_SYSTEMD_MODE_CNT) {
+               cgroup_err("invalid systemd mode: %d\n", opts->mode);
+               return ECGINVAL;
+       }
+
+       if (opts->mode == CGROUP_SYSTEMD_MODE_ISOLATE ||
+           opts->mode == CGROUP_SYSTEMD_MODE_IGNORE_DEPS ||
+           opts->mode == CGROUP_SYSTEMD_MODE_IGNORE_REQS) {
+               cgroup_err("unsupported systemd mode: %d\n", opts->mode);
+               return ECGINVAL;
+       }
+
+       if (opts->pid < 0) {
+               child_pid = fork();
+               if (child_pid < 0) {
+                       cgroup_err("fork failed: %d\n", errno);
+                       return ECGOTHER;
+               }
+
+               if (child_pid == 0) {
+                       char *args[] = {"libcgroup_systemd_idle_thread", NULL};
+
+                       /*
+                        * Have the child sleep forever.  Systemd will delete the scope if
+                        * there isn't a running process in it.
+                        */
+                       execvp("libcgroup_systemd_idle_thread", args);
+
+                       /*
+                        * Only reached when exec fails.  Terminate the child here rather
+                        * than returning, so it does not continue running the caller's
+                        * code as a second copy of the application.
+                        */
+                       cgroup_err("failed to create system idle thread.\n");
+                       _exit(1);
+               }
+
+               cgroup_dbg("created libcgroup_system_idle thread pid %d\n", child_pid);
+       } else {
+               child_pid = opts->pid;
+       }
+       cgroup_dbg("pid %d will be placed in scope %s\n", child_pid, scope_name);
+
+       /*
+        * sd-bus calls report failure as a negative return value; that value, not
+        * errno, is what gets logged below.
+        */
+       ret = sd_bus_default_system(&bus);
+       if (ret < 0) {
+               cgroup_err("failed to open the system bus: %d\n", ret);
+               goto out;
+       }
+
+       /*
+        * Keep the match in a slot owned by this function.  The callback's user data is
+        * the address of a local variable, so the match must be removed before returning.
+        */
+       ret = sd_bus_match_signal(bus, &slot, sender, path, interface,
+                                 "JobRemoved", job_removed_callback, &job_path);
+       if (ret < 0) {
+               cgroup_err("failed to install match callback: %d\n", ret);
+               goto out;
+       }
+
+       ret = sd_bus_message_new_method_call(bus, &msg, sender, path, interface,
+                                            "StartTransientUnit");
+       if (ret < 0) {
+               cgroup_err("failed to create the systemd msg: %d\n", ret);
+               goto out;
+       }
+
+       ret = sd_bus_message_append(msg, "ss", scope_name, modes[opts->mode]);
+       if (ret < 0) {
+               cgroup_err("failed to append the scope name: %d\n", ret);
+               goto out;
+       }
+
+       ret = sd_bus_message_open_container(msg, 'a', "(sv)");
+       if (ret < 0) {
+               cgroup_err("failed to open container: %d\n", ret);
+               goto out;
+       }
+
+       ret = sd_bus_message_append(msg, "(sv)", "Description", "s",
+                                   "scope created by libcgroup");
+       if (ret < 0) {
+               cgroup_err("failed to append the description: %d\n", ret);
+               goto out;
+       }
+
+       ret = sd_bus_message_append(msg, "(sv)", "PIDs", "au", 1, (uint32_t)child_pid);
+       if (ret < 0) {
+               cgroup_err("failed to append the PID: %d\n", ret);
+               goto out;
+       }
+
+       ret = sd_bus_message_append(msg, "(sv)", "Slice", "s", slice_name);
+       if (ret < 0) {
+               cgroup_err("failed to append the slice: %d\n", ret);
+               goto out;
+       }
+
+       if (opts->delegated == 1) {
+               ret = sd_bus_message_append(msg, "(sv)", "Delegate", "b", 1);
+               if (ret < 0) {
+                       cgroup_err("failed to append delegate: %d\n", ret);
+                       goto out;
+               }
+       }
+
+       ret = sd_bus_message_close_container(msg);
+       if (ret < 0) {
+               cgroup_err("failed to close the container: %d\n", ret);
+               goto out;
+       }
+
+       ret = sd_bus_message_append(msg, "a(sa(sv))", 0);
+       if (ret < 0) {
+               cgroup_err("failed to append aux structure: %d\n", ret);
+               goto out;
+       }
+
+       ret = sd_bus_call(bus, msg, 0, &error, &reply);
+       if (ret < 0) {
+               cgroup_err("sd_bus_call() failed: %d\n", ret);
+               cgroup_err("error message: %s\n", error.message ? error.message : "(none)");
+               goto out;
+       }
+
+       /*
+        * Receive the job_path from systemd.  The string is owned by reply, which stays
+        * referenced until the cleanup at the end of this function.
+        */
+       ret = sd_bus_message_read(reply, "o", &job_path);
+       if (ret < 0) {
+               cgroup_err("failed to read reply: %d\n", ret);
+               goto out;
+       }
+
+       cgroup_dbg("job_path = %s\n", job_path);
+
+       ret = clock_gettime(CLOCK_MONOTONIC, &start);
+       if (ret < 0) {
+               cgroup_err("Failed to get time: %d\n", errno);
+               ret = ECGFAIL;
+               goto out;
+       }
+
+       /* The callback will null out the job_path pointer on completion */
+       while (job_path) {
+               ret = sd_bus_process(bus, NULL);
+               if (ret < 0) {
+                       cgroup_err("failed to process the sd bus: %d\n", ret);
+                       goto out;
+               }
+
+               if (ret == 0) {
+                       /*
+                        * Per the sd_bus_wait() man page, call this function after sd_bus_process
+                        * returns zero. The wait time (usec) was somewhat arbitrarily chosen
+                        */
+                       ret = sd_bus_wait(bus, 10);
+                       if (ret < 0) {
+                               cgroup_err("failed to wait for sd bus: %d\n", ret);
+                               goto out;
+                       }
+               }
+
+               ret = clock_gettime(CLOCK_MONOTONIC, &now);
+               if (ret < 0) {
+                       cgroup_err("Failed to get time: %d\n", errno);
+                       ret = ECGFAIL;
+                       goto out;
+               }
+
+               if (elapsed_time(&start, &now) > USEC_PER_SEC) {
+                       cgroup_err("The create scope command timed out\n");
+                       ret = ECGFAIL;
+                       goto out;
+               }
+       }
+
+       ret = 0;
+
+out:
+       /* Only terminate the process if this function created it */
+       if (ret && opts->pid < 0)
+               kill(child_pid, SIGTERM);
+
+       /* Drop the match before the bus so the callback can no longer see &job_path */
+       sd_bus_slot_unref(slot);
+       sd_bus_error_free(&error);
+       sd_bus_message_unref(msg);
+       sd_bus_message_unref(reply);
+       sd_bus_unref(bus);
+
+       return ret;
+}