From 0863b011a6117d4ddf6b0272bcc32c2bd8b29fce Mon Sep 17 00:00:00 2001
From: Kamalesh Babulal
Date: Fri, 10 Feb 2023 14:46:45 -0700
Subject: [PATCH] config: Support systemd's slice/scope creation/delegation using configuration

Add support for the creation of systemd slices and (transient) scopes.
Users can create a systemd slice and scope using the new 'systemd'
setting in cgconfig.conf.

$ cat /etc/cgconfig.conf
...
systemd {
	slice = database.slice;
	scope = db.scope;
	setdefault = yes;
}

systemd {
	slice = database.slice;
	scope = house_keeping.scope;
	pid = 3456;
}

systemd {
	slice = others.slice;
	scope = server.scope;
}

A systemd configuration requires two mandatory settings, slice and
scope, which represent the systemd slice and scope names. The other
settings are optional.

'setdefault' allows the users to append one of the systemd slice/scope
names to the default cgroup root, making the delegated subtree the new
cgroup root, i.e., the default cgroup path is changed from
'/sys/fs/cgroup/' to '/sys/fs/cgroup/database.slice/db.scope/'. This is
useful to the libcgroup tools, where users can work on the new subtree
hierarchy they created.

'pid' can be used to choose the default task of the transient scope
created by us. By default, a task that spins in an idle loop is created
and set as the default task.

Signed-off-by: Kamalesh Babulal
Signed-off-by: Tom Hromatka
TJH: Fix minor typos in a couple comments where delegate was written
instead of setdefault
TJH: Explicitly initialize the list head and tail to NULL.
May not be needed but better safe than sorry (cherry picked from commit 4fd38393d3934e62351a680bdad0c9f4deb4d3d2) --- include/libcgroup/systemd.h | 50 +++++++ src/Makefile.am | 1 + src/api.c | 5 + src/config.c | 283 ++++++++++++++++++++++++++++++++++++ src/lex.l | 1 + src/libcgroup-internal.h | 6 + src/parse.y | 38 ++++- 7 files changed, 383 insertions(+), 1 deletion(-) diff --git a/include/libcgroup/systemd.h b/include/libcgroup/systemd.h index 17efa51c..39f69743 100644 --- a/include/libcgroup/systemd.h +++ b/include/libcgroup/systemd.h @@ -33,6 +33,17 @@ struct cgroup_systemd_scope_opts { pid_t pid; }; +/* + * cgroup systemd settings + */ +struct cgroup_systemd_opts { + char slice_name[FILENAME_MAX]; + char scope_name[FILENAME_MAX]; + int setdefault; + pid_t pid; + struct cgroup_systemd_opts *next; +}; + /** * Populate the scope options structure with default values * @@ -69,6 +80,45 @@ int cgroup_create_scope(const char * const scope_name, const char * const slice_ int cgroup_create_scope2(struct cgroup *cgroup, int ignore_ownership, const struct cgroup_systemd_scope_opts * const opts); +/** + * Parse the systemd default cgroup's relative path from + * /var/run/libcgroup/systemd and set it as default delegation cgroup + * path, if available. + * + * The path is relative to cgroup root (default: /sys/fs/cgroup) + */ +void cgroup_set_default_systemd_cgroup(void); + +/** + * Parse the systemd delegation settings from the configuration file + * and allocate a new cgroup_systemd_opts object. + * This function internally calls cgroup_add_systemd_opts() to add the conf and + * value to the newly allocated cgroup_systemd_opts object. + * + * @param conf Name of the systemd delegate setting read from configuration file. + * @param value The value of the conf systemd delegate setting. 
+ * + * @return 1 on success and 0 on error + */ +int cgroup_alloc_systemd_opts(const char * const conf, const char * const value); + +/** + * Parse the systemd delegation settings from the configuration file + * and add the conf and value to the last allocated cgroup_systemd_opts object + * (tail) allocated by cgroup_alloc_systemd_opts() + * + * @param conf Name of the systemd delegate setting read from configuration file. + * @param value The value of the conf systemd delegate setting. + * + * @return 1 on success and 0 on error + */ +int cgroup_add_systemd_opts(const char * const conf, const char * const value); + +/** + * Free the cgroup_systemd_opts objects allocated by cgroup_alloc_systemd_opts() + */ +void cgroup_cleanup_systemd_opts(void); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/Makefile.am b/src/Makefile.am index da02b013..6ca91407 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -39,6 +39,7 @@ libcgroup_la_LDFLAGS = -Wl,--version-script,$(srcdir)/libcgroup.map \ -version-number $(VERSION_NUMBER) if WITH_SYSTEMD libcgroup_la_LDFLAGS += -lsystemd +libcgroup_la_CFLAGS += -DWITH_SYSTEMD endif noinst_LTLIBRARIES = libcgroupfortesting.la diff --git a/src/api.c b/src/api.c index 984bab96..3123c12a 100644 --- a/src/api.c +++ b/src/api.c @@ -97,6 +97,11 @@ struct cg_mount_table_s cg_mount_table[CG_CONTROLLER_MAX]; /* Cgroup v2 mount paths, with empty controllers */ struct cg_mount_point *cg_cgroup_v2_empty_mount_paths; +#ifdef WITH_SYSTEMD +/* Default systemd path name. 
Length: .slice/.scope */ +char systemd_default_cgroup[FILENAME_MAX * 2 + 1]; +#endif + const char * const cgroup_strerror_codes[] = { "Cgroup is not compiled in", "Cgroup is not mounted", diff --git a/src/config.c b/src/config.c index 155c800d..1893b1d3 100644 --- a/src/config.c +++ b/src/config.c @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -91,6 +92,26 @@ static struct cgroup_string_list *template_files; /* Needed for the type while mounting cgroupfs. */ #define CGROUP_FILESYSTEM "cgroup" +#ifdef WITH_SYSTEMD +/* Directory that holds dynamically created internal libcgroup files */ +static const char * const systemd_def_cgrp_file_dir = "/var/run/libcgroup/"; + +/* File that stores the relative delegated systemd cgroup path */ +static const char * const systemd_default_cgroup_file = "/var/run/libcgroup/systemd"; + +/* Lock that protects systemd_default_cgroup_file */ +static pthread_rwlock_t systemd_default_cgroup_lock = PTHREAD_RWLOCK_INITIALIZER; + +/* + * cgroup_systemd_opts is dynamically allocated, hence having head + * and tail helps in traversing. 
+ */ +static struct cgroup_systemd_opts *cgroup_systemd_opts_head = NULL; +static struct cgroup_systemd_opts *cgroup_systemd_opts_tail = NULL; + +static int config_create_slice_scope(char * const tmp_systemd_default_cgroup); +#endif + /* * NOTE: All these functions return 1 on success and not 0 as is the * library convention @@ -971,6 +992,8 @@ static void cgroup_free_config(void) config_template_table = NULL; } config_template_table_index = 0; + + cgroup_cleanup_systemd_opts(); } /** @@ -1081,6 +1104,10 @@ static void cgroup_config_sort_groups(void) */ int cgroup_config_load_config(const char *pathname) { +#ifdef WITH_SYSTEMD + /* slice[FILENAME_MAX] + '/' + scope[FILENAME_MAX] */ + char tmp_systemd_default_cgroup[FILENAME_MAX * 2 + 1] = "\0"; +#endif int namespace_enabled = 0; int mount_enabled = 0; int error; @@ -1126,12 +1153,35 @@ int cgroup_config_load_config(const char *pathname) if (error) goto err_mnt; +#ifdef WITH_SYSTEMD + error = config_create_slice_scope(tmp_systemd_default_cgroup); + if (!error) { + error = ECGROUPPARSEFAIL; + goto err_mnt; + } +#endif + cgroup_config_apply_default(); error = cgroup_config_create_groups(); cgroup_dbg("creating all cgroups now, error=%d\n", error); if (error) goto err_grp; +#ifdef WITH_SYSTEMD + /* + * Setting up systemd_default_cgroup, during creation + * of slice/scopes, clobbers the path returned by cg_build_path(), + * by appending the systemd_default_cgroup to it and + * its required only after all of the slices/scopes are created. + * The user might also set setdefault in more than one systemd + * delegate settings, in that case the last parsed one overwrites + * the systemd_default_cgroup. 
+	 */
+	if (strlen(tmp_systemd_default_cgroup))
+		snprintf(systemd_default_cgroup, sizeof(systemd_default_cgroup),
+			 "%s", tmp_systemd_default_cgroup);
+#endif
+
 	cgroup_free_config();
 
 	return 0;
@@ -1807,3 +1857,320 @@ end:
 	cgroup_free(&aux_cgroup);
 	return ret;
 }
+
+#ifdef WITH_SYSTEMD
+/*
+ * Check whether str ends with suffix. The lengths are compared first so that
+ * a name shorter than the suffix can never produce a negative offset.
+ *
+ * Returns 1 when str ends with suffix and 0 otherwise.
+ */
+static int systemd_name_has_suffix(const char * const str, const char * const suffix)
+{
+	size_t str_len = strlen(str);
+	size_t suffix_len = strlen(suffix);
+
+	if (str_len < suffix_len)
+		return 0;
+
+	return strcmp(str + (str_len - suffix_len), suffix) == 0;
+}
+
+/*
+ * Validate a slice/scope name read from the configuration and copy it into
+ * dst, a buffer of FILENAME_MAX bytes. The name must fit without truncation
+ * and end with the given suffix.
+ *
+ * Returns 1 on success and 0 on error.
+ */
+static int systemd_copy_unit_name(char * const dst, const char * const value,
+				  const char * const suffix)
+{
+	if (strlen(value) >= FILENAME_MAX)
+		return 0;
+
+	if (!systemd_name_has_suffix(value, suffix))
+		return 0;
+
+	snprintf(dst, FILENAME_MAX, "%s", value);
+	return 1;
+}
+
+/*
+ * Store one "conf = value" systemd setting in the most recently allocated
+ * cgroup_systemd_opts object (the list tail). On any invalid setting the
+ * whole list is freed.
+ *
+ * Returns 1 on success and 0 on error.
+ */
+int cgroup_add_systemd_opts(const char * const config, const char * const value)
+{
+	struct cgroup_systemd_opts *curr = cgroup_systemd_opts_tail;
+	char *end = NULL;
+	long pid;
+
+	/* Without an allocated tail there is no object to fill in. */
+	if (!config || !value || !curr)
+		goto err;
+
+	if (strcmp(config, "slice") == 0) {
+		if (!systemd_copy_unit_name(curr->slice_name, value, ".slice"))
+			goto err;
+	} else if (strcmp(config, "scope") == 0) {
+		if (!systemd_copy_unit_name(curr->scope_name, value, ".scope"))
+			goto err;
+	} else if (strcmp(config, "setdefault") == 0) {
+		/* "no" matches the zero initialized default, accept it explicitly */
+		if (strcasecmp(value, "yes") == 0)
+			curr->setdefault = 1;
+		else if (strcasecmp(value, "no") == 0)
+			curr->setdefault = 0;
+		else
+			goto err;
+	} else if (strcmp(config, "pid") == 0) {
+		/*
+		 * strtol() allows rejecting empty, non-numeric and out of
+		 * range values. pid 0 and the init task (systemd) are
+		 * not allowed either.
+		 */
+		errno = 0;
+		pid = strtol(value, &end, 10);
+		if (errno || end == value || *end != '\0')
+			goto err;
+
+		if (pid <= 1 || (long)(pid_t)pid != pid)
+			goto err;
+
+		curr->pid = (pid_t)pid;
+	} else {
+		goto err;
+	}
+
+	return 1;
+err:
+	cgroup_err("Invalid systemd configuration %s value %s\n",
+		   config ? config : "(null)", value ? value : "(null)");
+	cgroup_cleanup_systemd_opts();
+	return 0;
+}
+
+/*
+ * Allocate a new cgroup_systemd_opts object, append it to the list and
+ * store the first "conf = value" setting of a systemd block in it.
+ *
+ * Returns 1 on success and 0 on error; the whole list is freed on error.
+ */
+int cgroup_alloc_systemd_opts(const char * const config, const char * const value)
+{
+	struct cgroup_systemd_opts *new_cgrp_systemd_opts;
+
+	if (!config || !value) {
+		cgroup_err("Invalid systemd configuration, missing name or value\n");
+		goto err;
+	}
+
+	/*
+	 * Check for the allowed systemd configurations here, the values
+	 * are validated by cgroup_add_systemd_opts().
+	 */
+	if (strcmp(config, "slice") != 0 &&
+	    strcmp(config, "scope") != 0 &&
+	    strcmp(config, "setdefault") != 0 &&
+	    strcmp(config, "pid") != 0) {
+		cgroup_err("Invalid systemd configuration %s\n", config);
+		goto err;
+	}
+
+	new_cgrp_systemd_opts = calloc(1, sizeof(*new_cgrp_systemd_opts));
+	if (!new_cgrp_systemd_opts) {
+		cgroup_err("Failed to allocate memory for cgroup_systemd_opts\n");
+		goto err;
+	}
+
+	if (!cgroup_systemd_opts_head) {
+		cgroup_systemd_opts_head = new_cgrp_systemd_opts;
+	} else {
+		cgroup_systemd_opts_tail->next = new_cgrp_systemd_opts;
+	}
+	cgroup_systemd_opts_tail = new_cgrp_systemd_opts;
+
+	return cgroup_add_systemd_opts(config, value);
+err:
+	cgroup_cleanup_systemd_opts();
+	return 0;
+}
+
+/* Free every cgroup_systemd_opts object and reset the list head and tail. */
+void cgroup_cleanup_systemd_opts(void)
+{
+	struct cgroup_systemd_opts *curr, *next;
+
+	for (curr = cgroup_systemd_opts_head; curr; curr = next) {
+		next = curr->next;
+		free(curr);
+	}
+
+	cgroup_systemd_opts_head = cgroup_systemd_opts_tail = NULL;
+}
+
+/*
+ * Helper function to remove the systemd_default_cgroup_file.
+ * systemd_default_cgroup_lock is expected to be held by the
+ * caller.
+ *
+ * Returns 1 when the file was removed or did not exist and 0 otherwise.
+ */
+static int remove_systemd_default_cgroup_file(void)
+{
+	int ret;
+
+	ret = unlink(systemd_default_cgroup_file);
+	if (ret < 0 && errno != ENOENT) {
+		cgroup_err("Failed to remove %s\n", systemd_default_cgroup_file);
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Helper function to create systemd_default_cgroup_file and write systemd
+ * default cgroup slice/scope into it. This file will be read by
+ * cgroup_set_default_systemd_cgroup() for setting
+ * systemd_default_cgroup used to form the cgroup path.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int cgroup_write_systemd_default_cgroup(const char * const slice,
+					       const char * const scope)
+{
+	FILE *systemd_def_cgrp_f;
+	int ret, len;
+
+	pthread_rwlock_wrlock(&systemd_default_cgroup_lock);
+
+	ret = mkdir(systemd_def_cgrp_file_dir, 0755);
+	if (ret != 0 && errno != EEXIST) {
+		cgroup_err("Failed to create directory %s\n", systemd_def_cgrp_file_dir);
+		ret = 0;
+		goto out;
+	}
+
+	systemd_def_cgrp_f = fopen(systemd_default_cgroup_file, "w");
+	if (!systemd_def_cgrp_f) {
+		cgroup_err("Failed to create file %s\n", systemd_default_cgroup_file);
+		ret = 0;
+		goto out;
+	}
+
+	len = strlen(slice) + strlen(scope) + 1;
+
+	ret = fprintf(systemd_def_cgrp_f, "%s/%s", slice, scope);
+	/* Buffered data may only reach the file in fclose(), so check it as well */
+	if (fclose(systemd_def_cgrp_f) != 0 || ret != len) {
+		cgroup_err("Incomplete systemd default cgroup written to %s\n",
+			   systemd_default_cgroup_file);
+		/* Best effort cleanup, we are already in the error path */
+		(void)remove_systemd_default_cgroup_file();
+		ret = 0;
+		goto out;
+	}
+
+	ret = 1;
+out:
+	pthread_rwlock_unlock(&systemd_default_cgroup_lock);
+	return ret;
+}
+
+/**
+ * Create the systemd slice and scope. The slice/scope are parsed and available in
+ * the cgroup_systemd_opts_head list. This function skips the slice and scope creation
+ * if previously created.
+ *
+ * tmp_systemd_default_cgroup receives "slice/scope" of the last setdefault entry,
+ * if any. It is written using sizeof(systemd_default_cgroup), so the caller's
+ * buffer must be at least that large.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+static int config_create_slice_scope(char * const tmp_systemd_default_cgroup)
+{
+	struct cgroup_systemd_opts *curr, *def = NULL;
+	struct cgroup_systemd_scope_opts scope_opts;
+	pid_t default_pid;
+	int ret = 0;
+
+	if (!tmp_systemd_default_cgroup)
+		return 0;
+
+	if (cgroup_set_default_scope_opts(&scope_opts))
+		return 0;
+
+	/* Remember the default so that it can be restored for every scope */
+	default_pid = scope_opts.pid;
+
+	pthread_rwlock_wrlock(&systemd_default_cgroup_lock);
+	ret = remove_systemd_default_cgroup_file();
+	pthread_rwlock_unlock(&systemd_default_cgroup_lock);
+	if (!ret)
+		return 0;
+
+	for (curr = cgroup_systemd_opts_head; curr; curr = curr->next) {
+
+		if (!strlen(curr->slice_name)) {
+			cgroup_err("Invalid systemd setting, missing slice name.\n");
+			goto err;
+		}
+
+		if (!strlen(curr->scope_name)) {
+			cgroup_err("Invalid systemd setting, missing scope name.\n");
+			goto err;
+		}
+
+		/* in case of multiple setdefault configurations, set it to latest scope */
+		if (curr->setdefault)
+			def = curr;
+
+		/*
+		 * The pid is a per scope setting, a scope without one must not
+		 * inherit the pid configured for an earlier scope.
+		 */
+		scope_opts.pid = curr->pid ? curr->pid : default_pid;
+
+		ret = cgroup_create_scope(curr->scope_name, curr->slice_name, &scope_opts);
+		if (ret)
+			goto err;
+
+		cgroup_dbg("Created systemd slice %s scope %s default %d pid %d\n",
+			   curr->slice_name, curr->scope_name, curr->setdefault, curr->pid);
+	}
+
+	if (def) {
+		if (!cgroup_write_systemd_default_cgroup(def->slice_name, def->scope_name))
+			goto err;
+
+		snprintf(tmp_systemd_default_cgroup, sizeof(systemd_default_cgroup),
+			 "%s/%s", def->slice_name, def->scope_name);
+
+		cgroup_dbg("Setting/Writing systemd default cgroup %s to file %s\n",
+			   tmp_systemd_default_cgroup, systemd_default_cgroup_file);
+
+	}
+
+	return 1;
+err:
+	return 0;
+}
+#else
+int cgroup_add_systemd_opts(const char * const config, const char * const value)
+{
+	return 1;
+}
+
+int cgroup_alloc_systemd_opts(const char * const config, const char * const value)
+{
+	return 1;
+}
+
+void cgroup_cleanup_systemd_opts(void) { }
+#endif
diff --git a/src/lex.l b/src/lex.l
index e1e8321e..5f680fc8 100644
--- a/src/lex.l
+++ b/src/lex.l
@@ -36,6 +36,7 @@ jmp_buf
parser_error_env;
 "group"		{return GROUP;}
 "namespace"	{return NAMESPACE;}
 "template"	{return TEMPLATE;}
+"systemd"	{return SYSTEMD;}
 "default"	{yylval.name = strdup(yytext); return DEFAULT;}
 [a-zA-Z0-9_\-\/\.\,\%\@\\]+ {yylval.name = strdup(yytext); return ID;}
 \"[^"]*\" {yylval.name = strdup(yytext+1); yylval.name[strlen(yylval.name)-1] = '\0'; return ID; }
diff --git a/src/libcgroup-internal.h b/src/libcgroup-internal.h
index cf1fdc94..b53c9c4d 100644
--- a/src/libcgroup-internal.h
+++ b/src/libcgroup-internal.h
@@ -239,6 +239,12 @@ extern pthread_rwlock_t cg_mount_table_lock;
  */
 extern __thread char *cg_namespace_table[CG_CONTROLLER_MAX];
 
+/*
+ * Default systemd cgroup used by the cg_build_path_locked() and tools
+ * setting the default cgroup path.
+ */
+extern char systemd_default_cgroup[FILENAME_MAX * 2 + 1];
+
 /*
  * config related API
  */
diff --git a/src/parse.y b/src/parse.y
index 292a0664..01752857 100644
--- a/src/parse.y
+++ b/src/parse.y
@@ -31,7 +31,7 @@ int yywrap(void)
 
 %}
 
-%token ID MOUNT GROUP PERM TASK ADMIN NAMESPACE DEFAULT TEMPLATE
+%token ID MOUNT GROUP PERM TASK ADMIN NAMESPACE DEFAULT TEMPLATE SYSTEMD
 
 %union {
 	char *name;
@@ -48,6 +48,7 @@ int yywrap(void)
 %type template_task_or_admin template_task_namevalue_conf
 %type template_admin_namevalue_conf template_task_conf
 %type template_admin_conf
+%type systemdvalue_conf systemd
 
 %start start
 %%
@@ -71,6 +72,10 @@ start : start group
 	{
 		$$ = $1;
 	}
+	| start systemd
+	{
+		$$ = $1;
+	}
 	|
 	{
 		$$ = 1;
@@ -475,4 +480,45 @@ namespace : NAMESPACE '{' namespace_conf '}'
 	}
 	;
 
+systemdvalue_conf
+	:	ID '=' ID ';'
+	{
+		int ok = cgroup_alloc_systemd_opts($1, $3);
+
+		/* IDs are strdup()ed by the lexer and copied by the callee, release them */
+		free($1);
+		free($3);
+		if (!ok) {
+			cgroup_cleanup_systemd_opts();
+			$$ = ECGCONFIGPARSEFAIL;
+			return $$;
+		}
+		$$ = 1;
+	}
+	|	systemdvalue_conf ID '=' ID ';'
+	{
+		int ok = cgroup_add_systemd_opts($2, $4);
+
+		/* IDs are strdup()ed by the lexer and copied by the callee, release them */
+		free($2);
+		free($4);
+		if (!ok) {
+			cgroup_cleanup_systemd_opts();
+			$$ = ECGCONFIGPARSEFAIL;
+			return $$;
+		}
+		$$ = 1;
+	}
+	;
+
+systemd	:	SYSTEMD '{' systemdvalue_conf '}'
+	{
+		$$ = $3;
+		if (!$$) {
+			fprintf(stderr, "parsing failed at line number %d\n", line_no);
+			$$ = ECGCONFIGPARSEFAIL;
+			return $$;
+		}
+	}
+	;
 %%
-- 
2.47.2