From: Michael Tremer Date: Mon, 8 Aug 2022 10:34:25 +0000 (+0000) Subject: cgroup: Start again from scratch X-Git-Tag: 0.9.28~583 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e3ddb4986e590583118011bce8153c6eb476ba1c;p=pakfire.git cgroup: Start again from scratch Signed-off-by: Michael Tremer --- diff --git a/src/libpakfire/cgroup.c b/src/libpakfire/cgroup.c index 4606e9e42..0dccef797 100644 --- a/src/libpakfire/cgroup.c +++ b/src/libpakfire/cgroup.c @@ -1,7 +1,7 @@ /*############################################################################# # # # Pakfire - The IPFire package management system # -# Copyright (C) 2021 Pakfire development team # +# Copyright (C) 2022 Pakfire development team # # # # This program is free software: you can redistribute it and/or modify # # it under the terms of the GNU General Public License as published by # @@ -18,490 +18,316 @@ # # #############################################################################*/ -#include #include -#include -#include -#include +#include #include -#include -#include #include -#include -#include #include #include +#include #include -/* - We expect this to be a cgroupv2 file system -*/ -#define CGROUP_ROOT "/sys/fs/cgroup" +#define ROOT "/sys/fs/cgroup" +#define BUFFER_SIZE 64 * 1024 -// Cache whether cgroups are supported on this system -static int pakfire_cgroups_supported = -1; +struct pakfire_cgroup { + struct pakfire* pakfire; + int nrefs; -static const char* cgroup_controllers[] = { - "cpu", - "memory", - NULL, -}; + // Store the path + char path[PATH_MAX]; -static char random_character(void) { - static char characters[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789"; + // File descriptor to cgroup + int fd; +}; - int random; +// Returns true if this is the root cgroup +static int pakfire_cgroup_is_root(struct pakfire_cgroup* cgroup) { + return !*cgroup->path; +} - int r = getrandom(&random, sizeof(random), GRND_NONBLOCK); - if (r != sizeof(random)) - return -1; +static const char* pakfire_cgroup_name(struct pakfire_cgroup* cgroup) { + if (pakfire_cgroup_is_root(cgroup)) + return "(root)"; - return characters[random % strlen(characters)]; + return cgroup->path; } -int pakfire_cgroup_random_name(char* template) { - for (int i = strlen(template) - 1; i > 0 && template[i] == 'X'; i--) { - char c = random_character(); - if (c < 0) - return 1; +static void pakfire_cgroup_free(struct pakfire_cgroup* cgroup) { + DEBUG(cgroup->pakfire, "Releasing cgroup %s at %p\n", + pakfire_cgroup_name(cgroup), cgroup); - template[i] = c; - } + // Close the file descriptor + if (cgroup->fd) + close(cgroup->fd); - return 0; + pakfire_unref(cgroup->pakfire); + free(cgroup); } -/* - Returns the name of the parent group -*/ -static char* pakfire_cgroup_parent_name(const char* group) { - if (!group || !*group) - return NULL; +static int __pakfire_cgroup_create(struct pakfire_cgroup* cgroup) { + char path[PATH_MAX]; + int r; - // Find the last / in group - char* slash = strrchr(group, '/'); + DEBUG(cgroup->pakfire, "Trying to create cgroup %s\n", pakfire_cgroup_name(cgroup)); - // If nothing was found, the next level up is "root" - if (!slash) - return strdup(""); + // Compose the absolute path + r = pakfire_path_join(path, ROOT, cgroup->path); + if (r < 0) + return 1; - size_t length = slash - group + 1; + // Try creating the directory + return pakfire_mkdir(path, 0755); +} - // Allocate the parent group name - char* parent = malloc(length + 1); - if (!parent) - return NULL; +/* + Opens the cgroup and returns a file descriptor. - // Write everything up to "slash" which snprintf will replace by NUL - snprintf(parent, length, "%s", group); + If the cgroup does not exist, it will try to create it. - return parent; -} + This function returns a negative value on error. +*/ +static int __pakfire_cgroup_open(struct pakfire_cgroup* cgroup) { + int rootfd = -1; + int fd = -1; + int r; -static int pakfire_cgroup_supported(struct pakfire* pakfire) { - if (pakfire_cgroups_supported < 0) { - struct statfs fs; - - int r = statfs(CGROUP_ROOT, &fs); - if (r == 0) { - // Check if this is a cgroupv2 file system - if (fs.f_type == CGROUP2_SUPER_MAGIC) - pakfire_cgroups_supported = 1; - else { - ERROR(pakfire, "%s is not a cgroupv2 hierarchy\n", CGROUP_ROOT); - pakfire_cgroups_supported = 0; - } - } else if (r < 0) { - ERROR(pakfire, "Could not stat %s: %m\n", CGROUP_ROOT); - pakfire_cgroups_supported = 0; - } + // Open file descriptor of the cgroup root + rootfd = open(ROOT, O_DIRECTORY|O_PATH|O_CLOEXEC); + if (rootfd < 0) { + ERROR(cgroup->pakfire, "Could not open %s: %m\n", ROOT); + return -1; } - return pakfire_cgroups_supported; -} + // Return the rootfd for the root group + if (pakfire_cgroup_is_root(cgroup)) + return rootfd; -static int pakfire_cgroup_make_path(struct pakfire* pakfire, char* path, size_t length, - const char* subgroup, const char* file) { - // Store up to where we have written and how much space is left - char* p = path; - size_t l = length; +RETRY: + fd = openat(rootfd, cgroup->path, O_DIRECTORY|O_PATH|O_CLOEXEC); + if (fd < 0) { + switch (errno) { + // If the cgroup doesn't exist yet, try to create it + case ENOENT: + r = __pakfire_cgroup_create(cgroup); + if (r) + goto ERROR; - // Write root - size_t bytes_written = snprintf(p, l, "%s", CGROUP_ROOT); - if (bytes_written >= l) - return -1; + // Retry open after successful creation + goto RETRY; - p += bytes_written; - l -= bytes_written; + // Exit on all other errors + default: + ERROR(cgroup->pakfire, "Could not open cgroup %s: %m\n", + pakfire_cgroup_name(cgroup)); + goto ERROR; + } + } - // Append subgroup - if (subgroup) { - bytes_written = snprintf(p, l, "/%s", subgroup); - if (bytes_written >= l) - return -1; +ERROR: + if (rootfd > 0) + close(rootfd); - p += bytes_written; - l -= bytes_written; - } + return fd; +} - // Append file - if (file) { - bytes_written = snprintf(p, l, "/%s", file); - if (bytes_written >= l) - return -1; +static int pakfire_cgroup_read(struct pakfire_cgroup* cgroup, const char* path, + char* buffer, size_t length) { + int r = -1; - p += bytes_written; - l -= bytes_written; + // Check if this cgroup has been destroyed already + if (!cgroup->fd) { + ERROR(cgroup->pakfire, "Trying to read from destroyed cgroup\n"); + return r; } - // Return total bytes written - return length - l; -} + // Open the file + int fd = openat(cgroup->fd, path, O_CLOEXEC); + if (fd < 0) { + DEBUG(cgroup->pakfire, "Could not open %s/%s: %m\n", + pakfire_cgroup_name(cgroup), path); + goto ERROR; + } -static FILE* pakfire_cgroup_fopen(struct pakfire* pakfire, - const char* group, const char* file, const char* mode) { - char path[PATH_MAX]; + // Read file content into buffer + ssize_t bytes_read = read(fd, buffer, length); + if (bytes_read <= 0) { + DEBUG(cgroup->pakfire, "Could not read from %s/%s: %m\n", + pakfire_cgroup_name(cgroup), path); + goto ERROR; + } - int r = pakfire_cgroup_make_path(pakfire, path, sizeof(path) - 1, group, file); - if (r < 0) - return NULL; + // Return how many bytes we have read + r = bytes_read; - FILE* f = fopen(path, mode); - if (!f) { - ERROR(pakfire, "Could not open %s: %m\n", path); - return NULL; - } +ERROR: + if (fd > 0) + close(fd); - return f; + return r; } -static int pakfire_cgroup_fprintf(struct pakfire* pakfire, - const char* group, const char* file, const char* format, ...) { - char buffer[64]; - ssize_t length; - int r; +static int pakfire_cgroup_write(struct pakfire_cgroup* cgroup, + const char* path, const char* format, ...) { va_list args; + int r = 0; + + // Check if this cgroup has been destroyed already + if (!cgroup->fd) { + ERROR(cgroup->pakfire, "Trying to write to destroyed cgroup\n"); + errno = EPERM; + return 1; + } - FILE* f = pakfire_cgroup_fopen(pakfire, group, file, "w"); - if (!f) + // Open the file + int fd = openat(cgroup->fd, path, O_WRONLY|O_CLOEXEC); + if (fd < 0) { + DEBUG(cgroup->pakfire, "Could not open %s/%s for writing: %m\n", + pakfire_cgroup_name(cgroup), path); return 1; + } - // Format what we have to write + // Write buffer va_start(args, format); - length = vsnprintf(buffer, sizeof(buffer) - 1, format, args); + ssize_t bytes_written = vdprintf(fd, format, args); va_end(args); - // Use write(2) instead of fprintf/fwrite because we want to know - // if the operation was successful. - r = write(fileno(f), buffer, length); - - // Return zero if everything was written correctly - if (r == length) - r = 0; + // Check if content was written okay + if (bytes_written < 0) { + DEBUG(cgroup->pakfire, "Could not write to %s/%s: %m\n", + pakfire_cgroup_name(cgroup), path); + r = 1; + } - fclose(f); + // Close fd + close(fd); return r; } -static int pakfire_cgroup_enable_controller(struct pakfire* pakfire, - const char* group, const char* controller) { - // Enable controller - int r = pakfire_cgroup_fprintf(pakfire, group, "cgroup.subtree_control", - "+%s", controller); - - // fprintf might set errno when there was a problem, although the write itself was ok - if (r < 0) { - // The parent group does not seem to have this controller enabled - if (errno == ENOENT) { - char* parent = pakfire_cgroup_parent_name(group); - if (!parent) - return 1; - - // Try to enable this on the parent level - r = pakfire_cgroup_enable_controller(pakfire, parent, controller); - free(parent); - - // If this failed, we fail - if (r) - return r; - - // Otherwise we try again - return pakfire_cgroup_enable_controller(pakfire, group, controller); - } - } - - return 0; -} - -static int pakfire_cgroup_enable_controllers(struct pakfire* pakfire, - const char* group, const char** controllers) { +static struct pakfire_cgroup* pakfire_cgroup_parent(struct pakfire_cgroup* cgroup) { + struct pakfire_cgroup* parent = NULL; int r; - // Enable all controllers - for (const char** controller = controllers; *controller; controller++) { - r = pakfire_cgroup_enable_controller(pakfire, group, *controller); - if (r) - return r; - } - - return 0; -} - -int pakfire_cgroup_create(struct pakfire* pakfire, const char* group) { - int supported = pakfire_cgroup_supported(pakfire); - if (!supported) { - errno = ENOTSUP; - return 1; - } - - // Ensure that parent groups exist - char* parent = pakfire_cgroup_parent_name(group); - if (parent) { - int r = pakfire_cgroup_create(pakfire, parent); - if (r) { - free(parent); - return r; - } - - // Enable default controllers in all parent groups - r = pakfire_cgroup_enable_controllers(pakfire, parent, cgroup_controllers); - if (r) { - free(parent); - return r; - } + // Cannot return parent for root group + if (pakfire_cgroup_is_root(cgroup)) + return NULL; - free(parent); + // Determine the path of the parent + char* path = pakfire_dirname(cgroup->path); + if (!path) { + ERROR(cgroup->pakfire, "Could not determine path for parent cgroup: %m\n"); + return NULL; } - // Make path - char path[PATH_MAX]; - int r = pakfire_cgroup_make_path(pakfire, path, sizeof(path) - 1, group, NULL); - if (r < 0) - return r; + // dirname() returns . if no directory component could be found + if (strcmp(path, ".") == 0) + *path = '\0'; - // Create group - r = pakfire_mkdir(path, 0755); + // Open the cgroup + r = pakfire_cgroup_open(&parent, cgroup->pakfire, path); if (r) { - switch (errno) { - // The group already exists - case EEXIST: - return 0; - - default: - ERROR(pakfire, "Could not create cgroup %s: %m\n", group); - return r; - } + ERROR(cgroup->pakfire, "Could not open parent cgroup: %m\n"); + parent = NULL; } - DEBUG(pakfire, "Created cgroup %s\n", group); + // Cleanup + free(path); - return 0; + return parent; } -int pakfire_cgroup_destroy(struct pakfire* pakfire, const char* group) { - // Never attempt to destroy root - if (!*group) - return EINVAL; +/* + Entry function to open a new cgroup. - char path[PATH_MAX]; + If the cgroup doesn't exist, it will be created including any parent cgroups. +*/ +int pakfire_cgroup_open(struct pakfire_cgroup** cgroup, + struct pakfire* pakfire, const char* path) { + int r = 1; - int r = pakfire_cgroup_make_path(pakfire, path, sizeof(path) - 1, group, NULL); - if (r < 0) - return r; + // Allocate the cgroup struct + struct pakfire_cgroup* c = calloc(1, sizeof(*c)); + if (!c) + return 1; - // Remove the directory - r = rmdir(path); - if (r) { - ERROR(pakfire, "Could not destroy cgroup %s: %m\n", group); - return r; - } + DEBUG(pakfire, "Allocated cgroup %s at %p\n", path, c); - return 0; -} + // Keep a reference to pakfire + c->pakfire = pakfire_ref(pakfire); -DIR* pakfire_cgroup_opendir(struct pakfire* pakfire, const char* group) { - // Make path - char path[PATH_MAX]; - int r = pakfire_cgroup_make_path(pakfire, path, sizeof(path) - 1, group, NULL); - if (r < 0) - return NULL; + // Initialize reference counter + c->nrefs = 1; - return opendir(path); -} + // Copy path + pakfire_string_set(c->path, path); -int pakfire_cgroup_attach(struct pakfire* pakfire, const char* group, pid_t pid) { - int r = pakfire_cgroup_fprintf(pakfire, group, "cgroup.procs", "%d", pid); - if (r < 0) { - ERROR(pakfire, "Could not attach process %d to cgroup %s: %m\n", pid, group); - return r; - } + // Open a file descriptor + c->fd = __pakfire_cgroup_open(c); + if (c->fd < 0) + goto ERROR; - DEBUG(pakfire, "Attached process %d to cgroup %s\n", pid, group); + *cgroup = c; return 0; -} - -int pakfire_cgroup_detach(struct pakfire* pakfire, const char* group, pid_t pid) { - char* parent = pakfire_cgroup_parent_name(group); - if (!parent) - return EINVAL; - - while (parent) { - int r = pakfire_cgroup_attach(pakfire, parent, pid); - // Break on success - if (r == 0) { - free(parent); - return 0; - } - - // Move on to the next parent group - char* p = parent; - parent = pakfire_cgroup_parent_name(p); - free(p); - } - - ERROR(pakfire, "Could not detach process %d from %s\n", pid, group); - return 1; +ERROR: + pakfire_cgroup_free(c); + return r; } -static ssize_t pakfire_cgroup_procs_callback(struct pakfire* pakfire, const char* group, - int (*func)(struct pakfire* pakfire, pid_t pid, void* data), void* data) { - FILE* f = pakfire_cgroup_fopen(pakfire, group, "cgroup.procs", "r"); - if (!f) - return -1; - - ssize_t num_processes = 0; - - char* line = NULL; - size_t l = 0; - - while (1) { - ssize_t bytes_read = getline(&line, &l, f); - if (bytes_read < 0) - break; +struct pakfire_cgroup* pakfire_cgroup_ref(struct pakfire_cgroup* cgroup) { + ++cgroup->nrefs; - // Increment process counter - num_processes++; - - // Process callback - if (func) { - // Parse PID - pid_t pid = strtol(line, NULL, 10); - - // Call callback function - int r = func(pakfire, pid, data); - if (r) { - fclose(f); - return -r; - } - } - } - - fclose(f); - - // Returns the number of processes - return num_processes; -} - -ssize_t pakfire_cgroup_num_processes(struct pakfire* pakfire, const char* group) { - return pakfire_cgroup_procs_callback(pakfire, group, NULL, NULL); + return cgroup; } -static int send_signal(struct pakfire* pakfire, pid_t pid, void* data) { - int* signum = (int*)data; - - DEBUG(pakfire, "Sending signal %d to PID %d\n", *signum, pid); +struct pakfire_cgroup* pakfire_cgroup_unref(struct pakfire_cgroup* cgroup) { + if (--cgroup->nrefs > 0) + return cgroup; - int r = kill(pid, *signum); - if (r < 0 && errno != ESRCH) { - ERROR(pakfire, "Could not send signal %d to PID %d: %m\n", *signum, pid); - return r; - } - - return 0; + pakfire_cgroup_free(cgroup); + return NULL; } -int pakfire_cgroup_killall(struct pakfire* pakfire, const char* group) { - DEBUG(pakfire, "Killing all processes in cgroup %s\n", group); - int signum = SIGTERM; - - int count = 0; - while (1) { - // Kill all processes - ssize_t num_procs = pakfire_cgroup_procs_callback(pakfire, group, - send_signal, &signum); +/* + Immediately kills all processes in this cgroup +*/ +int pakfire_cgroup_killall(struct pakfire_cgroup* cgroup) { + int r = pakfire_cgroup_write(cgroup, "cgroup.kill", "1"); + if (r) + ERROR(cgroup->pakfire, "Could not kill processes: %m\n"); - // If no processes are left, we are done - if (num_procs <= 0) - return num_procs; + return r; +} - DEBUG(pakfire, " %zu process(es) left\n", num_procs); +/* + Immediately destroys this cgroup +*/ +int pakfire_cgroup_destroy(struct pakfire_cgroup* cgroup) { + int r; - // Use SIGKILL after 5 attempts with SIGTERM - if (count++ > 5 && signum == SIGTERM) - signum = SIGKILL; + // Kill everything in this group + r = pakfire_cgroup_killall(cgroup); + if (r) + return r; - usleep(100000); + // Delete the directory + r = rmdir(cgroup->path); + if (r) { + ERROR(cgroup->pakfire, "Could not destroy cgroup: %m\n"); + return r; } - return 1; -} - -int pakfire_cgroup_cpustat(struct pakfire* pakfire, const char* group, - struct pakfire_cgroup_cpustat* st) { - FILE* f = pakfire_cgroup_fopen(pakfire, group, "cpu.stat", "r"); - if (!f) - return 1; - - const struct keyword { - const char* keyword; - struct timeval* value; - } keywords[] = { - { "usage_usec", &st->usage }, - { "user_usec", &st->user }, - { "system_usec", &st->system }, - { NULL, NULL }, - }; - - char* line = NULL; - size_t l = 0; - - while (1) { - ssize_t bytes_read = getline(&line, &l, f); - if (bytes_read < 0) - break; - - for (const struct keyword* keyword = keywords; keyword->keyword; keyword++) { - if (pakfire_string_startswith(line, keyword->keyword)) { - const char* p = line + strlen(keyword->keyword) + 1; - - unsigned long long v = strtoull(p, NULL, 10); - - // Set value - keyword->value->tv_sec = v / 1000000; - keyword->value->tv_usec = v % 1000000; - } - } + // Close the file descriptor + if (cgroup->fd) { + close(cgroup->fd); + cgroup->fd = 0; } - - fclose(f); return 0; } -int pakfire_cgroup_set_nice(struct pakfire* pakfire, const char* group, int level) { - DEBUG(pakfire, "Setting nice level of cgroup %s to %d\n", group, level); - - int r = pakfire_cgroup_fprintf(pakfire, group, "cpu.weight.nice", "%d", level); - if (r) { - ERROR(pakfire, "Could not change nice level of cgroup %s: %m\n", group); - return r; - } - - return 0; +int pakfire_cgroup_fd(struct pakfire_cgroup* cgroup) { + return cgroup->fd; } diff --git a/src/libpakfire/include/pakfire/cgroup.h b/src/libpakfire/include/pakfire/cgroup.h index 2b0818f04..a9d3914b1 100644 --- a/src/libpakfire/include/pakfire/cgroup.h +++ b/src/libpakfire/include/pakfire/cgroup.h @@ -1,7 +1,7 @@ /*############################################################################# # # # Pakfire - The IPFire package management system # -# Copyright (C) 2021 Pakfire development team # +# Copyright (C) 2022 Pakfire development team # # # # This program is free software: you can redistribute it and/or modify # # it under the terms of the GNU General Public License as published by # @@ -23,37 +23,20 @@ #ifdef PAKFIRE_PRIVATE -#include -#include -#include - #include -int pakfire_cgroup_random_name(char* template); - -int pakfire_cgroup_create(struct pakfire* pakfire, const char* group); -int pakfire_cgroup_destroy(struct pakfire* pakfire, const char* group); - -DIR* pakfire_cgroup_opendir(struct pakfire* pakfire, const char* group); - -int pakfire_cgroup_attach(struct pakfire* pakfire, const char* group, pid_t pid); -int pakfire_cgroup_detach(struct pakfire* pakfire, const char* group, pid_t pid); - -ssize_t pakfire_cgroup_num_processes(struct pakfire* pakfire, const char* group); +struct pakfire_cgroup; -int pakfire_cgroup_killall(struct pakfire* pakfire, const char* group); +int pakfire_cgroup_open(struct pakfire_cgroup** cgroup, + struct pakfire* pakfire, const char* path); -struct pakfire_cgroup_cpustat { - struct timeval usage; - struct timeval user; - struct timeval system; -}; +struct pakfire_cgroup* pakfire_cgroup_ref(struct pakfire_cgroup* cgroup); +struct pakfire_cgroup* pakfire_cgroup_unref(struct pakfire_cgroup* cgroup); -int pakfire_cgroup_cpustat(struct pakfire* pakfire, const char* group, - struct pakfire_cgroup_cpustat* st); +int pakfire_cgroup_destroy(struct pakfire_cgroup* cgroup); -int pakfire_cgroup_set_nice(struct pakfire* pakfire, const char* group, int level); +int pakfire_cgroup_fd(struct pakfire_cgroup* cgroup); -#endif +#endif /* PAKFIRE_PRIVATE */ #endif /* PAKFIRE_CGROUP_H */ diff --git a/src/libpakfire/jail.c b/src/libpakfire/jail.c index 078bb7f56..ce56c5769 100644 --- a/src/libpakfire/jail.c +++ b/src/libpakfire/jail.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -116,6 +117,9 @@ struct pakfire_jail_exec { struct pakfire_log_buffer log_ERROR; struct pakfire_log_buffer log_DEBUG; } buffers; + + // cgroup + struct pakfire_cgroup* cgroup; }; static int clone3(struct clone_args* args, size_t size) { @@ -1274,6 +1278,11 @@ static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[]) { goto ERROR; #endif /* ENABLE_DEBUG */ + // Setup a cgroup + r = pakfire_cgroup_open(&ctx.cgroup, jail->pakfire, "jail"); + if (r) + goto ERROR; + // Configure child process struct clone_args args = { .flags = @@ -1283,9 +1292,13 @@ static int __pakfire_jail_exec(struct pakfire_jail* jail, const char* argv[]) { CLONE_NEWPID | CLONE_NEWUSER | CLONE_NEWUTS | - CLONE_PIDFD, + CLONE_PIDFD | + CLONE_INTO_CGROUP, .exit_signal = SIGCHLD, .pidfd = (long long unsigned int)&ctx.pidfd, + + // Clone into the new cgroup + .cgroup = pakfire_cgroup_fd(ctx.cgroup), }; // Fork this process @@ -1343,6 +1356,12 @@ ERROR: pakfire_jail_close_pipe(jail, ctx.pipes.log_ERROR); pakfire_jail_close_pipe(jail, ctx.pipes.log_DEBUG); + // Destroy cgroup + if (ctx.cgroup) { + pakfire_cgroup_destroy(ctx.cgroup); + pakfire_cgroup_unref(ctx.cgroup); + } + // Umount everything if (!pakfire_on_root(jail->pakfire)) pakfire_umount_all(jail->pakfire);