From 09996a4821c25c6741dbdcd37e9b07f56018458c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Sep 2021 13:16:26 +0200 Subject: [PATCH] lxc: add lxc.sched.core Core scheduling defines if the container payload is marked as being schedulable on the same core. Doing so will cause the kernel scheduler to ensure that tasks that are not in the same group never run simultaneously on a core. This can serve as an extra security measure to prevent the container payload from using cross hyper thread attacks. The only allowed values are 0 and 1. Set this to 1 to create a core scheduling domain for the container or 0 to not create one. If not set explicitly no core scheduling domain will be created for the container. Signed-off-by: Christian Brauner --- doc/lxc.container.conf.sgml.in | 27 ++++++++++++++ src/lxc/api_extensions.h | 1 + src/lxc/conf.c | 2 + src/lxc/conf.h | 3 ++ src/lxc/confile.c | 34 +++++++++++++++++ src/lxc/start.c | 30 +++++++++++++++ src/lxc/syscall_wrappers.h | 67 ++++++++++++++++++++++++++++++++++ src/tests/parse_config_file.c | 5 +++ 8 files changed, 169 insertions(+) diff --git a/doc/lxc.container.conf.sgml.in b/doc/lxc.container.conf.sgml.in index 022ef869f..095562704 100644 --- a/doc/lxc.container.conf.sgml.in +++ b/doc/lxc.container.conf.sgml.in @@ -338,6 +338,33 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + Core Scheduling + + Core scheduling defines if the container payload + is marked as being schedulable on the same core. Doing so will cause + the kernel scheduler to ensure that tasks that are not in the same + group never run simultaneously on a core. This can serve as an extra + security measure to prevent the container payload from using + cross hyper thread attacks. + + + + + + + + + The only allowed values are 0 and 1. Set this to 1 to create a + core scheduling domain for the container or 0 to not create one. 
+ If not set explicitly no core scheduling domain will be created + for the container. + + + + + + Proc diff --git a/src/lxc/api_extensions.h b/src/lxc/api_extensions.h index bdffa649f..90f972463 100644 --- a/src/lxc/api_extensions.h +++ b/src/lxc/api_extensions.h @@ -46,6 +46,7 @@ static char *api_extensions[] = { "seccomp_proxy_send_notify_fd", "idmapped_mounts", "idmapped_mounts_v2", + "core_scheduling", }; static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions); diff --git a/src/lxc/conf.c b/src/lxc/conf.c index e645d10be..45b13b086 100644 --- a/src/lxc/conf.c +++ b/src/lxc/conf.c @@ -3382,6 +3382,8 @@ struct lxc_conf *lxc_conf_init(void) new->transient_procfs_mnt = false; new->shmount.path_host = NULL; new->shmount.path_cont = NULL; + new->sched_core = false; + new->sched_core_cookie = INVALID_SCHED_CORE_COOKIE; /* if running in a new user namespace, init and COMMAND * default to running as UID/GID 0 when using lxc-execute */ diff --git a/src/lxc/conf.h b/src/lxc/conf.h index 636b9017d..12c26d98d 100644 --- a/src/lxc/conf.h +++ b/src/lxc/conf.h @@ -523,6 +523,9 @@ struct lxc_conf { } shmount; struct timens_offsets timens; + + bool sched_core; + __u64 sched_core_cookie; }; __hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size) diff --git a/src/lxc/confile.c b/src/lxc/confile.c index 4a74f8daf..5d2c99f8a 100644 --- a/src/lxc/confile.c +++ b/src/lxc/confile.c @@ -154,6 +154,7 @@ lxc_config_define(tty_dir); lxc_config_define(uts_name); lxc_config_define(sysctl); lxc_config_define(proc); +lxc_config_define(sched_core); static int set_config_unsupported_key(const char *key, const char *value, struct lxc_conf *lxc_conf, void *data) @@ -207,6 +208,7 @@ static struct lxc_config_t config_jump_table[] = { { "lxc.console.path", true, set_config_console_path, get_config_console_path, clr_config_console_path, }, { "lxc.console.rotate", true, set_config_console_rotate, get_config_console_rotate, 
clr_config_console_rotate, },
 	{ "lxc.console.size", true, set_config_console_size, get_config_console_size, clr_config_console_size, },
+	{ "lxc.sched.core", true, set_config_sched_core, get_config_sched_core, clr_config_sched_core, },
 	{ "lxc.environment", true, set_config_environment, get_config_environment, clr_config_environment, },
 	{ "lxc.ephemeral", true, set_config_ephemeral, get_config_ephemeral, clr_config_ephemeral, },
 	{ "lxc.execute.cmd", true, set_config_execute_cmd, get_config_execute_cmd, clr_config_execute_cmd, },
@@ -6583,3 +6585,38 @@ int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, int inlen)
 
 	return fulllen;
 }
+
+/* Set lxc.sched.core: the value must parse as the unsigned integer 0 or 1; an empty value clears the key. */
+static int set_config_sched_core(const char *key, const char *value,
+				 struct lxc_conf *lxc_conf, void *data)
+{
+	int ret;
+	unsigned int nr;
+
+	if (lxc_config_value_empty(value))
+		return clr_config_sched_core(key, lxc_conf, data);
+
+	ret = lxc_safe_uint(value, &nr);
+	if (ret)
+		return ret_errno(EINVAL);
+
+	if (nr != 0 && nr != 1)
+		return ret_errno(EINVAL);
+
+	lxc_conf->sched_core = (nr == 1);
+	return 0;
+}
+
+/* Report lxc.sched.core by handing c->sched_core to lxc_get_conf_bool(). */
+static int get_config_sched_core(const char *key, char *retv, int inlen,
+				 struct lxc_conf *c, void *data)
+{
+	return lxc_get_conf_bool(c, retv, inlen, c->sched_core);
+}
+
+/* Reset lxc.sched.core to false, i.e. no core scheduling domain is requested. */
+static int clr_config_sched_core(const char *key, struct lxc_conf *c, void *data)
+{
+	c->sched_core = false;
+	return 0;
+}
diff --git a/src/lxc/start.c b/src/lxc/start.c
index a9d3d5ead..f783f2e53 100644
--- a/src/lxc/start.c
+++ b/src/lxc/start.c
@@ -1553,6 +1553,45 @@ static inline int do_share_ns(void *arg)
 	return 0;
 }
 
+/*
+ * Create a core scheduling domain for the freshly cloned child (handler->pid)
+ * if lxc.sched.core is enabled and store the resulting cookie in the config.
+ * lxc_spawn() treats a negative return value as a fatal error.
+ */
+static int core_scheduling(struct lxc_handler *handler)
+{
+	struct lxc_conf *conf = handler->conf;
+	int ret;
+
+	if (!conf->sched_core)
+		return log_trace(0, "No new core scheduling domain requested");
+
+	ret = core_scheduling_cookie_create_thread(handler->pid);
+	if (ret < 0) {
+		/*
+		 * The kernel returns ENODEV when SMT is not available. Without
+		 * sibling hyperthreads there is nothing to isolate, so do not
+		 * fail the start; sched_core_cookie keeps its current value.
+		 */
+		if (ret == -ENODEV)
+			return log_trace(0, "The kernel doesn't support or doesn't use simultaneous multithreading (SMT)");
+
+		if (ret == -EINVAL)
+			return sysinfo("The kernel does not support core scheduling");
+
+		return syserror("Failed to create new core scheduling domain");
+	}
+
+	conf->sched_core_cookie = core_scheduling_cookie_get(handler->pid);
+	if (conf->sched_core_cookie == INVALID_SCHED_CORE_COOKIE)
+		return syserror("Failed to retrieve core scheduling domain cookie");
+
+	TRACE("Created new core scheduling domain with cookie %llu",
+	      (long long unsigned int)conf->sched_core_cookie);
+
+	return 0;
+}
+
 /* lxc_spawn() performs crucial setup tasks and clone()s the new process which
  * exec()s the requested container binary.
  * Note that lxc_spawn() runs in the parent namespaces. Any operations performed
@@ -1709,6 +1748,10 @@ static int lxc_spawn(struct lxc_handler *handler)
 		handler->clone_flags &= ~CLONE_PIDFD;
 	TRACE("Cloned child process %d", handler->pid);
 
+	ret = core_scheduling(handler);
+	if (ret < 0)
+		goto out_delete_net;
+
 	/* Verify that we can actually make use of pidfds. */
 	if (!lxc_can_use_pidfd(handler->pidfd))
 		close_prot_errno_disarm(handler->pidfd);
diff --git a/src/lxc/syscall_wrappers.h b/src/lxc/syscall_wrappers.h
index f1004d264..6e90f572d 100644
--- a/src/lxc/syscall_wrappers.h
+++ b/src/lxc/syscall_wrappers.h
@@ -10,6 +10,7 @@
 #include
 #include
 #include
+#include <sys/prctl.h>
 #include
 #include
 #include
@@ -320,4 +321,70 @@ static inline int personality(unsigned long persona)
 }
 #endif
 
+/* arg1 of prctl() */
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+#endif
+
+/* arg2 of prctl() */
+#ifndef PR_SCHED_CORE_GET
+#define PR_SCHED_CORE_GET 0
+#endif
+
+#ifndef PR_SCHED_CORE_CREATE
+#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_TO
+#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_FROM
+#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+#endif
+
+#ifndef PR_SCHED_CORE_MAX
+#define PR_SCHED_CORE_MAX 4
+#endif
+
+/* arg4 of prctl() (arg3 is the target pid) */
+#ifndef PR_SCHED_CORE_SCOPE_THREAD
+#define PR_SCHED_CORE_SCOPE_THREAD 0
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_THREAD_GROUP
+#define 
PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_PROCESS_GROUP
+#define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+#endif
+
+#define INVALID_SCHED_CORE_COOKIE ((__u64)-1)
+
+/* Return the core scheduling cookie of thread @pid, INVALID_SCHED_CORE_COOKIE if prctl() fails. */
+static inline __u64 core_scheduling_cookie_get(pid_t pid)
+{
+	__u64 cookie;
+
+	/* prctl() is variadic; pass every argument as the unsigned long prctl(2) documents. */
+	if (prctl(PR_SCHED_CORE, (unsigned long)PR_SCHED_CORE_GET, (unsigned long)pid,
+		  (unsigned long)PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)&cookie))
+		return INVALID_SCHED_CORE_COOKIE;
+
+	return cookie;
+}
+
+/* Create a new core scheduling cookie for the thread group of @pid (thread-group
+ * scope, despite the function name). Returns 0 on success, -errno on failure. */
+static inline int core_scheduling_cookie_create_thread(pid_t pid)
+{
+	/* arg5 is unused for this command and passed as an explicit unsigned long 0. */
+	if (prctl(PR_SCHED_CORE, (unsigned long)PR_SCHED_CORE_CREATE, (unsigned long)pid,
+		  (unsigned long)PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0UL))
+		return -errno;
+
+	return 0;
+}
+
 #endif /* __LXC_SYSCALL_WRAPPER_H */
diff --git a/src/tests/parse_config_file.c b/src/tests/parse_config_file.c
index e7468a96a..d19e24ec9 100644
--- a/src/tests/parse_config_file.c
+++ b/src/tests/parse_config_file.c
@@ -925,6 +925,11 @@ int main(int argc, char *argv[])
 		goto non_test_error;
 	}
 
+	if (set_get_compare_clear_save_load(c, "lxc.sched.core", "1", tmpf, true) < 0) {
+		lxc_error("%s\n", "lxc.sched.core");
+		goto non_test_error;
+	}
+
 	fret = EXIT_SUCCESS;
 
 non_test_error:
-- 
2.47.2