git.ipfire.org Git - thirdparty/lxc.git/commitdiff
lxc: add lxc.sched.core
author Christian Brauner <christian.brauner@ubuntu.com>
Wed, 29 Sep 2021 11:16:26 +0000 (13:16 +0200)
committer Christian Brauner <christian.brauner@ubuntu.com>
Wed, 29 Sep 2021 12:07:41 +0000 (14:07 +0200)
Core scheduling defines if the container payload is marked as being
schedulable on the same core. Doing so will cause the kernel scheduler
to ensure that tasks that are not in the same group never run
simultaneously on a core. This can serve as an extra security measure to
prevent the container payload from using cross hyper thread attacks.

The only allowed values are 0 and 1. Set this to 1 to create a core
scheduling domain for the container or 0 to not create one. If not set
explicitly no core scheduling domain will be created for the container.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
doc/lxc.container.conf.sgml.in
src/lxc/api_extensions.h
src/lxc/conf.c
src/lxc/conf.h
src/lxc/confile.c
src/lxc/start.c
src/lxc/syscall_wrappers.h
src/tests/parse_config_file.c

index 022ef869fe3d6ce3f1c905d07da06601521899ab..0955627045bb20014eca990f728309fcd39b538e 100644 (file)
@@ -338,6 +338,33 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
       </variablelist>
     </refsect2>
 
+    <refsect2>
+      <title>Core Scheduling</title>
+      <para>
+        Core scheduling defines if the container payload
+       is marked as being schedulable on the same core. Doing so will cause
+       the kernel scheduler to ensure that tasks that are not in the same
+       group never run simultaneously on a core. This can serve as an extra
+       security measure to prevent the container payload from using
+       cross hyper thread attacks.
+      </para>
+      <variablelist>
+        <varlistentry>
+          <term>
+            <option>lxc.sched.core</option>
+          </term>
+          <listitem>
+            <para>
+              The only allowed values are 0 and 1. Set this to 1 to create a
+             core scheduling domain for the container or 0 to not create one.
+             If not set explicitly no core scheduling domain will be created
+             for the container.
+            </para>
+          </listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
     <refsect2>
       <title>Proc</title>
       <para>
index bdffa649fc344bdd2b53e2c11bba417f1a332013..90f9724637aa4dd471ceaa532f37645087f5932c 100644 (file)
@@ -46,6 +46,7 @@ static char *api_extensions[] = {
        "seccomp_proxy_send_notify_fd",
        "idmapped_mounts",
        "idmapped_mounts_v2",
+       "core_scheduling",
 };
 
 static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);
index e645d10be8621446695cb0512a60d264866b3192..45b13b0862a62b0a0898836416ecb19977f8b3ae 100644 (file)
@@ -3382,6 +3382,8 @@ struct lxc_conf *lxc_conf_init(void)
        new->transient_procfs_mnt = false;
        new->shmount.path_host = NULL;
        new->shmount.path_cont = NULL;
+       new->sched_core = false;
+       new->sched_core_cookie = INVALID_SCHED_CORE_COOKIE;
 
        /* if running in a new user namespace, init and COMMAND
         * default to running as UID/GID 0 when using lxc-execute */
index 636b9017d88e372869ba4cc9a87785b36bbff7cd..12c26d98d7e311aa98f778df827ffca7c670360a 100644 (file)
@@ -523,6 +523,9 @@ struct lxc_conf {
        } shmount;
 
        struct timens_offsets timens;
+
+       bool sched_core;
+       __u64 sched_core_cookie;
 };
 
 __hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size)
index 4a74f8daf42e188df5e036bb360ff24e9132ab26..5d2c99f8ae1dc2712ded09e56f0c545dc02fb08d 100644 (file)
@@ -154,6 +154,7 @@ lxc_config_define(tty_dir);
 lxc_config_define(uts_name);
 lxc_config_define(sysctl);
 lxc_config_define(proc);
+lxc_config_define(sched_core);
 
 static int set_config_unsupported_key(const char *key, const char *value,
                                      struct lxc_conf *lxc_conf, void *data)
@@ -207,6 +208,7 @@ static struct lxc_config_t config_jump_table[] = {
        { "lxc.console.path",               true,  set_config_console_path,               get_config_console_path,               clr_config_console_path,               },
        { "lxc.console.rotate",             true,  set_config_console_rotate,             get_config_console_rotate,             clr_config_console_rotate,             },
        { "lxc.console.size",               true,  set_config_console_size,               get_config_console_size,               clr_config_console_size,               },
+       { "lxc.sched.core",                 true,  set_config_sched_core,                 get_config_sched_core,                 clr_config_sched_core,                 },
        { "lxc.environment",                true,  set_config_environment,                get_config_environment,                clr_config_environment,                },
        { "lxc.ephemeral",                  true,  set_config_ephemeral,                  get_config_ephemeral,                  clr_config_ephemeral,                  },
        { "lxc.execute.cmd",                true,  set_config_execute_cmd,                get_config_execute_cmd,                clr_config_execute_cmd,                },
@@ -6583,3 +6585,35 @@ int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, int inlen)
 
        return fulllen;
 }
+
+/* Setter for lxc.sched.core: accepts "0" or "1" and stores the result as a
+ * boolean in lxc_conf->sched_core. An empty value clears the key instead.
+ */
+static int set_config_sched_core(const char *key, const char *value,
+                                struct lxc_conf *lxc_conf, void *data)
+{
+       unsigned int requested;
+
+       /* An empty value is handled exactly like clearing the key. */
+       if (lxc_config_value_empty(value))
+               return clr_config_sched_core(key, lxc_conf, data);
+
+       /* Reject values that fail to parse or are neither 0 nor 1. */
+       if (lxc_safe_uint(value, &requested) != 0)
+               return ret_errno(EINVAL);
+
+       if (requested > 1)
+               return ret_errno(EINVAL);
+
+       lxc_conf->sched_core = (requested != 0);
+
+       return 0;
+}
+
+/* Getter for lxc.sched.core: passes the stored flag to lxc_get_conf_bool(),
+ * which fills retv/inlen, and returns that helper's result unchanged.
+ */
+static int get_config_sched_core(const char *key, char *retv, int inlen,
+                                struct lxc_conf *c, void *data)
+{
+       int len;
+
+       len = lxc_get_conf_bool(c, retv, inlen, c->sched_core);
+
+       return len;
+}
+
+/* Clear lxc.sched.core: reset the flag to false so that no core scheduling
+ * domain is requested. Always returns 0.
+ */
+static int clr_config_sched_core(const char *key, struct lxc_conf *c, void *data)
+{
+       c->sched_core = false;
+
+       return 0;
+}
index a9d3d5eadac6cae53a22133bea8e5ee8fccaabb7..f783f2e53da67b83f663c0d7994be9cb6df2a7b1 100644 (file)
@@ -1553,6 +1553,32 @@ static inline int do_share_ns(void *arg)
        return 0;
 }
 
+/* Create a new core scheduling domain for the cloned child (handler->pid)
+ * when conf->sched_core is set, and record the resulting cookie in
+ * conf->sched_core_cookie. lxc_spawn() treats a negative return as fatal.
+ */
+static int core_scheduling(struct lxc_handler *handler)
+{
+       struct lxc_conf *conf = handler->conf;
+       int err;
+
+       /* Nothing to do unless the config explicitly asked for a domain. */
+       if (!conf->sched_core)
+               return log_trace(0, "No new core scheduling domain requested");
+
+       err = core_scheduling_cookie_create_thread(handler->pid);
+       if (err == -EINVAL)
+               return sysinfo("The kernel does not support core scheduling");
+       if (err < 0)
+               return syserror("Failed to create new core scheduling domain");
+
+       /* Read the cookie back so it is available in the config later on. */
+       conf->sched_core_cookie = core_scheduling_cookie_get(handler->pid);
+       if (conf->sched_core_cookie == INVALID_SCHED_CORE_COOKIE)
+               return syserror("Failed to retrieve core scheduling domain cookie");
+
+       TRACE("Created new core scheduling domain with cookie %llu",
+             (unsigned long long)conf->sched_core_cookie);
+
+       return 0;
+}
+
 /* lxc_spawn() performs crucial setup tasks and clone()s the new process which
  * exec()s the requested container binary.
  * Note that lxc_spawn() runs in the parent namespaces. Any operations performed
@@ -1709,6 +1735,10 @@ static int lxc_spawn(struct lxc_handler *handler)
                handler->clone_flags &= ~CLONE_PIDFD;
        TRACE("Cloned child process %d", handler->pid);
 
+       ret = core_scheduling(handler);
+       if (ret < 0)
+               goto out_delete_net;
+
        /* Verify that we can actually make use of pidfds. */
        if (!lxc_can_use_pidfd(handler->pidfd))
                close_prot_errno_disarm(handler->pidfd);
index f1004d2647f8d2aff08e1568954a76790468a43e..6e90f572d2f1fc87aa5345d0b6f3add8c53eb627 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/keyctl.h>
 #include <sched.h>
 #include <stdint.h>
+#include <sys/prctl.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -320,4 +321,70 @@ static inline int personality(unsigned long persona)
 }
 #endif
 
+/* arg1 of prctl(); every fallback below applies only if the system headers lack it */
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+#endif
+
+/* arg2 of prctl(): the operation to perform */
+#ifndef PR_SCHED_CORE_GET
+#define PR_SCHED_CORE_GET 0 /* get core_sched cookie of pid */
+#endif
+
+#ifndef PR_SCHED_CORE_CREATE
+#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_TO
+#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_FROM
+#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie from pid -- NOTE(review): confirm direction against kernel docs */
+#endif
+
+#ifndef PR_SCHED_CORE_MAX
+#define PR_SCHED_CORE_MAX 4
+#endif
+
+/* arg4 of prctl(): scope of the operation (arg3 is the target pid) */
+#ifndef PR_SCHED_CORE_SCOPE_THREAD
+#define PR_SCHED_CORE_SCOPE_THREAD 0
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_THREAD_GROUP
+#define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_PROCESS_GROUP
+#define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+#endif
+
+#define INVALID_SCHED_CORE_COOKIE ((__u64)-1) /* returned by core_scheduling_cookie_get() when prctl() fails */
+
+/* Query the core scheduling cookie of @pid with thread scope.
+ * Returns the cookie written by prctl(), or INVALID_SCHED_CORE_COOKIE when
+ * prctl() returns non-zero.
+ */
+static inline __u64 core_scheduling_cookie_get(pid_t pid)
+{
+       __u64 value;
+
+       /* The cookie is written through the pointer passed as last argument. */
+       if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid,
+                 PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)&value) != 0)
+               return INVALID_SCHED_CORE_COOKIE;
+
+       return value;
+}
+
+/* Ask the kernel to create a new core scheduling cookie for @pid.
+ * Returns 0 on success and -errno when prctl() fails.
+ * NOTE(review): despite the "_thread" suffix, the request is issued with
+ * PR_SCHED_CORE_SCOPE_THREAD_GROUP.
+ */
+static inline int core_scheduling_cookie_create_thread(pid_t pid)
+{
+       if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid,
+                 PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0) == 0)
+               return 0;
+
+       return -errno;
+}
+
 #endif /* __LXC_SYSCALL_WRAPPER_H */
index e7468a96a474c38d9477f0d159b6cc5312452ced..d19e24ec9d20e2925b7fab72bfd6337061219ba1 100644 (file)
@@ -925,6 +925,11 @@ int main(int argc, char *argv[])
                goto non_test_error;
        }
 
+       if (set_get_compare_clear_save_load(c, "lxc.sched.core", "1", tmpf, true) < 0) {
+               lxc_error("%s\n", "lxc.sched.core");
+               goto non_test_error;
+       }
+
        fret = EXIT_SUCCESS;
 
 non_test_error: