]> git.ipfire.org Git - thirdparty/lxc.git/commitdiff
lxc: add lxc.sched.core
authorChristian Brauner <christian.brauner@ubuntu.com>
Wed, 29 Sep 2021 11:16:26 +0000 (13:16 +0200)
committerChristian Brauner <christian.brauner@ubuntu.com>
Thu, 14 Oct 2021 15:30:31 +0000 (17:30 +0200)
Core scheduling defines whether the container payload is marked as being
schedulable on the same core. Doing so will cause the kernel scheduler
to ensure that tasks that are not in the same group never run
simultaneously on a core. This can serve as an extra security measure to
prevent the container payload from using cross-hyperthread attacks.

The only allowed values are 0 and 1. Set this to 1 to create a core
scheduling domain for the container or 0 to not create one. If not set
explicitly, no core scheduling domain will be created for the container.

Signed-off-by: Christian Brauner <christian.brauner@ubuntu.com>
doc/lxc.container.conf.sgml.in
src/lxc/api_extensions.h
src/lxc/conf.c
src/lxc/conf.h
src/lxc/confile.c
src/lxc/start.c
src/lxc/syscall_wrappers.h
src/tests/parse_config_file.c

index 22b6b1305caa5bc70246c27f18c51b4ad282a5c9..6c9271130c946a51afa00e4f9c4456c4596efdb1 100644 (file)
@@ -338,6 +338,33 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
       </variablelist>
     </refsect2>
 
+    <refsect2>
+      <title>Core Scheduling</title>
+      <para>
+        Core scheduling defines whether the container payload
+       is marked as being schedulable on the same core. Doing so will cause
+       the kernel scheduler to ensure that tasks that are not in the same
+       group never run simultaneously on a core. This can serve as an extra
+       security measure to prevent the container payload from using
+       cross-hyperthread attacks.
+      </para>
+      <variablelist>
+        <varlistentry>
+          <term>
+            <option>lxc.sched.core</option>
+          </term>
+          <listitem>
+            <para>
+              The only allowed values are 0 and 1. Set this to 1 to create a
+             core scheduling domain for the container or 0 to not create one.
+             If not set explicitly, no core scheduling domain will be created
+             for the container.
+            </para>
+          </listitem>
+        </varlistentry>
+      </variablelist>
+    </refsect2>
+
     <refsect2>
       <title>Proc</title>
       <para>
index 9b4a88206566d49764aa6a27a1972a16e0b26084..c2509207da81c8c4d9d68e1d4d01c1a82b7c5820 100644 (file)
@@ -43,6 +43,7 @@ static char *api_extensions[] = {
        "seccomp_proxy_send_notify_fd",
        "idmapped_mounts",
        "idmapped_mounts_v2",
+       "core_scheduling",
 };
 
 static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);
index e645d10be8621446695cb0512a60d264866b3192..45b13b0862a62b0a0898836416ecb19977f8b3ae 100644 (file)
@@ -3382,6 +3382,8 @@ struct lxc_conf *lxc_conf_init(void)
        new->transient_procfs_mnt = false;
        new->shmount.path_host = NULL;
        new->shmount.path_cont = NULL;
+       new->sched_core = false;
+       new->sched_core_cookie = INVALID_SCHED_CORE_COOKIE;
 
        /* if running in a new user namespace, init and COMMAND
         * default to running as UID/GID 0 when using lxc-execute */
index 636b9017d88e372869ba4cc9a87785b36bbff7cd..12c26d98d7e311aa98f778df827ffca7c670360a 100644 (file)
@@ -523,6 +523,9 @@ struct lxc_conf {
        } shmount;
 
        struct timens_offsets timens;
+
+       bool sched_core;
+       __u64 sched_core_cookie;
 };
 
 __hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size)
index 5a321862e07020075248cf155658ba1cb195593e..cbab448b8b4bf9b5573dc43dd602eaebaf82bd10 100644 (file)
@@ -145,6 +145,7 @@ lxc_config_define(tty_dir);
 lxc_config_define(uts_name);
 lxc_config_define(sysctl);
 lxc_config_define(proc);
+lxc_config_define(sched_core);
 
 static int set_config_unsupported_key(const char *key, const char *value,
                                      struct lxc_conf *lxc_conf, void *data)
@@ -194,6 +195,7 @@ static struct lxc_config_t config_jump_table[] = {
        { "lxc.console.path",               true,  set_config_console_path,               get_config_console_path,               clr_config_console_path,               },
        { "lxc.console.rotate",             true,  set_config_console_rotate,             get_config_console_rotate,             clr_config_console_rotate,             },
        { "lxc.console.size",               true,  set_config_console_size,               get_config_console_size,               clr_config_console_size,               },
+       { "lxc.sched.core",                 true,  set_config_sched_core,                 get_config_sched_core,                 clr_config_sched_core,                 },
        { "lxc.environment",                true,  set_config_environment,                get_config_environment,                clr_config_environment,                },
        { "lxc.ephemeral",                  true,  set_config_ephemeral,                  get_config_ephemeral,                  clr_config_ephemeral,                  },
        { "lxc.execute.cmd",                true,  set_config_execute_cmd,                get_config_execute_cmd,                clr_config_execute_cmd,                },
@@ -6126,3 +6128,35 @@ int lxc_list_net(struct lxc_conf *c, const char *key, char *retv, int inlen)
 
        return fulllen;
 }
+
+/* Setter for "lxc.sched.core".
+ *
+ * An empty value is forwarded to clr_config_sched_core(). Otherwise the value
+ * must parse as an unsigned integer equal to 0 or 1; anything else yields
+ * ret_errno(EINVAL). On success the flag is stored in lxc_conf->sched_core and
+ * 0 is returned.
+ */
+static int set_config_sched_core(const char *key, const char *value,
+                                struct lxc_conf *lxc_conf, void *data)
+{
+       unsigned int parsed;
+
+       if (lxc_config_value_empty(value))
+               return clr_config_sched_core(key, lxc_conf, data);
+
+       /* Short-circuit keeps "parsed" from being read when parsing failed. */
+       if (lxc_safe_uint(value, &parsed) || parsed > 1)
+               return ret_errno(EINVAL);
+
+       lxc_conf->sched_core = (parsed != 0);
+       return 0;
+}
+
+/* Getter for "lxc.sched.core": passes the stored flag, together with the
+ * caller's retv/inlen arguments, to lxc_get_conf_bool() and returns its
+ * result. key and data are not used.
+ */
+static int get_config_sched_core(const char *key, char *retv, int inlen,
+                                struct lxc_conf *c, void *data)
+{
+       const bool enabled = c->sched_core;
+
+       return lxc_get_conf_bool(c, retv, inlen, enabled);
+}
+
+static int clr_config_sched_core(const char *key, struct lxc_conf *c, void *data)
+{ /* Clear handler for "lxc.sched.core"; key and data are not used. */
+       c->sched_core = false; /* same value lxc_conf_init() assigns: no core scheduling domain */
+       return 0; /* nothing here can fail */
+}
index a9d3d5eadac6cae53a22133bea8e5ee8fccaabb7..f783f2e53da67b83f663c0d7994be9cb6df2a7b1 100644 (file)
@@ -1553,6 +1553,32 @@ static inline int do_share_ns(void *arg)
        return 0;
 }
 
+/* Create a core scheduling domain for the container when lxc.sched.core
+ * requested one.
+ *
+ * lxc_spawn() calls this right after the child has been cloned, so
+ * handler->pid identifies the new process. When conf->sched_core is false
+ * nothing is done. Otherwise a new cookie is created for handler->pid and then
+ * read back into conf->sched_core_cookie. Each failure is reported through
+ * syserror() and its result is returned; the caller treats a negative return
+ * as fatal.
+ */
+static int core_scheduling(struct lxc_handler *handler)
+{
+       struct lxc_conf *conf = handler->conf;
+       int ret;
+
+       if (!conf->sched_core)
+               return log_trace(0, "No new core scheduling domain requested");
+
+       ret = core_scheduling_cookie_create_thread(handler->pid);
+       if (ret < 0) {
+               /* The request cannot be honoured, so this is a failure path
+                * like the ones below. Report it at error level as well, so
+                * the reason is not buried in an info-level message.
+                */
+               if (ret == -EINVAL)
+                       return syserror("The kernel does not support core scheduling");
+
+               return syserror("Failed to create new core scheduling domain");
+       }
+
+       /* Read the cookie back so it is available in the container config. */
+       conf->sched_core_cookie = core_scheduling_cookie_get(handler->pid);
+       if (conf->sched_core_cookie == INVALID_SCHED_CORE_COOKIE)
+               return syserror("Failed to retrieve core scheduling domain cookie");
+
+       TRACE("Created new core scheduling domain with cookie %llu",
+             (long long unsigned int)conf->sched_core_cookie);
+
+       return 0;
+}
+
 /* lxc_spawn() performs crucial setup tasks and clone()s the new process which
  * exec()s the requested container binary.
  * Note that lxc_spawn() runs in the parent namespaces. Any operations performed
@@ -1709,6 +1735,10 @@ static int lxc_spawn(struct lxc_handler *handler)
                handler->clone_flags &= ~CLONE_PIDFD;
        TRACE("Cloned child process %d", handler->pid);
 
+       ret = core_scheduling(handler);
+       if (ret < 0)
+               goto out_delete_net;
+
        /* Verify that we can actually make use of pidfds. */
        if (!lxc_can_use_pidfd(handler->pidfd))
                close_prot_errno_disarm(handler->pidfd);
index f1004d2647f8d2aff08e1568954a76790468a43e..6e90f572d2f1fc87aa5345d0b6f3add8c53eb627 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/keyctl.h>
 #include <sched.h>
 #include <stdint.h>
+#include <sys/prctl.h>
 #include <sys/syscall.h>
 #include <sys/types.h>
 #include <unistd.h>
@@ -320,4 +321,70 @@ static inline int personality(unsigned long persona)
 }
 #endif
 
+/* arg1 of prctl(): selects the core scheduling interface */
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+#endif
+
+/* arg2 of prctl(): the core scheduling command to perform */
+#ifndef PR_SCHED_CORE_GET
+#define PR_SCHED_CORE_GET 0
+#endif
+
+#ifndef PR_SCHED_CORE_CREATE
+#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_TO
+#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_FROM
+#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie from pid */
+#endif
+
+#ifndef PR_SCHED_CORE_MAX
+#define PR_SCHED_CORE_MAX 4
+#endif
+
+/* arg4 of prctl(): scope the command applies to (arg3 is the target pid) */
+#ifndef PR_SCHED_CORE_SCOPE_THREAD
+#define PR_SCHED_CORE_SCOPE_THREAD 0
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_THREAD_GROUP
+#define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_PROCESS_GROUP
+#define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+#endif
+
+#define INVALID_SCHED_CORE_COOKIE ((__u64)-1)
+
+/* Read the core scheduling cookie of @pid with thread scope.
+ *
+ * Returns the cookie written by prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, ...)
+ * or INVALID_SCHED_CORE_COOKIE when the prctl() call reports failure.
+ */
+static inline __u64 core_scheduling_cookie_get(pid_t pid)
+{
+       __u64 value;
+
+       /* The kernel stores the cookie at the address passed as last argument. */
+       if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid,
+                 PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)&value) != 0)
+               return INVALID_SCHED_CORE_COOKIE;
+
+       return value;
+}
+
+/* Create a new core scheduling cookie for @pid.
+ *
+ * Note that, despite the "_thread" suffix, the request is issued with
+ * PR_SCHED_CORE_SCOPE_THREAD_GROUP. Returns 0 on success and -errno when
+ * prctl() fails.
+ */
+static inline int core_scheduling_cookie_create_thread(pid_t pid)
+{
+       if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid,
+                 PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0) == 0)
+               return 0;
+
+       return -errno;
+}
+
 #endif /* __LXC_SYSCALL_WRAPPER_H */
index df55d69c1182e5c051b204b398d32fff56a8f556..799632d371ff683b1bc3db49612770256bf5d558 100644 (file)
@@ -912,6 +912,11 @@ int main(int argc, char *argv[])
                goto non_test_error;
        }
 
+       if (set_get_compare_clear_save_load(c, "lxc.sched.core", "1", tmpf, true) < 0) {
+               lxc_error("%s\n", "lxc.sched.core");
+               goto non_test_error;
+       }
+
        fret = EXIT_SUCCESS;
 
 non_test_error: