</variablelist>
</refsect2>
+ <refsect2>
+ <title>Core Scheduling</title>
+ <para>
+ Core scheduling defines whether the container payload
+ is marked as being schedulable on the same core. Doing so will cause
+ the kernel scheduler to ensure that tasks that are not in the same
+ group never run simultaneously on a core. This can serve as an extra
+ security measure to prevent the container payload from mounting
+ cross-hyperthread attacks.
+ </para>
+ <variablelist>
+ <varlistentry>
+ <term>
+ <option>lxc.sched.core</option>
+ </term>
+ <listitem>
+ <para>
+ The only allowed values are 0 and 1. Set this to 1 to create a
+ core scheduling domain for the container or to 0 to not create one.
+ If not set explicitly, no core scheduling domain will be created
+ for the container.
+ </para>
+ </listitem>
+ </varlistentry>
+ </variablelist>
+ </refsect2>
+
<refsect2>
<title>Proc</title>
<para>
"seccomp_proxy_send_notify_fd",
"idmapped_mounts",
"idmapped_mounts_v2",
+ "core_scheduling",
};
static size_t nr_api_extensions = sizeof(api_extensions) / sizeof(*api_extensions);
new->transient_procfs_mnt = false;
new->shmount.path_host = NULL;
new->shmount.path_cont = NULL;
+ new->sched_core = false;
+ new->sched_core_cookie = INVALID_SCHED_CORE_COOKIE;
/* if running in a new user namespace, init and COMMAND
* default to running as UID/GID 0 when using lxc-execute */
} shmount;
struct timens_offsets timens;
+
+ bool sched_core;
+ __u64 sched_core_cookie;
};
__hidden extern int write_id_mapping(enum idtype idtype, pid_t pid, const char *buf, size_t buf_size)
lxc_config_define(uts_name);
lxc_config_define(sysctl);
lxc_config_define(proc);
+lxc_config_define(sched_core);
static int set_config_unsupported_key(const char *key, const char *value,
struct lxc_conf *lxc_conf, void *data)
{ "lxc.console.path", true, set_config_console_path, get_config_console_path, clr_config_console_path, },
{ "lxc.console.rotate", true, set_config_console_rotate, get_config_console_rotate, clr_config_console_rotate, },
{ "lxc.console.size", true, set_config_console_size, get_config_console_size, clr_config_console_size, },
+ { "lxc.sched.core", true, set_config_sched_core, get_config_sched_core, clr_config_sched_core, },
{ "lxc.environment", true, set_config_environment, get_config_environment, clr_config_environment, },
{ "lxc.ephemeral", true, set_config_ephemeral, get_config_ephemeral, clr_config_ephemeral, },
{ "lxc.execute.cmd", true, set_config_execute_cmd, get_config_execute_cmd, clr_config_execute_cmd, },
return fulllen;
}
+
+/*
+ * Setter for "lxc.sched.core".
+ *
+ * An empty value is forwarded to clr_config_sched_core(). Otherwise the value
+ * must parse as an unsigned integer equal to 0 or 1; 1 enables
+ * lxc_conf->sched_core, 0 disables it. Any other input is rejected with
+ * EINVAL.
+ */
+static int set_config_sched_core(const char *key, const char *value,
+				 struct lxc_conf *lxc_conf, void *data)
+{
+	unsigned int requested;
+
+	/* Nothing to parse: treat an empty value as a request to clear. */
+	if (lxc_config_value_empty(value))
+		return clr_config_sched_core(key, lxc_conf, data);
+
+	/* Any parse failure is reported uniformly as EINVAL. */
+	if (lxc_safe_uint(value, &requested))
+		return ret_errno(EINVAL);
+
+	switch (requested) {
+	case 0:
+		lxc_conf->sched_core = false;
+		break;
+	case 1:
+		lxc_conf->sched_core = true;
+		break;
+	default:
+		return ret_errno(EINVAL);
+	}
+
+	return 0;
+}
+
+/*
+ * Getter for "lxc.sched.core": hands the current c->sched_core flag to
+ * lxc_get_conf_bool() together with the caller's output buffer (retv, inlen)
+ * and returns that helper's result.
+ */
+static int get_config_sched_core(const char *key, char *retv, int inlen,
+				 struct lxc_conf *c, void *data)
+{
+	bool enabled = c->sched_core;
+
+	return lxc_get_conf_bool(c, retv, inlen, enabled);
+}
+
+/*
+ * Clear handler for "lxc.sched.core": switches the flag off again so that no
+ * core scheduling domain is requested. Always returns 0.
+ */
+static int clr_config_sched_core(const char *key, struct lxc_conf *c, void *data)
+{
+	/* Only the request flag is reset here. */
+	c->sched_core = false;
+
+	return 0;
+}
return 0;
}
+/*
+ * core_scheduling() - create a core scheduling domain for the process
+ * identified by handler->pid when conf->sched_core is set.
+ *
+ * On success the cookie of the new domain is recorded in
+ * conf->sched_core_cookie.
+ *
+ * Returns 0 when no domain was requested or when it was created and its
+ * cookie retrieved. On failure it returns whatever syserror() yields; the
+ * caller treats a negative value as fatal.
+ */
+static int core_scheduling(struct lxc_handler *handler)
+{
+	struct lxc_conf *conf = handler->conf;
+	int ret;
+
+	if (!conf->sched_core)
+		return log_trace(0, "No new core scheduling domain requested");
+
+	ret = core_scheduling_cookie_create_thread(handler->pid);
+	if (ret < 0) {
+		/*
+		 * The domain was requested explicitly, so an unsupported
+		 * kernel aborts startup. Log it as an error like every other
+		 * failure path here, otherwise the reason is invisible at
+		 * the usual log levels.
+		 */
+		if (ret == -EINVAL)
+			return syserror("The kernel does not support core scheduling");
+
+		return syserror("Failed to create new core scheduling domain");
+	}
+
+	/* Read the cookie back so it is available from the config. */
+	conf->sched_core_cookie = core_scheduling_cookie_get(handler->pid);
+	if (conf->sched_core_cookie == INVALID_SCHED_CORE_COOKIE)
+		return syserror("Failed to retrieve core scheduling domain cookie");
+
+	TRACE("Created new core scheduling domain with cookie %llu",
+	      (long long unsigned int)conf->sched_core_cookie);
+
+	return 0;
+}
+
/* lxc_spawn() performs crucial setup tasks and clone()s the new process which
* exec()s the requested container binary.
* Note that lxc_spawn() runs in the parent namespaces. Any operations performed
handler->clone_flags &= ~CLONE_PIDFD;
TRACE("Cloned child process %d", handler->pid);
+ ret = core_scheduling(handler);
+ if (ret < 0)
+ goto out_delete_net;
+
/* Verify that we can actually make use of pidfds. */
if (!lxc_can_use_pidfd(handler->pidfd))
close_prot_errno_disarm(handler->pidfd);
#include <linux/keyctl.h>
#include <sched.h>
#include <stdint.h>
+#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
}
#endif
+/*
+ * Fallback definitions for the core scheduling prctl() interface. Each
+ * constant is only defined here when the system headers did not already
+ * provide it.
+ *
+ * arg1 of prctl()
+ */
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+#endif
+
+/* arg2 of prctl(): the core scheduling operation to perform */
+#ifndef PR_SCHED_CORE_GET
+#define PR_SCHED_CORE_GET 0 /* get core_sched cookie of pid */
+#endif
+
+#ifndef PR_SCHED_CORE_CREATE
+#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_TO
+#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+#endif
+
+#ifndef PR_SCHED_CORE_SHARE_FROM
+#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie from pid */
+#endif
+
+#ifndef PR_SCHED_CORE_MAX
+#define PR_SCHED_CORE_MAX 4
+#endif
+
+/* arg3 of prctl(): which tasks related to pid the operation applies to */
+#ifndef PR_SCHED_CORE_SCOPE_THREAD
+#define PR_SCHED_CORE_SCOPE_THREAD 0
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_THREAD_GROUP
+#define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
+#endif
+
+#ifndef PR_SCHED_CORE_SCOPE_PROCESS_GROUP
+#define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+#endif
+
+#define INVALID_SCHED_CORE_COOKIE ((__u64)-1) /* sentinel: cookie could not be retrieved */
+
+/*
+ * Query the core scheduling cookie of the task @pid via
+ * prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, ...) with thread scope.
+ *
+ * Returns the cookie written by the kernel, or INVALID_SCHED_CORE_COOKIE if
+ * the prctl() call reports failure.
+ */
+static inline __u64 core_scheduling_cookie_get(pid_t pid)
+{
+	__u64 cookie;
+
+	/* The kernel stores the cookie through the pointer passed as arg5. */
+	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid,
+		  PR_SCHED_CORE_SCOPE_THREAD, (unsigned long)&cookie) != 0)
+		return INVALID_SCHED_CORE_COOKIE;
+
+	return cookie;
+}
+
+/*
+ * Ask the kernel to create a new core scheduling cookie for @pid via
+ * prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, ...).
+ *
+ * NOTE(review): despite the "_thread" suffix the request is issued with
+ * PR_SCHED_CORE_SCOPE_THREAD_GROUP, not PR_SCHED_CORE_SCOPE_THREAD.
+ *
+ * Returns 0 on success and -errno if prctl() fails.
+ */
+static inline int core_scheduling_cookie_create_thread(pid_t pid)
+{
+	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid,
+		  PR_SCHED_CORE_SCOPE_THREAD_GROUP, 0) != 0)
+		return -errno;
+
+	return 0;
+}
+
#endif /* __LXC_SYSCALL_WRAPPER_H */
goto non_test_error;
}
+ if (set_get_compare_clear_save_load(c, "lxc.sched.core", "1", tmpf, true) < 0) {
+ lxc_error("%s\n", "lxc.sched.core");
+ goto non_test_error;
+ }
+
fret = EXIT_SUCCESS;
non_test_error: