#include <alloca.h>
#include <errno.h>
#include <fcntl.h>
+#include <sched.h>
#include <signal.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <sys/types.h>
#include "log.h"
pid_t ret;
#ifdef __ia64__
- ret = __clone2(do_clone, stack,
- stack_size, flags | SIGCHLD, &clone_arg);
+ ret = __clone2(do_clone, stack, stack_size, flags | SIGCHLD, &clone_arg);
#else
ret = clone(do_clone, stack + stack_size, flags | SIGCHLD, &clone_arg);
#endif
return ret;
}
+/**
+ * lxc_raw_clone - fork()-like wrapper around the raw clone system call.
+ *
+ * This is based on raw_clone in systemd but adapted to our needs. This uses
+ * copy on write semantics and doesn't pass a stack. CLONE_VM is tricky and
+ * doesn't really matter to us so disallow it.
+ *
+ * The nice thing about this is that we get fork() behavior. That is
+ * lxc_raw_clone() returns 0 in the child and the child pid in the parent.
+ *
+ * @flags: clone flags; SIGCHLD is always OR-ed in before they are handed to
+ *         the kernel.
+ *
+ * Returns -EINVAL (with errno set to EINVAL) when @flags contains CLONE_VM,
+ * CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID or
+ * CLONE_SETTLS. Otherwise the result of the system call is returned as is;
+ * callers should treat any negative value as failure.
+ */
+pid_t lxc_raw_clone(unsigned long flags)
+{
+	/* These flags don't interest us at all so we don't jump through any
+	 * hoops of retrieving them and passing them to the kernel.
+	 */
+	if ((flags & (CLONE_VM | CLONE_PARENT_SETTID | CLONE_CHILD_SETTID |
+		      CLONE_CHILD_CLEARTID | CLONE_SETTLS))) {
+		/* Only set errno when the request is actually rejected so a
+		 * successful clone does not leave a stale EINVAL behind in
+		 * the parent and the child.
+		 */
+		errno = EINVAL;
+		return -EINVAL;
+	}
+
+#if defined(__s390x__) || defined(__s390__) || defined(__CRIS__)
+	/* On s390/s390x and cris the order of the first and second arguments
+	 * of the system call is reversed.
+	 */
+	return (int)syscall(__NR_clone, NULL, flags | SIGCHLD);
+#elif defined(__sparc__) && defined(__arch64__)
+	{
+		/**
+		 * sparc64 always returns the other process id in %o0, and
+		 * a boolean flag whether this is the child or the parent in
+		 * %o1. Inline assembly is needed to get the flag returned
+		 * in %o1.
+		 *
+		 * NOTE(review): nothing here inspects an error indication
+		 * from the trap, so a failed clone may be reported as a
+		 * pid - confirm against the sparc64 syscall ABI.
+		 */
+		int in_child;
+		int child_pid;
+		asm volatile("mov %2, %%g1\n\t"
+			     "mov %3, %%o0\n\t"
+			     "mov 0 , %%o1\n\t"
+			     "t 0x6d\n\t"
+			     "mov %%o1, %0\n\t"
+			     "mov %%o0, %1"
+			     : "=r"(in_child), "=r"(child_pid)
+			     : "i"(__NR_clone), "r"(flags | SIGCHLD)
+			     : "%o1", "%o0", "%g1");
+		if (in_child)
+			return 0;
+		else
+			return child_pid;
+	}
+#elif defined(__ia64__)
+	/* On ia64 the stack and stack size are passed as separate arguments. */
+	return (int)syscall(__NR_clone, flags | SIGCHLD, NULL, 0);
+#else
+	return (int)syscall(__NR_clone, flags | SIGCHLD, NULL);
+#endif
+}
+
/* Leave the user namespace at the first position in the array of structs so
* that we always attach to it first when iterating over the struct and using
* setns() to switch namespaces. This especially affects lxc_attach(): Suppose
#include "config.h"
+/* Fallback definitions for clone flags: each one is only defined here when
+ * the headers included so far have not already provided it.
+ * NOTE(review): the values are presumably meant to mirror the kernel's own
+ * definitions - verify against <linux/sched.h>.
+ */
+#ifndef CLONE_PARENT_SETTID
+#define CLONE_PARENT_SETTID 0x00100000
+#endif
+
+#ifndef CLONE_CHILD_CLEARTID
+#define CLONE_CHILD_CLEARTID 0x00200000
+#endif
+
+#ifndef CLONE_CHILD_SETTID
+#define CLONE_CHILD_SETTID 0x01000000
+#endif
+
+#ifndef CLONE_VFORK
+#define CLONE_VFORK 0x00004000
+#endif
+
+#ifndef CLONE_THREAD
+#define CLONE_THREAD 0x00010000
+#endif
+
+#ifndef CLONE_SETTLS
+#define CLONE_SETTLS 0x00080000
+#endif
+
+#ifndef CLONE_VM
+#define CLONE_VM 0x00000100
+#endif
+
+#ifndef CLONE_FILES
+#define CLONE_FILES 0x00000400
+#endif
+
#ifndef CLONE_FS
# define CLONE_FS 0x00000200
#endif
#endif
extern pid_t lxc_clone(int (*fn)(void *), void *arg, int flags);
+/* fork()-like clone wrapper: returns 0 in the child and the child's pid in
+ * the parent. Flags containing CLONE_VM, CLONE_PARENT_SETTID,
+ * CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID or CLONE_SETTLS are rejected with
+ * -EINVAL.
+ */
+extern pid_t lxc_raw_clone(unsigned long flags);
extern int lxc_namespace_2_cloneflag(char *namespace);
extern int lxc_fill_namespace_flags(char *flaglist, int *flags);
--- /dev/null
+/*
+ * lxc: linux Container library
+ *
+ * Copyright © 2017 Canonical Ltd.
+ *
+ * Authors:
+ * Christian Brauner <christian.brauner@ubuntu.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define _GNU_SOURCE
+#define __STDC_FORMAT_MACROS
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "lxctest.h"
+#include "namespace.h"
+#include "utils.h"
+
+/*
+ * Test driver for lxc_raw_clone().
+ *
+ * First verifies that every disallowed flag is refused with -EINVAL, then
+ * clones children (without and with new namespaces) that exit with
+ * EXIT_SUCCESS or EXIT_FAILURE and checks that wait_for_pid() reports the
+ * matching status. This test relies on wait_for_pid() returning 0 only for a
+ * child that exited successfully.
+ *
+ * Exits with EXIT_SUCCESS when all checks pass and EXIT_FAILURE otherwise.
+ */
+int main(int argc, char *argv[])
+{
+	int status;
+	pid_t pid;
+	int flags = 0;
+
+	/* Every flag below must be rejected. -EINVAL is negative, so comparing
+	 * against it also covers the "clone unexpectedly succeeded" case.
+	 */
+	pid = lxc_raw_clone(CLONE_PARENT_SETTID);
+	if (pid != -EINVAL) {
+		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_PARENT_SETTID) "
+				  "should not be possible");
+		exit(EXIT_FAILURE);
+	}
+
+	pid = lxc_raw_clone(CLONE_CHILD_SETTID);
+	if (pid != -EINVAL) {
+		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_CHILD_SETTID) "
+				  "should not be possible");
+		exit(EXIT_FAILURE);
+	}
+
+	pid = lxc_raw_clone(CLONE_CHILD_CLEARTID);
+	if (pid != -EINVAL) {
+		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_CHILD_CLEARTID) "
+				  "should not be possible");
+		exit(EXIT_FAILURE);
+	}
+
+	pid = lxc_raw_clone(CLONE_SETTLS);
+	if (pid != -EINVAL) {
+		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_SETTLS) should "
+				  "not be possible");
+		exit(EXIT_FAILURE);
+	}
+
+	pid = lxc_raw_clone(CLONE_VM);
+	if (pid != -EINVAL) {
+		lxc_error("%s\n", "Calling lxc_raw_clone(CLONE_VM) should "
+				  "not be possible");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Plain clone, child exits successfully. */
+	pid = lxc_raw_clone(0);
+	if (pid < 0) {
+		lxc_error("%s\n", "Failed to call lxc_raw_clone(0)");
+		exit(EXIT_FAILURE);
+	}
+
+	if (pid == 0) {
+		lxc_error("%s\n", "Child will exit(EXIT_SUCCESS)");
+		exit(EXIT_SUCCESS);
+	}
+
+	status = wait_for_pid(pid);
+	if (status != 0) {
+		lxc_error("%s\n", "Failed to retrieve correct exit status");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Plain clone, child exits with failure. */
+	pid = lxc_raw_clone(0);
+	if (pid < 0) {
+		lxc_error("%s\n", "Failed to call lxc_raw_clone(0)");
+		exit(EXIT_FAILURE);
+	}
+
+	if (pid == 0) {
+		lxc_error("%s\n", "Child will exit(EXIT_FAILURE)");
+		exit(EXIT_FAILURE);
+	}
+
+	status = wait_for_pid(pid);
+	if (status == 0) {
+		lxc_error("%s\n", "Failed to retrieve correct exit status");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Build the namespace flag set once. CLONE_NEWCGROUP is only requested
+	 * when cgns_supported() says so, so both namespace tests below behave
+	 * the same on systems without cgroup namespaces.
+	 */
+	flags |= CLONE_NEWUSER;
+	if (cgns_supported())
+		flags |= CLONE_NEWCGROUP;
+	flags |= CLONE_NEWNS;
+	flags |= CLONE_NEWIPC;
+	flags |= CLONE_NEWNET;
+	flags |= CLONE_NEWPID;
+	flags |= CLONE_NEWUTS;
+
+	/* Clone into new namespaces, child exits successfully. */
+	pid = lxc_raw_clone(flags);
+	if (pid < 0) {
+		lxc_error("%s\n", "Failed to call lxc_raw_clone(CLONE_NEWUSER "
+				  "| CLONE_NEWCGROUP | CLONE_NEWNS | "
+				  "CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWIPC "
+				  "| CLONE_NEWPID | CLONE_NEWUTS);");
+		exit(EXIT_FAILURE);
+	}
+
+	if (pid == 0) {
+		lxc_error("%s\n", "Child will exit(EXIT_SUCCESS)");
+		exit(EXIT_SUCCESS);
+	}
+
+	status = wait_for_pid(pid);
+	if (status != 0) {
+		lxc_error("%s\n", "Failed to retrieve correct exit status");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Clone into new namespaces, child exits with failure. */
+	pid = lxc_raw_clone(flags);
+	if (pid < 0) {
+		lxc_error("%s\n", "Failed to call lxc_raw_clone(CLONE_NEWUSER "
+				  "| CLONE_NEWCGROUP | CLONE_NEWNS | "
+				  "CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWIPC "
+				  "| CLONE_NEWPID | CLONE_NEWUTS);");
+		exit(EXIT_FAILURE);
+	}
+
+	if (pid == 0) {
+		lxc_error("%s\n", "Child will exit(EXIT_FAILURE)");
+		exit(EXIT_FAILURE);
+	}
+
+	status = wait_for_pid(pid);
+	if (status == 0) {
+		/* The child was expected to fail, so a zero status means the
+		 * test itself has failed and must not report success.
+		 */
+		lxc_error("%s\n", "Failed to retrieve correct exit status");
+		exit(EXIT_FAILURE);
+	}
+
+	lxc_debug("%s\n", "All lxc_raw_clone() tests successful");
+	exit(EXIT_SUCCESS);
+}