]>
Commit | Line | Data |
---|---|---|
88213476 LP |
1 | /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/ |
2 | ||
3 | /*** | |
4 | This file is part of systemd. | |
5 | ||
6 | Copyright 2010 Lennart Poettering | |
7 | ||
8 | systemd is free software; you can redistribute it and/or modify it | |
9 | under the terms of the GNU General Public License as published by | |
10 | the Free Software Foundation; either version 2 of the License, or | |
11 | (at your option) any later version. | |
12 | ||
13 | systemd is distributed in the hope that it will be useful, but | |
14 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | General Public License for more details. | |
17 | ||
18 | You should have received a copy of the GNU General Public License | |
19 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
20 | ***/ | |
21 | ||
22 | #include <signal.h> | |
23 | #include <sched.h> | |
24 | #include <unistd.h> | |
25 | #include <sys/types.h> | |
26 | #include <sys/syscall.h> | |
27 | #include <sys/mount.h> | |
28 | #include <sys/wait.h> | |
29 | #include <stdlib.h> | |
30 | #include <string.h> | |
31 | #include <stdio.h> | |
32 | #include <errno.h> | |
33 | #include <sys/prctl.h> | |
34 | #include <sys/capability.h> | |
35 | #include <getopt.h> | |
36 | ||
37 | #include "log.h" | |
38 | #include "util.h" | |
94d82985 | 39 | #include "missing.h" |
88213476 LP |
40 | |
41 | static char *arg_directory = NULL; | |
42 | ||
/* Writes the command line synopsis and the list of supported options
 * to stdout. Always returns 0. */
static int help(void) {

        /* Only the first line needs formatting (the program name);
         * the remainder is constant text. */
        printf("%s [OPTIONS...] [PATH] [ARGUMENTS...]\n\n",
               program_invocation_short_name);

        fputs("Spawn a minimal namespace container for debugging, testing and building.\n\n"
              " -h --help Show this help\n"
              " -D --directory=NAME Root directory for the container\n",
              stdout);

        return 0;
}
53 | ||
54 | static int parse_argv(int argc, char *argv[]) { | |
55 | ||
56 | static const struct option options[] = { | |
57 | { "help", no_argument, NULL, 'h' }, | |
58 | { "directory", required_argument, NULL, 'D' }, | |
59 | { NULL, 0, NULL, 0 } | |
60 | }; | |
61 | ||
62 | int c; | |
63 | ||
64 | assert(argc >= 0); | |
65 | assert(argv); | |
66 | ||
67 | while ((c = getopt_long(argc, argv, "+hD:", options, NULL)) >= 0) { | |
68 | ||
69 | switch (c) { | |
70 | ||
71 | case 'h': | |
72 | help(); | |
73 | return 0; | |
74 | ||
75 | case 'D': | |
76 | free(arg_directory); | |
77 | if (!(arg_directory = strdup(optarg))) { | |
78 | log_error("Failed to duplicate root directory."); | |
79 | return -ENOMEM; | |
80 | } | |
81 | ||
82 | break; | |
83 | ||
84 | case '?': | |
85 | return -EINVAL; | |
86 | ||
87 | default: | |
88 | log_error("Unknown option code %c", c); | |
89 | return -EINVAL; | |
90 | } | |
91 | } | |
92 | ||
93 | return 1; | |
94 | } | |
95 | ||
/* Mounts the file systems listed in the table below underneath the
 * directory dest.
 *
 * A failure on one entry does not stop the remaining entries from
 * being processed; only running out of memory while building a path
 * aborts the loop. Returns 0 if nothing failed, otherwise the
 * negative errno-style code of the first failure. */
static int mount_all(const char *dest) {

        typedef struct MountPoint {
                const char *what;
                const char *where;
                const char *type;
                const char *options;
                unsigned long flags;
        } MountPoint;

        static const MountPoint mount_table[] = {
                { "proc",      "/proc",     "proc",      NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV },
                { "/proc/sys", "/proc/sys", "bind",      NULL,       MS_BIND },                      /* Bind mount first */
                { "/proc/sys", "/proc/sys", "bind",      NULL,       MS_BIND|MS_RDONLY|MS_REMOUNT }, /* Then, make it r/o */
                { "sysfs",     "/sys",      "sysfs",     NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY },
                { "tmpfs",     "/dev",      "tmpfs",     "mode=755", MS_NOSUID },
                { "/dev/pts",  "/dev/pts",  "bind",      NULL,       MS_BIND },
                { "tmpfs",     "/dev/.run", "tmpfs",     "mode=755", MS_NOSUID|MS_NOEXEC|MS_NODEV },
#ifdef HAVE_SELINUX
                { "selinux",   "/selinux",  "selinuxfs", NULL,       MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_RDONLY },
#endif
        };

        const MountPoint *m;
        int ret = 0;

        for (m = mount_table; m < mount_table + ELEMENTSOF(mount_table); m++) {
                char *target;
                int q;

                /* Prefix the table's absolute path with the container root */
                if (asprintf(&target, "%s/%s", dest, m->where) < 0) {
                        log_error("Out of memory");

                        if (ret == 0)
                                ret = -ENOMEM;

                        break;
                }

                q = path_is_mount_point(target);
                if (q >= 0) {
                        /* Best effort: if the directory cannot be
                         * created the mount() below reports it. */
                        mkdir_p(target, 0755);

                        if (mount(m->what, target, m->type, m->flags, m->options) < 0) {
                                log_error("mount(%s) failed: %m", target);

                                if (ret == 0)
                                        ret = -errno;
                        }
                } else {
                        log_error("Failed to detect whether %s is a mount point: %s", target, strerror(-q));

                        if (ret == 0)
                                ret = q;
                }

                free(target);
        }

        return ret;
}
164 | ||
165 | static int copy_devnodes(const char *dest) { | |
166 | ||
167 | static const char devnodes[] = | |
168 | "null\0" | |
169 | "zero\0" | |
170 | "full\0" | |
171 | "random\0" | |
172 | "urandom\0" | |
173 | "tty\0" | |
174 | "ptmx\0" | |
175 | "kmsg\0" | |
176 | "rtc0\0"; | |
177 | ||
178 | const char *d; | |
179 | int r = 0, k; | |
180 | char *tty = NULL; | |
181 | dev_t tty_devnum; | |
124640f1 LP |
182 | mode_t u; |
183 | ||
184 | u = umask(0000); | |
88213476 LP |
185 | |
186 | NULSTR_FOREACH(d, devnodes) { | |
187 | char *from = NULL, *to = NULL; | |
188 | struct stat st; | |
189 | ||
190 | asprintf(&from, "/dev/%s", d); | |
191 | asprintf(&to, "%s/dev/%s", dest, d); | |
192 | ||
193 | if (!from || !to) { | |
194 | log_error("Failed to allocate devnode path"); | |
195 | ||
196 | free(from); | |
197 | free(to); | |
198 | ||
199 | if (r == 0) | |
200 | r = -ENOMEM; | |
201 | ||
202 | break; | |
203 | } | |
204 | ||
205 | if (stat(from, &st) < 0) { | |
206 | ||
207 | if (errno != ENOENT) { | |
208 | log_error("Failed to stat %s: %m", from); | |
209 | ||
210 | if (r == 0) | |
211 | r = -errno; | |
212 | } | |
213 | ||
214 | } else { | |
215 | if (mknod(to, st.st_mode, st.st_rdev) < 0) { | |
216 | log_error("mknod(%s) failed: %m", dest); | |
217 | ||
218 | if (r == 0) | |
219 | r = -errno; | |
220 | } | |
221 | } | |
222 | ||
223 | free(from); | |
224 | free(to); | |
225 | } | |
226 | ||
227 | if ((k = get_ctty(&tty, &tty_devnum)) < 0) { | |
228 | log_error("Failed to determine controlling tty: %s", strerror(-k)); | |
229 | ||
230 | if (r == 0) | |
231 | r = k; | |
232 | } else { | |
233 | char *from = NULL, *to = NULL; | |
234 | ||
235 | asprintf(&from, "/dev/%s", tty); | |
236 | asprintf(&to, "%s/dev/console", dest); | |
237 | ||
238 | if (!from || !to) { | |
239 | log_error("Out of memory"); | |
240 | ||
241 | if (r == 0) | |
242 | r = k; | |
243 | } else { | |
244 | /* We need to bind mount our own tty on | |
245 | * /dev/console, since ptys cannot be used | |
246 | * unless on a devpts file system. But to bind | |
247 | * mount it we first have to create a device | |
248 | * node where we can bind mount it on. This is | |
249 | * kinda ugly since the TTY will very likely | |
250 | * be owned by a user/group that does not | |
251 | * exist in the container. */ | |
252 | ||
253 | if (mknod(to, S_IFCHR|0600, tty_devnum) < 0) { | |
254 | log_error("mknod for /dev/console failed: %m"); | |
255 | ||
256 | if (r == 0) | |
257 | r = -errno; | |
258 | } | |
259 | ||
90df7e56 | 260 | if (mount(from, to, "bind", MS_BIND|MS_RDONLY, NULL) < 0) { |
88213476 LP |
261 | log_error("bind mount for /dev/console failed: %m"); |
262 | ||
263 | if (r == 0) | |
264 | r = -errno; | |
265 | } | |
266 | } | |
267 | ||
268 | free(from); | |
269 | free(to); | |
270 | } | |
271 | ||
272 | free(tty); | |
273 | ||
124640f1 LP |
274 | umask(u); |
275 | ||
88213476 LP |
276 | return r; |
277 | } | |
278 | ||
/* Removes every capability that is not on the keep list below from
 * the capability bounding set of the calling process.
 *
 * Returns 0 on success, or a negative errno-style value if dropping
 * a capability fails for a reason other than EINVAL. */
static int drop_capabilities(void) {
        static const unsigned long retain[] = {
                CAP_CHOWN,
                CAP_DAC_OVERRIDE,
                CAP_DAC_READ_SEARCH,
                CAP_FOWNER,
                CAP_FSETID,
                CAP_IPC_OWNER,
                CAP_KILL,
                CAP_LEASE,
                CAP_LINUX_IMMUTABLE,
                CAP_NET_BIND_SERVICE,
                CAP_NET_BROADCAST,
                CAP_NET_RAW,
                CAP_SETGID,
                CAP_SETFCAP,
                CAP_SETPCAP,
                CAP_SETUID,
                CAP_SYS_ADMIN,
                CAP_SYS_CHROOT,
                CAP_SYS_NICE,
                CAP_SYS_PTRACE,
                CAP_SYS_TTY_CONFIG
        };

        /* Walk at least up to 63, further if the headers know more */
        const unsigned long last = MAX(63LU, (unsigned long) CAP_LAST_CAP);
        unsigned long cap;

        for (cap = 0; cap <= last; cap++) {
                unsigned n = 0;

                /* Look the capability up in the keep list */
                while (n < ELEMENTSOF(retain) && retain[n] != cap)
                        n++;

                if (n < ELEMENTSOF(retain))
                        continue;

                if (prctl(PR_CAPBSET_DROP, cap) >= 0)
                        continue;

                /* If this capability is not known, EINVAL
                 * will be returned, let's ignore this. */
                if (errno == EINVAL)
                        continue;

                log_error("PR_CAPBSET_DROP failed: %m");
                return -errno;
        }

        return 0;
}
330 | ||
/* Checks whether path looks like the root of an OS installation.
 *
 * The existence of <path>/bin/sh is used as the indicator. Returns 1
 * if that file can be accessed, 0 if it cannot, and -ENOMEM if the
 * file name could not be allocated. */
static int is_os_tree(const char *path) {
        char *marker = NULL;
        int present;

        if (asprintf(&marker, "%s/bin/sh", path) < 0)
                return -ENOMEM;

        /* Any access() failure is treated as "not there" */
        present = access(marker, F_OK) >= 0;
        free(marker);

        return present;
}
344 | ||
345 | ||
346 | int main(int argc, char *argv[]) { | |
347 | pid_t pid = 0; | |
348 | int r = EXIT_FAILURE; | |
349 | ||
350 | log_parse_environment(); | |
351 | log_open(); | |
352 | ||
353 | if ((r = parse_argv(argc, argv)) <= 0) | |
354 | goto finish; | |
355 | ||
356 | if (arg_directory) { | |
357 | char *p; | |
358 | ||
359 | p = path_make_absolute_cwd(arg_directory); | |
360 | free(arg_directory); | |
361 | arg_directory = p; | |
362 | } else | |
363 | arg_directory = get_current_dir_name(); | |
364 | ||
365 | if (!arg_directory) { | |
366 | log_error("Failed to determine path"); | |
367 | goto finish; | |
368 | } | |
369 | ||
370 | path_kill_slashes(arg_directory); | |
371 | ||
372 | if (geteuid() != 0) { | |
373 | log_error("Need to be root."); | |
374 | goto finish; | |
375 | } | |
376 | ||
377 | if (path_equal(arg_directory, "/")) { | |
6df6b939 | 378 | log_error("Spawning container on root directory not supported."); |
88213476 LP |
379 | goto finish; |
380 | } | |
381 | ||
382 | if (is_os_tree(arg_directory) <= 0) { | |
383 | log_error("Directory %s doesn't look like an OS root directory. Refusing.", arg_directory); | |
384 | goto finish; | |
385 | } | |
386 | ||
387 | log_info("Spawning namespace container on %s.", arg_directory); | |
388 | ||
64af1b62 | 389 | if ((pid = syscall(__NR_clone, SIGCHLD|CLONE_NEWIPC|CLONE_NEWNS|CLONE_NEWPID|CLONE_NEWUTS, NULL)) < 0) { |
88213476 LP |
390 | log_error("clone() failed: %m"); |
391 | goto finish; | |
392 | } | |
393 | ||
394 | if (pid == 0) { | |
395 | const char *hn; | |
da5b3bad LP |
396 | const char *envp[] = { |
397 | "HOME=/root", | |
398 | "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", | |
399 | NULL | |
400 | }; | |
88213476 LP |
401 | |
402 | /* child */ | |
403 | ||
404 | if (mount_all(arg_directory) < 0) | |
405 | goto child_fail; | |
406 | ||
407 | if (copy_devnodes(arg_directory) < 0) | |
408 | goto child_fail; | |
409 | ||
410 | if (chdir(arg_directory) < 0) { | |
411 | log_error("chdir(%s) failed: %m", arg_directory); | |
412 | goto child_fail; | |
413 | } | |
414 | if (mount(arg_directory, "/", "bind", MS_BIND|MS_MOVE, NULL) < 0) { | |
415 | log_error("mount(MS_MOVE) failed: %m"); | |
416 | goto child_fail; | |
417 | } | |
418 | ||
419 | if (chroot(".") < 0) { | |
420 | log_error("chroot() failed: %m"); | |
421 | goto child_fail; | |
422 | } | |
423 | ||
424 | if (chdir("/") < 0) { | |
425 | log_error("chdir() failed: %m"); | |
426 | goto child_fail; | |
427 | } | |
428 | ||
429 | if (drop_capabilities() < 0) | |
430 | goto child_fail; | |
431 | ||
432 | if ((hn = file_name_from_path(arg_directory))) | |
433 | sethostname(hn, strlen(hn)); | |
434 | ||
435 | if (argc > optind) | |
da5b3bad LP |
436 | execvpe(argv[optind], argv + optind, (char**) envp); |
437 | else { | |
438 | chdir("/root"); | |
439 | execle("/bin/bash", "-bash", NULL, (char**) envp); | |
440 | } | |
88213476 LP |
441 | |
442 | log_error("execv() failed: %m"); | |
443 | ||
444 | child_fail: | |
445 | _exit(EXIT_FAILURE); | |
446 | } | |
447 | ||
6df6b939 | 448 | r = wait_for_terminate_and_warn(argc > optind ? argv[optind] : "bash", pid); |
88213476 LP |
449 | |
450 | if (r < 0) | |
451 | r = EXIT_FAILURE; | |
452 | ||
453 | finish: | |
454 | free(arg_directory); | |
455 | ||
456 | if (pid > 0) | |
457 | kill(pid, SIGTERM); | |
458 | ||
459 | return r; | |
460 | } |