]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-image.c
test: make sure test-execute can run in a read-only environment
[thirdparty/systemd.git] / src / shared / machine-image.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2013 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <dirent.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/file.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #include <linux/fs.h>
30
31 #include "alloc-util.h"
32 #include "btrfs-util.h"
33 #include "chattr-util.h"
34 #include "copy.h"
35 #include "dirent-util.h"
36 #include "env-util.h"
37 #include "fd-util.h"
38 #include "fs-util.h"
39 #include "hashmap.h"
40 #include "lockfile-util.h"
41 #include "log.h"
42 #include "machine-image.h"
43 #include "macro.h"
44 #include "mkdir.h"
45 #include "path-util.h"
46 #include "rm-rf.h"
47 #include "string-table.h"
48 #include "string-util.h"
49 #include "strv.h"
50 #include "time-util.h"
51 #include "utf8.h"
52 #include "util.h"
53 #include "xattr-util.h"
54
/* Directories searched for machine images, stored as a sequence of NUL-terminated
 * strings (walked with NULSTR_FOREACH() below). Entries are tried in the order
 * listed here. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        /* legacy location */
        "/var/lib/container\0"
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
60
61 Image *image_unref(Image *i) {
62 if (!i)
63 return NULL;
64
65 free(i->name);
66 free(i->path);
67 return mfree(i);
68 }
69
70 static char **image_settings_path(Image *image) {
71 _cleanup_strv_free_ char **l = NULL;
72 char **ret;
73 const char *fn, *s;
74 unsigned i = 0;
75
76 assert(image);
77
78 l = new0(char*, 4);
79 if (!l)
80 return NULL;
81
82 fn = strjoina(image->name, ".nspawn");
83
84 FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
85 l[i] = strappend(s, fn);
86 if (!l[i])
87 return NULL;
88
89 i++;
90 }
91
92 l[i] = file_in_same_dir(image->path, fn);
93 if (!l[i])
94 return NULL;
95
96 ret = l;
97 l = NULL;
98
99 return ret;
100 }
101
102 static int image_new(
103 ImageType t,
104 const char *pretty,
105 const char *path,
106 const char *filename,
107 bool read_only,
108 usec_t crtime,
109 usec_t mtime,
110 Image **ret) {
111
112 _cleanup_(image_unrefp) Image *i = NULL;
113
114 assert(t >= 0);
115 assert(t < _IMAGE_TYPE_MAX);
116 assert(pretty);
117 assert(filename);
118 assert(ret);
119
120 i = new0(Image, 1);
121 if (!i)
122 return -ENOMEM;
123
124 i->type = t;
125 i->read_only = read_only;
126 i->crtime = crtime;
127 i->mtime = mtime;
128 i->usage = i->usage_exclusive = (uint64_t) -1;
129 i->limit = i->limit_exclusive = (uint64_t) -1;
130
131 i->name = strdup(pretty);
132 if (!i->name)
133 return -ENOMEM;
134
135 if (path)
136 i->path = strjoin(path, "/", filename);
137 else
138 i->path = strdup(filename);
139
140 if (!i->path)
141 return -ENOMEM;
142
143 path_kill_slashes(i->path);
144
145 *ret = i;
146 i = NULL;
147
148 return 0;
149 }
150
151 static int image_make(
152 const char *pretty,
153 int dfd,
154 const char *path,
155 const char *filename,
156 Image **ret) {
157
158 struct stat st;
159 bool read_only;
160 int r;
161
162 assert(filename);
163
164 /* We explicitly *do* follow symlinks here, since we want to
165 * allow symlinking trees into /var/lib/machines/, and treat
166 * them normally. */
167
168 if (fstatat(dfd, filename, &st, 0) < 0)
169 return -errno;
170
171 read_only =
172 (path && path_startswith(path, "/usr")) ||
173 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
174
175 if (S_ISDIR(st.st_mode)) {
176 _cleanup_close_ int fd = -1;
177 unsigned file_attr = 0;
178
179 if (!ret)
180 return 1;
181
182 if (!pretty)
183 pretty = filename;
184
185 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
186 if (fd < 0)
187 return -errno;
188
189 /* btrfs subvolumes have inode 256 */
190 if (st.st_ino == 256) {
191
192 r = btrfs_is_filesystem(fd);
193 if (r < 0)
194 return r;
195 if (r) {
196 BtrfsSubvolInfo info;
197
198 /* It's a btrfs subvolume */
199
200 r = btrfs_subvol_get_info_fd(fd, 0, &info);
201 if (r < 0)
202 return r;
203
204 r = image_new(IMAGE_SUBVOLUME,
205 pretty,
206 path,
207 filename,
208 info.read_only || read_only,
209 info.otime,
210 0,
211 ret);
212 if (r < 0)
213 return r;
214
215 if (btrfs_quota_scan_ongoing(fd) == 0) {
216 BtrfsQuotaInfo quota;
217
218 r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
219 if (r >= 0) {
220 (*ret)->usage = quota.referenced;
221 (*ret)->usage_exclusive = quota.exclusive;
222
223 (*ret)->limit = quota.referenced_max;
224 (*ret)->limit_exclusive = quota.exclusive_max;
225 }
226 }
227
228 return 1;
229 }
230 }
231
232 /* If the IMMUTABLE bit is set, we consider the
233 * directory read-only. Since the ioctl is not
234 * supported everywhere we ignore failures. */
235 (void) read_attr_fd(fd, &file_attr);
236
237 /* It's just a normal directory. */
238 r = image_new(IMAGE_DIRECTORY,
239 pretty,
240 path,
241 filename,
242 read_only || (file_attr & FS_IMMUTABLE_FL),
243 0,
244 0,
245 ret);
246 if (r < 0)
247 return r;
248
249 return 1;
250
251 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
252 usec_t crtime = 0;
253
254 /* It's a RAW disk image */
255
256 if (!ret)
257 return 1;
258
259 fd_getcrtime_at(dfd, filename, &crtime, 0);
260
261 if (!pretty)
262 pretty = strndupa(filename, strlen(filename) - 4);
263
264 r = image_new(IMAGE_RAW,
265 pretty,
266 path,
267 filename,
268 !(st.st_mode & 0222) || read_only,
269 crtime,
270 timespec_load(&st.st_mtim),
271 ret);
272 if (r < 0)
273 return r;
274
275 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
276 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
277
278 return 1;
279 }
280
281 return 0;
282 }
283
284 int image_find(const char *name, Image **ret) {
285 const char *path;
286 int r;
287
288 assert(name);
289
290 /* There are no images with invalid names */
291 if (!image_name_is_valid(name))
292 return 0;
293
294 NULSTR_FOREACH(path, image_search_path) {
295 _cleanup_closedir_ DIR *d = NULL;
296
297 d = opendir(path);
298 if (!d) {
299 if (errno == ENOENT)
300 continue;
301
302 return -errno;
303 }
304
305 r = image_make(NULL, dirfd(d), path, name, ret);
306 if (r == 0 || r == -ENOENT) {
307 _cleanup_free_ char *raw = NULL;
308
309 raw = strappend(name, ".raw");
310 if (!raw)
311 return -ENOMEM;
312
313 r = image_make(NULL, dirfd(d), path, raw, ret);
314 if (r == 0 || r == -ENOENT)
315 continue;
316 }
317 if (r < 0)
318 return r;
319
320 return 1;
321 }
322
323 if (streq(name, ".host"))
324 return image_make(".host", AT_FDCWD, NULL, "/", ret);
325
326 return 0;
327 };
328
329 int image_discover(Hashmap *h) {
330 const char *path;
331 int r;
332
333 assert(h);
334
335 NULSTR_FOREACH(path, image_search_path) {
336 _cleanup_closedir_ DIR *d = NULL;
337 struct dirent *de;
338
339 d = opendir(path);
340 if (!d) {
341 if (errno == ENOENT)
342 continue;
343
344 return -errno;
345 }
346
347 FOREACH_DIRENT_ALL(de, d, return -errno) {
348 _cleanup_(image_unrefp) Image *image = NULL;
349
350 if (!image_name_is_valid(de->d_name))
351 continue;
352
353 if (hashmap_contains(h, de->d_name))
354 continue;
355
356 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
357 if (r == 0 || r == -ENOENT)
358 continue;
359 if (r < 0)
360 return r;
361
362 r = hashmap_put(h, image->name, image);
363 if (r < 0)
364 return r;
365
366 image = NULL;
367 }
368 }
369
370 if (!hashmap_contains(h, ".host")) {
371 _cleanup_(image_unrefp) Image *image = NULL;
372
373 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
374 if (r < 0)
375 return r;
376
377 r = hashmap_put(h, image->name, image);
378 if (r < 0)
379 return r;
380
381 image = NULL;
382
383 }
384
385 return 0;
386 }
387
388 void image_hashmap_free(Hashmap *map) {
389 Image *i;
390
391 while ((i = hashmap_steal_first(map)))
392 image_unref(i);
393
394 hashmap_free(map);
395 }
396
397 int image_remove(Image *i) {
398 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
399 _cleanup_strv_free_ char **settings = NULL;
400 char **j;
401 int r;
402
403 assert(i);
404
405 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
406 return -EROFS;
407
408 settings = image_settings_path(i);
409 if (!settings)
410 return -ENOMEM;
411
412 /* Make sure we don't interfere with a running nspawn */
413 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
414 if (r < 0)
415 return r;
416
417 switch (i->type) {
418
419 case IMAGE_SUBVOLUME:
420 r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
421 if (r < 0)
422 return r;
423 break;
424
425 case IMAGE_DIRECTORY:
426 /* Allow deletion of read-only directories */
427 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
428 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
429 if (r < 0)
430 return r;
431
432 break;
433
434 case IMAGE_RAW:
435 if (unlink(i->path) < 0)
436 return -errno;
437 break;
438
439 default:
440 return -EOPNOTSUPP;
441 }
442
443 STRV_FOREACH(j, settings) {
444 if (unlink(*j) < 0 && errno != ENOENT)
445 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
446 }
447
448 return 0;
449 }
450
451 static int rename_settings_file(const char *path, const char *new_name) {
452 _cleanup_free_ char *rs = NULL;
453 const char *fn;
454
455 fn = strjoina(new_name, ".nspawn");
456
457 rs = file_in_same_dir(path, fn);
458 if (!rs)
459 return -ENOMEM;
460
461 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
462 }
463
464 int image_rename(Image *i, const char *new_name) {
465 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
466 _cleanup_free_ char *new_path = NULL, *nn = NULL;
467 _cleanup_strv_free_ char **settings = NULL;
468 unsigned file_attr = 0;
469 char **j;
470 int r;
471
472 assert(i);
473
474 if (!image_name_is_valid(new_name))
475 return -EINVAL;
476
477 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
478 return -EROFS;
479
480 settings = image_settings_path(i);
481 if (!settings)
482 return -ENOMEM;
483
484 /* Make sure we don't interfere with a running nspawn */
485 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
486 if (r < 0)
487 return r;
488
489 /* Make sure nobody takes the new name, between the time we
490 * checked it is currently unused in all search paths, and the
491 * time we take possession of it */
492 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
493 if (r < 0)
494 return r;
495
496 r = image_find(new_name, NULL);
497 if (r < 0)
498 return r;
499 if (r > 0)
500 return -EEXIST;
501
502 switch (i->type) {
503
504 case IMAGE_DIRECTORY:
505 /* Turn of the immutable bit while we rename the image, so that we can rename it */
506 (void) read_attr_path(i->path, &file_attr);
507
508 if (file_attr & FS_IMMUTABLE_FL)
509 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
510
511 /* fall through */
512
513 case IMAGE_SUBVOLUME:
514 new_path = file_in_same_dir(i->path, new_name);
515 break;
516
517 case IMAGE_RAW: {
518 const char *fn;
519
520 fn = strjoina(new_name, ".raw");
521 new_path = file_in_same_dir(i->path, fn);
522 break;
523 }
524
525 default:
526 return -EOPNOTSUPP;
527 }
528
529 if (!new_path)
530 return -ENOMEM;
531
532 nn = strdup(new_name);
533 if (!nn)
534 return -ENOMEM;
535
536 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
537 if (r < 0)
538 return r;
539
540 /* Restore the immutable bit, if it was set before */
541 if (file_attr & FS_IMMUTABLE_FL)
542 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
543
544 free(i->path);
545 i->path = new_path;
546 new_path = NULL;
547
548 free(i->name);
549 i->name = nn;
550 nn = NULL;
551
552 STRV_FOREACH(j, settings) {
553 r = rename_settings_file(*j, new_name);
554 if (r < 0 && r != -ENOENT)
555 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
556 }
557
558 return 0;
559 }
560
561 static int clone_settings_file(const char *path, const char *new_name) {
562 _cleanup_free_ char *rs = NULL;
563 const char *fn;
564
565 fn = strjoina(new_name, ".nspawn");
566
567 rs = file_in_same_dir(path, fn);
568 if (!rs)
569 return -ENOMEM;
570
571 return copy_file_atomic(path, rs, 0664, false, 0);
572 }
573
574 int image_clone(Image *i, const char *new_name, bool read_only) {
575 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
576 _cleanup_strv_free_ char **settings = NULL;
577 const char *new_path;
578 char **j;
579 int r;
580
581 assert(i);
582
583 if (!image_name_is_valid(new_name))
584 return -EINVAL;
585
586 settings = image_settings_path(i);
587 if (!settings)
588 return -ENOMEM;
589
590 /* Make sure nobody takes the new name, between the time we
591 * checked it is currently unused in all search paths, and the
592 * time we take possession of it */
593 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
594 if (r < 0)
595 return r;
596
597 r = image_find(new_name, NULL);
598 if (r < 0)
599 return r;
600 if (r > 0)
601 return -EEXIST;
602
603 switch (i->type) {
604
605 case IMAGE_SUBVOLUME:
606 case IMAGE_DIRECTORY:
607 /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
608 * directory.*/
609
610 new_path = strjoina("/var/lib/machines/", new_name);
611
612 r = btrfs_subvol_snapshot(i->path, new_path,
613 (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
614 BTRFS_SNAPSHOT_FALLBACK_COPY |
615 BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
616 BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
617 BTRFS_SNAPSHOT_RECURSIVE |
618 BTRFS_SNAPSHOT_QUOTA);
619 if (r >= 0)
620 /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
621 (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
622
623 break;
624
625 case IMAGE_RAW:
626 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
627
628 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
629 break;
630
631 default:
632 return -EOPNOTSUPP;
633 }
634
635 if (r < 0)
636 return r;
637
638 STRV_FOREACH(j, settings) {
639 r = clone_settings_file(*j, new_name);
640 if (r < 0 && r != -ENOENT)
641 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
642 }
643
644 return 0;
645 }
646
647 int image_read_only(Image *i, bool b) {
648 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
649 int r;
650 assert(i);
651
652 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
653 return -EROFS;
654
655 /* Make sure we don't interfere with a running nspawn */
656 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
657 if (r < 0)
658 return r;
659
660 switch (i->type) {
661
662 case IMAGE_SUBVOLUME:
663
664 /* Note that we set the flag only on the top-level
665 * subvolume of the image. */
666
667 r = btrfs_subvol_set_read_only(i->path, b);
668 if (r < 0)
669 return r;
670
671 break;
672
673 case IMAGE_DIRECTORY:
674 /* For simple directory trees we cannot use the access
675 mode of the top-level directory, since it has an
676 effect on the container itself. However, we can
677 use the "immutable" flag, to at least make the
678 top-level directory read-only. It's not as good as
679 a read-only subvolume, but at least something, and
680 we can read the value back.*/
681
682 r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
683 if (r < 0)
684 return r;
685
686 break;
687
688 case IMAGE_RAW: {
689 struct stat st;
690
691 if (stat(i->path, &st) < 0)
692 return -errno;
693
694 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
695 return -errno;
696
697 /* If the images is now read-only, it's a good time to
698 * defrag it, given that no write patterns will
699 * fragment it again. */
700 if (b)
701 (void) btrfs_defrag(i->path);
702 break;
703 }
704
705 default:
706 return -EOPNOTSUPP;
707 }
708
709 return 0;
710 }
711
712 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
713 _cleanup_free_ char *p = NULL;
714 LockFile t = LOCK_FILE_INIT;
715 struct stat st;
716 int r;
717
718 assert(path);
719 assert(global);
720 assert(local);
721
722 /* Locks an image path. This actually creates two locks: one
723 * "local" one, next to the image path itself, which might be
724 * shared via NFS. And another "global" one, in /run, that
725 * uses the device/inode number. This has the benefit that we
726 * can even lock a tree that is a mount point, correctly. */
727
728 if (!path_is_absolute(path))
729 return -EINVAL;
730
731 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
732 *local = *global = (LockFile) LOCK_FILE_INIT;
733 return 0;
734 }
735
736 if (path_equal(path, "/"))
737 return -EBUSY;
738
739 if (stat(path, &st) >= 0) {
740 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
741 return -ENOMEM;
742 }
743
744 r = make_lock_file_for(path, operation, &t);
745 if (r < 0)
746 return r;
747
748 if (p) {
749 mkdir_p("/run/systemd/nspawn/locks", 0700);
750
751 r = make_lock_file(p, operation, global);
752 if (r < 0) {
753 release_lock_file(&t);
754 return r;
755 }
756 } else
757 *global = (LockFile) LOCK_FILE_INIT;
758
759 *local = t;
760 return 0;
761 }
762
763 int image_set_limit(Image *i, uint64_t referenced_max) {
764 assert(i);
765
766 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
767 return -EROFS;
768
769 if (i->type != IMAGE_SUBVOLUME)
770 return -EOPNOTSUPP;
771
772 /* We set the quota both for the subvolume as well as for the
773 * subtree. The latter is mostly for historical reasons, since
774 * we didn't use to have a concept of subtree quota, and hence
775 * only modified the subvolume quota. */
776
777 (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
778 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
779 return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
780 }
781
782 int image_name_lock(const char *name, int operation, LockFile *ret) {
783 const char *p;
784
785 assert(name);
786 assert(ret);
787
788 /* Locks an image name, regardless of the precise path used. */
789
790 if (!image_name_is_valid(name))
791 return -EINVAL;
792
793 if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
794 *ret = (LockFile) LOCK_FILE_INIT;
795 return 0;
796 }
797
798 if (streq(name, ".host"))
799 return -EBUSY;
800
801 mkdir_p("/run/systemd/nspawn/locks", 0700);
802 p = strjoina("/run/systemd/nspawn/locks/name-", name);
803
804 return make_lock_file(p, operation, ret);
805 }
806
/* Checks whether "s" is acceptable as an image name: it must pass
 * filename_is_valid(), must not be flagged by string_has_cc(), must pass
 * utf8_is_valid(), and must not start with ".#" (the prefix of temporary
 * files used for atomically creating new files). */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
               !string_has_cc(s, NULL) &&
               utf8_is_valid(s) &&
               !startswith(s, ".#");
}
823
824 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
825 [IMAGE_DIRECTORY] = "directory",
826 [IMAGE_SUBVOLUME] = "subvolume",
827 [IMAGE_RAW] = "raw",
828 };
829
830 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);