]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-image.c
Merge pull request #4390 from keszybz/install-specifiers
[thirdparty/systemd.git] / src / shared / machine-image.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2013 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <dirent.h>
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <sys/file.h>
27 #include <sys/stat.h>
28 #include <unistd.h>
29 #include <linux/fs.h>
30 #include "alloc-util.h"
31 #include "btrfs-util.h"
32 #include "chattr-util.h"
33 #include "copy.h"
34 #include "dirent-util.h"
35 #include "fd-util.h"
36 #include "fs-util.h"
37 #include "hashmap.h"
38 #include "lockfile-util.h"
39 #include "log.h"
40 #include "macro.h"
41 #include "machine-image.h"
42 #include "mkdir.h"
43 #include "path-util.h"
44 #include "rm-rf.h"
45 #include "string-table.h"
46 #include "string-util.h"
47 #include "strv.h"
48 #include "time-util.h"
49 #include "utf8.h"
50 #include "util.h"
51 #include "xattr-util.h"
52
/* NUL-separated list of the directories that are searched for images, in the order in which
 * they are consulted. Iterated with NULSTR_FOREACH() below. */
static const char image_search_path[] =
        "/var/lib/machines"       "\0"
        "/var/lib/container"      "\0" /* legacy */
        "/usr/local/lib/machines" "\0"
        "/usr/lib/machines"       "\0";
58
59 Image *image_unref(Image *i) {
60 if (!i)
61 return NULL;
62
63 free(i->name);
64 free(i->path);
65 return mfree(i);
66 }
67
68 static char **image_settings_path(Image *image) {
69 _cleanup_strv_free_ char **l = NULL;
70 char **ret;
71 const char *fn, *s;
72 unsigned i = 0;
73
74 assert(image);
75
76 l = new0(char*, 4);
77 if (!l)
78 return NULL;
79
80 fn = strjoina(image->name, ".nspawn");
81
82 FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
83 l[i] = strappend(s, fn);
84 if (!l[i])
85 return NULL;
86
87 i++;
88 }
89
90 l[i] = file_in_same_dir(image->path, fn);
91 if (!l[i])
92 return NULL;
93
94 ret = l;
95 l = NULL;
96
97 return ret;
98 }
99
100 static int image_new(
101 ImageType t,
102 const char *pretty,
103 const char *path,
104 const char *filename,
105 bool read_only,
106 usec_t crtime,
107 usec_t mtime,
108 Image **ret) {
109
110 _cleanup_(image_unrefp) Image *i = NULL;
111
112 assert(t >= 0);
113 assert(t < _IMAGE_TYPE_MAX);
114 assert(pretty);
115 assert(filename);
116 assert(ret);
117
118 i = new0(Image, 1);
119 if (!i)
120 return -ENOMEM;
121
122 i->type = t;
123 i->read_only = read_only;
124 i->crtime = crtime;
125 i->mtime = mtime;
126 i->usage = i->usage_exclusive = (uint64_t) -1;
127 i->limit = i->limit_exclusive = (uint64_t) -1;
128
129 i->name = strdup(pretty);
130 if (!i->name)
131 return -ENOMEM;
132
133 if (path)
134 i->path = strjoin(path, "/", filename, NULL);
135 else
136 i->path = strdup(filename);
137
138 if (!i->path)
139 return -ENOMEM;
140
141 path_kill_slashes(i->path);
142
143 *ret = i;
144 i = NULL;
145
146 return 0;
147 }
148
149 static int image_make(
150 const char *pretty,
151 int dfd,
152 const char *path,
153 const char *filename,
154 Image **ret) {
155
156 struct stat st;
157 bool read_only;
158 int r;
159
160 assert(filename);
161
162 /* We explicitly *do* follow symlinks here, since we want to
163 * allow symlinking trees into /var/lib/machines/, and treat
164 * them normally. */
165
166 if (fstatat(dfd, filename, &st, 0) < 0)
167 return -errno;
168
169 read_only =
170 (path && path_startswith(path, "/usr")) ||
171 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
172
173 if (S_ISDIR(st.st_mode)) {
174 _cleanup_close_ int fd = -1;
175 unsigned file_attr = 0;
176
177 if (!ret)
178 return 1;
179
180 if (!pretty)
181 pretty = filename;
182
183 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
184 if (fd < 0)
185 return -errno;
186
187 /* btrfs subvolumes have inode 256 */
188 if (st.st_ino == 256) {
189
190 r = btrfs_is_filesystem(fd);
191 if (r < 0)
192 return r;
193 if (r) {
194 BtrfsSubvolInfo info;
195
196 /* It's a btrfs subvolume */
197
198 r = btrfs_subvol_get_info_fd(fd, 0, &info);
199 if (r < 0)
200 return r;
201
202 r = image_new(IMAGE_SUBVOLUME,
203 pretty,
204 path,
205 filename,
206 info.read_only || read_only,
207 info.otime,
208 0,
209 ret);
210 if (r < 0)
211 return r;
212
213 if (btrfs_quota_scan_ongoing(fd) == 0) {
214 BtrfsQuotaInfo quota;
215
216 r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
217 if (r >= 0) {
218 (*ret)->usage = quota.referenced;
219 (*ret)->usage_exclusive = quota.exclusive;
220
221 (*ret)->limit = quota.referenced_max;
222 (*ret)->limit_exclusive = quota.exclusive_max;
223 }
224 }
225
226 return 1;
227 }
228 }
229
230 /* If the IMMUTABLE bit is set, we consider the
231 * directory read-only. Since the ioctl is not
232 * supported everywhere we ignore failures. */
233 (void) read_attr_fd(fd, &file_attr);
234
235 /* It's just a normal directory. */
236 r = image_new(IMAGE_DIRECTORY,
237 pretty,
238 path,
239 filename,
240 read_only || (file_attr & FS_IMMUTABLE_FL),
241 0,
242 0,
243 ret);
244 if (r < 0)
245 return r;
246
247 return 1;
248
249 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
250 usec_t crtime = 0;
251
252 /* It's a RAW disk image */
253
254 if (!ret)
255 return 1;
256
257 fd_getcrtime_at(dfd, filename, &crtime, 0);
258
259 if (!pretty)
260 pretty = strndupa(filename, strlen(filename) - 4);
261
262 r = image_new(IMAGE_RAW,
263 pretty,
264 path,
265 filename,
266 !(st.st_mode & 0222) || read_only,
267 crtime,
268 timespec_load(&st.st_mtim),
269 ret);
270 if (r < 0)
271 return r;
272
273 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
274 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
275
276 return 1;
277 }
278
279 return 0;
280 }
281
282 int image_find(const char *name, Image **ret) {
283 const char *path;
284 int r;
285
286 assert(name);
287
288 /* There are no images with invalid names */
289 if (!image_name_is_valid(name))
290 return 0;
291
292 NULSTR_FOREACH(path, image_search_path) {
293 _cleanup_closedir_ DIR *d = NULL;
294
295 d = opendir(path);
296 if (!d) {
297 if (errno == ENOENT)
298 continue;
299
300 return -errno;
301 }
302
303 r = image_make(NULL, dirfd(d), path, name, ret);
304 if (r == 0 || r == -ENOENT) {
305 _cleanup_free_ char *raw = NULL;
306
307 raw = strappend(name, ".raw");
308 if (!raw)
309 return -ENOMEM;
310
311 r = image_make(NULL, dirfd(d), path, raw, ret);
312 if (r == 0 || r == -ENOENT)
313 continue;
314 }
315 if (r < 0)
316 return r;
317
318 return 1;
319 }
320
321 if (streq(name, ".host"))
322 return image_make(".host", AT_FDCWD, NULL, "/", ret);
323
324 return 0;
325 };
326
327 int image_discover(Hashmap *h) {
328 const char *path;
329 int r;
330
331 assert(h);
332
333 NULSTR_FOREACH(path, image_search_path) {
334 _cleanup_closedir_ DIR *d = NULL;
335 struct dirent *de;
336
337 d = opendir(path);
338 if (!d) {
339 if (errno == ENOENT)
340 continue;
341
342 return -errno;
343 }
344
345 FOREACH_DIRENT_ALL(de, d, return -errno) {
346 _cleanup_(image_unrefp) Image *image = NULL;
347
348 if (!image_name_is_valid(de->d_name))
349 continue;
350
351 if (hashmap_contains(h, de->d_name))
352 continue;
353
354 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
355 if (r == 0 || r == -ENOENT)
356 continue;
357 if (r < 0)
358 return r;
359
360 r = hashmap_put(h, image->name, image);
361 if (r < 0)
362 return r;
363
364 image = NULL;
365 }
366 }
367
368 if (!hashmap_contains(h, ".host")) {
369 _cleanup_(image_unrefp) Image *image = NULL;
370
371 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
372 if (r < 0)
373 return r;
374
375 r = hashmap_put(h, image->name, image);
376 if (r < 0)
377 return r;
378
379 image = NULL;
380
381 }
382
383 return 0;
384 }
385
386 void image_hashmap_free(Hashmap *map) {
387 Image *i;
388
389 while ((i = hashmap_steal_first(map)))
390 image_unref(i);
391
392 hashmap_free(map);
393 }
394
395 int image_remove(Image *i) {
396 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
397 _cleanup_strv_free_ char **settings = NULL;
398 char **j;
399 int r;
400
401 assert(i);
402
403 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
404 return -EROFS;
405
406 settings = image_settings_path(i);
407 if (!settings)
408 return -ENOMEM;
409
410 /* Make sure we don't interfere with a running nspawn */
411 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
412 if (r < 0)
413 return r;
414
415 switch (i->type) {
416
417 case IMAGE_SUBVOLUME:
418 r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
419 if (r < 0)
420 return r;
421 break;
422
423 case IMAGE_DIRECTORY:
424 /* Allow deletion of read-only directories */
425 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
426 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
427 if (r < 0)
428 return r;
429
430 break;
431
432 case IMAGE_RAW:
433 if (unlink(i->path) < 0)
434 return -errno;
435 break;
436
437 default:
438 return -EOPNOTSUPP;
439 }
440
441 STRV_FOREACH(j, settings) {
442 if (unlink(*j) < 0 && errno != ENOENT)
443 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
444 }
445
446 return 0;
447 }
448
449 static int rename_settings_file(const char *path, const char *new_name) {
450 _cleanup_free_ char *rs = NULL;
451 const char *fn;
452
453 fn = strjoina(new_name, ".nspawn");
454
455 rs = file_in_same_dir(path, fn);
456 if (!rs)
457 return -ENOMEM;
458
459 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
460 }
461
462 int image_rename(Image *i, const char *new_name) {
463 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
464 _cleanup_free_ char *new_path = NULL, *nn = NULL;
465 _cleanup_strv_free_ char **settings = NULL;
466 unsigned file_attr = 0;
467 char **j;
468 int r;
469
470 assert(i);
471
472 if (!image_name_is_valid(new_name))
473 return -EINVAL;
474
475 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
476 return -EROFS;
477
478 settings = image_settings_path(i);
479 if (!settings)
480 return -ENOMEM;
481
482 /* Make sure we don't interfere with a running nspawn */
483 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
484 if (r < 0)
485 return r;
486
487 /* Make sure nobody takes the new name, between the time we
488 * checked it is currently unused in all search paths, and the
489 * time we take possession of it */
490 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
491 if (r < 0)
492 return r;
493
494 r = image_find(new_name, NULL);
495 if (r < 0)
496 return r;
497 if (r > 0)
498 return -EEXIST;
499
500 switch (i->type) {
501
502 case IMAGE_DIRECTORY:
503 /* Turn of the immutable bit while we rename the image, so that we can rename it */
504 (void) read_attr_path(i->path, &file_attr);
505
506 if (file_attr & FS_IMMUTABLE_FL)
507 (void) chattr_path(i->path, 0, FS_IMMUTABLE_FL);
508
509 /* fall through */
510
511 case IMAGE_SUBVOLUME:
512 new_path = file_in_same_dir(i->path, new_name);
513 break;
514
515 case IMAGE_RAW: {
516 const char *fn;
517
518 fn = strjoina(new_name, ".raw");
519 new_path = file_in_same_dir(i->path, fn);
520 break;
521 }
522
523 default:
524 return -EOPNOTSUPP;
525 }
526
527 if (!new_path)
528 return -ENOMEM;
529
530 nn = strdup(new_name);
531 if (!nn)
532 return -ENOMEM;
533
534 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
535 if (r < 0)
536 return r;
537
538 /* Restore the immutable bit, if it was set before */
539 if (file_attr & FS_IMMUTABLE_FL)
540 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
541
542 free(i->path);
543 i->path = new_path;
544 new_path = NULL;
545
546 free(i->name);
547 i->name = nn;
548 nn = NULL;
549
550 STRV_FOREACH(j, settings) {
551 r = rename_settings_file(*j, new_name);
552 if (r < 0 && r != -ENOENT)
553 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
554 }
555
556 return 0;
557 }
558
559 static int clone_settings_file(const char *path, const char *new_name) {
560 _cleanup_free_ char *rs = NULL;
561 const char *fn;
562
563 fn = strjoina(new_name, ".nspawn");
564
565 rs = file_in_same_dir(path, fn);
566 if (!rs)
567 return -ENOMEM;
568
569 return copy_file_atomic(path, rs, 0664, false, 0);
570 }
571
572 int image_clone(Image *i, const char *new_name, bool read_only) {
573 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
574 _cleanup_strv_free_ char **settings = NULL;
575 const char *new_path;
576 char **j;
577 int r;
578
579 assert(i);
580
581 if (!image_name_is_valid(new_name))
582 return -EINVAL;
583
584 settings = image_settings_path(i);
585 if (!settings)
586 return -ENOMEM;
587
588 /* Make sure nobody takes the new name, between the time we
589 * checked it is currently unused in all search paths, and the
590 * time we take possession of it */
591 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
592 if (r < 0)
593 return r;
594
595 r = image_find(new_name, NULL);
596 if (r < 0)
597 return r;
598 if (r > 0)
599 return -EEXIST;
600
601 switch (i->type) {
602
603 case IMAGE_SUBVOLUME:
604 case IMAGE_DIRECTORY:
605 /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
606 * directory.*/
607
608 new_path = strjoina("/var/lib/machines/", new_name);
609
610 r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA);
611 if (r == -EOPNOTSUPP) {
612 /* No btrfs snapshots supported, create a normal directory then. */
613
614 r = copy_directory(i->path, new_path, false);
615 if (r >= 0)
616 (void) chattr_path(new_path, read_only ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
617 } else if (r >= 0)
618 /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
619 (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
620
621 break;
622
623 case IMAGE_RAW:
624 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
625
626 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
627 break;
628
629 default:
630 return -EOPNOTSUPP;
631 }
632
633 if (r < 0)
634 return r;
635
636 STRV_FOREACH(j, settings) {
637 r = clone_settings_file(*j, new_name);
638 if (r < 0 && r != -ENOENT)
639 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
640 }
641
642 return 0;
643 }
644
645 int image_read_only(Image *i, bool b) {
646 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
647 int r;
648 assert(i);
649
650 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
651 return -EROFS;
652
653 /* Make sure we don't interfere with a running nspawn */
654 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
655 if (r < 0)
656 return r;
657
658 switch (i->type) {
659
660 case IMAGE_SUBVOLUME:
661
662 /* Note that we set the flag only on the top-level
663 * subvolume of the image. */
664
665 r = btrfs_subvol_set_read_only(i->path, b);
666 if (r < 0)
667 return r;
668
669 break;
670
671 case IMAGE_DIRECTORY:
672 /* For simple directory trees we cannot use the access
673 mode of the top-level directory, since it has an
674 effect on the container itself. However, we can
675 use the "immutable" flag, to at least make the
676 top-level directory read-only. It's not as good as
677 a read-only subvolume, but at least something, and
678 we can read the value back.*/
679
680 r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
681 if (r < 0)
682 return r;
683
684 break;
685
686 case IMAGE_RAW: {
687 struct stat st;
688
689 if (stat(i->path, &st) < 0)
690 return -errno;
691
692 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
693 return -errno;
694
695 /* If the images is now read-only, it's a good time to
696 * defrag it, given that no write patterns will
697 * fragment it again. */
698 if (b)
699 (void) btrfs_defrag(i->path);
700 break;
701 }
702
703 default:
704 return -EOPNOTSUPP;
705 }
706
707 return 0;
708 }
709
710 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
711 _cleanup_free_ char *p = NULL;
712 LockFile t = LOCK_FILE_INIT;
713 struct stat st;
714 int r;
715
716 assert(path);
717 assert(global);
718 assert(local);
719
720 /* Locks an image path. This actually creates two locks: one
721 * "local" one, next to the image path itself, which might be
722 * shared via NFS. And another "global" one, in /run, that
723 * uses the device/inode number. This has the benefit that we
724 * can even lock a tree that is a mount point, correctly. */
725
726 if (path_equal(path, "/"))
727 return -EBUSY;
728
729 if (!path_is_absolute(path))
730 return -EINVAL;
731
732 if (stat(path, &st) >= 0) {
733 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
734 return -ENOMEM;
735 }
736
737 r = make_lock_file_for(path, operation, &t);
738 if (r < 0)
739 return r;
740
741 if (p) {
742 mkdir_p("/run/systemd/nspawn/locks", 0700);
743
744 r = make_lock_file(p, operation, global);
745 if (r < 0) {
746 release_lock_file(&t);
747 return r;
748 }
749 }
750
751 *local = t;
752 return 0;
753 }
754
755 int image_set_limit(Image *i, uint64_t referenced_max) {
756 assert(i);
757
758 if (IMAGE_IS_VENDOR(i) || IMAGE_IS_HOST(i))
759 return -EROFS;
760
761 if (i->type != IMAGE_SUBVOLUME)
762 return -EOPNOTSUPP;
763
764 /* We set the quota both for the subvolume as well as for the
765 * subtree. The latter is mostly for historical reasons, since
766 * we didn't use to have a concept of subtree quota, and hence
767 * only modified the subvolume quota. */
768
769 (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
770 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
771 return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
772 }
773
774 int image_name_lock(const char *name, int operation, LockFile *ret) {
775 const char *p;
776
777 assert(name);
778 assert(ret);
779
780 /* Locks an image name, regardless of the precise path used. */
781
782 if (!image_name_is_valid(name))
783 return -EINVAL;
784
785 if (streq(name, ".host"))
786 return -EBUSY;
787
788 mkdir_p("/run/systemd/nspawn/locks", 0700);
789 p = strjoina("/run/systemd/nspawn/locks/name-", name);
790
791 return make_lock_file(p, operation, ret);
792 }
793
/* Returns true if "s" is acceptable as an image name: it must pass filename_is_valid(), must
 * not be flagged by string_has_cc(), must be valid UTF-8, and must not start with ".#", which
 * is the prefix of temporary files used for atomically creating new files. */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
                !string_has_cc(s, NULL) &&
                utf8_is_valid(s) &&
                !startswith(s, ".#");
}
810
811 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
812 [IMAGE_DIRECTORY] = "directory",
813 [IMAGE_SUBVOLUME] = "subvolume",
814 [IMAGE_RAW] = "raw",
815 };
816
817 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);