]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-image.c
util-lib: move string table stuff into its own string-table.[ch]
[thirdparty/systemd.git] / src / shared / machine-image.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <fcntl.h>
23 #include <linux/fs.h>
24 #include <sys/statfs.h>
25
26 #include "btrfs-util.h"
27 #include "chattr-util.h"
28 #include "copy.h"
29 #include "dirent-util.h"
30 #include "fd-util.h"
31 #include "fs-util.h"
32 #include "machine-image.h"
33 #include "mkdir.h"
34 #include "path-util.h"
35 #include "rm-rf.h"
36 #include "string-table.h"
37 #include "string-util.h"
38 #include "strv.h"
39 #include "utf8.h"
40 #include "xattr-util.h"
41
/* Directories that are searched for machine images, stored as a NUL-separated
 * string list and iterated with NULSTR_FOREACH(). image_find() returns the
 * first match, hence earlier entries win over later ones. */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0" /* legacy */
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
47
48 Image *image_unref(Image *i) {
49 if (!i)
50 return NULL;
51
52 free(i->name);
53 free(i->path);
54 free(i);
55 return NULL;
56 }
57
58 static char **image_settings_path(Image *image) {
59 _cleanup_strv_free_ char **l = NULL;
60 char **ret;
61 const char *fn, *s;
62 unsigned i = 0;
63
64 assert(image);
65
66 l = new0(char*, 4);
67 if (!l)
68 return NULL;
69
70 fn = strjoina(image->name, ".nspawn");
71
72 FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
73 l[i] = strappend(s, fn);
74 if (!l[i])
75 return NULL;
76
77 i++;
78 }
79
80 l[i] = file_in_same_dir(image->path, fn);
81 if (!l[i])
82 return NULL;
83
84 ret = l;
85 l = NULL;
86
87 return ret;
88 }
89
90 static int image_new(
91 ImageType t,
92 const char *pretty,
93 const char *path,
94 const char *filename,
95 bool read_only,
96 usec_t crtime,
97 usec_t mtime,
98 Image **ret) {
99
100 _cleanup_(image_unrefp) Image *i = NULL;
101
102 assert(t >= 0);
103 assert(t < _IMAGE_TYPE_MAX);
104 assert(pretty);
105 assert(filename);
106 assert(ret);
107
108 i = new0(Image, 1);
109 if (!i)
110 return -ENOMEM;
111
112 i->type = t;
113 i->read_only = read_only;
114 i->crtime = crtime;
115 i->mtime = mtime;
116 i->usage = i->usage_exclusive = (uint64_t) -1;
117 i->limit = i->limit_exclusive = (uint64_t) -1;
118
119 i->name = strdup(pretty);
120 if (!i->name)
121 return -ENOMEM;
122
123 if (path)
124 i->path = strjoin(path, "/", filename, NULL);
125 else
126 i->path = strdup(filename);
127
128 if (!i->path)
129 return -ENOMEM;
130
131 path_kill_slashes(i->path);
132
133 *ret = i;
134 i = NULL;
135
136 return 0;
137 }
138
139 static int image_make(
140 const char *pretty,
141 int dfd,
142 const char *path,
143 const char *filename,
144 Image **ret) {
145
146 struct stat st;
147 bool read_only;
148 int r;
149
150 assert(filename);
151
152 /* We explicitly *do* follow symlinks here, since we want to
153 * allow symlinking trees into /var/lib/machines/, and treat
154 * them normally. */
155
156 if (fstatat(dfd, filename, &st, 0) < 0)
157 return -errno;
158
159 read_only =
160 (path && path_startswith(path, "/usr")) ||
161 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
162
163 if (S_ISDIR(st.st_mode)) {
164 _cleanup_close_ int fd = -1;
165 unsigned file_attr = 0;
166
167 if (!ret)
168 return 1;
169
170 if (!pretty)
171 pretty = filename;
172
173 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
174 if (fd < 0)
175 return -errno;
176
177 /* btrfs subvolumes have inode 256 */
178 if (st.st_ino == 256) {
179
180 r = btrfs_is_filesystem(fd);
181 if (r < 0)
182 return r;
183 if (r) {
184 BtrfsSubvolInfo info;
185
186 /* It's a btrfs subvolume */
187
188 r = btrfs_subvol_get_info_fd(fd, 0, &info);
189 if (r < 0)
190 return r;
191
192 r = image_new(IMAGE_SUBVOLUME,
193 pretty,
194 path,
195 filename,
196 info.read_only || read_only,
197 info.otime,
198 0,
199 ret);
200 if (r < 0)
201 return r;
202
203 if (btrfs_quota_scan_ongoing(fd) == 0) {
204 BtrfsQuotaInfo quota;
205
206 r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
207 if (r >= 0) {
208 (*ret)->usage = quota.referenced;
209 (*ret)->usage_exclusive = quota.exclusive;
210
211 (*ret)->limit = quota.referenced_max;
212 (*ret)->limit_exclusive = quota.exclusive_max;
213 }
214 }
215
216 return 1;
217 }
218 }
219
220 /* If the IMMUTABLE bit is set, we consider the
221 * directory read-only. Since the ioctl is not
222 * supported everywhere we ignore failures. */
223 (void) read_attr_fd(fd, &file_attr);
224
225 /* It's just a normal directory. */
226 r = image_new(IMAGE_DIRECTORY,
227 pretty,
228 path,
229 filename,
230 read_only || (file_attr & FS_IMMUTABLE_FL),
231 0,
232 0,
233 ret);
234 if (r < 0)
235 return r;
236
237 return 1;
238
239 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
240 usec_t crtime = 0;
241
242 /* It's a RAW disk image */
243
244 if (!ret)
245 return 1;
246
247 fd_getcrtime_at(dfd, filename, &crtime, 0);
248
249 if (!pretty)
250 pretty = strndupa(filename, strlen(filename) - 4);
251
252 r = image_new(IMAGE_RAW,
253 pretty,
254 path,
255 filename,
256 !(st.st_mode & 0222) || read_only,
257 crtime,
258 timespec_load(&st.st_mtim),
259 ret);
260 if (r < 0)
261 return r;
262
263 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
264 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
265
266 return 1;
267 }
268
269 return 0;
270 }
271
272 int image_find(const char *name, Image **ret) {
273 const char *path;
274 int r;
275
276 assert(name);
277
278 /* There are no images with invalid names */
279 if (!image_name_is_valid(name))
280 return 0;
281
282 NULSTR_FOREACH(path, image_search_path) {
283 _cleanup_closedir_ DIR *d = NULL;
284
285 d = opendir(path);
286 if (!d) {
287 if (errno == ENOENT)
288 continue;
289
290 return -errno;
291 }
292
293 r = image_make(NULL, dirfd(d), path, name, ret);
294 if (r == 0 || r == -ENOENT) {
295 _cleanup_free_ char *raw = NULL;
296
297 raw = strappend(name, ".raw");
298 if (!raw)
299 return -ENOMEM;
300
301 r = image_make(NULL, dirfd(d), path, raw, ret);
302 if (r == 0 || r == -ENOENT)
303 continue;
304 }
305 if (r < 0)
306 return r;
307
308 return 1;
309 }
310
311 if (streq(name, ".host"))
312 return image_make(".host", AT_FDCWD, NULL, "/", ret);
313
314 return 0;
315 };
316
317 int image_discover(Hashmap *h) {
318 const char *path;
319 int r;
320
321 assert(h);
322
323 NULSTR_FOREACH(path, image_search_path) {
324 _cleanup_closedir_ DIR *d = NULL;
325 struct dirent *de;
326
327 d = opendir(path);
328 if (!d) {
329 if (errno == ENOENT)
330 continue;
331
332 return -errno;
333 }
334
335 FOREACH_DIRENT_ALL(de, d, return -errno) {
336 _cleanup_(image_unrefp) Image *image = NULL;
337
338 if (!image_name_is_valid(de->d_name))
339 continue;
340
341 if (hashmap_contains(h, de->d_name))
342 continue;
343
344 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
345 if (r == 0 || r == -ENOENT)
346 continue;
347 if (r < 0)
348 return r;
349
350 r = hashmap_put(h, image->name, image);
351 if (r < 0)
352 return r;
353
354 image = NULL;
355 }
356 }
357
358 if (!hashmap_contains(h, ".host")) {
359 _cleanup_(image_unrefp) Image *image = NULL;
360
361 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
362 if (r < 0)
363 return r;
364
365 r = hashmap_put(h, image->name, image);
366 if (r < 0)
367 return r;
368
369 image = NULL;
370
371 }
372
373 return 0;
374 }
375
376 void image_hashmap_free(Hashmap *map) {
377 Image *i;
378
379 while ((i = hashmap_steal_first(map)))
380 image_unref(i);
381
382 hashmap_free(map);
383 }
384
385 int image_remove(Image *i) {
386 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
387 _cleanup_strv_free_ char **settings = NULL;
388 char **j;
389 int r;
390
391 assert(i);
392
393 if (path_equal(i->path, "/") ||
394 path_startswith(i->path, "/usr"))
395 return -EROFS;
396
397 settings = image_settings_path(i);
398 if (!settings)
399 return -ENOMEM;
400
401 /* Make sure we don't interfere with a running nspawn */
402 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
403 if (r < 0)
404 return r;
405
406 switch (i->type) {
407
408 case IMAGE_SUBVOLUME:
409 r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
410 if (r < 0)
411 return r;
412 break;
413
414 case IMAGE_DIRECTORY:
415 /* Allow deletion of read-only directories */
416 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
417 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
418 if (r < 0)
419 return r;
420
421 break;
422
423 case IMAGE_RAW:
424 if (unlink(i->path) < 0)
425 return -errno;
426 break;
427
428 default:
429 return -EOPNOTSUPP;
430 }
431
432 STRV_FOREACH(j, settings) {
433 if (unlink(*j) < 0 && errno != ENOENT)
434 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
435 }
436
437 return 0;
438 }
439
440 static int rename_settings_file(const char *path, const char *new_name) {
441 _cleanup_free_ char *rs = NULL;
442 const char *fn;
443
444 fn = strjoina(new_name, ".nspawn");
445
446 rs = file_in_same_dir(path, fn);
447 if (!rs)
448 return -ENOMEM;
449
450 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
451 }
452
453 int image_rename(Image *i, const char *new_name) {
454 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
455 _cleanup_free_ char *new_path = NULL, *nn = NULL;
456 _cleanup_strv_free_ char **settings = NULL;
457 unsigned file_attr = 0;
458 char **j;
459 int r;
460
461 assert(i);
462
463 if (!image_name_is_valid(new_name))
464 return -EINVAL;
465
466 if (path_equal(i->path, "/") ||
467 path_startswith(i->path, "/usr"))
468 return -EROFS;
469
470 settings = image_settings_path(i);
471 if (!settings)
472 return -ENOMEM;
473
474 /* Make sure we don't interfere with a running nspawn */
475 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
476 if (r < 0)
477 return r;
478
479 /* Make sure nobody takes the new name, between the time we
480 * checked it is currently unused in all search paths, and the
481 * time we take possesion of it */
482 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
483 if (r < 0)
484 return r;
485
486 r = image_find(new_name, NULL);
487 if (r < 0)
488 return r;
489 if (r > 0)
490 return -EEXIST;
491
492 switch (i->type) {
493
494 case IMAGE_DIRECTORY:
495 /* Turn of the immutable bit while we rename the image, so that we can rename it */
496 (void) read_attr_path(i->path, &file_attr);
497
498 if (file_attr & FS_IMMUTABLE_FL)
499 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
500
501 /* fall through */
502
503 case IMAGE_SUBVOLUME:
504 new_path = file_in_same_dir(i->path, new_name);
505 break;
506
507 case IMAGE_RAW: {
508 const char *fn;
509
510 fn = strjoina(new_name, ".raw");
511 new_path = file_in_same_dir(i->path, fn);
512 break;
513 }
514
515 default:
516 return -EOPNOTSUPP;
517 }
518
519 if (!new_path)
520 return -ENOMEM;
521
522 nn = strdup(new_name);
523 if (!nn)
524 return -ENOMEM;
525
526 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
527 if (r < 0)
528 return r;
529
530 /* Restore the immutable bit, if it was set before */
531 if (file_attr & FS_IMMUTABLE_FL)
532 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
533
534 free(i->path);
535 i->path = new_path;
536 new_path = NULL;
537
538 free(i->name);
539 i->name = nn;
540 nn = NULL;
541
542 STRV_FOREACH(j, settings) {
543 r = rename_settings_file(*j, new_name);
544 if (r < 0 && r != -ENOENT)
545 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
546 }
547
548 return 0;
549 }
550
551 static int clone_settings_file(const char *path, const char *new_name) {
552 _cleanup_free_ char *rs = NULL;
553 const char *fn;
554
555 fn = strjoina(new_name, ".nspawn");
556
557 rs = file_in_same_dir(path, fn);
558 if (!rs)
559 return -ENOMEM;
560
561 return copy_file_atomic(path, rs, 0664, false, 0);
562 }
563
564 int image_clone(Image *i, const char *new_name, bool read_only) {
565 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
566 _cleanup_strv_free_ char **settings = NULL;
567 const char *new_path;
568 char **j;
569 int r;
570
571 assert(i);
572
573 if (!image_name_is_valid(new_name))
574 return -EINVAL;
575
576 settings = image_settings_path(i);
577 if (!settings)
578 return -ENOMEM;
579
580 /* Make sure nobody takes the new name, between the time we
581 * checked it is currently unused in all search paths, and the
582 * time we take possesion of it */
583 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
584 if (r < 0)
585 return r;
586
587 r = image_find(new_name, NULL);
588 if (r < 0)
589 return r;
590 if (r > 0)
591 return -EEXIST;
592
593 switch (i->type) {
594
595 case IMAGE_SUBVOLUME:
596 case IMAGE_DIRECTORY:
597 new_path = strjoina("/var/lib/machines/", new_name);
598
599 r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA);
600
601 /* Enable "subtree" quotas for the copy, if we didn't
602 * copy any quota from the source. */
603 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
604
605 break;
606
607 case IMAGE_RAW:
608 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
609
610 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
611 break;
612
613 default:
614 return -EOPNOTSUPP;
615 }
616
617 if (r < 0)
618 return r;
619
620 STRV_FOREACH(j, settings) {
621 r = clone_settings_file(*j, new_name);
622 if (r < 0 && r != -ENOENT)
623 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
624 }
625
626 return 0;
627 }
628
629 int image_read_only(Image *i, bool b) {
630 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
631 int r;
632 assert(i);
633
634 if (path_equal(i->path, "/") ||
635 path_startswith(i->path, "/usr"))
636 return -EROFS;
637
638 /* Make sure we don't interfere with a running nspawn */
639 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
640 if (r < 0)
641 return r;
642
643 switch (i->type) {
644
645 case IMAGE_SUBVOLUME:
646
647 /* Note that we set the flag only on the top-level
648 * subvolume of the image. */
649
650 r = btrfs_subvol_set_read_only(i->path, b);
651 if (r < 0)
652 return r;
653
654 break;
655
656 case IMAGE_DIRECTORY:
657 /* For simple directory trees we cannot use the access
658 mode of the top-level directory, since it has an
659 effect on the container itself. However, we can
660 use the "immutable" flag, to at least make the
661 top-level directory read-only. It's not as good as
662 a read-only subvolume, but at least something, and
663 we can read the value back.*/
664
665 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
666 if (r < 0)
667 return r;
668
669 break;
670
671 case IMAGE_RAW: {
672 struct stat st;
673
674 if (stat(i->path, &st) < 0)
675 return -errno;
676
677 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
678 return -errno;
679
680 /* If the images is now read-only, it's a good time to
681 * defrag it, given that no write patterns will
682 * fragment it again. */
683 if (b)
684 (void) btrfs_defrag(i->path);
685 break;
686 }
687
688 default:
689 return -EOPNOTSUPP;
690 }
691
692 return 0;
693 }
694
695 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
696 _cleanup_free_ char *p = NULL;
697 LockFile t = LOCK_FILE_INIT;
698 struct stat st;
699 int r;
700
701 assert(path);
702 assert(global);
703 assert(local);
704
705 /* Locks an image path. This actually creates two locks: one
706 * "local" one, next to the image path itself, which might be
707 * shared via NFS. And another "global" one, in /run, that
708 * uses the device/inode number. This has the benefit that we
709 * can even lock a tree that is a mount point, correctly. */
710
711 if (path_equal(path, "/"))
712 return -EBUSY;
713
714 if (!path_is_absolute(path))
715 return -EINVAL;
716
717 if (stat(path, &st) >= 0) {
718 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
719 return -ENOMEM;
720 }
721
722 r = make_lock_file_for(path, operation, &t);
723 if (r < 0)
724 return r;
725
726 if (p) {
727 mkdir_p("/run/systemd/nspawn/locks", 0700);
728
729 r = make_lock_file(p, operation, global);
730 if (r < 0) {
731 release_lock_file(&t);
732 return r;
733 }
734 }
735
736 *local = t;
737 return 0;
738 }
739
740 int image_set_limit(Image *i, uint64_t referenced_max) {
741 assert(i);
742
743 if (path_equal(i->path, "/") ||
744 path_startswith(i->path, "/usr"))
745 return -EROFS;
746
747 if (i->type != IMAGE_SUBVOLUME)
748 return -EOPNOTSUPP;
749
750 /* We set the quota both for the subvolume as well as for the
751 * subtree. The latter is mostly for historical reasons, since
752 * we didn't use to have a concept of subtree quota, and hence
753 * only modified the subvolume quota. */
754
755 (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
756 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
757 return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
758 }
759
760 int image_name_lock(const char *name, int operation, LockFile *ret) {
761 const char *p;
762
763 assert(name);
764 assert(ret);
765
766 /* Locks an image name, regardless of the precise path used. */
767
768 if (!image_name_is_valid(name))
769 return -EINVAL;
770
771 if (streq(name, ".host"))
772 return -EBUSY;
773
774 mkdir_p("/run/systemd/nspawn/locks", 0700);
775 p = strjoina("/run/systemd/nspawn/locks/name-", name);
776
777 return make_lock_file(p, operation, ret);
778 }
779
/* Checks whether "s" may be used as image name: it has to be a valid file
 * name, must not contain control characters, has to be valid UTF-8 and must
 * not start with ".#", since that prefix is used by temporary files for
 * atomically creating new files. */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
               !string_has_cc(s, NULL) &&
               utf8_is_valid(s) &&
               !startswith(s, ".#");
}
796
797 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
798 [IMAGE_DIRECTORY] = "directory",
799 [IMAGE_SUBVOLUME] = "subvolume",
800 [IMAGE_RAW] = "raw",
801 };
802
803 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);