]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/machine-image.c
util-lib: split our string related calls from util.[ch] into its own file string...
[thirdparty/systemd.git] / src / shared / machine-image.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <fcntl.h>
23 #include <linux/fs.h>
24 #include <sys/statfs.h>
25
26 #include "btrfs-util.h"
27 #include "copy.h"
28 #include "mkdir.h"
29 #include "path-util.h"
30 #include "rm-rf.h"
31 #include "string-util.h"
32 #include "strv.h"
33 #include "utf8.h"
34 #include "machine-image.h"
35
/* Directories that are searched for images. The entries are stored back to
 * back, each terminated by a NUL byte; the string literal's own implicit
 * terminator ends the list. Walked with NULSTR_FOREACH(). */
static const char image_search_path[] =
        "/var/lib/machines\0"
        "/var/lib/container\0"          /* legacy */
        "/usr/local/lib/machines\0"
        "/usr/lib/machines\0";
41
42 Image *image_unref(Image *i) {
43 if (!i)
44 return NULL;
45
46 free(i->name);
47 free(i->path);
48 free(i);
49 return NULL;
50 }
51
52 static char **image_settings_path(Image *image) {
53 _cleanup_strv_free_ char **l = NULL;
54 char **ret;
55 const char *fn, *s;
56 unsigned i = 0;
57
58 assert(image);
59
60 l = new0(char*, 4);
61 if (!l)
62 return NULL;
63
64 fn = strjoina(image->name, ".nspawn");
65
66 FOREACH_STRING(s, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
67 l[i] = strappend(s, fn);
68 if (!l[i])
69 return NULL;
70
71 i++;
72 }
73
74 l[i] = file_in_same_dir(image->path, fn);
75 if (!l[i])
76 return NULL;
77
78 ret = l;
79 l = NULL;
80
81 return ret;
82 }
83
84 static int image_new(
85 ImageType t,
86 const char *pretty,
87 const char *path,
88 const char *filename,
89 bool read_only,
90 usec_t crtime,
91 usec_t mtime,
92 Image **ret) {
93
94 _cleanup_(image_unrefp) Image *i = NULL;
95
96 assert(t >= 0);
97 assert(t < _IMAGE_TYPE_MAX);
98 assert(pretty);
99 assert(filename);
100 assert(ret);
101
102 i = new0(Image, 1);
103 if (!i)
104 return -ENOMEM;
105
106 i->type = t;
107 i->read_only = read_only;
108 i->crtime = crtime;
109 i->mtime = mtime;
110 i->usage = i->usage_exclusive = (uint64_t) -1;
111 i->limit = i->limit_exclusive = (uint64_t) -1;
112
113 i->name = strdup(pretty);
114 if (!i->name)
115 return -ENOMEM;
116
117 if (path)
118 i->path = strjoin(path, "/", filename, NULL);
119 else
120 i->path = strdup(filename);
121
122 if (!i->path)
123 return -ENOMEM;
124
125 path_kill_slashes(i->path);
126
127 *ret = i;
128 i = NULL;
129
130 return 0;
131 }
132
133 static int image_make(
134 const char *pretty,
135 int dfd,
136 const char *path,
137 const char *filename,
138 Image **ret) {
139
140 struct stat st;
141 bool read_only;
142 int r;
143
144 assert(filename);
145
146 /* We explicitly *do* follow symlinks here, since we want to
147 * allow symlinking trees into /var/lib/machines/, and treat
148 * them normally. */
149
150 if (fstatat(dfd, filename, &st, 0) < 0)
151 return -errno;
152
153 read_only =
154 (path && path_startswith(path, "/usr")) ||
155 (faccessat(dfd, filename, W_OK, AT_EACCESS) < 0 && errno == EROFS);
156
157 if (S_ISDIR(st.st_mode)) {
158 _cleanup_close_ int fd = -1;
159 unsigned file_attr = 0;
160
161 if (!ret)
162 return 1;
163
164 if (!pretty)
165 pretty = filename;
166
167 fd = openat(dfd, filename, O_CLOEXEC|O_NOCTTY|O_DIRECTORY);
168 if (fd < 0)
169 return -errno;
170
171 /* btrfs subvolumes have inode 256 */
172 if (st.st_ino == 256) {
173
174 r = btrfs_is_filesystem(fd);
175 if (r < 0)
176 return r;
177 if (r) {
178 BtrfsSubvolInfo info;
179
180 /* It's a btrfs subvolume */
181
182 r = btrfs_subvol_get_info_fd(fd, 0, &info);
183 if (r < 0)
184 return r;
185
186 r = image_new(IMAGE_SUBVOLUME,
187 pretty,
188 path,
189 filename,
190 info.read_only || read_only,
191 info.otime,
192 0,
193 ret);
194 if (r < 0)
195 return r;
196
197 if (btrfs_quota_scan_ongoing(fd) == 0) {
198 BtrfsQuotaInfo quota;
199
200 r = btrfs_subvol_get_subtree_quota_fd(fd, 0, &quota);
201 if (r >= 0) {
202 (*ret)->usage = quota.referenced;
203 (*ret)->usage_exclusive = quota.exclusive;
204
205 (*ret)->limit = quota.referenced_max;
206 (*ret)->limit_exclusive = quota.exclusive_max;
207 }
208 }
209
210 return 1;
211 }
212 }
213
214 /* If the IMMUTABLE bit is set, we consider the
215 * directory read-only. Since the ioctl is not
216 * supported everywhere we ignore failures. */
217 (void) read_attr_fd(fd, &file_attr);
218
219 /* It's just a normal directory. */
220 r = image_new(IMAGE_DIRECTORY,
221 pretty,
222 path,
223 filename,
224 read_only || (file_attr & FS_IMMUTABLE_FL),
225 0,
226 0,
227 ret);
228 if (r < 0)
229 return r;
230
231 return 1;
232
233 } else if (S_ISREG(st.st_mode) && endswith(filename, ".raw")) {
234 usec_t crtime = 0;
235
236 /* It's a RAW disk image */
237
238 if (!ret)
239 return 1;
240
241 fd_getcrtime_at(dfd, filename, &crtime, 0);
242
243 if (!pretty)
244 pretty = strndupa(filename, strlen(filename) - 4);
245
246 r = image_new(IMAGE_RAW,
247 pretty,
248 path,
249 filename,
250 !(st.st_mode & 0222) || read_only,
251 crtime,
252 timespec_load(&st.st_mtim),
253 ret);
254 if (r < 0)
255 return r;
256
257 (*ret)->usage = (*ret)->usage_exclusive = st.st_blocks * 512;
258 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
259
260 return 1;
261 }
262
263 return 0;
264 }
265
266 int image_find(const char *name, Image **ret) {
267 const char *path;
268 int r;
269
270 assert(name);
271
272 /* There are no images with invalid names */
273 if (!image_name_is_valid(name))
274 return 0;
275
276 NULSTR_FOREACH(path, image_search_path) {
277 _cleanup_closedir_ DIR *d = NULL;
278
279 d = opendir(path);
280 if (!d) {
281 if (errno == ENOENT)
282 continue;
283
284 return -errno;
285 }
286
287 r = image_make(NULL, dirfd(d), path, name, ret);
288 if (r == 0 || r == -ENOENT) {
289 _cleanup_free_ char *raw = NULL;
290
291 raw = strappend(name, ".raw");
292 if (!raw)
293 return -ENOMEM;
294
295 r = image_make(NULL, dirfd(d), path, raw, ret);
296 if (r == 0 || r == -ENOENT)
297 continue;
298 }
299 if (r < 0)
300 return r;
301
302 return 1;
303 }
304
305 if (streq(name, ".host"))
306 return image_make(".host", AT_FDCWD, NULL, "/", ret);
307
308 return 0;
309 };
310
311 int image_discover(Hashmap *h) {
312 const char *path;
313 int r;
314
315 assert(h);
316
317 NULSTR_FOREACH(path, image_search_path) {
318 _cleanup_closedir_ DIR *d = NULL;
319 struct dirent *de;
320
321 d = opendir(path);
322 if (!d) {
323 if (errno == ENOENT)
324 continue;
325
326 return -errno;
327 }
328
329 FOREACH_DIRENT_ALL(de, d, return -errno) {
330 _cleanup_(image_unrefp) Image *image = NULL;
331
332 if (!image_name_is_valid(de->d_name))
333 continue;
334
335 if (hashmap_contains(h, de->d_name))
336 continue;
337
338 r = image_make(NULL, dirfd(d), path, de->d_name, &image);
339 if (r == 0 || r == -ENOENT)
340 continue;
341 if (r < 0)
342 return r;
343
344 r = hashmap_put(h, image->name, image);
345 if (r < 0)
346 return r;
347
348 image = NULL;
349 }
350 }
351
352 if (!hashmap_contains(h, ".host")) {
353 _cleanup_(image_unrefp) Image *image = NULL;
354
355 r = image_make(".host", AT_FDCWD, NULL, "/", &image);
356 if (r < 0)
357 return r;
358
359 r = hashmap_put(h, image->name, image);
360 if (r < 0)
361 return r;
362
363 image = NULL;
364
365 }
366
367 return 0;
368 }
369
370 void image_hashmap_free(Hashmap *map) {
371 Image *i;
372
373 while ((i = hashmap_steal_first(map)))
374 image_unref(i);
375
376 hashmap_free(map);
377 }
378
379 int image_remove(Image *i) {
380 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
381 _cleanup_strv_free_ char **settings = NULL;
382 char **j;
383 int r;
384
385 assert(i);
386
387 if (path_equal(i->path, "/") ||
388 path_startswith(i->path, "/usr"))
389 return -EROFS;
390
391 settings = image_settings_path(i);
392 if (!settings)
393 return -ENOMEM;
394
395 /* Make sure we don't interfere with a running nspawn */
396 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
397 if (r < 0)
398 return r;
399
400 switch (i->type) {
401
402 case IMAGE_SUBVOLUME:
403 r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
404 if (r < 0)
405 return r;
406 break;
407
408 case IMAGE_DIRECTORY:
409 /* Allow deletion of read-only directories */
410 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
411 r = rm_rf(i->path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
412 if (r < 0)
413 return r;
414
415 break;
416
417 case IMAGE_RAW:
418 if (unlink(i->path) < 0)
419 return -errno;
420 break;
421
422 default:
423 return -EOPNOTSUPP;
424 }
425
426 STRV_FOREACH(j, settings) {
427 if (unlink(*j) < 0 && errno != ENOENT)
428 log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
429 }
430
431 return 0;
432 }
433
434 static int rename_settings_file(const char *path, const char *new_name) {
435 _cleanup_free_ char *rs = NULL;
436 const char *fn;
437
438 fn = strjoina(new_name, ".nspawn");
439
440 rs = file_in_same_dir(path, fn);
441 if (!rs)
442 return -ENOMEM;
443
444 return rename_noreplace(AT_FDCWD, path, AT_FDCWD, rs);
445 }
446
447 int image_rename(Image *i, const char *new_name) {
448 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
449 _cleanup_free_ char *new_path = NULL, *nn = NULL;
450 _cleanup_strv_free_ char **settings = NULL;
451 unsigned file_attr = 0;
452 char **j;
453 int r;
454
455 assert(i);
456
457 if (!image_name_is_valid(new_name))
458 return -EINVAL;
459
460 if (path_equal(i->path, "/") ||
461 path_startswith(i->path, "/usr"))
462 return -EROFS;
463
464 settings = image_settings_path(i);
465 if (!settings)
466 return -ENOMEM;
467
468 /* Make sure we don't interfere with a running nspawn */
469 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
470 if (r < 0)
471 return r;
472
473 /* Make sure nobody takes the new name, between the time we
474 * checked it is currently unused in all search paths, and the
475 * time we take possesion of it */
476 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
477 if (r < 0)
478 return r;
479
480 r = image_find(new_name, NULL);
481 if (r < 0)
482 return r;
483 if (r > 0)
484 return -EEXIST;
485
486 switch (i->type) {
487
488 case IMAGE_DIRECTORY:
489 /* Turn of the immutable bit while we rename the image, so that we can rename it */
490 (void) read_attr_path(i->path, &file_attr);
491
492 if (file_attr & FS_IMMUTABLE_FL)
493 (void) chattr_path(i->path, false, FS_IMMUTABLE_FL);
494
495 /* fall through */
496
497 case IMAGE_SUBVOLUME:
498 new_path = file_in_same_dir(i->path, new_name);
499 break;
500
501 case IMAGE_RAW: {
502 const char *fn;
503
504 fn = strjoina(new_name, ".raw");
505 new_path = file_in_same_dir(i->path, fn);
506 break;
507 }
508
509 default:
510 return -EOPNOTSUPP;
511 }
512
513 if (!new_path)
514 return -ENOMEM;
515
516 nn = strdup(new_name);
517 if (!nn)
518 return -ENOMEM;
519
520 r = rename_noreplace(AT_FDCWD, i->path, AT_FDCWD, new_path);
521 if (r < 0)
522 return r;
523
524 /* Restore the immutable bit, if it was set before */
525 if (file_attr & FS_IMMUTABLE_FL)
526 (void) chattr_path(new_path, true, FS_IMMUTABLE_FL);
527
528 free(i->path);
529 i->path = new_path;
530 new_path = NULL;
531
532 free(i->name);
533 i->name = nn;
534 nn = NULL;
535
536 STRV_FOREACH(j, settings) {
537 r = rename_settings_file(*j, new_name);
538 if (r < 0 && r != -ENOENT)
539 log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
540 }
541
542 return 0;
543 }
544
545 static int clone_settings_file(const char *path, const char *new_name) {
546 _cleanup_free_ char *rs = NULL;
547 const char *fn;
548
549 fn = strjoina(new_name, ".nspawn");
550
551 rs = file_in_same_dir(path, fn);
552 if (!rs)
553 return -ENOMEM;
554
555 return copy_file_atomic(path, rs, 0664, false, 0);
556 }
557
558 int image_clone(Image *i, const char *new_name, bool read_only) {
559 _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
560 _cleanup_strv_free_ char **settings = NULL;
561 const char *new_path;
562 char **j;
563 int r;
564
565 assert(i);
566
567 if (!image_name_is_valid(new_name))
568 return -EINVAL;
569
570 settings = image_settings_path(i);
571 if (!settings)
572 return -ENOMEM;
573
574 /* Make sure nobody takes the new name, between the time we
575 * checked it is currently unused in all search paths, and the
576 * time we take possesion of it */
577 r = image_name_lock(new_name, LOCK_EX|LOCK_NB, &name_lock);
578 if (r < 0)
579 return r;
580
581 r = image_find(new_name, NULL);
582 if (r < 0)
583 return r;
584 if (r > 0)
585 return -EEXIST;
586
587 switch (i->type) {
588
589 case IMAGE_SUBVOLUME:
590 case IMAGE_DIRECTORY:
591 new_path = strjoina("/var/lib/machines/", new_name);
592
593 r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA);
594
595 /* Enable "subtree" quotas for the copy, if we didn't
596 * copy any quota from the source. */
597 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
598
599 break;
600
601 case IMAGE_RAW:
602 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
603
604 r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
605 break;
606
607 default:
608 return -EOPNOTSUPP;
609 }
610
611 if (r < 0)
612 return r;
613
614 STRV_FOREACH(j, settings) {
615 r = clone_settings_file(*j, new_name);
616 if (r < 0 && r != -ENOENT)
617 log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
618 }
619
620 return 0;
621 }
622
623 int image_read_only(Image *i, bool b) {
624 _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
625 int r;
626 assert(i);
627
628 if (path_equal(i->path, "/") ||
629 path_startswith(i->path, "/usr"))
630 return -EROFS;
631
632 /* Make sure we don't interfere with a running nspawn */
633 r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
634 if (r < 0)
635 return r;
636
637 switch (i->type) {
638
639 case IMAGE_SUBVOLUME:
640
641 /* Note that we set the flag only on the top-level
642 * subvolume of the image. */
643
644 r = btrfs_subvol_set_read_only(i->path, b);
645 if (r < 0)
646 return r;
647
648 break;
649
650 case IMAGE_DIRECTORY:
651 /* For simple directory trees we cannot use the access
652 mode of the top-level directory, since it has an
653 effect on the container itself. However, we can
654 use the "immutable" flag, to at least make the
655 top-level directory read-only. It's not as good as
656 a read-only subvolume, but at least something, and
657 we can read the value back.*/
658
659 r = chattr_path(i->path, b, FS_IMMUTABLE_FL);
660 if (r < 0)
661 return r;
662
663 break;
664
665 case IMAGE_RAW: {
666 struct stat st;
667
668 if (stat(i->path, &st) < 0)
669 return -errno;
670
671 if (chmod(i->path, (st.st_mode & 0444) | (b ? 0000 : 0200)) < 0)
672 return -errno;
673
674 /* If the images is now read-only, it's a good time to
675 * defrag it, given that no write patterns will
676 * fragment it again. */
677 if (b)
678 (void) btrfs_defrag(i->path);
679 break;
680 }
681
682 default:
683 return -EOPNOTSUPP;
684 }
685
686 return 0;
687 }
688
689 int image_path_lock(const char *path, int operation, LockFile *global, LockFile *local) {
690 _cleanup_free_ char *p = NULL;
691 LockFile t = LOCK_FILE_INIT;
692 struct stat st;
693 int r;
694
695 assert(path);
696 assert(global);
697 assert(local);
698
699 /* Locks an image path. This actually creates two locks: one
700 * "local" one, next to the image path itself, which might be
701 * shared via NFS. And another "global" one, in /run, that
702 * uses the device/inode number. This has the benefit that we
703 * can even lock a tree that is a mount point, correctly. */
704
705 if (path_equal(path, "/"))
706 return -EBUSY;
707
708 if (!path_is_absolute(path))
709 return -EINVAL;
710
711 if (stat(path, &st) >= 0) {
712 if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
713 return -ENOMEM;
714 }
715
716 r = make_lock_file_for(path, operation, &t);
717 if (r < 0)
718 return r;
719
720 if (p) {
721 mkdir_p("/run/systemd/nspawn/locks", 0700);
722
723 r = make_lock_file(p, operation, global);
724 if (r < 0) {
725 release_lock_file(&t);
726 return r;
727 }
728 }
729
730 *local = t;
731 return 0;
732 }
733
734 int image_set_limit(Image *i, uint64_t referenced_max) {
735 assert(i);
736
737 if (path_equal(i->path, "/") ||
738 path_startswith(i->path, "/usr"))
739 return -EROFS;
740
741 if (i->type != IMAGE_SUBVOLUME)
742 return -EOPNOTSUPP;
743
744 /* We set the quota both for the subvolume as well as for the
745 * subtree. The latter is mostly for historical reasons, since
746 * we didn't use to have a concept of subtree quota, and hence
747 * only modified the subvolume quota. */
748
749 (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max);
750 (void) btrfs_subvol_auto_qgroup(i->path, 0, true);
751 return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max);
752 }
753
754 int image_name_lock(const char *name, int operation, LockFile *ret) {
755 const char *p;
756
757 assert(name);
758 assert(ret);
759
760 /* Locks an image name, regardless of the precise path used. */
761
762 if (!image_name_is_valid(name))
763 return -EINVAL;
764
765 if (streq(name, ".host"))
766 return -EBUSY;
767
768 mkdir_p("/run/systemd/nspawn/locks", 0700);
769 p = strjoina("/run/systemd/nspawn/locks/name-", name);
770
771 return make_lock_file(p, operation, ret);
772 }
773
/* Returns true if "s" may be used as image name: it has to be a valid file
 * name, must not contain control characters, has to be valid UTF-8, and
 * must not start with ".#" (that prefix is used by temporary files created
 * while atomically creating new files). */
bool image_name_is_valid(const char *s) {
        return filename_is_valid(s) &&
                !string_has_cc(s, NULL) &&
                utf8_is_valid(s) &&
                !startswith(s, ".#");
}
790
791 static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
792 [IMAGE_DIRECTORY] = "directory",
793 [IMAGE_SUBVOLUME] = "subvolume",
794 [IMAGE_RAW] = "raw",
795 };
796
797 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);