2 This file is part of systemd.
4 Copyright 2013 Lennart Poettering
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
30 #include "alloc-util.h"
31 #include "btrfs-util.h"
32 #include "chattr-util.h"
34 #include "dirent-util.h"
38 #include "lockfile-util.h"
41 #include "machine-image.h"
43 #include "path-util.h"
45 #include "string-table.h"
46 #include "string-util.h"
48 #include "time-util.h"
51 #include "xattr-util.h"
53 static const char image_search_path
[] =
55 "/var/lib/container\0" /* legacy */
56 "/usr/local/lib/machines\0"
57 "/usr/lib/machines\0";
59 Image
*image_unref(Image
*i
) {
69 static char **image_settings_path(Image
*image
) {
70 _cleanup_strv_free_
char **l
= NULL
;
81 fn
= strjoina(image
->name
, ".nspawn");
83 FOREACH_STRING(s
, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
84 l
[i
] = strappend(s
, fn
);
91 l
[i
] = file_in_same_dir(image
->path
, fn
);
101 static int image_new(
105 const char *filename
,
111 _cleanup_(image_unrefp
) Image
*i
= NULL
;
114 assert(t
< _IMAGE_TYPE_MAX
);
124 i
->read_only
= read_only
;
127 i
->usage
= i
->usage_exclusive
= (uint64_t) -1;
128 i
->limit
= i
->limit_exclusive
= (uint64_t) -1;
130 i
->name
= strdup(pretty
);
135 i
->path
= strjoin(path
, "/", filename
, NULL
);
137 i
->path
= strdup(filename
);
142 path_kill_slashes(i
->path
);
150 static int image_make(
154 const char *filename
,
163 /* We explicitly *do* follow symlinks here, since we want to
164 * allow symlinking trees into /var/lib/machines/, and treat
167 if (fstatat(dfd
, filename
, &st
, 0) < 0)
171 (path
&& path_startswith(path
, "/usr")) ||
172 (faccessat(dfd
, filename
, W_OK
, AT_EACCESS
) < 0 && errno
== EROFS
);
174 if (S_ISDIR(st
.st_mode
)) {
175 _cleanup_close_
int fd
= -1;
176 unsigned file_attr
= 0;
184 fd
= openat(dfd
, filename
, O_CLOEXEC
|O_NOCTTY
|O_DIRECTORY
);
188 /* btrfs subvolumes have inode 256 */
189 if (st
.st_ino
== 256) {
191 r
= btrfs_is_filesystem(fd
);
195 BtrfsSubvolInfo info
;
197 /* It's a btrfs subvolume */
199 r
= btrfs_subvol_get_info_fd(fd
, 0, &info
);
203 r
= image_new(IMAGE_SUBVOLUME
,
207 info
.read_only
|| read_only
,
214 if (btrfs_quota_scan_ongoing(fd
) == 0) {
215 BtrfsQuotaInfo quota
;
217 r
= btrfs_subvol_get_subtree_quota_fd(fd
, 0, "a
);
219 (*ret
)->usage
= quota
.referenced
;
220 (*ret
)->usage_exclusive
= quota
.exclusive
;
222 (*ret
)->limit
= quota
.referenced_max
;
223 (*ret
)->limit_exclusive
= quota
.exclusive_max
;
231 /* If the IMMUTABLE bit is set, we consider the
232 * directory read-only. Since the ioctl is not
233 * supported everywhere we ignore failures. */
234 (void) read_attr_fd(fd
, &file_attr
);
236 /* It's just a normal directory. */
237 r
= image_new(IMAGE_DIRECTORY
,
241 read_only
|| (file_attr
& FS_IMMUTABLE_FL
),
250 } else if (S_ISREG(st
.st_mode
) && endswith(filename
, ".raw")) {
253 /* It's a RAW disk image */
258 fd_getcrtime_at(dfd
, filename
, &crtime
, 0);
261 pretty
= strndupa(filename
, strlen(filename
) - 4);
263 r
= image_new(IMAGE_RAW
,
267 !(st
.st_mode
& 0222) || read_only
,
269 timespec_load(&st
.st_mtim
),
274 (*ret
)->usage
= (*ret
)->usage_exclusive
= st
.st_blocks
* 512;
275 (*ret
)->limit
= (*ret
)->limit_exclusive
= st
.st_size
;
283 int image_find(const char *name
, Image
**ret
) {
289 /* There are no images with invalid names */
290 if (!image_name_is_valid(name
))
293 NULSTR_FOREACH(path
, image_search_path
) {
294 _cleanup_closedir_
DIR *d
= NULL
;
304 r
= image_make(NULL
, dirfd(d
), path
, name
, ret
);
305 if (r
== 0 || r
== -ENOENT
) {
306 _cleanup_free_
char *raw
= NULL
;
308 raw
= strappend(name
, ".raw");
312 r
= image_make(NULL
, dirfd(d
), path
, raw
, ret
);
313 if (r
== 0 || r
== -ENOENT
)
322 if (streq(name
, ".host"))
323 return image_make(".host", AT_FDCWD
, NULL
, "/", ret
);
328 int image_discover(Hashmap
*h
) {
334 NULSTR_FOREACH(path
, image_search_path
) {
335 _cleanup_closedir_
DIR *d
= NULL
;
346 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
347 _cleanup_(image_unrefp
) Image
*image
= NULL
;
349 if (!image_name_is_valid(de
->d_name
))
352 if (hashmap_contains(h
, de
->d_name
))
355 r
= image_make(NULL
, dirfd(d
), path
, de
->d_name
, &image
);
356 if (r
== 0 || r
== -ENOENT
)
361 r
= hashmap_put(h
, image
->name
, image
);
369 if (!hashmap_contains(h
, ".host")) {
370 _cleanup_(image_unrefp
) Image
*image
= NULL
;
372 r
= image_make(".host", AT_FDCWD
, NULL
, "/", &image
);
376 r
= hashmap_put(h
, image
->name
, image
);
387 void image_hashmap_free(Hashmap
*map
) {
390 while ((i
= hashmap_steal_first(map
)))
396 int image_remove(Image
*i
) {
397 _cleanup_release_lock_file_ LockFile global_lock
= LOCK_FILE_INIT
, local_lock
= LOCK_FILE_INIT
;
398 _cleanup_strv_free_
char **settings
= NULL
;
404 if (IMAGE_IS_VENDOR(i
) || IMAGE_IS_HOST(i
))
407 settings
= image_settings_path(i
);
411 /* Make sure we don't interfere with a running nspawn */
412 r
= image_path_lock(i
->path
, LOCK_EX
|LOCK_NB
, &global_lock
, &local_lock
);
418 case IMAGE_SUBVOLUME
:
419 r
= btrfs_subvol_remove(i
->path
, BTRFS_REMOVE_RECURSIVE
|BTRFS_REMOVE_QUOTA
);
424 case IMAGE_DIRECTORY
:
425 /* Allow deletion of read-only directories */
426 (void) chattr_path(i
->path
, 0, FS_IMMUTABLE_FL
);
427 r
= rm_rf(i
->path
, REMOVE_ROOT
|REMOVE_PHYSICAL
|REMOVE_SUBVOLUME
);
434 if (unlink(i
->path
) < 0)
442 STRV_FOREACH(j
, settings
) {
443 if (unlink(*j
) < 0 && errno
!= ENOENT
)
444 log_debug_errno(errno
, "Failed to unlink %s, ignoring: %m", *j
);
450 static int rename_settings_file(const char *path
, const char *new_name
) {
451 _cleanup_free_
char *rs
= NULL
;
454 fn
= strjoina(new_name
, ".nspawn");
456 rs
= file_in_same_dir(path
, fn
);
460 return rename_noreplace(AT_FDCWD
, path
, AT_FDCWD
, rs
);
463 int image_rename(Image
*i
, const char *new_name
) {
464 _cleanup_release_lock_file_ LockFile global_lock
= LOCK_FILE_INIT
, local_lock
= LOCK_FILE_INIT
, name_lock
= LOCK_FILE_INIT
;
465 _cleanup_free_
char *new_path
= NULL
, *nn
= NULL
;
466 _cleanup_strv_free_
char **settings
= NULL
;
467 unsigned file_attr
= 0;
473 if (!image_name_is_valid(new_name
))
476 if (IMAGE_IS_VENDOR(i
) || IMAGE_IS_HOST(i
))
479 settings
= image_settings_path(i
);
483 /* Make sure we don't interfere with a running nspawn */
484 r
= image_path_lock(i
->path
, LOCK_EX
|LOCK_NB
, &global_lock
, &local_lock
);
488 /* Make sure nobody takes the new name, between the time we
489 * checked it is currently unused in all search paths, and the
490 * time we take possession of it */
491 r
= image_name_lock(new_name
, LOCK_EX
|LOCK_NB
, &name_lock
);
495 r
= image_find(new_name
, NULL
);
503 case IMAGE_DIRECTORY
:
504 /* Turn off the immutable bit while we rename the image, so that we can rename it */
505 (void) read_attr_path(i
->path
, &file_attr
);
507 if (file_attr
& FS_IMMUTABLE_FL
)
508 (void) chattr_path(i
->path
, 0, FS_IMMUTABLE_FL
);
512 case IMAGE_SUBVOLUME
:
513 new_path
= file_in_same_dir(i
->path
, new_name
);
519 fn
= strjoina(new_name
, ".raw");
520 new_path
= file_in_same_dir(i
->path
, fn
);
531 nn
= strdup(new_name
);
535 r
= rename_noreplace(AT_FDCWD
, i
->path
, AT_FDCWD
, new_path
);
539 /* Restore the immutable bit, if it was set before */
540 if (file_attr
& FS_IMMUTABLE_FL
)
541 (void) chattr_path(new_path
, FS_IMMUTABLE_FL
, FS_IMMUTABLE_FL
);
551 STRV_FOREACH(j
, settings
) {
552 r
= rename_settings_file(*j
, new_name
);
553 if (r
< 0 && r
!= -ENOENT
)
554 log_debug_errno(r
, "Failed to rename settings file %s, ignoring: %m", *j
);
560 static int clone_settings_file(const char *path
, const char *new_name
) {
561 _cleanup_free_
char *rs
= NULL
;
564 fn
= strjoina(new_name
, ".nspawn");
566 rs
= file_in_same_dir(path
, fn
);
570 return copy_file_atomic(path
, rs
, 0664, false, 0);
573 int image_clone(Image
*i
, const char *new_name
, bool read_only
) {
574 _cleanup_release_lock_file_ LockFile name_lock
= LOCK_FILE_INIT
;
575 _cleanup_strv_free_
char **settings
= NULL
;
576 const char *new_path
;
582 if (!image_name_is_valid(new_name
))
585 settings
= image_settings_path(i
);
589 /* Make sure nobody takes the new name, between the time we
590 * checked it is currently unused in all search paths, and the
591 * time we take possession of it */
592 r
= image_name_lock(new_name
, LOCK_EX
|LOCK_NB
, &name_lock
);
596 r
= image_find(new_name
, NULL
);
604 case IMAGE_SUBVOLUME
:
605 case IMAGE_DIRECTORY
:
606 new_path
= strjoina("/var/lib/machines/", new_name
);
608 r
= btrfs_subvol_snapshot(i
->path
, new_path
, (read_only
? BTRFS_SNAPSHOT_READ_ONLY
: 0) | BTRFS_SNAPSHOT_FALLBACK_COPY
| BTRFS_SNAPSHOT_RECURSIVE
| BTRFS_SNAPSHOT_QUOTA
);
610 /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
612 (void) btrfs_subvol_auto_qgroup(new_path
, 0, true);
617 new_path
= strjoina("/var/lib/machines/", new_name
, ".raw");
619 r
= copy_file_atomic(i
->path
, new_path
, read_only
? 0444 : 0644, false, FS_NOCOW_FL
);
629 STRV_FOREACH(j
, settings
) {
630 r
= clone_settings_file(*j
, new_name
);
631 if (r
< 0 && r
!= -ENOENT
)
632 log_debug_errno(r
, "Failed to clone settings %s, ignoring: %m", *j
);
638 int image_read_only(Image
*i
, bool b
) {
639 _cleanup_release_lock_file_ LockFile global_lock
= LOCK_FILE_INIT
, local_lock
= LOCK_FILE_INIT
;
643 if (IMAGE_IS_VENDOR(i
) || IMAGE_IS_HOST(i
))
646 /* Make sure we don't interfere with a running nspawn */
647 r
= image_path_lock(i
->path
, LOCK_EX
|LOCK_NB
, &global_lock
, &local_lock
);
653 case IMAGE_SUBVOLUME
:
655 /* Note that we set the flag only on the top-level
656 * subvolume of the image. */
658 r
= btrfs_subvol_set_read_only(i
->path
, b
);
664 case IMAGE_DIRECTORY
:
665 /* For simple directory trees we cannot use the access
666 mode of the top-level directory, since it has an
667 effect on the container itself. However, we can
668 use the "immutable" flag, to at least make the
669 top-level directory read-only. It's not as good as
670 a read-only subvolume, but at least something, and
671 we can read the value back.*/
673 r
= chattr_path(i
->path
, b
? FS_IMMUTABLE_FL
: 0, FS_IMMUTABLE_FL
);
682 if (stat(i
->path
, &st
) < 0)
685 if (chmod(i
->path
, (st
.st_mode
& 0444) | (b
? 0000 : 0200)) < 0)
688 /* If the image is now read-only, it's a good time to
689 * defrag it, given that no write patterns will
690 * fragment it again. */
692 (void) btrfs_defrag(i
->path
);
703 int image_path_lock(const char *path
, int operation
, LockFile
*global
, LockFile
*local
) {
704 _cleanup_free_
char *p
= NULL
;
705 LockFile t
= LOCK_FILE_INIT
;
713 /* Locks an image path. This actually creates two locks: one
714 * "local" one, next to the image path itself, which might be
715 * shared via NFS. And another "global" one, in /run, that
716 * uses the device/inode number. This has the benefit that we
717 * can even lock a tree that is a mount point, correctly. */
719 if (path_equal(path
, "/"))
722 if (!path_is_absolute(path
))
725 if (stat(path
, &st
) >= 0) {
726 if (asprintf(&p
, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st
.st_dev
, (unsigned long) st
.st_ino
) < 0)
730 r
= make_lock_file_for(path
, operation
, &t
);
735 mkdir_p("/run/systemd/nspawn/locks", 0700);
737 r
= make_lock_file(p
, operation
, global
);
739 release_lock_file(&t
);
748 int image_set_limit(Image
*i
, uint64_t referenced_max
) {
751 if (IMAGE_IS_VENDOR(i
) || IMAGE_IS_HOST(i
))
754 if (i
->type
!= IMAGE_SUBVOLUME
)
757 /* We set the quota both for the subvolume as well as for the
758 * subtree. The latter is mostly for historical reasons, since
759 * we didn't use to have a concept of subtree quota, and hence
760 * only modified the subvolume quota. */
762 (void) btrfs_qgroup_set_limit(i
->path
, 0, referenced_max
);
763 (void) btrfs_subvol_auto_qgroup(i
->path
, 0, true);
764 return btrfs_subvol_set_subtree_quota_limit(i
->path
, 0, referenced_max
);
767 int image_name_lock(const char *name
, int operation
, LockFile
*ret
) {
773 /* Locks an image name, regardless of the precise path used. */
775 if (!image_name_is_valid(name
))
778 if (streq(name
, ".host"))
781 mkdir_p("/run/systemd/nspawn/locks", 0700);
782 p
= strjoina("/run/systemd/nspawn/locks/name-", name
);
784 return make_lock_file(p
, operation
, ret
);
787 bool image_name_is_valid(const char *s
) {
788 if (!filename_is_valid(s
))
791 if (string_has_cc(s
, NULL
))
794 if (!utf8_is_valid(s
))
797 /* Temporary files for atomically creating new files */
798 if (startswith(s
, ".#"))
804 static const char* const image_type_table
[_IMAGE_TYPE_MAX
] = {
805 [IMAGE_DIRECTORY
] = "directory",
806 [IMAGE_SUBVOLUME
] = "subvolume",
810 DEFINE_STRING_TABLE_LOOKUP(image_type
, ImageType
);