1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include <sys/statfs.h>
26 #include "btrfs-util.h"
29 #include "path-util.h"
34 #include "machine-image.h"
36 static const char image_search_path
[] =
38 "/var/lib/container\0" /* legacy */
39 "/usr/local/lib/machines\0"
40 "/usr/lib/machines\0";
42 Image
*image_unref(Image
*i
) {
52 static char **image_settings_path(Image
*image
) {
53 _cleanup_strv_free_
char **l
= NULL
;
64 fn
= strjoina(image
->name
, ".nspawn");
66 FOREACH_STRING(s
, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
67 l
[i
] = strappend(s
, fn
);
74 l
[i
] = file_in_same_dir(image
->path
, fn
);
94 _cleanup_(image_unrefp
) Image
*i
= NULL
;
97 assert(t
< _IMAGE_TYPE_MAX
);
107 i
->read_only
= read_only
;
110 i
->usage
= i
->usage_exclusive
= (uint64_t) -1;
111 i
->limit
= i
->limit_exclusive
= (uint64_t) -1;
113 i
->name
= strdup(pretty
);
118 i
->path
= strjoin(path
, "/", filename
, NULL
);
120 i
->path
= strdup(filename
);
125 path_kill_slashes(i
->path
);
133 static int image_make(
137 const char *filename
,
146 /* We explicitly *do* follow symlinks here, since we want to
147 * allow symlinking trees into /var/lib/machines/, and treat
150 if (fstatat(dfd
, filename
, &st
, 0) < 0)
154 (path
&& path_startswith(path
, "/usr")) ||
155 (faccessat(dfd
, filename
, W_OK
, AT_EACCESS
) < 0 && errno
== EROFS
);
157 if (S_ISDIR(st
.st_mode
)) {
158 _cleanup_close_
int fd
= -1;
159 unsigned file_attr
= 0;
167 fd
= openat(dfd
, filename
, O_CLOEXEC
|O_NOCTTY
|O_DIRECTORY
);
171 /* btrfs subvolumes have inode 256 */
172 if (st
.st_ino
== 256) {
174 r
= btrfs_is_filesystem(fd
);
178 BtrfsSubvolInfo info
;
180 /* It's a btrfs subvolume */
182 r
= btrfs_subvol_get_info_fd(fd
, 0, &info
);
186 r
= image_new(IMAGE_SUBVOLUME
,
190 info
.read_only
|| read_only
,
197 if (btrfs_quota_scan_ongoing(fd
) == 0) {
198 BtrfsQuotaInfo quota
;
200 r
= btrfs_subvol_get_subtree_quota_fd(fd
, 0, "a
);
202 (*ret
)->usage
= quota
.referenced
;
203 (*ret
)->usage_exclusive
= quota
.exclusive
;
205 (*ret
)->limit
= quota
.referenced_max
;
206 (*ret
)->limit_exclusive
= quota
.exclusive_max
;
214 /* If the IMMUTABLE bit is set, we consider the
215 * directory read-only. Since the ioctl is not
216 * supported everywhere we ignore failures. */
217 (void) read_attr_fd(fd
, &file_attr
);
219 /* It's just a normal directory. */
220 r
= image_new(IMAGE_DIRECTORY
,
224 read_only
|| (file_attr
& FS_IMMUTABLE_FL
),
233 } else if (S_ISREG(st
.st_mode
) && endswith(filename
, ".raw")) {
236 /* It's a RAW disk image */
241 fd_getcrtime_at(dfd
, filename
, &crtime
, 0);
244 pretty
= strndupa(filename
, strlen(filename
) - 4);
246 r
= image_new(IMAGE_RAW
,
250 !(st
.st_mode
& 0222) || read_only
,
252 timespec_load(&st
.st_mtim
),
257 (*ret
)->usage
= (*ret
)->usage_exclusive
= st
.st_blocks
* 512;
258 (*ret
)->limit
= (*ret
)->limit_exclusive
= st
.st_size
;
266 int image_find(const char *name
, Image
**ret
) {
272 /* There are no images with invalid names */
273 if (!image_name_is_valid(name
))
276 NULSTR_FOREACH(path
, image_search_path
) {
277 _cleanup_closedir_
DIR *d
= NULL
;
287 r
= image_make(NULL
, dirfd(d
), path
, name
, ret
);
288 if (r
== 0 || r
== -ENOENT
) {
289 _cleanup_free_
char *raw
= NULL
;
291 raw
= strappend(name
, ".raw");
295 r
= image_make(NULL
, dirfd(d
), path
, raw
, ret
);
296 if (r
== 0 || r
== -ENOENT
)
305 if (streq(name
, ".host"))
306 return image_make(".host", AT_FDCWD
, NULL
, "/", ret
);
311 int image_discover(Hashmap
*h
) {
317 NULSTR_FOREACH(path
, image_search_path
) {
318 _cleanup_closedir_
DIR *d
= NULL
;
329 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
330 _cleanup_(image_unrefp
) Image
*image
= NULL
;
332 if (!image_name_is_valid(de
->d_name
))
335 if (hashmap_contains(h
, de
->d_name
))
338 r
= image_make(NULL
, dirfd(d
), path
, de
->d_name
, &image
);
339 if (r
== 0 || r
== -ENOENT
)
344 r
= hashmap_put(h
, image
->name
, image
);
352 if (!hashmap_contains(h
, ".host")) {
353 _cleanup_(image_unrefp
) Image
*image
= NULL
;
355 r
= image_make(".host", AT_FDCWD
, NULL
, "/", &image
);
359 r
= hashmap_put(h
, image
->name
, image
);
370 void image_hashmap_free(Hashmap
*map
) {
373 while ((i
= hashmap_steal_first(map
)))
379 int image_remove(Image
*i
) {
380 _cleanup_release_lock_file_ LockFile global_lock
= LOCK_FILE_INIT
, local_lock
= LOCK_FILE_INIT
;
381 _cleanup_strv_free_
char **settings
= NULL
;
387 if (path_equal(i
->path
, "/") ||
388 path_startswith(i
->path
, "/usr"))
391 settings
= image_settings_path(i
);
395 /* Make sure we don't interfere with a running nspawn */
396 r
= image_path_lock(i
->path
, LOCK_EX
|LOCK_NB
, &global_lock
, &local_lock
);
402 case IMAGE_SUBVOLUME
:
403 r
= btrfs_subvol_remove(i
->path
, BTRFS_REMOVE_RECURSIVE
|BTRFS_REMOVE_QUOTA
);
408 case IMAGE_DIRECTORY
:
409 /* Allow deletion of read-only directories */
410 (void) chattr_path(i
->path
, false, FS_IMMUTABLE_FL
);
411 r
= rm_rf(i
->path
, REMOVE_ROOT
|REMOVE_PHYSICAL
|REMOVE_SUBVOLUME
);
418 if (unlink(i
->path
) < 0)
426 STRV_FOREACH(j
, settings
) {
427 if (unlink(*j
) < 0 && errno
!= ENOENT
)
428 log_debug_errno(errno
, "Failed to unlink %s, ignoring: %m", *j
);
434 static int rename_settings_file(const char *path
, const char *new_name
) {
435 _cleanup_free_
char *rs
= NULL
;
438 fn
= strjoina(new_name
, ".nspawn");
440 rs
= file_in_same_dir(path
, fn
);
444 return rename_noreplace(AT_FDCWD
, path
, AT_FDCWD
, rs
);
447 int image_rename(Image
*i
, const char *new_name
) {
448 _cleanup_release_lock_file_ LockFile global_lock
= LOCK_FILE_INIT
, local_lock
= LOCK_FILE_INIT
, name_lock
= LOCK_FILE_INIT
;
449 _cleanup_free_
char *new_path
= NULL
, *nn
= NULL
;
450 _cleanup_strv_free_
char **settings
= NULL
;
451 unsigned file_attr
= 0;
457 if (!image_name_is_valid(new_name
))
460 if (path_equal(i
->path
, "/") ||
461 path_startswith(i
->path
, "/usr"))
464 settings
= image_settings_path(i
);
468 /* Make sure we don't interfere with a running nspawn */
469 r
= image_path_lock(i
->path
, LOCK_EX
|LOCK_NB
, &global_lock
, &local_lock
);
473 /* Make sure nobody takes the new name, between the time we
474 * checked it is currently unused in all search paths, and the
475 * time we take possession of it */
476 r
= image_name_lock(new_name
, LOCK_EX
|LOCK_NB
, &name_lock
);
480 r
= image_find(new_name
, NULL
);
488 case IMAGE_DIRECTORY
:
489 /* Turn off the immutable bit while we rename the image, so that we can rename it */
490 (void) read_attr_path(i
->path
, &file_attr
);
492 if (file_attr
& FS_IMMUTABLE_FL
)
493 (void) chattr_path(i
->path
, false, FS_IMMUTABLE_FL
);
497 case IMAGE_SUBVOLUME
:
498 new_path
= file_in_same_dir(i
->path
, new_name
);
504 fn
= strjoina(new_name
, ".raw");
505 new_path
= file_in_same_dir(i
->path
, fn
);
516 nn
= strdup(new_name
);
520 r
= rename_noreplace(AT_FDCWD
, i
->path
, AT_FDCWD
, new_path
);
524 /* Restore the immutable bit, if it was set before */
525 if (file_attr
& FS_IMMUTABLE_FL
)
526 (void) chattr_path(new_path
, true, FS_IMMUTABLE_FL
);
536 STRV_FOREACH(j
, settings
) {
537 r
= rename_settings_file(*j
, new_name
);
538 if (r
< 0 && r
!= -ENOENT
)
539 log_debug_errno(r
, "Failed to rename settings file %s, ignoring: %m", *j
);
545 static int clone_settings_file(const char *path
, const char *new_name
) {
546 _cleanup_free_
char *rs
= NULL
;
549 fn
= strjoina(new_name
, ".nspawn");
551 rs
= file_in_same_dir(path
, fn
);
555 return copy_file_atomic(path
, rs
, 0664, false, 0);
558 int image_clone(Image
*i
, const char *new_name
, bool read_only
) {
559 _cleanup_release_lock_file_ LockFile name_lock
= LOCK_FILE_INIT
;
560 _cleanup_strv_free_
char **settings
= NULL
;
561 const char *new_path
;
567 if (!image_name_is_valid(new_name
))
570 settings
= image_settings_path(i
);
574 /* Make sure nobody takes the new name, between the time we
575 * checked it is currently unused in all search paths, and the
576 * time we take possession of it */
577 r
= image_name_lock(new_name
, LOCK_EX
|LOCK_NB
, &name_lock
);
581 r
= image_find(new_name
, NULL
);
589 case IMAGE_SUBVOLUME
:
590 case IMAGE_DIRECTORY
:
591 new_path
= strjoina("/var/lib/machines/", new_name
);
593 r
= btrfs_subvol_snapshot(i
->path
, new_path
, (read_only
? BTRFS_SNAPSHOT_READ_ONLY
: 0) | BTRFS_SNAPSHOT_FALLBACK_COPY
| BTRFS_SNAPSHOT_RECURSIVE
| BTRFS_SNAPSHOT_QUOTA
);
595 /* Enable "subtree" quotas for the copy, if we didn't
596 * copy any quota from the source. */
597 (void) btrfs_subvol_auto_qgroup(i
->path
, 0, true);
602 new_path
= strjoina("/var/lib/machines/", new_name
, ".raw");
604 r
= copy_file_atomic(i
->path
, new_path
, read_only
? 0444 : 0644, false, FS_NOCOW_FL
);
614 STRV_FOREACH(j
, settings
) {
615 r
= clone_settings_file(*j
, new_name
);
616 if (r
< 0 && r
!= -ENOENT
)
617 log_debug_errno(r
, "Failed to clone settings %s, ignoring: %m", *j
);
623 int image_read_only(Image
*i
, bool b
) {
624 _cleanup_release_lock_file_ LockFile global_lock
= LOCK_FILE_INIT
, local_lock
= LOCK_FILE_INIT
;
628 if (path_equal(i
->path
, "/") ||
629 path_startswith(i
->path
, "/usr"))
632 /* Make sure we don't interfere with a running nspawn */
633 r
= image_path_lock(i
->path
, LOCK_EX
|LOCK_NB
, &global_lock
, &local_lock
);
639 case IMAGE_SUBVOLUME
:
641 /* Note that we set the flag only on the top-level
642 * subvolume of the image. */
644 r
= btrfs_subvol_set_read_only(i
->path
, b
);
650 case IMAGE_DIRECTORY
:
651 /* For simple directory trees we cannot use the access
652 mode of the top-level directory, since it has an
653 effect on the container itself. However, we can
654 use the "immutable" flag, to at least make the
655 top-level directory read-only. It's not as good as
656 a read-only subvolume, but at least something, and
657 we can read the value back.*/
659 r
= chattr_path(i
->path
, b
, FS_IMMUTABLE_FL
);
668 if (stat(i
->path
, &st
) < 0)
671 if (chmod(i
->path
, (st
.st_mode
& 0444) | (b
? 0000 : 0200)) < 0)
674 /* If the image is now read-only, it's a good time to
675 * defrag it, given that no write patterns will
676 * fragment it again. */
678 (void) btrfs_defrag(i
->path
);
689 int image_path_lock(const char *path
, int operation
, LockFile
*global
, LockFile
*local
) {
690 _cleanup_free_
char *p
= NULL
;
691 LockFile t
= LOCK_FILE_INIT
;
699 /* Locks an image path. This actually creates two locks: one
700 * "local" one, next to the image path itself, which might be
701 * shared via NFS. And another "global" one, in /run, that
702 * uses the device/inode number. This has the benefit that we
703 * can even lock a tree that is a mount point, correctly. */
705 if (path_equal(path
, "/"))
708 if (!path_is_absolute(path
))
711 if (stat(path
, &st
) >= 0) {
712 if (asprintf(&p
, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st
.st_dev
, (unsigned long) st
.st_ino
) < 0)
716 r
= make_lock_file_for(path
, operation
, &t
);
721 mkdir_p("/run/systemd/nspawn/locks", 0700);
723 r
= make_lock_file(p
, operation
, global
);
725 release_lock_file(&t
);
734 int image_set_limit(Image
*i
, uint64_t referenced_max
) {
737 if (path_equal(i
->path
, "/") ||
738 path_startswith(i
->path
, "/usr"))
741 if (i
->type
!= IMAGE_SUBVOLUME
)
744 /* We set the quota both for the subvolume as well as for the
745 * subtree. The latter is mostly for historical reasons, since
746 * we didn't use to have a concept of subtree quota, and hence
747 * only modified the subvolume quota. */
749 (void) btrfs_qgroup_set_limit(i
->path
, 0, referenced_max
);
750 (void) btrfs_subvol_auto_qgroup(i
->path
, 0, true);
751 return btrfs_subvol_set_subtree_quota_limit(i
->path
, 0, referenced_max
);
754 int image_name_lock(const char *name
, int operation
, LockFile
*ret
) {
760 /* Locks an image name, regardless of the precise path used. */
762 if (!image_name_is_valid(name
))
765 if (streq(name
, ".host"))
768 mkdir_p("/run/systemd/nspawn/locks", 0700);
769 p
= strjoina("/run/systemd/nspawn/locks/name-", name
);
771 return make_lock_file(p
, operation
, ret
);
774 bool image_name_is_valid(const char *s
) {
775 if (!filename_is_valid(s
))
778 if (string_has_cc(s
, NULL
))
781 if (!utf8_is_valid(s
))
784 /* Temporary files for atomically creating new files */
785 if (startswith(s
, ".#"))
791 static const char* const image_type_table
[_IMAGE_TYPE_MAX
] = {
792 [IMAGE_DIRECTORY
] = "directory",
793 [IMAGE_SUBVOLUME
] = "subvolume",
797 DEFINE_STRING_TABLE_LOOKUP(image_type
, ImageType
);