1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
4 This file is part of systemd.
6 Copyright 2013 Lennart Poettering
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
24 #include <sys/statfs.h>
26 #include "btrfs-util.h"
29 #include "path-util.h"
34 #include "machine-image.h"
36 static const char image_search_path
[] =
38 "/var/lib/container\0" /* legacy */
39 "/usr/local/lib/machines\0"
40 "/usr/lib/machines\0";
42 Image
*image_unref(Image
*i
) {
52 static char **image_settings_path(Image
*image
) {
53 _cleanup_strv_free_
char **l
= NULL
;
64 fn
= strjoina(image
->name
, ".nspawn");
66 FOREACH_STRING(s
, "/etc/systemd/nspawn/", "/run/systemd/nspawn/") {
67 l
[i
] = strappend(s
, fn
);
74 l
[i
] = file_in_same_dir(image
->path
, fn
);
94 _cleanup_(image_unrefp
) Image
*i
= NULL
;
97 assert(t
< _IMAGE_TYPE_MAX
);
107 i
->read_only
= read_only
;
110 i
->usage
= i
->usage_exclusive
= (uint64_t) -1;
111 i
->limit
= i
->limit_exclusive
= (uint64_t) -1;
113 i
->name
= strdup(pretty
);
118 i
->path
= strjoin(path
, "/", filename
, NULL
);
120 i
->path
= strdup(filename
);
125 path_kill_slashes(i
->path
);
133 static int image_make(
137 const char *filename
,
146 /* We explicitly *do* follow symlinks here, since we want to
147 * allow symlinking trees into /var/lib/machines/, and treat
150 if (fstatat(dfd
, filename
, &st
, 0) < 0)
154 (path
&& path_startswith(path
, "/usr")) ||
155 (faccessat(dfd
, filename
, W_OK
, AT_EACCESS
) < 0 && errno
== EROFS
);
157 if (S_ISDIR(st
.st_mode
)) {
158 _cleanup_close_
int fd
= -1;
159 unsigned file_attr
= 0;
167 fd
= openat(dfd
, filename
, O_CLOEXEC
|O_NOCTTY
|O_DIRECTORY
);
171 /* btrfs subvolumes have inode 256 */
172 if (st
.st_ino
== 256) {
174 r
= btrfs_is_filesystem(fd
);
178 BtrfsSubvolInfo info
;
179 BtrfsQuotaInfo quota
;
181 /* It's a btrfs subvolume */
183 r
= btrfs_subvol_get_info_fd(fd
, &info
);
187 r
= image_new(IMAGE_SUBVOLUME
,
191 info
.read_only
|| read_only
,
198 r
= btrfs_subvol_get_quota_fd(fd
, "a
);
200 (*ret
)->usage
= quota
.referenced
;
201 (*ret
)->usage_exclusive
= quota
.exclusive
;
203 (*ret
)->limit
= quota
.referenced_max
;
204 (*ret
)->limit_exclusive
= quota
.exclusive_max
;
211 /* If the IMMUTABLE bit is set, we consider the
212 * directory read-only. Since the ioctl is not
213 * supported everywhere we ignore failures. */
214 (void) read_attr_fd(fd
, &file_attr
);
216 /* It's just a normal directory. */
217 r
= image_new(IMAGE_DIRECTORY
,
221 read_only
|| (file_attr
& FS_IMMUTABLE_FL
),
230 } else if (S_ISREG(st
.st_mode
) && endswith(filename
, ".raw")) {
233 /* It's a RAW disk image */
238 fd_getcrtime_at(dfd
, filename
, &crtime
, 0);
241 pretty
= strndupa(filename
, strlen(filename
) - 4);
243 r
= image_new(IMAGE_RAW
,
247 !(st
.st_mode
& 0222) || read_only
,
249 timespec_load(&st
.st_mtim
),
254 (*ret
)->usage
= (*ret
)->usage_exclusive
= st
.st_blocks
* 512;
255 (*ret
)->limit
= (*ret
)->limit_exclusive
= st
.st_size
;
263 int image_find(const char *name
, Image
**ret
) {
269 /* There are no images with invalid names */
270 if (!image_name_is_valid(name
))
273 NULSTR_FOREACH(path
, image_search_path
) {
274 _cleanup_closedir_
DIR *d
= NULL
;
284 r
= image_make(NULL
, dirfd(d
), path
, name
, ret
);
285 if (r
== 0 || r
== -ENOENT
) {
286 _cleanup_free_
char *raw
= NULL
;
288 raw
= strappend(name
, ".raw");
292 r
= image_make(NULL
, dirfd(d
), path
, raw
, ret
);
293 if (r
== 0 || r
== -ENOENT
)
302 if (streq(name
, ".host"))
303 return image_make(".host", AT_FDCWD
, NULL
, "/", ret
);
308 int image_discover(Hashmap
*h
) {
314 NULSTR_FOREACH(path
, image_search_path
) {
315 _cleanup_closedir_
DIR *d
= NULL
;
326 FOREACH_DIRENT_ALL(de
, d
, return -errno
) {
327 _cleanup_(image_unrefp
) Image
*image
= NULL
;
329 if (!image_name_is_valid(de
->d_name
))
332 if (hashmap_contains(h
, de
->d_name
))
335 r
= image_make(NULL
, dirfd(d
), path
, de
->d_name
, &image
);
336 if (r
== 0 || r
== -ENOENT
)
341 r
= hashmap_put(h
, image
->name
, image
);
349 if (!hashmap_contains(h
, ".host")) {
350 _cleanup_(image_unrefp
) Image
*image
= NULL
;
352 r
= image_make(".host", AT_FDCWD
, NULL
, "/", &image
);
356 r
= hashmap_put(h
, image
->name
, image
);
367 void image_hashmap_free(Hashmap
*map
) {
370 while ((i
= hashmap_steal_first(map
)))
376 int image_remove(Image
*i
) {
377 _cleanup_release_lock_file_ LockFile global_lock
= LOCK_FILE_INIT
, local_lock
= LOCK_FILE_INIT
;
378 _cleanup_strv_free_
char **settings
= NULL
;
384 if (path_equal(i
->path
, "/") ||
385 path_startswith(i
->path
, "/usr"))
388 settings
= image_settings_path(i
);
392 /* Make sure we don't interfere with a running nspawn */
393 r
= image_path_lock(i
->path
, LOCK_EX
|LOCK_NB
, &global_lock
, &local_lock
);
399 case IMAGE_SUBVOLUME
:
400 r
= btrfs_subvol_remove(i
->path
, true);
405 case IMAGE_DIRECTORY
:
406 /* Allow deletion of read-only directories */
407 (void) chattr_path(i
->path
, false, FS_IMMUTABLE_FL
);
408 r
= rm_rf(i
->path
, REMOVE_ROOT
|REMOVE_PHYSICAL
|REMOVE_SUBVOLUME
);
415 if (unlink(i
->path
) < 0)
423 STRV_FOREACH(j
, settings
) {
424 if (unlink(*j
) < 0 && errno
!= ENOENT
)
425 log_debug_errno(errno
, "Failed to unlink %s, ignoring: %m", *j
);
431 static int rename_settings_file(const char *path
, const char *new_name
) {
432 _cleanup_free_
char *rs
= NULL
;
435 fn
= strjoina(new_name
, ".nspawn");
437 rs
= file_in_same_dir(path
, fn
);
441 return rename_noreplace(AT_FDCWD
, path
, AT_FDCWD
, rs
);
444 int image_rename(Image
*i
, const char *new_name
) {
445 _cleanup_release_lock_file_ LockFile global_lock
= LOCK_FILE_INIT
, local_lock
= LOCK_FILE_INIT
, name_lock
= LOCK_FILE_INIT
;
446 _cleanup_free_
char *new_path
= NULL
, *nn
= NULL
;
447 _cleanup_strv_free_
char **settings
= NULL
;
448 unsigned file_attr
= 0;
454 if (!image_name_is_valid(new_name
))
457 if (path_equal(i
->path
, "/") ||
458 path_startswith(i
->path
, "/usr"))
461 settings
= image_settings_path(i
);
465 /* Make sure we don't interfere with a running nspawn */
466 r
= image_path_lock(i
->path
, LOCK_EX
|LOCK_NB
, &global_lock
, &local_lock
);
470 /* Make sure nobody takes the new name, between the time we
471 * checked it is currently unused in all search paths, and the
472 * time we take possession of it */
473 r
= image_name_lock(new_name
, LOCK_EX
|LOCK_NB
, &name_lock
);
477 r
= image_find(new_name
, NULL
);
485 case IMAGE_DIRECTORY
:
486 /* Turn off the immutable bit while we rename the image, so that we can rename it */
487 (void) read_attr_path(i
->path
, &file_attr
);
489 if (file_attr
& FS_IMMUTABLE_FL
)
490 (void) chattr_path(i
->path
, false, FS_IMMUTABLE_FL
);
494 case IMAGE_SUBVOLUME
:
495 new_path
= file_in_same_dir(i
->path
, new_name
);
501 fn
= strjoina(new_name
, ".raw");
502 new_path
= file_in_same_dir(i
->path
, fn
);
513 nn
= strdup(new_name
);
517 r
= rename_noreplace(AT_FDCWD
, i
->path
, AT_FDCWD
, new_path
);
521 /* Restore the immutable bit, if it was set before */
522 if (file_attr
& FS_IMMUTABLE_FL
)
523 (void) chattr_path(new_path
, true, FS_IMMUTABLE_FL
);
533 STRV_FOREACH(j
, settings
) {
534 r
= rename_settings_file(*j
, new_name
);
535 if (r
< 0 && r
!= -ENOENT
)
536 log_debug_errno(r
, "Failed to rename settings file %s, ignoring: %m", *j
);
542 static int clone_settings_file(const char *path
, const char *new_name
) {
543 _cleanup_free_
char *rs
= NULL
;
546 fn
= strjoina(new_name
, ".nspawn");
548 rs
= file_in_same_dir(path
, fn
);
552 return copy_file_atomic(path
, rs
, 0664, false, 0);
555 int image_clone(Image
*i
, const char *new_name
, bool read_only
) {
556 _cleanup_release_lock_file_ LockFile name_lock
= LOCK_FILE_INIT
;
557 _cleanup_strv_free_
char **settings
= NULL
;
558 const char *new_path
;
564 if (!image_name_is_valid(new_name
))
567 settings
= image_settings_path(i
);
571 /* Make sure nobody takes the new name, between the time we
572 * checked it is currently unused in all search paths, and the
573 * time we take possession of it */
574 r
= image_name_lock(new_name
, LOCK_EX
|LOCK_NB
, &name_lock
);
578 r
= image_find(new_name
, NULL
);
586 case IMAGE_SUBVOLUME
:
587 case IMAGE_DIRECTORY
:
588 new_path
= strjoina("/var/lib/machines/", new_name
);
590 r
= btrfs_subvol_snapshot(i
->path
, new_path
, (read_only
? BTRFS_SNAPSHOT_READ_ONLY
: 0) | BTRFS_SNAPSHOT_FALLBACK_COPY
| BTRFS_SNAPSHOT_RECURSIVE
);
594 new_path
= strjoina("/var/lib/machines/", new_name
, ".raw");
596 r
= copy_file_atomic(i
->path
, new_path
, read_only
? 0444 : 0644, false, FS_NOCOW_FL
);
606 STRV_FOREACH(j
, settings
) {
607 r
= clone_settings_file(*j
, new_name
);
608 if (r
< 0 && r
!= -ENOENT
)
609 log_debug_errno(r
, "Failed to clone settings %s, ignoring: %m", *j
);
615 int image_read_only(Image
*i
, bool b
) {
616 _cleanup_release_lock_file_ LockFile global_lock
= LOCK_FILE_INIT
, local_lock
= LOCK_FILE_INIT
;
620 if (path_equal(i
->path
, "/") ||
621 path_startswith(i
->path
, "/usr"))
624 /* Make sure we don't interfere with a running nspawn */
625 r
= image_path_lock(i
->path
, LOCK_EX
|LOCK_NB
, &global_lock
, &local_lock
);
631 case IMAGE_SUBVOLUME
:
632 r
= btrfs_subvol_set_read_only(i
->path
, b
);
638 case IMAGE_DIRECTORY
:
639 /* For simple directory trees we cannot use the access
640 mode of the top-level directory, since it has an
641 effect on the container itself. However, we can
642 use the "immutable" flag, to at least make the
643 top-level directory read-only. It's not as good as
644 a read-only subvolume, but at least something, and
645 we can read the value back.*/
647 r
= chattr_path(i
->path
, b
, FS_IMMUTABLE_FL
);
656 if (stat(i
->path
, &st
) < 0)
659 if (chmod(i
->path
, (st
.st_mode
& 0444) | (b
? 0000 : 0200)) < 0)
662 /* If the image is now read-only, it's a good time to
663 * defrag it, given that no write patterns will
664 * fragment it again. */
666 (void) btrfs_defrag(i
->path
);
677 int image_path_lock(const char *path
, int operation
, LockFile
*global
, LockFile
*local
) {
678 _cleanup_free_
char *p
= NULL
;
679 LockFile t
= LOCK_FILE_INIT
;
687 /* Locks an image path. This actually creates two locks: one
688 * "local" one, next to the image path itself, which might be
689 * shared via NFS. And another "global" one, in /run, that
690 * uses the device/inode number. This has the benefit that we
691 * can even lock a tree that is a mount point, correctly. */
693 if (path_equal(path
, "/"))
696 if (!path_is_absolute(path
))
699 if (stat(path
, &st
) >= 0) {
700 if (asprintf(&p
, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st
.st_dev
, (unsigned long) st
.st_ino
) < 0)
704 r
= make_lock_file_for(path
, operation
, &t
);
709 mkdir_p("/run/systemd/nspawn/locks", 0700);
711 r
= make_lock_file(p
, operation
, global
);
713 release_lock_file(&t
);
722 int image_set_limit(Image
*i
, uint64_t referenced_max
) {
725 if (path_equal(i
->path
, "/") ||
726 path_startswith(i
->path
, "/usr"))
729 if (i
->type
!= IMAGE_SUBVOLUME
)
732 return btrfs_quota_limit(i
->path
, referenced_max
);
735 int image_name_lock(const char *name
, int operation
, LockFile
*ret
) {
741 /* Locks an image name, regardless of the precise path used. */
743 if (!image_name_is_valid(name
))
746 if (streq(name
, ".host"))
749 mkdir_p("/run/systemd/nspawn/locks", 0700);
750 p
= strjoina("/run/systemd/nspawn/locks/name-", name
);
752 return make_lock_file(p
, operation
, ret
);
755 bool image_name_is_valid(const char *s
) {
756 if (!filename_is_valid(s
))
759 if (string_has_cc(s
, NULL
))
762 if (!utf8_is_valid(s
))
765 /* Temporary files for atomically creating new files */
766 if (startswith(s
, ".#"))
772 static const char* const image_type_table
[_IMAGE_TYPE_MAX
] = {
773 [IMAGE_DIRECTORY
] = "directory",
774 [IMAGE_SUBVOLUME
] = "subvolume",
778 DEFINE_STRING_TABLE_LOOKUP(image_type
, ImageType
);