git.ipfire.org Git - thirdparty/systemd.git/blobdiff - src/shared/machine-image.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / shared / machine-image.c
index 060f8d50c716b85615fe3b46b806957f392d7922..a9e5d608a52268dd703e0801cb0cd49e011ec743 100644 (file)
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: LGPL-2.1+ */
 /***
   This file is part of systemd.
 
 #include <sys/stat.h>
 #include <unistd.h>
 #include <linux/fs.h>
+
 #include "alloc-util.h"
 #include "btrfs-util.h"
 #include "chattr-util.h"
 #include "copy.h"
 #include "dirent-util.h"
+#include "env-util.h"
 #include "fd-util.h"
 #include "fs-util.h"
 #include "hashmap.h"
 #include "lockfile-util.h"
 #include "log.h"
-#include "macro.h"
 #include "machine-image.h"
+#include "macro.h"
 #include "mkdir.h"
 #include "path-util.h"
 #include "rm-rf.h"
@@ -97,6 +100,16 @@ static char **image_settings_path(Image *image) {
         return ret;
 }
 
+/* Builds the path of the image's "<name>.roothash" companion via file_in_same_dir(); callers own and free the
+ * result and treat NULL as -ENOMEM. */
+static char *image_roothash_path(Image *image) {
+        const char *roothash_name;
+
+        assert(image);
+        roothash_name = strjoina(image->name, ".roothash");
+        return file_in_same_dir(image->path, roothash_name);
+}
+
 static int image_new(
                 ImageType t,
                 const char *pretty,
@@ -131,7 +144,7 @@ static int image_new(
                 return -ENOMEM;
 
         if (path)
-                i->path = strjoin(path, "/", filename, NULL);
+                i->path = strjoin(path, "/", filename);
         else
                 i->path = strdup(filename);
 
@@ -159,9 +172,8 @@ static int image_make(
 
         assert(filename);
 
-        /* We explicitly *do* follow symlinks here, since we want to
-         * allow symlinking trees into /var/lib/machines/, and treat
-         * them normally. */
+        /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
+         * devices into /var/lib/machines/, and treat them normally. */
 
         if (fstatat(dfd, filename, &st, 0) < 0)
                 return -errno;
@@ -274,6 +286,58 @@ static int image_make(
                 (*ret)->limit = (*ret)->limit_exclusive = st.st_size;
 
                 return 1;
+
+        } else if (S_ISBLK(st.st_mode)) {
+                _cleanup_close_ int block_fd = -1;
+                uint64_t size = UINT64_MAX;
+
+                /* A block device */
+
+                if (!ret)
+                        return 1;
+
+                if (!pretty)
+                        pretty = filename;
+
+                block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
+                if (block_fd < 0)
+                        log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path, filename);
+                else {
+                        if (fstat(block_fd, &st) < 0)
+                                return -errno;
+                        if (!S_ISBLK(st.st_mode)) /* Verify that what we opened is actually what we think it is */
+                                return -ENOTTY;
+
+                        if (!read_only) {
+                                int state = 0;
+
+                                if (ioctl(block_fd, BLKROGET, &state) < 0)
+                                        log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path, filename);
+                                else if (state)
+                                        read_only = true;
+                        }
+
+                        if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
+                                log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path, filename);
+
+                        block_fd = safe_close(block_fd);
+                }
+
+                r = image_new(IMAGE_BLOCK,
+                              pretty,
+                              path,
+                              filename,
+                              !(st.st_mode & 0222) || read_only,
+                              0,
+                              0,
+                              ret);
+                if (r < 0)
+                        return r;
+
+                if (size != 0 && size != UINT64_MAX)
+                        (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
+
+                return 1;
         }
 
         return 0;
@@ -301,7 +365,7 @@ int image_find(const char *name, Image **ret) {
                 }
 
                 r = image_make(NULL, dirfd(d), path, name, ret);
-                if (r == 0 || r == -ENOENT) {
+                if (IN_SET(r, 0, -ENOENT)) {
                         _cleanup_free_ char *raw = NULL;
 
                         raw = strappend(name, ".raw");
@@ -309,7 +373,7 @@ int image_find(const char *name, Image **ret) {
                                 return -ENOMEM;
 
                         r = image_make(NULL, dirfd(d), path, raw, ret);
-                        if (r == 0 || r == -ENOENT)
+                        if (IN_SET(r, 0, -ENOENT))
                                 continue;
                 }
                 if (r < 0)
@@ -352,7 +416,7 @@ int image_discover(Hashmap *h) {
                                 continue;
 
                         r = image_make(NULL, dirfd(d), path, de->d_name, &image);
-                        if (r == 0 || r == -ENOENT)
+                        if (IN_SET(r, 0, -ENOENT))
                                 continue;
                         if (r < 0)
                                 return r;
@@ -395,6 +459,7 @@ void image_hashmap_free(Hashmap *map) {
 int image_remove(Image *i) {
         _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
         _cleanup_strv_free_ char **settings = NULL;
+        _cleanup_free_ char *roothash = NULL;
         char **j;
         int r;
 
@@ -407,6 +472,10 @@ int image_remove(Image *i) {
         if (!settings)
                 return -ENOMEM;
 
+        roothash = image_roothash_path(i);
+        if (!roothash)
+                return -ENOMEM;
+
         /* Make sure we don't interfere with a running nspawn */
         r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
         if (r < 0)
@@ -415,9 +484,15 @@ int image_remove(Image *i) {
         switch (i->type) {
 
         case IMAGE_SUBVOLUME:
-                r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
-                if (r < 0)
-                        return r;
+
+                /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
+                 * big guns */
+                if (unlink(i->path) < 0) {
+                        r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+                        if (r < 0)
+                                return r;
+                }
+
                 break;
 
         case IMAGE_DIRECTORY:
@@ -429,6 +504,17 @@ int image_remove(Image *i) {
 
                 break;
 
+        case IMAGE_BLOCK:
+
+                /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
+                 * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
+                 * the thing (it's most likely a symlink after all). */
+
+                if (path_startswith(i->path, "/dev"))
+                        break;
+
+                /* fallthrough */
+
         case IMAGE_RAW:
                 if (unlink(i->path) < 0)
                         return -errno;
@@ -443,14 +529,17 @@ int image_remove(Image *i) {
                         log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
         }
 
+        if (unlink(roothash) < 0 && errno != ENOENT)
+                log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
+
         return 0;
 }
 
-static int rename_settings_file(const char *path, const char *new_name) {
+static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
         _cleanup_free_ char *rs = NULL;
         const char *fn;
 
-        fn = strjoina(new_name, ".nspawn");
+        fn = strjoina(new_name, suffix);
 
         rs = file_in_same_dir(path, fn);
         if (!rs)
@@ -461,7 +550,7 @@ static int rename_settings_file(const char *path, const char *new_name) {
 
 int image_rename(Image *i, const char *new_name) {
         _cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
-        _cleanup_free_ char *new_path = NULL, *nn = NULL;
+        _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
         _cleanup_strv_free_ char **settings = NULL;
         unsigned file_attr = 0;
         char **j;
@@ -479,6 +568,10 @@ int image_rename(Image *i, const char *new_name) {
         if (!settings)
                 return -ENOMEM;
 
+        roothash = image_roothash_path(i);
+        if (!roothash)
+                return -ENOMEM;
+
         /* Make sure we don't interfere with a running nspawn */
         r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
         if (r < 0)
@@ -512,6 +605,15 @@ int image_rename(Image *i, const char *new_name) {
                 new_path = file_in_same_dir(i->path, new_name);
                 break;
 
+        case IMAGE_BLOCK:
+
+                /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
+                if (path_startswith(i->path, "/dev"))
+                        return -EROFS;
+
+                new_path = file_in_same_dir(i->path, new_name);
+                break;
+
         case IMAGE_RAW: {
                 const char *fn;
 
@@ -548,30 +650,35 @@ int image_rename(Image *i, const char *new_name) {
         nn = NULL;
 
         STRV_FOREACH(j, settings) {
-                r = rename_settings_file(*j, new_name);
+                r = rename_auxiliary_file(*j, new_name, ".nspawn");
                 if (r < 0 && r != -ENOENT)
                         log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
         }
 
+        r = rename_auxiliary_file(roothash, new_name, ".roothash");
+        if (r < 0 && r != -ENOENT)
+                log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
+
         return 0;
 }
 
-static int clone_settings_file(const char *path, const char *new_name) {
+static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) { /* Copies the auxiliary file at path to a file named <new_name><suffix>, located via file_in_same_dir(path, ...); returns -ENOMEM if that path cannot be built, otherwise copy_file_atomic()'s result */
         _cleanup_free_ char *rs = NULL;
         const char *fn;
 
-        fn = strjoina(new_name, ".nspawn");
+        fn = strjoina(new_name, suffix); /* target file name: new_name followed by suffix (callers pass ".nspawn" or ".roothash") */
 
         rs = file_in_same_dir(path, fn);
         if (!rs)
                 return -ENOMEM;
 
-        return copy_file_atomic(path, rs, 0664, false, 0);
+        return copy_file_atomic(path, rs, 0664, 0, COPY_REFLINK); /* NOTE(review): COPY_REFLINK presumably allows a reflink-based copy where supported — confirm against copy.h */
 }
 
 int image_clone(Image *i, const char *new_name, bool read_only) {
         _cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
         _cleanup_strv_free_ char **settings = NULL;
+        _cleanup_free_ char *roothash = NULL;
         const char *new_path;
         char **j;
         int r;
@@ -585,6 +692,10 @@ int image_clone(Image *i, const char *new_name, bool read_only) {
         if (!settings)
                 return -ENOMEM;
 
+        roothash = image_roothash_path(i);
+        if (!roothash)
+                return -ENOMEM;
+
         /* Make sure nobody takes the new name, between the time we
          * checked it is currently unused in all search paths, and the
          * time we take possession of it */
@@ -603,18 +714,18 @@ int image_clone(Image *i, const char *new_name, bool read_only) {
         case IMAGE_SUBVOLUME:
         case IMAGE_DIRECTORY:
                 /* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
-                 * directory.*/
+                 * directory. */
 
                 new_path = strjoina("/var/lib/machines/", new_name);
 
-                r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA);
-                if (r == -EOPNOTSUPP) {
-                        /* No btrfs snapshots supported, create a normal directory then. */
-
-                        r = copy_directory(i->path, new_path, false);
-                        if (r >= 0)
-                                (void) chattr_path(new_path, read_only ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
-                } else if (r >= 0)
+                r = btrfs_subvol_snapshot(i->path, new_path,
+                                          (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+                                          BTRFS_SNAPSHOT_FALLBACK_COPY |
+                                          BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+                                          BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
+                                          BTRFS_SNAPSHOT_RECURSIVE |
+                                          BTRFS_SNAPSHOT_QUOTA);
+                if (r >= 0)
                         /* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
                         (void) btrfs_subvol_auto_qgroup(new_path, 0, true);
 
@@ -623,9 +734,10 @@ int image_clone(Image *i, const char *new_name, bool read_only) {
         case IMAGE_RAW:
                 new_path = strjoina("/var/lib/machines/", new_name, ".raw");
 
-                r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
+                r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK);
                 break;
 
+        case IMAGE_BLOCK:
         default:
                 return -EOPNOTSUPP;
         }
@@ -634,11 +746,15 @@ int image_clone(Image *i, const char *new_name, bool read_only) {
                 return r;
 
         STRV_FOREACH(j, settings) {
-                r = clone_settings_file(*j, new_name);
+                r = clone_auxiliary_file(*j, new_name, ".nspawn");
                 if (r < 0 && r != -ENOENT)
                         log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
         }
 
+        r = clone_auxiliary_file(roothash, new_name, ".roothash");
+        if (r < 0 && r != -ENOENT)
+                log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
+
         return 0;
 }
 
@@ -675,7 +791,7 @@ int image_read_only(Image *i, bool b) {
                    use the "immutable" flag, to at least make the
                    top-level directory read-only. It's not as good as
                    a read-only subvolume, but at least something, and
-                   we can read the value back.*/
+                   we can read the value back. */
 
                 r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
                 if (r < 0)
@@ -700,6 +816,26 @@ int image_read_only(Image *i, bool b) {
                 break;
         }
 
+        case IMAGE_BLOCK: {
+                _cleanup_close_ int fd = -1;
+                struct stat st;
+                int state = b;
+
+                fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
+                if (fd < 0)
+                        return -errno;
+
+                if (fstat(fd, &st) < 0)
+                        return -errno;
+                if (!S_ISBLK(st.st_mode))
+                        return -ENOTTY;
+
+                if (ioctl(fd, BLKROSET, &state) < 0)
+                        return -errno;
+
+                break;
+        }
+
         default:
                 return -EOPNOTSUPP;
         }
@@ -723,20 +859,36 @@ int image_path_lock(const char *path, int operation, LockFile *global, LockFile
          * uses the device/inode number. This has the benefit that we
          * can even lock a tree that is a mount point, correctly. */
 
-        if (path_equal(path, "/"))
-                return -EBUSY;
-
         if (!path_is_absolute(path))
                 return -EINVAL;
 
+        if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+                *local = *global = (LockFile) LOCK_FILE_INIT;
+                return 0;
+        }
+
+        if (path_equal(path, "/"))
+                return -EBUSY;
+
         if (stat(path, &st) >= 0) {
-                if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
+                if (S_ISBLK(st.st_mode))
+                        r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
+                else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
+                        r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
+                else
+                        return -ENOTTY;
+
+                if (r < 0)
                         return -ENOMEM;
         }
 
-        r = make_lock_file_for(path, operation, &t);
-        if (r < 0)
-                return r;
+        /* For device nodes below /dev we skip the path-based "local" lock: for block devices the major/minor lock
+         * above should be sufficient, since block devices are device local anyway. */
+        if (!path_startswith(path, "/dev")) {
+                r = make_lock_file_for(path, operation, &t);
+                if (r < 0)
+                        return r;
+        }
 
         if (p) {
                 mkdir_p("/run/systemd/nspawn/locks", 0700);
@@ -746,7 +898,8 @@ int image_path_lock(const char *path, int operation, LockFile *global, LockFile
                         release_lock_file(&t);
                         return r;
                 }
-        }
+        } else
+                *global = (LockFile) LOCK_FILE_INIT;
 
         *local = t;
         return 0;
@@ -782,6 +935,11 @@ int image_name_lock(const char *name, int operation, LockFile *ret) {
         if (!image_name_is_valid(name))
                 return -EINVAL;
 
+        if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+                *ret = (LockFile) LOCK_FILE_INIT;
+                return 0;
+        }
+
         if (streq(name, ".host"))
                 return -EBUSY;
 
@@ -812,6 +970,7 @@ static const char* const image_type_table[_IMAGE_TYPE_MAX] = {
         [IMAGE_DIRECTORY] = "directory",
         [IMAGE_SUBVOLUME] = "subvolume",
         [IMAGE_RAW] = "raw",
+        [IMAGE_BLOCK] = "block",
 };
 
 DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);