+/* SPDX-License-Identifier: LGPL-2.1+ */
/***
This file is part of systemd.
#include <sys/stat.h>
#include <unistd.h>
#include <linux/fs.h>
+
#include "alloc-util.h"
#include "btrfs-util.h"
#include "chattr-util.h"
#include "copy.h"
#include "dirent-util.h"
+#include "env-util.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "lockfile-util.h"
#include "log.h"
-#include "macro.h"
#include "machine-image.h"
+#include "macro.h"
#include "mkdir.h"
#include "path-util.h"
#include "rm-rf.h"
return ret;
}
+static char *image_roothash_path(Image *image) { /* Returns the path of the image's ".roothash" companion file; callers in this file free the result and map NULL to -ENOMEM. */
+ const char *fn;
+
+ assert(image);
+
+ fn = strjoina(image->name, ".roothash"); /* file name is "<image name>.roothash" */
+
+ return file_in_same_dir(image->path, fn); /* presumably replaces the last component of image->path with fn -- confirm against file_in_same_dir() */
+}
+
static int image_new(
ImageType t,
const char *pretty,
return -ENOMEM;
if (path)
- i->path = strjoin(path, "/", filename, NULL);
+ i->path = strjoin(path, "/", filename);
else
i->path = strdup(filename);
assert(filename);
- /* We explicitly *do* follow symlinks here, since we want to
- * allow symlinking trees into /var/lib/machines/, and treat
- * them normally. */
+ /* We explicitly *do* follow symlinks here, since we want to allow symlinking trees, raw files and block
+ * devices into /var/lib/machines/, and treat them normally. */
if (fstatat(dfd, filename, &st, 0) < 0)
return -errno;
(*ret)->limit = (*ret)->limit_exclusive = st.st_size;
return 1;
+
+ } else if (S_ISBLK(st.st_mode)) {
+ _cleanup_close_ int block_fd = -1;
+ uint64_t size = UINT64_MAX;
+
+ /* A block device */
+
+ if (!ret)
+ return 1;
+
+ if (!pretty)
+ pretty = filename;
+
+ block_fd = openat(dfd, filename, O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_NOCTTY);
+ if (block_fd < 0)
+ log_debug_errno(errno, "Failed to open block device %s/%s, ignoring: %m", path, filename);
+ else {
+ if (fstat(block_fd, &st) < 0)
+ return -errno;
+ if (!S_ISBLK(st.st_mode)) /* Verify that what we opened is actually what we think it is */
+ return -ENOTTY;
+
+ if (!read_only) {
+ int state = 0;
+
+ if (ioctl(block_fd, BLKROGET, &state) < 0)
+ log_debug_errno(errno, "Failed to issue BLKROGET on device %s/%s, ignoring: %m", path, filename);
+ else if (state)
+ read_only = true;
+ }
+
+ /* Best effort: if the size query fails, "size" keeps its UINT64_MAX initializer and is not applied to the image below. */
+ if (ioctl(block_fd, BLKGETSIZE64, &size) < 0)
+ log_debug_errno(errno, "Failed to issue BLKGETSIZE64 on device %s/%s, ignoring: %m", path, filename);
+
+ block_fd = safe_close(block_fd);
+ }
+
+ r = image_new(IMAGE_BLOCK,
+ pretty,
+ path,
+ filename,
+ !(st.st_mode & 0222) || read_only,
+ 0,
+ 0,
+ ret);
+ if (r < 0)
+ return r;
+
+ if (size != 0 && size != UINT64_MAX)
+ (*ret)->usage = (*ret)->usage_exclusive = (*ret)->limit = (*ret)->limit_exclusive = size;
+
+ return 1;
}
return 0;
}
r = image_make(NULL, dirfd(d), path, name, ret);
- if (r == 0 || r == -ENOENT) {
+ if (IN_SET(r, 0, -ENOENT)) {
_cleanup_free_ char *raw = NULL;
raw = strappend(name, ".raw");
return -ENOMEM;
r = image_make(NULL, dirfd(d), path, raw, ret);
- if (r == 0 || r == -ENOENT)
+ if (IN_SET(r, 0, -ENOENT))
continue;
}
if (r < 0)
continue;
r = image_make(NULL, dirfd(d), path, de->d_name, &image);
- if (r == 0 || r == -ENOENT)
+ if (IN_SET(r, 0, -ENOENT))
continue;
if (r < 0)
return r;
int image_remove(Image *i) {
_cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT;
_cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
char **j;
int r;
if (!settings)
return -ENOMEM;
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
/* Make sure we don't interfere with a running nspawn */
r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
if (r < 0)
switch (i->type) {
case IMAGE_SUBVOLUME:
- r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
- if (r < 0)
- return r;
+
+ /* Let's unlink first, maybe it is a symlink? If that works we are happy. Otherwise, let's get out the
+ * big guns */
+ if (unlink(i->path) < 0) {
+ r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA);
+ if (r < 0)
+ return r;
+ }
+
break;
case IMAGE_DIRECTORY:
break;
+ case IMAGE_BLOCK:
+
+ /* If this is inside of /dev, then it's a real block device, hence let's not touch the device node
+ * itself (but let's remove the stuff stored alongside it). If it's anywhere else, let's try to unlink
+ * the thing (it's most likely a symlink after all). */
+
+ if (path_startswith(i->path, "/dev"))
+ break;
+
+ /* fallthrough */
+
case IMAGE_RAW:
if (unlink(i->path) < 0)
return -errno;
log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", *j);
}
+ if (unlink(roothash) < 0 && errno != ENOENT)
+ log_debug_errno(errno, "Failed to unlink %s, ignoring: %m", roothash);
+
return 0;
}
-static int rename_settings_file(const char *path, const char *new_name) {
+static int rename_auxiliary_file(const char *path, const char *new_name, const char *suffix) {
_cleanup_free_ char *rs = NULL;
const char *fn;
- fn = strjoina(new_name, ".nspawn");
+ fn = strjoina(new_name, suffix);
rs = file_in_same_dir(path, fn);
if (!rs)
int image_rename(Image *i, const char *new_name) {
_cleanup_release_lock_file_ LockFile global_lock = LOCK_FILE_INIT, local_lock = LOCK_FILE_INIT, name_lock = LOCK_FILE_INIT;
- _cleanup_free_ char *new_path = NULL, *nn = NULL;
+ _cleanup_free_ char *new_path = NULL, *nn = NULL, *roothash = NULL;
_cleanup_strv_free_ char **settings = NULL;
unsigned file_attr = 0;
char **j;
if (!settings)
return -ENOMEM;
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
/* Make sure we don't interfere with a running nspawn */
r = image_path_lock(i->path, LOCK_EX|LOCK_NB, &global_lock, &local_lock);
if (r < 0)
new_path = file_in_same_dir(i->path, new_name);
break;
+ case IMAGE_BLOCK:
+
+ /* Refuse renaming raw block devices in /dev, the names are picked by udev after all. */
+ if (path_startswith(i->path, "/dev"))
+ return -EROFS;
+
+ new_path = file_in_same_dir(i->path, new_name);
+ break;
+
case IMAGE_RAW: {
const char *fn;
nn = NULL;
STRV_FOREACH(j, settings) {
- r = rename_settings_file(*j, new_name);
+ r = rename_auxiliary_file(*j, new_name, ".nspawn");
if (r < 0 && r != -ENOENT)
log_debug_errno(r, "Failed to rename settings file %s, ignoring: %m", *j);
}
+ r = rename_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to rename roothash file %s, ignoring: %m", roothash);
+
return 0;
}
-static int clone_settings_file(const char *path, const char *new_name) {
+static int clone_auxiliary_file(const char *path, const char *new_name, const char *suffix) { /* Copies the auxiliary file at "path" to a file named "<new_name><suffix>", whose path is built by file_in_same_dir(path, ...). Returns -ENOMEM if that path cannot be built, otherwise the result of copy_file_atomic(). */
_cleanup_free_ char *rs = NULL;
const char *fn;
- fn = strjoina(new_name, ".nspawn");
+ fn = strjoina(new_name, suffix); /* the suffix is supplied by the caller; callers in this file pass ".nspawn" or ".roothash" */
rs = file_in_same_dir(path, fn);
if (!rs)
return -ENOMEM;
- return copy_file_atomic(path, rs, 0664, false, 0);
+ return copy_file_atomic(path, rs, 0664, 0, COPY_REFLINK); /* NOTE(review): argument order changed with the new copy_file_atomic() signature -- presumably (mode, chattr flags, copy flags); confirm against copy.h */
}
int image_clone(Image *i, const char *new_name, bool read_only) {
_cleanup_release_lock_file_ LockFile name_lock = LOCK_FILE_INIT;
_cleanup_strv_free_ char **settings = NULL;
+ _cleanup_free_ char *roothash = NULL;
const char *new_path;
char **j;
int r;
if (!settings)
return -ENOMEM;
+ roothash = image_roothash_path(i);
+ if (!roothash)
+ return -ENOMEM;
+
/* Make sure nobody takes the new name, between the time we
* checked it is currently unused in all search paths, and the
* time we take possession of it */
case IMAGE_SUBVOLUME:
case IMAGE_DIRECTORY:
/* If we can we'll always try to create a new btrfs subvolume here, even if the source is a plain
- * directory.*/
+ * directory. */
new_path = strjoina("/var/lib/machines/", new_name);
- r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA);
- if (r == -EOPNOTSUPP) {
- /* No btrfs snapshots supported, create a normal directory then. */
-
- r = copy_directory(i->path, new_path, false);
- if (r >= 0)
- (void) chattr_path(new_path, read_only ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
- } else if (r >= 0)
+ r = btrfs_subvol_snapshot(i->path, new_path,
+ (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) |
+ BTRFS_SNAPSHOT_FALLBACK_COPY |
+ BTRFS_SNAPSHOT_FALLBACK_DIRECTORY |
+ BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE |
+ BTRFS_SNAPSHOT_RECURSIVE |
+ BTRFS_SNAPSHOT_QUOTA);
+ if (r >= 0)
/* Enable "subtree" quotas for the copy, if we didn't copy any quota from the source. */
(void) btrfs_subvol_auto_qgroup(new_path, 0, true);
case IMAGE_RAW:
new_path = strjoina("/var/lib/machines/", new_name, ".raw");
- r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, false, FS_NOCOW_FL);
+ r = copy_file_atomic(i->path, new_path, read_only ? 0444 : 0644, FS_NOCOW_FL, COPY_REFLINK);
break;
+ case IMAGE_BLOCK:
default:
return -EOPNOTSUPP;
}
return r;
STRV_FOREACH(j, settings) {
- r = clone_settings_file(*j, new_name);
+ r = clone_auxiliary_file(*j, new_name, ".nspawn");
if (r < 0 && r != -ENOENT)
log_debug_errno(r, "Failed to clone settings %s, ignoring: %m", *j);
}
+ r = clone_auxiliary_file(roothash, new_name, ".roothash");
+ if (r < 0 && r != -ENOENT)
+ log_debug_errno(r, "Failed to clone root hash file %s, ignoring: %m", roothash);
+
return 0;
}
use the "immutable" flag, to at least make the
top-level directory read-only. It's not as good as
a read-only subvolume, but at least something, and
- we can read the value back.*/
+ we can read the value back. */
r = chattr_path(i->path, b ? FS_IMMUTABLE_FL : 0, FS_IMMUTABLE_FL);
if (r < 0)
break;
}
+ case IMAGE_BLOCK: {
+ _cleanup_close_ int fd = -1;
+ struct stat st;
+ int state = b;
+
+ fd = open(i->path, O_CLOEXEC|O_RDONLY|O_NONBLOCK|O_NOCTTY);
+ if (fd < 0)
+ return -errno;
+
+ if (fstat(fd, &st) < 0)
+ return -errno;
+ if (!S_ISBLK(st.st_mode))
+ return -ENOTTY;
+
+ if (ioctl(fd, BLKROSET, &state) < 0)
+ return -errno;
+
+ break;
+ }
+
default:
return -EOPNOTSUPP;
}
* uses the device/inode number. This has the benefit that we
* can even lock a tree that is a mount point, correctly. */
- if (path_equal(path, "/"))
- return -EBUSY;
-
if (!path_is_absolute(path))
return -EINVAL;
+ if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+ *local = *global = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
+ if (path_equal(path, "/"))
+ return -EBUSY;
+
if (stat(path, &st) >= 0) {
- if (asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino) < 0)
+ if (S_ISBLK(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/block-%u:%u", major(st.st_rdev), minor(st.st_rdev));
+ else if (S_ISDIR(st.st_mode) || S_ISREG(st.st_mode))
+ r = asprintf(&p, "/run/systemd/nspawn/locks/inode-%lu:%lu", (unsigned long) st.st_dev, (unsigned long) st.st_ino);
+ else
+ return -ENOTTY;
+
+ if (r < 0)
return -ENOMEM;
}
- r = make_lock_file_for(path, operation, &t);
- if (r < 0)
- return r;
+ /* For block devices we don't need the "local" lock, as the major/minor lock above should be sufficient, since
+ * block devices are device local anyway. */
+ if (!path_startswith(path, "/dev")) {
+ r = make_lock_file_for(path, operation, &t);
+ if (r < 0)
+ return r;
+ }
if (p) {
mkdir_p("/run/systemd/nspawn/locks", 0700);
release_lock_file(&t);
return r;
}
- }
+ } else
+ *global = (LockFile) LOCK_FILE_INIT;
*local = t;
return 0;
if (!image_name_is_valid(name))
return -EINVAL;
+ if (getenv_bool("SYSTEMD_NSPAWN_LOCK") == 0) {
+ *ret = (LockFile) LOCK_FILE_INIT;
+ return 0;
+ }
+
if (streq(name, ".host"))
return -EBUSY;
[IMAGE_DIRECTORY] = "directory",
[IMAGE_SUBVOLUME] = "subvolume",
[IMAGE_RAW] = "raw",
+ [IMAGE_BLOCK] = "block",
};
DEFINE_STRING_TABLE_LOOKUP(image_type, ImageType);