/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <fcntl.h>
+#include <linux/magic.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
PROGRESS_LOADING_DEFINITIONS,
PROGRESS_LOADING_TABLE,
PROGRESS_OPENING_COPY_BLOCK_SOURCES,
+ PROGRESS_OPENING_BLOCK_DEVICE_REPLACE_SOURCES,
PROGRESS_ACQUIRING_PARTITION_LABELS,
PROGRESS_MINIMIZING,
PROGRESS_PLACING,
PROGRESS_ADJUSTING_PARTITION,
PROGRESS_WRITING_TABLE,
PROGRESS_REREADING_TABLE,
+ PROGRESS_REPLACING_DEVICE,
_PROGRESS_PHASE_MAX,
_PROGRESS_PHASE_INVALID = -EINVAL,
} ProgressPhase;
typedef struct Context Context;
+typedef struct { /* Per-partition state for moving a mounted btrfs file system onto a new partition */
+ uint64_t devid; /* btrfs device id of the block device currently backing the file system */
+ int mountpoint_fd; /* fd opened on the BlockDeviceReplace= path; used for the replace and resize calls */
+ int source_fd; /* fd on the original backing block device, kept open while the replacement is pending */
+ char *source_path; /* path of the original backing block device; target when moving the file system back */
+ uint64_t source_size; /* size of the original backing block device, as reported by blockdev_get_device_size() */
+ bool done; /* set once btrfs_replace() onto the new partition has succeeded */
+} BtrfsReplacement;
+
+/* Releases a BtrfsReplacement together with the two file descriptors and the path string it owns.
+ * Accepts NULL. The return value is meant to be assigned back to the owning pointer, as done when a
+ * partition is reset. */
+static BtrfsReplacement* btrfs_replacement_free(BtrfsReplacement *b) {
+        if (b) {
+                safe_close(b->mountpoint_fd);
+                safe_close(b->source_fd);
+                free(b->source_path);
+        }
+
+        return mfree(b);
+}
+
+DEFINE_TRIVIAL_CLEANUP_FUNC(BtrfsReplacement*, btrfs_replacement_free);
+
typedef struct Partition {
Context *context;
uint64_t copy_blocks_done;
char *format;
+ char *block_device_replace;
+ BtrfsReplacement *btrfs_replaced;
char **exclude_files_source;
char **exclude_files_target;
char **make_directories;
};
static const char *progress_phase_table[_PROGRESS_PHASE_MAX] = {
- [PROGRESS_LOADING_DEFINITIONS] = "loading-definitions",
- [PROGRESS_LOADING_TABLE] = "loading-table",
- [PROGRESS_OPENING_COPY_BLOCK_SOURCES] = "opening-copy-block-sources",
- [PROGRESS_ACQUIRING_PARTITION_LABELS] = "acquiring-partition-labels",
- [PROGRESS_MINIMIZING] = "minimizing",
- [PROGRESS_PLACING] = "placing",
- [PROGRESS_WIPING_DISK] = "wiping-disk",
- [PROGRESS_WIPING_PARTITION] = "wiping-partition",
- [PROGRESS_COPYING_PARTITION] = "copying-partition",
- [PROGRESS_FORMATTING_PARTITION] = "formatting-partition",
- [PROGRESS_ADJUSTING_PARTITION] = "adjusting-partition",
- [PROGRESS_WRITING_TABLE] = "writing-table",
- [PROGRESS_REREADING_TABLE] = "rereading-table",
+ [PROGRESS_LOADING_DEFINITIONS] = "loading-definitions",
+ [PROGRESS_LOADING_TABLE] = "loading-table",
+ [PROGRESS_OPENING_COPY_BLOCK_SOURCES] = "opening-copy-block-sources",
+ [PROGRESS_OPENING_BLOCK_DEVICE_REPLACE_SOURCES] = "opening-block-device-replace-sources",
+ [PROGRESS_ACQUIRING_PARTITION_LABELS] = "acquiring-partition-labels",
+ [PROGRESS_MINIMIZING] = "minimizing",
+ [PROGRESS_PLACING] = "placing",
+ [PROGRESS_WIPING_DISK] = "wiping-disk",
+ [PROGRESS_WIPING_PARTITION] = "wiping-partition",
+ [PROGRESS_COPYING_PARTITION] = "copying-partition",
+ [PROGRESS_FORMATTING_PARTITION] = "formatting-partition",
+ [PROGRESS_ADJUSTING_PARTITION] = "adjusting-partition",
+ [PROGRESS_WRITING_TABLE] = "writing-table",
+ [PROGRESS_REREADING_TABLE] = "rereading-table",
+ [PROGRESS_REPLACING_DEVICE] = "replacing-device",
};
static uint64_t determine_grain_size(uint64_t sector_size) {
safe_close(p->copy_blocks_fd);
free(p->format);
+ free(p->block_device_replace);
+ btrfs_replacement_free(p->btrfs_replaced);
strv_free(p->exclude_files_source);
strv_free(p->exclude_files_target);
strv_free(p->make_directories);
p->copy_blocks_root = NULL;
p->format = mfree(p->format);
+ p->block_device_replace = mfree(p->block_device_replace);
+ p->btrfs_replaced = btrfs_replacement_free(p->btrfs_replaced);
p->exclude_files_source = strv_free(p->exclude_files_source);
p->exclude_files_target = strv_free(p->exclude_files_target);
p->make_directories = strv_free(p->make_directories);
if (p->copy_blocks_size != UINT64_MAX)
assert_se(INC_SAFE(&d, round_up_size(p->copy_blocks_size, context->grain_size)));
+ else if (p->btrfs_replaced)
+ assert_se(INC_SAFE(&d, round_up_size(p->btrfs_replaced->source_size, context->grain_size)));
else if (p->format || p->encrypt != ENCRYPT_OFF) {
uint64_t f;
{ "Partition", "AddValidateFS", config_parse_tristate, 0, &p->add_validatefs },
{ "Partition", "FileSystemSectorSize", config_parse_fs_sector_size, 0, &p->fs_sector_size },
{ "Partition", "Discard", config_parse_tristate, 0, &p->discard },
+ { "Partition", "BlockDeviceReplace", config_parse_path, 0, &p->block_device_replace },
{}
};
_cleanup_free_ char *filename = NULL;
return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
"Format=/CopyFiles=/MakeDirectories=/MakeSymlinks= and CopyBlocks= cannot be combined, refusing.");
+ if (p->block_device_replace && (p->format || partition_needs_populate(p)))
+ return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
+ "Format=/CopyFiles=/MakeDirectories=/MakeSymlinks= and BlockDeviceReplace= cannot be combined, refusing.");
+
+ if ((p->copy_blocks_path || p->copy_blocks_auto) && p->block_device_replace)
+ return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
+ "CopyBlocks= and BlockDeviceReplace= cannot be combined, refusing.");
+
+ if (p->block_device_replace && arg_offline == 1)
+ return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
+ "BlockDeviceReplace= is incompatible with --offline=yes, refusing.");
+
if (partition_needs_populate(p) && streq_ptr(p->format, "swap"))
return log_syntax(NULL, LOG_ERR, path, 1, SYNTHETIC_ERRNO(EINVAL),
"Format=swap and CopyFiles=/MakeDirectories=/MakeSymlinks= cannot be combined, refusing.");
if (p->type.designator == PARTITION_SWAP)
format = "swap";
- else if (partition_needs_populate(p) || (p->encrypt != ENCRYPT_OFF && !(p->copy_blocks_path || p->copy_blocks_auto)))
+ else if (partition_needs_populate(p) || (p->encrypt != ENCRYPT_OFF && !(p->copy_blocks_path || p->copy_blocks_auto || p->block_device_replace)))
/* Pick "vfat" as file system for esp and xbootldr partitions, otherwise default to "ext4". */
format = IN_SET(p->type.designator, PARTITION_ESP, PARTITION_XBOOTLDR) ? "vfat" : "ext4";
return 0;
}
+/* Moves every btrfs file system recorded in p->btrfs_replaced onto its newly allocated partition.
+ *
+ * For each eligible partition the target is prepared (and encrypted first, if encryption is enabled for
+ * it), then btrfs_replace() is invoked with the target's path. On success the replacement is marked as
+ * done so that later steps can resize it or move it back.
+ *
+ * Returns 0 on success or a negative errno-style error as soon as one partition fails. */
+static int context_block_device_replace(Context *context) {
+        assert(context);
+
+        LIST_FOREACH(partitions, p, context->partitions) {
+                _cleanup_(partition_target_freep) PartitionTarget *target = NULL;
+                BtrfsReplacement *replacement;
+                int r;
+
+                /* Skip partitions that are dropped, already exist, have no replacement source opened, or
+                 * for which partition_defer() reports true. */
+                if (p->dropped || PARTITION_EXISTS(p) || !p->btrfs_replaced || partition_defer(context, p))
+                        continue;
+
+                replacement = p->btrfs_replaced;
+                assert(!replacement->done);
+
+                (void) context_notify(context, PROGRESS_REPLACING_DEVICE, p->definition_path, UINT_MAX);
+
+                assert(p->offset != UINT64_MAX);
+                assert(p->new_size != UINT64_MAX);
+
+                /* btrfs needs a device node path as replace target, hence request one. */
+                r = partition_target_prepare(
+                                context,
+                                p,
+                                p->new_size,
+                                /* need_path= */ true,
+                                &target);
+                if (r < 0)
+                        return r;
+
+                if (p->encrypt != ENCRYPT_OFF) {
+                        r = partition_encrypt(context, p, target, /* offline= */ false);
+                        if (r < 0)
+                                return log_error_errno(r, "Failed to encrypt device: %m");
+                }
+
+                log_info("Replacing partition %" PRIu64 ".", p->partno);
+
+                /* The replace ioctl is synchronous: once btrfs_replace() returns the move has finished. */
+                r = btrfs_replace(replacement->mountpoint_fd, replacement->devid, partition_target_path(target));
+                if (r < 0)
+                        return log_error_errno(r, "Failed to replace btrfs device on partition %" PRIu64 ": %m", p->partno);
+
+                replacement->done = true;
+
+                /* Flag the decrypted device, if there is one, to be kept once the target is released. */
+                if (target->decrypted)
+                        target->decrypted->keep = true;
+
+                log_info("Successfully replaced partition %" PRIu64 ".", p->partno);
+        }
+
+        return 0;
+}
+
+/* Asks btrfs to grow each successfully replaced file system to the maximum size of its new device.
+ * Failures are not fatal: they are only logged as warnings and the remaining partitions are still
+ * processed. */
+static void context_btrfs_replace_resize(Context *context) {
+        assert(context);
+
+        LIST_FOREACH(partitions, p, context->partitions) {
+                int r;
+
+                /* Only partitions whose replacement actually completed are of interest. */
+                if (!p->btrfs_replaced || !p->btrfs_replaced->done)
+                        continue;
+
+                r = btrfs_resize_max(p->btrfs_replaced->mountpoint_fd, p->btrfs_replaced->devid);
+                if (r >= 0) {
+                        log_info("Successfully resized partition %" PRIu64 ".", p->partno);
+                        continue;
+                }
+
+                log_warning_errno(r, "Could not resize btrfs filesystem moved to partition %" PRIu64 ", proceeding without resizing: %m", p->partno);
+        }
+}
+
+static void context_btrfs_replace_back(Context *context) {
+ int r;
+
+ assert(context);
+
+ LIST_FOREACH(partitions, p, context->partitions) {
+ if (!p->btrfs_replaced)
+ continue;
+
+ if (!p->btrfs_replaced->done)
+ continue;
+
+ r = btrfs_replace(p->btrfs_replaced->mountpoint_fd, p->btrfs_replaced->devid, p->btrfs_replaced->source_path);
+ if (r < 0)
+ log_warning_errno(r, "Could not move back btrfs filesystem from partition %" PRIu64 ", leaving it on new device: %m", p->partno);
+ }
+}
+
static int context_mkfs(Context *context) {
int r;
if (r < 0)
goto error;
+ /* At this point we destructively move btrfs file systems onto the disk, before the partition table
+ * has been written. This is OK because the main use case is that the moved btrfs file systems are
+ * initially volatile (on a RAM disk, for example) and hold little data worth saving. However, we do
+ * not want to finalize the GPT yet: if we lose power in the middle of the move and reboot, nothing
+ * should try to boot from that incomplete disk. */
+ r = context_block_device_replace(context);
+ if (r < 0)
+ goto error;
+
r = context_mangle_partitions(context);
if (r < 0)
goto error;
goto error;
}
+ context_btrfs_replace_resize(context);
+
r = context_partscan(context);
if (r < 0)
return r;
return 0;
error:
+ context_btrfs_replace_back(context);
+
if (context->needs_rescan)
(void) context_partscan(context);
assert(context);
- if (!context->partitions)
- return 0;
-
LIST_FOREACH(partitions, p, context->partitions) {
_cleanup_close_ int source_fd = -EBADF;
_cleanup_free_ char *opened = NULL;
return 0;
}
+/* Opens the btrfs file system referenced by BlockDeviceReplace= for every partition that is about to be
+ * created, and stores what is needed to move it later (mount point fd, device id, source device path,
+ * fd and size) in p->btrfs_replaced.
+ *
+ * Fails if the path is not on a btrfs file system, if the file system does not consist of exactly one
+ * device, or if the backing device cannot be opened and measured as a block device.
+ *
+ * Returns 0 on success, a negative errno-style error otherwise. */
+static int context_open_btrfs_filesystems(Context *context) {
+        assert(context);
+
+        LIST_FOREACH(partitions, p, context->partitions) {
+                _cleanup_(btrfs_replacement_freep) BtrfsReplacement *replacement = NULL;
+                const char *mountpoint;
+                int r;
+
+                /* Skip partitions that are dropped, already exist, have no BlockDeviceReplace= setting,
+                 * or for which partition_defer() reports true. */
+                if (p->dropped || PARTITION_EXISTS(p) || !p->block_device_replace || partition_defer(context, p))
+                        continue;
+
+                mountpoint = p->block_device_replace;
+
+                (void) context_notify(context, PROGRESS_OPENING_BLOCK_DEVICE_REPLACE_SOURCES, p->definition_path, UINT_MAX);
+
+                replacement = new(BtrfsReplacement, 1);
+                if (!replacement)
+                        return log_oom();
+
+                /* Mark both fds as unset right away, so that the cleanup handler may run on any of the
+                 * error paths below. */
+                *replacement = (BtrfsReplacement) {
+                        .mountpoint_fd = -EBADF,
+                        .source_fd = -EBADF,
+                };
+
+                replacement->mountpoint_fd = xopenat(AT_FDCWD, mountpoint, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
+                if (replacement->mountpoint_fd < 0)
+                        return log_error_errno(replacement->mountpoint_fd, "Failed to open mountpoint %s for btrfs filesystem: %m", mountpoint);
+
+                r = fd_is_fs_type(replacement->mountpoint_fd, BTRFS_SUPER_MAGIC);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to check filesystem for mountpoint %s: %m", mountpoint);
+                if (r == 0)
+                        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Mountpoint %s is not a btrfs filesystem", mountpoint);
+
+                /* A return value of 0 means the file system does not consist of exactly one device. */
+                r = btrfs_get_block_device_at_full(
+                                replacement->mountpoint_fd,
+                                "",
+                                &replacement->devid,
+                                &replacement->source_path,
+                                /* ret= */ NULL);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to find device id for btrfs filesystem: %m");
+                if (r == 0)
+                        return log_error_errno(SYNTHETIC_ERRNO(EOPNOTSUPP), "Btrfs filesystem has multiple devices.");
+
+                /* Keep the source device open; otherwise it might be collected. */
+                replacement->source_fd = open(replacement->source_path, O_RDONLY|O_CLOEXEC);
+                if (replacement->source_fd < 0)
+                        return log_error_errno(errno, "Failed to open source device %s: %m", replacement->source_path);
+
+                r = fd_verify_block(replacement->source_fd);
+                if (r < 0)
+                        return log_error_errno(r, "Device %s is not a block device: %m", replacement->source_path);
+
+                r = blockdev_get_device_size(replacement->source_fd, &replacement->source_size);
+                if (r < 0)
+                        return log_error_errno(r, "Failed to get device size %s: %m", replacement->source_path);
+
+                /* Everything checked out, hand ownership over to the partition. */
+                p->btrfs_replaced = TAKE_PTR(replacement);
+        }
+
+        return 0;
+}
+
static int fd_apparent_size(int fd, uint64_t *ret) {
off_t initial = 0;
uint64_t size = 0;
if (r < 0)
return r;
+ r = context_open_btrfs_filesystems(context);
+ if (r < 0)
+ return r;
+
r = context_acquire_partition_uuids_and_labels(context);
if (r < 0)
return r;
if (r < 0)
return r;
+ r = context_open_btrfs_filesystems(context);
+ if (r < 0)
+ return r;
+
/* Make sure each partition has a unique UUID and unique label */
r = context_acquire_partition_uuids_and_labels(context);
if (r < 0)
#include "rm-rf.h"
#include "sparse-endian.h"
#include "stat-util.h"
+#include "stdio-util.h"
#include "string-util.h"
#include "time-util.h"
return !!(flags & BTRFS_SUBVOL_RDONLY);
}
-int btrfs_get_block_device_at(int dir_fd, const char *path, dev_t *ret) {
+int btrfs_get_block_device_at_full(int dir_fd, const char *path, uint64_t *ret_devid, char **ret_path, dev_t *ret) {
struct btrfs_ioctl_fs_info_args fsi = {};
_cleanup_close_ int fd = -EBADF;
uint64_t id;
int r;
+ /*
+ * Returns:
+ * ret_devid - the device id in the filesystem for the returned block device
+ * ret_path - the path to the returned block device
+ * ret - the returned block device
+ */
+
assert(dir_fd >= 0 || IN_SET(dir_fd, AT_FDCWD, XAT_FDROOT));
- assert(ret);
fd = xopenat(dir_fd, path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
if (fd < 0)
/* We won't do this for btrfs RAID */
if (fsi.num_devices != 1) {
- *ret = 0;
+ if (ret_devid)
+ *ret_devid = 0;
+ if (ret_path)
+ *ret_path = NULL;
+ if (ret)
+ *ret = 0;
return 0;
}
.devid = id,
};
struct stat st;
+ _cleanup_free_ char *device_path = NULL;
if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
if (errno == ENODEV)
if (major(st.st_rdev) == 0)
return -ENODEV;
- *ret = st.st_rdev;
+ device_path = strdup((char*) di.path);
+ if (!device_path)
+ return -ENOMEM;
+
+ if (ret_path)
+ *ret_path = TAKE_PTR(device_path);
+ if (ret)
+ *ret = st.st_rdev;
+ if (ret_devid)
+ *ret_devid = id;
return 1;
}
return -ENODATA;
}
+
+/* Starts a btrfs device replace operation on the file system referenced by fdmntpnt, moving the data of
+ * the device with id device_id onto the block device at path target. The source device is always used
+ * for reading while the replace runs.
+ *
+ * Returns 0 on success. On failure returns a negative errno-style error:
+ *   -EINVAL     the target path does not fit into the ioctl structure
+ *   -ECANCELED  btrfs reports that the replace was not started
+ *   -EALREADY   btrfs reports that a replace was already started
+ *   -EBUSY      btrfs reports that a scrub is in progress
+ *   -EIO        btrfs reported a result code this function does not know
+ *   -errno      the ioctl itself failed */
+int btrfs_replace(int fdmntpnt, uint64_t device_id, const char *target) {
+        struct btrfs_ioctl_dev_replace_args replace = {
+                .cmd = BTRFS_IOCTL_DEV_REPLACE_CMD_START,
+                /* Preset to a value that is not a valid result code, so that an untouched field is
+                 * reported as an unknown error below. */
+                .result = UINT64_MAX,
+                .start = {
+                        .srcdevid = device_id,
+                        .cont_reading_from_srcdev_mode = BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS,
+                },
+        };
+        size_t target_len;
+
+        assert(fdmntpnt >= 0);
+        assert(target);
+
+        target_len = strlen(target);
+        if (target_len >= sizeof(replace.start.tgtdev_name))
+                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL), "Path to the btrfs replace target is too long");
+
+        /* The length was validated above and the structure is zero-initialized, hence copy the string
+         * including its trailing NUL explicitly rather than relying on strncpy() semantics. */
+        memcpy(replace.start.tgtdev_name, target, target_len + 1);
+
+        if (ioctl(fdmntpnt, BTRFS_IOC_DEV_REPLACE, &replace) < 0)
+                return -errno;
+
+        /* The ioctl can succeed while the operation itself is refused; the reason is in .result. */
+        switch (replace.result) {
+        case BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR:
+                break;
+        case BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED:
+                return log_debug_errno(SYNTHETIC_ERRNO(ECANCELED), "btrfs replace was not started");
+        case BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED:
+                return log_debug_errno(SYNTHETIC_ERRNO(EALREADY), "btrfs replace was already started on this device");
+        case BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS:
+                return log_debug_errno(SYNTHETIC_ERRNO(EBUSY), "btrfs scrub is in progress");
+        default:
+                return log_debug_errno(SYNTHETIC_ERRNO(EIO), "An unknown btrfs error status occurred");
+        }
+
+        return 0;
+}
+
+/* Requests that the device with id devid of the btrfs file system referenced by fdmntpnt is resized to
+ * its maximum size, by passing "<devid>:max" to the resize ioctl. Returns the ioctl result, converted to
+ * a negative errno-style error on failure. */
+int btrfs_resize_max(int fdmntpnt, uint64_t devid) {
+        struct btrfs_ioctl_vol_args resize_args = {};
+        int r;
+
+        /* Compile-time proof that the longest possible "<devid>:max" string fits into the name field. */
+        assert_cc(STRLEN(":max") + DECIMAL_STR_MAX(uint64_t) + 1 <= sizeof(resize_args.name));
+
+        assert(fdmntpnt >= 0);
+
+        xsprintf(resize_args.name, "%" PRIu64 ":max", devid);
+
+        r = ioctl(fdmntpnt, BTRFS_IOC_RESIZE, &resize_args);
+        return RET_NERRNO(r);
+}