From 5bcd08db289cd02aad8a89b37b2a46244a7bd473 Mon Sep 17 00:00:00 2001 From: Lennart Poettering Date: Wed, 21 Oct 2015 19:38:21 +0200 Subject: [PATCH] btrfs: beef-up btrfs support with a limited understanding of quota With this change we understand more than just leaf quota groups for btrfs file systems. Specifically: - When we create a subvolume we can now optionally add the new subvolume to all qgroups its parent subvolume was member of too. Alternatively it is also possible to insert an intermediary quota group between the parent's qgroups and the subvolume's leaf qgroup, which is useful for a concept of "subtree" qgroups, that contain a subvolume and all its children. - The remove logic for subvolumes has been updated to optionally remove any leaf qgroups or "subtree" qgroups, following the logic above. - The snapshot logic for subvolumes has been updated to replicate the original qgroup setup of the source, if it follows the "subtree" design described above. It will not cover qgroup setups that introduce arbitrary qgroups, especially those orthogonal to the subvolume hierarchy. This also tries to be more graceful when setting up /var/lib/machines as btrfs. For example, if mkfs.btrfs is missing we don't even try to set it up as loopback device. Fixes #1559 Fixes #1129 --- src/basic/btrfs-util.c | 905 +++++++++++++++++++++++++++++++++--- src/basic/btrfs-util.h | 73 ++- src/basic/missing.h | 4 + src/basic/path-util.c | 23 +- src/basic/path-util.h | 1 + src/basic/rm-rf.c | 4 +- src/import/export-tar.c | 4 +- src/import/pull-common.c | 2 +- src/import/pull-dkr.c | 14 +- src/machine/machined-dbus.c | 8 +- src/nspawn/nspawn.c | 6 +- src/shared/machine-image.c | 41 +- src/shared/machine-pool.c | 45 +- src/test/test-btrfs.c | 68 ++- 14 files changed, 1077 insertions(+), 121 deletions(-) diff --git a/src/basic/btrfs-util.c b/src/basic/btrfs-util.c index ec7e00986b4..f327c16a808 100644 --- a/src/basic/btrfs-util.c +++ b/src/basic/btrfs-util.c @@ -436,7 +436,7 @@ static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args #define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh) \ ((void*) ((uint8_t*) sh + sizeof(struct btrfs_ioctl_search_header))) -int btrfs_subvol_get_info_fd(int fd, BtrfsSubvolInfo *ret) { +int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) { struct btrfs_ioctl_search_args args = { /* Tree of tree roots */ .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, @@ -453,16 +453,23 @@ int btrfs_subvol_get_info_fd(int fd, BtrfsSubvolInfo *ret) { .key.max_transid = (uint64_t) -1, }; - uint64_t subvol_id; bool found = false; int r; assert(fd >= 0); assert(ret); - r = btrfs_subvol_get_id_fd(fd, &subvol_id); - if (r < 0) - return r; + if (subvol_id == 0) { + r = btrfs_subvol_get_id_fd(fd, &subvol_id); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } args.key.min_objectid = args.key.max_objectid = subvol_id; @@ -521,7 +528,7 @@ finish: return 0; } -int btrfs_subvol_get_quota_fd(int fd, BtrfsQuotaInfo *ret) { +int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) { struct btrfs_ioctl_search_args args = { /* Tree of quota items */ @@ -540,18 +547,25 @@ int btrfs_subvol_get_quota_fd(int fd, BtrfsQuotaInfo *ret) { .key.max_transid = (uint64_t) -1, }; - uint64_t subvol_id; bool found_info = false, found_limit = false; int r; assert(fd >= 0); assert(ret); - r = btrfs_subvol_get_id_fd(fd, &subvol_id); - if (r < 0) - return r; + if (qgroupid == 0) { + r = btrfs_subvol_get_id_fd(fd, &qgroupid); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } - args.key.min_offset = args.key.max_offset = subvol_id; + args.key.min_offset = args.key.max_offset = qgroupid; while (btrfs_ioctl_search_args_compare(&args) <= 0) { const struct btrfs_ioctl_search_header *sh; @@ -571,7 +585,7 @@ int btrfs_subvol_get_quota_fd(int fd, BtrfsQuotaInfo *ret) { if (sh->objectid != 0) continue; - if (sh->offset != subvol_id) + if (sh->offset != qgroupid) continue; if (sh->type == BTRFS_QGROUP_INFO_KEY) { @@ -585,12 +599,14 @@ int btrfs_subvol_get_quota_fd(int fd, BtrfsQuotaInfo *ret) { } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) { const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh); - ret->referenced_max = le64toh(qli->max_rfer); - ret->exclusive_max = le64toh(qli->max_excl); - - if (ret->referenced_max == 0) + if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER) + ret->referenced_max = le64toh(qli->max_rfer); + else ret->referenced_max = (uint64_t) -1; - if (ret->exclusive_max == 0) + + if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL) + ret->exclusive_max = le64toh(qli->max_excl); + else ret->exclusive_max = (uint64_t) -1; found_limit = true; @@ -622,6 +638,109 @@ finish: return 0; } +int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret); +} + +int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) { + uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0; + _cleanup_free_ uint64_t *qgroups = NULL; + int r, n, i; + + assert(fd >= 0); + assert(ret); + + /* This finds the "subtree" qgroup for a specific + * subvolume. This only works for subvolumes that have been + * prepared with btrfs_subvol_auto_qgroup_fd() with + * insert_intermediary_qgroup=true (or equivalent). For others + * it will return the leaf qgroup instead. The two cases may + * be distuingished via the return value, which is 1 in case + * an appropriate "subtree" qgroup was found, and 0 + * otherwise. */ + + if (subvol_id == 0) { + r = btrfs_subvol_get_id_fd(fd, &subvol_id); + if (r < 0) + return r; + } + + r = btrfs_qgroupid_split(subvol_id, &level, NULL); + if (r < 0) + return r; + if (level != 0) /* Input must be a leaf qgroup */ + return -EINVAL; + + n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups); + if (n < 0) + return n; + + for (i = 0; i < n; i++) { + uint64_t id; + + r = btrfs_qgroupid_split(qgroups[i], &level, &id); + if (r < 0) + return r; + + if (id != subvol_id) + continue; + + if (lowest == (uint64_t) -1 || level < lowest) { + lowest_qgroupid = qgroups[i]; + lowest = level; + } + } + + if (lowest == (uint64_t) -1) { + /* No suitable higher-level qgroup found, let's return + * the leaf qgroup instead, and indicate that with the + * return value. */ + + *ret = subvol_id; + return 0; + } + + *ret = lowest_qgroupid; + return 1; +} + +int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) { + uint64_t qgroupid; + int r; + + assert(fd >= 0); + assert(ret); + + /* This determines the quota data of the qgroup with the + * lowest level, that shares the id part with the specified + * subvolume. This is useful for determining the quota data + * for entire subvolume subtrees, as long as the subtrees have + * been set up with btrfs_qgroup_subvol_auto_fd() or in a + * compatible way */ + + r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid); + if (r < 0) + return r; + + return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret); +} + +int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret); +} + int btrfs_defrag_fd(int fd) { struct stat st; @@ -679,37 +798,79 @@ int btrfs_quota_enable(const char *path, bool b) { return btrfs_quota_enable_fd(fd, b); } -int btrfs_quota_limit_fd(int fd, uint64_t referenced_max) { +int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) { + struct btrfs_ioctl_qgroup_limit_args args = { - .lim.max_rfer = - referenced_max == (uint64_t) -1 ? 0 : - referenced_max == 0 ? 1 : referenced_max, + .lim.max_rfer = referenced_max, .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER, }; + unsigned c; int r; assert(fd >= 0); - r = btrfs_is_filesystem(fd); - if (r < 0) - return r; - if (!r) - return -ENOTTY; + if (qgroupid == 0) { + r = btrfs_subvol_get_id_fd(fd, &qgroupid); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } - if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) - return -errno; + args.qgroupid = qgroupid; + + for (c = 0;; c++) { + if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) { + + if (errno == EBUSY && c < 10) { + (void) btrfs_quota_scan_wait(fd); + continue; + } + + return -errno; + } + + break; + } return 0; } -int btrfs_quota_limit(const char *path, uint64_t referenced_max) { +int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); + if (fd < 0) + return -errno; + + return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max); +} + +int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) { + uint64_t qgroupid; + int r; + + assert(fd >= 0); + + r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid); + if (r < 0) + return r; + + return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max); +} + +int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) { _cleanup_close_ int fd = -1; fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW); if (fd < 0) return -errno; - return btrfs_quota_limit_fd(fd, referenced_max); + return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max); } int btrfs_resize_loopback_fd(int fd, uint64_t new_size, bool grow_only) { @@ -799,7 +960,7 @@ int btrfs_resize_loopback(const char *p, uint64_t new_size, bool grow_only) { return btrfs_resize_loopback_fd(fd, new_size, grow_only); } -static int make_qgroup_id(uint64_t level, uint64_t id, uint64_t *ret) { +int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) { assert(ret); if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT))) @@ -812,33 +973,175 @@ static int make_qgroup_id(uint64_t level, uint64_t id, uint64_t *ret) { return 0; } -static int qgroup_create_or_destroy(int fd, bool b, uint64_t level, uint64_t id) { +int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) { + assert(level || id); + + if (level) + *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT; + + if (id) + *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1); + + return 0; +} + +static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) { struct btrfs_ioctl_qgroup_create_args args = { .create = b, + .qgroupid = qgroupid, }; - + unsigned c; int r; - r = make_qgroup_id(level, id, (uint64_t*) &args.qgroupid); + r = btrfs_is_filesystem(fd); if (r < 0) return r; + if (r == 0) + return -ENOTTY; + + for (c = 0;; c++) { + if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) { + + if (errno == EBUSY && c < 10) { + (void) btrfs_quota_scan_wait(fd); + continue; + } + + return -errno; + } - if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) + break; + } + + return 0; +} + +int btrfs_qgroup_create(int fd, uint64_t qgroupid) { + return qgroup_create_or_destroy(fd, true, qgroupid); +} + +int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) { + return qgroup_create_or_destroy(fd, false, qgroupid); +} + +int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) { + _cleanup_free_ uint64_t *qgroups = NULL; + uint64_t subvol_id; + int i, n, r; + + /* Destroys the specified qgroup, but unassigns it from all + * its parents first. Also, it recursively destroys all + * qgroups it is assgined to that have the same id part of the + * qgroupid as the specified group. */ + + r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id); + if (r < 0) + return r; + + n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups); + if (n < 0) + return n; + + for (i = 0; i < n; i++) { + uint64_t id; + + r = btrfs_qgroupid_split(qgroups[i], NULL, &id); + if (r < 0) + return r; + + r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]); + if (r < 0) + return r; + + if (id != subvol_id) + continue; + + /* The parent qgroupid shares the same id part with + * us? If so, destroy it too. */ + + (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]); + } + + return btrfs_qgroup_destroy(fd, qgroupid); +} + +int btrfs_quota_scan_start(int fd) { + struct btrfs_ioctl_quota_rescan_args args = {}; + + assert(fd >= 0); + + if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0) return -errno; return 0; } -int btrfs_qgroup_create(int fd, uint64_t level, uint64_t id) { - return qgroup_create_or_destroy(fd, true, level, id); +int btrfs_quota_scan_wait(int fd) { + assert(fd >= 0); + + if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0) + return -errno; + + return 0; } -int btrfs_qgroup_destroy(int fd, uint64_t level, uint64_t id) { - return qgroup_create_or_destroy(fd, false, level, id); +int btrfs_quota_scan_ongoing(int fd) { + struct btrfs_ioctl_quota_rescan_args args = {}; + + assert(fd >= 0); + + if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0) + return -errno; + + return !!args.flags; } -static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, bool recursive) { +static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) { + struct btrfs_ioctl_qgroup_assign_args args = { + .assign = b, + .src = child, + .dst = parent, + }; + unsigned c; + int r; + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (r == 0) + return -ENOTTY; + + for (c = 0;; c++) { + r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args); + if (r < 0) { + if (errno == EBUSY && c < 10) { + (void) btrfs_quota_scan_wait(fd); + continue; + } + + return -errno; + } + + if (r == 0) + return 0; + + /* If the return value is > 0, we need to request a rescan */ + + (void) btrfs_quota_scan_start(fd); + return 1; + } +} + +int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) { + return qgroup_assign_or_unassign(fd, true, child, parent); +} + +int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) { + return qgroup_assign_or_unassign(fd, false, child, parent); +} + +static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) { struct btrfs_ioctl_search_args args = { .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, @@ -881,10 +1184,10 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol * already empty, this will just work. */ strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1); if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) { - (void) btrfs_qgroup_destroy(fd, 0, subvol_id); + (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */ return 0; } - if (!recursive || errno != ENOTEMPTY) + if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY) return -errno; /* OK, the subvolume is not empty, let's look for child @@ -939,7 +1242,7 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol if (isempty(ino_args.name)) /* Subvolume is in the top-level * directory of the subvolume. */ - r = subvol_remove_children(subvol_fd, p, sh->objectid, recursive); + r = subvol_remove_children(subvol_fd, p, sh->objectid, flags); else { _cleanup_close_ int child_fd = -1; @@ -951,7 +1254,7 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol if (child_fd < 0) return -errno; - r = subvol_remove_children(child_fd, p, sh->objectid, recursive); + r = subvol_remove_children(child_fd, p, sh->objectid, flags); } if (r < 0) return r; @@ -967,11 +1270,11 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0) return -errno; - (void) btrfs_qgroup_destroy(fd, 0, subvol_id); + (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); return 0; } -int btrfs_subvol_remove(const char *path, bool recursive) { +int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) { _cleanup_close_ int fd = -1; const char *subvolume; int r; @@ -986,11 +1289,194 @@ int btrfs_subvol_remove(const char *path, bool recursive) { if (fd < 0) return fd; - return subvol_remove_children(fd, subvolume, 0, recursive); + return subvol_remove_children(fd, subvolume, 0, flags); +} + +int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) { + return subvol_remove_children(fd, subvolume, 0, flags); +} + +int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) { + + struct btrfs_ioctl_search_args args = { + /* Tree of quota items */ + .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID, + + /* The object ID is always 0 */ + .key.min_objectid = 0, + .key.max_objectid = 0, + + /* Look precisely for the quota items */ + .key.min_type = BTRFS_QGROUP_LIMIT_KEY, + .key.max_type = BTRFS_QGROUP_LIMIT_KEY, + + /* For our qgroup */ + .key.min_offset = old_qgroupid, + .key.max_offset = old_qgroupid, + + /* No restrictions on the other components */ + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + + int r; + + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + return -errno; + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh); + struct btrfs_ioctl_qgroup_limit_args qargs; + unsigned c; + + /* Make sure we start the next search at least from this entry */ + btrfs_ioctl_search_args_set(&args, sh); + + if (sh->objectid != 0) + continue; + if (sh->type != BTRFS_QGROUP_LIMIT_KEY) + continue; + if (sh->offset != old_qgroupid) + continue; + + /* We found the entry, now copy things over. */ + + qargs = (struct btrfs_ioctl_qgroup_limit_args) { + .qgroupid = new_qgroupid, + + .lim.max_rfer = le64toh(qli->max_rfer), + .lim.max_excl = le64toh(qli->max_excl), + .lim.rsv_rfer = le64toh(qli->rsv_rfer), + .lim.rsv_excl = le64toh(qli->rsv_excl), + + .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER| + BTRFS_QGROUP_LIMIT_MAX_EXCL| + BTRFS_QGROUP_LIMIT_RSV_RFER| + BTRFS_QGROUP_LIMIT_RSV_EXCL), + }; + + for (c = 0;; c++) { + if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) { + if (errno == EBUSY && c < 10) { + (void) btrfs_quota_scan_wait(fd); + continue; + } + return -errno; + } + + break; + } + + return 1; + } + + /* Increase search key by one, to read the next item, if we can. */ + if (!btrfs_ioctl_search_args_inc(&args)) + break; + } + + return 0; } -int btrfs_subvol_remove_fd(int fd, const char *subvolume, bool recursive) { - return subvol_remove_children(fd, subvolume, 0, recursive); +static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) { + _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL; + bool copy_from_parent = false, insert_intermediary_qgroup = false; + int n_old_qgroups, n_old_parent_qgroups, r, i; + uint64_t old_parent_id; + + assert(fd >= 0); + + /* Copies a reduced form of quota information from the old to + * the new subvolume. */ + + n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups); + if (n_old_qgroups <= 0) /* Nothing to copy */ + return n_old_qgroups; + + r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id); + if (r < 0) + return r; + + n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups); + if (n_old_parent_qgroups < 0) + return n_old_parent_qgroups; + + for (i = 0; i < n_old_qgroups; i++) { + uint64_t id; + int j; + + r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id); + if (r < 0) + return r; + + if (id == old_subvol_id) { + /* The old subvolume was member of a qgroup + * that had the same id, but a different level + * as it self. Let's set up something similar + * in the destination. */ + insert_intermediary_qgroup = true; + break; + } + + for (j = 0; j < n_old_parent_qgroups; j++) + if (old_parent_qgroups[j] == old_qgroups[i]) { + /* The old subvolume shared a common + * parent qgroup with its parent + * subvolume. Let's set up something + * similar in the destination. */ + copy_from_parent = true; + } + } + + if (!insert_intermediary_qgroup && !copy_from_parent) + return 0; + + return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup); +} + +static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) { + uint64_t old_subtree_qgroup, new_subtree_qgroup; + bool changed; + int r; + + /* First copy the leaf limits */ + r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol); + if (r < 0) + return r; + changed = r > 0; + + /* Then, try to copy the subtree limits, if there are any. */ + r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree_qgroup); + if (r < 0) + return r; + if (r == 0) + return changed; + + r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree_qgroup); + if (r < 0) + return r; + if (r == 0) + return changed; + + r = btrfs_qgroup_copy_limits(fd, old_subtree_qgroup, new_subtree_qgroup); + if (r != 0) + return r; + + return changed; } static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolume, uint64_t old_subvol_id, BtrfsSnapshotFlags flags) { @@ -1021,12 +1507,12 @@ static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolum assert(subvolume); strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1); - vol_args.fd = old_fd; if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0) return -errno; - if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) + if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) && + !(flags & BTRFS_SNAPSHOT_QUOTA)) return 0; if (old_subvol_id == 0) { @@ -1039,6 +1525,17 @@ static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolum if (r < 0) return r; + if (flags & BTRFS_SNAPSHOT_QUOTA) + (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id); + + if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) { + + if (flags & BTRFS_SNAPSHOT_QUOTA) + (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id); + + return 0; + } + args.key.min_offset = args.key.max_offset = old_subvol_id; while (btrfs_ioctl_search_args_compare(&args) <= 0) { @@ -1156,6 +1653,9 @@ static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolum break; } + if (flags & BTRFS_SNAPSHOT_QUOTA) + (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id); + return 0; } @@ -1180,14 +1680,14 @@ int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlag r = copy_directory_fd(old_fd, new_path, true); if (r < 0) { - btrfs_subvol_remove(new_path, false); + (void) btrfs_subvol_remove(new_path, BTRFS_REMOVE_QUOTA); return r; } if (flags & BTRFS_SNAPSHOT_READ_ONLY) { r = btrfs_subvol_set_read_only(new_path, true); if (r < 0) { - btrfs_subvol_remove(new_path, false); + (void) btrfs_subvol_remove(new_path, BTRFS_REMOVE_QUOTA); return r; } } @@ -1218,3 +1718,302 @@ int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnaps return btrfs_subvol_snapshot_fd(old_fd, new_path, flags); } + +int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) { + + struct btrfs_ioctl_search_args args = { + /* Tree of quota items */ + .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID, + + /* Look precisely for the quota relation items */ + .key.min_type = BTRFS_QGROUP_RELATION_KEY, + .key.max_type = BTRFS_QGROUP_RELATION_KEY, + + /* No restrictions on the other components */ + .key.min_offset = 0, + .key.max_offset = (uint64_t) -1, + + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + + _cleanup_free_ uint64_t *items = NULL; + size_t n_items = 0, n_allocated = 0; + int r; + + assert(fd >= 0); + assert(ret); + + if (qgroupid == 0) { + r = btrfs_subvol_get_id_fd(fd, &qgroupid); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } + + args.key.min_objectid = args.key.max_objectid = qgroupid; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + return -errno; + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + + /* Make sure we start the next search at least from this entry */ + btrfs_ioctl_search_args_set(&args, sh); + + if (sh->type != BTRFS_QGROUP_RELATION_KEY) + continue; + if (sh->offset < sh->objectid) + continue; + if (sh->objectid != qgroupid) + continue; + + if (!GREEDY_REALLOC(items, n_allocated, n_items+1)) + return -ENOMEM; + + items[n_items++] = sh->offset; + } + + /* Increase search key by one, to read the next item, if we can. */ + if (!btrfs_ioctl_search_args_inc(&args)) + break; + } + + if (n_items <= 0) { + *ret = NULL; + return 0; + } + + *ret = items; + items = NULL; + + return (int) n_items; +} + +int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) { + _cleanup_free_ uint64_t *qgroups = NULL; + uint64_t parent_subvol; + bool changed = false; + int n = 0, r; + + assert(fd >= 0); + + /* + * Sets up the specified subvolume's qgroup automatically in + * one of two ways: + * + * If insert_intermediary_qgroup is false, the subvolume's + * leaf qgroup will be assigned to the same parent qgroups as + * the subvolume's parent subvolume. + * + * If insert_intermediary_qgroup is true a new intermediary + * higher-level qgroup is created, with a higher level number, + * but reusing the id of the subvolume. The level number is + * picked as one smaller than the lowest level qgroup the + * parent subvolume is a member of. If the parent subvolume's + * leaf qgroup is assigned to no higher-level qgroup a new + * qgroup of level 255 is created instead. Either way, the new + * qgroup is then assigned to the parent's higher-level + * qgroup, and the subvolume itself is assigned to it. + * + * If the subvolume is already assigned to a higher level + * qgroup, no operation is executed. + * + * Effectively this means: regardless if + * insert_intermediary_qgroup is true or not, after this + * function is invoked the subvolume will be accounted within + * the same qgroups as the parent. However, if it is true, it + * will also get its own higher-level qgroup, which may in + * turn be used by subvolumes created beneath this subvolume + * later on. + * + * This hence defines a simple default qgroup setup for + * subvolumes, as long as this function is invoked on each + * created subvolume: each subvolume is always accounting + * together with its immediate parents. Optionally, if + * insert_intermediary_qgroup is true, it will also get a + * qgroup that then includes all its own child subvolumes. + */ + + if (subvol_id == 0) { + r = btrfs_is_subvol(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + + r = btrfs_subvol_get_id_fd(fd, &subvol_id); + if (r < 0) + return r; + } + + n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups); + if (n < 0) + return n; + if (n > 0) /* already parent qgroups set up, let's bail */ + return 0; + + r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol); + if (r < 0) + return r; + + qgroups = mfree(qgroups); + n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups); + if (n < 0) + return n; + + if (insert_intermediary_qgroup) { + uint64_t lowest = 256, new_qgroupid; + bool created = false; + int i; + + /* Determine the lowest qgroup that the parent + * subvolume is assigned to. */ + + for (i = 0; i < n; i++) { + uint64_t level; + + r = btrfs_qgroupid_split(qgroups[i], &level, NULL); + if (r < 0) + return r; + + if (level < lowest) + lowest = level; + } + + if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */ + return -EBUSY; + + r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid); + if (r < 0) + return r; + + /* Create the new intermediary group, unless it already exists */ + r = btrfs_qgroup_create(fd, new_qgroupid); + if (r < 0 && r != -EEXIST) + return r; + if (r >= 0) + changed = created = true; + + for (i = 0; i < n; i++) { + r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]); + if (r < 0 && r != -EEXIST) { + if (created) + (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid); + + return r; + } + if (r >= 0) + changed = true; + } + + r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid); + if (r < 0 && r != -EEXIST) { + if (created) + (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid); + return r; + } + if (r >= 0) + changed = true; + + } else { + int i; + + /* Assign our subvolume to all the same qgroups as the parent */ + + for (i = 0; i < n; i++) { + r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]); + if (r < 0 && r != -EEXIST) + return r; + if (r >= 0) + changed = true; + } + } + + return changed; +} + +int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) { + _cleanup_close_ int fd = -1; + + fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY); + if (fd < 0) + return -errno; + + return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup); +} + +int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) { + + struct btrfs_ioctl_search_args args = { + /* Tree of tree roots */ + .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, + + /* Look precisely for the subvolume items */ + .key.min_type = BTRFS_ROOT_BACKREF_KEY, + .key.max_type = BTRFS_ROOT_BACKREF_KEY, + + /* No restrictions on the other components */ + .key.min_offset = 0, + .key.max_offset = (uint64_t) -1, + + .key.min_transid = 0, + .key.max_transid = (uint64_t) -1, + }; + int r; + + assert(fd >= 0); + assert(ret); + + if (subvol_id == 0) { + r = btrfs_subvol_get_id_fd(fd, &subvol_id); + if (r < 0) + return r; + } else { + r = btrfs_is_filesystem(fd); + if (r < 0) + return r; + if (!r) + return -ENOTTY; + } + + args.key.min_objectid = args.key.max_objectid = subvol_id; + + while (btrfs_ioctl_search_args_compare(&args) <= 0) { + const struct btrfs_ioctl_search_header *sh; + unsigned i; + + args.key.nr_items = 256; + if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + return -errno; + + if (args.key.nr_items <= 0) + break; + + FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) { + + if (sh->type != BTRFS_ROOT_BACKREF_KEY) + continue; + if (sh->objectid != subvol_id) + continue; + + *ret = sh->offset; + return 0; + } + } + + return -ENXIO; +} diff --git a/src/basic/btrfs-util.h b/src/basic/btrfs-util.h index ad7c7009abb..fc9efd72d52 100644 --- a/src/basic/btrfs-util.h +++ b/src/basic/btrfs-util.h @@ -47,45 +47,82 @@ typedef enum BtrfsSnapshotFlags { BTRFS_SNAPSHOT_FALLBACK_COPY = 1, BTRFS_SNAPSHOT_READ_ONLY = 2, BTRFS_SNAPSHOT_RECURSIVE = 4, + BTRFS_SNAPSHOT_QUOTA = 8, } BtrfsSnapshotFlags; +typedef enum BtrfsRemoveFlags { + BTRFS_REMOVE_RECURSIVE = 1, + BTRFS_REMOVE_QUOTA = 2, +} BtrfsRemoveFlags; + int btrfs_is_filesystem(int fd); int btrfs_is_subvol(int fd); +int btrfs_reflink(int infd, int outfd); +int btrfs_clone_range(int infd, uint64_t in_offset, int ofd, uint64_t out_offset, uint64_t sz); + +int btrfs_get_block_device_fd(int fd, dev_t *dev); +int btrfs_get_block_device(const char *path, dev_t *dev); + +int btrfs_defrag_fd(int fd); +int btrfs_defrag(const char *p); + +int btrfs_quota_enable_fd(int fd, bool b); +int btrfs_quota_enable(const char *path, bool b); + +int btrfs_quota_scan_start(int fd); +int btrfs_quota_scan_wait(int fd); +int btrfs_quota_scan_ongoing(int fd); + +int btrfs_resize_loopback_fd(int fd, uint64_t size, bool grow_only); +int btrfs_resize_loopback(const char *path, uint64_t size, bool grow_only); + int btrfs_subvol_make(const char *path); int btrfs_subvol_make_label(const char *path); int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlags flags); int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags); +int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags); +int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags); + int btrfs_subvol_set_read_only_fd(int fd, bool b); int btrfs_subvol_set_read_only(const char *path, bool b); int btrfs_subvol_get_read_only_fd(int fd); + int btrfs_subvol_get_id(int fd, const char *subvolume, uint64_t *ret); int btrfs_subvol_get_id_fd(int fd, uint64_t *ret); -int btrfs_subvol_get_info_fd(int fd, BtrfsSubvolInfo *info); -int btrfs_subvol_get_quota_fd(int fd, BtrfsQuotaInfo *quota); +int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret); -int btrfs_reflink(int infd, int outfd); -int btrfs_clone_range(int infd, uint64_t in_offset, int ofd, uint64_t out_offset, uint64_t sz); +int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *info); -int btrfs_get_block_device_fd(int fd, dev_t *dev); -int btrfs_get_block_device(const char *path, dev_t *dev); +int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret); -int btrfs_defrag_fd(int fd); -int btrfs_defrag(const char *p); +int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *quota); +int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *quota); -int btrfs_quota_enable_fd(int fd, bool b); -int btrfs_quota_enable(const char *path, bool b); +int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max); +int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max); -int btrfs_quota_limit_fd(int fd, uint64_t referenced_max); -int btrfs_quota_limit(const char *path, uint64_t referenced_max); +int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool new_qgroup); +int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup); -int btrfs_resize_loopback_fd(int fd, uint64_t size, bool grow_only); -int btrfs_resize_loopback(const char *path, uint64_t size, bool grow_only); +int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret); +int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id); + +int btrfs_qgroup_create(int fd, uint64_t qgroupid); +int btrfs_qgroup_destroy(int fd, uint64_t qgroupid); +int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid); + +int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max); +int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max); + +int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid); + +int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent); +int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent); -int btrfs_subvol_remove(const char *path, bool recursive); -int btrfs_subvol_remove_fd(int fd, const char *subvolume, bool recursive); +int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret); -int btrfs_qgroup_create(int fd, uint64_t level, uint64_t id); -int btrfs_qgroup_destroy(int fd, uint64_t level, uint64_t id); +int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *quota); +int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *quota); diff --git a/src/basic/missing.h b/src/basic/missing.h index 70d6c8308ee..306c56a1562 100644 --- a/src/basic/missing.h +++ b/src/basic/missing.h @@ -494,6 +494,10 @@ struct btrfs_ioctl_quota_ctl_args { #define BTRFS_QGROUP_LIMIT_KEY 244 #endif +#ifndef BTRFS_QGROUP_RELATION_KEY +#define BTRFS_QGROUP_RELATION_KEY 246 +#endif + #ifndef BTRFS_ROOT_BACKREF_KEY #define BTRFS_ROOT_BACKREF_KEY 144 #endif diff --git a/src/basic/path-util.c b/src/basic/path-util.c index 5cbfc145a48..10396233059 100644 --- a/src/basic/path-util.c +++ b/src/basic/path-util.c @@ -796,14 +796,11 @@ bool paths_check_timestamp(const char* const* paths, usec_t *timestamp, bool upd return changed; } -int fsck_exists(const char *fstype) { +static int binary_is_good(const char *binary) { _cleanup_free_ char *p = NULL, *d = NULL; - const char *checker; int r; - checker = strjoina("fsck.", fstype); - - r = find_binary(checker, true, &p); + r = find_binary(binary, true, &p); if (r < 0) return r; @@ -820,6 +817,22 @@ int fsck_exists(const char *fstype) { return 0; } +int fsck_exists(const char *fstype) { + const char *checker; + + checker = strjoina("fsck.", fstype); + + return binary_is_good(checker); +} + +int mkfs_exists(const char *fstype) { + const char *mkfs; + + mkfs = strjoina("mkfs.", fstype); + + return binary_is_good(mkfs); +} + char *prefix_root(const char *root, const char *path) { char *n, *p; size_t l; diff --git a/src/basic/path-util.h b/src/basic/path-util.h index 1eac89c51b6..71e25f1e577 100644 --- a/src/basic/path-util.h +++ b/src/basic/path-util.h @@ -63,6 +63,7 @@ int find_binary(const char *name, bool local, char **filename); bool paths_check_timestamp(const char* const* paths, usec_t *paths_ts_usec, bool update); int fsck_exists(const char *fstype); +int mkfs_exists(const char *fstype); /* Iterates through the path prefixes of the specified path, going up * the tree, to root. Also returns "" (and not "/"!) for the root diff --git a/src/basic/rm-rf.c b/src/basic/rm-rf.c index dbbe8176843..2ef63799d75 100644 --- a/src/basic/rm-rf.c +++ b/src/basic/rm-rf.c @@ -120,7 +120,7 @@ int rm_rf_children(int fd, RemoveFlags flags, struct stat *root_dev) { /* This could be a subvolume, try to remove it */ - r = btrfs_subvol_remove_fd(fd, de->d_name, true); + r = btrfs_subvol_remove_fd(fd, de->d_name, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); if (r < 0) { if (r != -ENOTTY && r != -EINVAL) { if (ret == 0) @@ -178,7 +178,7 @@ int rm_rf(const char *path, RemoveFlags flags) { if ((flags & (REMOVE_SUBVOLUME|REMOVE_ROOT|REMOVE_PHYSICAL)) == (REMOVE_SUBVOLUME|REMOVE_ROOT|REMOVE_PHYSICAL)) { /* Try to remove as subvolume first */ - r = btrfs_subvol_remove(path, true); + r = btrfs_subvol_remove(path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); if (r >= 0) return r; diff --git a/src/import/export-tar.c b/src/import/export-tar.c index 43fa9d1b038..a623745f5f2 100644 --- a/src/import/export-tar.c +++ b/src/import/export-tar.c @@ -78,7 +78,7 @@ TarExport *tar_export_unref(TarExport *e) { } if (e->temp_path) { - (void) btrfs_subvol_remove(e->temp_path, false); + (void) btrfs_subvol_remove(e->temp_path, BTRFS_REMOVE_QUOTA); free(e->temp_path); } @@ -283,7 +283,7 @@ int tar_export_start(TarExport *e, const char *path, int fd, ImportCompressType if (e->st.st_ino == 256) { /* might be a btrfs subvolume? */ BtrfsQuotaInfo q; - r = btrfs_subvol_get_quota_fd(sfd, &q); + r = btrfs_subvol_get_subtree_quota_fd(sfd, 0, &q); if (r >= 0) e->quota_referenced = q.referenced; diff --git a/src/import/pull-common.c b/src/import/pull-common.c index 1ddb48e03fd..90d28f04d9c 100644 --- a/src/import/pull-common.c +++ b/src/import/pull-common.c @@ -138,7 +138,7 @@ int pull_make_local_copy(const char *final, const char *image_root, const char * if (force_local) (void) rm_rf(p, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME); - r = btrfs_subvol_snapshot(final, p, 0); + r = btrfs_subvol_snapshot(final, p, BTRFS_SNAPSHOT_QUOTA); if (r == -ENOTTY) { r = copy_tree(final, p, false); if (r < 0) diff --git a/src/import/pull-dkr.c b/src/import/pull-dkr.c index 0dab184af12..b77f3e47f12 100644 --- a/src/import/pull-dkr.c +++ b/src/import/pull-dkr.c @@ -490,10 +490,16 @@ static int dkr_pull_make_local_copy(DkrPull *i, DkrPullVersion version) { return r; if (version == DKR_PULL_V2) { - char **k = NULL; + char **k; + STRV_FOREACH(k, i->ancestry) { - _cleanup_free_ char *d = strjoin(i->image_root, "/.dkr-", *k, NULL); - r = btrfs_subvol_remove(d, false); + _cleanup_free_ char *d; + + d = strjoin(i->image_root, "/.dkr-", *k, NULL); + if (!d) + return -ENOMEM; + + r = btrfs_subvol_remove(d, BTRFS_REMOVE_QUOTA); if (r < 0) return r; } @@ -531,7 +537,7 @@ static int dkr_pull_job_on_open_disk(PullJob *j) { const char *base_path; base_path = strjoina(i->image_root, "/.dkr-", base); - r = btrfs_subvol_snapshot(base_path, i->temp_path, BTRFS_SNAPSHOT_FALLBACK_COPY); + r = btrfs_subvol_snapshot(base_path, i->temp_path, BTRFS_SNAPSHOT_FALLBACK_COPY|BTRFS_SNAPSHOT_QUOTA); } else r = btrfs_subvol_make(i->temp_path); if (r < 0) diff --git a/src/machine/machined-dbus.c b/src/machine/machined-dbus.c index 41bb106d285..6e4c72e8a9d 100644 --- a/src/machine/machined-dbus.c +++ b/src/machine/machined-dbus.c @@ -79,7 +79,7 @@ static int property_get_pool_usage( if (fd >= 0) { BtrfsQuotaInfo q; - if (btrfs_subvol_get_quota_fd(fd, &q) >= 0) + if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0) usage = q.referenced; } @@ -115,7 +115,7 @@ static int property_get_pool_limit( if (fd >= 0) { BtrfsQuotaInfo q; - if (btrfs_subvol_get_quota_fd(fd, &q) >= 0) + if (btrfs_subvol_get_subtree_quota_fd(fd, 0, &q) >= 0) size = q.referenced_max; } @@ -831,7 +831,9 @@ static int method_set_pool_limit(sd_bus_message *message, void *userdata, sd_bus if (r < 0 && r != -ENODEV) /* ignore ENODEV, as that's what is returned if the file system is not on loopback */ return sd_bus_error_set_errnof(error, r, "Failed to adjust loopback limit: %m"); - r = btrfs_quota_limit("/var/lib/machines", limit); + (void) btrfs_qgroup_set_limit("/var/lib/machines", 0, limit); + + r = btrfs_subvol_set_subtree_quota_limit("/var/lib/machines", 0, limit); if (r == -ENOTTY) return sd_bus_error_setf(error, SD_BUS_ERROR_NOT_SUPPORTED, "Quota is only supported on btrfs."); if (r < 0) diff --git a/src/nspawn/nspawn.c b/src/nspawn/nspawn.c index fca2b72edd4..ef042d518bb 100644 --- a/src/nspawn/nspawn.c +++ b/src/nspawn/nspawn.c @@ -3096,7 +3096,7 @@ int main(int argc, char *argv[]) { goto finish; } - r = btrfs_subvol_snapshot(arg_directory, np, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); + r = btrfs_subvol_snapshot(arg_directory, np, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); if (r < 0) { log_error_errno(r, "Failed to create snapshot %s from %s: %m", np, arg_directory); goto finish; @@ -3120,7 +3120,7 @@ int main(int argc, char *argv[]) { } if (arg_template) { - r = btrfs_subvol_snapshot(arg_template, arg_directory, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); + r = btrfs_subvol_snapshot(arg_template, arg_directory, (arg_read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); if (r == -EEXIST) { if (!arg_quiet) log_info("Directory %s already exists, not populating from template %s.", arg_directory, arg_template); @@ -3591,7 +3591,7 @@ finish: if (remove_subvol && arg_directory) { int k; - k = btrfs_subvol_remove(arg_directory, true); + k = btrfs_subvol_remove(arg_directory, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); if (k < 0) log_warning_errno(k, "Cannot remove subvolume '%s', ignoring: %m", arg_directory); } diff --git a/src/shared/machine-image.c b/src/shared/machine-image.c index 9c1e4d5e13e..8ed3ad7f44a 100644 --- a/src/shared/machine-image.c +++ b/src/shared/machine-image.c @@ -176,11 +176,10 @@ static int image_make( return r; if (r) { BtrfsSubvolInfo info; - BtrfsQuotaInfo quota; /* It's a btrfs subvolume */ - r = btrfs_subvol_get_info_fd(fd, &info); + r = btrfs_subvol_get_info_fd(fd, 0, &info); if (r < 0) return r; @@ -195,13 +194,17 @@ static int image_make( if (r < 0) return r; - r = btrfs_subvol_get_quota_fd(fd, "a); - if (r >= 0) { - (*ret)->usage = quota.referenced; - (*ret)->usage_exclusive = quota.exclusive; + if (btrfs_quota_scan_ongoing(fd) == 0) { + BtrfsQuotaInfo quota; - (*ret)->limit = quota.referenced_max; - (*ret)->limit_exclusive = quota.exclusive_max; + r = btrfs_subvol_get_subtree_quota_fd(fd, 0, "a); + if (r >= 0) { + (*ret)->usage = quota.referenced; + (*ret)->usage_exclusive = quota.exclusive; + + (*ret)->limit = quota.referenced_max; + (*ret)->limit_exclusive = quota.exclusive_max; + } } return 1; @@ -397,7 +400,7 @@ int image_remove(Image *i) { switch (i->type) { case IMAGE_SUBVOLUME: - r = btrfs_subvol_remove(i->path, true); + r = btrfs_subvol_remove(i->path, BTRFS_REMOVE_RECURSIVE|BTRFS_REMOVE_QUOTA); if (r < 0) return r; break; @@ -587,7 +590,12 @@ int image_clone(Image *i, const char *new_name, bool read_only) { case IMAGE_DIRECTORY: new_path = strjoina("/var/lib/machines/", new_name); - r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE); + r = btrfs_subvol_snapshot(i->path, new_path, (read_only ? BTRFS_SNAPSHOT_READ_ONLY : 0) | BTRFS_SNAPSHOT_FALLBACK_COPY | BTRFS_SNAPSHOT_RECURSIVE | BTRFS_SNAPSHOT_QUOTA); + + /* Enable "subtree" quotas for the copy, if we didn't + * copy any quota from the source. */ + (void) btrfs_subvol_auto_qgroup(i->path, 0, true); + break; case IMAGE_RAW: @@ -629,6 +637,10 @@ int image_read_only(Image *i, bool b) { switch (i->type) { case IMAGE_SUBVOLUME: + + /* Note that we set the flag only on the top-level + * subvolume of the image. */ + r = btrfs_subvol_set_read_only(i->path, b); if (r < 0) return r; @@ -729,7 +741,14 @@ int image_set_limit(Image *i, uint64_t referenced_max) { if (i->type != IMAGE_SUBVOLUME) return -EOPNOTSUPP; - return btrfs_quota_limit(i->path, referenced_max); + /* We set the quota both for the subvolume as well as for the + * subtree. The latter is mostly for historical reasons, since + * we didn't use to have a concept of subtree quota, and hence + * only modified the subvolume quota. */ + + (void) btrfs_qgroup_set_limit(i->path, 0, referenced_max); + (void) btrfs_subvol_auto_qgroup(i->path, 0, true); + return btrfs_subvol_set_subtree_quota_limit(i->path, 0, referenced_max); } int image_name_lock(const char *name, int operation, LockFile *ret) { diff --git a/src/shared/machine-pool.c b/src/shared/machine-pool.c index 8af78f47d5e..1da7d0815f4 100644 --- a/src/shared/machine-pool.c +++ b/src/shared/machine-pool.c @@ -170,7 +170,7 @@ int setup_machine_directory(uint64_t size, sd_bus_error *error) { }; _cleanup_close_ int fd = -1, control = -1, loop = -1; _cleanup_free_ char* loopdev = NULL; - char tmpdir[] = "/tmp/import-mount.XXXXXX", *mntdir = NULL; + char tmpdir[] = "/tmp/machine-pool.XXXXXX", *mntdir = NULL; bool tmpdir_made = false, mntdir_made = false, mntdir_mounted = false; char buf[FORMAT_BYTES_MAX]; int r, nr = -1; @@ -194,14 +194,35 @@ int setup_machine_directory(uint64_t size, sd_bus_error *error) { r = btrfs_quota_enable("/var/lib/machines", true); if (r < 0) - log_warning_errno(r, "Failed to enable quota, ignoring: %m"); + log_warning_errno(r, "Failed to enable quota for /var/lib/machines, ignoring: %m"); + r = btrfs_subvol_auto_qgroup("/var/lib/machines", 0, true); + if (r < 0) + log_warning_errno(r, "Failed to set up default quota hierarchy for /var/lib/machines, ignoring: %m"); + + return 1; + } + + if (path_is_mount_point("/var/lib/machines", AT_SYMLINK_FOLLOW) > 0) { + log_debug("/var/lib/machines is already a mount point, not creating loopback file for it."); return 0; } - if (path_is_mount_point("/var/lib/machines", AT_SYMLINK_FOLLOW) > 0 || - dir_is_empty("/var/lib/machines") == 0) - return sd_bus_error_setf(error, SD_BUS_ERROR_INVALID_ARGS, "/var/lib/machines is not a btrfs file system. Operation is not supported on legacy file systems."); + r = dir_is_populated("/var/lib/machines"); + if (r < 0 && r != -ENOENT) + return r; + if (r > 0) { + log_debug("/var/log/machines is already populated, not creating loopback file for it."); + return 0; + } + + r = mkfs_exists("btrfs"); + if (r == -ENOENT) { + log_debug("mkfs.btrfs is missing, cannot create loopback file for /var/lib/machines."); + return 0; + } + if (r < 0) + return r; fd = setup_machine_raw(size, error); if (fd < 0) @@ -266,6 +287,10 @@ int setup_machine_directory(uint64_t size, sd_bus_error *error) { if (r < 0) log_warning_errno(r, "Failed to enable quota, ignoring: %m"); + r = btrfs_subvol_auto_qgroup(mntdir, 0, true); + if (r < 0) + log_warning_errno(r, "Failed to set up default quota hierarchy, ignoring: %m"); + if (chmod(mntdir, 0700) < 0) { r = sd_bus_error_set_errnof(error, errno, "Failed to fix owner: %m"); goto fail; @@ -286,7 +311,7 @@ int setup_machine_directory(uint64_t size, sd_bus_error *error) { (void) rmdir(mntdir); (void) rmdir(tmpdir); - return 0; + return 1; fail: if (mntdir_mounted) @@ -370,9 +395,11 @@ int grow_machine_directory(void) { if (r <= 0) return r; - r = btrfs_quota_limit("/var/lib/machines", new_size); - if (r < 0) - return r; + /* Also bump the quota, of both the subvolume leaf qgroup, as + * well as of any subtree quota group by the same id but a + * higher level, if it exists. */ + (void) btrfs_qgroup_set_limit("/var/lib/machines", 0, new_size); + (void) btrfs_subvol_set_subtree_quota_limit("/var/lib/machines", 0, new_size); log_info("Grew /var/lib/machines btrfs loopback file system to %s.", format_bytes(buf, sizeof(buf), new_size)); return 1; diff --git a/src/test/test-btrfs.c b/src/test/test-btrfs.c index e4771c9dd73..60d1258a9b4 100644 --- a/src/test/test-btrfs.c +++ b/src/test/test-btrfs.c @@ -27,17 +27,17 @@ #include "btrfs-util.h" int main(int argc, char *argv[]) { + BtrfsQuotaInfo quota; int r, fd; fd = open("/", O_RDONLY|O_CLOEXEC|O_DIRECTORY); if (fd < 0) log_error_errno(errno, "Failed to open root directory: %m"); else { - BtrfsSubvolInfo info; - BtrfsQuotaInfo quota; char ts[FORMAT_TIMESTAMP_MAX], bs[FORMAT_BYTES_MAX]; + BtrfsSubvolInfo info; - r = btrfs_subvol_get_info_fd(fd, &info); + r = btrfs_subvol_get_info_fd(fd, 0, &info); if (r < 0) log_error_errno(r, "Failed to get subvolume info: %m"); else { @@ -45,7 +45,7 @@ int main(int argc, char *argv[]) { log_info("read-only (search): %s", yes_no(info.read_only)); } - r = btrfs_subvol_get_quota_fd(fd, "a); + r = btrfs_qgroup_get_quota_fd(fd, 0, "a); if (r < 0) log_error_errno(r, "Failed to get quota info: %m"); else { @@ -80,15 +80,15 @@ int main(int argc, char *argv[]) { if (r < 0) log_error_errno(r, "Failed to make snapshot: %m"); - r = btrfs_subvol_remove("/xxxtest", false); + r = btrfs_subvol_remove("/xxxtest", BTRFS_REMOVE_QUOTA); if (r < 0) log_error_errno(r, "Failed to remove subvolume: %m"); - r = btrfs_subvol_remove("/xxxtest2", false); + r = btrfs_subvol_remove("/xxxtest2", BTRFS_REMOVE_QUOTA); if (r < 0) log_error_errno(r, "Failed to remove subvolume: %m"); - r = btrfs_subvol_remove("/xxxtest3", false); + r = btrfs_subvol_remove("/xxxtest3", BTRFS_REMOVE_QUOTA); if (r < 0) log_error_errno(r, "Failed to remove subvolume: %m"); @@ -96,7 +96,7 @@ int main(int argc, char *argv[]) { if (r < 0) log_error_errno(r, "Failed to make snapshot: %m"); - r = btrfs_subvol_remove("/etc2", false); + r = btrfs_subvol_remove("/etc2", BTRFS_REMOVE_QUOTA); if (r < 0) log_error_errno(r, "Failed to remove subvolume: %m"); @@ -137,13 +137,61 @@ int main(int argc, char *argv[]) { if (r < 0) log_error_errno(r, "Failed to snapshot subvolume: %m"); - r = btrfs_subvol_remove("/xxxrectest", true); + r = btrfs_subvol_remove("/xxxrectest", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE); if (r < 0) log_error_errno(r, "Failed to recursively remove subvolume: %m"); - r = btrfs_subvol_remove("/xxxrectest2", true); + r = btrfs_subvol_remove("/xxxrectest2", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE); if (r < 0) log_error_errno(r, "Failed to recursively remove subvolume: %m"); + r = btrfs_subvol_make("/xxxquotatest"); + if (r < 0) + log_error_errno(r, "Failed to make subvolume: %m"); + + r = btrfs_subvol_auto_qgroup("/xxxquotatest", 0, true); + if (r < 0) + log_error_errno(r, "Failed to set up auto qgroup: %m"); + + r = btrfs_subvol_make("/xxxquotatest/beneath"); + if (r < 0) + log_error_errno(r, "Failed to make subvolume: %m"); + + r = btrfs_subvol_auto_qgroup("/xxxquotatest/beneath", 0, false); + if (r < 0) + log_error_errno(r, "Failed to set up auto qgroup: %m"); + + r = btrfs_qgroup_set_limit("/xxxquotatest/beneath", 0, 4ULL * 1024 * 1024 * 1024); + if (r < 0) + log_error_errno(r, "Failed to set up quota limit: %m"); + + r = btrfs_subvol_set_subtree_quota_limit("/xxxquotatest", 0, 5ULL * 1024 * 1024 * 1024); + if (r < 0) + log_error_errno(r, "Failed to set up quota limit: %m"); + + r = btrfs_subvol_snapshot("/xxxquotatest", "/xxxquotatest2", BTRFS_SNAPSHOT_RECURSIVE|BTRFS_SNAPSHOT_QUOTA); + if (r < 0) + log_error_errno(r, "Failed to setup snapshot: %m"); + + r = btrfs_qgroup_get_quota("/xxxquotatest2/beneath", 0, "a); + if (r < 0) + log_error_errno(r, "Failed to query quota: %m"); + + assert_se(quota.referenced_max == 4ULL * 1024 * 1024 * 1024); + + r = btrfs_subvol_get_subtree_quota("/xxxquotatest2", 0, "a); + if (r < 0) + log_error_errno(r, "Failed to query quota: %m"); + + assert_se(quota.referenced_max == 5ULL * 1024 * 1024 * 1024); + + r = btrfs_subvol_remove("/xxxquotatest", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE); + if (r < 0) + log_error_errno(r, "Failed remove subvolume: %m"); + + r = btrfs_subvol_remove("/xxxquotatest2", BTRFS_REMOVE_QUOTA|BTRFS_REMOVE_RECURSIVE); + if (r < 0) + log_error_errno(r, "Failed remove subvolume: %m"); + return 0; } -- 2.39.2