From: Daan De Meyer Date: Mon, 11 May 2026 19:58:24 +0000 (+0200) Subject: btrfs-util: Make nested subvolume operations work unpriv X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=60d6f70ca4ced0a426800c55eaefa17438b89c3e;p=thirdparty%2Fsystemd.git btrfs-util: Make nested subvolume operations work unpriv BTRFS_IOC_SEARCH is only available to root in the initial userns. This means we fail to recursively snapshot even if a subvolume has no nested subvolumes at the moment. Let's fix this by using the newer btrfs ioctls which do work even if we don't have CAP_SYS_ADMIN in the initial userns. --- diff --git a/src/shared/btrfs-util.c b/src/shared/btrfs-util.c index bb3e28f6bbb..9c86e49f0f8 100644 --- a/src/shared/btrfs-util.c +++ b/src/shared/btrfs-util.c @@ -863,19 +863,6 @@ int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) { } static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) { - struct btrfs_ioctl_search_args args = { - .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, - - .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID, - .key.max_objectid = BTRFS_LAST_FREE_OBJECTID, - - .key.min_type = BTRFS_ROOT_BACKREF_KEY, - .key.max_type = BTRFS_ROOT_BACKREF_KEY, - - .key.min_transid = 0, - .key.max_transid = UINT64_MAX, - }; - struct btrfs_ioctl_vol_args vol_args = {}; _cleanup_close_ int subvol_fd = -EBADF; bool made_writable = false; @@ -923,43 +910,29 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY) return -errno; - /* OK, the subvolume is not empty, let's look for child - * subvolumes, and remove them, first */ - - args.key.min_offset = args.key.max_offset = subvol_id; + /* OK, the subvolume is not empty, let's look for child subvolumes, and remove them, first. + * BTRFS_IOC_GET_SUBVOL_ROOTREF and BTRFS_IOC_INO_LOOKUP_USER (kernel 4.18+) enumerate child + * subvolumes without requiring CAP_SYS_ADMIN in the initial user namespace, unlike the older + * BTRFS_IOC_TREE_SEARCH ioctl. */ - while (btrfs_ioctl_search_args_compare(&args) <= 0) { - struct btrfs_ioctl_search_header sh; - const void *body; + struct btrfs_ioctl_get_subvol_rootref_args rootref_args = {}; - args.key.nr_items = 256; - if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + for (;;) { + /* BTRFS_IOC_GET_SUBVOL_ROOTREF returns up to BTRFS_MAX_ROOTREF_BUFFER_NUM entries per + * call. If more are available, it returns -EOVERFLOW with num_items filled in and + * min_treeid advanced so we can resume on the next iteration. */ + int ioctl_ret = ioctl(subvol_fd, BTRFS_IOC_GET_SUBVOL_ROOTREF, &rootref_args); + if (ioctl_ret < 0 && errno != EOVERFLOW) return -errno; - if (args.key.nr_items <= 0) - break; - - FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) { - _cleanup_free_ char *p = NULL; - - btrfs_ioctl_search_args_set(&args, &sh); - - if (sh.type != BTRFS_ROOT_BACKREF_KEY) - continue; - if (sh.offset != subvol_id) - continue; - - const struct btrfs_root_ref *ref = body; - p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len)); - if (!p) - return -ENOMEM; + for (uint8_t i = 0; i < rootref_args.num_items; i++) { + uint64_t child_subvol_id = rootref_args.rootref[i].treeid; - struct btrfs_ioctl_ino_lookup_args ino_args = { - .treeid = subvol_id, - .objectid = htole64(ref->dirid), + struct btrfs_ioctl_ino_lookup_user_args lookup_args = { + .dirid = rootref_args.rootref[i].dirid, + .treeid = child_subvol_id, }; - - if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0) + if (ioctl(subvol_fd, BTRFS_IOC_INO_LOOKUP_USER, &lookup_args) < 0) return -errno; if (!made_writable) { @@ -970,29 +943,26 @@ static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol made_writable = true; } - if (isempty(ino_args.name)) - /* Subvolume is in the top-level - * directory of the subvolume. */ - r = subvol_remove_children(subvol_fd, p, sh.objectid, flags); + if (isempty(lookup_args.path)) + /* Subvolume is in the top-level directory of the subvolume. */ + r = subvol_remove_children(subvol_fd, lookup_args.name, child_subvol_id, flags); else { _cleanup_close_ int child_fd = -EBADF; - /* Subvolume is somewhere further down, - * hence we need to open the + /* Subvolume is somewhere further down, hence we need to open the * containing directory first */ - child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); + child_fd = openat(subvol_fd, lookup_args.path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW); if (child_fd < 0) return -errno; - r = subvol_remove_children(child_fd, p, sh.objectid, flags); + r = subvol_remove_children(child_fd, lookup_args.name, child_subvol_id, flags); } if (r < 0) return r; } - /* Increase search key by one, to read the next item, if we can. */ - if (!btrfs_ioctl_search_args_inc(&args)) + if (ioctl_ret >= 0) break; } @@ -1223,19 +1193,6 @@ static int subvol_snapshot_children( uint64_t old_subvol_id, BtrfsSnapshotFlags flags) { - struct btrfs_ioctl_search_args args = { - .key.tree_id = BTRFS_ROOT_TREE_OBJECTID, - - .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID, - .key.max_objectid = BTRFS_LAST_FREE_OBJECTID, - - .key.min_type = BTRFS_ROOT_BACKREF_KEY, - .key.max_type = BTRFS_ROOT_BACKREF_KEY, - - .key.min_transid = 0, - .key.max_transid = UINT64_MAX, - }; - struct btrfs_ioctl_vol_args_v2 vol_args = { .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0, .fd = old_fd, @@ -1293,50 +1250,36 @@ static int subvol_snapshot_children( return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0; } - args.key.min_offset = args.key.max_offset = old_subvol_id; + /* Enumerate child subvolumes via BTRFS_IOC_GET_SUBVOL_ROOTREF + BTRFS_IOC_INO_LOOKUP_USER + * (kernel 4.18+), neither of which requires CAP_SYS_ADMIN, unlike BTRFS_IOC_TREE_SEARCH. */ - while (btrfs_ioctl_search_args_compare(&args) <= 0) { - struct btrfs_ioctl_search_header sh; - const void *body; + struct btrfs_ioctl_get_subvol_rootref_args rootref_args = {}; - args.key.nr_items = 256; - if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) + for (;;) { + /* BTRFS_IOC_GET_SUBVOL_ROOTREF returns up to BTRFS_MAX_ROOTREF_BUFFER_NUM entries per + * call. If more are available, it returns -EOVERFLOW with num_items filled in and + * min_treeid advanced so we can resume on the next iteration. */ + int ioctl_ret = ioctl(old_fd, BTRFS_IOC_GET_SUBVOL_ROOTREF, &rootref_args); + if (ioctl_ret < 0 && errno != EOVERFLOW) return -errno; - if (args.key.nr_items <= 0) - break; - - FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) { - _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL; + for (uint8_t i = 0; i < rootref_args.num_items; i++) { + _cleanup_free_ char *c = NULL, *np = NULL; _cleanup_close_ int old_child_fd = -EBADF, new_child_fd = -EBADF; - - btrfs_ioctl_search_args_set(&args, &sh); - - if (sh.type != BTRFS_ROOT_BACKREF_KEY) - continue; - - /* Avoid finding the source subvolume a second time */ - if (sh.offset != old_subvol_id) - continue; + uint64_t child_subvol_id = rootref_args.rootref[i].treeid; /* Avoid running into loops if the new subvolume is below the old one. */ - if (sh.objectid == new_subvol_id) + if (child_subvol_id == new_subvol_id) continue; - const struct btrfs_root_ref *ref = body; - p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len)); - if (!p) - return -ENOMEM; - - struct btrfs_ioctl_ino_lookup_args ino_args = { - .treeid = old_subvol_id, - .objectid = htole64(ref->dirid), + struct btrfs_ioctl_ino_lookup_user_args lookup_args = { + .dirid = rootref_args.rootref[i].dirid, + .treeid = child_subvol_id, }; - - if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0) + if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP_USER, &lookup_args) < 0) return -errno; - c = path_join(ino_args.name, p); + c = path_join(lookup_args.path, lookup_args.name); if (!c) return -ENOMEM; @@ -1344,7 +1287,7 @@ static int subvol_snapshot_children( if (old_child_fd < 0) return -errno; - np = path_join(subvolume, ino_args.name); + np = path_join(subvolume, lookup_args.path); if (!np) return -ENOMEM; @@ -1369,7 +1312,7 @@ static int subvol_snapshot_children( /* When btrfs clones the subvolumes, child subvolumes appear as empty * directories. Remove them, so that we can create a new snapshot in their place */ - if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) { + if (unlinkat(new_child_fd, lookup_args.name, AT_REMOVEDIR) < 0) { int k = -errno; if (flags & BTRFS_SNAPSHOT_READ_ONLY) @@ -1378,7 +1321,7 @@ static int subvol_snapshot_children( return k; } - r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh.objectid, + r = subvol_snapshot_children(old_child_fd, new_child_fd, lookup_args.name, child_subvol_id, flags & ~(BTRFS_SNAPSHOT_FALLBACK_COPY|BTRFS_SNAPSHOT_LOCK_BSD)); /* Restore the readonly flag */ @@ -1394,8 +1337,7 @@ static int subvol_snapshot_children( return r; } - /* Increase search key by one, to read the next item, if we can. */ - if (!btrfs_ioctl_search_args_inc(&args)) + if (ioctl_ret >= 0) break; }