/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <linux/btrfs.h>
#include <linux/btrfs_tree.h>
#include <linux/magic.h>
#include <sys/sysmacros.h>

#include "alloc-util.h"
#include "btrfs-util.h"
#include "chattr-util.h"
#include "errno-util.h"
#include "path-util.h"
#include "sparse-endian.h"
#include "stat-util.h"
#include "string-util.h"
#include "time-util.h"
/* WARNING: Be careful with file system ioctls! When we get an fd, we
 * need to make sure it refers to a regular file or directory only, or
 * that it is located on btrfs, before invoking any btrfs ioctls. The
 * ioctl numbers are reused by some device drivers (such as DRM), and
 * hence might have bad effects when invoked on device nodes (that
 * reference drivers) rather than on fds for normal files or
 * directories. */
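
/* A minimal sketch of the check pattern described above, before issuing a btrfs ioctl on an fd of
 * unknown origin. The helper name check_then_ioctl() is made up for illustration and is not part of
 * this file's API:
 *
 *     static int check_then_ioctl(int fd) {
 *             struct stat st;
 *
 *             if (fstat(fd, &st) < 0)
 *                     return -errno;
 *             if (!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode))
 *                     return -ENOTTY;            // never issue btrfs ioctls on device nodes
 *             if (fd_is_fs_type(fd, BTRFS_SUPER_MAGIC) <= 0)
 *                     return -ENOTTY;            // not on btrfs (or we could not tell), bail out
 *
 *             return RET_NERRNO(ioctl(fd, BTRFS_IOC_SYNC, NULL));
 *     }
 */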
int btrfs_is_subvol_at(int dir_fd, const char *path) {
        struct stat st;

        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);

        /* On btrfs, subvolumes always have the inode 256 */

        if (fstatat(dir_fd, strempty(path), &st, isempty(path) ? AT_EMPTY_PATH : 0) < 0)
                return -errno;

        if (!btrfs_might_be_subvol(&st))
                return 0;

        return is_fs_type_at(dir_fd, path, BTRFS_SUPER_MAGIC);
}
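
/* Illustrative call site (a sketch, not code used elsewhere in this file): checking whether a
 * directory is a btrfs subvolume before treating it as one:
 *
 *     r = btrfs_is_subvol_at(AT_FDCWD, "/var/lib/machines");
 *     if (r < 0)
 *             return log_error_errno(r, "Failed to check subvolume: %m");
 *     if (r == 0)
 *             log_debug("Not a btrfs subvolume, falling back to plain directory handling.");
 */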
int btrfs_subvol_set_read_only_at(int dir_fd, const char *path, bool b) {
        _cleanup_close_ int fd = -EBADF;
        uint64_t flags, nflags;
        struct stat st;

        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);

        fd = xopenat(dir_fd, path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
        if (fd < 0)
                return fd;

        if (fstat(fd, &st) < 0)
                return -errno;

        if (!btrfs_might_be_subvol(&st))
                return -EINVAL;

        if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
                return -errno;

        nflags = UPDATE_FLAG(flags, BTRFS_SUBVOL_RDONLY, b);

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags));
}

int btrfs_subvol_get_read_only_fd(int fd) {
        uint64_t flags;
        struct stat st;

        assert(fd >= 0);

        if (fstat(fd, &st) < 0)
                return -errno;

        if (!btrfs_might_be_subvol(&st))
                return -EINVAL;

        if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
                return -errno;

        return !!(flags & BTRFS_SUBVOL_RDONLY);
}
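
/* Illustrative pairing of the two helpers above (a sketch): temporarily lift the read-only flag on a
 * subvolume fd and restore it afterwards, the same pattern subvol_remove_children() and
 * subvol_snapshot_children() use further down in this file:
 *
 *     int was_ro = btrfs_subvol_get_read_only_fd(fd);
 *     if (was_ro < 0)
 *             return was_ro;
 *     if (was_ro > 0)
 *             (void) btrfs_subvol_set_read_only_fd(fd, false);
 *     // ... modify the subvolume ...
 *     if (was_ro > 0)
 *             (void) btrfs_subvol_set_read_only_fd(fd, true);
 */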
int btrfs_get_block_device_at(int dir_fd, const char *path, dev_t *ret) {
        struct btrfs_ioctl_fs_info_args fsi = {};
        _cleanup_close_ int fd = -EBADF;
        uint64_t id;
        int r;

        assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
        assert(ret);

        fd = xopenat(dir_fd, path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
        if (fd < 0)
                return fd;

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (!r)
                return -ENOTTY;

        if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
                return -errno;

        /* We won't do this for btrfs RAID */
        if (fsi.num_devices != 1) {
                return -EMEDIUMTYPE;
        }

        for (id = 1; id <= fsi.max_id; id++) {
                struct btrfs_ioctl_dev_info_args di = {
                        .devid = id,
                };
                struct stat st;

                if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
                        if (errno == ENODEV)
                                continue;

                        return -errno;
                }

                /* For the root fs (when no initrd is involved) btrfs returns /dev/root on kernels from
                 * the past few years. That sucks, as we have no API to determine the actual root then.
                 * Let's return a recognizable error for this case, so that the caller can maybe print a
                 * nice message about this.
                 *
                 * https://bugzilla.kernel.org/show_bug.cgi?id=89721 */
                if (path_equal((char*) di.path, "/dev/root"))
                        return -EUCLEAN;

                if (stat((char*) di.path, &st) < 0)
                        return -errno;

                if (!S_ISBLK(st.st_mode))
                        return -ENOTBLK;

                if (major(st.st_rdev) == 0)
                        return -ENODEV;

                *ret = st.st_rdev;
                return 1;
        }

        return -ENODEV;
}
int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
        struct btrfs_ioctl_ino_lookup_args args = {
                .objectid = BTRFS_FIRST_FREE_OBJECTID,
        };
        int r;

        assert(fd >= 0);
        assert(ret);

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (!r)
                return -ENOTTY;

        if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
                return -errno;

        *ret = args.treeid;
        return 0;
}

int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
        _cleanup_close_ int subvol_fd = -EBADF;

        assert(fd >= 0 || fd == AT_FDCWD);

        subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
        if (subvol_fd < 0)
                return -errno;

        return btrfs_subvol_get_id_fd(subvol_fd, ret);
}
static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
        assert(args);

        /* the objectid, type, offset together make up the btrfs key,
         * which is considered a single 136-bit integer when
         * comparing. This call increases the counter by one, carrying
         * the overflow from one component into the next. */

        if (args->key.min_offset < UINT64_MAX) {
                args->key.min_offset++;
                return true;
        }

        if (args->key.min_type < UINT8_MAX) {
                args->key.min_type++;
                args->key.min_offset = 0;
                return true;
        }

        if (args->key.min_objectid < UINT64_MAX) {
                args->key.min_objectid++;
                args->key.min_offset = 0;
                args->key.min_type = 0;
                return true;
        }

        return false;
}
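
/* For illustration, the increment behaves like adding 1 to the 136-bit concatenation
 * (objectid:64 | type:8 | offset:64). A hypothetical walk-through (values picked arbitrarily):
 *
 *     (5, 132, 18446744073709551615)  ++  ->  (5, 133, 0)      offset overflowed into type
 *     (5, 255, 18446744073709551615)  ++  ->  (6, 0, 0)        type overflowed into objectid
 *     (UINT64_MAX, 255, UINT64_MAX)   ++  ->  returns false    nothing left to enumerate
 */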
static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
        assert(args);
        assert(h);

        args->key.min_objectid = h->objectid;
        args->key.min_type = h->type;
        args->key.min_offset = h->offset;
}

static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
        int r;

        assert(args);

        /* Compare min and max */

        r = CMP(args->key.min_objectid, args->key.max_objectid);
        if (r != 0)
                return r;

        r = CMP(args->key.min_type, args->key.max_type);
        if (r != 0)
                return r;

        return CMP(args->key.min_offset, args->key.max_offset);
}
typedef struct BtrfsForeachIterator {
        const struct btrfs_ioctl_search_args *args;
        size_t offset;
        unsigned index;
        struct btrfs_ioctl_search_header *header;
        const void **body;
} BtrfsForeachIterator;

static int btrfs_iterate(BtrfsForeachIterator *i) {
        assert(i);
        assert(i->args);
        assert(i->header);
        assert(i->body);

        if (i->index >= i->args->key.nr_items)
                return 0; /* end reached */

        assert_cc(BTRFS_SEARCH_ARGS_BUFSIZE >= sizeof(struct btrfs_ioctl_search_header));
        if (i->offset > BTRFS_SEARCH_ARGS_BUFSIZE - sizeof(struct btrfs_ioctl_search_header))
                return -EBADMSG;

        struct btrfs_ioctl_search_header h;
        memcpy(&h, (const uint8_t*) i->args->buf + i->offset, sizeof(struct btrfs_ioctl_search_header));

        if (i->offset > BTRFS_SEARCH_ARGS_BUFSIZE - sizeof(struct btrfs_ioctl_search_header) - h.len)
                return -EBADMSG;

        *i->body = (const uint8_t*) i->args->buf + i->offset + sizeof(struct btrfs_ioctl_search_header);
        *i->header = h;

        i->offset += sizeof(struct btrfs_ioctl_search_header) + h.len;
        i->index++;

        return 1;
}

/* Iterates through a series of struct btrfs_ioctl_search_header items returned by
 * BTRFS_IOC_TREE_SEARCH. They are unfortunately not aligned, hence we copy out each header before
 * accessing it. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(_sh, _body, _args)            \
        for (BtrfsForeachIterator iterator = {                          \
                     .args = &(_args),                                  \
                     .header = &(_sh),                                  \
                     .body = &(_body),                                  \
             };                                                         \
             btrfs_iterate(&iterator) > 0; )
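
/* The macro is meant to be used together with BTRFS_IOC_TREE_SEARCH in the pattern the functions
 * below follow. A condensed sketch (error handling trimmed, identifiers as used elsewhere in this
 * file):
 *
 *     struct btrfs_ioctl_search_header sh;
 *     const void *body;
 *
 *     while (btrfs_ioctl_search_args_compare(&args) <= 0) {
 *             args.key.nr_items = 256;
 *             if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
 *                     return -errno;
 *             if (args.key.nr_items <= 0)
 *                     break;
 *
 *             FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
 *                     btrfs_ioctl_search_args_set(&args, &sh);   // resume after this entry next time
 *                     // ... inspect sh.objectid / sh.type / body here ...
 *             }
 *
 *             if (!btrfs_ioctl_search_args_inc(&args))
 *                     break;
 *     }
 */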
int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
        struct btrfs_ioctl_search_args args = {
                /* Tree of tree roots */
                .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,

                /* Look precisely for the subvolume items */
                .key.min_type = BTRFS_ROOT_ITEM_KEY,
                .key.max_type = BTRFS_ROOT_ITEM_KEY,

                .key.max_offset = UINT64_MAX,

                /* No restrictions on the other components */
                .key.min_transid = 0,
                .key.max_transid = UINT64_MAX,
        };
        bool found = false;
        int r;

        assert(fd >= 0);
        assert(ret);

        /* Make sure this works on O_PATH fds */
        _cleanup_close_ int fd_close = -EBADF;
        fd = fd_reopen_condition(fd, O_CLOEXEC|O_RDONLY|O_DIRECTORY, O_PATH, &fd_close);
        if (fd < 0)
                return fd;

        if (subvol_id == 0) {
                r = btrfs_subvol_get_id_fd(fd, &subvol_id);
                if (r < 0)
                        return r;
        } else {
                r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
                if (r < 0)
                        return r;
                if (!r)
                        return -ENOTTY;
        }

        args.key.min_objectid = args.key.max_objectid = subvol_id;

        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
                struct btrfs_ioctl_search_header sh;
                const void *body;

                args.key.nr_items = 256;
                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
                        return -errno;

                if (args.key.nr_items <= 0)
                        break;

                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {

                        /* Make sure we start the next search at least from this entry */
                        btrfs_ioctl_search_args_set(&args, &sh);

                        if (sh.objectid != subvol_id)
                                continue;
                        if (sh.type != BTRFS_ROOT_ITEM_KEY)
                                continue;

                        /* Older versions of the struct lacked the otime setting */
                        if (sh.len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
                                continue;

                        const struct btrfs_root_item *ri = body;

                        ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
                                     (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;

                        ret->subvol_id = subvol_id;
                        ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;

                        assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
                        memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
                        memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));

                        found = true;
                }

                /* Increase search key by one, to read the next item, if we can. */
                if (!btrfs_ioctl_search_args_inc(&args))
                        break;
        }

        return found ? 0 : -ENODATA;
}
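
/* Illustrative use (a sketch; BtrfsSubvolInfo is declared in btrfs-util.h): query the creation time
 * and read-only state of the subvolume an fd refers to:
 *
 *     BtrfsSubvolInfo info;
 *     r = btrfs_subvol_get_info_fd(fd, 0, &info);   // subvol_id == 0 means "the subvolume of fd"
 *     if (r < 0)
 *             return r;
 *     log_debug("subvol %" PRIu64 " created %s, read-only: %s",
 *               info.subvol_id,
 *               FORMAT_TIMESTAMP(info.otime),
 *               yes_no(info.read_only));
 */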
int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {

        struct btrfs_ioctl_search_args args = {
                /* Tree of quota items */
                .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,

                /* The object ID is always 0 */
                .key.min_objectid = 0,
                .key.max_objectid = 0,

                /* Look precisely for the quota items */
                .key.min_type = BTRFS_QGROUP_STATUS_KEY,
                .key.max_type = BTRFS_QGROUP_LIMIT_KEY,

                /* No restrictions on the other components */
                .key.min_transid = 0,
                .key.max_transid = UINT64_MAX,
        };

        bool found_info = false, found_limit = false;
        int r;

        assert(fd >= 0);
        assert(ret);

        if (qgroupid == 0) {
                r = btrfs_subvol_get_id_fd(fd, &qgroupid);
                if (r < 0)
                        return r;
        } else {
                r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
                if (r < 0)
                        return r;
                if (!r)
                        return -ENOTTY;
        }

        args.key.min_offset = args.key.max_offset = qgroupid;

        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
                struct btrfs_ioctl_search_header sh;
                const void *body;

                args.key.nr_items = 256;
                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
                        if (errno == ENOENT) /* quota tree is missing: quota disabled */
                                break;

                        return -errno;
                }

                if (args.key.nr_items <= 0)
                        break;

                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {

                        /* Make sure we start the next search at least from this entry */
                        btrfs_ioctl_search_args_set(&args, &sh);

                        if (sh.objectid != 0)
                                continue;
                        if (sh.offset != qgroupid)
                                continue;

                        if (sh.type == BTRFS_QGROUP_INFO_KEY) {
                                const struct btrfs_qgroup_info_item *qii = body;

                                ret->referenced = le64toh(qii->rfer);
                                ret->exclusive = le64toh(qii->excl);

                                found_info = true;

                        } else if (sh.type == BTRFS_QGROUP_LIMIT_KEY) {
                                const struct btrfs_qgroup_limit_item *qli = body;

                                if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
                                        ret->referenced_max = le64toh(qli->max_rfer);
                                else
                                        ret->referenced_max = UINT64_MAX;

                                if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
                                        ret->exclusive_max = le64toh(qli->max_excl);
                                else
                                        ret->exclusive_max = UINT64_MAX;

                                found_limit = true;
                        }

                        if (found_info && found_limit)
                                break;
                }

                /* Increase search key by one, to read the next item, if we can. */
                if (!btrfs_ioctl_search_args_inc(&args))
                        break;
        }

        if (!found_limit && !found_info)
                return -ENODATA;

        if (!found_info) {
                ret->referenced = UINT64_MAX;
                ret->exclusive = UINT64_MAX;
        }

        if (!found_limit) {
                ret->referenced_max = UINT64_MAX;
                ret->exclusive_max = UINT64_MAX;
        }

        return 0;
}
int btrfs_log_dev_root(int level, int ret, const char *p) {
        return log_full_errno(level, ret,
                              "File system behind %s is reported by btrfs to be backed by pseudo-device /dev/root, which is not a valid userspace accessible device node. "
                              "Cannot determine correct backing block device.", p);
}

int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
        _cleanup_close_ int fd = -EBADF;

        fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
        if (fd < 0)
                return -errno;

        return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
}
int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
        uint64_t level, lowest = UINT64_MAX, lowest_qgroupid = 0;
        _cleanup_free_ uint64_t *qgroups = NULL;
        int r, n;

        assert(fd >= 0);
        assert(ret);

        /* This finds the "subtree" qgroup for a specific
         * subvolume. This only works for subvolumes that have been
         * prepared with btrfs_subvol_auto_qgroup_fd() with
         * insert_intermediary_qgroup=true (or equivalent). For others
         * it will return the leaf qgroup instead. The two cases may
         * be distinguished via the return value, which is 1 in case
         * an appropriate "subtree" qgroup was found, and 0
         * otherwise. */

        if (subvol_id == 0) {
                r = btrfs_subvol_get_id_fd(fd, &subvol_id);
                if (r < 0)
                        return r;
        }

        r = btrfs_qgroupid_split(subvol_id, &level, NULL);
        if (r < 0)
                return r;
        if (level != 0) /* Input must be a leaf qgroup */
                return -EINVAL;

        n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
        if (n < 0)
                return n;

        for (int i = 0; i < n; i++) {
                uint64_t id;

                r = btrfs_qgroupid_split(qgroups[i], &level, &id);
                if (r < 0)
                        return r;

                if (id != subvol_id)
                        continue;

                if (lowest == UINT64_MAX || level < lowest) {
                        lowest_qgroupid = qgroups[i];
                        lowest = level;
                }
        }

        if (lowest == UINT64_MAX) {
                /* No suitable higher-level qgroup found, let's return
                 * the leaf qgroup instead, and indicate that with the
                 * return value 0. */

                *ret = subvol_id;
                return 0;
        }

        *ret = lowest_qgroupid;
        return 1;
}
int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
        uint64_t qgroupid;
        int r;

        /* This determines the quota data of the qgroup with the
         * lowest level, that shares the id part with the specified
         * subvolume. This is useful for determining the quota data
         * for entire subvolume subtrees, as long as the subtrees have
         * been set up with btrfs_subvol_auto_qgroup_fd() or in a
         * compatible way. */

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
        if (r < 0)
                return r;

        return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
}

int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
        _cleanup_close_ int fd = -EBADF;

        fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
        if (fd < 0)
                return -errno;

        return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
}
int btrfs_defrag_fd(int fd) {
        int r;

        assert(fd >= 0);

        r = fd_verify_regular(fd);
        if (r < 0)
                return r;

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_DEFRAG, NULL));
}

int btrfs_defrag(const char *p) {
        _cleanup_close_ int fd = -EBADF;

        fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
        if (fd < 0)
                return -errno;

        return btrfs_defrag_fd(fd);
}

int btrfs_quota_enable_fd(int fd, bool b) {
        struct btrfs_ioctl_quota_ctl_args args = {
                .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
        };
        int r;

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (!r)
                return -ENOTTY;

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args));
}

int btrfs_quota_enable(const char *path, bool b) {
        _cleanup_close_ int fd = -EBADF;

        fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
        if (fd < 0)
                return -errno;

        return btrfs_quota_enable_fd(fd, b);
}
int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {

        struct btrfs_ioctl_qgroup_limit_args args = {
                .lim.max_rfer = referenced_max,
                .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
        };
        int r;

        if (qgroupid == 0) {
                r = btrfs_subvol_get_id_fd(fd, &qgroupid);
                if (r < 0)
                        return r;
        } else {
                r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
                if (r < 0)
                        return r;
                if (!r)
                        return -ENOTTY;
        }

        args.qgroupid = qgroupid;

        for (unsigned c = 0;; c++) {
                if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {

                        if (errno == EBUSY && c < 10) {
                                (void) btrfs_quota_scan_wait(fd);
                                continue;
                        }

                        return -errno;
                }

                break;
        }

        return 0;
}

int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
        _cleanup_close_ int fd = -EBADF;

        fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
        if (fd < 0)
                return -errno;

        return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
}

int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t qgroupid;
        int r;

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
        if (r < 0)
                return r;

        return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
}

int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
        _cleanup_close_ int fd = -EBADF;

        fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
        if (fd < 0)
                return -errno;

        return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
}
int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
        assert(ret);

        if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
                return -EINVAL;

        if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
                return -EINVAL;

        *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
        return 0;
}

int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
        assert(level || id);

        if (level)
                *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;

        if (id)
                *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1);

        return 0;
}
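
/* Worked example of the qgroupid encoding (BTRFS_QGROUP_LEVEL_SHIFT is 48, so the top 16 bits hold
 * the level and the low 48 bits the id; the values below are picked arbitrarily):
 *
 *     btrfs_qgroupid_make(1, 256, &q)     ->  q == (1 << 48) | 256 == 0x0001000000000100
 *     btrfs_qgroupid_split(q, &lvl, &id)  ->  lvl == 1, id == 256
 *
 * This matches the "level/id" notation used by the btrfs qgroup tools, e.g. "1/256". */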
static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {

        struct btrfs_ioctl_qgroup_create_args args = {
                .create = b,
                .qgroupid = qgroupid,
        };
        int r;

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (!r)
                return -ENOTTY;

        for (unsigned c = 0;; c++) {
                if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {

                        /* On old kernels if quota is not enabled, we get EINVAL. On newer kernels we get
                         * ENOTCONN. Let's always convert this to ENOTCONN to make this recognizable
                         * everywhere the same way. */

                        if (IN_SET(errno, EINVAL, ENOTCONN))
                                return -ENOTCONN;

                        if (errno == EBUSY && c < 10) {
                                (void) btrfs_quota_scan_wait(fd);
                                continue;
                        }

                        return -errno;
                }

                break;
        }

        return 0;
}

int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, true, qgroupid);
}

int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, false, qgroupid);
}

int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
        _cleanup_free_ uint64_t *qgroups = NULL;
        uint64_t subvol_id;
        int r, n;

        /* Destroys the specified qgroup, but unassigns it from all
         * its parents first. Also, it recursively destroys all
         * qgroups it is assigned to that share the same id part of
         * the qgroupid as the specified group. */

        r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
        if (r < 0)
                return r;

        n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
        if (n < 0)
                return n;

        for (int i = 0; i < n; i++) {
                uint64_t id;

                r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
                if (r < 0)
                        return r;

                r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
                if (r < 0)
                        return r;

                /* The parent qgroupid shares the same id part with
                 * us? If so, destroy it too. */
                if (id == subvol_id)
                        (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
        }

        return btrfs_qgroup_destroy(fd, qgroupid);
}

int btrfs_quota_scan_start(int fd) {
        struct btrfs_ioctl_quota_rescan_args args = {};

        assert(fd >= 0);

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args));
}

int btrfs_quota_scan_wait(int fd) {
        assert(fd >= 0);

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT));
}

int btrfs_quota_scan_ongoing(int fd) {
        struct btrfs_ioctl_quota_rescan_args args = {};

        assert(fd >= 0);

        if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
                return -errno;

        return !!args.flags;
}
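
/* A sketch of how the rescan helpers above are meant to be combined (fd refers to any file or
 * directory on the btrfs file system in question):
 *
 *     (void) btrfs_quota_scan_start(fd);        // kick off a rescan after (un)assigning qgroups
 *     if (btrfs_quota_scan_ongoing(fd) > 0)
 *             (void) btrfs_quota_scan_wait(fd); // block until the accounting numbers are current
 */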
static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
        struct btrfs_ioctl_qgroup_assign_args args = {
                .assign = b,
                .src = child,
                .dst = parent,
        };
        int r;

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (!r)
                return -ENOTTY;

        for (unsigned c = 0;; c++) {
                r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
                if (r < 0) {
                        if (errno == EBUSY && c < 10) {
                                (void) btrfs_quota_scan_wait(fd);
                                continue;
                        }

                        return -errno;
                }

                if (r == 0)
                        return 0;

                /* If the return value is > 0, we need to request a rescan */

                (void) btrfs_quota_scan_start(fd);
                return 1;
        }
}

int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, true, child, parent);
}

int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, false, child, parent);
}
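
/* Putting the primitives above together, a hypothetical caller that groups a freshly created
 * subvolume (leaf qgroup 0/subvol_id) under an intermediary qgroup 1/subvol_id and caps it at 1 GiB
 * might do (error handling omitted; see btrfs_subvol_auto_qgroup_fd() below for the full logic):
 *
 *     uint64_t leaf, subtree;
 *     (void) btrfs_qgroupid_make(0, subvol_id, &leaf);
 *     (void) btrfs_qgroupid_make(1, subvol_id, &subtree);
 *     (void) btrfs_qgroup_create(fd, subtree);
 *     (void) btrfs_qgroup_assign(fd, leaf, subtree);              // child first, then parent
 *     (void) btrfs_qgroup_set_limit_fd(fd, subtree, UINT64_C(1) << 30);
 */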
static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
        struct btrfs_ioctl_search_args args = {
                .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,

                .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
                .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,

                .key.min_type = BTRFS_ROOT_BACKREF_KEY,
                .key.max_type = BTRFS_ROOT_BACKREF_KEY,

                .key.min_transid = 0,
                .key.max_transid = UINT64_MAX,
        };

        struct btrfs_ioctl_vol_args vol_args = {};
        _cleanup_close_ int subvol_fd = -EBADF;
        struct stat st;
        bool made_writable = false;
        int r;

        assert(fd >= 0);
        assert(subvolume);

        if (fstat(fd, &st) < 0)
                return -errno;

        if (!S_ISDIR(st.st_mode))
                return -EINVAL;

        subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
        if (subvol_fd < 0)
                return -errno;

        /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
         * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
         * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
         * don't have the privileges to remove subvolumes, regardless of whether the specified directory is actually a
         * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
         * let's prefer ENOTTY over EPERM/EACCES though. */
        r = btrfs_is_subvol_fd(subvol_fd);
        if (r < 0)
                return r;
        if (r == 0) /* Not a btrfs subvolume */
                return -ENOTTY;

        if (subvol_id == 0) {
                r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
                if (r < 0)
                        return r;
        }

        /* First, try to remove the subvolume. If it happens to be
         * already empty, this will just work. */
        strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
        if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
                (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
                return 0;
        }
        if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
                return -errno;

        /* OK, the subvolume is not empty, let's look for child
         * subvolumes, and remove them, first */

        args.key.min_offset = args.key.max_offset = subvol_id;

        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
                struct btrfs_ioctl_search_header sh;
                const void *body;

                args.key.nr_items = 256;
                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
                        return -errno;

                if (args.key.nr_items <= 0)
                        break;

                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
                        _cleanup_free_ char *p = NULL;

                        btrfs_ioctl_search_args_set(&args, &sh);

                        if (sh.type != BTRFS_ROOT_BACKREF_KEY)
                                continue;
                        if (sh.offset != subvol_id)
                                continue;

                        const struct btrfs_root_ref *ref = body;
                        p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
                        if (!p)
                                return -ENOMEM;

                        struct btrfs_ioctl_ino_lookup_args ino_args = {
                                .treeid = subvol_id,
                                .objectid = htole64(ref->dirid),
                        };

                        if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
                                return -errno;

                        if (!made_writable) {
                                r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
                                if (r < 0)
                                        return r;

                                made_writable = true;
                        }

                        if (isempty(ino_args.name))
                                /* Subvolume is in the top-level
                                 * directory of the subvolume. */
                                r = subvol_remove_children(subvol_fd, p, sh.objectid, flags);
                        else {
                                _cleanup_close_ int child_fd = -EBADF;

                                /* Subvolume is somewhere further down,
                                 * hence we need to open the
                                 * containing directory first */

                                child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                                if (child_fd < 0)
                                        return -errno;

                                r = subvol_remove_children(child_fd, p, sh.objectid, flags);
                        }
                        if (r < 0)
                                return r;
                }

                /* Increase search key by one, to read the next item, if we can. */
                if (!btrfs_ioctl_search_args_inc(&args))
                        break;
        }

        /* OK, the child subvolumes should all be gone now, let's try
         * again to remove the subvolume */
        if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
                return -errno;

        (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
        return 0;
}
int btrfs_subvol_remove_at(int dir_fd, const char *path, BtrfsRemoveFlags flags) {
        _cleanup_free_ char *subvolume = NULL;
        _cleanup_close_ int fd = -EBADF;
        int r;

        fd = chase_and_openat(dir_fd, path, CHASE_PARENT|CHASE_EXTRACT_FILENAME, O_CLOEXEC, &subvolume);
        if (fd < 0)
                return fd;

        r = btrfs_validate_subvolume_name(subvolume);
        if (r < 0)
                return r;

        return subvol_remove_children(fd, subvolume, 0, flags);
}
int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {

        struct btrfs_ioctl_search_args args = {
                /* Tree of quota items */
                .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,

                /* The object ID is always 0 */
                .key.min_objectid = 0,
                .key.max_objectid = 0,

                /* Look precisely for the quota items */
                .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
                .key.max_type = BTRFS_QGROUP_LIMIT_KEY,

                /* For our qgroup */
                .key.min_offset = old_qgroupid,
                .key.max_offset = old_qgroupid,

                /* No restrictions on the other components */
                .key.min_transid = 0,
                .key.max_transid = UINT64_MAX,
        };
        int r;

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (!r)
                return -ENOTTY;

        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
                struct btrfs_ioctl_search_header sh;
                const void *body;

                args.key.nr_items = 256;
                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
                        if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
                                break;

                        return -errno;
                }

                if (args.key.nr_items <= 0)
                        break;

                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
                        struct btrfs_ioctl_qgroup_limit_args qargs;

                        /* Make sure we start the next search at least from this entry */
                        btrfs_ioctl_search_args_set(&args, &sh);

                        if (sh.objectid != 0)
                                continue;
                        if (sh.type != BTRFS_QGROUP_LIMIT_KEY)
                                continue;
                        if (sh.offset != old_qgroupid)
                                continue;

                        /* We found the entry, now copy things over. */

                        const struct btrfs_qgroup_limit_item *qli = body;
                        qargs = (struct btrfs_ioctl_qgroup_limit_args) {
                                .qgroupid = new_qgroupid,

                                .lim.max_rfer = le64toh(qli->max_rfer),
                                .lim.max_excl = le64toh(qli->max_excl),
                                .lim.rsv_rfer = le64toh(qli->rsv_rfer),
                                .lim.rsv_excl = le64toh(qli->rsv_excl),

                                .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
                                                                    BTRFS_QGROUP_LIMIT_MAX_EXCL|
                                                                    BTRFS_QGROUP_LIMIT_RSV_RFER|
                                                                    BTRFS_QGROUP_LIMIT_RSV_EXCL),
                        };

                        for (unsigned c = 0;; c++) {
                                if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
                                        if (errno == EBUSY && c < 10) {
                                                (void) btrfs_quota_scan_wait(fd);
                                                continue;
                                        }

                                        return -errno;
                                }

                                break;
                        }
                }

                /* Increase search key by one, to read the next item, if we can. */
                if (!btrfs_ioctl_search_args_inc(&args))
                        break;
        }

        return 0;
}
static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
        _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
        bool copy_from_parent = false, insert_intermediary_qgroup = false;
        int n_old_qgroups, n_old_parent_qgroups, r;
        uint64_t old_parent_id;

        assert(fd >= 0);

        /* Copies a reduced form of quota information from the old to
         * the new subvolume. */

        n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
        if (n_old_qgroups <= 0) /* Nothing to copy */
                return n_old_qgroups;

        assert(old_qgroups); /* Coverity gets confused by the macro iterator allocating this, add a hint */

        r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
        if (r == -ENXIO)
                /* We have no parent, hence nothing to copy. */
                n_old_parent_qgroups = 0;
        else if (r < 0)
                return r;
        else {
                n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
                if (n_old_parent_qgroups < 0)
                        return n_old_parent_qgroups;
        }

        for (int i = 0; i < n_old_qgroups; i++) {
                uint64_t id;

                r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
                if (r < 0)
                        return r;

                if (id == old_subvol_id) {
                        /* The old subvolume was member of a qgroup
                         * that had the same id, but a different level
                         * than itself. Let's set up something similar
                         * in the destination. */

                        insert_intermediary_qgroup = true;

                } else
                        for (int j = 0; j < n_old_parent_qgroups; j++)
                                if (old_parent_qgroups[j] == old_qgroups[i])
                                        /* The old subvolume shared a common
                                         * parent qgroup with its parent
                                         * subvolume. Let's set up something
                                         * similar in the destination. */
                                        copy_from_parent = true;
        }

        if (!insert_intermediary_qgroup && !copy_from_parent)
                return 0;

        return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
}
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t old_subtree_qgroup, new_subtree_qgroup;
        int r;

        /* First copy the leaf limits */
        r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (r < 0)
                return r;

        /* Then, try to copy the subtree limits, if there are any. */
        r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return 0;

        r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return 0;

        r = btrfs_qgroup_copy_limits(fd, old_subtree_qgroup, new_subtree_qgroup);
        if (r < 0)
                return r;

        return 0;
}
static int subvol_snapshot_children(
                int old_fd,
                int new_fd,
                const char *subvolume,
                uint64_t old_subvol_id,
                BtrfsSnapshotFlags flags) {

        struct btrfs_ioctl_search_args args = {
                .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,

                .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
                .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,

                .key.min_type = BTRFS_ROOT_BACKREF_KEY,
                .key.max_type = BTRFS_ROOT_BACKREF_KEY,

                .key.min_transid = 0,
                .key.max_transid = UINT64_MAX,
        };

        struct btrfs_ioctl_vol_args_v2 vol_args = {
                .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
        };
        _cleanup_close_ int subvolume_fd = -EBADF;
        uint64_t new_subvol_id;
        int r;

        assert(old_fd >= 0);
        assert(new_fd >= 0);
        assert(subvolume);

        strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);

        if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
                return -errno;

        if (FLAGS_SET(flags, BTRFS_SNAPSHOT_LOCK_BSD)) {
                subvolume_fd = xopenat_lock(new_fd, subvolume,
                                            O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW,
                                            LOCK_BSD);
                if (subvolume_fd < 0)
                        return subvolume_fd;

                r = btrfs_is_subvol_fd(subvolume_fd);
                if (r < 0)
                        return r;
        }

        if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
            !(flags & BTRFS_SNAPSHOT_QUOTA))
                return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;

        if (old_subvol_id == 0) {
                r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
                if (r < 0)
                        return r;
        }

        r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
        if (r < 0)
                return r;

        if (flags & BTRFS_SNAPSHOT_QUOTA)
                (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);

        if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {

                if (flags & BTRFS_SNAPSHOT_QUOTA)
                        (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);

                return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
        }

        args.key.min_offset = args.key.max_offset = old_subvol_id;

        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
                struct btrfs_ioctl_search_header sh;
                const void *body;

                args.key.nr_items = 256;
                if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
                        return -errno;

                if (args.key.nr_items <= 0)
                        break;

                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
                        _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
                        _cleanup_close_ int old_child_fd = -EBADF, new_child_fd = -EBADF;

                        btrfs_ioctl_search_args_set(&args, &sh);

                        if (sh.type != BTRFS_ROOT_BACKREF_KEY)
                                continue;

                        /* Avoid finding the source subvolume a second time */
                        if (sh.offset != old_subvol_id)
                                continue;

                        /* Avoid running into loops if the new subvolume is below the old one. */
                        if (sh.objectid == new_subvol_id)
                                continue;

                        const struct btrfs_root_ref *ref = body;
                        p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
                        if (!p)
                                return -ENOMEM;

                        struct btrfs_ioctl_ino_lookup_args ino_args = {
                                .treeid = old_subvol_id,
                                .objectid = htole64(ref->dirid),
                        };

                        if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
                                return -errno;

                        c = path_join(ino_args.name, p);
                        if (!c)
                                return -ENOMEM;

                        old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                        if (old_child_fd < 0)
                                return -errno;

                        np = path_join(subvolume, ino_args.name);
                        if (!np)
                                return -ENOMEM;

                        new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                        if (new_child_fd < 0)
                                return -errno;

                        if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
                                /* If the snapshot is read-only we need to mark it writable temporarily, to
                                 * put the subsnapshot into place. */

                                if (subvolume_fd < 0) {
                                        subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
                                        if (subvolume_fd < 0)
                                                return -errno;
                                }

                                r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
                                if (r < 0)
                                        return r;
                        }

                        /* When btrfs clones the subvolumes, child subvolumes appear as empty
                         * directories. Remove them, so that we can create a new snapshot in their place */
                        if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
                                r = -errno;

                                if (flags & BTRFS_SNAPSHOT_READ_ONLY)
                                        (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);

                                return r;
                        }

                        r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh.objectid,
                                                     flags & ~(BTRFS_SNAPSHOT_FALLBACK_COPY|BTRFS_SNAPSHOT_LOCK_BSD));

                        /* Restore the readonly flag */
                        if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
                                int k;

                                k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
                                if (r >= 0 && k < 0)
                                        return k;
                        }
                        if (r < 0)
                                return r;
                }

                /* Increase search key by one, to read the next item, if we can. */
                if (!btrfs_ioctl_search_args_inc(&args))
                        break;
        }

        if (flags & BTRFS_SNAPSHOT_QUOTA)
                (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);

        return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
}
int btrfs_subvol_snapshot_at_full(
                int dir_fdf,
                const char *from,
                int dir_fdt,
                const char *to,
                BtrfsSnapshotFlags flags,
                copy_progress_path_t progress_path,
                copy_progress_bytes_t progress_bytes,
                void *userdata) {

        _cleanup_free_ char *subvolume = NULL;
        _cleanup_close_ int old_fd = -EBADF, new_fd = -EBADF, subvolume_fd = -EBADF;
        int r;

        assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
        assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);

        old_fd = xopenat(dir_fdf, from, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
        if (old_fd < 0)
                return old_fd;

        new_fd = chase_and_openat(dir_fdt, to, CHASE_PARENT|CHASE_EXTRACT_FILENAME, O_CLOEXEC, &subvolume);
        if (new_fd < 0)
                return new_fd;

        r = btrfs_validate_subvolume_name(subvolume);
        if (r < 0)
                return r;

        r = btrfs_is_subvol_at(dir_fdf, from);
        if (r < 0)
                return r;
        if (r == 0) {
                bool plain_directory = false;

                /* If the source isn't a proper subvolume, fail unless fallback is requested */
                if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
                        return -EISDIR;

                r = btrfs_subvol_make(new_fd, subvolume);
                if (ERRNO_IS_NOT_SUPPORTED(r) && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
                        /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
                        if (mkdirat(new_fd, subvolume, 0755) < 0)
                                return -errno;

                        plain_directory = true;
                } else if (r < 0)
                        return r;

                if (FLAGS_SET(flags, BTRFS_SNAPSHOT_LOCK_BSD)) {
                        subvolume_fd = xopenat_lock(new_fd, subvolume,
                                                    O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW,
                                                    LOCK_BSD);
                        if (subvolume_fd < 0)
                                return subvolume_fd;

                        if (!plain_directory) {
                                r = btrfs_is_subvol_fd(subvolume_fd);
                                if (r < 0)
                                        return r;
                        }
                }

                r = copy_directory_at_full(
                                dir_fdf, from,
                                new_fd, subvolume,
                                (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGINT) ? COPY_SIGINT : 0)|
                                (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGTERM) ? COPY_SIGTERM : 0),
                                progress_path,
                                progress_bytes,
                                userdata);
                if (r < 0)
                        goto fallback_fail;

                if (flags & BTRFS_SNAPSHOT_READ_ONLY) {

                        if (plain_directory) {
                                /* Plain directories have no recursive read-only flag, but something pretty close to
                                 * it: the IMMUTABLE bit. Let's use this here, if this is requested. */

                                if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
                                        (void) chattr_at(new_fd, subvolume, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
                        } else {
                                r = btrfs_subvol_set_read_only_at(new_fd, subvolume, true);
                                if (r < 0)
                                        goto fallback_fail;
                        }
                }

                return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;

        fallback_fail:
                (void) rm_rf_at(new_fd, subvolume, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
                return r;
        }

        return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
}
int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {

        struct btrfs_ioctl_search_args args = {
                /* Tree of quota items */
                .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,

                /* Look precisely for the quota relation items */
                .key.min_type = BTRFS_QGROUP_RELATION_KEY,
                .key.max_type = BTRFS_QGROUP_RELATION_KEY,

                /* No restrictions on the other components */
                .key.min_offset = 0,
                .key.max_offset = UINT64_MAX,

                .key.min_transid = 0,
                .key.max_transid = UINT64_MAX,
        };

        _cleanup_free_ uint64_t *items = NULL;
        size_t n_items = 0;
        int r;

        assert(fd >= 0);
        assert(ret);

        if (qgroupid == 0) {
                r = btrfs_subvol_get_id_fd(fd, &qgroupid);
                if (r < 0)
                        return r;
        } else {
                r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
                if (r < 0)
                        return r;
                if (!r)
                        return -ENOTTY;
        }

        args.key.min_objectid = args.key.max_objectid = qgroupid;

        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
                struct btrfs_ioctl_search_header sh;
                _unused_ const void *body;

                args.key.nr_items = 256;
                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
                        if (errno == ENOENT) /* quota tree missing: quota is disabled */
                                break;

                        return -errno;
                }

                if (args.key.nr_items <= 0)
                        break;

                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {

                        /* Make sure we start the next search at least from this entry */
                        btrfs_ioctl_search_args_set(&args, &sh);

                        if (sh.type != BTRFS_QGROUP_RELATION_KEY)
                                continue;
                        if (sh.offset < sh.objectid)
                                continue;
                        if (sh.objectid != qgroupid)
                                continue;

                        if (!GREEDY_REALLOC(items, n_items+1))
                                return -ENOMEM;

                        items[n_items++] = sh.offset;
                }

                /* Increase search key by one, to read the next item, if we can. */
                if (!btrfs_ioctl_search_args_inc(&args))
                        break;
        }

        assert((n_items > 0) == !!items);
        assert(n_items <= INT_MAX);

        *ret = TAKE_PTR(items);
        return (int) n_items;
}
int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
        _cleanup_free_ uint64_t *qgroups = NULL;
        _cleanup_close_ int real_fd = -EBADF;
        uint64_t parent_subvol;
        bool changed = false;
        int n, r;

        /*
         * Sets up the specified subvolume's qgroup automatically in
         * one of two ways:
         *
         * If insert_intermediary_qgroup is false, the subvolume's
         * leaf qgroup will be assigned to the same parent qgroups as
         * the subvolume's parent subvolume.
         *
         * If insert_intermediary_qgroup is true, a new intermediary
         * higher-level qgroup is created, with a higher level number,
         * but reusing the id of the subvolume. The level number is
         * picked as one smaller than the lowest level qgroup the
         * parent subvolume is a member of. If the parent subvolume's
         * leaf qgroup is assigned to no higher-level qgroup, a new
         * qgroup of level 255 is created instead. Either way, the new
         * qgroup is then assigned to the parent's higher-level
         * qgroup, and the subvolume itself is assigned to it.
         *
         * If the subvolume is already assigned to a higher level
         * qgroup, no operation is executed.
         *
         * Effectively this means: regardless of whether
         * insert_intermediary_qgroup is true or not, after this
         * function is invoked the subvolume will be accounted within
         * the same qgroups as the parent. However, if it is true, it
         * will also get its own higher-level qgroup, which may in
         * turn be used by subvolumes created beneath this subvolume
         * later on.
         *
         * This hence defines a simple default qgroup setup for
         * subvolumes, as long as this function is invoked on each
         * created subvolume: each subvolume is always accounting
         * together with its immediate parents. Optionally, if
         * insert_intermediary_qgroup is true, it will also get a
         * qgroup that then includes all its own child subvolumes.
         */
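
        /* As an illustration (a hypothetical layout, not produced verbatim by this function): if
         * subvolume 260 is created below subvolume 257, and 257's leaf qgroup 0/257 is already a
         * member of 255/257, then calling this with insert_intermediary_qgroup=true for 260 results
         * in roughly:
         *
         *        255/257                  existing higher-level qgroup of the parent
         *           |- 0/257              parent subvolume's leaf qgroup
         *           `- 254/260            new intermediary qgroup (level 255 - 1), reusing the id 260
         *                 `- 0/260        the new subvolume's leaf qgroup
         */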
        /* Turn this into a proper fd, if it is currently O_PATH */
        fd = fd_reopen_condition(fd, O_RDONLY|O_CLOEXEC, O_PATH, &real_fd);
        if (fd < 0)
                return fd;

        if (subvol_id == 0) {
                r = btrfs_is_subvol_fd(fd);
                if (r < 0)
                        return r;
                if (!r)
                        return -ENOTTY;

                r = btrfs_subvol_get_id_fd(fd, &subvol_id);
                if (r < 0)
                        return r;
        }

        n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
        if (n < 0)
                return n;
        if (n > 0) /* already parent qgroups set up, let's bail */
                return 0;

        qgroups = mfree(qgroups);

        r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
        if (r == -ENXIO)
                /* No parent, hence no qgroup memberships */
                n = 0;
        else if (r < 0)
                return r;
        else {
                n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
                if (n < 0)
                        return n;
        }

        if (insert_intermediary_qgroup) {
                uint64_t lowest = 256, new_qgroupid;
                bool created = false;

                /* Determine the lowest qgroup that the parent
                 * subvolume is assigned to. */

                for (int i = 0; i < n; i++) {
                        uint64_t level;

                        r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
                        if (r < 0)
                                return r;

                        if (level < lowest)
                                lowest = level;
                }

                if (lowest <= 1) /* There are no levels left we could use to insert an intermediary qgroup at */
                        return -EBUSY;

                r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
                if (r < 0)
                        return r;

                /* Create the new intermediary group, unless it already exists */
                r = btrfs_qgroup_create(fd, new_qgroupid);
                if (r < 0 && r != -EEXIST)
                        return r;
                if (r >= 0)
                        changed = created = true;

                for (int i = 0; i < n; i++) {
                        r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
                        if (r < 0 && r != -EEXIST) {
                                if (created)
                                        (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);

                                return r;
                        }
                        if (r >= 0)
                                changed = true;
                }

                r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
                if (r < 0 && r != -EEXIST) {
                        if (created)
                                (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);

                        return r;
                }
                if (r >= 0)
                        changed = true;

        } else {
                int i;

                /* Assign our subvolume to all the same qgroups as the parent */

                for (i = 0; i < n; i++) {
                        r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
                        if (r < 0 && r != -EEXIST)
                                return r;
                        if (r >= 0)
                                changed = true;
                }
        }

        return changed;
}
int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
        _cleanup_close_ int fd = -EBADF;

        fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
        if (fd < 0)
                return -errno;

        return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
}

int btrfs_subvol_make_default(const char *path) {
        _cleanup_close_ int fd = -EBADF;
        uint64_t id;
        int r;

        fd = open(path, O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
        if (fd < 0)
                return -errno;

        r = btrfs_subvol_get_id_fd(fd, &id);
        if (r < 0)
                return r;

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &id));
}
int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {

        struct btrfs_ioctl_search_args args = {
                /* Tree of tree roots */
                .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,

                /* Look precisely for the subvolume items */
                .key.min_type = BTRFS_ROOT_BACKREF_KEY,
                .key.max_type = BTRFS_ROOT_BACKREF_KEY,

                /* No restrictions on the other components */
                .key.min_offset = 0,
                .key.max_offset = UINT64_MAX,

                .key.min_transid = 0,
                .key.max_transid = UINT64_MAX,
        };
        int r;

        assert(fd >= 0);
        assert(ret);

        if (subvol_id == 0) {
                r = btrfs_subvol_get_id_fd(fd, &subvol_id);
                if (r < 0)
                        return r;
        } else {
                r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
                if (r < 0)
                        return r;
                if (!r)
                        return -ENOTTY;
        }

        args.key.min_objectid = args.key.max_objectid = subvol_id;

        while (btrfs_ioctl_search_args_compare(&args) <= 0) {
                struct btrfs_ioctl_search_header sh;
                _unused_ const void *body = NULL;

                args.key.nr_items = 256;
                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
                        return negative_errno();

                if (args.key.nr_items <= 0)
                        break;

                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {

                        if (sh.type != BTRFS_ROOT_BACKREF_KEY)
                                continue;
                        if (sh.objectid != subvol_id)
                                continue;

                        *ret = sh.offset;
                        return 0;
                }

                if (!btrfs_ioctl_search_args_inc(&args))
                        break;
        }

        return -ENXIO;
}
bool btrfs_might_be_subvol(const struct stat *st) {
        assert(st);

        /* Returns true if this 'struct stat' looks like it could refer to a btrfs subvolume. To make a final
         * decision, needs to be combined with an fstatfs() check to see if this is actually btrfs. */

        return S_ISDIR(st->st_mode) && st->st_ino == 256;
}
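
/* A sketch of the combination the comment above describes (F_TYPE_EQUAL() and struct statfs as used
 * elsewhere in systemd; the helper name is illustrative only):
 *
 *     static int looks_like_subvol(int fd) {
 *             struct stat st;
 *             struct statfs sfs;
 *
 *             if (fstat(fd, &st) < 0 || fstatfs(fd, &sfs) < 0)
 *                     return -errno;
 *
 *             return btrfs_might_be_subvol(&st) && F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
 *     }
 */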
int btrfs_forget_device(const char *path) {
        _cleanup_close_ int control_fd = -EBADF;
        struct btrfs_ioctl_vol_args args = {};

        assert(path);

        if (strlen(path) > BTRFS_PATH_NAME_MAX)
                return -E2BIG;

        strcpy(args.name, path);

        control_fd = open("/dev/btrfs-control", O_RDWR|O_CLOEXEC);
        if (control_fd < 0)
                return -errno;

        return RET_NERRNO(ioctl(control_fd, BTRFS_IOC_FORGET_DEV, &args));
}
typedef struct BtrfsStripe {
        uint64_t devid;
        uint64_t offset;
} BtrfsStripe;

typedef struct BtrfsChunk {
        uint64_t offset;
        uint64_t length;
        uint64_t type;

        BtrfsStripe *stripes;
        size_t n_stripes;
        uint64_t stripe_len;
} BtrfsChunk;

typedef struct BtrfsChunkTree {
        BtrfsChunk **chunks;
        size_t n_chunks;
} BtrfsChunkTree;

static BtrfsChunk* btrfs_chunk_free(BtrfsChunk *chunk) {
        if (!chunk)
                return NULL;

        free(chunk->stripes);

        return mfree(chunk);
}

DEFINE_TRIVIAL_CLEANUP_FUNC(BtrfsChunk*, btrfs_chunk_free);

static void btrfs_chunk_tree_done(BtrfsChunkTree *tree) {
        assert(tree);

        FOREACH_ARRAY(i, tree->chunks, tree->n_chunks)
                btrfs_chunk_free(*i);

        free(tree->chunks);
}
static int btrfs_read_chunk_tree_fd(int fd, BtrfsChunkTree *ret) {

        struct btrfs_ioctl_search_args search_args = {
                .key.tree_id = BTRFS_CHUNK_TREE_OBJECTID,

                .key.min_type = BTRFS_CHUNK_ITEM_KEY,
                .key.max_type = BTRFS_CHUNK_ITEM_KEY,

                .key.min_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID,
                .key.max_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID,

                .key.min_offset = 0,
                .key.max_offset = UINT64_MAX,

                .key.min_transid = 0,
                .key.max_transid = UINT64_MAX,
        };

        _cleanup_(btrfs_chunk_tree_done) BtrfsChunkTree tree = {};

        assert(fd >= 0);
        assert(ret);

        while (btrfs_ioctl_search_args_compare(&search_args) <= 0) {
                struct btrfs_ioctl_search_header sh;
                const void *body;

                search_args.key.nr_items = 256;

                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args) < 0)
                        return -errno;

                if (search_args.key.nr_items == 0)
                        break;

                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, search_args) {
                        _cleanup_(btrfs_chunk_freep) BtrfsChunk *chunk = NULL;

                        btrfs_ioctl_search_args_set(&search_args, &sh);

                        if (sh.objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)
                                continue;
                        if (sh.type != BTRFS_CHUNK_ITEM_KEY)
                                continue;

                        chunk = new(BtrfsChunk, 1);
                        if (!chunk)
                                return -ENOMEM;

                        const struct btrfs_chunk *item = body;
                        *chunk = (BtrfsChunk) {
                                .offset = sh.offset,
                                .length = le64toh(item->length),
                                .type = le64toh(item->type),
                                .n_stripes = le16toh(item->num_stripes),
                                .stripe_len = le64toh(item->stripe_len),
                        };

                        chunk->stripes = new(BtrfsStripe, chunk->n_stripes);
                        if (!chunk->stripes)
                                return -ENOMEM;

                        for (size_t j = 0; j < chunk->n_stripes; j++) {
                                const struct btrfs_stripe *stripe = &item->stripe + j;

                                chunk->stripes[j] = (BtrfsStripe) {
                                        .devid = le64toh(stripe->devid),
                                        .offset = le64toh(stripe->offset),
                                };
                        }

                        if (!GREEDY_REALLOC(tree.chunks, tree.n_chunks + 1))
                                return -ENOMEM;

                        tree.chunks[tree.n_chunks++] = TAKE_PTR(chunk);
                }

                if (!btrfs_ioctl_search_args_inc(&search_args))
                        break;
        }

        *ret = TAKE_STRUCT(tree);
        return 0;
}
static BtrfsChunk* btrfs_find_chunk_from_logical_address(const BtrfsChunkTree *tree, uint64_t logical) {
        size_t min_index, max_index;

        assert(tree);
        assert(tree->chunks || tree->n_chunks == 0);

        if (tree->n_chunks == 0)
                return NULL;

        min_index = 0;
        max_index = tree->n_chunks - 1;

        while (min_index <= max_index) {
                size_t mid = (min_index + max_index) / 2;

                if (logical < tree->chunks[mid]->offset) {
                        if (mid == 0)
                                return NULL;

                        max_index = mid - 1;
                } else if (logical >= tree->chunks[mid]->offset + tree->chunks[mid]->length)
                        min_index = mid + 1;
                else
                        return tree->chunks[mid];
        }

        return NULL;
}
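
/* The lookup above is a plain binary search over chunks sorted by logical offset. A worked example
 * with made-up numbers: given chunks covering [0, 8M) and [8M, 8M + 1G),
 *
 *     btrfs_find_chunk_from_logical_address(&tree, 16 * 1024 * 1024)        -> the second chunk
 *     btrfs_find_chunk_from_logical_address(&tree, 4ULL * 1024*1024*1024)   -> NULL (past the last chunk)
 */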
static int btrfs_is_nocow_fd(int fd) {
        unsigned flags;
        int r;

        assert(fd >= 0);

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (!r)
                return -ENOTTY;

        r = read_attr_fd(fd, &flags);
        if (r < 0)
                return r;

        return FLAGS_SET(flags, FS_NOCOW_FL) && !FLAGS_SET(flags, FS_COMPR_FL);
}
int btrfs_get_file_physical_offset_fd(int fd, uint64_t *ret) {

        struct btrfs_ioctl_search_args search_args = {
                .key.min_type = BTRFS_EXTENT_DATA_KEY,
                .key.max_type = BTRFS_EXTENT_DATA_KEY,

                .key.min_offset = 0,
                .key.max_offset = UINT64_MAX,

                .key.min_transid = 0,
                .key.max_transid = UINT64_MAX,
        };

        _cleanup_(btrfs_chunk_tree_done) BtrfsChunkTree tree = {};
        uint64_t subvol_id;
        struct stat st;
        int r;

        assert(fd >= 0);
        assert(ret);

        if (fstat(fd, &st) < 0)
                return -errno;

        r = stat_verify_regular(&st);
        if (r < 0)
                return r;

        r = btrfs_is_nocow_fd(fd);
        if (r < 0)
                return r;
        if (r == 0)
                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
                                       "Cannot get physical address for btrfs extent: CoW enabled");

        r = btrfs_subvol_get_id_fd(fd, &subvol_id);
        if (r < 0)
                return r;

        r = btrfs_read_chunk_tree_fd(fd, &tree);
        if (r < 0)
                return r;

        search_args.key.tree_id = subvol_id;
        search_args.key.min_objectid = search_args.key.max_objectid = st.st_ino;

        while (btrfs_ioctl_search_args_compare(&search_args) <= 0) {
                struct btrfs_ioctl_search_header sh;
                const void *body;

                search_args.key.nr_items = 256;

                if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args) < 0)
                        return -errno;

                if (search_args.key.nr_items == 0)
                        break;

                FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, search_args) {
                        uint64_t logical_offset;
                        BtrfsChunk *chunk;

                        btrfs_ioctl_search_args_set(&search_args, &sh);

                        if (sh.type != BTRFS_EXTENT_DATA_KEY)
                                continue;

                        if (sh.objectid != st.st_ino)
                                continue;

                        const struct btrfs_file_extent_item *item = body;
                        if (!IN_SET(item->type, BTRFS_FILE_EXTENT_REG, BTRFS_FILE_EXTENT_PREALLOC))
                                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
                                                       "Cannot get physical address for btrfs extent: invalid type %" PRIu8,
                                                       item->type);

                        if (item->compression != 0 || item->encryption != 0 || item->other_encoding != 0)
                                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
                                                       "Cannot get physical address for btrfs extent: has incompatible property");

                        logical_offset = le64toh(item->disk_bytenr);
                        if (logical_offset == 0)
                                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
                                                       "Cannot get physical address for btrfs extent: failed to get logical offset");

                        chunk = btrfs_find_chunk_from_logical_address(&tree, logical_offset);
                        if (!chunk)
                                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
                                                       "Cannot get physical address for btrfs extent: no matching chunk found");

                        if ((chunk->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)
                                return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
                                                       "Cannot get physical address for btrfs extent: unsupported profile");

                        uint64_t relative_chunk, relative_stripe, stripe_nr;
                        uint16_t stripe_index;

                        assert(logical_offset >= chunk->offset);
                        assert(chunk->n_stripes > 0);
                        assert(chunk->stripe_len > 0);

                        relative_chunk = logical_offset - chunk->offset;
                        stripe_nr = relative_chunk / chunk->stripe_len;
                        relative_stripe = relative_chunk - stripe_nr * chunk->stripe_len;
                        stripe_index = stripe_nr % chunk->n_stripes;

                        *ret = chunk->stripes[stripe_index].offset +
                                stripe_nr / chunk->n_stripes * chunk->stripe_len +
                                relative_stripe;

                        return 0;
                }

                if (!btrfs_ioctl_search_args_inc(&search_args))
                        break;
        }

        return -ENODATA;
}