]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/shared/btrfs-util.c
ukify: fix parsing uname version with '+'
[thirdparty/systemd.git] / src / shared / btrfs-util.c
CommitLineData
db9ecf05 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
d7c7c334 2
11c3a366 3#include <fcntl.h>
6be15dce
DDM
4#include <linux/btrfs.h>
5#include <linux/btrfs_tree.h>
01234e1f 6#include <linux/magic.h>
11c3a366 7#include <stdio.h>
ac8db36c 8#include <sys/file.h>
11c3a366 9#include <sys/ioctl.h>
11c3a366
TA
10#include <sys/sysmacros.h>
11#include <unistd.h>
12
b5efdb8a 13#include "alloc-util.h"
3ffd4af2 14#include "btrfs-util.h"
24dbe603 15#include "chase.h"
17cbb288 16#include "chattr-util.h"
07630cea 17#include "copy.h"
dc7b1512 18#include "errno-util.h"
3ffd4af2 19#include "fd-util.h"
ef8becfa 20#include "fs-util.h"
93a1f792 21#include "log.h"
d7c7c334 22#include "path-util.h"
17cbb288 23#include "rm-rf.h"
11c3a366 24#include "sparse-endian.h"
872a590e 25#include "stat-util.h"
07630cea 26#include "string-util.h"
93cc7779 27#include "time-util.h"
d7c7c334 28
62572894
LP
29/* WARNING: Be careful with file system ioctls! When we get an fd, we
30 * need to make sure it either refers to only a regular file or
31 * directory, or that it is located on btrfs, before invoking any
32 * btrfs ioctls. The ioctl numbers are reused by some device drivers
33 * (such as DRM), and hence might have bad effects when invoked on
34 * device nodes (that reference drivers) rather than fds to normal
35 * files or directories. */
36
6d2fd8df 37int btrfs_is_subvol_at(int dir_fd, const char *path) {
21222ea5
LP
38 struct stat st;
39
6d2fd8df 40 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
21222ea5 41
cd61c3bf
LP
42 /* On btrfs subvolumes always have the inode 256 */
43
e089efd7 44 if (fstatat(dir_fd, strempty(path), &st, isempty(path) ? AT_EMPTY_PATH : 0) < 0)
d7c7c334
LP
45 return -errno;
46
674b04ff 47 if (!btrfs_might_be_subvol(&st))
d7c7c334
LP
48 return 0;
49
6d2fd8df 50 return is_fs_type_at(dir_fd, path, BTRFS_SUPER_MAGIC);
2904e949
LP
51}
52
77c66be3
DDM
53int btrfs_subvol_set_read_only_at(int dir_fd, const char *path, bool b) {
54 _cleanup_close_ int fd = -EBADF;
d7c7c334 55 uint64_t flags, nflags;
0d6e763b 56 struct stat st;
d7c7c334 57
77c66be3
DDM
58 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
59
e40b11be 60 fd = xopenat(dir_fd, path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
77c66be3
DDM
61 if (fd < 0)
62 return fd;
0d6e763b
LP
63
64 if (fstat(fd, &st) < 0)
d7c7c334
LP
65 return -errno;
66
674b04ff 67 if (!btrfs_might_be_subvol(&st))
0d6e763b
LP
68 return -EINVAL;
69
d7c7c334
LP
70 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
71 return -errno;
72
0da96503 73 nflags = UPDATE_FLAG(flags, BTRFS_SUBVOL_RDONLY, b);
d7c7c334
LP
74 if (flags == nflags)
75 return 0;
76
7c248223 77 return RET_NERRNO(ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags));
d7c7c334
LP
78}
79
10f9c755 80int btrfs_subvol_get_read_only_fd(int fd) {
cd61c3bf 81 uint64_t flags;
62572894
LP
82 struct stat st;
83
84 assert(fd >= 0);
85
86 if (fstat(fd, &st) < 0)
87 return -errno;
88
674b04ff 89 if (!btrfs_might_be_subvol(&st))
62572894 90 return -EINVAL;
cd61c3bf
LP
91
92 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
93 return -errno;
94
95 return !!(flags & BTRFS_SUBVOL_RDONLY);
96}
97
c706b27f 98int btrfs_get_block_device_at(int dir_fd, const char *path, dev_t *ret) {
d7c7c334 99 struct btrfs_ioctl_fs_info_args fsi = {};
c706b27f 100 _cleanup_close_ int fd = -EBADF;
d7c7c334 101 uint64_t id;
62572894 102 int r;
d7c7c334 103
c706b27f
DDM
104 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
105 assert(path);
106 assert(ret);
d7c7c334 107
e40b11be 108 fd = xopenat(dir_fd, path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
698bc186
LP
109 if (fd < 0)
110 return fd;
111
65ddc2c5 112 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
62572894
LP
113 if (r < 0)
114 return r;
79de6eb1 115 if (r == 0)
62572894
LP
116 return -ENOTTY;
117
d7c7c334
LP
118 if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
119 return -errno;
120
121 /* We won't do this for btrfs RAID */
66ae5130 122 if (fsi.num_devices != 1) {
c706b27f 123 *ret = 0;
d7c7c334 124 return 0;
66ae5130 125 }
d7c7c334
LP
126
127 for (id = 1; id <= fsi.max_id; id++) {
128 struct btrfs_ioctl_dev_info_args di = {
129 .devid = id,
130 };
131 struct stat st;
132
133 if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
134 if (errno == ENODEV)
135 continue;
136
137 return -errno;
138 }
139
67f0ac8c
LP
140 /* For the root fs — when no initrd is involved — btrfs returns /dev/root on any kernels from
141 * the past few years. That sucks, as we have no API to determine the actual root then. let's
142 * return an recognizable error for this case, so that the caller can maybe print a nice
143 * message about this.
144 *
145 * https://bugzilla.kernel.org/show_bug.cgi?id=89721 */
146 if (path_equal((char*) di.path, "/dev/root"))
147 return -EUCLEAN;
148
d7c7c334
LP
149 if (stat((char*) di.path, &st) < 0)
150 return -errno;
151
152 if (!S_ISBLK(st.st_mode))
3468e5ac 153 return -ENOTBLK;
d7c7c334
LP
154
155 if (major(st.st_rdev) == 0)
156 return -ENODEV;
157
c706b27f 158 *ret = st.st_rdev;
d7c7c334
LP
159 return 1;
160 }
161
162 return -ENODEV;
163}
10f9c755
LP
164
165int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
166 struct btrfs_ioctl_ino_lookup_args args = {
167 .objectid = BTRFS_FIRST_FREE_OBJECTID
168 };
62572894 169 int r;
10f9c755
LP
170
171 assert(fd >= 0);
172 assert(ret);
173
65ddc2c5 174 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
62572894
LP
175 if (r < 0)
176 return r;
79de6eb1 177 if (r == 0)
62572894
LP
178 return -ENOTTY;
179
10f9c755
LP
180 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
181 return -errno;
182
183 *ret = args.treeid;
184 return 0;
185}
186
90578cbd 187int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
254d1313 188 _cleanup_close_ int subvol_fd = -EBADF;
90578cbd
LP
189
190 assert(fd >= 0);
191 assert(ret);
192
193 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
194 if (subvol_fd < 0)
195 return -errno;
196
197 return btrfs_subvol_get_id_fd(subvol_fd, ret);
198}
199
5743a585
LP
200static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
201 assert(args);
202
203 /* the objectid, type, offset together make up the btrfs key,
204 * which is considered a single 136byte integer when
205 * comparing. This call increases the counter by one, dealing
206 * with the overflow between the overflows */
207
f5fbe71d 208 if (args->key.min_offset < UINT64_MAX) {
5743a585
LP
209 args->key.min_offset++;
210 return true;
211 }
212
f5fbe71d 213 if (args->key.min_type < UINT8_MAX) {
5743a585
LP
214 args->key.min_type++;
215 args->key.min_offset = 0;
216 return true;
217 }
218
f5fbe71d 219 if (args->key.min_objectid < UINT64_MAX) {
5743a585
LP
220 args->key.min_objectid++;
221 args->key.min_offset = 0;
222 args->key.min_type = 0;
223 return true;
224 }
225
226 return 0;
227}
228
229static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
230 assert(args);
231 assert(h);
232
233 args->key.min_objectid = h->objectid;
234 args->key.min_type = h->type;
235 args->key.min_offset = h->offset;
236}
237
/* Compares the minimum key of args with its maximum key, component by component in order of significance
 * (objectid, then type, then offset). Returns a negative value if min < max, 0 if both are equal and a
 * positive value if min > max. */
static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
        int r;

        assert(args);

        r = CMP(args->key.min_objectid, args->key.max_objectid);
        if (r == 0)
                r = CMP(args->key.min_type, args->key.max_type);
        if (r == 0)
                r = CMP(args->key.min_offset, args->key.max_offset);

        return r;
}
255
/* Iteration state used by btrfs_iterate() to walk the items of one search result buffer. */
typedef struct BtrfsForeachIterator {
        const struct btrfs_ioctl_search_args *args; /* search result whose buf is being walked */
        size_t offset;                              /* byte offset into args->buf of the next item header */
        unsigned index;                             /* number of items returned so far, compared against key.nr_items */
        struct btrfs_ioctl_search_header *header;   /* where the header of the current item is copied to */
        const void **body;                          /* where the pointer to the current item's payload is stored */
} BtrfsForeachIterator;
263
125cca1b
YW
264static int btrfs_iterate(BtrfsForeachIterator *i) {
265 assert(i);
266 assert(i->args);
267 assert(i->header);
268 assert(i->body);
269
270 if (i->index >= i->args->key.nr_items)
271 return 0; /* end */
272
273 assert_cc(BTRFS_SEARCH_ARGS_BUFSIZE >= sizeof(struct btrfs_ioctl_search_header));
274 if (i->offset > BTRFS_SEARCH_ARGS_BUFSIZE - sizeof(struct btrfs_ioctl_search_header))
275 return -EBADMSG;
276
277 struct btrfs_ioctl_search_header h;
278 memcpy(&h, (const uint8_t*) i->args->buf + i->offset, sizeof(struct btrfs_ioctl_search_header));
279
280 if (i->offset > BTRFS_SEARCH_ARGS_BUFSIZE - sizeof(struct btrfs_ioctl_search_header) - h.len)
281 return -EBADMSG;
282
283 *i->body = (const uint8_t*) i->args->buf + i->offset + sizeof(struct btrfs_ioctl_search_header);
284 *i->header = h;
285 i->offset += sizeof(struct btrfs_ioctl_search_header) + h.len;
286 i->index++;
287
288 return 1;
289}
290
801bf40c
LP
/* Iterates through the items of the search result stored in _args. The item headers are unfortunately not
 * aligned inside the buffer, hence btrfs_iterate() copies each header out into _sh, while _body is pointed
 * at the payload following the header. The loop ends as soon as btrfs_iterate() returns zero (all items
 * consumed) or a negative error (malformed buffer). */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(_sh, _body, _args)            \
        for (BtrfsForeachIterator iterator = {                          \
                     .args = &(_args),                                  \
                     .header = &(_sh),                                  \
                     .body = &(_body),                                  \
             };                                                         \
             btrfs_iterate(&iterator) > 0; )
5743a585 300
5bcd08db 301int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
10f9c755
LP
302 struct btrfs_ioctl_search_args args = {
303 /* Tree of tree roots */
b6b18498 304 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
10f9c755
LP
305
306 /* Look precisely for the subvolume items */
307 .key.min_type = BTRFS_ROOT_ITEM_KEY,
308 .key.max_type = BTRFS_ROOT_ITEM_KEY,
309
10f9c755 310 .key.min_offset = 0,
f5fbe71d 311 .key.max_offset = UINT64_MAX,
5743a585
LP
312
313 /* No restrictions on the other components */
10f9c755 314 .key.min_transid = 0,
f5fbe71d 315 .key.max_transid = UINT64_MAX,
10f9c755
LP
316 };
317
b6b18498 318 bool found = false;
10f9c755
LP
319 int r;
320
321 assert(fd >= 0);
322 assert(ret);
323
abeedbde
LP
324 /* Make sure this works on O_PATH fds */
325 _cleanup_close_ int fd_close = -EBADF;
326 fd = fd_reopen_condition(fd, O_CLOEXEC|O_RDONLY|O_DIRECTORY, O_PATH, &fd_close);
327 if (fd < 0)
328 return fd;
329
5bcd08db
LP
330 if (subvol_id == 0) {
331 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
332 if (r < 0)
333 return r;
334 } else {
65ddc2c5 335 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
336 if (r < 0)
337 return r;
79de6eb1 338 if (r == 0)
5bcd08db
LP
339 return -ENOTTY;
340 }
10f9c755
LP
341
342 args.key.min_objectid = args.key.max_objectid = subvol_id;
10f9c755 343
5743a585 344 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
345 struct btrfs_ioctl_search_header sh;
346 const void *body;
b6b18498
LP
347
348 args.key.nr_items = 256;
349 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
350 return -errno;
351
352 if (args.key.nr_items <= 0)
353 break;
10f9c755 354
801bf40c 355 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
5743a585 356 /* Make sure we start the next search at least from this entry */
801bf40c 357 btrfs_ioctl_search_args_set(&args, &sh);
5743a585 358
801bf40c 359 if (sh.objectid != subvol_id)
b6b18498 360 continue;
801bf40c 361 if (sh.type != BTRFS_ROOT_ITEM_KEY)
b6b18498 362 continue;
5743a585
LP
363
364 /* Older versions of the struct lacked the otime setting */
801bf40c 365 if (sh.len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
b6b18498 366 continue;
10f9c755 367
801bf40c 368 const struct btrfs_root_item *ri = body;
b6b18498
LP
369 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
370 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
10f9c755 371
b6b18498 372 ret->subvol_id = subvol_id;
5d904a6a 373 ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;
10f9c755 374
b6b18498
LP
375 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
376 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
377 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
378
379 found = true;
380 goto finish;
381 }
382
5743a585
LP
383 /* Increase search key by one, to read the next item, if we can. */
384 if (!btrfs_ioctl_search_args_inc(&args))
b6b18498
LP
385 break;
386 }
387
388finish:
246caacb 389 return found ? 0 : -ENODATA;
b6b18498
LP
390}
391
5bcd08db 392int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
b6b18498
LP
393
394 struct btrfs_ioctl_search_args args = {
395 /* Tree of quota items */
396 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
397
5743a585
LP
398 /* The object ID is always 0 */
399 .key.min_objectid = 0,
400 .key.max_objectid = 0,
401
b6b18498
LP
402 /* Look precisely for the quota items */
403 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
404 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
405
b6b18498
LP
406 /* No restrictions on the other components */
407 .key.min_transid = 0,
f5fbe71d 408 .key.max_transid = UINT64_MAX,
b6b18498
LP
409 };
410
b6b18498
LP
411 bool found_info = false, found_limit = false;
412 int r;
413
414 assert(fd >= 0);
415 assert(ret);
416
5bcd08db
LP
417 if (qgroupid == 0) {
418 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
419 if (r < 0)
420 return r;
421 } else {
65ddc2c5 422 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
423 if (r < 0)
424 return r;
79de6eb1 425 if (r == 0)
5bcd08db
LP
426 return -ENOTTY;
427 }
b6b18498 428
5bcd08db 429 args.key.min_offset = args.key.max_offset = qgroupid;
b6b18498 430
5743a585 431 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
432 struct btrfs_ioctl_search_header sh;
433 const void *body;
b6b18498
LP
434
435 args.key.nr_items = 256;
12ee6186
LP
436 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
437 if (errno == ENOENT) /* quota tree is missing: quota disabled */
438 break;
439
b6b18498 440 return -errno;
12ee6186 441 }
b6b18498
LP
442
443 if (args.key.nr_items <= 0)
444 break;
445
801bf40c 446 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
b6b18498 447
5743a585 448 /* Make sure we start the next search at least from this entry */
801bf40c 449 btrfs_ioctl_search_args_set(&args, &sh);
b6b18498 450
801bf40c 451 if (sh.objectid != 0)
b6b18498 452 continue;
801bf40c 453 if (sh.offset != qgroupid)
b6b18498
LP
454 continue;
455
801bf40c
LP
456 if (sh.type == BTRFS_QGROUP_INFO_KEY) {
457 const struct btrfs_qgroup_info_item *qii = body;
b6b18498 458
cb81cd80 459 ret->referenced = le64toh(qii->rfer);
b6b18498
LP
460 ret->exclusive = le64toh(qii->excl);
461
462 found_info = true;
463
801bf40c
LP
464 } else if (sh.type == BTRFS_QGROUP_LIMIT_KEY) {
465 const struct btrfs_qgroup_limit_item *qli = body;
b6b18498 466
5bcd08db
LP
467 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
468 ret->referenced_max = le64toh(qli->max_rfer);
469 else
f5fbe71d 470 ret->referenced_max = UINT64_MAX;
5bcd08db
LP
471
472 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
473 ret->exclusive_max = le64toh(qli->max_excl);
474 else
f5fbe71d 475 ret->exclusive_max = UINT64_MAX;
b6b18498
LP
476
477 found_limit = true;
478 }
479
480 if (found_info && found_limit)
481 goto finish;
482 }
483
5743a585
LP
484 /* Increase search key by one, to read the next item, if we can. */
485 if (!btrfs_ioctl_search_args_inc(&args))
b6b18498
LP
486 break;
487 }
488
489finish:
490 if (!found_limit && !found_info)
491 return -ENODATA;
492
493 if (!found_info) {
f5fbe71d
YW
494 ret->referenced = UINT64_MAX;
495 ret->exclusive = UINT64_MAX;
b6b18498
LP
496 }
497
498 if (!found_limit) {
f5fbe71d
YW
499 ret->referenced_max = UINT64_MAX;
500 ret->exclusive_max = UINT64_MAX;
b6b18498 501 }
10f9c755
LP
502
503 return 0;
504}
f27a3864 505
93a1f792
DDM
/* Logs, at the given level and with error ret, that the file system behind p is reported to be backed by
 * /dev/root and that the real block device cannot be determined. Returns the result of log_full_errno(). */
int btrfs_log_dev_root(int level, int ret, const char *p) {
        int r;

        r = log_full_errno(level, ret,
                           "File system behind %s is reported by btrfs to be backed by pseudo-device /dev/root, which is not a valid userspace accessible device node. "
                           "Cannot determine correct backing block device.", p);
        return r;
}
511
5bcd08db 512int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
254d1313 513 _cleanup_close_ int fd = -EBADF;
5bcd08db
LP
514
515 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
516 if (fd < 0)
517 return -errno;
518
519 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
520}
521
522int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
f5fbe71d 523 uint64_t level, lowest = UINT64_MAX, lowest_qgroupid = 0;
5bcd08db 524 _cleanup_free_ uint64_t *qgroups = NULL;
cecaba20 525 int r, n;
5bcd08db
LP
526
527 assert(fd >= 0);
528 assert(ret);
529
530 /* This finds the "subtree" qgroup for a specific
531 * subvolume. This only works for subvolumes that have been
532 * prepared with btrfs_subvol_auto_qgroup_fd() with
533 * insert_intermediary_qgroup=true (or equivalent). For others
534 * it will return the leaf qgroup instead. The two cases may
3a258d3a 535 * be distinguished via the return value, which is 1 in case
5bcd08db
LP
536 * an appropriate "subtree" qgroup was found, and 0
537 * otherwise. */
538
539 if (subvol_id == 0) {
540 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
541 if (r < 0)
542 return r;
543 }
544
545 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
546 if (r < 0)
547 return r;
548 if (level != 0) /* Input must be a leaf qgroup */
549 return -EINVAL;
550
551 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
552 if (n < 0)
553 return n;
554
cecaba20 555 for (int i = 0; i < n; i++) {
5bcd08db
LP
556 uint64_t id;
557
558 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
559 if (r < 0)
560 return r;
561
562 if (id != subvol_id)
563 continue;
564
f5fbe71d 565 if (lowest == UINT64_MAX || level < lowest) {
5bcd08db
LP
566 lowest_qgroupid = qgroups[i];
567 lowest = level;
568 }
569 }
570
f5fbe71d 571 if (lowest == UINT64_MAX) {
5bcd08db
LP
572 /* No suitable higher-level qgroup found, let's return
573 * the leaf qgroup instead, and indicate that with the
574 * return value. */
575
576 *ret = subvol_id;
577 return 0;
578 }
579
580 *ret = lowest_qgroupid;
581 return 1;
582}
583
584int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
585 uint64_t qgroupid;
586 int r;
587
588 assert(fd >= 0);
589 assert(ret);
590
591 /* This determines the quota data of the qgroup with the
592 * lowest level, that shares the id part with the specified
593 * subvolume. This is useful for determining the quota data
594 * for entire subvolume subtrees, as long as the subtrees have
595 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
596 * compatible way */
597
598 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
599 if (r < 0)
600 return r;
601
602 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
603}
604
605int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
254d1313 606 _cleanup_close_ int fd = -EBADF;
5bcd08db
LP
607
608 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
609 if (fd < 0)
610 return -errno;
611
612 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
613}
614
4117366a
YW
615int btrfs_defrag_fd(int fd) {
616 int r;
617
618 assert(fd >= 0);
619
620 r = fd_verify_regular(fd);
621 if (r < 0)
622 return r;
623
624 return RET_NERRNO(ioctl(fd, BTRFS_IOC_DEFRAG, NULL));
625}
626
f27a3864 627int btrfs_defrag(const char *p) {
254d1313 628 _cleanup_close_ int fd = -EBADF;
f27a3864
LP
629
630 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
631 if (fd < 0)
632 return -errno;
633
634 return btrfs_defrag_fd(fd);
635}
754061ce
LP
636
637int btrfs_quota_enable_fd(int fd, bool b) {
638 struct btrfs_ioctl_quota_ctl_args args = {
639 .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
640 };
62572894 641 int r;
754061ce
LP
642
643 assert(fd >= 0);
644
65ddc2c5 645 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
62572894
LP
646 if (r < 0)
647 return r;
79de6eb1 648 if (r == 0)
62572894
LP
649 return -ENOTTY;
650
7c248223 651 return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args));
754061ce
LP
652}
653
654int btrfs_quota_enable(const char *path, bool b) {
254d1313 655 _cleanup_close_ int fd = -EBADF;
754061ce
LP
656
657 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
658 if (fd < 0)
659 return -errno;
660
661 return btrfs_quota_enable_fd(fd, b);
662}
d6ce17c7 663
5bcd08db
LP
664int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
665
d6ce17c7 666 struct btrfs_ioctl_qgroup_limit_args args = {
5bcd08db 667 .lim.max_rfer = referenced_max,
d6ce17c7
LP
668 .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
669 };
62572894 670 int r;
d6ce17c7
LP
671
672 assert(fd >= 0);
673
5bcd08db
LP
674 if (qgroupid == 0) {
675 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
676 if (r < 0)
677 return r;
678 } else {
65ddc2c5 679 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
680 if (r < 0)
681 return r;
79de6eb1 682 if (r == 0)
5bcd08db
LP
683 return -ENOTTY;
684 }
62572894 685
5bcd08db
LP
686 args.qgroupid = qgroupid;
687
cecaba20 688 for (unsigned c = 0;; c++) {
5bcd08db
LP
689 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
690
691 if (errno == EBUSY && c < 10) {
692 (void) btrfs_quota_scan_wait(fd);
693 continue;
694 }
695
696 return -errno;
697 }
698
699 break;
700 }
d6ce17c7
LP
701
702 return 0;
703}
704
5bcd08db 705int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
254d1313 706 _cleanup_close_ int fd = -EBADF;
5bcd08db
LP
707
708 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
709 if (fd < 0)
710 return -errno;
711
712 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
713}
714
/* Sets the "referenced" limit on the qgroup that btrfs_subvol_find_subtree_qgroup() selects for the given
 * subvolume, i.e. the subtree qgroup if there is one and the leaf qgroup otherwise. */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroupid;
        int r;

        assert(fd >= 0);

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroupid);
        if (r >= 0)
                return btrfs_qgroup_set_limit_fd(fd, subtree_qgroupid, referenced_max);

        return r;
}
727
728int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
254d1313 729 _cleanup_close_ int fd = -EBADF;
d6ce17c7
LP
730
731 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
732 if (fd < 0)
733 return -errno;
734
5bcd08db 735 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
d6ce17c7 736}
efe02862 737
5bcd08db 738int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
3f952f92
LP
739 assert(ret);
740
741 if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
742 return -EINVAL;
743
744 if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
745 return -EINVAL;
746
747 *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
748 return 0;
749}
750
5bcd08db
LP
751int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
752 assert(level || id);
753
754 if (level)
755 *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
756
757 if (id)
758 *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1);
759
760 return 0;
761}
762
763static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
3f952f92
LP
764
765 struct btrfs_ioctl_qgroup_create_args args = {
766 .create = b,
5bcd08db 767 .qgroupid = qgroupid,
3f952f92 768 };
3f952f92
LP
769 int r;
770
65ddc2c5 771 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
3f952f92
LP
772 if (r < 0)
773 return r;
5bcd08db
LP
774 if (r == 0)
775 return -ENOTTY;
776
cecaba20 777 for (unsigned c = 0;; c++) {
5bcd08db
LP
778 if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
779
4b019d2f
LP
780 /* On old kernels if quota is not enabled, we get EINVAL. On newer kernels we get
781 * ENOTCONN. Let's always convert this to ENOTCONN to make this recognizable
782 * everywhere the same way. */
783
784 if (IN_SET(errno, EINVAL, ENOTCONN))
785 return -ENOTCONN;
be6d467c 786
5bcd08db
LP
787 if (errno == EBUSY && c < 10) {
788 (void) btrfs_quota_scan_wait(fd);
789 continue;
790 }
791
792 return -errno;
793 }
3f952f92 794
5bcd08db
LP
795 break;
796 }
797
798 return 0;
799}
800
/* Creates the qgroup with the specified id; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        const bool create = true;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
804
/* Destroys the qgroup with the specified id; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        const bool create = false;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
808
809int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
810 _cleanup_free_ uint64_t *qgroups = NULL;
811 uint64_t subvol_id;
cecaba20 812 int n, r;
5bcd08db
LP
813
814 /* Destroys the specified qgroup, but unassigns it from all
815 * its parents first. Also, it recursively destroys all
1b2a7d92 816 * qgroups it is assigned to that have the same id part of the
5bcd08db
LP
817 * qgroupid as the specified group. */
818
819 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
820 if (r < 0)
821 return r;
822
823 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
824 if (n < 0)
825 return n;
826
cecaba20 827 for (int i = 0; i < n; i++) {
5bcd08db
LP
828 uint64_t id;
829
830 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
831 if (r < 0)
832 return r;
833
834 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
835 if (r < 0)
836 return r;
837
838 if (id != subvol_id)
839 continue;
840
841 /* The parent qgroupid shares the same id part with
842 * us? If so, destroy it too. */
843
844 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
845 }
846
847 return btrfs_qgroup_destroy(fd, qgroupid);
848}
849
850int btrfs_quota_scan_start(int fd) {
851 struct btrfs_ioctl_quota_rescan_args args = {};
852
853 assert(fd >= 0);
854
7c248223 855 return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args));
3f952f92
LP
856}
857
5bcd08db
LP
858int btrfs_quota_scan_wait(int fd) {
859 assert(fd >= 0);
860
7c248223 861 return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT));
3f952f92
LP
862}
863
5bcd08db
LP
864int btrfs_quota_scan_ongoing(int fd) {
865 struct btrfs_ioctl_quota_rescan_args args = {};
866
867 assert(fd >= 0);
868
869 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
870 return -errno;
871
872 return !!args.flags;
3f952f92
LP
873}
874
5bcd08db
LP
875static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
876 struct btrfs_ioctl_qgroup_assign_args args = {
877 .assign = b,
878 .src = child,
879 .dst = parent,
880 };
5bcd08db
LP
881 int r;
882
65ddc2c5 883 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
884 if (r < 0)
885 return r;
886 if (r == 0)
887 return -ENOTTY;
888
cecaba20 889 for (unsigned c = 0;; c++) {
5bcd08db
LP
890 r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
891 if (r < 0) {
892 if (errno == EBUSY && c < 10) {
893 (void) btrfs_quota_scan_wait(fd);
894 continue;
895 }
896
897 return -errno;
898 }
899
900 if (r == 0)
901 return 0;
902
903 /* If the return value is > 0, we need to request a rescan */
904
905 (void) btrfs_quota_scan_start(fd);
906 return 1;
907 }
908}
909
/* Assigns the qgroup child to the qgroup parent; see qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = true;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
913
/* Removes the assignment of the qgroup child to the qgroup parent; see qgroup_assign_or_unassign() for the
 * return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = false;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
917
918static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
d9e2daaf
LP
919 struct btrfs_ioctl_search_args args = {
920 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
921
922 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
923 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
924
925 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
926 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
927
928 .key.min_transid = 0,
f5fbe71d 929 .key.max_transid = UINT64_MAX,
d9e2daaf
LP
930 };
931
932 struct btrfs_ioctl_vol_args vol_args = {};
254d1313 933 _cleanup_close_ int subvol_fd = -EBADF;
62572894 934 struct stat st;
3986b258 935 bool made_writable = false;
d9e2daaf
LP
936 int r;
937
938 assert(fd >= 0);
939 assert(subvolume);
940
62572894
LP
941 if (fstat(fd, &st) < 0)
942 return -errno;
943
944 if (!S_ISDIR(st.st_mode))
945 return -EINVAL;
946
f7c9f4a2 947 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
d9e2daaf
LP
948 if (subvol_fd < 0)
949 return -errno;
950
ae1940d2
LP
951 /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
952 * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
953 * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
954 * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a
955 * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
956 * let's prefer ENOTTY over EPERM/EACCES though. */
957 r = btrfs_is_subvol_fd(subvol_fd);
958 if (r < 0)
959 return r;
960 if (r == 0) /* Not a btrfs subvolume */
961 return -ENOTTY;
962
d9e2daaf
LP
963 if (subvol_id == 0) {
964 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
965 if (r < 0)
966 return r;
967 }
968
3f952f92
LP
969 /* First, try to remove the subvolume. If it happens to be
970 * already empty, this will just work. */
971 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
972 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
5bcd08db 973 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
3f952f92
LP
974 return 0;
975 }
5bcd08db 976 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
3f952f92
LP
977 return -errno;
978
979 /* OK, the subvolume is not empty, let's look for child
980 * subvolumes, and remove them, first */
981
d9e2daaf
LP
982 args.key.min_offset = args.key.max_offset = subvol_id;
983
984 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
985 struct btrfs_ioctl_search_header sh;
986 const void *body;
d9e2daaf
LP
987
988 args.key.nr_items = 256;
989 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
990 return -errno;
991
992 if (args.key.nr_items <= 0)
993 break;
994
801bf40c 995 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
d9e2daaf 996 _cleanup_free_ char *p = NULL;
d9e2daaf 997
801bf40c 998 btrfs_ioctl_search_args_set(&args, &sh);
d9e2daaf 999
801bf40c 1000 if (sh.type != BTRFS_ROOT_BACKREF_KEY)
d9e2daaf 1001 continue;
801bf40c 1002 if (sh.offset != subvol_id)
d9e2daaf
LP
1003 continue;
1004
801bf40c 1005 const struct btrfs_root_ref *ref = body;
e5c41c61 1006 p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
d9e2daaf
LP
1007 if (!p)
1008 return -ENOMEM;
1009
41ab8c67
LP
1010 struct btrfs_ioctl_ino_lookup_args ino_args = {
1011 .treeid = subvol_id,
1012 .objectid = htole64(ref->dirid),
1013 };
d9e2daaf
LP
1014
1015 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1016 return -errno;
1017
3986b258
LP
1018 if (!made_writable) {
1019 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1020 if (r < 0)
1021 return r;
1022
1023 made_writable = true;
1024 }
1025
d9e2daaf
LP
1026 if (isempty(ino_args.name))
1027 /* Subvolume is in the top-level
1028 * directory of the subvolume. */
801bf40c 1029 r = subvol_remove_children(subvol_fd, p, sh.objectid, flags);
d9e2daaf 1030 else {
254d1313 1031 _cleanup_close_ int child_fd = -EBADF;
d9e2daaf
LP
1032
1033 /* Subvolume is somewhere further down,
1034 * hence we need to open the
1035 * containing directory first */
1036
f7c9f4a2 1037 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
d9e2daaf
LP
1038 if (child_fd < 0)
1039 return -errno;
1040
801bf40c 1041 r = subvol_remove_children(child_fd, p, sh.objectid, flags);
d9e2daaf
LP
1042 }
1043 if (r < 0)
1044 return r;
1045 }
1046
1047 /* Increase search key by one, to read the next item, if we can. */
1048 if (!btrfs_ioctl_search_args_inc(&args))
1049 break;
1050 }
1051
1052 /* OK, the child subvolumes should all be gone now, let's try
1053 * again to remove the subvolume */
1054 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1055 return -errno;
1056
5bcd08db 1057 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
d9e2daaf
LP
1058 return 0;
1059}
1060
24dbe603 1061int btrfs_subvol_remove_at(int dir_fd, const char *path, BtrfsRemoveFlags flags) {
03469b77 1062 _cleanup_free_ char *subvolume = NULL;
254d1313 1063 _cleanup_close_ int fd = -EBADF;
d9e2daaf
LP
1064 int r;
1065
1066 assert(path);
1067
24dbe603 1068 fd = chase_and_openat(dir_fd, path, CHASE_PARENT|CHASE_EXTRACT_FILENAME, O_CLOEXEC, &subvolume);
d9e2daaf
LP
1069 if (fd < 0)
1070 return fd;
1071
e54c79cc 1072 r = btrfs_validate_subvolume_name(subvolume);
24dbe603
DDM
1073 if (r < 0)
1074 return r;
5bcd08db 1075
5bcd08db
LP
1076 return subvol_remove_children(fd, subvolume, 0, flags);
1077}
1078
1079int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1080
1081 struct btrfs_ioctl_search_args args = {
1082 /* Tree of quota items */
1083 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1084
1085 /* The object ID is always 0 */
1086 .key.min_objectid = 0,
1087 .key.max_objectid = 0,
1088
1089 /* Look precisely for the quota items */
1090 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1091 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1092
1093 /* For our qgroup */
1094 .key.min_offset = old_qgroupid,
1095 .key.max_offset = old_qgroupid,
1096
1097 /* No restrictions on the other components */
1098 .key.min_transid = 0,
f5fbe71d 1099 .key.max_transid = UINT64_MAX,
5bcd08db
LP
1100 };
1101
1102 int r;
1103
65ddc2c5 1104 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
1105 if (r < 0)
1106 return r;
79de6eb1 1107 if (r == 0)
5bcd08db
LP
1108 return -ENOTTY;
1109
1110 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
1111 struct btrfs_ioctl_search_header sh;
1112 const void *body;
5bcd08db
LP
1113
1114 args.key.nr_items = 256;
12ee6186
LP
1115 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1116 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1117 break;
1118
5bcd08db 1119 return -errno;
12ee6186 1120 }
5bcd08db
LP
1121
1122 if (args.key.nr_items <= 0)
1123 break;
1124
801bf40c 1125 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
5bcd08db
LP
1126 struct btrfs_ioctl_qgroup_limit_args qargs;
1127 unsigned c;
1128
1129 /* Make sure we start the next search at least from this entry */
801bf40c 1130 btrfs_ioctl_search_args_set(&args, &sh);
5bcd08db 1131
801bf40c 1132 if (sh.objectid != 0)
5bcd08db 1133 continue;
801bf40c 1134 if (sh.type != BTRFS_QGROUP_LIMIT_KEY)
5bcd08db 1135 continue;
801bf40c 1136 if (sh.offset != old_qgroupid)
5bcd08db
LP
1137 continue;
1138
1139 /* We found the entry, now copy things over. */
1140
801bf40c 1141 const struct btrfs_qgroup_limit_item *qli = body;
5bcd08db
LP
1142 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1143 .qgroupid = new_qgroupid,
1144
1145 .lim.max_rfer = le64toh(qli->max_rfer),
1146 .lim.max_excl = le64toh(qli->max_excl),
1147 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1148 .lim.rsv_excl = le64toh(qli->rsv_excl),
1149
1150 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1151 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1152 BTRFS_QGROUP_LIMIT_RSV_RFER|
1153 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1154 };
1155
1156 for (c = 0;; c++) {
1157 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1158 if (errno == EBUSY && c < 10) {
1159 (void) btrfs_quota_scan_wait(fd);
1160 continue;
1161 }
1162 return -errno;
1163 }
1164
1165 break;
1166 }
1167
1168 return 1;
1169 }
1170
1171 /* Increase search key by one, to read the next item, if we can. */
1172 if (!btrfs_ioctl_search_args_inc(&args))
1173 break;
1174 }
1175
1176 return 0;
d9e2daaf
LP
1177}
1178
5bcd08db
LP
1179static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1180 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1181 bool copy_from_parent = false, insert_intermediary_qgroup = false;
cecaba20 1182 int n_old_qgroups, n_old_parent_qgroups, r;
5bcd08db
LP
1183 uint64_t old_parent_id;
1184
1185 assert(fd >= 0);
1186
1187 /* Copies a reduced form of quota information from the old to
1188 * the new subvolume. */
1189
1190 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1191 if (n_old_qgroups <= 0) /* Nothing to copy */
1192 return n_old_qgroups;
1193
5e30e6e2
LB
1194 assert(old_qgroups); /* Coverity gets confused by the macro iterator allocating this, add a hint */
1195
5bcd08db 1196 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
08c77cf3
LP
1197 if (r == -ENXIO)
1198 /* We have no parent, hence nothing to copy. */
1199 n_old_parent_qgroups = 0;
1200 else if (r < 0)
5bcd08db 1201 return r;
08c77cf3
LP
1202 else {
1203 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1204 if (n_old_parent_qgroups < 0)
1205 return n_old_parent_qgroups;
1206 }
5bcd08db 1207
cecaba20 1208 for (int i = 0; i < n_old_qgroups; i++) {
5bcd08db 1209 uint64_t id;
5bcd08db
LP
1210
1211 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1212 if (r < 0)
1213 return r;
1214
1215 if (id == old_subvol_id) {
1216 /* The old subvolume was member of a qgroup
1217 * that had the same id, but a different level
1218 * as it self. Let's set up something similar
1219 * in the destination. */
1220 insert_intermediary_qgroup = true;
1221 break;
1222 }
1223
cecaba20 1224 for (int j = 0; j < n_old_parent_qgroups; j++)
d46b79bb 1225 if (old_parent_qgroups[j] == old_qgroups[i])
5bcd08db
LP
1226 /* The old subvolume shared a common
1227 * parent qgroup with its parent
1228 * subvolume. Let's set up something
1229 * similar in the destination. */
1230 copy_from_parent = true;
5bcd08db
LP
1231 }
1232
1233 if (!insert_intermediary_qgroup && !copy_from_parent)
1234 return 0;
1235
1236 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1237}
1238
/* Copies the quota limits of subvolume 'old_subvol' to 'new_subvol': first the limits of the leaf qgroup,
 * then, if both subvolumes have a subtree qgroup, the limits of that one too.
 *
 * Returns > 0 if any limit was copied, 0 if not, and a negative errno-style error on failure. */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t old_subtree_qgroup, new_subtree_qgroup;
        bool leaf_copied;
        int r;

        /* Step 1: the leaf qgroup limits */
        r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (r < 0)
                return r;
        leaf_copied = r > 0;

        /* Step 2: the subtree qgroup limits, but only if both sides have a subtree qgroup */
        r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_qgroup_copy_limits(fd, old_subtree_qgroup, new_subtree_qgroup);

        /* Errors and "copied" are propagated as-is; otherwise report what step 1 did */
        return r != 0 ? r : leaf_copied;
}
f70a17f8 1269
b3cade0c
LP
1270static int subvol_snapshot_children(
1271 int old_fd,
1272 int new_fd,
1273 const char *subvolume,
1274 uint64_t old_subvol_id,
1275 BtrfsSnapshotFlags flags) {
f70a17f8
LP
1276
1277 struct btrfs_ioctl_search_args args = {
1278 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1279
1280 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1281 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1282
1283 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1284 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1285
1286 .key.min_transid = 0,
f5fbe71d 1287 .key.max_transid = UINT64_MAX,
f70a17f8
LP
1288 };
1289
1290 struct btrfs_ioctl_vol_args_v2 vol_args = {
1291 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1292 .fd = old_fd,
1293 };
254d1313 1294 _cleanup_close_ int subvolume_fd = -EBADF;
90578cbd
LP
1295 uint64_t new_subvol_id;
1296 int r;
f70a17f8
LP
1297
1298 assert(old_fd >= 0);
1299 assert(new_fd >= 0);
1300 assert(subvolume);
1301
1302 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
f70a17f8
LP
1303
1304 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1305 return -errno;
1306
d54f60c2
DDM
1307 if (FLAGS_SET(flags, BTRFS_SNAPSHOT_LOCK_BSD)) {
1308 subvolume_fd = xopenat_lock(new_fd, subvolume,
1309 O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW,
d54f60c2
DDM
1310 LOCK_BSD,
1311 LOCK_EX);
1312 if (subvolume_fd < 0)
1313 return subvolume_fd;
1314
1315 r = btrfs_is_subvol_fd(subvolume_fd);
1316 if (r < 0)
1317 return r;
1318 if (r == 0)
1319 return -EEXIST;
1320 }
1321
5bcd08db
LP
1322 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1323 !(flags & BTRFS_SNAPSHOT_QUOTA))
d54f60c2 1324 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
f70a17f8 1325
90578cbd
LP
1326 if (old_subvol_id == 0) {
1327 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
f70a17f8
LP
1328 if (r < 0)
1329 return r;
1330 }
1331
90578cbd
LP
1332 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1333 if (r < 0)
1334 return r;
1335
5bcd08db
LP
1336 if (flags & BTRFS_SNAPSHOT_QUOTA)
1337 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1338
1339 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1340
1341 if (flags & BTRFS_SNAPSHOT_QUOTA)
1342 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1343
d54f60c2 1344 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
5bcd08db
LP
1345 }
1346
90578cbd 1347 args.key.min_offset = args.key.max_offset = old_subvol_id;
f70a17f8
LP
1348
1349 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
1350 struct btrfs_ioctl_search_header sh;
1351 const void *body;
f70a17f8
LP
1352
1353 args.key.nr_items = 256;
1354 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1355 return -errno;
1356
1357 if (args.key.nr_items <= 0)
1358 break;
1359
801bf40c 1360 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
f70a17f8 1361 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
254d1313 1362 _cleanup_close_ int old_child_fd = -EBADF, new_child_fd = -EBADF;
f70a17f8 1363
801bf40c 1364 btrfs_ioctl_search_args_set(&args, &sh);
f70a17f8 1365
801bf40c 1366 if (sh.type != BTRFS_ROOT_BACKREF_KEY)
f70a17f8 1367 continue;
90578cbd 1368
801bf40c
LP
1369 /* Avoid finding the source subvolume a second time */
1370 if (sh.offset != old_subvol_id)
f70a17f8
LP
1371 continue;
1372
801bf40c
LP
1373 /* Avoid running into loops if the new subvolume is below the old one. */
1374 if (sh.objectid == new_subvol_id)
90578cbd 1375 continue;
f70a17f8 1376
801bf40c 1377 const struct btrfs_root_ref *ref = body;
e5c41c61 1378 p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
f70a17f8
LP
1379 if (!p)
1380 return -ENOMEM;
1381
41ab8c67
LP
1382 struct btrfs_ioctl_ino_lookup_args ino_args = {
1383 .treeid = old_subvol_id,
1384 .objectid = htole64(ref->dirid),
1385 };
f70a17f8
LP
1386
1387 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1388 return -errno;
1389
b910cc72 1390 c = path_join(ino_args.name, p);
f70a17f8
LP
1391 if (!c)
1392 return -ENOMEM;
1393
f7c9f4a2 1394 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
f70a17f8
LP
1395 if (old_child_fd < 0)
1396 return -errno;
1397
657ee2d8 1398 np = path_join(subvolume, ino_args.name);
f70a17f8
LP
1399 if (!np)
1400 return -ENOMEM;
1401
f7c9f4a2 1402 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
f70a17f8
LP
1403 if (new_child_fd < 0)
1404 return -errno;
1405
ffb296b2 1406 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
801bf40c
LP
1407 /* If the snapshot is read-only we need to mark it writable temporarily, to
1408 * put the subsnapshot into place. */
ffb296b2
LP
1409
1410 if (subvolume_fd < 0) {
f7c9f4a2 1411 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
ffb296b2
LP
1412 if (subvolume_fd < 0)
1413 return -errno;
1414 }
1415
1416 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1417 if (r < 0)
1418 return r;
1419 }
1420
801bf40c
LP
1421 /* When btrfs clones the subvolumes, child subvolumes appear as empty
1422 * directories. Remove them, so that we can create a new snapshot in their place */
ffb296b2
LP
1423 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1424 int k = -errno;
1425
1426 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1427 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1428
1429 return k;
1430 }
f70a17f8 1431
801bf40c 1432 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh.objectid,
d54f60c2 1433 flags & ~(BTRFS_SNAPSHOT_FALLBACK_COPY|BTRFS_SNAPSHOT_LOCK_BSD));
ffb296b2
LP
1434
1435 /* Restore the readonly flag */
1436 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1437 int k;
1438
1439 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1440 if (r >= 0 && k < 0)
1441 return k;
1442 }
1443
f70a17f8
LP
1444 if (r < 0)
1445 return r;
1446 }
1447
1448 /* Increase search key by one, to read the next item, if we can. */
1449 if (!btrfs_ioctl_search_args_inc(&args))
1450 break;
1451 }
1452
5bcd08db
LP
1453 if (flags & BTRFS_SNAPSHOT_QUOTA)
1454 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1455
d54f60c2 1456 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
f70a17f8
LP
1457}
1458
fab4ef72
DDM
1459int btrfs_subvol_snapshot_at_full(
1460 int dir_fdf,
1461 const char *from,
1462 int dir_fdt,
1463 const char *to,
b3cade0c
LP
1464 BtrfsSnapshotFlags flags,
1465 copy_progress_path_t progress_path,
1466 copy_progress_bytes_t progress_bytes,
1467 void *userdata) {
1468
03469b77 1469 _cleanup_free_ char *subvolume = NULL;
d54f60c2 1470 _cleanup_close_ int old_fd = -EBADF, new_fd = -EBADF, subvolume_fd = -EBADF;
f70a17f8
LP
1471 int r;
1472
fab4ef72
DDM
1473 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1474 assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);
1475 assert(to);
1476
e40b11be 1477 old_fd = xopenat(dir_fdf, from, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
fab4ef72
DDM
1478 if (old_fd < 0)
1479 return old_fd;
f70a17f8 1480
fab4ef72
DDM
1481 new_fd = chase_and_openat(dir_fdt, to, CHASE_PARENT|CHASE_EXTRACT_FILENAME, O_CLOEXEC, &subvolume);
1482 if (new_fd < 0)
1483 return new_fd;
1484
e54c79cc 1485 r = btrfs_validate_subvolume_name(subvolume);
fab4ef72
DDM
1486 if (r < 0)
1487 return r;
1488
1489 r = btrfs_is_subvol_at(dir_fdf, from);
f70a17f8
LP
1490 if (r < 0)
1491 return r;
1492 if (r == 0) {
17cbb288
LP
1493 bool plain_directory = false;
1494
1495 /* If the source isn't a proper subvolume, fail unless fallback is requested */
f70a17f8
LP
1496 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1497 return -EISDIR;
1498
e54c79cc 1499 r = btrfs_subvol_make(new_fd, subvolume);
08b8e913
DL
1500 if (r < 0) {
1501 if (ERRNO_IS_NOT_SUPPORTED(r) && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
1502 /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
1503 if (mkdirat(new_fd, subvolume, 0755) < 0)
1504 return -errno;
17cbb288 1505
08b8e913
DL
1506 plain_directory = true;
1507 } else
1508 return r;
1509 }
f70a17f8 1510
d54f60c2
DDM
1511 if (FLAGS_SET(flags, BTRFS_SNAPSHOT_LOCK_BSD)) {
1512 subvolume_fd = xopenat_lock(new_fd, subvolume,
1513 O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW,
d54f60c2
DDM
1514 LOCK_BSD,
1515 LOCK_EX);
1516 if (subvolume_fd < 0)
1517 return subvolume_fd;
1518
1519 if (!plain_directory) {
1520 r = btrfs_is_subvol_fd(subvolume_fd);
1521 if (r < 0)
1522 return r;
1523 if (r == 0)
1524 return -EEXIST;
1525 }
1526 }
1527
f9f70e06 1528 r = copy_directory_at_full(
fab4ef72
DDM
1529 dir_fdf, from,
1530 new_fd, subvolume,
28ba7e36
LP
1531 COPY_MERGE_EMPTY|
1532 COPY_REFLINK|
1533 COPY_SAME_MOUNT|
1534 COPY_HARDLINKS|
23e026de 1535 COPY_ALL_XATTRS|
28ba7e36
LP
1536 (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGINT) ? COPY_SIGINT : 0)|
1537 (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGTERM) ? COPY_SIGTERM : 0),
1538 progress_path,
1539 progress_bytes,
1540 userdata);
17cbb288
LP
1541 if (r < 0)
1542 goto fallback_fail;
f70a17f8
LP
1543
1544 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
17cbb288
LP
1545
1546 if (plain_directory) {
1547 /* Plain directories have no recursive read-only flag, but something pretty close to
1548 * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
1549
1550 if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
a997f338 1551 (void) chattr_at(new_fd, subvolume, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
17cbb288 1552 } else {
fab4ef72 1553 r = btrfs_subvol_set_read_only_at(new_fd, subvolume, true);
17cbb288
LP
1554 if (r < 0)
1555 goto fallback_fail;
f70a17f8
LP
1556 }
1557 }
1558
d54f60c2 1559 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
17cbb288
LP
1560
1561 fallback_fail:
fab4ef72 1562 (void) rm_rf_at(new_fd, subvolume, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
17cbb288 1563 return r;
f70a17f8
LP
1564 }
1565
f70a17f8
LP
1566 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1567}
1568
5bcd08db
LP
1569int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1570
1571 struct btrfs_ioctl_search_args args = {
1572 /* Tree of quota items */
1573 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1574
1575 /* Look precisely for the quota relation items */
1576 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1577 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1578
1579 /* No restrictions on the other components */
1580 .key.min_offset = 0,
f5fbe71d 1581 .key.max_offset = UINT64_MAX,
5bcd08db
LP
1582
1583 .key.min_transid = 0,
f5fbe71d 1584 .key.max_transid = UINT64_MAX,
5bcd08db
LP
1585 };
1586
1587 _cleanup_free_ uint64_t *items = NULL;
319a4f4b 1588 size_t n_items = 0;
5bcd08db
LP
1589 int r;
1590
1591 assert(fd >= 0);
1592 assert(ret);
1593
1594 if (qgroupid == 0) {
1595 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1596 if (r < 0)
1597 return r;
1598 } else {
65ddc2c5 1599 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
1600 if (r < 0)
1601 return r;
79de6eb1 1602 if (r == 0)
5bcd08db
LP
1603 return -ENOTTY;
1604 }
1605
1606 args.key.min_objectid = args.key.max_objectid = qgroupid;
1607
1608 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
1609 struct btrfs_ioctl_search_header sh;
1610 _unused_ const void *body;
5bcd08db
LP
1611
1612 args.key.nr_items = 256;
12ee6186
LP
1613 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1614 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1615 break;
1616
5bcd08db 1617 return -errno;
12ee6186 1618 }
5bcd08db
LP
1619
1620 if (args.key.nr_items <= 0)
1621 break;
1622
801bf40c 1623 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
5bcd08db
LP
1624
1625 /* Make sure we start the next search at least from this entry */
801bf40c 1626 btrfs_ioctl_search_args_set(&args, &sh);
5bcd08db 1627
801bf40c 1628 if (sh.type != BTRFS_QGROUP_RELATION_KEY)
5bcd08db 1629 continue;
801bf40c 1630 if (sh.offset < sh.objectid)
5bcd08db 1631 continue;
801bf40c 1632 if (sh.objectid != qgroupid)
5bcd08db
LP
1633 continue;
1634
319a4f4b 1635 if (!GREEDY_REALLOC(items, n_items+1))
5bcd08db
LP
1636 return -ENOMEM;
1637
801bf40c 1638 items[n_items++] = sh.offset;
5bcd08db
LP
1639 }
1640
1641 /* Increase search key by one, to read the next item, if we can. */
1642 if (!btrfs_ioctl_search_args_inc(&args))
1643 break;
1644 }
1645
ab0137b4
ZJS
1646 assert((n_items > 0) == !!items);
1647 assert(n_items <= INT_MAX);
5bcd08db 1648
1cc6c93a 1649 *ret = TAKE_PTR(items);
5bcd08db
LP
1650 return (int) n_items;
1651}
1652
1653int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1654 _cleanup_free_ uint64_t *qgroups = NULL;
254d1313 1655 _cleanup_close_ int real_fd = -EBADF;
5bcd08db
LP
1656 uint64_t parent_subvol;
1657 bool changed = false;
1658 int n = 0, r;
1659
1660 assert(fd >= 0);
1661
1662 /*
1663 * Sets up the specified subvolume's qgroup automatically in
1664 * one of two ways:
1665 *
1666 * If insert_intermediary_qgroup is false, the subvolume's
1667 * leaf qgroup will be assigned to the same parent qgroups as
1668 * the subvolume's parent subvolume.
1669 *
1670 * If insert_intermediary_qgroup is true a new intermediary
1671 * higher-level qgroup is created, with a higher level number,
1672 * but reusing the id of the subvolume. The level number is
1673 * picked as one smaller than the lowest level qgroup the
1674 * parent subvolume is a member of. If the parent subvolume's
1675 * leaf qgroup is assigned to no higher-level qgroup a new
1676 * qgroup of level 255 is created instead. Either way, the new
1677 * qgroup is then assigned to the parent's higher-level
1678 * qgroup, and the subvolume itself is assigned to it.
1679 *
1680 * If the subvolume is already assigned to a higher level
1681 * qgroup, no operation is executed.
1682 *
1683 * Effectively this means: regardless if
1684 * insert_intermediary_qgroup is true or not, after this
1685 * function is invoked the subvolume will be accounted within
1686 * the same qgroups as the parent. However, if it is true, it
1687 * will also get its own higher-level qgroup, which may in
1688 * turn be used by subvolumes created beneath this subvolume
1689 * later on.
1690 *
1691 * This hence defines a simple default qgroup setup for
1692 * subvolumes, as long as this function is invoked on each
1693 * created subvolume: each subvolume is always accounting
1694 * together with its immediate parents. Optionally, if
1695 * insert_intermediary_qgroup is true, it will also get a
1696 * qgroup that then includes all its own child subvolumes.
1697 */
1698
e6d1d4c0
LP
1699 /* Turn this into a proper fd, if it is currently O_PATH */
1700 fd = fd_reopen_condition(fd, O_RDONLY|O_CLOEXEC, O_PATH, &real_fd);
1701 if (fd < 0)
1702 return fd;
1703
5bcd08db 1704 if (subvol_id == 0) {
2904e949 1705 r = btrfs_is_subvol_fd(fd);
5bcd08db
LP
1706 if (r < 0)
1707 return r;
1708 if (!r)
1709 return -ENOTTY;
1710
1711 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1712 if (r < 0)
1713 return r;
1714 }
1715
1716 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1717 if (n < 0)
1718 return n;
1719 if (n > 0) /* already parent qgroups set up, let's bail */
1720 return 0;
1721
08c77cf3
LP
1722 qgroups = mfree(qgroups);
1723
5bcd08db 1724 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
08c77cf3
LP
1725 if (r == -ENXIO)
1726 /* No parent, hence no qgroup memberships */
1727 n = 0;
1728 else if (r < 0)
5bcd08db 1729 return r;
08c77cf3
LP
1730 else {
1731 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1732 if (n < 0)
1733 return n;
1734 }
5bcd08db
LP
1735
1736 if (insert_intermediary_qgroup) {
1737 uint64_t lowest = 256, new_qgroupid;
1738 bool created = false;
5bcd08db
LP
1739
1740 /* Determine the lowest qgroup that the parent
1741 * subvolume is assigned to. */
1742
cecaba20 1743 for (int i = 0; i < n; i++) {
5bcd08db
LP
1744 uint64_t level;
1745
1746 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1747 if (r < 0)
1748 return r;
1749
1750 if (level < lowest)
1751 lowest = level;
1752 }
1753
1754 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1755 return -EBUSY;
1756
1757 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1758 if (r < 0)
1759 return r;
1760
1761 /* Create the new intermediary group, unless it already exists */
1762 r = btrfs_qgroup_create(fd, new_qgroupid);
1763 if (r < 0 && r != -EEXIST)
1764 return r;
1765 if (r >= 0)
1766 changed = created = true;
1767
cecaba20 1768 for (int i = 0; i < n; i++) {
5bcd08db
LP
1769 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1770 if (r < 0 && r != -EEXIST) {
1771 if (created)
1772 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1773
1774 return r;
1775 }
1776 if (r >= 0)
1777 changed = true;
1778 }
1779
1780 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1781 if (r < 0 && r != -EEXIST) {
1782 if (created)
1783 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1784 return r;
1785 }
1786 if (r >= 0)
1787 changed = true;
1788
1789 } else {
1790 int i;
1791
1792 /* Assign our subvolume to all the same qgroups as the parent */
1793
1794 for (i = 0; i < n; i++) {
1795 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1796 if (r < 0 && r != -EEXIST)
1797 return r;
1798 if (r >= 0)
1799 changed = true;
1800 }
1801 }
1802
1803 return changed;
1804}
1805
1806int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
254d1313 1807 _cleanup_close_ int fd = -EBADF;
5bcd08db
LP
1808
1809 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1810 if (fd < 0)
1811 return -errno;
1812
1813 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
1814}
1815
3799fa80
DDM
1816int btrfs_subvol_make_default(const char *path) {
1817 _cleanup_close_ int fd = -EBADF;
1818 uint64_t id;
1819 int r;
1820
1821 assert(path);
1822
1823 fd = open(path, O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1824 if (fd < 0)
1825 return -errno;
1826
1827 r = btrfs_subvol_get_id_fd(fd, &id);
1828 if (r < 0)
1829 return r;
1830
1831 return RET_NERRNO(ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &id));
1832}
1833
5bcd08db
LP
1834int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
1835
1836 struct btrfs_ioctl_search_args args = {
1837 /* Tree of tree roots */
1838 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1839
1840 /* Look precisely for the subvolume items */
1841 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1842 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1843
1844 /* No restrictions on the other components */
1845 .key.min_offset = 0,
f5fbe71d 1846 .key.max_offset = UINT64_MAX,
5bcd08db
LP
1847
1848 .key.min_transid = 0,
f5fbe71d 1849 .key.max_transid = UINT64_MAX,
5bcd08db
LP
1850 };
1851 int r;
1852
1853 assert(fd >= 0);
1854 assert(ret);
1855
1856 if (subvol_id == 0) {
1857 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1858 if (r < 0)
1859 return r;
1860 } else {
65ddc2c5 1861 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
5bcd08db
LP
1862 if (r < 0)
1863 return r;
79de6eb1 1864 if (r == 0)
5bcd08db
LP
1865 return -ENOTTY;
1866 }
1867
1868 args.key.min_objectid = args.key.max_objectid = subvol_id;
1869
1870 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
801bf40c
LP
1871 struct btrfs_ioctl_search_header sh;
1872 _unused_ const void *body = NULL;
5bcd08db
LP
1873
1874 args.key.nr_items = 256;
1875 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
9c4615fb 1876 return negative_errno();
5bcd08db
LP
1877
1878 if (args.key.nr_items <= 0)
1879 break;
1880
801bf40c 1881 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
5bcd08db 1882
801bf40c 1883 if (sh.type != BTRFS_ROOT_BACKREF_KEY)
5bcd08db 1884 continue;
801bf40c 1885 if (sh.objectid != subvol_id)
5bcd08db
LP
1886 continue;
1887
801bf40c 1888 *ret = sh.offset;
5bcd08db
LP
1889 return 0;
1890 }
1891 }
1892
1893 return -ENXIO;
1894}
5228c58e 1895
69a283c5
DDM
/* Checks whether the specified 'struct stat' could describe a btrfs subvolume, i.e. whether it is a
 * directory with inode number 256. This is a necessary condition only: for a definitive answer the
 * caller additionally has to verify (e.g. via fstatfs()) that the file system actually is btrfs.
 * A NULL 'st' yields false. */
bool btrfs_might_be_subvol(const struct stat *st) {
        if (!st)
                return false;

        if (!S_ISDIR(st->st_mode))
                return false;

        return st->st_ino == 256;
}
1905
5228c58e
DDM
1906int btrfs_forget_device(const char *path) {
1907 _cleanup_close_ int control_fd = -EBADF;
1908 struct btrfs_ioctl_vol_args args = {};
1909
1910 assert(path);
1911
1912 if (strlen(path) > BTRFS_PATH_NAME_MAX)
1913 return -E2BIG;
1914
1915 strcpy(args.name, path);
1916
1917 control_fd = open("/dev/btrfs-control", O_RDWR|O_CLOEXEC);
1918 if (control_fd < 0)
1919 return -errno;
1920
1921 return RET_NERRNO(ioctl(control_fd, BTRFS_IOC_FORGET_DEV, &args));
1922}
efb6a76a
MY
1923
typedef struct BtrfsStripe BtrfsStripe;
typedef struct BtrfsChunk BtrfsChunk;
typedef struct BtrfsChunkTree BtrfsChunkTree;

/* One stripe of a chunk, in host byte order: the fields are filled in from the 'devid' and 'offset'
 * fields of a 'struct btrfs_stripe'. */
struct BtrfsStripe {
        uint64_t devid;
        uint64_t offset;
};

/* One chunk item read from the chunk tree, in host byte order. */
struct BtrfsChunk {
        uint64_t offset;        /* offset of the item's key; start of the logical range covered */
        uint64_t length;        /* size of the logical range covered */
        uint64_t type;

        BtrfsStripe *stripes;   /* array of n_stripes entries, owned by the chunk */
        uint16_t n_stripes;
        uint64_t stripe_len;
};

/* All chunks read from a chunk tree: an array of n_chunks pointers to individually allocated chunks. */
struct BtrfsChunkTree {
        BtrfsChunk **chunks;
        size_t n_chunks;
};
1943
1944static BtrfsChunk* btrfs_chunk_free(BtrfsChunk *chunk) {
1945 if (!chunk)
1946 return NULL;
1947
1948 free(chunk->stripes);
1949
1950 return mfree(chunk);
1951}
1952
1953DEFINE_TRIVIAL_CLEANUP_FUNC(BtrfsChunk*, btrfs_chunk_free);
1954
1955static void btrfs_chunk_tree_done(BtrfsChunkTree *tree) {
1956 assert(tree);
1957
1958 FOREACH_ARRAY(i, tree->chunks, tree->n_chunks)
1959 btrfs_chunk_free(*i);
e504e465
MY
1960
1961 free(tree->chunks);
efb6a76a
MY
1962}
1963
1964static int btrfs_read_chunk_tree_fd(int fd, BtrfsChunkTree *ret) {
1965
1966 struct btrfs_ioctl_search_args search_args = {
1967 .key.tree_id = BTRFS_CHUNK_TREE_OBJECTID,
1968
1969 .key.min_type = BTRFS_CHUNK_ITEM_KEY,
1970 .key.max_type = BTRFS_CHUNK_ITEM_KEY,
1971
1972 .key.min_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1973 .key.max_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1974
1975 .key.min_offset = 0,
1976 .key.max_offset = UINT64_MAX,
1977
1978 .key.min_transid = 0,
1979 .key.max_transid = UINT64_MAX,
1980 };
1981
1982 _cleanup_(btrfs_chunk_tree_done) BtrfsChunkTree tree = {};
1983
1984 assert(fd >= 0);
1985 assert(ret);
1986
1987 while (btrfs_ioctl_search_args_compare(&search_args) <= 0) {
801bf40c
LP
1988 struct btrfs_ioctl_search_header sh;
1989 const void *body;
efb6a76a
MY
1990
1991 search_args.key.nr_items = 256;
1992
1993 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args) < 0)
1994 return -errno;
1995
1996 if (search_args.key.nr_items == 0)
1997 break;
1998
801bf40c 1999 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, search_args) {
efb6a76a 2000 _cleanup_(btrfs_chunk_freep) BtrfsChunk *chunk = NULL;
efb6a76a 2001
801bf40c 2002 btrfs_ioctl_search_args_set(&search_args, &sh);
efb6a76a 2003
801bf40c 2004 if (sh.objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)
efb6a76a 2005 continue;
801bf40c 2006 if (sh.type != BTRFS_CHUNK_ITEM_KEY)
efb6a76a
MY
2007 continue;
2008
2009 chunk = new(BtrfsChunk, 1);
2010 if (!chunk)
2011 return -ENOMEM;
2012
801bf40c 2013 const struct btrfs_chunk *item = body;
efb6a76a 2014 *chunk = (BtrfsChunk) {
801bf40c 2015 .offset = sh.offset,
efb6a76a
MY
2016 .length = le64toh(item->length),
2017 .type = le64toh(item->type),
2018 .n_stripes = le16toh(item->num_stripes),
2019 .stripe_len = le64toh(item->stripe_len),
2020 };
2021
2022 chunk->stripes = new(BtrfsStripe, chunk->n_stripes);
2023 if (!chunk->stripes)
2024 return -ENOMEM;
2025
2026 for (size_t j = 0; j < chunk->n_stripes; j++) {
2027 const struct btrfs_stripe *stripe = &item->stripe + j;
2028
2029 chunk->stripes[j] = (BtrfsStripe) {
2030 .devid = le64toh(stripe->devid),
2031 .offset = le64toh(stripe->offset),
2032 };
2033 }
2034
2035 if (!GREEDY_REALLOC(tree.chunks, tree.n_chunks + 1))
2036 return -ENOMEM;
2037
2038 tree.chunks[tree.n_chunks++] = TAKE_PTR(chunk);
2039 }
2040
2041 if (!btrfs_ioctl_search_args_inc(&search_args))
2042 break;
2043 }
2044
2045 *ret = TAKE_STRUCT(tree);
2046 return 0;
2047}
2048
2049static BtrfsChunk* btrfs_find_chunk_from_logical_address(const BtrfsChunkTree *tree, uint64_t logical) {
2050 size_t min_index, max_index;
2051
2052 assert(tree);
2053 assert(tree->chunks || tree->n_chunks == 0);
2054
2055 if (tree->n_chunks == 0)
2056 return NULL;
2057
2058 /* bisection */
2059 min_index = 0;
2060 max_index = tree->n_chunks - 1;
2061
2062 while (min_index <= max_index) {
2063 size_t mid = (min_index + max_index) / 2;
2064
2065 if (logical < tree->chunks[mid]->offset) {
2066 if (mid < 1)
2067 return NULL;
2068
2069 max_index = mid - 1;
2070 } else if (logical >= tree->chunks[mid]->offset + tree->chunks[mid]->length)
2071 min_index = mid + 1;
2072 else
2073 return tree->chunks[mid];
2074 }
2075
2076 return NULL;
2077}
2078
2079static int btrfs_is_nocow_fd(int fd) {
efb6a76a 2080 unsigned flags;
05f38c89 2081 int r;
efb6a76a
MY
2082
2083 assert(fd >= 0);
2084
05f38c89
LP
2085 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
2086 if (r < 0)
2087 return r;
2088 if (r == 0)
efb6a76a
MY
2089 return -ENOTTY;
2090
05f38c89
LP
2091 r = read_attr_fd(fd, &flags);
2092 if (r < 0)
2093 return r;
efb6a76a
MY
2094
2095 return FLAGS_SET(flags, FS_NOCOW_FL) && !FLAGS_SET(flags, FS_COMPR_FL);
2096}
2097
2098int btrfs_get_file_physical_offset_fd(int fd, uint64_t *ret) {
2099
2100 struct btrfs_ioctl_search_args search_args = {
2101 .key.min_type = BTRFS_EXTENT_DATA_KEY,
2102 .key.max_type = BTRFS_EXTENT_DATA_KEY,
2103
2104 .key.min_offset = 0,
2105 .key.max_offset = UINT64_MAX,
2106
2107 .key.min_transid = 0,
2108 .key.max_transid = UINT64_MAX,
2109 };
2110
2111 _cleanup_(btrfs_chunk_tree_done) BtrfsChunkTree tree = {};
2112 uint64_t subvol_id;
2113 struct stat st;
2114 int r;
2115
2116 assert(fd >= 0);
2117 assert(ret);
2118
2119 if (fstat(fd, &st) < 0)
2120 return -errno;
2121
2122 r = stat_verify_regular(&st);
2123 if (r < 0)
2124 return r;
2125
2126 r = btrfs_is_nocow_fd(fd);
2127 if (r < 0)
2128 return r;
2129 if (r == 0)
2130 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2131 "Cannot get physical address for btrfs extent: CoW enabled");
2132
2133 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
2134 if (r < 0)
2135 return r;
2136
2137 r = btrfs_read_chunk_tree_fd(fd, &tree);
2138 if (r < 0)
2139 return r;
2140
2141 search_args.key.tree_id = subvol_id;
2142 search_args.key.min_objectid = search_args.key.max_objectid = st.st_ino;
2143
2144 while (btrfs_ioctl_search_args_compare(&search_args) <= 0) {
801bf40c
LP
2145 struct btrfs_ioctl_search_header sh;
2146 const void *body;
efb6a76a
MY
2147
2148 search_args.key.nr_items = 256;
2149
2150 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args) < 0)
2151 return -errno;
2152
2153 if (search_args.key.nr_items == 0)
2154 break;
2155
801bf40c 2156 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, search_args) {
efb6a76a
MY
2157 uint64_t logical_offset;
2158 BtrfsChunk *chunk;
2159
801bf40c 2160 btrfs_ioctl_search_args_set(&search_args, &sh);
efb6a76a 2161
801bf40c 2162 if (sh.type != BTRFS_EXTENT_DATA_KEY)
efb6a76a
MY
2163 continue;
2164
801bf40c 2165 if (sh.objectid != st.st_ino)
efb6a76a
MY
2166 continue;
2167
801bf40c 2168 const struct btrfs_file_extent_item *item = body;
efb6a76a
MY
2169 if (!IN_SET(item->type, BTRFS_FILE_EXTENT_REG, BTRFS_FILE_EXTENT_PREALLOC))
2170 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2171 "Cannot get physical address for btrfs extent: invalid type %" PRIu8,
2172 item->type);
2173
2174 if (item->compression != 0 || item->encryption != 0 || item->other_encoding != 0)
2175 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2176 "Cannot get physical address for btrfs extent: has incompatible property");
2177
2178 logical_offset = le64toh(item->disk_bytenr);
2179 if (logical_offset == 0)
2180 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2181 "Cannot get physical address for btrfs extent: failed to get logical offset");
2182
2183 chunk = btrfs_find_chunk_from_logical_address(&tree, logical_offset);
2184 if (!chunk)
2185 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2186 "Cannot get physical address for btrfs extent: no matching chunk found");
2187
2188 if ((chunk->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)
2189 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2190 "Cannot get physical address for btrfs extent: unsupported profile");
2191
2192 uint64_t relative_chunk, relative_stripe, stripe_nr;
2193 uint16_t stripe_index;
2194
2195 assert(logical_offset >= chunk->offset);
2196 assert(chunk->n_stripes > 0);
2197 assert(chunk->stripe_len > 0);
2198
2199 relative_chunk = logical_offset - chunk->offset;
2200 stripe_nr = relative_chunk / chunk->stripe_len;
2201 relative_stripe = relative_chunk - stripe_nr * chunk->stripe_len;
2202 stripe_index = stripe_nr % chunk->n_stripes;
2203
2204 *ret = chunk->stripes[stripe_index].offset +
2205 stripe_nr / chunk->n_stripes * chunk->stripe_len +
2206 relative_stripe;
2207
2208 return 0;
2209 }
2210
2211 if (!btrfs_ioctl_search_args_inc(&search_args))
2212 break;
2213 }
2214
2215 return -ENODATA;
2216}