]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/btrfs-util.c
61fe50e012aa999c2d5c901957d3cd9916f9de4c
[thirdparty/systemd.git] / src / shared / btrfs-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <fcntl.h>
4 #include <linux/btrfs.h>
5 #include <linux/btrfs_tree.h>
6 #include <linux/magic.h>
7 #include <stdio.h>
8 #include <sys/file.h>
9 #include <sys/ioctl.h>
10 #include <sys/sysmacros.h>
11 #include <unistd.h>
12
13 #include "alloc-util.h"
14 #include "btrfs-util.h"
15 #include "chase.h"
16 #include "chattr-util.h"
17 #include "copy.h"
18 #include "errno-util.h"
19 #include "fd-util.h"
20 #include "fs-util.h"
21 #include "log.h"
22 #include "path-util.h"
23 #include "rm-rf.h"
24 #include "sparse-endian.h"
25 #include "stat-util.h"
26 #include "string-util.h"
27 #include "time-util.h"
28
29 /* WARNING: Be careful with file system ioctls! When we get an fd, we
30 * need to make sure it either refers to only a regular file or
31 * directory, or that it is located on btrfs, before invoking any
32 * btrfs ioctls. The ioctl numbers are reused by some device drivers
33 * (such as DRM), and hence might have bad effects when invoked on
34 * device nodes (that reference drivers) rather than fds to normal
35 * files or directories. */
36
37 int btrfs_is_subvol_at(int dir_fd, const char *path) {
38 struct stat st;
39
40 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
41
42 /* On btrfs subvolumes always have the inode 256 */
43
44 if (fstatat(dir_fd, strempty(path), &st, isempty(path) ? AT_EMPTY_PATH : 0) < 0)
45 return -errno;
46
47 if (!btrfs_might_be_subvol(&st))
48 return 0;
49
50 return is_fs_type_at(dir_fd, path, BTRFS_SUPER_MAGIC);
51 }
52
53 int btrfs_subvol_set_read_only_at(int dir_fd, const char *path, bool b) {
54 _cleanup_close_ int fd = -EBADF;
55 uint64_t flags, nflags;
56 struct stat st;
57
58 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
59
60 fd = xopenat(dir_fd, path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
61 if (fd < 0)
62 return fd;
63
64 if (fstat(fd, &st) < 0)
65 return -errno;
66
67 if (!btrfs_might_be_subvol(&st))
68 return -EINVAL;
69
70 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
71 return -errno;
72
73 nflags = UPDATE_FLAG(flags, BTRFS_SUBVOL_RDONLY, b);
74 if (flags == nflags)
75 return 0;
76
77 return RET_NERRNO(ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags));
78 }
79
/* Queries the read-only flag of the subvolume referenced by 'fd'. Returns 1 if the flag is set, 0 if
 * it is not, -EINVAL if the fd cannot refer to a subvolume, or another negative errno on failure. */
int btrfs_subvol_get_read_only_fd(int fd) {
        uint64_t subvol_flags;
        struct stat st;

        assert(fd >= 0);

        if (fstat(fd, &st) < 0)
                return -errno;

        /* Refuse to issue the ioctl on anything that does not look like a subvolume */
        if (!btrfs_might_be_subvol(&st))
                return -EINVAL;

        if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &subvol_flags) < 0)
                return -errno;

        return (subvol_flags & BTRFS_SUBVOL_RDONLY) != 0;
}
97
98 int btrfs_get_block_device_at(int dir_fd, const char *path, dev_t *ret) {
99 struct btrfs_ioctl_fs_info_args fsi = {};
100 _cleanup_close_ int fd = -EBADF;
101 uint64_t id;
102 int r;
103
104 assert(dir_fd >= 0 || dir_fd == AT_FDCWD);
105 assert(path);
106 assert(ret);
107
108 fd = xopenat(dir_fd, path, O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
109 if (fd < 0)
110 return fd;
111
112 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
113 if (r < 0)
114 return r;
115 if (r == 0)
116 return -ENOTTY;
117
118 if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
119 return -errno;
120
121 /* We won't do this for btrfs RAID */
122 if (fsi.num_devices != 1) {
123 *ret = 0;
124 return 0;
125 }
126
127 for (id = 1; id <= fsi.max_id; id++) {
128 struct btrfs_ioctl_dev_info_args di = {
129 .devid = id,
130 };
131 struct stat st;
132
133 if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
134 if (errno == ENODEV)
135 continue;
136
137 return -errno;
138 }
139
140 /* For the root fs — when no initrd is involved — btrfs returns /dev/root on any kernels from
141 * the past few years. That sucks, as we have no API to determine the actual root then. let's
142 * return an recognizable error for this case, so that the caller can maybe print a nice
143 * message about this.
144 *
145 * https://bugzilla.kernel.org/show_bug.cgi?id=89721 */
146 if (path_equal((char*) di.path, "/dev/root"))
147 return -EUCLEAN;
148
149 if (stat((char*) di.path, &st) < 0)
150 return -errno;
151
152 if (!S_ISBLK(st.st_mode))
153 return -ENOTBLK;
154
155 if (major(st.st_rdev) == 0)
156 return -ENODEV;
157
158 *ret = st.st_rdev;
159 return 1;
160 }
161
162 return -ENODEV;
163 }
164
/* Looks up the id of the tree (i.e. subvolume) that 'fd' is located in and stores it in *ret.
 * Returns 0 on success, -ENOTTY if 'fd' is not on btrfs, or another negative errno on failure. */
int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
        struct btrfs_ioctl_ino_lookup_args lookup = {
                .objectid = BTRFS_FIRST_FREE_OBJECTID
        };
        int r;

        assert(fd >= 0);
        assert(ret);

        /* Never send btrfs ioctls to something that is not on btrfs */
        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &lookup) < 0)
                return -errno;

        /* The ioctl fills in the id of the tree the fd belongs to */
        *ret = lookup.treeid;
        return 0;
}
186
187 int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
188 _cleanup_close_ int subvol_fd = -EBADF;
189
190 assert(fd >= 0);
191 assert(ret);
192
193 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
194 if (subvol_fd < 0)
195 return -errno;
196
197 return btrfs_subvol_get_id_fd(subvol_fd, ret);
198 }
199
/* Advances the lower bound (min_objectid, min_type, min_offset) of the search key in 'args' to the
 * next possible key. Returns true if the key was advanced, and false if it already was the largest
 * possible key, in which case 'args' is left unmodified. */
static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
        assert(args);

        /* The objectid, type and offset together make up the btrfs key, which is considered a single
         * 136-bit integer (64 + 8 + 64 bits) when comparing. This call increases that integer by
         * one, carrying over into the next more significant component whenever a less significant
         * one would overflow. */

        if (args->key.min_offset < UINT64_MAX) {
                args->key.min_offset++;
                return true;
        }

        /* Offset is saturated: carry into the type, which is only 8 bits wide in the key */
        if (args->key.min_type < UINT8_MAX) {
                args->key.min_type++;
                args->key.min_offset = 0;
                return true;
        }

        /* Type is saturated too: carry into the object id */
        if (args->key.min_objectid < UINT64_MAX) {
                args->key.min_objectid++;
                args->key.min_offset = 0;
                args->key.min_type = 0;
                return true;
        }

        /* All three components are at their maximum, there is no next key */
        return false;
}
228
/* Sets the lower bound of the search key in 'args' to the key of the item described by 'h', so that
 * a subsequent search starts at that item. The upper bound is left alone. */
static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
        struct btrfs_ioctl_search_key *key;

        assert(args);
        assert(h);

        key = &args->key;

        key->min_offset = h->offset;
        key->min_type = h->type;
        key->min_objectid = h->objectid;
}
237
/* Compares the lower bound of the search key in 'args' with its upper bound, component by component
 * in the order objectid, type, offset. Returns the CMP() result of the first component that differs,
 * or 0 if both bounds are identical. */
static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
        int c;

        assert(args);

        /* Compare min and max, most significant component first */
        c = CMP(args->key.min_objectid, args->key.max_objectid);
        if (c == 0)
                c = CMP(args->key.min_type, args->key.max_type);
        if (c == 0)
                c = CMP(args->key.min_offset, args->key.max_offset);

        return c;
}
255
/* Iteration state for walking the items stored in the buffer of a struct btrfs_ioctl_search_args. */
typedef struct BtrfsForeachIterator BtrfsForeachIterator;

struct BtrfsForeachIterator {
        const struct btrfs_ioctl_search_args *args; /* search result that is being walked */
        size_t offset;                              /* byte offset of the next item header in args->buf */
        unsigned index;                             /* number of items handed out so far */
        struct btrfs_ioctl_search_header *header;   /* where to store a copy of the current item header */
        const void **body;                          /* where to store the pointer to the current item payload */
};
263
264 static int btrfs_iterate(BtrfsForeachIterator *i) {
265 assert(i);
266 assert(i->args);
267 assert(i->header);
268 assert(i->body);
269
270 if (i->index >= i->args->key.nr_items)
271 return 0; /* end */
272
273 assert_cc(BTRFS_SEARCH_ARGS_BUFSIZE >= sizeof(struct btrfs_ioctl_search_header));
274 if (i->offset > BTRFS_SEARCH_ARGS_BUFSIZE - sizeof(struct btrfs_ioctl_search_header))
275 return -EBADMSG;
276
277 struct btrfs_ioctl_search_header h;
278 memcpy(&h, (const uint8_t*) i->args->buf + i->offset, sizeof(struct btrfs_ioctl_search_header));
279
280 if (i->offset > BTRFS_SEARCH_ARGS_BUFSIZE - sizeof(struct btrfs_ioctl_search_header) - h.len)
281 return -EBADMSG;
282
283 *i->body = (const uint8_t*) i->args->buf + i->offset + sizeof(struct btrfs_ioctl_search_header);
284 *i->header = h;
285 i->offset += sizeof(struct btrfs_ioctl_search_header) + h.len;
286 i->index++;
287
288 return 1;
289 }
290
/* Iterates through the items stored in the buffer of the struct btrfs_ioctl_search_args '_args'. The
 * item headers are unfortunately not aligned, hence each header is copied out into '_sh', while
 * '_body' is pointed at the item payload inside the buffer. The loop ends when btrfs_iterate() stops
 * returning a positive value, i.e. both at the end of the list and on error. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(_sh, _body, _args)            \
        for (BtrfsForeachIterator iterator = {                          \
                        .args = &(_args),                               \
                        .offset = 0,                                    \
                        .index = 0,                                     \
                        .header = &(_sh),                               \
                        .body = &(_body),                               \
                };                                                      \
             btrfs_iterate(&iterator) > 0;                              \
             /* btrfs_iterate() advances the iterator */)
300
301 int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
302 struct btrfs_ioctl_search_args args = {
303 /* Tree of tree roots */
304 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
305
306 /* Look precisely for the subvolume items */
307 .key.min_type = BTRFS_ROOT_ITEM_KEY,
308 .key.max_type = BTRFS_ROOT_ITEM_KEY,
309
310 .key.min_offset = 0,
311 .key.max_offset = UINT64_MAX,
312
313 /* No restrictions on the other components */
314 .key.min_transid = 0,
315 .key.max_transid = UINT64_MAX,
316 };
317
318 bool found = false;
319 int r;
320
321 assert(fd >= 0);
322 assert(ret);
323
324 /* Make sure this works on O_PATH fds */
325 _cleanup_close_ int fd_close = -EBADF;
326 fd = fd_reopen_condition(fd, O_CLOEXEC|O_RDONLY|O_DIRECTORY, O_PATH, &fd_close);
327 if (fd < 0)
328 return fd;
329
330 if (subvol_id == 0) {
331 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
332 if (r < 0)
333 return r;
334 } else {
335 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
336 if (r < 0)
337 return r;
338 if (r == 0)
339 return -ENOTTY;
340 }
341
342 args.key.min_objectid = args.key.max_objectid = subvol_id;
343
344 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
345 struct btrfs_ioctl_search_header sh;
346 const void *body;
347
348 args.key.nr_items = 256;
349 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
350 return -errno;
351
352 if (args.key.nr_items <= 0)
353 break;
354
355 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
356 /* Make sure we start the next search at least from this entry */
357 btrfs_ioctl_search_args_set(&args, &sh);
358
359 if (sh.objectid != subvol_id)
360 continue;
361 if (sh.type != BTRFS_ROOT_ITEM_KEY)
362 continue;
363
364 /* Older versions of the struct lacked the otime setting */
365 if (sh.len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
366 continue;
367
368 const struct btrfs_root_item *ri = body;
369 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
370 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
371
372 ret->subvol_id = subvol_id;
373 ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;
374
375 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
376 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
377 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
378
379 found = true;
380 goto finish;
381 }
382
383 /* Increase search key by one, to read the next item, if we can. */
384 if (!btrfs_ioctl_search_args_inc(&args))
385 break;
386 }
387
388 finish:
389 return found ? 0 : -ENODATA;
390 }
391
392 int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
393
394 struct btrfs_ioctl_search_args args = {
395 /* Tree of quota items */
396 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
397
398 /* The object ID is always 0 */
399 .key.min_objectid = 0,
400 .key.max_objectid = 0,
401
402 /* Look precisely for the quota items */
403 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
404 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
405
406 /* No restrictions on the other components */
407 .key.min_transid = 0,
408 .key.max_transid = UINT64_MAX,
409 };
410
411 bool found_info = false, found_limit = false;
412 int r;
413
414 assert(fd >= 0);
415 assert(ret);
416
417 if (qgroupid == 0) {
418 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
419 if (r < 0)
420 return r;
421 } else {
422 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
423 if (r < 0)
424 return r;
425 if (r == 0)
426 return -ENOTTY;
427 }
428
429 args.key.min_offset = args.key.max_offset = qgroupid;
430
431 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
432 struct btrfs_ioctl_search_header sh;
433 const void *body;
434
435 args.key.nr_items = 256;
436 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
437 if (errno == ENOENT) /* quota tree is missing: quota disabled */
438 break;
439
440 return -errno;
441 }
442
443 if (args.key.nr_items <= 0)
444 break;
445
446 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
447
448 /* Make sure we start the next search at least from this entry */
449 btrfs_ioctl_search_args_set(&args, &sh);
450
451 if (sh.objectid != 0)
452 continue;
453 if (sh.offset != qgroupid)
454 continue;
455
456 if (sh.type == BTRFS_QGROUP_INFO_KEY) {
457 const struct btrfs_qgroup_info_item *qii = body;
458
459 ret->referenced = le64toh(qii->rfer);
460 ret->exclusive = le64toh(qii->excl);
461
462 found_info = true;
463
464 } else if (sh.type == BTRFS_QGROUP_LIMIT_KEY) {
465 const struct btrfs_qgroup_limit_item *qli = body;
466
467 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
468 ret->referenced_max = le64toh(qli->max_rfer);
469 else
470 ret->referenced_max = UINT64_MAX;
471
472 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
473 ret->exclusive_max = le64toh(qli->max_excl);
474 else
475 ret->exclusive_max = UINT64_MAX;
476
477 found_limit = true;
478 }
479
480 if (found_info && found_limit)
481 goto finish;
482 }
483
484 /* Increase search key by one, to read the next item, if we can. */
485 if (!btrfs_ioctl_search_args_inc(&args))
486 break;
487 }
488
489 finish:
490 if (!found_limit && !found_info)
491 return -ENODATA;
492
493 if (!found_info) {
494 ret->referenced = UINT64_MAX;
495 ret->exclusive = UINT64_MAX;
496 }
497
498 if (!found_limit) {
499 ret->referenced_max = UINT64_MAX;
500 ret->exclusive_max = UINT64_MAX;
501 }
502
503 return 0;
504 }
505
/* Logs, at log level 'level' and with error 'ret', that the file system behind 'p' is reported by
 * btrfs to be backed by the pseudo-device /dev/root. Returns whatever log_full_errno() returns. */
int btrfs_log_dev_root(int level, int ret, const char *p) {
        return log_full_errno(
                        level,
                        ret,
                        "File system behind %s is reported by btrfs to be backed by pseudo-device /dev/root, which is not a valid userspace accessible device node. "
                        "Cannot determine correct backing block device.",
                        p);
}
511
512 int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
513 _cleanup_close_ int fd = -EBADF;
514
515 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
516 if (fd < 0)
517 return -errno;
518
519 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
520 }
521
522 int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
523 uint64_t level, lowest = UINT64_MAX, lowest_qgroupid = 0;
524 _cleanup_free_ uint64_t *qgroups = NULL;
525 int r, n;
526
527 assert(fd >= 0);
528 assert(ret);
529
530 /* This finds the "subtree" qgroup for a specific
531 * subvolume. This only works for subvolumes that have been
532 * prepared with btrfs_subvol_auto_qgroup_fd() with
533 * insert_intermediary_qgroup=true (or equivalent). For others
534 * it will return the leaf qgroup instead. The two cases may
535 * be distinguished via the return value, which is 1 in case
536 * an appropriate "subtree" qgroup was found, and 0
537 * otherwise. */
538
539 if (subvol_id == 0) {
540 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
541 if (r < 0)
542 return r;
543 }
544
545 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
546 if (r < 0)
547 return r;
548 if (level != 0) /* Input must be a leaf qgroup */
549 return -EINVAL;
550
551 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
552 if (n < 0)
553 return n;
554
555 for (int i = 0; i < n; i++) {
556 uint64_t id;
557
558 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
559 if (r < 0)
560 return r;
561
562 if (id != subvol_id)
563 continue;
564
565 if (lowest == UINT64_MAX || level < lowest) {
566 lowest_qgroupid = qgroups[i];
567 lowest = level;
568 }
569 }
570
571 if (lowest == UINT64_MAX) {
572 /* No suitable higher-level qgroup found, let's return
573 * the leaf qgroup instead, and indicate that with the
574 * return value. */
575
576 *ret = subvol_id;
577 return 0;
578 }
579
580 *ret = lowest_qgroupid;
581 return 1;
582 }
583
584 int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
585 uint64_t qgroupid;
586 int r;
587
588 assert(fd >= 0);
589 assert(ret);
590
591 /* This determines the quota data of the qgroup with the
592 * lowest level, that shares the id part with the specified
593 * subvolume. This is useful for determining the quota data
594 * for entire subvolume subtrees, as long as the subtrees have
595 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
596 * compatible way */
597
598 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
599 if (r < 0)
600 return r;
601
602 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
603 }
604
605 int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
606 _cleanup_close_ int fd = -EBADF;
607
608 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
609 if (fd < 0)
610 return -errno;
611
612 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
613 }
614
/* Issues the btrfs defragmentation ioctl on the regular file referenced by 'fd'. Returns a negative
 * errno if 'fd' does not pass fd_verify_regular() or if the ioctl fails. */
int btrfs_defrag_fd(int fd) {
        int r;

        assert(fd >= 0);

        /* The ioctl number is only safe to use on regular files, hence check first */
        r = fd_verify_regular(fd);

        return r < 0 ? r : RET_NERRNO(ioctl(fd, BTRFS_IOC_DEFRAG, NULL));
}
626
627 int btrfs_defrag(const char *p) {
628 _cleanup_close_ int fd = -EBADF;
629
630 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
631 if (fd < 0)
632 return -errno;
633
634 return btrfs_defrag_fd(fd);
635 }
636
/* Enables (b == true) or disables (b == false) quota on the btrfs file system 'fd' is located on.
 * Returns -ENOTTY if 'fd' is not on btrfs, or a negative errno if the check or the ioctl fails. */
int btrfs_quota_enable_fd(int fd, bool b) {
        struct btrfs_ioctl_quota_ctl_args ctl = {
                .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
        };
        int r;

        assert(fd >= 0);

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r == 0)
                return -ENOTTY;
        if (r < 0)
                return r;

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_CTL, &ctl));
}
653
654 int btrfs_quota_enable(const char *path, bool b) {
655 _cleanup_close_ int fd = -EBADF;
656
657 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
658 if (fd < 0)
659 return -errno;
660
661 return btrfs_quota_enable_fd(fd, b);
662 }
663
/* Sets the limit on referenced data of the quota group 'qgroupid' to 'referenced_max'. If 'qgroupid'
 * is 0 the id of the subvolume 'fd' is located in is used.
 *
 * If the ioctl fails with EBUSY we wait for the quota rescan and retry, up to 10 times. Returns 0 on
 * success, -ENOTTY if 'fd' is not on btrfs, or another negative errno on failure. */
int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
        struct btrfs_ioctl_qgroup_limit_args limit_args = {
                .lim.max_rfer = referenced_max,
                .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
        };
        int r;

        assert(fd >= 0);

        if (qgroupid != 0) {
                r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOTTY;
        } else {
                r = btrfs_subvol_get_id_fd(fd, &qgroupid);
                if (r < 0)
                        return r;
        }

        limit_args.qgroupid = qgroupid;

        for (unsigned attempt = 0;; attempt++) {
                if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &limit_args) >= 0)
                        return 0;

                /* Anything but "busy" is fatal, and so is being busy for too long */
                if (errno != EBUSY || attempt >= 10)
                        return -errno;

                (void) btrfs_quota_scan_wait(fd);
        }
}
704
705 int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
706 _cleanup_close_ int fd = -EBADF;
707
708 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
709 if (fd < 0)
710 return -errno;
711
712 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
713 }
714
/* Sets the limit on referenced data for the subtree qgroup of the subvolume 'subvol_id', as
 * determined by btrfs_subvol_find_subtree_qgroup() (which falls back to the leaf qgroup). Returns
 * what btrfs_qgroup_set_limit_fd() returns, or a negative errno if the lookup fails. */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroupid;
        int r;

        assert(fd >= 0);

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroupid);
        if (r < 0)
                return r;

        return btrfs_qgroup_set_limit_fd(fd, subtree_qgroupid, referenced_max);
}
727
728 int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
729 _cleanup_close_ int fd = -EBADF;
730
731 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
732 if (fd < 0)
733 return -errno;
734
735 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
736 }
737
/* Combines a qgroup level and an id into a qgroup id: the level is stored in the bits above
 * BTRFS_QGROUP_LEVEL_SHIFT, the id in the bits below. Returns 0 and stores the result in *ret, or
 * -EINVAL if either component does not fit into its bit range. */
int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
        /* First values that are too large for the respective bit ranges */
        const uint64_t level_limit = UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT);
        const uint64_t id_limit = UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT;

        assert(ret);

        if (level >= level_limit || id >= id_limit)
                return -EINVAL;

        *ret = id | (level << BTRFS_QGROUP_LEVEL_SHIFT);
        return 0;
}
750
/* Splits a qgroup id into its level (the bits above BTRFS_QGROUP_LEVEL_SHIFT) and its id part (the
 * bits below). Either output pointer may be NULL, but not both. Always returns 0. */
int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
        const uint64_t id_mask = (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1;

        assert(level || id);

        if (id)
                *id = qgroupid & id_mask;

        if (level)
                *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;

        return 0;
}
762
/* Creates (b == true) or destroys (b == false) the quota group 'qgroupid' on the btrfs file system
 * 'fd' is located on. If the ioctl fails with EBUSY we wait for the quota rescan and retry, up to 10
 * times. Returns 0 on success, -ENOTTY if 'fd' is not on btrfs, -ENOTCONN if the ioctl fails with
 * EINVAL or ENOTCONN, or another negative errno on failure. */
static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
        struct btrfs_ioctl_qgroup_create_args create_args = {
                .create = b,
                .qgroupid = qgroupid,
        };
        int r;

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        for (unsigned attempt = 0;; attempt++) {
                if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &create_args) >= 0)
                        return 0;

                /* On old kernels if quota is not enabled, we get EINVAL. On newer kernels we get
                 * ENOTCONN. Let's always convert this to ENOTCONN to make this recognizable
                 * everywhere the same way. */
                if (IN_SET(errno, EINVAL, ENOTCONN))
                        return -ENOTCONN;

                /* Anything but "busy" is fatal, and so is being busy for too long */
                if (errno != EBUSY || attempt >= 10)
                        return -errno;

                (void) btrfs_quota_scan_wait(fd);
        }
}
800
/* Creates the quota group 'qgroupid'; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, /* b= */ true, qgroupid);
}
804
/* Destroys the quota group 'qgroupid'; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, /* b= */ false, qgroupid);
}
808
809 int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
810 _cleanup_free_ uint64_t *qgroups = NULL;
811 uint64_t subvol_id;
812 int n, r;
813
814 /* Destroys the specified qgroup, but unassigns it from all
815 * its parents first. Also, it recursively destroys all
816 * qgroups it is assigned to that have the same id part of the
817 * qgroupid as the specified group. */
818
819 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
820 if (r < 0)
821 return r;
822
823 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
824 if (n < 0)
825 return n;
826
827 for (int i = 0; i < n; i++) {
828 uint64_t id;
829
830 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
831 if (r < 0)
832 return r;
833
834 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
835 if (r < 0)
836 return r;
837
838 if (id != subvol_id)
839 continue;
840
841 /* The parent qgroupid shares the same id part with
842 * us? If so, destroy it too. */
843
844 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
845 }
846
847 return btrfs_qgroup_destroy(fd, qgroupid);
848 }
849
/* Issues the BTRFS_IOC_QUOTA_RESCAN ioctl on 'fd' with zero-initialized arguments. Returns a
 * negative errno if the ioctl fails. */
int btrfs_quota_scan_start(int fd) {
        struct btrfs_ioctl_quota_rescan_args rescan = {};

        assert(fd >= 0);

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &rescan));
}
857
/* Issues the BTRFS_IOC_QUOTA_RESCAN_WAIT ioctl on 'fd'. Returns a negative errno if the ioctl
 * fails. */
int btrfs_quota_scan_wait(int fd) {
        assert(fd >= 0);

        /* This ioctl takes no argument */
        return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT));
}
863
/* Queries the quota rescan status of the file system behind 'fd'. Returns 1 if the flags field of
 * the status is non-zero, 0 if it is zero, and a negative errno if the ioctl fails. */
int btrfs_quota_scan_ongoing(int fd) {
        struct btrfs_ioctl_quota_rescan_args status = {};

        assert(fd >= 0);

        if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &status) < 0)
                return -errno;

        return status.flags != 0;
}
874
/* Assigns (b == true) the qgroup 'child' to the qgroup 'parent', or removes that assignment
 * (b == false). If the ioctl fails with EBUSY we wait for the quota rescan and retry, up to 10 times.
 *
 * Returns 0 if the ioctl returned 0, and 1 if it returned a positive value, in which case a quota
 * rescan is requested (failure to do so is ignored). Returns -ENOTTY if 'fd' is not on btrfs, or
 * another negative errno on failure. */
static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
        struct btrfs_ioctl_qgroup_assign_args assign_args = {
                .assign = b,
                .src = child,
                .dst = parent,
        };
        int r;

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        for (unsigned attempt = 0;; attempt++) {
                r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &assign_args);
                if (r == 0)
                        return 0;

                if (r > 0) {
                        /* If the return value is > 0, we need to request a rescan */
                        (void) btrfs_quota_scan_start(fd);
                        return 1;
                }

                /* Anything but "busy" is fatal, and so is being busy for too long */
                if (errno != EBUSY || attempt >= 10)
                        return -errno;

                (void) btrfs_quota_scan_wait(fd);
        }
}
909
/* Assigns the qgroup 'child' to the qgroup 'parent'; see qgroup_assign_or_unassign() for the return
 * values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, /* b= */ true, child, parent);
}
913
/* Removes the assignment of the qgroup 'child' to the qgroup 'parent'; see
 * qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, /* b= */ false, child, parent);
}
917
918 static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
919 struct btrfs_ioctl_search_args args = {
920 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
921
922 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
923 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
924
925 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
926 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
927
928 .key.min_transid = 0,
929 .key.max_transid = UINT64_MAX,
930 };
931
932 struct btrfs_ioctl_vol_args vol_args = {};
933 _cleanup_close_ int subvol_fd = -EBADF;
934 struct stat st;
935 bool made_writable = false;
936 int r;
937
938 assert(fd >= 0);
939 assert(subvolume);
940
941 if (fstat(fd, &st) < 0)
942 return -errno;
943
944 if (!S_ISDIR(st.st_mode))
945 return -EINVAL;
946
947 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
948 if (subvol_fd < 0)
949 return -errno;
950
951 /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
952 * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
953 * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
954 * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a
955 * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
956 * let's prefer ENOTTY over EPERM/EACCES though. */
957 r = btrfs_is_subvol_fd(subvol_fd);
958 if (r < 0)
959 return r;
960 if (r == 0) /* Not a btrfs subvolume */
961 return -ENOTTY;
962
963 if (subvol_id == 0) {
964 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
965 if (r < 0)
966 return r;
967 }
968
969 /* First, try to remove the subvolume. If it happens to be
970 * already empty, this will just work. */
971 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
972 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
973 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
974 return 0;
975 }
976 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
977 return -errno;
978
979 /* OK, the subvolume is not empty, let's look for child
980 * subvolumes, and remove them, first */
981
982 args.key.min_offset = args.key.max_offset = subvol_id;
983
984 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
985 struct btrfs_ioctl_search_header sh;
986 const void *body;
987
988 args.key.nr_items = 256;
989 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
990 return -errno;
991
992 if (args.key.nr_items <= 0)
993 break;
994
995 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
996 _cleanup_free_ char *p = NULL;
997
998 btrfs_ioctl_search_args_set(&args, &sh);
999
1000 if (sh.type != BTRFS_ROOT_BACKREF_KEY)
1001 continue;
1002 if (sh.offset != subvol_id)
1003 continue;
1004
1005 const struct btrfs_root_ref *ref = body;
1006 p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1007 if (!p)
1008 return -ENOMEM;
1009
1010 struct btrfs_ioctl_ino_lookup_args ino_args = {
1011 .treeid = subvol_id,
1012 .objectid = htole64(ref->dirid),
1013 };
1014
1015 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1016 return -errno;
1017
1018 if (!made_writable) {
1019 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1020 if (r < 0)
1021 return r;
1022
1023 made_writable = true;
1024 }
1025
1026 if (isempty(ino_args.name))
1027 /* Subvolume is in the top-level
1028 * directory of the subvolume. */
1029 r = subvol_remove_children(subvol_fd, p, sh.objectid, flags);
1030 else {
1031 _cleanup_close_ int child_fd = -EBADF;
1032
1033 /* Subvolume is somewhere further down,
1034 * hence we need to open the
1035 * containing directory first */
1036
1037 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1038 if (child_fd < 0)
1039 return -errno;
1040
1041 r = subvol_remove_children(child_fd, p, sh.objectid, flags);
1042 }
1043 if (r < 0)
1044 return r;
1045 }
1046
1047 /* Increase search key by one, to read the next item, if we can. */
1048 if (!btrfs_ioctl_search_args_inc(&args))
1049 break;
1050 }
1051
1052 /* OK, the child subvolumes should all be gone now, let's try
1053 * again to remove the subvolume */
1054 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1055 return -errno;
1056
1057 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
1058 return 0;
1059 }
1060
1061 int btrfs_subvol_remove_at(int dir_fd, const char *path, BtrfsRemoveFlags flags) {
1062 _cleanup_free_ char *subvolume = NULL;
1063 _cleanup_close_ int fd = -EBADF;
1064 int r;
1065
1066 assert(path);
1067
1068 fd = chase_and_openat(dir_fd, path, CHASE_PARENT|CHASE_EXTRACT_FILENAME, O_CLOEXEC, &subvolume);
1069 if (fd < 0)
1070 return fd;
1071
1072 r = btrfs_validate_subvolume_name(subvolume);
1073 if (r < 0)
1074 return r;
1075
1076 return subvol_remove_children(fd, subvolume, 0, flags);
1077 }
1078
1079 int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1080
1081 struct btrfs_ioctl_search_args args = {
1082 /* Tree of quota items */
1083 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1084
1085 /* The object ID is always 0 */
1086 .key.min_objectid = 0,
1087 .key.max_objectid = 0,
1088
1089 /* Look precisely for the quota items */
1090 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1091 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1092
1093 /* For our qgroup */
1094 .key.min_offset = old_qgroupid,
1095 .key.max_offset = old_qgroupid,
1096
1097 /* No restrictions on the other components */
1098 .key.min_transid = 0,
1099 .key.max_transid = UINT64_MAX,
1100 };
1101
1102 int r;
1103
1104 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
1105 if (r < 0)
1106 return r;
1107 if (r == 0)
1108 return -ENOTTY;
1109
1110 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1111 struct btrfs_ioctl_search_header sh;
1112 const void *body;
1113
1114 args.key.nr_items = 256;
1115 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1116 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1117 break;
1118
1119 return -errno;
1120 }
1121
1122 if (args.key.nr_items <= 0)
1123 break;
1124
1125 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
1126 struct btrfs_ioctl_qgroup_limit_args qargs;
1127 unsigned c;
1128
1129 /* Make sure we start the next search at least from this entry */
1130 btrfs_ioctl_search_args_set(&args, &sh);
1131
1132 if (sh.objectid != 0)
1133 continue;
1134 if (sh.type != BTRFS_QGROUP_LIMIT_KEY)
1135 continue;
1136 if (sh.offset != old_qgroupid)
1137 continue;
1138
1139 /* We found the entry, now copy things over. */
1140
1141 const struct btrfs_qgroup_limit_item *qli = body;
1142 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1143 .qgroupid = new_qgroupid,
1144
1145 .lim.max_rfer = le64toh(qli->max_rfer),
1146 .lim.max_excl = le64toh(qli->max_excl),
1147 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1148 .lim.rsv_excl = le64toh(qli->rsv_excl),
1149
1150 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1151 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1152 BTRFS_QGROUP_LIMIT_RSV_RFER|
1153 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1154 };
1155
1156 for (c = 0;; c++) {
1157 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1158 if (errno == EBUSY && c < 10) {
1159 (void) btrfs_quota_scan_wait(fd);
1160 continue;
1161 }
1162 return -errno;
1163 }
1164
1165 break;
1166 }
1167
1168 return 1;
1169 }
1170
1171 /* Increase search key by one, to read the next item, if we can. */
1172 if (!btrfs_ioctl_search_args_inc(&args))
1173 break;
1174 }
1175
1176 return 0;
1177 }
1178
1179 static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1180 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1181 bool copy_from_parent = false, insert_intermediary_qgroup = false;
1182 int n_old_qgroups, n_old_parent_qgroups, r;
1183 uint64_t old_parent_id;
1184
1185 assert(fd >= 0);
1186
1187 /* Copies a reduced form of quota information from the old to
1188 * the new subvolume. */
1189
1190 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1191 if (n_old_qgroups <= 0) /* Nothing to copy */
1192 return n_old_qgroups;
1193
1194 assert(old_qgroups); /* Coverity gets confused by the macro iterator allocating this, add a hint */
1195
1196 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
1197 if (r == -ENXIO)
1198 /* We have no parent, hence nothing to copy. */
1199 n_old_parent_qgroups = 0;
1200 else if (r < 0)
1201 return r;
1202 else {
1203 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1204 if (n_old_parent_qgroups < 0)
1205 return n_old_parent_qgroups;
1206 }
1207
1208 for (int i = 0; i < n_old_qgroups; i++) {
1209 uint64_t id;
1210
1211 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1212 if (r < 0)
1213 return r;
1214
1215 if (id == old_subvol_id) {
1216 /* The old subvolume was member of a qgroup
1217 * that had the same id, but a different level
1218 * as it self. Let's set up something similar
1219 * in the destination. */
1220 insert_intermediary_qgroup = true;
1221 break;
1222 }
1223
1224 for (int j = 0; j < n_old_parent_qgroups; j++)
1225 if (old_parent_qgroups[j] == old_qgroups[i])
1226 /* The old subvolume shared a common
1227 * parent qgroup with its parent
1228 * subvolume. Let's set up something
1229 * similar in the destination. */
1230 copy_from_parent = true;
1231 }
1232
1233 if (!insert_intermediary_qgroup && !copy_from_parent)
1234 return 0;
1235
1236 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1237 }
1238
/* Copies the quota limits of old_subvol over to new_subvol: first those of the leaf qgroup, then those
 * of the subtree qgroup, provided both subvolumes have one.
 *
 * Returns a negative errno-style error on failure, > 0 if limits were copied, 0 otherwise. */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t src_subtree_qgroup, dst_subtree_qgroup;
        int leaf_copied, r;

        /* Step 1: the limits of the leaf qgroup */
        r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (r < 0)
                return r;
        leaf_copied = r > 0;

        /* Step 2: the limits of the subtree qgroup. If either side has no subtree qgroup there is
         * nothing more to do, and we only report what step 1 achieved. */
        r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &src_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &dst_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        /* Propagate both errors and "something was copied" directly; only if nothing happened here
         * fall back to the result of step 1. */
        r = btrfs_qgroup_copy_limits(fd, src_subtree_qgroup, dst_subtree_qgroup);
        return r != 0 ? r : leaf_copied;
}
1269
1270 static int subvol_snapshot_children(
1271 int old_fd,
1272 int new_fd,
1273 const char *subvolume,
1274 uint64_t old_subvol_id,
1275 BtrfsSnapshotFlags flags) {
1276
1277 struct btrfs_ioctl_search_args args = {
1278 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1279
1280 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1281 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1282
1283 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1284 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1285
1286 .key.min_transid = 0,
1287 .key.max_transid = UINT64_MAX,
1288 };
1289
1290 struct btrfs_ioctl_vol_args_v2 vol_args = {
1291 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1292 .fd = old_fd,
1293 };
1294 _cleanup_close_ int subvolume_fd = -EBADF;
1295 uint64_t new_subvol_id;
1296 int r;
1297
1298 assert(old_fd >= 0);
1299 assert(new_fd >= 0);
1300 assert(subvolume);
1301
1302 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1303
1304 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1305 return -errno;
1306
1307 if (FLAGS_SET(flags, BTRFS_SNAPSHOT_LOCK_BSD)) {
1308 subvolume_fd = xopenat_lock(new_fd, subvolume,
1309 O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW,
1310 LOCK_BSD,
1311 LOCK_EX);
1312 if (subvolume_fd < 0)
1313 return subvolume_fd;
1314
1315 r = btrfs_is_subvol_fd(subvolume_fd);
1316 if (r < 0)
1317 return r;
1318 if (r == 0)
1319 return -EEXIST;
1320 }
1321
1322 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1323 !(flags & BTRFS_SNAPSHOT_QUOTA))
1324 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
1325
1326 if (old_subvol_id == 0) {
1327 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
1328 if (r < 0)
1329 return r;
1330 }
1331
1332 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1333 if (r < 0)
1334 return r;
1335
1336 if (flags & BTRFS_SNAPSHOT_QUOTA)
1337 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1338
1339 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1340
1341 if (flags & BTRFS_SNAPSHOT_QUOTA)
1342 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1343
1344 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
1345 }
1346
1347 args.key.min_offset = args.key.max_offset = old_subvol_id;
1348
1349 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1350 struct btrfs_ioctl_search_header sh;
1351 const void *body;
1352
1353 args.key.nr_items = 256;
1354 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1355 return -errno;
1356
1357 if (args.key.nr_items <= 0)
1358 break;
1359
1360 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
1361 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
1362 _cleanup_close_ int old_child_fd = -EBADF, new_child_fd = -EBADF;
1363
1364 btrfs_ioctl_search_args_set(&args, &sh);
1365
1366 if (sh.type != BTRFS_ROOT_BACKREF_KEY)
1367 continue;
1368
1369 /* Avoid finding the source subvolume a second time */
1370 if (sh.offset != old_subvol_id)
1371 continue;
1372
1373 /* Avoid running into loops if the new subvolume is below the old one. */
1374 if (sh.objectid == new_subvol_id)
1375 continue;
1376
1377 const struct btrfs_root_ref *ref = body;
1378 p = memdup_suffix0((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1379 if (!p)
1380 return -ENOMEM;
1381
1382 struct btrfs_ioctl_ino_lookup_args ino_args = {
1383 .treeid = old_subvol_id,
1384 .objectid = htole64(ref->dirid),
1385 };
1386
1387 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1388 return -errno;
1389
1390 c = path_join(ino_args.name, p);
1391 if (!c)
1392 return -ENOMEM;
1393
1394 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1395 if (old_child_fd < 0)
1396 return -errno;
1397
1398 np = path_join(subvolume, ino_args.name);
1399 if (!np)
1400 return -ENOMEM;
1401
1402 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1403 if (new_child_fd < 0)
1404 return -errno;
1405
1406 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1407 /* If the snapshot is read-only we need to mark it writable temporarily, to
1408 * put the subsnapshot into place. */
1409
1410 if (subvolume_fd < 0) {
1411 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1412 if (subvolume_fd < 0)
1413 return -errno;
1414 }
1415
1416 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1417 if (r < 0)
1418 return r;
1419 }
1420
1421 /* When btrfs clones the subvolumes, child subvolumes appear as empty
1422 * directories. Remove them, so that we can create a new snapshot in their place */
1423 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1424 int k = -errno;
1425
1426 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1427 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1428
1429 return k;
1430 }
1431
1432 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh.objectid,
1433 flags & ~(BTRFS_SNAPSHOT_FALLBACK_COPY|BTRFS_SNAPSHOT_LOCK_BSD));
1434
1435 /* Restore the readonly flag */
1436 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1437 int k;
1438
1439 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1440 if (r >= 0 && k < 0)
1441 return k;
1442 }
1443
1444 if (r < 0)
1445 return r;
1446 }
1447
1448 /* Increase search key by one, to read the next item, if we can. */
1449 if (!btrfs_ioctl_search_args_inc(&args))
1450 break;
1451 }
1452
1453 if (flags & BTRFS_SNAPSHOT_QUOTA)
1454 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1455
1456 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
1457 }
1458
1459 int btrfs_subvol_snapshot_at_full(
1460 int dir_fdf,
1461 const char *from,
1462 int dir_fdt,
1463 const char *to,
1464 BtrfsSnapshotFlags flags,
1465 copy_progress_path_t progress_path,
1466 copy_progress_bytes_t progress_bytes,
1467 void *userdata) {
1468
1469 _cleanup_free_ char *subvolume = NULL;
1470 _cleanup_close_ int old_fd = -EBADF, new_fd = -EBADF, subvolume_fd = -EBADF;
1471 int r;
1472
1473 assert(dir_fdf >= 0 || dir_fdf == AT_FDCWD);
1474 assert(dir_fdt >= 0 || dir_fdt == AT_FDCWD);
1475 assert(to);
1476
1477 old_fd = xopenat(dir_fdf, from, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1478 if (old_fd < 0)
1479 return old_fd;
1480
1481 new_fd = chase_and_openat(dir_fdt, to, CHASE_PARENT|CHASE_EXTRACT_FILENAME, O_CLOEXEC, &subvolume);
1482 if (new_fd < 0)
1483 return new_fd;
1484
1485 r = btrfs_validate_subvolume_name(subvolume);
1486 if (r < 0)
1487 return r;
1488
1489 r = btrfs_is_subvol_at(dir_fdf, from);
1490 if (r < 0)
1491 return r;
1492 if (r == 0) {
1493 bool plain_directory = false;
1494
1495 /* If the source isn't a proper subvolume, fail unless fallback is requested */
1496 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1497 return -EISDIR;
1498
1499 r = btrfs_subvol_make(new_fd, subvolume);
1500 if (r < 0) {
1501 if (ERRNO_IS_NOT_SUPPORTED(r) && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
1502 /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
1503 if (mkdirat(new_fd, subvolume, 0755) < 0)
1504 return -errno;
1505
1506 plain_directory = true;
1507 } else
1508 return r;
1509 }
1510
1511 if (FLAGS_SET(flags, BTRFS_SNAPSHOT_LOCK_BSD)) {
1512 subvolume_fd = xopenat_lock(new_fd, subvolume,
1513 O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW,
1514 LOCK_BSD,
1515 LOCK_EX);
1516 if (subvolume_fd < 0)
1517 return subvolume_fd;
1518
1519 if (!plain_directory) {
1520 r = btrfs_is_subvol_fd(subvolume_fd);
1521 if (r < 0)
1522 return r;
1523 if (r == 0)
1524 return -EEXIST;
1525 }
1526 }
1527
1528 r = copy_directory_at_full(
1529 dir_fdf, from,
1530 new_fd, subvolume,
1531 COPY_MERGE_EMPTY|
1532 COPY_REFLINK|
1533 COPY_SAME_MOUNT|
1534 COPY_HARDLINKS|
1535 COPY_ALL_XATTRS|
1536 (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGINT) ? COPY_SIGINT : 0)|
1537 (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGTERM) ? COPY_SIGTERM : 0),
1538 progress_path,
1539 progress_bytes,
1540 userdata);
1541 if (r < 0)
1542 goto fallback_fail;
1543
1544 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1545
1546 if (plain_directory) {
1547 /* Plain directories have no recursive read-only flag, but something pretty close to
1548 * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
1549
1550 if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
1551 (void) chattr_at(new_fd, subvolume, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
1552 } else {
1553 r = btrfs_subvol_set_read_only_at(new_fd, subvolume, true);
1554 if (r < 0)
1555 goto fallback_fail;
1556 }
1557 }
1558
1559 return flags & BTRFS_SNAPSHOT_LOCK_BSD ? TAKE_FD(subvolume_fd) : 0;
1560
1561 fallback_fail:
1562 (void) rm_rf_at(new_fd, subvolume, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
1563 return r;
1564 }
1565
1566 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1567 }
1568
1569 int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1570
1571 struct btrfs_ioctl_search_args args = {
1572 /* Tree of quota items */
1573 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1574
1575 /* Look precisely for the quota relation items */
1576 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1577 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1578
1579 /* No restrictions on the other components */
1580 .key.min_offset = 0,
1581 .key.max_offset = UINT64_MAX,
1582
1583 .key.min_transid = 0,
1584 .key.max_transid = UINT64_MAX,
1585 };
1586
1587 _cleanup_free_ uint64_t *items = NULL;
1588 size_t n_items = 0;
1589 int r;
1590
1591 assert(fd >= 0);
1592 assert(ret);
1593
1594 if (qgroupid == 0) {
1595 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1596 if (r < 0)
1597 return r;
1598 } else {
1599 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
1600 if (r < 0)
1601 return r;
1602 if (r == 0)
1603 return -ENOTTY;
1604 }
1605
1606 args.key.min_objectid = args.key.max_objectid = qgroupid;
1607
1608 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1609 struct btrfs_ioctl_search_header sh;
1610 _unused_ const void *body;
1611
1612 args.key.nr_items = 256;
1613 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1614 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1615 break;
1616
1617 return -errno;
1618 }
1619
1620 if (args.key.nr_items <= 0)
1621 break;
1622
1623 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
1624
1625 /* Make sure we start the next search at least from this entry */
1626 btrfs_ioctl_search_args_set(&args, &sh);
1627
1628 if (sh.type != BTRFS_QGROUP_RELATION_KEY)
1629 continue;
1630 if (sh.offset < sh.objectid)
1631 continue;
1632 if (sh.objectid != qgroupid)
1633 continue;
1634
1635 if (!GREEDY_REALLOC(items, n_items+1))
1636 return -ENOMEM;
1637
1638 items[n_items++] = sh.offset;
1639 }
1640
1641 /* Increase search key by one, to read the next item, if we can. */
1642 if (!btrfs_ioctl_search_args_inc(&args))
1643 break;
1644 }
1645
1646 assert((n_items > 0) == !!items);
1647 assert(n_items <= INT_MAX);
1648
1649 *ret = TAKE_PTR(items);
1650 return (int) n_items;
1651 }
1652
1653 int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1654 _cleanup_free_ uint64_t *qgroups = NULL;
1655 _cleanup_close_ int real_fd = -EBADF;
1656 uint64_t parent_subvol;
1657 bool changed = false;
1658 int n = 0, r;
1659
1660 assert(fd >= 0);
1661
1662 /*
1663 * Sets up the specified subvolume's qgroup automatically in
1664 * one of two ways:
1665 *
1666 * If insert_intermediary_qgroup is false, the subvolume's
1667 * leaf qgroup will be assigned to the same parent qgroups as
1668 * the subvolume's parent subvolume.
1669 *
1670 * If insert_intermediary_qgroup is true a new intermediary
1671 * higher-level qgroup is created, with a higher level number,
1672 * but reusing the id of the subvolume. The level number is
1673 * picked as one smaller than the lowest level qgroup the
1674 * parent subvolume is a member of. If the parent subvolume's
1675 * leaf qgroup is assigned to no higher-level qgroup a new
1676 * qgroup of level 255 is created instead. Either way, the new
1677 * qgroup is then assigned to the parent's higher-level
1678 * qgroup, and the subvolume itself is assigned to it.
1679 *
1680 * If the subvolume is already assigned to a higher level
1681 * qgroup, no operation is executed.
1682 *
1683 * Effectively this means: regardless if
1684 * insert_intermediary_qgroup is true or not, after this
1685 * function is invoked the subvolume will be accounted within
1686 * the same qgroups as the parent. However, if it is true, it
1687 * will also get its own higher-level qgroup, which may in
1688 * turn be used by subvolumes created beneath this subvolume
1689 * later on.
1690 *
1691 * This hence defines a simple default qgroup setup for
1692 * subvolumes, as long as this function is invoked on each
1693 * created subvolume: each subvolume is always accounting
1694 * together with its immediate parents. Optionally, if
1695 * insert_intermediary_qgroup is true, it will also get a
1696 * qgroup that then includes all its own child subvolumes.
1697 */
1698
1699 /* Turn this into a proper fd, if it is currently O_PATH */
1700 fd = fd_reopen_condition(fd, O_RDONLY|O_CLOEXEC, O_PATH, &real_fd);
1701 if (fd < 0)
1702 return fd;
1703
1704 if (subvol_id == 0) {
1705 r = btrfs_is_subvol_fd(fd);
1706 if (r < 0)
1707 return r;
1708 if (!r)
1709 return -ENOTTY;
1710
1711 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1712 if (r < 0)
1713 return r;
1714 }
1715
1716 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1717 if (n < 0)
1718 return n;
1719 if (n > 0) /* already parent qgroups set up, let's bail */
1720 return 0;
1721
1722 qgroups = mfree(qgroups);
1723
1724 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
1725 if (r == -ENXIO)
1726 /* No parent, hence no qgroup memberships */
1727 n = 0;
1728 else if (r < 0)
1729 return r;
1730 else {
1731 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1732 if (n < 0)
1733 return n;
1734 }
1735
1736 if (insert_intermediary_qgroup) {
1737 uint64_t lowest = 256, new_qgroupid;
1738 bool created = false;
1739
1740 /* Determine the lowest qgroup that the parent
1741 * subvolume is assigned to. */
1742
1743 for (int i = 0; i < n; i++) {
1744 uint64_t level;
1745
1746 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1747 if (r < 0)
1748 return r;
1749
1750 if (level < lowest)
1751 lowest = level;
1752 }
1753
1754 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1755 return -EBUSY;
1756
1757 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1758 if (r < 0)
1759 return r;
1760
1761 /* Create the new intermediary group, unless it already exists */
1762 r = btrfs_qgroup_create(fd, new_qgroupid);
1763 if (r < 0 && r != -EEXIST)
1764 return r;
1765 if (r >= 0)
1766 changed = created = true;
1767
1768 for (int i = 0; i < n; i++) {
1769 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1770 if (r < 0 && r != -EEXIST) {
1771 if (created)
1772 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1773
1774 return r;
1775 }
1776 if (r >= 0)
1777 changed = true;
1778 }
1779
1780 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1781 if (r < 0 && r != -EEXIST) {
1782 if (created)
1783 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1784 return r;
1785 }
1786 if (r >= 0)
1787 changed = true;
1788
1789 } else {
1790 int i;
1791
1792 /* Assign our subvolume to all the same qgroups as the parent */
1793
1794 for (i = 0; i < n; i++) {
1795 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1796 if (r < 0 && r != -EEXIST)
1797 return r;
1798 if (r >= 0)
1799 changed = true;
1800 }
1801 }
1802
1803 return changed;
1804 }
1805
1806 int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
1807 _cleanup_close_ int fd = -EBADF;
1808
1809 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1810 if (fd < 0)
1811 return -errno;
1812
1813 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
1814 }
1815
1816 int btrfs_subvol_make_default(const char *path) {
1817 _cleanup_close_ int fd = -EBADF;
1818 uint64_t id;
1819 int r;
1820
1821 assert(path);
1822
1823 fd = open(path, O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1824 if (fd < 0)
1825 return -errno;
1826
1827 r = btrfs_subvol_get_id_fd(fd, &id);
1828 if (r < 0)
1829 return r;
1830
1831 return RET_NERRNO(ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &id));
1832 }
1833
1834 int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
1835
1836 struct btrfs_ioctl_search_args args = {
1837 /* Tree of tree roots */
1838 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1839
1840 /* Look precisely for the subvolume items */
1841 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1842 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1843
1844 /* No restrictions on the other components */
1845 .key.min_offset = 0,
1846 .key.max_offset = UINT64_MAX,
1847
1848 .key.min_transid = 0,
1849 .key.max_transid = UINT64_MAX,
1850 };
1851 int r;
1852
1853 assert(fd >= 0);
1854 assert(ret);
1855
1856 if (subvol_id == 0) {
1857 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1858 if (r < 0)
1859 return r;
1860 } else {
1861 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
1862 if (r < 0)
1863 return r;
1864 if (r == 0)
1865 return -ENOTTY;
1866 }
1867
1868 args.key.min_objectid = args.key.max_objectid = subvol_id;
1869
1870 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1871 struct btrfs_ioctl_search_header sh;
1872 _unused_ const void *body = NULL;
1873
1874 args.key.nr_items = 256;
1875 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1876 return negative_errno();
1877
1878 if (args.key.nr_items <= 0)
1879 break;
1880
1881 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, args) {
1882
1883 if (sh.type != BTRFS_ROOT_BACKREF_KEY)
1884 continue;
1885 if (sh.objectid != subvol_id)
1886 continue;
1887
1888 *ret = sh.offset;
1889 return 0;
1890 }
1891 }
1892
1893 return -ENXIO;
1894 }
1895
/* Cheap pre-check whether the specified 'struct stat' could describe a btrfs subvolume, i.e. whether
 * it is a directory with inode number 256. This alone is not conclusive: it must be combined with an
 * fstatfs() check that the file system really is btrfs. A NULL pointer yields false. */
bool btrfs_might_be_subvol(const struct stat *st) {
        if (!st)
                return false;

        if (!S_ISDIR(st->st_mode))
                return false;

        return st->st_ino == 256;
}
1905
1906 int btrfs_forget_device(const char *path) {
1907 _cleanup_close_ int control_fd = -EBADF;
1908 struct btrfs_ioctl_vol_args args = {};
1909
1910 assert(path);
1911
1912 if (strlen(path) > BTRFS_PATH_NAME_MAX)
1913 return -E2BIG;
1914
1915 strcpy(args.name, path);
1916
1917 control_fd = open("/dev/btrfs-control", O_RDWR|O_CLOEXEC);
1918 if (control_fd < 0)
1919 return -errno;
1920
1921 return RET_NERRNO(ioctl(control_fd, BTRFS_IOC_FORGET_DEV, &args));
1922 }
1923
typedef struct BtrfsStripe BtrfsStripe;
typedef struct BtrfsChunk BtrfsChunk;
typedef struct BtrfsChunkTree BtrfsChunkTree;

/* One stripe of a chunk, with both fields converted to host byte order */
struct BtrfsStripe {
        uint64_t devid;
        uint64_t offset;
};

/* In-memory copy of one chunk item, in host byte order */
struct BtrfsChunk {
        uint64_t offset;       /* offset of the chunk item's key; compared against logical addresses */
        uint64_t length;
        uint64_t type;

        BtrfsStripe *stripes;  /* array of n_stripes entries, owned by the chunk */
        uint16_t n_stripes;
        uint64_t stripe_len;
};

/* All chunks read from the chunk tree, in the order the tree search returned them */
struct BtrfsChunkTree {
        BtrfsChunk **chunks;   /* array of n_chunks entries, each owned by the tree */
        size_t n_chunks;
};
1943
1944 static BtrfsChunk* btrfs_chunk_free(BtrfsChunk *chunk) {
1945 if (!chunk)
1946 return NULL;
1947
1948 free(chunk->stripes);
1949
1950 return mfree(chunk);
1951 }
1952
1953 DEFINE_TRIVIAL_CLEANUP_FUNC(BtrfsChunk*, btrfs_chunk_free);
1954
1955 static void btrfs_chunk_tree_done(BtrfsChunkTree *tree) {
1956 assert(tree);
1957
1958 FOREACH_ARRAY(i, tree->chunks, tree->n_chunks)
1959 btrfs_chunk_free(*i);
1960
1961 free(tree->chunks);
1962 }
1963
1964 static int btrfs_read_chunk_tree_fd(int fd, BtrfsChunkTree *ret) {
1965
1966 struct btrfs_ioctl_search_args search_args = {
1967 .key.tree_id = BTRFS_CHUNK_TREE_OBJECTID,
1968
1969 .key.min_type = BTRFS_CHUNK_ITEM_KEY,
1970 .key.max_type = BTRFS_CHUNK_ITEM_KEY,
1971
1972 .key.min_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1973 .key.max_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1974
1975 .key.min_offset = 0,
1976 .key.max_offset = UINT64_MAX,
1977
1978 .key.min_transid = 0,
1979 .key.max_transid = UINT64_MAX,
1980 };
1981
1982 _cleanup_(btrfs_chunk_tree_done) BtrfsChunkTree tree = {};
1983
1984 assert(fd >= 0);
1985 assert(ret);
1986
1987 while (btrfs_ioctl_search_args_compare(&search_args) <= 0) {
1988 struct btrfs_ioctl_search_header sh;
1989 const void *body;
1990
1991 search_args.key.nr_items = 256;
1992
1993 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args) < 0)
1994 return -errno;
1995
1996 if (search_args.key.nr_items == 0)
1997 break;
1998
1999 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, search_args) {
2000 _cleanup_(btrfs_chunk_freep) BtrfsChunk *chunk = NULL;
2001
2002 btrfs_ioctl_search_args_set(&search_args, &sh);
2003
2004 if (sh.objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)
2005 continue;
2006 if (sh.type != BTRFS_CHUNK_ITEM_KEY)
2007 continue;
2008
2009 chunk = new(BtrfsChunk, 1);
2010 if (!chunk)
2011 return -ENOMEM;
2012
2013 const struct btrfs_chunk *item = body;
2014 *chunk = (BtrfsChunk) {
2015 .offset = sh.offset,
2016 .length = le64toh(item->length),
2017 .type = le64toh(item->type),
2018 .n_stripes = le16toh(item->num_stripes),
2019 .stripe_len = le64toh(item->stripe_len),
2020 };
2021
2022 chunk->stripes = new(BtrfsStripe, chunk->n_stripes);
2023 if (!chunk->stripes)
2024 return -ENOMEM;
2025
2026 for (size_t j = 0; j < chunk->n_stripes; j++) {
2027 const struct btrfs_stripe *stripe = &item->stripe + j;
2028
2029 chunk->stripes[j] = (BtrfsStripe) {
2030 .devid = le64toh(stripe->devid),
2031 .offset = le64toh(stripe->offset),
2032 };
2033 }
2034
2035 if (!GREEDY_REALLOC(tree.chunks, tree.n_chunks + 1))
2036 return -ENOMEM;
2037
2038 tree.chunks[tree.n_chunks++] = TAKE_PTR(chunk);
2039 }
2040
2041 if (!btrfs_ioctl_search_args_inc(&search_args))
2042 break;
2043 }
2044
2045 *ret = TAKE_STRUCT(tree);
2046 return 0;
2047 }
2048
2049 static BtrfsChunk* btrfs_find_chunk_from_logical_address(const BtrfsChunkTree *tree, uint64_t logical) {
2050 size_t min_index, max_index;
2051
2052 assert(tree);
2053 assert(tree->chunks || tree->n_chunks == 0);
2054
2055 if (tree->n_chunks == 0)
2056 return NULL;
2057
2058 /* bisection */
2059 min_index = 0;
2060 max_index = tree->n_chunks - 1;
2061
2062 while (min_index <= max_index) {
2063 size_t mid = (min_index + max_index) / 2;
2064
2065 if (logical < tree->chunks[mid]->offset) {
2066 if (mid < 1)
2067 return NULL;
2068
2069 max_index = mid - 1;
2070 } else if (logical >= tree->chunks[mid]->offset + tree->chunks[mid]->length)
2071 min_index = mid + 1;
2072 else
2073 return tree->chunks[mid];
2074 }
2075
2076 return NULL;
2077 }
2078
2079 static int btrfs_is_nocow_fd(int fd) {
2080 unsigned flags;
2081 int r;
2082
2083 assert(fd >= 0);
2084
2085 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
2086 if (r < 0)
2087 return r;
2088 if (r == 0)
2089 return -ENOTTY;
2090
2091 r = read_attr_fd(fd, &flags);
2092 if (r < 0)
2093 return r;
2094
2095 return FLAGS_SET(flags, FS_NOCOW_FL) && !FLAGS_SET(flags, FS_COMPR_FL);
2096 }
2097
2098 int btrfs_get_file_physical_offset_fd(int fd, uint64_t *ret) {
2099
2100 struct btrfs_ioctl_search_args search_args = {
2101 .key.min_type = BTRFS_EXTENT_DATA_KEY,
2102 .key.max_type = BTRFS_EXTENT_DATA_KEY,
2103
2104 .key.min_offset = 0,
2105 .key.max_offset = UINT64_MAX,
2106
2107 .key.min_transid = 0,
2108 .key.max_transid = UINT64_MAX,
2109 };
2110
2111 _cleanup_(btrfs_chunk_tree_done) BtrfsChunkTree tree = {};
2112 uint64_t subvol_id;
2113 struct stat st;
2114 int r;
2115
2116 assert(fd >= 0);
2117 assert(ret);
2118
2119 if (fstat(fd, &st) < 0)
2120 return -errno;
2121
2122 r = stat_verify_regular(&st);
2123 if (r < 0)
2124 return r;
2125
2126 r = btrfs_is_nocow_fd(fd);
2127 if (r < 0)
2128 return r;
2129 if (r == 0)
2130 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2131 "Cannot get physical address for btrfs extent: CoW enabled");
2132
2133 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
2134 if (r < 0)
2135 return r;
2136
2137 r = btrfs_read_chunk_tree_fd(fd, &tree);
2138 if (r < 0)
2139 return r;
2140
2141 search_args.key.tree_id = subvol_id;
2142 search_args.key.min_objectid = search_args.key.max_objectid = st.st_ino;
2143
2144 while (btrfs_ioctl_search_args_compare(&search_args) <= 0) {
2145 struct btrfs_ioctl_search_header sh;
2146 const void *body;
2147
2148 search_args.key.nr_items = 256;
2149
2150 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &search_args) < 0)
2151 return -errno;
2152
2153 if (search_args.key.nr_items == 0)
2154 break;
2155
2156 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(sh, body, search_args) {
2157 uint64_t logical_offset;
2158 BtrfsChunk *chunk;
2159
2160 btrfs_ioctl_search_args_set(&search_args, &sh);
2161
2162 if (sh.type != BTRFS_EXTENT_DATA_KEY)
2163 continue;
2164
2165 if (sh.objectid != st.st_ino)
2166 continue;
2167
2168 const struct btrfs_file_extent_item *item = body;
2169 if (!IN_SET(item->type, BTRFS_FILE_EXTENT_REG, BTRFS_FILE_EXTENT_PREALLOC))
2170 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2171 "Cannot get physical address for btrfs extent: invalid type %" PRIu8,
2172 item->type);
2173
2174 if (item->compression != 0 || item->encryption != 0 || item->other_encoding != 0)
2175 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2176 "Cannot get physical address for btrfs extent: has incompatible property");
2177
2178 logical_offset = le64toh(item->disk_bytenr);
2179 if (logical_offset == 0)
2180 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2181 "Cannot get physical address for btrfs extent: failed to get logical offset");
2182
2183 chunk = btrfs_find_chunk_from_logical_address(&tree, logical_offset);
2184 if (!chunk)
2185 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2186 "Cannot get physical address for btrfs extent: no matching chunk found");
2187
2188 if ((chunk->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0)
2189 return log_debug_errno(SYNTHETIC_ERRNO(EINVAL),
2190 "Cannot get physical address for btrfs extent: unsupported profile");
2191
2192 uint64_t relative_chunk, relative_stripe, stripe_nr;
2193 uint16_t stripe_index;
2194
2195 assert(logical_offset >= chunk->offset);
2196 assert(chunk->n_stripes > 0);
2197 assert(chunk->stripe_len > 0);
2198
2199 relative_chunk = logical_offset - chunk->offset;
2200 stripe_nr = relative_chunk / chunk->stripe_len;
2201 relative_stripe = relative_chunk - stripe_nr * chunk->stripe_len;
2202 stripe_index = stripe_nr % chunk->n_stripes;
2203
2204 *ret = chunk->stripes[stripe_index].offset +
2205 stripe_nr / chunk->n_stripes * chunk->stripe_len +
2206 relative_stripe;
2207
2208 return 0;
2209 }
2210
2211 if (!btrfs_ioctl_search_args_inc(&search_args))
2212 break;
2213 }
2214
2215 return -ENODATA;
2216 }