]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/btrfs-util.c
alloc-util: simplify GREEDY_REALLOC() logic by relying on malloc_usable_size()
[thirdparty/systemd.git] / src / basic / btrfs-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <inttypes.h>
6 #include <linux/btrfs_tree.h>
7 #include <linux/fs.h>
8 #include <linux/loop.h>
9 #include <linux/magic.h>
10 #include <stddef.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <unistd.h>
16
17 #include "alloc-util.h"
18 #include "blockdev-util.h"
19 #include "btrfs-util.h"
20 #include "chattr-util.h"
21 #include "copy.h"
22 #include "device-nodes.h"
23 #include "fd-util.h"
24 #include "fileio.h"
25 #include "fs-util.h"
26 #include "io-util.h"
27 #include "macro.h"
28 #include "path-util.h"
29 #include "rm-rf.h"
30 #include "smack-util.h"
31 #include "sparse-endian.h"
32 #include "stat-util.h"
33 #include "string-util.h"
34 #include "time-util.h"
35 #include "util.h"
36
37 /* WARNING: Be careful with file system ioctls! When we get an fd, we
38 * need to make sure it either refers to only a regular file or
39 * directory, or that it is located on btrfs, before invoking any
40 * btrfs ioctls. The ioctl numbers are reused by some device drivers
41 * (such as DRM), and hence might have bad effects when invoked on
42 * device nodes (that reference drivers) rather than fds to normal
43 * files or directories. */
44
45 static int validate_subvolume_name(const char *name) {
46
47 if (!filename_is_valid(name))
48 return -EINVAL;
49
50 if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
51 return -E2BIG;
52
53 return 0;
54 }
55
/* Takes the last component of 'path' (as determined by basename()), validates it as a subvolume
 * name and, on success, stores a pointer to it in *subvolume. Returns 0 on success or the negative
 * error of validate_subvolume_name(). *subvolume is left untouched on failure. */
static int extract_subvolume_name(const char *path, const char **subvolume) {
        const char *last;
        int k;

        assert(path);
        assert(subvolume);

        last = basename(path);

        k = validate_subvolume_name(last);
        if (k >= 0) {
                *subvolume = last;
                return 0;
        }

        return k;
}
72
73 int btrfs_is_filesystem(int fd) {
74 struct statfs sfs;
75
76 assert(fd >= 0);
77
78 if (fstatfs(fd, &sfs) < 0)
79 return -errno;
80
81 return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
82 }
83
/* Determines whether 'fd' refers to a btrfs subvolume. Returns 0 if the inode cannot be a subvolume
 * (per btrfs_might_be_subvol()), otherwise the result of btrfs_is_filesystem(); -errno if fstat()
 * fails. */
int btrfs_is_subvol_fd(int fd) {
        struct stat info;

        assert(fd >= 0);

        /* On btrfs subvolumes always have the inode 256, hence check the inode first and only then
         * bother with the file system type. */

        if (fstat(fd, &info) < 0)
                return -errno;

        return btrfs_might_be_subvol(&info) ? btrfs_is_filesystem(fd) : 0;
}
99
100 int btrfs_is_subvol(const char *path) {
101 _cleanup_close_ int fd = -1;
102
103 assert(path);
104
105 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
106 if (fd < 0)
107 return -errno;
108
109 return btrfs_is_subvol_fd(fd);
110 }
111
112 int btrfs_subvol_make_fd(int fd, const char *subvolume) {
113 struct btrfs_ioctl_vol_args args = {};
114 _cleanup_close_ int real_fd = -1;
115 int r;
116
117 assert(subvolume);
118
119 r = validate_subvolume_name(subvolume);
120 if (r < 0)
121 return r;
122
123 r = fcntl(fd, F_GETFL);
124 if (r < 0)
125 return -errno;
126 if (FLAGS_SET(r, O_PATH)) {
127 /* An O_PATH fd was specified, let's convert here to a proper one, as btrfs ioctl's can't deal with
128 * O_PATH. */
129
130 real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
131 if (real_fd < 0)
132 return real_fd;
133
134 fd = real_fd;
135 }
136
137 strncpy(args.name, subvolume, sizeof(args.name)-1);
138
139 if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
140 return -errno;
141
142 return 0;
143 }
144
145 int btrfs_subvol_make(const char *path) {
146 _cleanup_close_ int fd = -1;
147 const char *subvolume;
148 int r;
149
150 assert(path);
151
152 r = extract_subvolume_name(path, &subvolume);
153 if (r < 0)
154 return r;
155
156 fd = open_parent(path, O_CLOEXEC, 0);
157 if (fd < 0)
158 return fd;
159
160 return btrfs_subvol_make_fd(fd, subvolume);
161 }
162
/* Tries to create a subvolume at 'path', and falls back to creating a plain directory with
 * mkdir() if subvolume creation fails with -ENOTTY. Returns 1 if a subvolume was created, 0 if a
 * plain directory was created, and a negative error otherwise. */
int btrfs_subvol_make_fallback(const char *path, mode_t mode) {
        const mode_t inverse = ~mode;
        mode_t saved, wanted;
        int r;

        assert(path);

        /* Behave like mkdir(): apply the requested mode, masked by the umask currently in effect.
         * We temporarily install a umask that combines the previous one with the inverse of the
         * mode, and restore the previous one right after the attempt. */
        saved = umask(inverse);
        wanted = saved | inverse;
        if (wanted != inverse)
                umask(wanted);

        r = btrfs_subvol_make(path);

        umask(saved);

        if (r == -ENOTTY)
                return mkdir(path, mode) < 0 ? -errno : 0; /* plain directory */

        return r < 0 ? r : 1; /* failure, or subvol worked */
}
187
188 int btrfs_subvol_set_read_only_fd(int fd, bool b) {
189 uint64_t flags, nflags;
190 struct stat st;
191
192 assert(fd >= 0);
193
194 if (fstat(fd, &st) < 0)
195 return -errno;
196
197 if (!btrfs_might_be_subvol(&st))
198 return -EINVAL;
199
200 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
201 return -errno;
202
203 nflags = UPDATE_FLAG(flags, BTRFS_SUBVOL_RDONLY, b);
204 if (flags == nflags)
205 return 0;
206
207 if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
208 return -errno;
209
210 return 0;
211 }
212
213 int btrfs_subvol_set_read_only(const char *path, bool b) {
214 _cleanup_close_ int fd = -1;
215
216 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
217 if (fd < 0)
218 return -errno;
219
220 return btrfs_subvol_set_read_only_fd(fd, b);
221 }
222
223 int btrfs_subvol_get_read_only_fd(int fd) {
224 uint64_t flags;
225 struct stat st;
226
227 assert(fd >= 0);
228
229 if (fstat(fd, &st) < 0)
230 return -errno;
231
232 if (!btrfs_might_be_subvol(&st))
233 return -EINVAL;
234
235 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
236 return -errno;
237
238 return !!(flags & BTRFS_SUBVOL_RDONLY);
239 }
240
241 int btrfs_reflink(int infd, int outfd) {
242 int r;
243
244 assert(infd >= 0);
245 assert(outfd >= 0);
246
247 /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */
248
249 r = fd_verify_regular(outfd);
250 if (r < 0)
251 return r;
252
253 if (ioctl(outfd, BTRFS_IOC_CLONE, infd) < 0)
254 return -errno;
255
256 return 0;
257 }
258
259 int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
260 struct btrfs_ioctl_clone_range_args args = {
261 .src_fd = infd,
262 .src_offset = in_offset,
263 .src_length = sz,
264 .dest_offset = out_offset,
265 };
266 int r;
267
268 assert(infd >= 0);
269 assert(outfd >= 0);
270 assert(sz > 0);
271
272 r = fd_verify_regular(outfd);
273 if (r < 0)
274 return r;
275
276 if (ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args) < 0)
277 return -errno;
278
279 return 0;
280 }
281
282 int btrfs_get_block_device_fd(int fd, dev_t *dev) {
283 struct btrfs_ioctl_fs_info_args fsi = {};
284 uint64_t id;
285 int r;
286
287 assert(fd >= 0);
288 assert(dev);
289
290 r = btrfs_is_filesystem(fd);
291 if (r < 0)
292 return r;
293 if (!r)
294 return -ENOTTY;
295
296 if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
297 return -errno;
298
299 /* We won't do this for btrfs RAID */
300 if (fsi.num_devices != 1) {
301 *dev = 0;
302 return 0;
303 }
304
305 for (id = 1; id <= fsi.max_id; id++) {
306 struct btrfs_ioctl_dev_info_args di = {
307 .devid = id,
308 };
309 struct stat st;
310
311 if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
312 if (errno == ENODEV)
313 continue;
314
315 return -errno;
316 }
317
318 /* For the root fs — when no initrd is involved — btrfs returns /dev/root on any kernels from
319 * the past few years. That sucks, as we have no API to determine the actual root then. let's
320 * return an recognizable error for this case, so that the caller can maybe print a nice
321 * message about this.
322 *
323 * https://bugzilla.kernel.org/show_bug.cgi?id=89721 */
324 if (path_equal((char*) di.path, "/dev/root"))
325 return -EUCLEAN;
326
327 if (stat((char*) di.path, &st) < 0)
328 return -errno;
329
330 if (!S_ISBLK(st.st_mode))
331 return -ENOTBLK;
332
333 if (major(st.st_rdev) == 0)
334 return -ENODEV;
335
336 *dev = st.st_rdev;
337 return 1;
338 }
339
340 return -ENODEV;
341 }
342
343 int btrfs_get_block_device(const char *path, dev_t *dev) {
344 _cleanup_close_ int fd = -1;
345
346 assert(path);
347 assert(dev);
348
349 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
350 if (fd < 0)
351 return -errno;
352
353 return btrfs_get_block_device_fd(fd, dev);
354 }
355
356 int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
357 struct btrfs_ioctl_ino_lookup_args args = {
358 .objectid = BTRFS_FIRST_FREE_OBJECTID
359 };
360 int r;
361
362 assert(fd >= 0);
363 assert(ret);
364
365 r = btrfs_is_filesystem(fd);
366 if (r < 0)
367 return r;
368 if (!r)
369 return -ENOTTY;
370
371 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
372 return -errno;
373
374 *ret = args.treeid;
375 return 0;
376 }
377
378 int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
379 _cleanup_close_ int subvol_fd = -1;
380
381 assert(fd >= 0);
382 assert(ret);
383
384 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
385 if (subvol_fd < 0)
386 return -errno;
387
388 return btrfs_subvol_get_id_fd(subvol_fd, ret);
389 }
390
/* Advances the lower bound of a tree search key by one.
 *
 * Returns true if the key was incremented, false if it is already at its maximum value and hence
 * cannot be incremented any further. */
static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
        assert(args);

        /* The objectid, type and offset together make up the btrfs key, which is considered a
         * single 136-bit integer (64 + 8 + 64 bits) when comparing. This call increases that
         * integer by one, carrying an overflow of the offset over into the type, and an overflow of
         * the type over into the objectid. */

        if (args->key.min_offset < UINT64_MAX) {
                args->key.min_offset++;
                return true;
        }

        /* The type field is handled as an 8-bit quantity here, hence the carry happens at
         * UINT8_MAX. */
        if (args->key.min_type < UINT8_MAX) {
                args->key.min_type++;
                args->key.min_offset = 0;
                return true;
        }

        if (args->key.min_objectid < UINT64_MAX) {
                args->key.min_objectid++;
                args->key.min_offset = 0;
                args->key.min_type = 0;
                return true;
        }

        /* All three components are at their maximum */
        return false;
}
419
/* Sets the lower bound of the search key in 'args' to the key (objectid, type, offset) of the
 * search result header 'h'. The upper bound is left untouched. */
static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
        struct btrfs_ioctl_search_key *key;

        assert(args);
        assert(h);

        key = &args->key;

        key->min_objectid = h->objectid;
        key->min_type = h->type;
        key->min_offset = h->offset;
}
428
/* Compares the lower bound of the search key in 'args' against its upper bound, component by
 * component in the order objectid, type, offset. Returns the CMP() result of the first component
 * that differs, or of the offsets if objectid and type are both equal. */
static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
        int c;

        assert(args);

        c = CMP(args->key.min_objectid, args->key.max_objectid);
        if (c == 0)
                c = CMP(args->key.min_type, args->key.max_type);
        if (c == 0)
                c = CMP(args->key.min_offset, args->key.max_offset);

        return c;
}
446
/* Iterates through the result items the kernel placed in (args).buf: 'i' counts from 0 up to
 * (args).key.nr_items, and 'sh' points to the header of the current item. Each step skips over the
 * current header plus the (sh)->len bytes that follow it. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args)                  \
        for ((i) = 0,                                                   \
             (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
             (i) < (args).key.nr_items;                                 \
             (i)++,                                                     \
             (sh) = (const struct btrfs_ioctl_search_header*)           \
                     ((uint8_t*) (sh) + sizeof(*(sh)) + (sh)->len))
453
/* Returns a pointer to the payload that immediately follows the search result header 'sh'. The
 * argument is parenthesized so that expressions such as 'p + 1' are evaluated before the cast. */
#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh)                              \
        ((void*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header)))
456
457 int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
458 struct btrfs_ioctl_search_args args = {
459 /* Tree of tree roots */
460 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
461
462 /* Look precisely for the subvolume items */
463 .key.min_type = BTRFS_ROOT_ITEM_KEY,
464 .key.max_type = BTRFS_ROOT_ITEM_KEY,
465
466 .key.min_offset = 0,
467 .key.max_offset = UINT64_MAX,
468
469 /* No restrictions on the other components */
470 .key.min_transid = 0,
471 .key.max_transid = UINT64_MAX,
472 };
473
474 bool found = false;
475 int r;
476
477 assert(fd >= 0);
478 assert(ret);
479
480 if (subvol_id == 0) {
481 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
482 if (r < 0)
483 return r;
484 } else {
485 r = btrfs_is_filesystem(fd);
486 if (r < 0)
487 return r;
488 if (!r)
489 return -ENOTTY;
490 }
491
492 args.key.min_objectid = args.key.max_objectid = subvol_id;
493
494 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
495 const struct btrfs_ioctl_search_header *sh;
496 unsigned i;
497
498 args.key.nr_items = 256;
499 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
500 return -errno;
501
502 if (args.key.nr_items <= 0)
503 break;
504
505 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
506
507 const struct btrfs_root_item *ri;
508
509 /* Make sure we start the next search at least from this entry */
510 btrfs_ioctl_search_args_set(&args, sh);
511
512 if (sh->objectid != subvol_id)
513 continue;
514 if (sh->type != BTRFS_ROOT_ITEM_KEY)
515 continue;
516
517 /* Older versions of the struct lacked the otime setting */
518 if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
519 continue;
520
521 ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
522
523 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
524 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
525
526 ret->subvol_id = subvol_id;
527 ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;
528
529 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
530 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
531 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
532
533 found = true;
534 goto finish;
535 }
536
537 /* Increase search key by one, to read the next item, if we can. */
538 if (!btrfs_ioctl_search_args_inc(&args))
539 break;
540 }
541
542 finish:
543 if (!found)
544 return -ENODATA;
545
546 return 0;
547 }
548
549 int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
550
551 struct btrfs_ioctl_search_args args = {
552 /* Tree of quota items */
553 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
554
555 /* The object ID is always 0 */
556 .key.min_objectid = 0,
557 .key.max_objectid = 0,
558
559 /* Look precisely for the quota items */
560 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
561 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
562
563 /* No restrictions on the other components */
564 .key.min_transid = 0,
565 .key.max_transid = UINT64_MAX,
566 };
567
568 bool found_info = false, found_limit = false;
569 int r;
570
571 assert(fd >= 0);
572 assert(ret);
573
574 if (qgroupid == 0) {
575 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
576 if (r < 0)
577 return r;
578 } else {
579 r = btrfs_is_filesystem(fd);
580 if (r < 0)
581 return r;
582 if (!r)
583 return -ENOTTY;
584 }
585
586 args.key.min_offset = args.key.max_offset = qgroupid;
587
588 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
589 const struct btrfs_ioctl_search_header *sh;
590 unsigned i;
591
592 args.key.nr_items = 256;
593 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
594 if (errno == ENOENT) /* quota tree is missing: quota disabled */
595 break;
596
597 return -errno;
598 }
599
600 if (args.key.nr_items <= 0)
601 break;
602
603 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
604
605 /* Make sure we start the next search at least from this entry */
606 btrfs_ioctl_search_args_set(&args, sh);
607
608 if (sh->objectid != 0)
609 continue;
610 if (sh->offset != qgroupid)
611 continue;
612
613 if (sh->type == BTRFS_QGROUP_INFO_KEY) {
614 const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
615
616 ret->referenced = le64toh(qii->rfer);
617 ret->exclusive = le64toh(qii->excl);
618
619 found_info = true;
620
621 } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
622 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
623
624 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
625 ret->referenced_max = le64toh(qli->max_rfer);
626 else
627 ret->referenced_max = UINT64_MAX;
628
629 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
630 ret->exclusive_max = le64toh(qli->max_excl);
631 else
632 ret->exclusive_max = UINT64_MAX;
633
634 found_limit = true;
635 }
636
637 if (found_info && found_limit)
638 goto finish;
639 }
640
641 /* Increase search key by one, to read the next item, if we can. */
642 if (!btrfs_ioctl_search_args_inc(&args))
643 break;
644 }
645
646 finish:
647 if (!found_limit && !found_info)
648 return -ENODATA;
649
650 if (!found_info) {
651 ret->referenced = UINT64_MAX;
652 ret->exclusive = UINT64_MAX;
653 }
654
655 if (!found_limit) {
656 ret->referenced_max = UINT64_MAX;
657 ret->exclusive_max = UINT64_MAX;
658 }
659
660 return 0;
661 }
662
663 int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
664 _cleanup_close_ int fd = -1;
665
666 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
667 if (fd < 0)
668 return -errno;
669
670 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
671 }
672
673 int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
674 uint64_t level, lowest = UINT64_MAX, lowest_qgroupid = 0;
675 _cleanup_free_ uint64_t *qgroups = NULL;
676 int r, n;
677
678 assert(fd >= 0);
679 assert(ret);
680
681 /* This finds the "subtree" qgroup for a specific
682 * subvolume. This only works for subvolumes that have been
683 * prepared with btrfs_subvol_auto_qgroup_fd() with
684 * insert_intermediary_qgroup=true (or equivalent). For others
685 * it will return the leaf qgroup instead. The two cases may
686 * be distuingished via the return value, which is 1 in case
687 * an appropriate "subtree" qgroup was found, and 0
688 * otherwise. */
689
690 if (subvol_id == 0) {
691 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
692 if (r < 0)
693 return r;
694 }
695
696 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
697 if (r < 0)
698 return r;
699 if (level != 0) /* Input must be a leaf qgroup */
700 return -EINVAL;
701
702 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
703 if (n < 0)
704 return n;
705
706 for (int i = 0; i < n; i++) {
707 uint64_t id;
708
709 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
710 if (r < 0)
711 return r;
712
713 if (id != subvol_id)
714 continue;
715
716 if (lowest == UINT64_MAX || level < lowest) {
717 lowest_qgroupid = qgroups[i];
718 lowest = level;
719 }
720 }
721
722 if (lowest == UINT64_MAX) {
723 /* No suitable higher-level qgroup found, let's return
724 * the leaf qgroup instead, and indicate that with the
725 * return value. */
726
727 *ret = subvol_id;
728 return 0;
729 }
730
731 *ret = lowest_qgroupid;
732 return 1;
733 }
734
735 int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
736 uint64_t qgroupid;
737 int r;
738
739 assert(fd >= 0);
740 assert(ret);
741
742 /* This determines the quota data of the qgroup with the
743 * lowest level, that shares the id part with the specified
744 * subvolume. This is useful for determining the quota data
745 * for entire subvolume subtrees, as long as the subtrees have
746 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
747 * compatible way */
748
749 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
750 if (r < 0)
751 return r;
752
753 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
754 }
755
756 int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
757 _cleanup_close_ int fd = -1;
758
759 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
760 if (fd < 0)
761 return -errno;
762
763 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
764 }
765
766 int btrfs_defrag_fd(int fd) {
767 int r;
768
769 assert(fd >= 0);
770
771 r = fd_verify_regular(fd);
772 if (r < 0)
773 return r;
774
775 if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0)
776 return -errno;
777
778 return 0;
779 }
780
781 int btrfs_defrag(const char *p) {
782 _cleanup_close_ int fd = -1;
783
784 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
785 if (fd < 0)
786 return -errno;
787
788 return btrfs_defrag_fd(fd);
789 }
790
791 int btrfs_quota_enable_fd(int fd, bool b) {
792 struct btrfs_ioctl_quota_ctl_args args = {
793 .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
794 };
795 int r;
796
797 assert(fd >= 0);
798
799 r = btrfs_is_filesystem(fd);
800 if (r < 0)
801 return r;
802 if (!r)
803 return -ENOTTY;
804
805 if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
806 return -errno;
807
808 return 0;
809 }
810
811 int btrfs_quota_enable(const char *path, bool b) {
812 _cleanup_close_ int fd = -1;
813
814 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
815 if (fd < 0)
816 return -errno;
817
818 return btrfs_quota_enable_fd(fd, b);
819 }
820
821 int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
822
823 struct btrfs_ioctl_qgroup_limit_args args = {
824 .lim.max_rfer = referenced_max,
825 .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
826 };
827 int r;
828
829 assert(fd >= 0);
830
831 if (qgroupid == 0) {
832 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
833 if (r < 0)
834 return r;
835 } else {
836 r = btrfs_is_filesystem(fd);
837 if (r < 0)
838 return r;
839 if (!r)
840 return -ENOTTY;
841 }
842
843 args.qgroupid = qgroupid;
844
845 for (unsigned c = 0;; c++) {
846 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
847
848 if (errno == EBUSY && c < 10) {
849 (void) btrfs_quota_scan_wait(fd);
850 continue;
851 }
852
853 return -errno;
854 }
855
856 break;
857 }
858
859 return 0;
860 }
861
862 int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
863 _cleanup_close_ int fd = -1;
864
865 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
866 if (fd < 0)
867 return -errno;
868
869 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
870 }
871
/* Sets the "referenced" limit on the qgroup that btrfs_subvol_find_subtree_qgroup() locates for
 * the subvolume 'subvol_id'. Returns a negative error if either step fails. */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroupid;
        int k;

        assert(fd >= 0);

        k = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroupid);

        return k < 0 ? k : btrfs_qgroup_set_limit_fd(fd, subtree_qgroupid, referenced_max);
}
884
885 int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
886 _cleanup_close_ int fd = -1;
887
888 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
889 if (fd < 0)
890 return -errno;
891
892 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
893 }
894
/* Combines a qgroup level and an id into a qgroup ID: the level is placed in the bits from
 * BTRFS_QGROUP_LEVEL_SHIFT upwards, the id in the bits below. Returns 0 and stores the result in
 * *ret on success, -EINVAL if either value does not fit into its part. */
int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
        const uint64_t level_limit = UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT);
        const uint64_t id_limit = UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT;

        assert(ret);

        if (level >= level_limit || id >= id_limit)
                return -EINVAL;

        *ret = id | (level << BTRFS_QGROUP_LEVEL_SHIFT);
        return 0;
}
907
/* Splits a qgroup ID into its level (the bits from BTRFS_QGROUP_LEVEL_SHIFT upwards) and its id
 * (the bits below). Either output pointer may be NULL, but not both. Always returns 0. */
int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
        const uint64_t id_mask = (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1;

        assert(level || id);

        if (level != NULL)
                *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;

        if (id != NULL)
                *id = qgroupid & id_mask;

        return 0;
}
919
920 static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
921
922 struct btrfs_ioctl_qgroup_create_args args = {
923 .create = b,
924 .qgroupid = qgroupid,
925 };
926 int r;
927
928 r = btrfs_is_filesystem(fd);
929 if (r < 0)
930 return r;
931 if (r == 0)
932 return -ENOTTY;
933
934 for (unsigned c = 0;; c++) {
935 if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
936
937 /* On old kernels if quota is not enabled, we get EINVAL. On newer kernels we get
938 * ENOTCONN. Let's always convert this to ENOTCONN to make this recognizable
939 * everywhere the same way. */
940
941 if (IN_SET(errno, EINVAL, ENOTCONN))
942 return -ENOTCONN;
943
944 if (errno == EBUSY && c < 10) {
945 (void) btrfs_quota_scan_wait(fd);
946 continue;
947 }
948
949 return -errno;
950 }
951
952 break;
953 }
954
955 return 0;
956 }
957
/* Creates the quota group 'qgroupid'; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, /* b= */ true, qgroupid);
}
961
/* Destroys the quota group 'qgroupid'; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, /* b= */ false, qgroupid);
}
965
966 int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
967 _cleanup_free_ uint64_t *qgroups = NULL;
968 uint64_t subvol_id;
969 int n, r;
970
971 /* Destroys the specified qgroup, but unassigns it from all
972 * its parents first. Also, it recursively destroys all
973 * qgroups it is assigned to that have the same id part of the
974 * qgroupid as the specified group. */
975
976 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
977 if (r < 0)
978 return r;
979
980 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
981 if (n < 0)
982 return n;
983
984 for (int i = 0; i < n; i++) {
985 uint64_t id;
986
987 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
988 if (r < 0)
989 return r;
990
991 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
992 if (r < 0)
993 return r;
994
995 if (id != subvol_id)
996 continue;
997
998 /* The parent qgroupid shares the same id part with
999 * us? If so, destroy it too. */
1000
1001 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
1002 }
1003
1004 return btrfs_qgroup_destroy(fd, qgroupid);
1005 }
1006
1007 int btrfs_quota_scan_start(int fd) {
1008 struct btrfs_ioctl_quota_rescan_args args = {};
1009
1010 assert(fd >= 0);
1011
1012 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0)
1013 return -errno;
1014
1015 return 0;
1016 }
1017
1018 int btrfs_quota_scan_wait(int fd) {
1019 assert(fd >= 0);
1020
1021 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0)
1022 return -errno;
1023
1024 return 0;
1025 }
1026
1027 int btrfs_quota_scan_ongoing(int fd) {
1028 struct btrfs_ioctl_quota_rescan_args args = {};
1029
1030 assert(fd >= 0);
1031
1032 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
1033 return -errno;
1034
1035 return !!args.flags;
1036 }
1037
1038 static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
1039 struct btrfs_ioctl_qgroup_assign_args args = {
1040 .assign = b,
1041 .src = child,
1042 .dst = parent,
1043 };
1044 int r;
1045
1046 r = btrfs_is_filesystem(fd);
1047 if (r < 0)
1048 return r;
1049 if (r == 0)
1050 return -ENOTTY;
1051
1052 for (unsigned c = 0;; c++) {
1053 r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
1054 if (r < 0) {
1055 if (errno == EBUSY && c < 10) {
1056 (void) btrfs_quota_scan_wait(fd);
1057 continue;
1058 }
1059
1060 return -errno;
1061 }
1062
1063 if (r == 0)
1064 return 0;
1065
1066 /* If the return value is > 0, we need to request a rescan */
1067
1068 (void) btrfs_quota_scan_start(fd);
1069 return 1;
1070 }
1071 }
1072
/* Assigns the qgroup 'child' to the qgroup 'parent'; see qgroup_assign_or_unassign() for the
 * return values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, /* b= */ true, child, parent);
}
1076
/* Removes the assignment of the qgroup 'child' to the qgroup 'parent'; see
 * qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, /* b= */ false, child, parent);
}
1080
1081 static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
1082 struct btrfs_ioctl_search_args args = {
1083 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1084
1085 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1086 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1087
1088 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1089 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1090
1091 .key.min_transid = 0,
1092 .key.max_transid = UINT64_MAX,
1093 };
1094
1095 struct btrfs_ioctl_vol_args vol_args = {};
1096 _cleanup_close_ int subvol_fd = -1;
1097 struct stat st;
1098 bool made_writable = false;
1099 int r;
1100
1101 assert(fd >= 0);
1102 assert(subvolume);
1103
1104 if (fstat(fd, &st) < 0)
1105 return -errno;
1106
1107 if (!S_ISDIR(st.st_mode))
1108 return -EINVAL;
1109
1110 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1111 if (subvol_fd < 0)
1112 return -errno;
1113
1114 /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
1115 * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
1116 * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
1117 * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a
1118 * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
1119 * let's prefer ENOTTY over EPERM/EACCES though. */
1120 r = btrfs_is_subvol_fd(subvol_fd);
1121 if (r < 0)
1122 return r;
1123 if (r == 0) /* Not a btrfs subvolume */
1124 return -ENOTTY;
1125
1126 if (subvol_id == 0) {
1127 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
1128 if (r < 0)
1129 return r;
1130 }
1131
1132 /* First, try to remove the subvolume. If it happens to be
1133 * already empty, this will just work. */
1134 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1135 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
1136 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
1137 return 0;
1138 }
1139 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
1140 return -errno;
1141
1142 /* OK, the subvolume is not empty, let's look for child
1143 * subvolumes, and remove them, first */
1144
1145 args.key.min_offset = args.key.max_offset = subvol_id;
1146
1147 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1148 const struct btrfs_ioctl_search_header *sh;
1149 unsigned i;
1150
1151 args.key.nr_items = 256;
1152 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1153 return -errno;
1154
1155 if (args.key.nr_items <= 0)
1156 break;
1157
1158 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1159 _cleanup_free_ char *p = NULL;
1160 const struct btrfs_root_ref *ref;
1161
1162 btrfs_ioctl_search_args_set(&args, sh);
1163
1164 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1165 continue;
1166 if (sh->offset != subvol_id)
1167 continue;
1168
1169 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1170
1171 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1172 if (!p)
1173 return -ENOMEM;
1174
1175 struct btrfs_ioctl_ino_lookup_args ino_args = {
1176 .treeid = subvol_id,
1177 .objectid = htole64(ref->dirid),
1178 };
1179
1180 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1181 return -errno;
1182
1183 if (!made_writable) {
1184 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1185 if (r < 0)
1186 return r;
1187
1188 made_writable = true;
1189 }
1190
1191 if (isempty(ino_args.name))
1192 /* Subvolume is in the top-level
1193 * directory of the subvolume. */
1194 r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
1195 else {
1196 _cleanup_close_ int child_fd = -1;
1197
1198 /* Subvolume is somewhere further down,
1199 * hence we need to open the
1200 * containing directory first */
1201
1202 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1203 if (child_fd < 0)
1204 return -errno;
1205
1206 r = subvol_remove_children(child_fd, p, sh->objectid, flags);
1207 }
1208 if (r < 0)
1209 return r;
1210 }
1211
1212 /* Increase search key by one, to read the next item, if we can. */
1213 if (!btrfs_ioctl_search_args_inc(&args))
1214 break;
1215 }
1216
1217 /* OK, the child subvolumes should all be gone now, let's try
1218 * again to remove the subvolume */
1219 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1220 return -errno;
1221
1222 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
1223 return 0;
1224 }
1225
1226 int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
1227 _cleanup_close_ int fd = -1;
1228 const char *subvolume;
1229 int r;
1230
1231 assert(path);
1232
1233 r = extract_subvolume_name(path, &subvolume);
1234 if (r < 0)
1235 return r;
1236
1237 fd = open_parent(path, O_CLOEXEC, 0);
1238 if (fd < 0)
1239 return fd;
1240
1241 return subvol_remove_children(fd, subvolume, 0, flags);
1242 }
1243
1244 int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
1245 return subvol_remove_children(fd, subvolume, 0, flags);
1246 }
1247
1248 int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1249
1250 struct btrfs_ioctl_search_args args = {
1251 /* Tree of quota items */
1252 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1253
1254 /* The object ID is always 0 */
1255 .key.min_objectid = 0,
1256 .key.max_objectid = 0,
1257
1258 /* Look precisely for the quota items */
1259 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1260 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1261
1262 /* For our qgroup */
1263 .key.min_offset = old_qgroupid,
1264 .key.max_offset = old_qgroupid,
1265
1266 /* No restrictions on the other components */
1267 .key.min_transid = 0,
1268 .key.max_transid = UINT64_MAX,
1269 };
1270
1271 int r;
1272
1273 r = btrfs_is_filesystem(fd);
1274 if (r < 0)
1275 return r;
1276 if (!r)
1277 return -ENOTTY;
1278
1279 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1280 const struct btrfs_ioctl_search_header *sh;
1281 unsigned i;
1282
1283 args.key.nr_items = 256;
1284 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1285 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1286 break;
1287
1288 return -errno;
1289 }
1290
1291 if (args.key.nr_items <= 0)
1292 break;
1293
1294 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1295 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1296 struct btrfs_ioctl_qgroup_limit_args qargs;
1297 unsigned c;
1298
1299 /* Make sure we start the next search at least from this entry */
1300 btrfs_ioctl_search_args_set(&args, sh);
1301
1302 if (sh->objectid != 0)
1303 continue;
1304 if (sh->type != BTRFS_QGROUP_LIMIT_KEY)
1305 continue;
1306 if (sh->offset != old_qgroupid)
1307 continue;
1308
1309 /* We found the entry, now copy things over. */
1310
1311 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1312 .qgroupid = new_qgroupid,
1313
1314 .lim.max_rfer = le64toh(qli->max_rfer),
1315 .lim.max_excl = le64toh(qli->max_excl),
1316 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1317 .lim.rsv_excl = le64toh(qli->rsv_excl),
1318
1319 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1320 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1321 BTRFS_QGROUP_LIMIT_RSV_RFER|
1322 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1323 };
1324
1325 for (c = 0;; c++) {
1326 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1327 if (errno == EBUSY && c < 10) {
1328 (void) btrfs_quota_scan_wait(fd);
1329 continue;
1330 }
1331 return -errno;
1332 }
1333
1334 break;
1335 }
1336
1337 return 1;
1338 }
1339
1340 /* Increase search key by one, to read the next item, if we can. */
1341 if (!btrfs_ioctl_search_args_inc(&args))
1342 break;
1343 }
1344
1345 return 0;
1346 }
1347
1348 static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1349 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1350 bool copy_from_parent = false, insert_intermediary_qgroup = false;
1351 int n_old_qgroups, n_old_parent_qgroups, r;
1352 uint64_t old_parent_id;
1353
1354 assert(fd >= 0);
1355
1356 /* Copies a reduced form of quota information from the old to
1357 * the new subvolume. */
1358
1359 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1360 if (n_old_qgroups <= 0) /* Nothing to copy */
1361 return n_old_qgroups;
1362
1363 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
1364 if (r == -ENXIO)
1365 /* We have no parent, hence nothing to copy. */
1366 n_old_parent_qgroups = 0;
1367 else if (r < 0)
1368 return r;
1369 else {
1370 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1371 if (n_old_parent_qgroups < 0)
1372 return n_old_parent_qgroups;
1373 }
1374
1375 for (int i = 0; i < n_old_qgroups; i++) {
1376 uint64_t id;
1377
1378 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1379 if (r < 0)
1380 return r;
1381
1382 if (id == old_subvol_id) {
1383 /* The old subvolume was member of a qgroup
1384 * that had the same id, but a different level
1385 * as it self. Let's set up something similar
1386 * in the destination. */
1387 insert_intermediary_qgroup = true;
1388 break;
1389 }
1390
1391 for (int j = 0; j < n_old_parent_qgroups; j++)
1392 if (old_parent_qgroups[j] == old_qgroups[i])
1393 /* The old subvolume shared a common
1394 * parent qgroup with its parent
1395 * subvolume. Let's set up something
1396 * similar in the destination. */
1397 copy_from_parent = true;
1398 }
1399
1400 if (!insert_intermediary_qgroup && !copy_from_parent)
1401 return 0;
1402
1403 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1404 }
1405
/* Copies quota limits from the old to the new subvolume: first the limits of the leaf qgroup, then,
 * if both subvolumes have a subtree qgroup, the limits of that one as well.
 *
 * Returns > 0 if any limit was copied, 0 if none was, or a negative errno-style value on failure. */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t old_subtree, new_subtree;
        bool leaf_copied;
        int r;

        /* Step 1: the leaf limits */
        r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (r < 0)
                return r;
        leaf_copied = r > 0;

        /* Step 2: the subtree limits, provided both sides have a subtree qgroup */
        r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_qgroup_copy_limits(fd, old_subtree, new_subtree);

        /* Propagate both errors and "copied" from the subtree step; otherwise report the leaf result. */
        return r != 0 ? r : leaf_copied;
}
1436
1437 static int subvol_snapshot_children(
1438 int old_fd,
1439 int new_fd,
1440 const char *subvolume,
1441 uint64_t old_subvol_id,
1442 BtrfsSnapshotFlags flags) {
1443
1444 struct btrfs_ioctl_search_args args = {
1445 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1446
1447 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1448 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1449
1450 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1451 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1452
1453 .key.min_transid = 0,
1454 .key.max_transid = UINT64_MAX,
1455 };
1456
1457 struct btrfs_ioctl_vol_args_v2 vol_args = {
1458 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1459 .fd = old_fd,
1460 };
1461 _cleanup_close_ int subvolume_fd = -1;
1462 uint64_t new_subvol_id;
1463 int r;
1464
1465 assert(old_fd >= 0);
1466 assert(new_fd >= 0);
1467 assert(subvolume);
1468
1469 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1470
1471 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1472 return -errno;
1473
1474 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1475 !(flags & BTRFS_SNAPSHOT_QUOTA))
1476 return 0;
1477
1478 if (old_subvol_id == 0) {
1479 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
1480 if (r < 0)
1481 return r;
1482 }
1483
1484 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1485 if (r < 0)
1486 return r;
1487
1488 if (flags & BTRFS_SNAPSHOT_QUOTA)
1489 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1490
1491 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1492
1493 if (flags & BTRFS_SNAPSHOT_QUOTA)
1494 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1495
1496 return 0;
1497 }
1498
1499 args.key.min_offset = args.key.max_offset = old_subvol_id;
1500
1501 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1502 const struct btrfs_ioctl_search_header *sh;
1503 unsigned i;
1504
1505 args.key.nr_items = 256;
1506 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1507 return -errno;
1508
1509 if (args.key.nr_items <= 0)
1510 break;
1511
1512 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1513 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
1514 const struct btrfs_root_ref *ref;
1515 _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
1516
1517 btrfs_ioctl_search_args_set(&args, sh);
1518
1519 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1520 continue;
1521
1522 /* Avoid finding the source subvolume a second
1523 * time */
1524 if (sh->offset != old_subvol_id)
1525 continue;
1526
1527 /* Avoid running into loops if the new
1528 * subvolume is below the old one. */
1529 if (sh->objectid == new_subvol_id)
1530 continue;
1531
1532 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1533 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1534 if (!p)
1535 return -ENOMEM;
1536
1537 struct btrfs_ioctl_ino_lookup_args ino_args = {
1538 .treeid = old_subvol_id,
1539 .objectid = htole64(ref->dirid),
1540 };
1541
1542 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1543 return -errno;
1544
1545 c = path_join(ino_args.name, p);
1546 if (!c)
1547 return -ENOMEM;
1548
1549 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1550 if (old_child_fd < 0)
1551 return -errno;
1552
1553 np = path_join(subvolume, ino_args.name);
1554 if (!np)
1555 return -ENOMEM;
1556
1557 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1558 if (new_child_fd < 0)
1559 return -errno;
1560
1561 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1562 /* If the snapshot is read-only we
1563 * need to mark it writable
1564 * temporarily, to put the subsnapshot
1565 * into place. */
1566
1567 if (subvolume_fd < 0) {
1568 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1569 if (subvolume_fd < 0)
1570 return -errno;
1571 }
1572
1573 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1574 if (r < 0)
1575 return r;
1576 }
1577
1578 /* When btrfs clones the subvolumes, child
1579 * subvolumes appear as empty directories. Remove
1580 * them, so that we can create a new snapshot
1581 * in their place */
1582 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1583 int k = -errno;
1584
1585 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1586 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1587
1588 return k;
1589 }
1590
1591 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
1592
1593 /* Restore the readonly flag */
1594 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1595 int k;
1596
1597 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1598 if (r >= 0 && k < 0)
1599 return k;
1600 }
1601
1602 if (r < 0)
1603 return r;
1604 }
1605
1606 /* Increase search key by one, to read the next item, if we can. */
1607 if (!btrfs_ioctl_search_args_inc(&args))
1608 break;
1609 }
1610
1611 if (flags & BTRFS_SNAPSHOT_QUOTA)
1612 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1613
1614 return 0;
1615 }
1616
1617 int btrfs_subvol_snapshot_fd_full(
1618 int old_fd,
1619 const char *new_path,
1620 BtrfsSnapshotFlags flags,
1621 copy_progress_path_t progress_path,
1622 copy_progress_bytes_t progress_bytes,
1623 void *userdata) {
1624
1625 _cleanup_close_ int new_fd = -1;
1626 const char *subvolume;
1627 int r;
1628
1629 assert(old_fd >= 0);
1630 assert(new_path);
1631
1632 r = btrfs_is_subvol_fd(old_fd);
1633 if (r < 0)
1634 return r;
1635 if (r == 0) {
1636 bool plain_directory = false;
1637
1638 /* If the source isn't a proper subvolume, fail unless fallback is requested */
1639 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1640 return -EISDIR;
1641
1642 r = btrfs_subvol_make(new_path);
1643 if (r == -ENOTTY && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
1644 /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
1645 if (mkdir(new_path, 0755) < 0)
1646 return -errno;
1647
1648 plain_directory = true;
1649 } else if (r < 0)
1650 return r;
1651
1652 r = copy_directory_fd_full(
1653 old_fd, new_path,
1654 COPY_MERGE|COPY_REFLINK|COPY_SAME_MOUNT|COPY_HARDLINKS|(FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGINT) ? COPY_SIGINT : 0),
1655 progress_path, progress_bytes, userdata);
1656 if (r < 0)
1657 goto fallback_fail;
1658
1659 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1660
1661 if (plain_directory) {
1662 /* Plain directories have no recursive read-only flag, but something pretty close to
1663 * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
1664
1665 if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
1666 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
1667 } else {
1668 r = btrfs_subvol_set_read_only(new_path, true);
1669 if (r < 0)
1670 goto fallback_fail;
1671 }
1672 }
1673
1674 return 0;
1675
1676 fallback_fail:
1677 (void) rm_rf(new_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
1678 return r;
1679 }
1680
1681 r = extract_subvolume_name(new_path, &subvolume);
1682 if (r < 0)
1683 return r;
1684
1685 new_fd = open_parent(new_path, O_CLOEXEC, 0);
1686 if (new_fd < 0)
1687 return new_fd;
1688
1689 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1690 }
1691
1692 int btrfs_subvol_snapshot_full(
1693 const char *old_path,
1694 const char *new_path,
1695 BtrfsSnapshotFlags flags,
1696 copy_progress_path_t progress_path,
1697 copy_progress_bytes_t progress_bytes,
1698 void *userdata) {
1699
1700 _cleanup_close_ int old_fd = -1;
1701
1702 assert(old_path);
1703 assert(new_path);
1704
1705 old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1706 if (old_fd < 0)
1707 return -errno;
1708
1709 return btrfs_subvol_snapshot_fd_full(old_fd, new_path, flags, progress_path, progress_bytes, userdata);
1710 }
1711
1712 int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1713
1714 struct btrfs_ioctl_search_args args = {
1715 /* Tree of quota items */
1716 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1717
1718 /* Look precisely for the quota relation items */
1719 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1720 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1721
1722 /* No restrictions on the other components */
1723 .key.min_offset = 0,
1724 .key.max_offset = UINT64_MAX,
1725
1726 .key.min_transid = 0,
1727 .key.max_transid = UINT64_MAX,
1728 };
1729
1730 _cleanup_free_ uint64_t *items = NULL;
1731 size_t n_items = 0;
1732 int r;
1733
1734 assert(fd >= 0);
1735 assert(ret);
1736
1737 if (qgroupid == 0) {
1738 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1739 if (r < 0)
1740 return r;
1741 } else {
1742 r = btrfs_is_filesystem(fd);
1743 if (r < 0)
1744 return r;
1745 if (!r)
1746 return -ENOTTY;
1747 }
1748
1749 args.key.min_objectid = args.key.max_objectid = qgroupid;
1750
1751 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1752 const struct btrfs_ioctl_search_header *sh;
1753 unsigned i;
1754
1755 args.key.nr_items = 256;
1756 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1757 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1758 break;
1759
1760 return -errno;
1761 }
1762
1763 if (args.key.nr_items <= 0)
1764 break;
1765
1766 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1767
1768 /* Make sure we start the next search at least from this entry */
1769 btrfs_ioctl_search_args_set(&args, sh);
1770
1771 if (sh->type != BTRFS_QGROUP_RELATION_KEY)
1772 continue;
1773 if (sh->offset < sh->objectid)
1774 continue;
1775 if (sh->objectid != qgroupid)
1776 continue;
1777
1778 if (!GREEDY_REALLOC(items, n_items+1))
1779 return -ENOMEM;
1780
1781 items[n_items++] = sh->offset;
1782 }
1783
1784 /* Increase search key by one, to read the next item, if we can. */
1785 if (!btrfs_ioctl_search_args_inc(&args))
1786 break;
1787 }
1788
1789 if (n_items <= 0) {
1790 *ret = NULL;
1791 return 0;
1792 }
1793
1794 *ret = TAKE_PTR(items);
1795
1796 return (int) n_items;
1797 }
1798
1799 int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1800 _cleanup_free_ uint64_t *qgroups = NULL;
1801 uint64_t parent_subvol;
1802 bool changed = false;
1803 int n = 0, r;
1804
1805 assert(fd >= 0);
1806
1807 /*
1808 * Sets up the specified subvolume's qgroup automatically in
1809 * one of two ways:
1810 *
1811 * If insert_intermediary_qgroup is false, the subvolume's
1812 * leaf qgroup will be assigned to the same parent qgroups as
1813 * the subvolume's parent subvolume.
1814 *
1815 * If insert_intermediary_qgroup is true a new intermediary
1816 * higher-level qgroup is created, with a higher level number,
1817 * but reusing the id of the subvolume. The level number is
1818 * picked as one smaller than the lowest level qgroup the
1819 * parent subvolume is a member of. If the parent subvolume's
1820 * leaf qgroup is assigned to no higher-level qgroup a new
1821 * qgroup of level 255 is created instead. Either way, the new
1822 * qgroup is then assigned to the parent's higher-level
1823 * qgroup, and the subvolume itself is assigned to it.
1824 *
1825 * If the subvolume is already assigned to a higher level
1826 * qgroup, no operation is executed.
1827 *
1828 * Effectively this means: regardless if
1829 * insert_intermediary_qgroup is true or not, after this
1830 * function is invoked the subvolume will be accounted within
1831 * the same qgroups as the parent. However, if it is true, it
1832 * will also get its own higher-level qgroup, which may in
1833 * turn be used by subvolumes created beneath this subvolume
1834 * later on.
1835 *
1836 * This hence defines a simple default qgroup setup for
1837 * subvolumes, as long as this function is invoked on each
1838 * created subvolume: each subvolume is always accounting
1839 * together with its immediate parents. Optionally, if
1840 * insert_intermediary_qgroup is true, it will also get a
1841 * qgroup that then includes all its own child subvolumes.
1842 */
1843
1844 if (subvol_id == 0) {
1845 r = btrfs_is_subvol_fd(fd);
1846 if (r < 0)
1847 return r;
1848 if (!r)
1849 return -ENOTTY;
1850
1851 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1852 if (r < 0)
1853 return r;
1854 }
1855
1856 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1857 if (n < 0)
1858 return n;
1859 if (n > 0) /* already parent qgroups set up, let's bail */
1860 return 0;
1861
1862 qgroups = mfree(qgroups);
1863
1864 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
1865 if (r == -ENXIO)
1866 /* No parent, hence no qgroup memberships */
1867 n = 0;
1868 else if (r < 0)
1869 return r;
1870 else {
1871 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1872 if (n < 0)
1873 return n;
1874 }
1875
1876 if (insert_intermediary_qgroup) {
1877 uint64_t lowest = 256, new_qgroupid;
1878 bool created = false;
1879
1880 /* Determine the lowest qgroup that the parent
1881 * subvolume is assigned to. */
1882
1883 for (int i = 0; i < n; i++) {
1884 uint64_t level;
1885
1886 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1887 if (r < 0)
1888 return r;
1889
1890 if (level < lowest)
1891 lowest = level;
1892 }
1893
1894 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1895 return -EBUSY;
1896
1897 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1898 if (r < 0)
1899 return r;
1900
1901 /* Create the new intermediary group, unless it already exists */
1902 r = btrfs_qgroup_create(fd, new_qgroupid);
1903 if (r < 0 && r != -EEXIST)
1904 return r;
1905 if (r >= 0)
1906 changed = created = true;
1907
1908 for (int i = 0; i < n; i++) {
1909 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1910 if (r < 0 && r != -EEXIST) {
1911 if (created)
1912 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1913
1914 return r;
1915 }
1916 if (r >= 0)
1917 changed = true;
1918 }
1919
1920 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1921 if (r < 0 && r != -EEXIST) {
1922 if (created)
1923 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1924 return r;
1925 }
1926 if (r >= 0)
1927 changed = true;
1928
1929 } else {
1930 int i;
1931
1932 /* Assign our subvolume to all the same qgroups as the parent */
1933
1934 for (i = 0; i < n; i++) {
1935 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1936 if (r < 0 && r != -EEXIST)
1937 return r;
1938 if (r >= 0)
1939 changed = true;
1940 }
1941 }
1942
1943 return changed;
1944 }
1945
1946 int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
1947 _cleanup_close_ int fd = -1;
1948
1949 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1950 if (fd < 0)
1951 return -errno;
1952
1953 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
1954 }
1955
1956 int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
1957
1958 struct btrfs_ioctl_search_args args = {
1959 /* Tree of tree roots */
1960 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1961
1962 /* Look precisely for the subvolume items */
1963 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1964 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1965
1966 /* No restrictions on the other components */
1967 .key.min_offset = 0,
1968 .key.max_offset = UINT64_MAX,
1969
1970 .key.min_transid = 0,
1971 .key.max_transid = UINT64_MAX,
1972 };
1973 int r;
1974
1975 assert(fd >= 0);
1976 assert(ret);
1977
1978 if (subvol_id == 0) {
1979 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1980 if (r < 0)
1981 return r;
1982 } else {
1983 r = btrfs_is_filesystem(fd);
1984 if (r < 0)
1985 return r;
1986 if (!r)
1987 return -ENOTTY;
1988 }
1989
1990 args.key.min_objectid = args.key.max_objectid = subvol_id;
1991
1992 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1993 const struct btrfs_ioctl_search_header *sh;
1994 unsigned i;
1995
1996 args.key.nr_items = 256;
1997 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1998 return negative_errno();
1999
2000 if (args.key.nr_items <= 0)
2001 break;
2002
2003 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
2004
2005 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
2006 continue;
2007 if (sh->objectid != subvol_id)
2008 continue;
2009
2010 *ret = sh->offset;
2011 return 0;
2012 }
2013 }
2014
2015 return -ENXIO;
2016 }