]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/btrfs-util.c
Merge pull request #17474 from yuwata/network-drop-link-deserialization-logic
[thirdparty/systemd.git] / src / basic / btrfs-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <inttypes.h>
6 #include <linux/btrfs_tree.h>
7 #include <linux/fs.h>
8 #include <linux/loop.h>
9 #include <linux/magic.h>
10 #include <stddef.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <unistd.h>
16
17 #include "alloc-util.h"
18 #include "blockdev-util.h"
19 #include "btrfs-util.h"
20 #include "chattr-util.h"
21 #include "copy.h"
22 #include "device-nodes.h"
23 #include "fd-util.h"
24 #include "fileio.h"
25 #include "fs-util.h"
26 #include "io-util.h"
27 #include "macro.h"
28 #include "path-util.h"
29 #include "rm-rf.h"
30 #include "smack-util.h"
31 #include "sparse-endian.h"
32 #include "stat-util.h"
33 #include "string-util.h"
34 #include "time-util.h"
35 #include "util.h"
36
37 /* WARNING: Be careful with file system ioctls! When we get an fd, we
38 * need to make sure it either refers to only a regular file or
39 * directory, or that it is located on btrfs, before invoking any
40 * btrfs ioctls. The ioctl numbers are reused by some device drivers
41 * (such as DRM), and hence might have bad effects when invoked on
42 * device nodes (that reference drivers) rather than fds to normal
43 * files or directories. */
44
45 static int validate_subvolume_name(const char *name) {
46
47 if (!filename_is_valid(name))
48 return -EINVAL;
49
50 if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
51 return -E2BIG;
52
53 return 0;
54 }
55
/* Determines the subvolume name a path refers to: the result of basename() on 'path' is checked with
 * validate_subvolume_name() and, if acceptable, stored in *subvolume. No copy is made; the stored pointer
 * is exactly what basename() returned.
 *
 * Returns 0 on success, or the negative error from validate_subvolume_name(). */
static int extract_subvolume_name(const char *path, const char **subvolume) {
        const char *last_component;
        int r;

        assert(path);
        assert(subvolume);

        last_component = basename(path);

        r = validate_subvolume_name(last_component);
        if (r >= 0) {
                *subvolume = last_component;
                r = 0;
        }

        return r;
}
72
73 int btrfs_is_filesystem(int fd) {
74 struct statfs sfs;
75
76 assert(fd >= 0);
77
78 if (fstatfs(fd, &sfs) < 0)
79 return -errno;
80
81 return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
82 }
83
/* Checks whether 'fd' refers to the top-level directory of a btrfs subvolume.
 *
 * Returns a negative errno-style value on failure, 0 if the inode is not a directory with inode number
 * 256, and otherwise whatever btrfs_is_filesystem() reports for the fd. */
int btrfs_is_subvol_fd(int fd) {
        struct stat info;

        assert(fd >= 0);

        if (fstat(fd, &info) < 0)
                return -errno;

        /* On btrfs subvolumes always have the inode 256 */
        if (S_ISDIR(info.st_mode) && info.st_ino == 256)
                return btrfs_is_filesystem(fd);

        return 0;
}
99
100 int btrfs_is_subvol(const char *path) {
101 _cleanup_close_ int fd = -1;
102
103 assert(path);
104
105 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
106 if (fd < 0)
107 return -errno;
108
109 return btrfs_is_subvol_fd(fd);
110 }
111
112 int btrfs_subvol_make_fd(int fd, const char *subvolume) {
113 struct btrfs_ioctl_vol_args args = {};
114 _cleanup_close_ int real_fd = -1;
115 int r;
116
117 assert(subvolume);
118
119 r = validate_subvolume_name(subvolume);
120 if (r < 0)
121 return r;
122
123 r = fcntl(fd, F_GETFL);
124 if (r < 0)
125 return -errno;
126 if (FLAGS_SET(r, O_PATH)) {
127 /* An O_PATH fd was specified, let's convert here to a proper one, as btrfs ioctl's can't deal with
128 * O_PATH. */
129
130 real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
131 if (real_fd < 0)
132 return real_fd;
133
134 fd = real_fd;
135 }
136
137 strncpy(args.name, subvolume, sizeof(args.name)-1);
138
139 if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
140 return -errno;
141
142 return 0;
143 }
144
145 int btrfs_subvol_make(const char *path) {
146 _cleanup_close_ int fd = -1;
147 const char *subvolume;
148 int r;
149
150 assert(path);
151
152 r = extract_subvolume_name(path, &subvolume);
153 if (r < 0)
154 return r;
155
156 fd = open_parent(path, O_CLOEXEC, 0);
157 if (fd < 0)
158 return fd;
159
160 return btrfs_subvol_make_fd(fd, subvolume);
161 }
162
/* Tries to create a subvolume at 'path' and, if that fails with -ENOTTY, creates a plain directory with
 * mkdir() instead.
 *
 * Returns 1 if a subvolume was created, 0 if a plain directory was created, and a negative errno-style
 * value on any other failure. */
int btrfs_subvol_make_fallback(const char *path, mode_t mode) {
        mode_t saved_umask, merged_umask;
        int r;

        assert(path);

        /* Behave like mkdir(): apply the requested mode, masked by the umask currently in effect. The
         * subvolume creation takes no mode argument, so this is done by temporarily adjusting the umask. */
        saved_umask = umask(~mode);
        merged_umask = saved_umask | ~mode;
        if (merged_umask != ~mode)
                umask(merged_umask);
        r = btrfs_subvol_make(path);
        umask(saved_umask);

        if (r == -ENOTTY) {
                /* Subvolume creation is not available here, fall back to a plain directory */
                if (mkdir(path, mode) < 0)
                        return -errno;

                return 0;
        }
        if (r < 0)
                return r;

        return 1; /* subvol worked */
}
187
188 int btrfs_subvol_set_read_only_fd(int fd, bool b) {
189 uint64_t flags, nflags;
190 struct stat st;
191
192 assert(fd >= 0);
193
194 if (fstat(fd, &st) < 0)
195 return -errno;
196
197 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
198 return -EINVAL;
199
200 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
201 return -errno;
202
203 nflags = UPDATE_FLAG(flags, BTRFS_SUBVOL_RDONLY, b);
204 if (flags == nflags)
205 return 0;
206
207 if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
208 return -errno;
209
210 return 0;
211 }
212
213 int btrfs_subvol_set_read_only(const char *path, bool b) {
214 _cleanup_close_ int fd = -1;
215
216 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
217 if (fd < 0)
218 return -errno;
219
220 return btrfs_subvol_set_read_only_fd(fd, b);
221 }
222
223 int btrfs_subvol_get_read_only_fd(int fd) {
224 uint64_t flags;
225 struct stat st;
226
227 assert(fd >= 0);
228
229 if (fstat(fd, &st) < 0)
230 return -errno;
231
232 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
233 return -EINVAL;
234
235 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
236 return -errno;
237
238 return !!(flags & BTRFS_SUBVOL_RDONLY);
239 }
240
241 int btrfs_reflink(int infd, int outfd) {
242 int r;
243
244 assert(infd >= 0);
245 assert(outfd >= 0);
246
247 /* Make sure we invoke the ioctl on a regular file, so that no device driver accidentally gets it. */
248
249 r = fd_verify_regular(outfd);
250 if (r < 0)
251 return r;
252
253 if (ioctl(outfd, BTRFS_IOC_CLONE, infd) < 0)
254 return -errno;
255
256 return 0;
257 }
258
259 int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
260 struct btrfs_ioctl_clone_range_args args = {
261 .src_fd = infd,
262 .src_offset = in_offset,
263 .src_length = sz,
264 .dest_offset = out_offset,
265 };
266 int r;
267
268 assert(infd >= 0);
269 assert(outfd >= 0);
270 assert(sz > 0);
271
272 r = fd_verify_regular(outfd);
273 if (r < 0)
274 return r;
275
276 if (ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args) < 0)
277 return -errno;
278
279 return 0;
280 }
281
282 int btrfs_get_block_device_fd(int fd, dev_t *dev) {
283 struct btrfs_ioctl_fs_info_args fsi = {};
284 uint64_t id;
285 int r;
286
287 assert(fd >= 0);
288 assert(dev);
289
290 r = btrfs_is_filesystem(fd);
291 if (r < 0)
292 return r;
293 if (!r)
294 return -ENOTTY;
295
296 if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
297 return -errno;
298
299 /* We won't do this for btrfs RAID */
300 if (fsi.num_devices != 1) {
301 *dev = 0;
302 return 0;
303 }
304
305 for (id = 1; id <= fsi.max_id; id++) {
306 struct btrfs_ioctl_dev_info_args di = {
307 .devid = id,
308 };
309 struct stat st;
310
311 if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
312 if (errno == ENODEV)
313 continue;
314
315 return -errno;
316 }
317
318 /* For the root fs — when no initrd is involved — btrfs returns /dev/root on any kernels from
319 * the past few years. That sucks, as we have no API to determine the actual root then. let's
320 * return an recognizable error for this case, so that the caller can maybe print a nice
321 * message about this.
322 *
323 * https://bugzilla.kernel.org/show_bug.cgi?id=89721 */
324 if (path_equal((char*) di.path, "/dev/root"))
325 return -EUCLEAN;
326
327 if (stat((char*) di.path, &st) < 0)
328 return -errno;
329
330 if (!S_ISBLK(st.st_mode))
331 return -ENOTBLK;
332
333 if (major(st.st_rdev) == 0)
334 return -ENODEV;
335
336 *dev = st.st_rdev;
337 return 1;
338 }
339
340 return -ENODEV;
341 }
342
343 int btrfs_get_block_device(const char *path, dev_t *dev) {
344 _cleanup_close_ int fd = -1;
345
346 assert(path);
347 assert(dev);
348
349 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
350 if (fd < 0)
351 return -errno;
352
353 return btrfs_get_block_device_fd(fd, dev);
354 }
355
356 int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
357 struct btrfs_ioctl_ino_lookup_args args = {
358 .objectid = BTRFS_FIRST_FREE_OBJECTID
359 };
360 int r;
361
362 assert(fd >= 0);
363 assert(ret);
364
365 r = btrfs_is_filesystem(fd);
366 if (r < 0)
367 return r;
368 if (!r)
369 return -ENOTTY;
370
371 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
372 return -errno;
373
374 *ret = args.treeid;
375 return 0;
376 }
377
378 int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
379 _cleanup_close_ int subvol_fd = -1;
380
381 assert(fd >= 0);
382 assert(ret);
383
384 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
385 if (subvol_fd < 0)
386 return -errno;
387
388 return btrfs_subvol_get_id_fd(subvol_fd, ret);
389 }
390
391 static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
392 assert(args);
393
394 /* the objectid, type, offset together make up the btrfs key,
395 * which is considered a single 136byte integer when
396 * comparing. This call increases the counter by one, dealing
397 * with the overflow between the overflows */
398
399 if (args->key.min_offset < (uint64_t) -1) {
400 args->key.min_offset++;
401 return true;
402 }
403
404 if (args->key.min_type < (uint8_t) -1) {
405 args->key.min_type++;
406 args->key.min_offset = 0;
407 return true;
408 }
409
410 if (args->key.min_objectid < (uint64_t) -1) {
411 args->key.min_objectid++;
412 args->key.min_offset = 0;
413 args->key.min_type = 0;
414 return true;
415 }
416
417 return 0;
418 }
419
420 static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
421 assert(args);
422 assert(h);
423
424 args->key.min_objectid = h->objectid;
425 args->key.min_type = h->type;
426 args->key.min_offset = h->offset;
427 }
428
/* Compares the minimum search key in 'args' against the maximum one, component by component in the order
 * objectid, type, offset. Returns the result of CMP() for the first component that differs, or for the
 * offset if objectid and type are equal. */
static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
        int c;

        assert(args);

        c = CMP(args->key.min_objectid, args->key.max_objectid);
        if (c == 0)
                c = CMP(args->key.min_type, args->key.max_type);
        if (c == 0)
                c = CMP(args->key.min_offset, args->key.max_offset);

        return c;
}
446
/* Iterates over the items stored in (args).buf: 'i' counts from 0 up to (args).key.nr_items, while 'sh'
 * points to the header of the current item. Each header is followed by (sh)->len bytes of payload, after
 * which the next header begins. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args)                  \
        for ((i) = 0,                                                   \
             (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
             (i) < (args).key.nr_items;                                 \
             (i)++,                                                     \
             (sh) = (const struct btrfs_ioctl_search_header*)           \
                     ((const uint8_t*) (sh) + (sh)->len + sizeof(struct btrfs_ioctl_search_header)))
453
/* Evaluates to a pointer to the payload that directly follows the search result header 'sh'. The
 * argument is parenthesized so that pointer expressions such as 'sh + 1' are cast as a whole, rather
 * than having the cast bind to their first operand only. */
#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh)                              \
        ((void*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header)))
456
457 int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
458 struct btrfs_ioctl_search_args args = {
459 /* Tree of tree roots */
460 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
461
462 /* Look precisely for the subvolume items */
463 .key.min_type = BTRFS_ROOT_ITEM_KEY,
464 .key.max_type = BTRFS_ROOT_ITEM_KEY,
465
466 .key.min_offset = 0,
467 .key.max_offset = (uint64_t) -1,
468
469 /* No restrictions on the other components */
470 .key.min_transid = 0,
471 .key.max_transid = (uint64_t) -1,
472 };
473
474 bool found = false;
475 int r;
476
477 assert(fd >= 0);
478 assert(ret);
479
480 if (subvol_id == 0) {
481 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
482 if (r < 0)
483 return r;
484 } else {
485 r = btrfs_is_filesystem(fd);
486 if (r < 0)
487 return r;
488 if (!r)
489 return -ENOTTY;
490 }
491
492 args.key.min_objectid = args.key.max_objectid = subvol_id;
493
494 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
495 const struct btrfs_ioctl_search_header *sh;
496 unsigned i;
497
498 args.key.nr_items = 256;
499 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
500 return -errno;
501
502 if (args.key.nr_items <= 0)
503 break;
504
505 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
506
507 const struct btrfs_root_item *ri;
508
509 /* Make sure we start the next search at least from this entry */
510 btrfs_ioctl_search_args_set(&args, sh);
511
512 if (sh->objectid != subvol_id)
513 continue;
514 if (sh->type != BTRFS_ROOT_ITEM_KEY)
515 continue;
516
517 /* Older versions of the struct lacked the otime setting */
518 if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
519 continue;
520
521 ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
522
523 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
524 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
525
526 ret->subvol_id = subvol_id;
527 ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;
528
529 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
530 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
531 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
532
533 found = true;
534 goto finish;
535 }
536
537 /* Increase search key by one, to read the next item, if we can. */
538 if (!btrfs_ioctl_search_args_inc(&args))
539 break;
540 }
541
542 finish:
543 if (!found)
544 return -ENODATA;
545
546 return 0;
547 }
548
549 int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
550
551 struct btrfs_ioctl_search_args args = {
552 /* Tree of quota items */
553 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
554
555 /* The object ID is always 0 */
556 .key.min_objectid = 0,
557 .key.max_objectid = 0,
558
559 /* Look precisely for the quota items */
560 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
561 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
562
563 /* No restrictions on the other components */
564 .key.min_transid = 0,
565 .key.max_transid = (uint64_t) -1,
566 };
567
568 bool found_info = false, found_limit = false;
569 int r;
570
571 assert(fd >= 0);
572 assert(ret);
573
574 if (qgroupid == 0) {
575 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
576 if (r < 0)
577 return r;
578 } else {
579 r = btrfs_is_filesystem(fd);
580 if (r < 0)
581 return r;
582 if (!r)
583 return -ENOTTY;
584 }
585
586 args.key.min_offset = args.key.max_offset = qgroupid;
587
588 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
589 const struct btrfs_ioctl_search_header *sh;
590 unsigned i;
591
592 args.key.nr_items = 256;
593 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
594 if (errno == ENOENT) /* quota tree is missing: quota disabled */
595 break;
596
597 return -errno;
598 }
599
600 if (args.key.nr_items <= 0)
601 break;
602
603 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
604
605 /* Make sure we start the next search at least from this entry */
606 btrfs_ioctl_search_args_set(&args, sh);
607
608 if (sh->objectid != 0)
609 continue;
610 if (sh->offset != qgroupid)
611 continue;
612
613 if (sh->type == BTRFS_QGROUP_INFO_KEY) {
614 const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
615
616 ret->referenced = le64toh(qii->rfer);
617 ret->exclusive = le64toh(qii->excl);
618
619 found_info = true;
620
621 } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
622 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
623
624 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
625 ret->referenced_max = le64toh(qli->max_rfer);
626 else
627 ret->referenced_max = (uint64_t) -1;
628
629 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
630 ret->exclusive_max = le64toh(qli->max_excl);
631 else
632 ret->exclusive_max = (uint64_t) -1;
633
634 found_limit = true;
635 }
636
637 if (found_info && found_limit)
638 goto finish;
639 }
640
641 /* Increase search key by one, to read the next item, if we can. */
642 if (!btrfs_ioctl_search_args_inc(&args))
643 break;
644 }
645
646 finish:
647 if (!found_limit && !found_info)
648 return -ENODATA;
649
650 if (!found_info) {
651 ret->referenced = (uint64_t) -1;
652 ret->exclusive = (uint64_t) -1;
653 }
654
655 if (!found_limit) {
656 ret->referenced_max = (uint64_t) -1;
657 ret->exclusive_max = (uint64_t) -1;
658 }
659
660 return 0;
661 }
662
663 int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
664 _cleanup_close_ int fd = -1;
665
666 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
667 if (fd < 0)
668 return -errno;
669
670 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
671 }
672
673 int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
674 uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0;
675 _cleanup_free_ uint64_t *qgroups = NULL;
676 int r, n, i;
677
678 assert(fd >= 0);
679 assert(ret);
680
681 /* This finds the "subtree" qgroup for a specific
682 * subvolume. This only works for subvolumes that have been
683 * prepared with btrfs_subvol_auto_qgroup_fd() with
684 * insert_intermediary_qgroup=true (or equivalent). For others
685 * it will return the leaf qgroup instead. The two cases may
686 * be distuingished via the return value, which is 1 in case
687 * an appropriate "subtree" qgroup was found, and 0
688 * otherwise. */
689
690 if (subvol_id == 0) {
691 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
692 if (r < 0)
693 return r;
694 }
695
696 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
697 if (r < 0)
698 return r;
699 if (level != 0) /* Input must be a leaf qgroup */
700 return -EINVAL;
701
702 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
703 if (n < 0)
704 return n;
705
706 for (i = 0; i < n; i++) {
707 uint64_t id;
708
709 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
710 if (r < 0)
711 return r;
712
713 if (id != subvol_id)
714 continue;
715
716 if (lowest == (uint64_t) -1 || level < lowest) {
717 lowest_qgroupid = qgroups[i];
718 lowest = level;
719 }
720 }
721
722 if (lowest == (uint64_t) -1) {
723 /* No suitable higher-level qgroup found, let's return
724 * the leaf qgroup instead, and indicate that with the
725 * return value. */
726
727 *ret = subvol_id;
728 return 0;
729 }
730
731 *ret = lowest_qgroupid;
732 return 1;
733 }
734
735 int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
736 uint64_t qgroupid;
737 int r;
738
739 assert(fd >= 0);
740 assert(ret);
741
742 /* This determines the quota data of the qgroup with the
743 * lowest level, that shares the id part with the specified
744 * subvolume. This is useful for determining the quota data
745 * for entire subvolume subtrees, as long as the subtrees have
746 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
747 * compatible way */
748
749 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
750 if (r < 0)
751 return r;
752
753 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
754 }
755
756 int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
757 _cleanup_close_ int fd = -1;
758
759 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
760 if (fd < 0)
761 return -errno;
762
763 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
764 }
765
766 int btrfs_defrag_fd(int fd) {
767 int r;
768
769 assert(fd >= 0);
770
771 r = fd_verify_regular(fd);
772 if (r < 0)
773 return r;
774
775 if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0)
776 return -errno;
777
778 return 0;
779 }
780
781 int btrfs_defrag(const char *p) {
782 _cleanup_close_ int fd = -1;
783
784 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
785 if (fd < 0)
786 return -errno;
787
788 return btrfs_defrag_fd(fd);
789 }
790
791 int btrfs_quota_enable_fd(int fd, bool b) {
792 struct btrfs_ioctl_quota_ctl_args args = {
793 .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
794 };
795 int r;
796
797 assert(fd >= 0);
798
799 r = btrfs_is_filesystem(fd);
800 if (r < 0)
801 return r;
802 if (!r)
803 return -ENOTTY;
804
805 if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
806 return -errno;
807
808 return 0;
809 }
810
811 int btrfs_quota_enable(const char *path, bool b) {
812 _cleanup_close_ int fd = -1;
813
814 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
815 if (fd < 0)
816 return -errno;
817
818 return btrfs_quota_enable_fd(fd, b);
819 }
820
821 int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
822
823 struct btrfs_ioctl_qgroup_limit_args args = {
824 .lim.max_rfer = referenced_max,
825 .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
826 };
827 unsigned c;
828 int r;
829
830 assert(fd >= 0);
831
832 if (qgroupid == 0) {
833 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
834 if (r < 0)
835 return r;
836 } else {
837 r = btrfs_is_filesystem(fd);
838 if (r < 0)
839 return r;
840 if (!r)
841 return -ENOTTY;
842 }
843
844 args.qgroupid = qgroupid;
845
846 for (c = 0;; c++) {
847 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
848
849 if (errno == EBUSY && c < 10) {
850 (void) btrfs_quota_scan_wait(fd);
851 continue;
852 }
853
854 return -errno;
855 }
856
857 break;
858 }
859
860 return 0;
861 }
862
863 int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
864 _cleanup_close_ int fd = -1;
865
866 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
867 if (fd < 0)
868 return -errno;
869
870 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
871 }
872
/* Sets the "referenced" limit on the qgroup that btrfs_subvol_find_subtree_qgroup() selects for the
 * subvolume 'subvol_id'.
 *
 * Returns the result of btrfs_qgroup_set_limit_fd() for that qgroup, or the negative error of the
 * qgroup lookup. */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroupid;
        int r;

        assert(fd >= 0);

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroupid);
        if (r < 0)
                return r;

        return btrfs_qgroup_set_limit_fd(fd, subtree_qgroupid, referenced_max);
}
885
886 int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
887 _cleanup_close_ int fd = -1;
888
889 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
890 if (fd < 0)
891 return -errno;
892
893 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
894 }
895
896 int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
897 assert(ret);
898
899 if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
900 return -EINVAL;
901
902 if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
903 return -EINVAL;
904
905 *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
906 return 0;
907 }
908
909 int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
910 assert(level || id);
911
912 if (level)
913 *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
914
915 if (id)
916 *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1);
917
918 return 0;
919 }
920
921 static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
922
923 struct btrfs_ioctl_qgroup_create_args args = {
924 .create = b,
925 .qgroupid = qgroupid,
926 };
927 unsigned c;
928 int r;
929
930 r = btrfs_is_filesystem(fd);
931 if (r < 0)
932 return r;
933 if (r == 0)
934 return -ENOTTY;
935
936 for (c = 0;; c++) {
937 if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
938
939 /* On old kernels if quota is not enabled, we get EINVAL. On newer kernels we get
940 * ENOTCONN. Let's always convert this to ENOTCONN to make this recognizable
941 * everywhere the same way. */
942
943 if (IN_SET(errno, EINVAL, ENOTCONN))
944 return -ENOTCONN;
945
946 if (errno == EBUSY && c < 10) {
947 (void) btrfs_quota_scan_wait(fd);
948 continue;
949 }
950
951 return -errno;
952 }
953
954 break;
955 }
956
957 return 0;
958 }
959
/* Creates the quota group 'qgroupid' on the btrfs file system 'fd' lives on. See
 * qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        const bool create = true;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
963
/* Destroys the quota group 'qgroupid' on the btrfs file system 'fd' lives on. See
 * qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        const bool create = false;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
967
968 int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
969 _cleanup_free_ uint64_t *qgroups = NULL;
970 uint64_t subvol_id;
971 int i, n, r;
972
973 /* Destroys the specified qgroup, but unassigns it from all
974 * its parents first. Also, it recursively destroys all
975 * qgroups it is assigned to that have the same id part of the
976 * qgroupid as the specified group. */
977
978 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
979 if (r < 0)
980 return r;
981
982 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
983 if (n < 0)
984 return n;
985
986 for (i = 0; i < n; i++) {
987 uint64_t id;
988
989 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
990 if (r < 0)
991 return r;
992
993 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
994 if (r < 0)
995 return r;
996
997 if (id != subvol_id)
998 continue;
999
1000 /* The parent qgroupid shares the same id part with
1001 * us? If so, destroy it too. */
1002
1003 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
1004 }
1005
1006 return btrfs_qgroup_destroy(fd, qgroupid);
1007 }
1008
1009 int btrfs_quota_scan_start(int fd) {
1010 struct btrfs_ioctl_quota_rescan_args args = {};
1011
1012 assert(fd >= 0);
1013
1014 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0)
1015 return -errno;
1016
1017 return 0;
1018 }
1019
1020 int btrfs_quota_scan_wait(int fd) {
1021 assert(fd >= 0);
1022
1023 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0)
1024 return -errno;
1025
1026 return 0;
1027 }
1028
1029 int btrfs_quota_scan_ongoing(int fd) {
1030 struct btrfs_ioctl_quota_rescan_args args = {};
1031
1032 assert(fd >= 0);
1033
1034 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
1035 return -errno;
1036
1037 return !!args.flags;
1038 }
1039
1040 static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
1041 struct btrfs_ioctl_qgroup_assign_args args = {
1042 .assign = b,
1043 .src = child,
1044 .dst = parent,
1045 };
1046 unsigned c;
1047 int r;
1048
1049 r = btrfs_is_filesystem(fd);
1050 if (r < 0)
1051 return r;
1052 if (r == 0)
1053 return -ENOTTY;
1054
1055 for (c = 0;; c++) {
1056 r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
1057 if (r < 0) {
1058 if (errno == EBUSY && c < 10) {
1059 (void) btrfs_quota_scan_wait(fd);
1060 continue;
1061 }
1062
1063 return -errno;
1064 }
1065
1066 if (r == 0)
1067 return 0;
1068
1069 /* If the return value is > 0, we need to request a rescan */
1070
1071 (void) btrfs_quota_scan_start(fd);
1072 return 1;
1073 }
1074 }
1075
/* Assigns the qgroup 'child' to the qgroup 'parent'. See qgroup_assign_or_unassign() for the return
 * values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = true;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1079
/* Removes the assignment of the qgroup 'child' to the qgroup 'parent'. See qgroup_assign_or_unassign()
 * for the return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = false;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1083
1084 static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
1085 struct btrfs_ioctl_search_args args = {
1086 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1087
1088 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1089 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1090
1091 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1092 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1093
1094 .key.min_transid = 0,
1095 .key.max_transid = (uint64_t) -1,
1096 };
1097
1098 struct btrfs_ioctl_vol_args vol_args = {};
1099 _cleanup_close_ int subvol_fd = -1;
1100 struct stat st;
1101 bool made_writable = false;
1102 int r;
1103
1104 assert(fd >= 0);
1105 assert(subvolume);
1106
1107 if (fstat(fd, &st) < 0)
1108 return -errno;
1109
1110 if (!S_ISDIR(st.st_mode))
1111 return -EINVAL;
1112
1113 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1114 if (subvol_fd < 0)
1115 return -errno;
1116
1117 /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
1118 * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
1119 * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
1120 * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a
1121 * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
1122 * let's prefer ENOTTY over EPERM/EACCES though. */
1123 r = btrfs_is_subvol_fd(subvol_fd);
1124 if (r < 0)
1125 return r;
1126 if (r == 0) /* Not a btrfs subvolume */
1127 return -ENOTTY;
1128
1129 if (subvol_id == 0) {
1130 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
1131 if (r < 0)
1132 return r;
1133 }
1134
1135 /* First, try to remove the subvolume. If it happens to be
1136 * already empty, this will just work. */
1137 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1138 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
1139 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
1140 return 0;
1141 }
1142 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
1143 return -errno;
1144
1145 /* OK, the subvolume is not empty, let's look for child
1146 * subvolumes, and remove them, first */
1147
1148 args.key.min_offset = args.key.max_offset = subvol_id;
1149
1150 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1151 const struct btrfs_ioctl_search_header *sh;
1152 unsigned i;
1153
1154 args.key.nr_items = 256;
1155 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1156 return -errno;
1157
1158 if (args.key.nr_items <= 0)
1159 break;
1160
1161 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1162 _cleanup_free_ char *p = NULL;
1163 const struct btrfs_root_ref *ref;
1164
1165 btrfs_ioctl_search_args_set(&args, sh);
1166
1167 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1168 continue;
1169 if (sh->offset != subvol_id)
1170 continue;
1171
1172 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1173
1174 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1175 if (!p)
1176 return -ENOMEM;
1177
1178 struct btrfs_ioctl_ino_lookup_args ino_args = {
1179 .treeid = subvol_id,
1180 .objectid = htole64(ref->dirid),
1181 };
1182
1183 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1184 return -errno;
1185
1186 if (!made_writable) {
1187 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1188 if (r < 0)
1189 return r;
1190
1191 made_writable = true;
1192 }
1193
1194 if (isempty(ino_args.name))
1195 /* Subvolume is in the top-level
1196 * directory of the subvolume. */
1197 r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
1198 else {
1199 _cleanup_close_ int child_fd = -1;
1200
1201 /* Subvolume is somewhere further down,
1202 * hence we need to open the
1203 * containing directory first */
1204
1205 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1206 if (child_fd < 0)
1207 return -errno;
1208
1209 r = subvol_remove_children(child_fd, p, sh->objectid, flags);
1210 }
1211 if (r < 0)
1212 return r;
1213 }
1214
1215 /* Increase search key by one, to read the next item, if we can. */
1216 if (!btrfs_ioctl_search_args_inc(&args))
1217 break;
1218 }
1219
1220 /* OK, the child subvolumes should all be gone now, let's try
1221 * again to remove the subvolume */
1222 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1223 return -errno;
1224
1225 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
1226 return 0;
1227 }
1228
1229 int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
1230 _cleanup_close_ int fd = -1;
1231 const char *subvolume;
1232 int r;
1233
1234 assert(path);
1235
1236 r = extract_subvolume_name(path, &subvolume);
1237 if (r < 0)
1238 return r;
1239
1240 fd = open_parent(path, O_CLOEXEC, 0);
1241 if (fd < 0)
1242 return fd;
1243
1244 return subvol_remove_children(fd, subvolume, 0, flags);
1245 }
1246
1247 int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
1248 return subvol_remove_children(fd, subvolume, 0, flags);
1249 }
1250
1251 int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1252
1253 struct btrfs_ioctl_search_args args = {
1254 /* Tree of quota items */
1255 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1256
1257 /* The object ID is always 0 */
1258 .key.min_objectid = 0,
1259 .key.max_objectid = 0,
1260
1261 /* Look precisely for the quota items */
1262 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1263 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1264
1265 /* For our qgroup */
1266 .key.min_offset = old_qgroupid,
1267 .key.max_offset = old_qgroupid,
1268
1269 /* No restrictions on the other components */
1270 .key.min_transid = 0,
1271 .key.max_transid = (uint64_t) -1,
1272 };
1273
1274 int r;
1275
1276 r = btrfs_is_filesystem(fd);
1277 if (r < 0)
1278 return r;
1279 if (!r)
1280 return -ENOTTY;
1281
1282 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1283 const struct btrfs_ioctl_search_header *sh;
1284 unsigned i;
1285
1286 args.key.nr_items = 256;
1287 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1288 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1289 break;
1290
1291 return -errno;
1292 }
1293
1294 if (args.key.nr_items <= 0)
1295 break;
1296
1297 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1298 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1299 struct btrfs_ioctl_qgroup_limit_args qargs;
1300 unsigned c;
1301
1302 /* Make sure we start the next search at least from this entry */
1303 btrfs_ioctl_search_args_set(&args, sh);
1304
1305 if (sh->objectid != 0)
1306 continue;
1307 if (sh->type != BTRFS_QGROUP_LIMIT_KEY)
1308 continue;
1309 if (sh->offset != old_qgroupid)
1310 continue;
1311
1312 /* We found the entry, now copy things over. */
1313
1314 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1315 .qgroupid = new_qgroupid,
1316
1317 .lim.max_rfer = le64toh(qli->max_rfer),
1318 .lim.max_excl = le64toh(qli->max_excl),
1319 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1320 .lim.rsv_excl = le64toh(qli->rsv_excl),
1321
1322 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1323 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1324 BTRFS_QGROUP_LIMIT_RSV_RFER|
1325 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1326 };
1327
1328 for (c = 0;; c++) {
1329 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1330 if (errno == EBUSY && c < 10) {
1331 (void) btrfs_quota_scan_wait(fd);
1332 continue;
1333 }
1334 return -errno;
1335 }
1336
1337 break;
1338 }
1339
1340 return 1;
1341 }
1342
1343 /* Increase search key by one, to read the next item, if we can. */
1344 if (!btrfs_ioctl_search_args_inc(&args))
1345 break;
1346 }
1347
1348 return 0;
1349 }
1350
1351 static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1352 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1353 bool copy_from_parent = false, insert_intermediary_qgroup = false;
1354 int n_old_qgroups, n_old_parent_qgroups, r, i;
1355 uint64_t old_parent_id;
1356
1357 assert(fd >= 0);
1358
1359 /* Copies a reduced form of quota information from the old to
1360 * the new subvolume. */
1361
1362 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1363 if (n_old_qgroups <= 0) /* Nothing to copy */
1364 return n_old_qgroups;
1365
1366 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
1367 if (r == -ENXIO)
1368 /* We have no parent, hence nothing to copy. */
1369 n_old_parent_qgroups = 0;
1370 else if (r < 0)
1371 return r;
1372 else {
1373 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1374 if (n_old_parent_qgroups < 0)
1375 return n_old_parent_qgroups;
1376 }
1377
1378 for (i = 0; i < n_old_qgroups; i++) {
1379 uint64_t id;
1380 int j;
1381
1382 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1383 if (r < 0)
1384 return r;
1385
1386 if (id == old_subvol_id) {
1387 /* The old subvolume was member of a qgroup
1388 * that had the same id, but a different level
1389 * as it self. Let's set up something similar
1390 * in the destination. */
1391 insert_intermediary_qgroup = true;
1392 break;
1393 }
1394
1395 for (j = 0; j < n_old_parent_qgroups; j++)
1396 if (old_parent_qgroups[j] == old_qgroups[i])
1397 /* The old subvolume shared a common
1398 * parent qgroup with its parent
1399 * subvolume. Let's set up something
1400 * similar in the destination. */
1401 copy_from_parent = true;
1402 }
1403
1404 if (!insert_intermediary_qgroup && !copy_from_parent)
1405 return 0;
1406
1407 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1408 }
1409
/* Copies quota limits from subvolume 'old_subvol' to subvolume 'new_subvol': first the limits of the leaf
 * qgroup, then, if both subvolumes have a subtree qgroup, the limits of that one as well.
 *
 * Returns > 0 if any limit was copied, 0 if not, and a negative errno-style error on failure. */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t src_subtree_qgroup, dst_subtree_qgroup;
        int leaf_copied, r;

        /* Step 1: the leaf qgroups, whose IDs are the subvolume IDs */
        r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (r < 0)
                return r;
        leaf_copied = r > 0;

        /* Step 2: the subtree qgroups, but only if source and destination both have one */
        r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &src_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &dst_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_qgroup_copy_limits(fd, src_subtree_qgroup, dst_subtree_qgroup);

        /* Errors and "copied" (> 0) are propagated as-is, otherwise report what step 1 did */
        return r != 0 ? r : leaf_copied;
}
1440
1441 static int subvol_snapshot_children(
1442 int old_fd,
1443 int new_fd,
1444 const char *subvolume,
1445 uint64_t old_subvol_id,
1446 BtrfsSnapshotFlags flags) {
1447
1448 struct btrfs_ioctl_search_args args = {
1449 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1450
1451 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1452 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1453
1454 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1455 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1456
1457 .key.min_transid = 0,
1458 .key.max_transid = (uint64_t) -1,
1459 };
1460
1461 struct btrfs_ioctl_vol_args_v2 vol_args = {
1462 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1463 .fd = old_fd,
1464 };
1465 _cleanup_close_ int subvolume_fd = -1;
1466 uint64_t new_subvol_id;
1467 int r;
1468
1469 assert(old_fd >= 0);
1470 assert(new_fd >= 0);
1471 assert(subvolume);
1472
1473 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1474
1475 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1476 return -errno;
1477
1478 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1479 !(flags & BTRFS_SNAPSHOT_QUOTA))
1480 return 0;
1481
1482 if (old_subvol_id == 0) {
1483 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
1484 if (r < 0)
1485 return r;
1486 }
1487
1488 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1489 if (r < 0)
1490 return r;
1491
1492 if (flags & BTRFS_SNAPSHOT_QUOTA)
1493 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1494
1495 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1496
1497 if (flags & BTRFS_SNAPSHOT_QUOTA)
1498 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1499
1500 return 0;
1501 }
1502
1503 args.key.min_offset = args.key.max_offset = old_subvol_id;
1504
1505 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1506 const struct btrfs_ioctl_search_header *sh;
1507 unsigned i;
1508
1509 args.key.nr_items = 256;
1510 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1511 return -errno;
1512
1513 if (args.key.nr_items <= 0)
1514 break;
1515
1516 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1517 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
1518 const struct btrfs_root_ref *ref;
1519 _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
1520
1521 btrfs_ioctl_search_args_set(&args, sh);
1522
1523 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1524 continue;
1525
1526 /* Avoid finding the source subvolume a second
1527 * time */
1528 if (sh->offset != old_subvol_id)
1529 continue;
1530
1531 /* Avoid running into loops if the new
1532 * subvolume is below the old one. */
1533 if (sh->objectid == new_subvol_id)
1534 continue;
1535
1536 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1537 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1538 if (!p)
1539 return -ENOMEM;
1540
1541 struct btrfs_ioctl_ino_lookup_args ino_args = {
1542 .treeid = old_subvol_id,
1543 .objectid = htole64(ref->dirid),
1544 };
1545
1546 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1547 return -errno;
1548
1549 c = path_join(ino_args.name, p);
1550 if (!c)
1551 return -ENOMEM;
1552
1553 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1554 if (old_child_fd < 0)
1555 return -errno;
1556
1557 np = path_join(subvolume, ino_args.name);
1558 if (!np)
1559 return -ENOMEM;
1560
1561 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1562 if (new_child_fd < 0)
1563 return -errno;
1564
1565 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1566 /* If the snapshot is read-only we
1567 * need to mark it writable
1568 * temporarily, to put the subsnapshot
1569 * into place. */
1570
1571 if (subvolume_fd < 0) {
1572 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1573 if (subvolume_fd < 0)
1574 return -errno;
1575 }
1576
1577 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1578 if (r < 0)
1579 return r;
1580 }
1581
1582 /* When btrfs clones the subvolumes, child
1583 * subvolumes appear as empty directories. Remove
1584 * them, so that we can create a new snapshot
1585 * in their place */
1586 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1587 int k = -errno;
1588
1589 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1590 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1591
1592 return k;
1593 }
1594
1595 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
1596
1597 /* Restore the readonly flag */
1598 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1599 int k;
1600
1601 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1602 if (r >= 0 && k < 0)
1603 return k;
1604 }
1605
1606 if (r < 0)
1607 return r;
1608 }
1609
1610 /* Increase search key by one, to read the next item, if we can. */
1611 if (!btrfs_ioctl_search_args_inc(&args))
1612 break;
1613 }
1614
1615 if (flags & BTRFS_SNAPSHOT_QUOTA)
1616 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1617
1618 return 0;
1619 }
1620
1621 int btrfs_subvol_snapshot_fd_full(
1622 int old_fd,
1623 const char *new_path,
1624 BtrfsSnapshotFlags flags,
1625 copy_progress_path_t progress_path,
1626 copy_progress_bytes_t progress_bytes,
1627 void *userdata) {
1628
1629 _cleanup_close_ int new_fd = -1;
1630 const char *subvolume;
1631 int r;
1632
1633 assert(old_fd >= 0);
1634 assert(new_path);
1635
1636 r = btrfs_is_subvol_fd(old_fd);
1637 if (r < 0)
1638 return r;
1639 if (r == 0) {
1640 bool plain_directory = false;
1641
1642 /* If the source isn't a proper subvolume, fail unless fallback is requested */
1643 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1644 return -EISDIR;
1645
1646 r = btrfs_subvol_make(new_path);
1647 if (r == -ENOTTY && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
1648 /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
1649 if (mkdir(new_path, 0755) < 0)
1650 return -errno;
1651
1652 plain_directory = true;
1653 } else if (r < 0)
1654 return r;
1655
1656 r = copy_directory_fd_full(
1657 old_fd, new_path,
1658 COPY_MERGE|COPY_REFLINK|COPY_SAME_MOUNT|COPY_HARDLINKS|(FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGINT) ? COPY_SIGINT : 0),
1659 progress_path, progress_bytes, userdata);
1660 if (r < 0)
1661 goto fallback_fail;
1662
1663 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1664
1665 if (plain_directory) {
1666 /* Plain directories have no recursive read-only flag, but something pretty close to
1667 * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
1668
1669 if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
1670 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
1671 } else {
1672 r = btrfs_subvol_set_read_only(new_path, true);
1673 if (r < 0)
1674 goto fallback_fail;
1675 }
1676 }
1677
1678 return 0;
1679
1680 fallback_fail:
1681 (void) rm_rf(new_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
1682 return r;
1683 }
1684
1685 r = extract_subvolume_name(new_path, &subvolume);
1686 if (r < 0)
1687 return r;
1688
1689 new_fd = open_parent(new_path, O_CLOEXEC, 0);
1690 if (new_fd < 0)
1691 return new_fd;
1692
1693 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1694 }
1695
1696 int btrfs_subvol_snapshot_full(
1697 const char *old_path,
1698 const char *new_path,
1699 BtrfsSnapshotFlags flags,
1700 copy_progress_path_t progress_path,
1701 copy_progress_bytes_t progress_bytes,
1702 void *userdata) {
1703
1704 _cleanup_close_ int old_fd = -1;
1705
1706 assert(old_path);
1707 assert(new_path);
1708
1709 old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1710 if (old_fd < 0)
1711 return -errno;
1712
1713 return btrfs_subvol_snapshot_fd_full(old_fd, new_path, flags, progress_path, progress_bytes, userdata);
1714 }
1715
1716 int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1717
1718 struct btrfs_ioctl_search_args args = {
1719 /* Tree of quota items */
1720 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1721
1722 /* Look precisely for the quota relation items */
1723 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1724 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1725
1726 /* No restrictions on the other components */
1727 .key.min_offset = 0,
1728 .key.max_offset = (uint64_t) -1,
1729
1730 .key.min_transid = 0,
1731 .key.max_transid = (uint64_t) -1,
1732 };
1733
1734 _cleanup_free_ uint64_t *items = NULL;
1735 size_t n_items = 0, n_allocated = 0;
1736 int r;
1737
1738 assert(fd >= 0);
1739 assert(ret);
1740
1741 if (qgroupid == 0) {
1742 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1743 if (r < 0)
1744 return r;
1745 } else {
1746 r = btrfs_is_filesystem(fd);
1747 if (r < 0)
1748 return r;
1749 if (!r)
1750 return -ENOTTY;
1751 }
1752
1753 args.key.min_objectid = args.key.max_objectid = qgroupid;
1754
1755 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1756 const struct btrfs_ioctl_search_header *sh;
1757 unsigned i;
1758
1759 args.key.nr_items = 256;
1760 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1761 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1762 break;
1763
1764 return -errno;
1765 }
1766
1767 if (args.key.nr_items <= 0)
1768 break;
1769
1770 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1771
1772 /* Make sure we start the next search at least from this entry */
1773 btrfs_ioctl_search_args_set(&args, sh);
1774
1775 if (sh->type != BTRFS_QGROUP_RELATION_KEY)
1776 continue;
1777 if (sh->offset < sh->objectid)
1778 continue;
1779 if (sh->objectid != qgroupid)
1780 continue;
1781
1782 if (!GREEDY_REALLOC(items, n_allocated, n_items+1))
1783 return -ENOMEM;
1784
1785 items[n_items++] = sh->offset;
1786 }
1787
1788 /* Increase search key by one, to read the next item, if we can. */
1789 if (!btrfs_ioctl_search_args_inc(&args))
1790 break;
1791 }
1792
1793 if (n_items <= 0) {
1794 *ret = NULL;
1795 return 0;
1796 }
1797
1798 *ret = TAKE_PTR(items);
1799
1800 return (int) n_items;
1801 }
1802
1803 int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1804 _cleanup_free_ uint64_t *qgroups = NULL;
1805 uint64_t parent_subvol;
1806 bool changed = false;
1807 int n = 0, r;
1808
1809 assert(fd >= 0);
1810
1811 /*
1812 * Sets up the specified subvolume's qgroup automatically in
1813 * one of two ways:
1814 *
1815 * If insert_intermediary_qgroup is false, the subvolume's
1816 * leaf qgroup will be assigned to the same parent qgroups as
1817 * the subvolume's parent subvolume.
1818 *
1819 * If insert_intermediary_qgroup is true a new intermediary
1820 * higher-level qgroup is created, with a higher level number,
1821 * but reusing the id of the subvolume. The level number is
1822 * picked as one smaller than the lowest level qgroup the
1823 * parent subvolume is a member of. If the parent subvolume's
1824 * leaf qgroup is assigned to no higher-level qgroup a new
1825 * qgroup of level 255 is created instead. Either way, the new
1826 * qgroup is then assigned to the parent's higher-level
1827 * qgroup, and the subvolume itself is assigned to it.
1828 *
1829 * If the subvolume is already assigned to a higher level
1830 * qgroup, no operation is executed.
1831 *
1832 * Effectively this means: regardless if
1833 * insert_intermediary_qgroup is true or not, after this
1834 * function is invoked the subvolume will be accounted within
1835 * the same qgroups as the parent. However, if it is true, it
1836 * will also get its own higher-level qgroup, which may in
1837 * turn be used by subvolumes created beneath this subvolume
1838 * later on.
1839 *
1840 * This hence defines a simple default qgroup setup for
1841 * subvolumes, as long as this function is invoked on each
1842 * created subvolume: each subvolume is always accounting
1843 * together with its immediate parents. Optionally, if
1844 * insert_intermediary_qgroup is true, it will also get a
1845 * qgroup that then includes all its own child subvolumes.
1846 */
1847
1848 if (subvol_id == 0) {
1849 r = btrfs_is_subvol_fd(fd);
1850 if (r < 0)
1851 return r;
1852 if (!r)
1853 return -ENOTTY;
1854
1855 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1856 if (r < 0)
1857 return r;
1858 }
1859
1860 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1861 if (n < 0)
1862 return n;
1863 if (n > 0) /* already parent qgroups set up, let's bail */
1864 return 0;
1865
1866 qgroups = mfree(qgroups);
1867
1868 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
1869 if (r == -ENXIO)
1870 /* No parent, hence no qgroup memberships */
1871 n = 0;
1872 else if (r < 0)
1873 return r;
1874 else {
1875 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1876 if (n < 0)
1877 return n;
1878 }
1879
1880 if (insert_intermediary_qgroup) {
1881 uint64_t lowest = 256, new_qgroupid;
1882 bool created = false;
1883 int i;
1884
1885 /* Determine the lowest qgroup that the parent
1886 * subvolume is assigned to. */
1887
1888 for (i = 0; i < n; i++) {
1889 uint64_t level;
1890
1891 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1892 if (r < 0)
1893 return r;
1894
1895 if (level < lowest)
1896 lowest = level;
1897 }
1898
1899 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1900 return -EBUSY;
1901
1902 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1903 if (r < 0)
1904 return r;
1905
1906 /* Create the new intermediary group, unless it already exists */
1907 r = btrfs_qgroup_create(fd, new_qgroupid);
1908 if (r < 0 && r != -EEXIST)
1909 return r;
1910 if (r >= 0)
1911 changed = created = true;
1912
1913 for (i = 0; i < n; i++) {
1914 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1915 if (r < 0 && r != -EEXIST) {
1916 if (created)
1917 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1918
1919 return r;
1920 }
1921 if (r >= 0)
1922 changed = true;
1923 }
1924
1925 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1926 if (r < 0 && r != -EEXIST) {
1927 if (created)
1928 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1929 return r;
1930 }
1931 if (r >= 0)
1932 changed = true;
1933
1934 } else {
1935 int i;
1936
1937 /* Assign our subvolume to all the same qgroups as the parent */
1938
1939 for (i = 0; i < n; i++) {
1940 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1941 if (r < 0 && r != -EEXIST)
1942 return r;
1943 if (r >= 0)
1944 changed = true;
1945 }
1946 }
1947
1948 return changed;
1949 }
1950
1951 int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
1952 _cleanup_close_ int fd = -1;
1953
1954 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1955 if (fd < 0)
1956 return -errno;
1957
1958 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
1959 }
1960
1961 int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
1962
1963 struct btrfs_ioctl_search_args args = {
1964 /* Tree of tree roots */
1965 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1966
1967 /* Look precisely for the subvolume items */
1968 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1969 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1970
1971 /* No restrictions on the other components */
1972 .key.min_offset = 0,
1973 .key.max_offset = (uint64_t) -1,
1974
1975 .key.min_transid = 0,
1976 .key.max_transid = (uint64_t) -1,
1977 };
1978 int r;
1979
1980 assert(fd >= 0);
1981 assert(ret);
1982
1983 if (subvol_id == 0) {
1984 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1985 if (r < 0)
1986 return r;
1987 } else {
1988 r = btrfs_is_filesystem(fd);
1989 if (r < 0)
1990 return r;
1991 if (!r)
1992 return -ENOTTY;
1993 }
1994
1995 args.key.min_objectid = args.key.max_objectid = subvol_id;
1996
1997 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1998 const struct btrfs_ioctl_search_header *sh;
1999 unsigned i;
2000
2001 args.key.nr_items = 256;
2002 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
2003 return negative_errno();
2004
2005 if (args.key.nr_items <= 0)
2006 break;
2007
2008 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
2009
2010 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
2011 continue;
2012 if (sh->objectid != subvol_id)
2013 continue;
2014
2015 *ret = sh->offset;
2016 return 0;
2017 }
2018 }
2019
2020 return -ENXIO;
2021 }