]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/btrfs-util.c
Merge pull request #21373 from poettering/filesystems-more-groups
[thirdparty/systemd.git] / src / shared / btrfs-util.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <inttypes.h>
6 #include <linux/btrfs_tree.h>
7 #include <linux/fs.h>
8 #include <linux/loop.h>
9 #include <linux/magic.h>
10 #include <stddef.h>
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <sys/ioctl.h>
14 #include <sys/sysmacros.h>
15 #include <unistd.h>
16
17 #include "alloc-util.h"
18 #include "blockdev-util.h"
19 #include "btrfs-util.h"
20 #include "chattr-util.h"
21 #include "copy.h"
22 #include "fd-util.h"
23 #include "fileio.h"
24 #include "fs-util.h"
25 #include "io-util.h"
26 #include "macro.h"
27 #include "path-util.h"
28 #include "rm-rf.h"
29 #include "smack-util.h"
30 #include "sparse-endian.h"
31 #include "stat-util.h"
32 #include "string-util.h"
33 #include "time-util.h"
34 #include "util.h"
35
36 /* WARNING: Be careful with file system ioctls! When we get an fd, we
37 * need to make sure it either refers to only a regular file or
38 * directory, or that it is located on btrfs, before invoking any
39 * btrfs ioctls. The ioctl numbers are reused by some device drivers
40 * (such as DRM), and hence might have bad effects when invoked on
41 * device nodes (that reference drivers) rather than fds to normal
42 * files or directories. */
43
/* Checks whether the specified string may be used as a btrfs subvolume name. Returns 0 if so, -EINVAL if
 * filename_is_valid() rejects it, and -E2BIG if it is longer than BTRFS_SUBVOL_NAME_MAX bytes. */
static int validate_subvolume_name(const char *name) {
        if (!filename_is_valid(name))
                return -EINVAL;

        return strlen(name) > BTRFS_SUBVOL_NAME_MAX ? -E2BIG : 0;
}
54
/* Determines the subvolume name for a path, i.e. its last component as returned by basename(), and
 * validates it. On success stores a pointer to the name in *subvolume and returns 0; otherwise returns the
 * error from validate_subvolume_name() and leaves *subvolume untouched. */
static int extract_subvolume_name(const char *path, const char **subvolume) {
        const char *name;
        int r;

        assert(path);
        assert(subvolume);

        name = basename(path);

        r = validate_subvolume_name(name);
        if (r >= 0) {
                *subvolume = name;
                r = 0;
        }

        return r;
}
71
/* Checks whether the specified fd refers to a btrfs subvolume. Returns a negative errno-style value if
 * fstat() fails, 0 if the inode cannot be a subvolume, and otherwise whatever fd_is_fs_type() reports for
 * the btrfs magic. */
int btrfs_is_subvol_fd(int fd) {
        struct stat sb;

        assert(fd >= 0);

        if (fstat(fd, &sb) < 0)
                return -errno;

        /* On btrfs subvolumes always have the inode 256. Only if the stat data matches that is it worth
         * checking the file system type, too. */
        return btrfs_might_be_subvol(&sb) ? fd_is_fs_type(fd, BTRFS_SUPER_MAGIC) : 0;
}
87
88 int btrfs_is_subvol(const char *path) {
89 _cleanup_close_ int fd = -1;
90
91 assert(path);
92
93 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
94 if (fd < 0)
95 return -errno;
96
97 return btrfs_is_subvol_fd(fd);
98 }
99
100 int btrfs_subvol_make_fd(int fd, const char *subvolume) {
101 struct btrfs_ioctl_vol_args args = {};
102 _cleanup_close_ int real_fd = -1;
103 int r;
104
105 assert(subvolume);
106
107 r = validate_subvolume_name(subvolume);
108 if (r < 0)
109 return r;
110
111 r = fcntl(fd, F_GETFL);
112 if (r < 0)
113 return -errno;
114 if (FLAGS_SET(r, O_PATH)) {
115 /* An O_PATH fd was specified, let's convert here to a proper one, as btrfs ioctl's can't deal with
116 * O_PATH. */
117
118 real_fd = fd_reopen(fd, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
119 if (real_fd < 0)
120 return real_fd;
121
122 fd = real_fd;
123 }
124
125 strncpy(args.name, subvolume, sizeof(args.name)-1);
126
127 return RET_NERRNO(ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args));
128 }
129
130 int btrfs_subvol_make(const char *path) {
131 _cleanup_close_ int fd = -1;
132 const char *subvolume;
133 int r;
134
135 assert(path);
136
137 r = extract_subvolume_name(path, &subvolume);
138 if (r < 0)
139 return r;
140
141 fd = open_parent(path, O_CLOEXEC, 0);
142 if (fd < 0)
143 return fd;
144
145 return btrfs_subvol_make_fd(fd, subvolume);
146 }
147
/* Tries to create a btrfs subvolume at the specified path, and falls back to creating a plain directory if
 * subvolume creation fails with -ENOTTY. Returns 1 if a subvolume was created, 0 if a plain directory was
 * created, and a negative errno-style value on failure. */
int btrfs_subvol_make_fallback(const char *path, mode_t mode) {
        mode_t saved_umask, effective_umask;
        int r;

        assert(path);

        /* Let's work like mkdir(), i.e. take the specified mode, and mask it with the current umask. Since
         * umask() can only be read by setting it, we first set it to the inverted mode, and then extend it
         * by the bits of the previous umask, if there are any that aren't covered yet. */
        saved_umask = umask(~mode);
        effective_umask = saved_umask | ~mode;
        if (effective_umask != ~mode)
                umask(effective_umask);

        r = btrfs_subvol_make(path);

        /* Restore the original umask, regardless of whether the above worked */
        umask(saved_umask);

        if (r == -ENOTTY) {
                /* Creating the subvolume failed with ENOTTY, hence create a plain directory instead */
                if (mkdir(path, mode) < 0)
                        return -errno;

                return 0; /* plain directory */
        }

        return r < 0 ? r : 1; /* either a different error, or subvol worked */
}
172
/* Turns the read-only flag of the subvolume referenced by 'fd' on or off, depending on 'b'. Returns
 * -EINVAL if the fd cannot be a subvolume, 0 if the flag already has the requested state, and otherwise the
 * (errno-converted) result of the BTRFS_IOC_SUBVOL_SETFLAGS ioctl. */
int btrfs_subvol_set_read_only_fd(int fd, bool b) {
        uint64_t current_flags, wanted_flags;
        struct stat sb;

        assert(fd >= 0);

        if (fstat(fd, &sb) < 0)
                return -errno;

        /* Never issue btrfs ioctls on something that doesn't at least look like a subvolume */
        if (!btrfs_might_be_subvol(&sb))
                return -EINVAL;

        if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &current_flags) < 0)
                return -errno;

        wanted_flags = UPDATE_FLAG(current_flags, BTRFS_SUBVOL_RDONLY, b);
        if (wanted_flags == current_flags)
                return 0; /* Nothing to change */

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &wanted_flags));
}
194
195 int btrfs_subvol_set_read_only(const char *path, bool b) {
196 _cleanup_close_ int fd = -1;
197
198 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
199 if (fd < 0)
200 return -errno;
201
202 return btrfs_subvol_set_read_only_fd(fd, b);
203 }
204
/* Queries the read-only flag of the subvolume referenced by 'fd'. Returns 1 if the subvolume is read-only,
 * 0 if it is not, -EINVAL if the fd cannot be a subvolume, and a negative errno-style value if fstat() or
 * the ioctl fail. */
int btrfs_subvol_get_read_only_fd(int fd) {
        uint64_t subvol_flags;
        struct stat sb;

        assert(fd >= 0);

        if (fstat(fd, &sb) < 0)
                return -errno;

        /* Never issue btrfs ioctls on something that doesn't at least look like a subvolume */
        if (!btrfs_might_be_subvol(&sb))
                return -EINVAL;

        if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &subvol_flags) < 0)
                return -errno;

        return (subvol_flags & BTRFS_SUBVOL_RDONLY) != 0;
}
222
/* Issues BTRFS_IOC_CLONE on 'outfd', passing 'infd' as the source. Returns a negative errno-style value on
 * failure, including when fd_verify_regular() rejects 'outfd'. */
int btrfs_reflink(int infd, int outfd) {
        int r;

        assert(infd >= 0);
        assert(outfd >= 0);

        /* The ioctl must only ever be issued on a regular file, so that no device driver accidentally gets
         * it. */
        r = fd_verify_regular(outfd);
        if (r >= 0)
                r = RET_NERRNO(ioctl(outfd, BTRFS_IOC_CLONE, infd));

        return r;
}
237
/* Issues BTRFS_IOC_CLONE_RANGE on 'outfd' for 'sz' bytes, with 'infd'/'in_offset' as source and
 * 'out_offset' as destination offset. 'sz' must be non-zero. Returns a negative errno-style value on
 * failure, including when fd_verify_regular() rejects 'outfd'. */
int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
        struct btrfs_ioctl_clone_range_args range = {
                .src_fd = infd,
                .src_offset = in_offset,
                .src_length = sz,
                .dest_offset = out_offset,
        };
        int r;

        assert(infd >= 0);
        assert(outfd >= 0);
        assert(sz > 0);

        /* As in btrfs_reflink(): only ever issue the ioctl on regular files */
        r = fd_verify_regular(outfd);
        if (r >= 0)
                r = RET_NERRNO(ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &range));

        return r;
}
257
/* Determines the block device backing the btrfs file system that 'fd' is located on.
 *
 * Returns 1 and stores the device number in *dev on success. Returns 0 and sets *dev to 0 if the file
 * system does not consist of exactly one device. Returns -ENOTTY if 'fd' is not on btrfs, -EUCLEAN if the
 * kernel reports "/dev/root" as device path, -ENOTBLK if the reported path is not a block device, -ENODEV
 * if no usable device is found, and other negative errno-style values if an ioctl or stat() fails. */
int btrfs_get_block_device_fd(int fd, dev_t *dev) {
        struct btrfs_ioctl_fs_info_args fs_info = {};
        int r;

        assert(fd >= 0);
        assert(dev);

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        if (ioctl(fd, BTRFS_IOC_FS_INFO, &fs_info) < 0)
                return -errno;

        /* We won't do this for btrfs RAID */
        if (fs_info.num_devices != 1) {
                *dev = 0;
                return 0;
        }

        /* Iterate through all possible device IDs, and use the first one the kernel knows */
        for (uint64_t devid = 1; devid <= fs_info.max_id; devid++) {
                struct btrfs_ioctl_dev_info_args dev_info = {
                        .devid = devid,
                };
                const char *node;
                struct stat sb;

                if (ioctl(fd, BTRFS_IOC_DEV_INFO, &dev_info) < 0) {
                        if (errno != ENODEV)
                                return -errno;

                        continue; /* No device with this ID, try the next one */
                }

                node = (const char*) dev_info.path;

                /* For the root fs — when no initrd is involved — btrfs returns /dev/root on any kernels
                 * from the past few years. That sucks, as we have no API to determine the actual root
                 * then. Let's return a recognizable error for this case, so that the caller can maybe
                 * print a nice message about this.
                 *
                 * https://bugzilla.kernel.org/show_bug.cgi?id=89721 */
                if (path_equal(node, "/dev/root"))
                        return -EUCLEAN;

                if (stat(node, &sb) < 0)
                        return -errno;

                if (!S_ISBLK(sb.st_mode))
                        return -ENOTBLK;

                if (major(sb.st_rdev) == 0)
                        return -ENODEV;

                *dev = sb.st_rdev;
                return 1;
        }

        return -ENODEV;
}
318
319 int btrfs_get_block_device(const char *path, dev_t *dev) {
320 _cleanup_close_ int fd = -1;
321
322 assert(path);
323 assert(dev);
324
325 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
326 if (fd < 0)
327 return -errno;
328
329 return btrfs_get_block_device_fd(fd, dev);
330 }
331
/* Determines the ID of the subvolume the specified fd is located in, by issuing BTRFS_IOC_INO_LOOKUP for
 * BTRFS_FIRST_FREE_OBJECTID and reading back the tree ID. Returns 0 and stores the ID in *ret on success,
 * -ENOTTY if 'fd' is not on btrfs, and other negative errno-style values on failure. */
int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
        struct btrfs_ioctl_ino_lookup_args lookup = {
                .objectid = BTRFS_FIRST_FREE_OBJECTID
        };
        int r;

        assert(fd >= 0);
        assert(ret);

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &lookup) < 0)
                return -errno;

        *ret = lookup.treeid;
        return 0;
}
353
354 int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
355 _cleanup_close_ int subvol_fd = -1;
356
357 assert(fd >= 0);
358 assert(ret);
359
360 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
361 if (subvol_fd < 0)
362 return -errno;
363
364 return btrfs_subvol_get_id_fd(subvol_fd, ret);
365 }
366
/* Advances the minimum search key of 'args' by one. Returns true if the key was increased, and false if it
 * was already at its maximum value and hence could not be increased any further. */
static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
        assert(args);

        /* The objectid, type and offset together make up the btrfs key, which is considered a single
         * 136-bit integer when comparing (64-bit objectid, 8-bit type, 64-bit offset, in order of
         * decreasing significance). This call increases that integer by one, carrying over into the next
         * more significant component whenever a less significant one overflows. */

        if (args->key.min_offset < UINT64_MAX) {
                args->key.min_offset++;
                return true;
        }

        /* The offset is saturated: carry into the type, which is only 8 bits wide within the key, even
         * though the ioctl structure stores it in a larger field. */
        if (args->key.min_type < UINT8_MAX) {
                args->key.min_type++;
                args->key.min_offset = 0;
                return true;
        }

        /* The type is saturated too: carry into the objectid */
        if (args->key.min_objectid < UINT64_MAX) {
                args->key.min_objectid++;
                args->key.min_offset = 0;
                args->key.min_type = 0;
                return true;
        }

        /* All three components are at their maximum, there's nothing left to increase */
        return false;
}
395
/* Sets the minimum search key of 'args' to the key of the search result header 'h', so that a subsequent
 * search starts at that item. */
static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
        struct btrfs_ioctl_search_key *key;

        assert(args);
        assert(h);

        key = &args->key;

        key->min_objectid = h->objectid;
        key->min_type = h->type;
        key->min_offset = h->offset;
}
404
/* Compares the minimum search key of 'args' with its maximum search key, component by component in the
 * order objectid, type, offset. Returns the CMP() result of the first component that differs, or of the
 * offsets if objectid and type are both equal. */
static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
        int r;

        assert(args);

        /* Compare min and max, most significant component first */

        r = CMP(args->key.min_objectid, args->key.max_objectid);
        if (r == 0)
                r = CMP(args->key.min_type, args->key.max_type);
        if (r == 0)
                r = CMP(args->key.min_offset, args->key.max_offset);

        return r;
}
422
/* Iterates through all items a BTRFS_IOC_TREE_SEARCH call stored in (args).buf. 'i' is used as item
 * counter and runs up to (args).key.nr_items, 'sh' is pointed to the header of the current item. Each item
 * consists of a header followed by (sh)->len bytes of payload, hence the next header is found right after
 * the payload of the current one. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args)                  \
        for ((i) = 0,                                                   \
             (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
             (i) < (args).key.nr_items;                                 \
             (i)++,                                                     \
             (sh) = (const struct btrfs_ioctl_search_header*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header) + (sh)->len))

/* Returns a pointer to the payload of a search result item, i.e. to the first byte after its header. The
 * argument is parenthesized so that the cast applies to the whole expression passed in, not just to its
 * first operand. */
#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh)                              \
        ((void*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header)))
432
433 int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
434 struct btrfs_ioctl_search_args args = {
435 /* Tree of tree roots */
436 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
437
438 /* Look precisely for the subvolume items */
439 .key.min_type = BTRFS_ROOT_ITEM_KEY,
440 .key.max_type = BTRFS_ROOT_ITEM_KEY,
441
442 .key.min_offset = 0,
443 .key.max_offset = UINT64_MAX,
444
445 /* No restrictions on the other components */
446 .key.min_transid = 0,
447 .key.max_transid = UINT64_MAX,
448 };
449
450 bool found = false;
451 int r;
452
453 assert(fd >= 0);
454 assert(ret);
455
456 if (subvol_id == 0) {
457 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
458 if (r < 0)
459 return r;
460 } else {
461 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
462 if (r < 0)
463 return r;
464 if (!r)
465 return -ENOTTY;
466 }
467
468 args.key.min_objectid = args.key.max_objectid = subvol_id;
469
470 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
471 const struct btrfs_ioctl_search_header *sh;
472 unsigned i;
473
474 args.key.nr_items = 256;
475 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
476 return -errno;
477
478 if (args.key.nr_items <= 0)
479 break;
480
481 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
482
483 const struct btrfs_root_item *ri;
484
485 /* Make sure we start the next search at least from this entry */
486 btrfs_ioctl_search_args_set(&args, sh);
487
488 if (sh->objectid != subvol_id)
489 continue;
490 if (sh->type != BTRFS_ROOT_ITEM_KEY)
491 continue;
492
493 /* Older versions of the struct lacked the otime setting */
494 if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
495 continue;
496
497 ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
498
499 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
500 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
501
502 ret->subvol_id = subvol_id;
503 ret->read_only = le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY;
504
505 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
506 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
507 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
508
509 found = true;
510 goto finish;
511 }
512
513 /* Increase search key by one, to read the next item, if we can. */
514 if (!btrfs_ioctl_search_args_inc(&args))
515 break;
516 }
517
518 finish:
519 if (!found)
520 return -ENODATA;
521
522 return 0;
523 }
524
525 int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
526
527 struct btrfs_ioctl_search_args args = {
528 /* Tree of quota items */
529 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
530
531 /* The object ID is always 0 */
532 .key.min_objectid = 0,
533 .key.max_objectid = 0,
534
535 /* Look precisely for the quota items */
536 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
537 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
538
539 /* No restrictions on the other components */
540 .key.min_transid = 0,
541 .key.max_transid = UINT64_MAX,
542 };
543
544 bool found_info = false, found_limit = false;
545 int r;
546
547 assert(fd >= 0);
548 assert(ret);
549
550 if (qgroupid == 0) {
551 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
552 if (r < 0)
553 return r;
554 } else {
555 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
556 if (r < 0)
557 return r;
558 if (!r)
559 return -ENOTTY;
560 }
561
562 args.key.min_offset = args.key.max_offset = qgroupid;
563
564 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
565 const struct btrfs_ioctl_search_header *sh;
566 unsigned i;
567
568 args.key.nr_items = 256;
569 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
570 if (errno == ENOENT) /* quota tree is missing: quota disabled */
571 break;
572
573 return -errno;
574 }
575
576 if (args.key.nr_items <= 0)
577 break;
578
579 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
580
581 /* Make sure we start the next search at least from this entry */
582 btrfs_ioctl_search_args_set(&args, sh);
583
584 if (sh->objectid != 0)
585 continue;
586 if (sh->offset != qgroupid)
587 continue;
588
589 if (sh->type == BTRFS_QGROUP_INFO_KEY) {
590 const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
591
592 ret->referenced = le64toh(qii->rfer);
593 ret->exclusive = le64toh(qii->excl);
594
595 found_info = true;
596
597 } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
598 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
599
600 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
601 ret->referenced_max = le64toh(qli->max_rfer);
602 else
603 ret->referenced_max = UINT64_MAX;
604
605 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
606 ret->exclusive_max = le64toh(qli->max_excl);
607 else
608 ret->exclusive_max = UINT64_MAX;
609
610 found_limit = true;
611 }
612
613 if (found_info && found_limit)
614 goto finish;
615 }
616
617 /* Increase search key by one, to read the next item, if we can. */
618 if (!btrfs_ioctl_search_args_inc(&args))
619 break;
620 }
621
622 finish:
623 if (!found_limit && !found_info)
624 return -ENODATA;
625
626 if (!found_info) {
627 ret->referenced = UINT64_MAX;
628 ret->exclusive = UINT64_MAX;
629 }
630
631 if (!found_limit) {
632 ret->referenced_max = UINT64_MAX;
633 ret->exclusive_max = UINT64_MAX;
634 }
635
636 return 0;
637 }
638
639 int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
640 _cleanup_close_ int fd = -1;
641
642 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
643 if (fd < 0)
644 return -errno;
645
646 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
647 }
648
649 int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
650 uint64_t level, lowest = UINT64_MAX, lowest_qgroupid = 0;
651 _cleanup_free_ uint64_t *qgroups = NULL;
652 int r, n;
653
654 assert(fd >= 0);
655 assert(ret);
656
657 /* This finds the "subtree" qgroup for a specific
658 * subvolume. This only works for subvolumes that have been
659 * prepared with btrfs_subvol_auto_qgroup_fd() with
660 * insert_intermediary_qgroup=true (or equivalent). For others
661 * it will return the leaf qgroup instead. The two cases may
662 * be distuingished via the return value, which is 1 in case
663 * an appropriate "subtree" qgroup was found, and 0
664 * otherwise. */
665
666 if (subvol_id == 0) {
667 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
668 if (r < 0)
669 return r;
670 }
671
672 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
673 if (r < 0)
674 return r;
675 if (level != 0) /* Input must be a leaf qgroup */
676 return -EINVAL;
677
678 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
679 if (n < 0)
680 return n;
681
682 for (int i = 0; i < n; i++) {
683 uint64_t id;
684
685 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
686 if (r < 0)
687 return r;
688
689 if (id != subvol_id)
690 continue;
691
692 if (lowest == UINT64_MAX || level < lowest) {
693 lowest_qgroupid = qgroups[i];
694 lowest = level;
695 }
696 }
697
698 if (lowest == UINT64_MAX) {
699 /* No suitable higher-level qgroup found, let's return
700 * the leaf qgroup instead, and indicate that with the
701 * return value. */
702
703 *ret = subvol_id;
704 return 0;
705 }
706
707 *ret = lowest_qgroupid;
708 return 1;
709 }
710
711 int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
712 uint64_t qgroupid;
713 int r;
714
715 assert(fd >= 0);
716 assert(ret);
717
718 /* This determines the quota data of the qgroup with the
719 * lowest level, that shares the id part with the specified
720 * subvolume. This is useful for determining the quota data
721 * for entire subvolume subtrees, as long as the subtrees have
722 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
723 * compatible way */
724
725 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
726 if (r < 0)
727 return r;
728
729 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
730 }
731
732 int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
733 _cleanup_close_ int fd = -1;
734
735 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
736 if (fd < 0)
737 return -errno;
738
739 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
740 }
741
742 int btrfs_defrag(const char *p) {
743 _cleanup_close_ int fd = -1;
744
745 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
746 if (fd < 0)
747 return -errno;
748
749 return btrfs_defrag_fd(fd);
750 }
751
/* Enables (b=true) or disables (b=false) quota on the btrfs file system 'fd' is located on. Returns
 * -ENOTTY if 'fd' is not on btrfs, and otherwise the (errno-converted) result of the BTRFS_IOC_QUOTA_CTL
 * ioctl. */
int btrfs_quota_enable_fd(int fd, bool b) {
        struct btrfs_ioctl_quota_ctl_args ctl = {
                .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
        };
        int r;

        assert(fd >= 0);

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_CTL, &ctl));
}
768
769 int btrfs_quota_enable(const char *path, bool b) {
770 _cleanup_close_ int fd = -1;
771
772 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
773 if (fd < 0)
774 return -errno;
775
776 return btrfs_quota_enable_fd(fd, b);
777 }
778
/* Sets the "referenced" limit of a quota group to 'referenced_max'. If 'qgroupid' is 0 the ID of the
 * subvolume 'fd' is located in is used. If the kernel reports EBUSY we wait for the quota rescan to finish
 * and try again, up to ten times. Returns 0 on success, -ENOTTY if 'fd' is not on btrfs, and other negative
 * errno-style values on failure. */
int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
        struct btrfs_ioctl_qgroup_limit_args limit_args = {
                .lim.max_rfer = referenced_max,
                .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
        };
        unsigned n_retries = 0;
        int r;

        assert(fd >= 0);

        if (qgroupid != 0) {
                /* The ID was specified explicitly, but we still need to make sure we only issue btrfs
                 * ioctls on btrfs. */
                r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOTTY;
        } else {
                /* This implicitly validates the file system type, too */
                r = btrfs_subvol_get_id_fd(fd, &qgroupid);
                if (r < 0)
                        return r;
        }

        limit_args.qgroupid = qgroupid;

        while (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &limit_args) < 0) {
                /* Give up on any error other than EBUSY, or if we retried often enough already */
                if (errno != EBUSY || n_retries >= 10)
                        return -errno;

                (void) btrfs_quota_scan_wait(fd);
                n_retries++;
        }

        return 0;
}
819
820 int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
821 _cleanup_close_ int fd = -1;
822
823 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
824 if (fd < 0)
825 return -errno;
826
827 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
828 }
829
/* Sets the "referenced" limit on the qgroup btrfs_subvol_find_subtree_qgroup() returns for the specified
 * subvolume, i.e. on its "subtree" qgroup if there is one, and on its leaf qgroup otherwise. */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroupid;
        int r;

        assert(fd >= 0);

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroupid);
        if (r < 0)
                return r;

        return btrfs_qgroup_set_limit_fd(fd, subtree_qgroupid, referenced_max);
}
842
843 int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
844 _cleanup_close_ int fd = -1;
845
846 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
847 if (fd < 0)
848 return -errno;
849
850 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
851 }
852
/* Combines a qgroup level and an ID into a qgroup ID: the level is placed in the bits from
 * BTRFS_QGROUP_LEVEL_SHIFT upwards, the ID in the bits below. Returns 0 and stores the result in *ret on
 * success, and -EINVAL if either value does not fit into its part. */
int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
        const uint64_t level_limit = UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT);
        const uint64_t id_limit = UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT;

        assert(ret);

        if (level >= level_limit || id >= id_limit)
                return -EINVAL;

        *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
        return 0;
}
865
/* Splits a qgroup ID into its level (the bits from BTRFS_QGROUP_LEVEL_SHIFT upwards) and its ID part (the
 * bits below). Either output pointer may be NULL if the caller is not interested in that part, but not
 * both. Always returns 0. */
int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
        const uint64_t id_mask = (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1;

        assert(level || id);

        if (level)
                *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;

        if (id)
                *id = qgroupid & id_mask;

        return 0;
}
877
/* Creates (b=true) or destroys (b=false) the quota group with the specified ID. If the kernel reports
 * EBUSY we wait for the quota rescan to finish and try again, up to ten times. Returns 0 on success,
 * -ENOTTY if 'fd' is not on btrfs, -ENOTCONN if the ioctl fails with EINVAL or ENOTCONN, and other negative
 * errno-style values on failure. */
static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
        struct btrfs_ioctl_qgroup_create_args create_args = {
                .create = b,
                .qgroupid = qgroupid,
        };
        unsigned n_retries = 0;
        int r;

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        while (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &create_args) < 0) {

                /* On old kernels if quota is not enabled, we get EINVAL. On newer kernels we get
                 * ENOTCONN. Let's always convert this to ENOTCONN to make this recognizable everywhere the
                 * same way. */
                if (IN_SET(errno, EINVAL, ENOTCONN))
                        return -ENOTCONN;

                /* Give up on any error other than EBUSY, or if we retried often enough already */
                if (errno != EBUSY || n_retries >= 10)
                        return -errno;

                (void) btrfs_quota_scan_wait(fd);
                n_retries++;
        }

        return 0;
}
915
/* Creates the quota group with the specified ID; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, /* b= */ true, qgroupid);
}
919
/* Destroys the quota group with the specified ID; see qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        return qgroup_create_or_destroy(fd, /* b= */ false, qgroupid);
}
923
924 int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
925 _cleanup_free_ uint64_t *qgroups = NULL;
926 uint64_t subvol_id;
927 int n, r;
928
929 /* Destroys the specified qgroup, but unassigns it from all
930 * its parents first. Also, it recursively destroys all
931 * qgroups it is assigned to that have the same id part of the
932 * qgroupid as the specified group. */
933
934 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
935 if (r < 0)
936 return r;
937
938 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
939 if (n < 0)
940 return n;
941
942 for (int i = 0; i < n; i++) {
943 uint64_t id;
944
945 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
946 if (r < 0)
947 return r;
948
949 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
950 if (r < 0)
951 return r;
952
953 if (id != subvol_id)
954 continue;
955
956 /* The parent qgroupid shares the same id part with
957 * us? If so, destroy it too. */
958
959 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
960 }
961
962 return btrfs_qgroup_destroy(fd, qgroupid);
963 }
964
/* Issues BTRFS_IOC_QUOTA_RESCAN on the specified fd, with zero-initialized arguments. Returns a negative
 * errno-style value on failure. */
int btrfs_quota_scan_start(int fd) {
        struct btrfs_ioctl_quota_rescan_args rescan_args = {};

        assert(fd >= 0);

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &rescan_args));
}
972
/* Issues BTRFS_IOC_QUOTA_RESCAN_WAIT on the specified fd. Returns a negative errno-style value on
 * failure. */
int btrfs_quota_scan_wait(int fd) {
        assert(fd >= 0);

        return RET_NERRNO(ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT));
}
978
/* Queries the quota rescan status via BTRFS_IOC_QUOTA_RESCAN_STATUS. Returns 1 if the 'flags' field the
 * kernel reports is non-zero, 0 if it is zero, and a negative errno-style value if the ioctl fails. */
int btrfs_quota_scan_ongoing(int fd) {
        struct btrfs_ioctl_quota_rescan_args status = {};

        assert(fd >= 0);

        if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &status) < 0)
                return -errno;

        return status.flags != 0;
}
989
/* Assigns (b=true) the qgroup 'child' to the qgroup 'parent', or removes (b=false) that assignment. If the
 * kernel reports EBUSY we wait for the quota rescan to finish and try again, up to ten times. Returns 0 if
 * the ioctl returned 0, 1 if it returned a positive value (in which case a quota rescan is requested), and
 * a negative errno-style value on failure, -ENOTTY in particular if 'fd' is not on btrfs. */
static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
        struct btrfs_ioctl_qgroup_assign_args assign_args = {
                .assign = b,
                .src = child,
                .dst = parent,
        };
        unsigned n_retries = 0;
        int r;

        r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        for (;;) {
                r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &assign_args);
                if (r >= 0)
                        break;

                /* Give up on any error other than EBUSY, or if we retried often enough already */
                if (errno != EBUSY || n_retries >= 10)
                        return -errno;

                (void) btrfs_quota_scan_wait(fd);
                n_retries++;
        }

        if (r > 0) {
                /* If the return value is > 0, we need to request a rescan */
                (void) btrfs_quota_scan_start(fd);
                return 1;
        }

        return 0;
}
1024
/* Assigns the qgroup 'child' to the qgroup 'parent'; see qgroup_assign_or_unassign() for the return
 * values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, /* b= */ true, child, parent);
}
1028
/* Removes the assignment of the qgroup 'child' to the qgroup 'parent'; see qgroup_assign_or_unassign() for
 * the return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        return qgroup_assign_or_unassign(fd, /* b= */ false, child, parent);
}
1032
1033 static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
1034 struct btrfs_ioctl_search_args args = {
1035 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1036
1037 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1038 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1039
1040 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1041 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1042
1043 .key.min_transid = 0,
1044 .key.max_transid = UINT64_MAX,
1045 };
1046
1047 struct btrfs_ioctl_vol_args vol_args = {};
1048 _cleanup_close_ int subvol_fd = -1;
1049 struct stat st;
1050 bool made_writable = false;
1051 int r;
1052
1053 assert(fd >= 0);
1054 assert(subvolume);
1055
1056 if (fstat(fd, &st) < 0)
1057 return -errno;
1058
1059 if (!S_ISDIR(st.st_mode))
1060 return -EINVAL;
1061
1062 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1063 if (subvol_fd < 0)
1064 return -errno;
1065
1066 /* Let's check if this is actually a subvolume. Note that this is mostly redundant, as BTRFS_IOC_SNAP_DESTROY
1067 * would fail anyway if it is not. However, it's a good thing to check this ahead of time so that we can return
1068 * ENOTTY unconditionally in this case. This is different from the ioctl() which will return EPERM/EACCES if we
1069 * don't have the privileges to remove subvolumes, regardless if the specified directory is actually a
1070 * subvolume or not. In order to make it easy for callers to cover the "this is not a btrfs subvolume" case
1071 * let's prefer ENOTTY over EPERM/EACCES though. */
1072 r = btrfs_is_subvol_fd(subvol_fd);
1073 if (r < 0)
1074 return r;
1075 if (r == 0) /* Not a btrfs subvolume */
1076 return -ENOTTY;
1077
1078 if (subvol_id == 0) {
1079 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
1080 if (r < 0)
1081 return r;
1082 }
1083
1084 /* First, try to remove the subvolume. If it happens to be
1085 * already empty, this will just work. */
1086 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1087 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
1088 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
1089 return 0;
1090 }
1091 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
1092 return -errno;
1093
1094 /* OK, the subvolume is not empty, let's look for child
1095 * subvolumes, and remove them, first */
1096
1097 args.key.min_offset = args.key.max_offset = subvol_id;
1098
1099 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1100 const struct btrfs_ioctl_search_header *sh;
1101 unsigned i;
1102
1103 args.key.nr_items = 256;
1104 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1105 return -errno;
1106
1107 if (args.key.nr_items <= 0)
1108 break;
1109
1110 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1111 _cleanup_free_ char *p = NULL;
1112 const struct btrfs_root_ref *ref;
1113
1114 btrfs_ioctl_search_args_set(&args, sh);
1115
1116 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1117 continue;
1118 if (sh->offset != subvol_id)
1119 continue;
1120
1121 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1122
1123 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1124 if (!p)
1125 return -ENOMEM;
1126
1127 struct btrfs_ioctl_ino_lookup_args ino_args = {
1128 .treeid = subvol_id,
1129 .objectid = htole64(ref->dirid),
1130 };
1131
1132 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1133 return -errno;
1134
1135 if (!made_writable) {
1136 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1137 if (r < 0)
1138 return r;
1139
1140 made_writable = true;
1141 }
1142
1143 if (isempty(ino_args.name))
1144 /* Subvolume is in the top-level
1145 * directory of the subvolume. */
1146 r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
1147 else {
1148 _cleanup_close_ int child_fd = -1;
1149
1150 /* Subvolume is somewhere further down,
1151 * hence we need to open the
1152 * containing directory first */
1153
1154 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1155 if (child_fd < 0)
1156 return -errno;
1157
1158 r = subvol_remove_children(child_fd, p, sh->objectid, flags);
1159 }
1160 if (r < 0)
1161 return r;
1162 }
1163
1164 /* Increase search key by one, to read the next item, if we can. */
1165 if (!btrfs_ioctl_search_args_inc(&args))
1166 break;
1167 }
1168
1169 /* OK, the child subvolumes should all be gone now, let's try
1170 * again to remove the subvolume */
1171 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1172 return -errno;
1173
1174 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
1175 return 0;
1176 }
1177
1178 int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
1179 _cleanup_close_ int fd = -1;
1180 const char *subvolume;
1181 int r;
1182
1183 assert(path);
1184
1185 r = extract_subvolume_name(path, &subvolume);
1186 if (r < 0)
1187 return r;
1188
1189 fd = open_parent(path, O_CLOEXEC, 0);
1190 if (fd < 0)
1191 return fd;
1192
1193 return subvol_remove_children(fd, subvolume, 0, flags);
1194 }
1195
1196 int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
1197 return subvol_remove_children(fd, subvolume, 0, flags);
1198 }
1199
1200 int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1201
1202 struct btrfs_ioctl_search_args args = {
1203 /* Tree of quota items */
1204 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1205
1206 /* The object ID is always 0 */
1207 .key.min_objectid = 0,
1208 .key.max_objectid = 0,
1209
1210 /* Look precisely for the quota items */
1211 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1212 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1213
1214 /* For our qgroup */
1215 .key.min_offset = old_qgroupid,
1216 .key.max_offset = old_qgroupid,
1217
1218 /* No restrictions on the other components */
1219 .key.min_transid = 0,
1220 .key.max_transid = UINT64_MAX,
1221 };
1222
1223 int r;
1224
1225 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
1226 if (r < 0)
1227 return r;
1228 if (!r)
1229 return -ENOTTY;
1230
1231 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1232 const struct btrfs_ioctl_search_header *sh;
1233 unsigned i;
1234
1235 args.key.nr_items = 256;
1236 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1237 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1238 break;
1239
1240 return -errno;
1241 }
1242
1243 if (args.key.nr_items <= 0)
1244 break;
1245
1246 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1247 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1248 struct btrfs_ioctl_qgroup_limit_args qargs;
1249 unsigned c;
1250
1251 /* Make sure we start the next search at least from this entry */
1252 btrfs_ioctl_search_args_set(&args, sh);
1253
1254 if (sh->objectid != 0)
1255 continue;
1256 if (sh->type != BTRFS_QGROUP_LIMIT_KEY)
1257 continue;
1258 if (sh->offset != old_qgroupid)
1259 continue;
1260
1261 /* We found the entry, now copy things over. */
1262
1263 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1264 .qgroupid = new_qgroupid,
1265
1266 .lim.max_rfer = le64toh(qli->max_rfer),
1267 .lim.max_excl = le64toh(qli->max_excl),
1268 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1269 .lim.rsv_excl = le64toh(qli->rsv_excl),
1270
1271 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1272 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1273 BTRFS_QGROUP_LIMIT_RSV_RFER|
1274 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1275 };
1276
1277 for (c = 0;; c++) {
1278 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1279 if (errno == EBUSY && c < 10) {
1280 (void) btrfs_quota_scan_wait(fd);
1281 continue;
1282 }
1283 return -errno;
1284 }
1285
1286 break;
1287 }
1288
1289 return 1;
1290 }
1291
1292 /* Increase search key by one, to read the next item, if we can. */
1293 if (!btrfs_ioctl_search_args_inc(&args))
1294 break;
1295 }
1296
1297 return 0;
1298 }
1299
1300 static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1301 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1302 bool copy_from_parent = false, insert_intermediary_qgroup = false;
1303 int n_old_qgroups, n_old_parent_qgroups, r;
1304 uint64_t old_parent_id;
1305
1306 assert(fd >= 0);
1307
1308 /* Copies a reduced form of quota information from the old to
1309 * the new subvolume. */
1310
1311 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1312 if (n_old_qgroups <= 0) /* Nothing to copy */
1313 return n_old_qgroups;
1314
1315 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
1316 if (r == -ENXIO)
1317 /* We have no parent, hence nothing to copy. */
1318 n_old_parent_qgroups = 0;
1319 else if (r < 0)
1320 return r;
1321 else {
1322 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1323 if (n_old_parent_qgroups < 0)
1324 return n_old_parent_qgroups;
1325 }
1326
1327 for (int i = 0; i < n_old_qgroups; i++) {
1328 uint64_t id;
1329
1330 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1331 if (r < 0)
1332 return r;
1333
1334 if (id == old_subvol_id) {
1335 /* The old subvolume was member of a qgroup
1336 * that had the same id, but a different level
1337 * as it self. Let's set up something similar
1338 * in the destination. */
1339 insert_intermediary_qgroup = true;
1340 break;
1341 }
1342
1343 for (int j = 0; j < n_old_parent_qgroups; j++)
1344 if (old_parent_qgroups[j] == old_qgroups[i])
1345 /* The old subvolume shared a common
1346 * parent qgroup with its parent
1347 * subvolume. Let's set up something
1348 * similar in the destination. */
1349 copy_from_parent = true;
1350 }
1351
1352 if (!insert_intermediary_qgroup && !copy_from_parent)
1353 return 0;
1354
1355 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1356 }
1357
/* Copies quota limits from the old to the new subvolume, in two steps: first the limits of the
 * subvolumes' own (leaf) qgroups, then, if both subvolumes have a subtree qgroup according to
 * btrfs_subvol_find_subtree_qgroup(), the limits of those.
 *
 * Returns a negative errno-style value on failure. Otherwise returns 1 if the subtree limits
 * were copied, and else whether the leaf limits were copied (1) or not (0). */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t src_subtree_qgroup, dst_subtree_qgroup;
        bool leaf_copied;
        int r;

        /* Step 1: leaf qgroup limits */
        r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (r < 0)
                return r;
        leaf_copied = r > 0;

        /* Step 2: subtree qgroup limits, provided both sides have a subtree qgroup */
        r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &src_subtree_qgroup);
        if (r <= 0)
                return r < 0 ? r : leaf_copied;

        r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &dst_subtree_qgroup);
        if (r <= 0)
                return r < 0 ? r : leaf_copied;

        r = btrfs_qgroup_copy_limits(fd, src_subtree_qgroup, dst_subtree_qgroup);
        return r != 0 ? r : leaf_copied;
}
1388
1389 static int subvol_snapshot_children(
1390 int old_fd,
1391 int new_fd,
1392 const char *subvolume,
1393 uint64_t old_subvol_id,
1394 BtrfsSnapshotFlags flags) {
1395
1396 struct btrfs_ioctl_search_args args = {
1397 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1398
1399 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1400 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1401
1402 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1403 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1404
1405 .key.min_transid = 0,
1406 .key.max_transid = UINT64_MAX,
1407 };
1408
1409 struct btrfs_ioctl_vol_args_v2 vol_args = {
1410 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1411 .fd = old_fd,
1412 };
1413 _cleanup_close_ int subvolume_fd = -1;
1414 uint64_t new_subvol_id;
1415 int r;
1416
1417 assert(old_fd >= 0);
1418 assert(new_fd >= 0);
1419 assert(subvolume);
1420
1421 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1422
1423 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1424 return -errno;
1425
1426 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1427 !(flags & BTRFS_SNAPSHOT_QUOTA))
1428 return 0;
1429
1430 if (old_subvol_id == 0) {
1431 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
1432 if (r < 0)
1433 return r;
1434 }
1435
1436 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1437 if (r < 0)
1438 return r;
1439
1440 if (flags & BTRFS_SNAPSHOT_QUOTA)
1441 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1442
1443 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1444
1445 if (flags & BTRFS_SNAPSHOT_QUOTA)
1446 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1447
1448 return 0;
1449 }
1450
1451 args.key.min_offset = args.key.max_offset = old_subvol_id;
1452
1453 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1454 const struct btrfs_ioctl_search_header *sh;
1455 unsigned i;
1456
1457 args.key.nr_items = 256;
1458 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1459 return -errno;
1460
1461 if (args.key.nr_items <= 0)
1462 break;
1463
1464 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1465 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
1466 const struct btrfs_root_ref *ref;
1467 _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
1468
1469 btrfs_ioctl_search_args_set(&args, sh);
1470
1471 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1472 continue;
1473
1474 /* Avoid finding the source subvolume a second
1475 * time */
1476 if (sh->offset != old_subvol_id)
1477 continue;
1478
1479 /* Avoid running into loops if the new
1480 * subvolume is below the old one. */
1481 if (sh->objectid == new_subvol_id)
1482 continue;
1483
1484 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1485 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1486 if (!p)
1487 return -ENOMEM;
1488
1489 struct btrfs_ioctl_ino_lookup_args ino_args = {
1490 .treeid = old_subvol_id,
1491 .objectid = htole64(ref->dirid),
1492 };
1493
1494 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1495 return -errno;
1496
1497 c = path_join(ino_args.name, p);
1498 if (!c)
1499 return -ENOMEM;
1500
1501 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1502 if (old_child_fd < 0)
1503 return -errno;
1504
1505 np = path_join(subvolume, ino_args.name);
1506 if (!np)
1507 return -ENOMEM;
1508
1509 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1510 if (new_child_fd < 0)
1511 return -errno;
1512
1513 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1514 /* If the snapshot is read-only we
1515 * need to mark it writable
1516 * temporarily, to put the subsnapshot
1517 * into place. */
1518
1519 if (subvolume_fd < 0) {
1520 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1521 if (subvolume_fd < 0)
1522 return -errno;
1523 }
1524
1525 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1526 if (r < 0)
1527 return r;
1528 }
1529
1530 /* When btrfs clones the subvolumes, child
1531 * subvolumes appear as empty directories. Remove
1532 * them, so that we can create a new snapshot
1533 * in their place */
1534 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1535 int k = -errno;
1536
1537 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1538 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1539
1540 return k;
1541 }
1542
1543 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
1544
1545 /* Restore the readonly flag */
1546 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1547 int k;
1548
1549 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1550 if (r >= 0 && k < 0)
1551 return k;
1552 }
1553
1554 if (r < 0)
1555 return r;
1556 }
1557
1558 /* Increase search key by one, to read the next item, if we can. */
1559 if (!btrfs_ioctl_search_args_inc(&args))
1560 break;
1561 }
1562
1563 if (flags & BTRFS_SNAPSHOT_QUOTA)
1564 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1565
1566 return 0;
1567 }
1568
1569 int btrfs_subvol_snapshot_fd_full(
1570 int old_fd,
1571 const char *new_path,
1572 BtrfsSnapshotFlags flags,
1573 copy_progress_path_t progress_path,
1574 copy_progress_bytes_t progress_bytes,
1575 void *userdata) {
1576
1577 _cleanup_close_ int new_fd = -1;
1578 const char *subvolume;
1579 int r;
1580
1581 assert(old_fd >= 0);
1582 assert(new_path);
1583
1584 r = btrfs_is_subvol_fd(old_fd);
1585 if (r < 0)
1586 return r;
1587 if (r == 0) {
1588 bool plain_directory = false;
1589
1590 /* If the source isn't a proper subvolume, fail unless fallback is requested */
1591 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1592 return -EISDIR;
1593
1594 r = btrfs_subvol_make(new_path);
1595 if (ERRNO_IS_NOT_SUPPORTED(r) && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
1596 /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
1597 if (mkdir(new_path, 0755) < 0)
1598 return -errno;
1599
1600 plain_directory = true;
1601 } else if (r < 0)
1602 return r;
1603
1604 r = copy_directory_fd_full(
1605 old_fd, new_path,
1606 COPY_MERGE_EMPTY|
1607 COPY_REFLINK|
1608 COPY_SAME_MOUNT|
1609 COPY_HARDLINKS|
1610 COPY_ALL_XATTRS|
1611 (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGINT) ? COPY_SIGINT : 0)|
1612 (FLAGS_SET(flags, BTRFS_SNAPSHOT_SIGTERM) ? COPY_SIGTERM : 0),
1613 progress_path,
1614 progress_bytes,
1615 userdata);
1616 if (r < 0)
1617 goto fallback_fail;
1618
1619 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1620
1621 if (plain_directory) {
1622 /* Plain directories have no recursive read-only flag, but something pretty close to
1623 * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
1624
1625 if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
1626 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL, NULL);
1627 } else {
1628 r = btrfs_subvol_set_read_only(new_path, true);
1629 if (r < 0)
1630 goto fallback_fail;
1631 }
1632 }
1633
1634 return 0;
1635
1636 fallback_fail:
1637 (void) rm_rf(new_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
1638 return r;
1639 }
1640
1641 r = extract_subvolume_name(new_path, &subvolume);
1642 if (r < 0)
1643 return r;
1644
1645 new_fd = open_parent(new_path, O_CLOEXEC, 0);
1646 if (new_fd < 0)
1647 return new_fd;
1648
1649 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1650 }
1651
1652 int btrfs_subvol_snapshot_full(
1653 const char *old_path,
1654 const char *new_path,
1655 BtrfsSnapshotFlags flags,
1656 copy_progress_path_t progress_path,
1657 copy_progress_bytes_t progress_bytes,
1658 void *userdata) {
1659
1660 _cleanup_close_ int old_fd = -1;
1661
1662 assert(old_path);
1663 assert(new_path);
1664
1665 old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1666 if (old_fd < 0)
1667 return -errno;
1668
1669 return btrfs_subvol_snapshot_fd_full(old_fd, new_path, flags, progress_path, progress_bytes, userdata);
1670 }
1671
1672 int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1673
1674 struct btrfs_ioctl_search_args args = {
1675 /* Tree of quota items */
1676 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1677
1678 /* Look precisely for the quota relation items */
1679 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1680 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1681
1682 /* No restrictions on the other components */
1683 .key.min_offset = 0,
1684 .key.max_offset = UINT64_MAX,
1685
1686 .key.min_transid = 0,
1687 .key.max_transid = UINT64_MAX,
1688 };
1689
1690 _cleanup_free_ uint64_t *items = NULL;
1691 size_t n_items = 0;
1692 int r;
1693
1694 assert(fd >= 0);
1695 assert(ret);
1696
1697 if (qgroupid == 0) {
1698 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1699 if (r < 0)
1700 return r;
1701 } else {
1702 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
1703 if (r < 0)
1704 return r;
1705 if (!r)
1706 return -ENOTTY;
1707 }
1708
1709 args.key.min_objectid = args.key.max_objectid = qgroupid;
1710
1711 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1712 const struct btrfs_ioctl_search_header *sh;
1713 unsigned i;
1714
1715 args.key.nr_items = 256;
1716 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1717 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1718 break;
1719
1720 return -errno;
1721 }
1722
1723 if (args.key.nr_items <= 0)
1724 break;
1725
1726 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1727
1728 /* Make sure we start the next search at least from this entry */
1729 btrfs_ioctl_search_args_set(&args, sh);
1730
1731 if (sh->type != BTRFS_QGROUP_RELATION_KEY)
1732 continue;
1733 if (sh->offset < sh->objectid)
1734 continue;
1735 if (sh->objectid != qgroupid)
1736 continue;
1737
1738 if (!GREEDY_REALLOC(items, n_items+1))
1739 return -ENOMEM;
1740
1741 items[n_items++] = sh->offset;
1742 }
1743
1744 /* Increase search key by one, to read the next item, if we can. */
1745 if (!btrfs_ioctl_search_args_inc(&args))
1746 break;
1747 }
1748
1749 if (n_items <= 0) {
1750 *ret = NULL;
1751 return 0;
1752 }
1753
1754 *ret = TAKE_PTR(items);
1755
1756 return (int) n_items;
1757 }
1758
1759 int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1760 _cleanup_free_ uint64_t *qgroups = NULL;
1761 uint64_t parent_subvol;
1762 bool changed = false;
1763 int n = 0, r;
1764
1765 assert(fd >= 0);
1766
1767 /*
1768 * Sets up the specified subvolume's qgroup automatically in
1769 * one of two ways:
1770 *
1771 * If insert_intermediary_qgroup is false, the subvolume's
1772 * leaf qgroup will be assigned to the same parent qgroups as
1773 * the subvolume's parent subvolume.
1774 *
1775 * If insert_intermediary_qgroup is true a new intermediary
1776 * higher-level qgroup is created, with a higher level number,
1777 * but reusing the id of the subvolume. The level number is
1778 * picked as one smaller than the lowest level qgroup the
1779 * parent subvolume is a member of. If the parent subvolume's
1780 * leaf qgroup is assigned to no higher-level qgroup a new
1781 * qgroup of level 255 is created instead. Either way, the new
1782 * qgroup is then assigned to the parent's higher-level
1783 * qgroup, and the subvolume itself is assigned to it.
1784 *
1785 * If the subvolume is already assigned to a higher level
1786 * qgroup, no operation is executed.
1787 *
1788 * Effectively this means: regardless if
1789 * insert_intermediary_qgroup is true or not, after this
1790 * function is invoked the subvolume will be accounted within
1791 * the same qgroups as the parent. However, if it is true, it
1792 * will also get its own higher-level qgroup, which may in
1793 * turn be used by subvolumes created beneath this subvolume
1794 * later on.
1795 *
1796 * This hence defines a simple default qgroup setup for
1797 * subvolumes, as long as this function is invoked on each
1798 * created subvolume: each subvolume is always accounting
1799 * together with its immediate parents. Optionally, if
1800 * insert_intermediary_qgroup is true, it will also get a
1801 * qgroup that then includes all its own child subvolumes.
1802 */
1803
1804 if (subvol_id == 0) {
1805 r = btrfs_is_subvol_fd(fd);
1806 if (r < 0)
1807 return r;
1808 if (!r)
1809 return -ENOTTY;
1810
1811 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1812 if (r < 0)
1813 return r;
1814 }
1815
1816 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1817 if (n < 0)
1818 return n;
1819 if (n > 0) /* already parent qgroups set up, let's bail */
1820 return 0;
1821
1822 qgroups = mfree(qgroups);
1823
1824 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
1825 if (r == -ENXIO)
1826 /* No parent, hence no qgroup memberships */
1827 n = 0;
1828 else if (r < 0)
1829 return r;
1830 else {
1831 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1832 if (n < 0)
1833 return n;
1834 }
1835
1836 if (insert_intermediary_qgroup) {
1837 uint64_t lowest = 256, new_qgroupid;
1838 bool created = false;
1839
1840 /* Determine the lowest qgroup that the parent
1841 * subvolume is assigned to. */
1842
1843 for (int i = 0; i < n; i++) {
1844 uint64_t level;
1845
1846 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1847 if (r < 0)
1848 return r;
1849
1850 if (level < lowest)
1851 lowest = level;
1852 }
1853
1854 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1855 return -EBUSY;
1856
1857 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1858 if (r < 0)
1859 return r;
1860
1861 /* Create the new intermediary group, unless it already exists */
1862 r = btrfs_qgroup_create(fd, new_qgroupid);
1863 if (r < 0 && r != -EEXIST)
1864 return r;
1865 if (r >= 0)
1866 changed = created = true;
1867
1868 for (int i = 0; i < n; i++) {
1869 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1870 if (r < 0 && r != -EEXIST) {
1871 if (created)
1872 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1873
1874 return r;
1875 }
1876 if (r >= 0)
1877 changed = true;
1878 }
1879
1880 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1881 if (r < 0 && r != -EEXIST) {
1882 if (created)
1883 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1884 return r;
1885 }
1886 if (r >= 0)
1887 changed = true;
1888
1889 } else {
1890 int i;
1891
1892 /* Assign our subvolume to all the same qgroups as the parent */
1893
1894 for (i = 0; i < n; i++) {
1895 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1896 if (r < 0 && r != -EEXIST)
1897 return r;
1898 if (r >= 0)
1899 changed = true;
1900 }
1901 }
1902
1903 return changed;
1904 }
1905
1906 int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
1907 _cleanup_close_ int fd = -1;
1908
1909 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1910 if (fd < 0)
1911 return -errno;
1912
1913 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
1914 }
1915
1916 int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
1917
1918 struct btrfs_ioctl_search_args args = {
1919 /* Tree of tree roots */
1920 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1921
1922 /* Look precisely for the subvolume items */
1923 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1924 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1925
1926 /* No restrictions on the other components */
1927 .key.min_offset = 0,
1928 .key.max_offset = UINT64_MAX,
1929
1930 .key.min_transid = 0,
1931 .key.max_transid = UINT64_MAX,
1932 };
1933 int r;
1934
1935 assert(fd >= 0);
1936 assert(ret);
1937
1938 if (subvol_id == 0) {
1939 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1940 if (r < 0)
1941 return r;
1942 } else {
1943 r = fd_is_fs_type(fd, BTRFS_SUPER_MAGIC);
1944 if (r < 0)
1945 return r;
1946 if (!r)
1947 return -ENOTTY;
1948 }
1949
1950 args.key.min_objectid = args.key.max_objectid = subvol_id;
1951
1952 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1953 const struct btrfs_ioctl_search_header *sh;
1954 unsigned i;
1955
1956 args.key.nr_items = 256;
1957 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1958 return negative_errno();
1959
1960 if (args.key.nr_items <= 0)
1961 break;
1962
1963 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1964
1965 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1966 continue;
1967 if (sh->objectid != subvol_id)
1968 continue;
1969
1970 *ret = sh->offset;
1971 return 0;
1972 }
1973 }
1974
1975 return -ENXIO;
1976 }