]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/btrfs-util.c
Merge pull request #7675 from shawnl/unaligned
[thirdparty/systemd.git] / src / basic / btrfs-util.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2014 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <errno.h>
22 #include <fcntl.h>
23 #include <inttypes.h>
24 #include <linux/fs.h>
25 #include <linux/loop.h>
26 #include <stddef.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/ioctl.h>
31 #include <sys/stat.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 #include <unistd.h>
35
36 #if HAVE_LINUX_BTRFS_H
37 #include <linux/btrfs.h>
38 #endif
39
40 #include "alloc-util.h"
41 #include "blockdev-util.h"
42 #include "btrfs-ctree.h"
43 #include "btrfs-util.h"
44 #include "chattr-util.h"
45 #include "copy.h"
46 #include "device-nodes.h"
47 #include "fd-util.h"
48 #include "fileio.h"
49 #include "io-util.h"
50 #include "macro.h"
51 #include "missing.h"
52 #include "path-util.h"
53 #include "rm-rf.h"
54 #include "smack-util.h"
55 #include "sparse-endian.h"
56 #include "stat-util.h"
57 #include "string-util.h"
58 #include "time-util.h"
59 #include "util.h"
60
61 /* WARNING: Be careful with file system ioctls! When we get an fd, we
62 * need to make sure it either refers to only a regular file or
63 * directory, or that it is located on btrfs, before invoking any
64 * btrfs ioctls. The ioctl numbers are reused by some device drivers
65 * (such as DRM), and hence might have bad effects when invoked on
66 * device nodes (that reference drivers) rather than fds to normal
67 * files or directories. */
68
/* Checks whether 'name' may be used as a btrfs subvolume name.
 *
 * Returns 0 if it is acceptable, -EINVAL if filename_is_valid() rejects it, and -E2BIG if it is
 * longer than BTRFS_SUBVOL_NAME_MAX. */
static int validate_subvolume_name(const char *name) {
        if (!filename_is_valid(name))
                return -EINVAL;

        return strlen(name) > BTRFS_SUBVOL_NAME_MAX ? -E2BIG : 0;
}
79
80 static int open_parent(const char *path, int flags) {
81 _cleanup_free_ char *parent = NULL;
82 int fd;
83
84 assert(path);
85
86 parent = dirname_malloc(path);
87 if (!parent)
88 return -ENOMEM;
89
90 fd = open(parent, flags);
91 if (fd < 0)
92 return -errno;
93
94 return fd;
95 }
96
/* Determines the subvolume name that 'path' refers to: takes what basename() returns for 'path',
 * runs it through validate_subvolume_name(), and on success stores the pointer in *subvolume.
 * No copy of the string is made here.
 *
 * Returns 0 on success, or the negative error from validate_subvolume_name(), in which case
 * *subvolume is left untouched. */
static int extract_subvolume_name(const char *path, const char **subvolume) {
        const char *last_component;
        int r;

        assert(path);
        assert(subvolume);

        last_component = basename(path);

        r = validate_subvolume_name(last_component);
        if (r < 0)
                return r;

        *subvolume = last_component;
        return 0;
}
113
/* Tells whether the file system that 'fd' lives on is btrfs, by comparing the f_type reported by
 * fstatfs() with BTRFS_SUPER_MAGIC.
 *
 * Returns the result of that comparison, or a negative errno if fstatfs() fails. */
int btrfs_is_filesystem(int fd) {
        struct statfs fs_info;
        int rc;

        assert(fd >= 0);

        rc = fstatfs(fd, &fs_info);
        if (rc < 0)
                return -errno;

        return F_TYPE_EQUAL(fs_info.f_type, BTRFS_SUPER_MAGIC);
}
124
/* Tells whether 'fd' refers to a btrfs subvolume.
 *
 * Returns 0 if 'fd' is not a directory with inode number 256, otherwise whatever
 * btrfs_is_filesystem() returns for it. Returns a negative errno if fstat() fails. */
int btrfs_is_subvol_fd(int fd) {
        struct stat info;

        assert(fd >= 0);

        if (fstat(fd, &info) < 0)
                return -errno;

        /* On btrfs, subvolumes always have inode number 256, hence anything else cannot be one. */
        if (S_ISDIR(info.st_mode) && info.st_ino == 256)
                return btrfs_is_filesystem(fd);

        return 0;
}
140
141 int btrfs_is_subvol(const char *path) {
142 _cleanup_close_ int fd = -1;
143
144 assert(path);
145
146 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
147 if (fd < 0)
148 return -errno;
149
150 return btrfs_is_subvol_fd(fd);
151 }
152
153 int btrfs_subvol_make(const char *path) {
154 struct btrfs_ioctl_vol_args args = {};
155 _cleanup_close_ int fd = -1;
156 const char *subvolume;
157 int r;
158
159 assert(path);
160
161 r = extract_subvolume_name(path, &subvolume);
162 if (r < 0)
163 return r;
164
165 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
166 if (fd < 0)
167 return fd;
168
169 strncpy(args.name, subvolume, sizeof(args.name)-1);
170
171 if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
172 return -errno;
173
174 return 0;
175 }
176
/* Turns the read-only flag of the subvolume referred to by 'fd' on (b == true) or off (b == false).
 *
 * Returns 0 on success (including when the flag already had the requested state), -EINVAL if 'fd'
 * is not a directory with inode number 256, or a negative errno from fstat()/ioctl(). */
int btrfs_subvol_set_read_only_fd(int fd, bool b) {
        uint64_t current, wanted;
        struct stat info;

        assert(fd >= 0);

        if (fstat(fd, &info) < 0)
                return -errno;

        if (!(S_ISDIR(info.st_mode) && info.st_ino == 256))
                return -EINVAL;

        if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &current) < 0)
                return -errno;

        wanted = b ? (current | BTRFS_SUBVOL_RDONLY) : (current & ~BTRFS_SUBVOL_RDONLY);

        /* Nothing to change? Then don't bother the kernel a second time. */
        if (wanted == current)
                return 0;

        if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &wanted) < 0)
                return -errno;

        return 0;
}
205
206 int btrfs_subvol_set_read_only(const char *path, bool b) {
207 _cleanup_close_ int fd = -1;
208
209 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
210 if (fd < 0)
211 return -errno;
212
213 return btrfs_subvol_set_read_only_fd(fd, b);
214 }
215
/* Queries the read-only flag of the subvolume referred to by 'fd'.
 *
 * Returns 1 if BTRFS_SUBVOL_RDONLY is set, 0 if it is not, -EINVAL if 'fd' is not a directory with
 * inode number 256, or a negative errno from fstat()/ioctl(). */
int btrfs_subvol_get_read_only_fd(int fd) {
        uint64_t subvol_flags;
        struct stat info;

        assert(fd >= 0);

        if (fstat(fd, &info) < 0)
                return -errno;

        if (!(S_ISDIR(info.st_mode) && info.st_ino == 256))
                return -EINVAL;

        if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &subvol_flags) < 0)
                return -errno;

        return (subvol_flags & BTRFS_SUBVOL_RDONLY) ? 1 : 0;
}
233
/* Issues BTRFS_IOC_CLONE on 'outfd' with 'infd' as the source.
 *
 * Returns 0 on success, -EINVAL if 'outfd' is not a regular file, or a negative errno from
 * fstat()/ioctl(). */
int btrfs_reflink(int infd, int outfd) {
        struct stat out_info;

        assert(infd >= 0);
        assert(outfd >= 0);

        /* The ioctl must only ever be issued on a regular file, so that no device driver
         * accidentally gets it. */
        if (fstat(outfd, &out_info) < 0)
                return -errno;

        if (!S_ISREG(out_info.st_mode))
                return -EINVAL;

        if (ioctl(outfd, BTRFS_IOC_CLONE, infd) < 0)
                return -errno;

        return 0;
}
256
/* Issues BTRFS_IOC_CLONE_RANGE on 'outfd', asking for 'sz' bytes starting at 'in_offset' of 'infd'
 * to be cloned to 'out_offset'. 'sz' must be non-zero.
 *
 * Returns 0 on success, -EINVAL if 'outfd' is not a regular file, or a negative errno from
 * fstat()/ioctl(). */
int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
        struct btrfs_ioctl_clone_range_args range = {
                .src_fd = infd,
                .src_offset = in_offset,
                .src_length = sz,
                .dest_offset = out_offset,
        };
        struct stat out_info;

        assert(infd >= 0);
        assert(outfd >= 0);
        assert(sz > 0);

        if (fstat(outfd, &out_info) < 0)
                return -errno;

        /* Only regular files may receive the ioctl. */
        if (!S_ISREG(out_info.st_mode))
                return -EINVAL;

        if (ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &range) < 0)
                return -errno;

        return 0;
}
283
/* Determines the block device backing the btrfs file system that 'fd' lives on.
 *
 * Returns:
 *   1        exactly one device backs the file system, and its device number was stored in *dev
 *   0        the file system does not consist of exactly one device (e.g. btrfs RAID); *dev is
 *            set to 0 so that callers never read an uninitialized value
 *   -ENOTTY  'fd' is not on btrfs
 *   -ENODEV  no usable block device was found
 *   other negative errno values from the underlying fstatfs()/ioctl()/stat() calls */
int btrfs_get_block_device_fd(int fd, dev_t *dev) {
        struct btrfs_ioctl_fs_info_args fsi = {};
        uint64_t id;
        int r;

        assert(fd >= 0);
        assert(dev);

        /* Never issue btrfs ioctls on something that is not btrfs. */
        r = btrfs_is_filesystem(fd);
        if (r < 0)
                return r;
        if (!r)
                return -ENOTTY;

        if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
                return -errno;

        /* We won't do this for btrfs RAID */
        if (fsi.num_devices != 1) {
                *dev = 0;
                return 0;
        }

        /* Device ids are not necessarily contiguous, hence probe each id up to max_id and skip
         * the ones the kernel does not know. */
        for (id = 1; id <= fsi.max_id; id++) {
                struct btrfs_ioctl_dev_info_args di = {
                        .devid = id,
                };
                struct stat st;

                if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
                        if (errno == ENODEV)
                                continue;

                        return -errno;
                }

                if (stat((char*) di.path, &st) < 0)
                        return -errno;

                if (!S_ISBLK(st.st_mode))
                        return -ENODEV;

                if (major(st.st_rdev) == 0)
                        return -ENODEV;

                *dev = st.st_rdev;
                return 1;
        }

        return -ENODEV;
}
333
334 int btrfs_get_block_device(const char *path, dev_t *dev) {
335 _cleanup_close_ int fd = -1;
336
337 assert(path);
338 assert(dev);
339
340 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
341 if (fd < 0)
342 return -errno;
343
344 return btrfs_get_block_device_fd(fd, dev);
345 }
346
/* Determines the id of the subvolume that 'fd' is located in, via a BTRFS_IOC_INO_LOOKUP on
 * BTRFS_FIRST_FREE_OBJECTID, and stores the reported tree id in *ret.
 *
 * Returns 0 on success, -ENOTTY if 'fd' is not on btrfs, or a negative errno otherwise. */
int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
        struct btrfs_ioctl_ino_lookup_args lookup = {
                .objectid = BTRFS_FIRST_FREE_OBJECTID
        };
        int r;

        assert(fd >= 0);
        assert(ret);

        /* Make sure we only issue the btrfs ioctl on btrfs. */
        r = btrfs_is_filesystem(fd);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &lookup) < 0)
                return -errno;

        *ret = lookup.treeid;
        return 0;
}
368
369 int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
370 _cleanup_close_ int subvol_fd = -1;
371
372 assert(fd >= 0);
373 assert(ret);
374
375 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
376 if (subvol_fd < 0)
377 return -errno;
378
379 return btrfs_subvol_get_id_fd(subvol_fd, ret);
380 }
381
/* Advances the minimum search key of 'args' by one.
 *
 * The (objectid, type, offset) triplet makes up the btrfs key, which is compared as if it were a
 * single 136-bit integer (64 + 8 + 64 bits), with the offset as least significant part. This
 * increments that integer, carrying from offset into type and from type into objectid.
 *
 * Returns true if the key was incremented, false if it was already at its maximum. */
static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
        assert(args);

        /* Least significant part first: the offset */
        if (args->key.min_offset < UINT64_MAX) {
                args->key.min_offset++;
                return true;
        }

        /* Offset would overflow, carry into the type */
        if (args->key.min_type < UINT8_MAX) {
                args->key.min_offset = 0;
                args->key.min_type++;
                return true;
        }

        /* Type would overflow too, carry into the object id */
        if (args->key.min_objectid < UINT64_MAX) {
                args->key.min_offset = 0;
                args->key.min_type = 0;
                args->key.min_objectid++;
                return true;
        }

        return false;
}
410
/* Copies the key (objectid, type, offset) of the search result header 'h' into the minimum search
 * key of 'args'. The maximum key and all other fields are left alone. */
static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
        assert(args);
        assert(h);

        args->key.min_offset = h->offset;
        args->key.min_type = h->type;
        args->key.min_objectid = h->objectid;
}
419
/* Compares the minimum search key of 'args' with its maximum search key, looking at objectid
 * first, then type, then offset.
 *
 * Returns -1 if min < max, 1 if min > max, and 0 if both keys are equal. */
static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
        assert(args);

        if (args->key.min_objectid != args->key.max_objectid)
                return args->key.min_objectid < args->key.max_objectid ? -1 : 1;

        if (args->key.min_type != args->key.max_type)
                return args->key.min_type < args->key.max_type ? -1 : 1;

        if (args->key.min_offset != args->key.max_offset)
                return args->key.min_offset < args->key.max_offset ? -1 : 1;

        return 0;
}
442
/* Iterates over the search result headers stored back to back in (args).buf: (i) counts from 0 up
 * to (args).key.nr_items, and after each round (sh) is moved past the current header and the
 * (sh)->len bytes that follow it. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args)                  \
        for ((i) = 0,                                                   \
             (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
             (i) < (args).key.nr_items;                                 \
             (i)++,                                                     \
             (sh) = (const struct btrfs_ioctl_search_header*) ((const uint8_t*) ((sh) + 1) + (sh)->len))
449
/* Evaluates to a pointer to the payload of a search result, i.e. the first byte after the
 * struct btrfs_ioctl_search_header that (sh) points to. The argument is parenthesized so that
 * expressions such as "p + 1" are cast as a whole rather than having the cast bind to "p" only. */
#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh)                              \
        ((void*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header)))
452
453 int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
454 struct btrfs_ioctl_search_args args = {
455 /* Tree of tree roots */
456 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
457
458 /* Look precisely for the subvolume items */
459 .key.min_type = BTRFS_ROOT_ITEM_KEY,
460 .key.max_type = BTRFS_ROOT_ITEM_KEY,
461
462 .key.min_offset = 0,
463 .key.max_offset = (uint64_t) -1,
464
465 /* No restrictions on the other components */
466 .key.min_transid = 0,
467 .key.max_transid = (uint64_t) -1,
468 };
469
470 bool found = false;
471 int r;
472
473 assert(fd >= 0);
474 assert(ret);
475
476 if (subvol_id == 0) {
477 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
478 if (r < 0)
479 return r;
480 } else {
481 r = btrfs_is_filesystem(fd);
482 if (r < 0)
483 return r;
484 if (!r)
485 return -ENOTTY;
486 }
487
488 args.key.min_objectid = args.key.max_objectid = subvol_id;
489
490 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
491 const struct btrfs_ioctl_search_header *sh;
492 unsigned i;
493
494 args.key.nr_items = 256;
495 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
496 return -errno;
497
498 if (args.key.nr_items <= 0)
499 break;
500
501 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
502
503 const struct btrfs_root_item *ri;
504
505 /* Make sure we start the next search at least from this entry */
506 btrfs_ioctl_search_args_set(&args, sh);
507
508 if (sh->objectid != subvol_id)
509 continue;
510 if (sh->type != BTRFS_ROOT_ITEM_KEY)
511 continue;
512
513 /* Older versions of the struct lacked the otime setting */
514 if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
515 continue;
516
517 ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
518
519 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
520 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
521
522 ret->subvol_id = subvol_id;
523 ret->read_only = !!(le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY);
524
525 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
526 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
527 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
528
529 found = true;
530 goto finish;
531 }
532
533 /* Increase search key by one, to read the next item, if we can. */
534 if (!btrfs_ioctl_search_args_inc(&args))
535 break;
536 }
537
538 finish:
539 if (!found)
540 return -ENODATA;
541
542 return 0;
543 }
544
545 int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
546
547 struct btrfs_ioctl_search_args args = {
548 /* Tree of quota items */
549 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
550
551 /* The object ID is always 0 */
552 .key.min_objectid = 0,
553 .key.max_objectid = 0,
554
555 /* Look precisely for the quota items */
556 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
557 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
558
559 /* No restrictions on the other components */
560 .key.min_transid = 0,
561 .key.max_transid = (uint64_t) -1,
562 };
563
564 bool found_info = false, found_limit = false;
565 int r;
566
567 assert(fd >= 0);
568 assert(ret);
569
570 if (qgroupid == 0) {
571 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
572 if (r < 0)
573 return r;
574 } else {
575 r = btrfs_is_filesystem(fd);
576 if (r < 0)
577 return r;
578 if (!r)
579 return -ENOTTY;
580 }
581
582 args.key.min_offset = args.key.max_offset = qgroupid;
583
584 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
585 const struct btrfs_ioctl_search_header *sh;
586 unsigned i;
587
588 args.key.nr_items = 256;
589 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
590 if (errno == ENOENT) /* quota tree is missing: quota disabled */
591 break;
592
593 return -errno;
594 }
595
596 if (args.key.nr_items <= 0)
597 break;
598
599 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
600
601 /* Make sure we start the next search at least from this entry */
602 btrfs_ioctl_search_args_set(&args, sh);
603
604 if (sh->objectid != 0)
605 continue;
606 if (sh->offset != qgroupid)
607 continue;
608
609 if (sh->type == BTRFS_QGROUP_INFO_KEY) {
610 const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
611
612 ret->referenced = le64toh(qii->rfer);
613 ret->exclusive = le64toh(qii->excl);
614
615 found_info = true;
616
617 } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
618 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
619
620 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
621 ret->referenced_max = le64toh(qli->max_rfer);
622 else
623 ret->referenced_max = (uint64_t) -1;
624
625 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
626 ret->exclusive_max = le64toh(qli->max_excl);
627 else
628 ret->exclusive_max = (uint64_t) -1;
629
630 found_limit = true;
631 }
632
633 if (found_info && found_limit)
634 goto finish;
635 }
636
637 /* Increase search key by one, to read the next item, if we can. */
638 if (!btrfs_ioctl_search_args_inc(&args))
639 break;
640 }
641
642 finish:
643 if (!found_limit && !found_info)
644 return -ENODATA;
645
646 if (!found_info) {
647 ret->referenced = (uint64_t) -1;
648 ret->exclusive = (uint64_t) -1;
649 }
650
651 if (!found_limit) {
652 ret->referenced_max = (uint64_t) -1;
653 ret->exclusive_max = (uint64_t) -1;
654 }
655
656 return 0;
657 }
658
659 int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
660 _cleanup_close_ int fd = -1;
661
662 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
663 if (fd < 0)
664 return -errno;
665
666 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
667 }
668
669 int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
670 uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0;
671 _cleanup_free_ uint64_t *qgroups = NULL;
672 int r, n, i;
673
674 assert(fd >= 0);
675 assert(ret);
676
677 /* This finds the "subtree" qgroup for a specific
678 * subvolume. This only works for subvolumes that have been
679 * prepared with btrfs_subvol_auto_qgroup_fd() with
680 * insert_intermediary_qgroup=true (or equivalent). For others
681 * it will return the leaf qgroup instead. The two cases may
682 * be distuingished via the return value, which is 1 in case
683 * an appropriate "subtree" qgroup was found, and 0
684 * otherwise. */
685
686 if (subvol_id == 0) {
687 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
688 if (r < 0)
689 return r;
690 }
691
692 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
693 if (r < 0)
694 return r;
695 if (level != 0) /* Input must be a leaf qgroup */
696 return -EINVAL;
697
698 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
699 if (n < 0)
700 return n;
701
702 for (i = 0; i < n; i++) {
703 uint64_t id;
704
705 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
706 if (r < 0)
707 return r;
708
709 if (id != subvol_id)
710 continue;
711
712 if (lowest == (uint64_t) -1 || level < lowest) {
713 lowest_qgroupid = qgroups[i];
714 lowest = level;
715 }
716 }
717
718 if (lowest == (uint64_t) -1) {
719 /* No suitable higher-level qgroup found, let's return
720 * the leaf qgroup instead, and indicate that with the
721 * return value. */
722
723 *ret = subvol_id;
724 return 0;
725 }
726
727 *ret = lowest_qgroupid;
728 return 1;
729 }
730
731 int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
732 uint64_t qgroupid;
733 int r;
734
735 assert(fd >= 0);
736 assert(ret);
737
738 /* This determines the quota data of the qgroup with the
739 * lowest level, that shares the id part with the specified
740 * subvolume. This is useful for determining the quota data
741 * for entire subvolume subtrees, as long as the subtrees have
742 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
743 * compatible way */
744
745 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
746 if (r < 0)
747 return r;
748
749 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
750 }
751
752 int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
753 _cleanup_close_ int fd = -1;
754
755 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
756 if (fd < 0)
757 return -errno;
758
759 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
760 }
761
/* Issues BTRFS_IOC_DEFRAG on 'fd'.
 *
 * Returns 0 on success, -EINVAL if 'fd' is not a regular file, or a negative errno from
 * fstat()/ioctl(). */
int btrfs_defrag_fd(int fd) {
        struct stat info;

        assert(fd >= 0);

        if (fstat(fd, &info) < 0)
                return -errno;

        /* Only regular files may receive the ioctl. */
        if (!S_ISREG(info.st_mode))
                return -EINVAL;

        return ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0 ? -errno : 0;
}
778
779 int btrfs_defrag(const char *p) {
780 _cleanup_close_ int fd = -1;
781
782 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
783 if (fd < 0)
784 return -errno;
785
786 return btrfs_defrag_fd(fd);
787 }
788
/* Enables (b == true) or disables (b == false) quota on the btrfs file system 'fd' is on, via
 * BTRFS_IOC_QUOTA_CTL.
 *
 * Returns 0 on success, -ENOTTY if 'fd' is not on btrfs, or another negative errno. */
int btrfs_quota_enable_fd(int fd, bool b) {
        struct btrfs_ioctl_quota_ctl_args ctl = {
                .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
        };
        int r;

        assert(fd >= 0);

        /* Verify we are on btrfs before issuing the ioctl */
        r = btrfs_is_filesystem(fd);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        return ioctl(fd, BTRFS_IOC_QUOTA_CTL, &ctl) < 0 ? -errno : 0;
}
808
809 int btrfs_quota_enable(const char *path, bool b) {
810 _cleanup_close_ int fd = -1;
811
812 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
813 if (fd < 0)
814 return -errno;
815
816 return btrfs_quota_enable_fd(fd, b);
817 }
818
/* Sets the "referenced" limit of quota group 'qgroupid' to 'referenced_max' via
 * BTRFS_IOC_QGROUP_LIMIT. If 'qgroupid' is 0 the id of the subvolume 'fd' is located in is used.
 *
 * If the ioctl fails with EBUSY, btrfs_quota_scan_wait() is called and the ioctl is retried, up
 * to 10 times.
 *
 * Returns 0 on success, -ENOTTY if 'fd' is not on btrfs, or another negative errno. */
int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {

        struct btrfs_ioctl_qgroup_limit_args limit_args = {
                .lim.max_rfer = referenced_max,
                .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
        };
        unsigned attempts = 0;
        int r;

        assert(fd >= 0);

        if (qgroupid != 0) {
                /* Explicit id given: still verify that this is btrfs before any ioctl */
                r = btrfs_is_filesystem(fd);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -ENOTTY;
        } else {
                r = btrfs_subvol_get_id_fd(fd, &qgroupid);
                if (r < 0)
                        return r;
        }

        limit_args.qgroupid = qgroupid;

        while (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &limit_args) < 0) {

                /* Anything but EBUSY is fatal, and so is EBUSY once the retries are used up */
                if (errno != EBUSY || attempts >= 10)
                        return -errno;

                (void) btrfs_quota_scan_wait(fd);
                attempts++;
        }

        return 0;
}
860
861 int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
862 _cleanup_close_ int fd = -1;
863
864 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
865 if (fd < 0)
866 return -errno;
867
868 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
869 }
870
/* Sets the "referenced" limit on the qgroup that btrfs_subvol_find_subtree_qgroup() picks for
 * 'subvol_id'.
 *
 * Returns what btrfs_qgroup_set_limit_fd() returns, or the negative error of the qgroup lookup. */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroupid;
        int r;

        assert(fd >= 0);

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroupid);
        if (r < 0)
                return r;

        return btrfs_qgroup_set_limit_fd(fd, subtree_qgroupid, referenced_max);
}
883
884 int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
885 _cleanup_close_ int fd = -1;
886
887 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
888 if (fd < 0)
889 return -errno;
890
891 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
892 }
893
894 int btrfs_resize_loopback_fd(int fd, uint64_t new_size, bool grow_only) {
895 struct btrfs_ioctl_vol_args args = {};
896 char p[SYS_BLOCK_PATH_MAX("/loop/backing_file")];
897 _cleanup_free_ char *backing = NULL;
898 _cleanup_close_ int loop_fd = -1, backing_fd = -1;
899 struct stat st;
900 dev_t dev = 0;
901 int r;
902
903 /* In contrast to btrfs quota ioctls ftruncate() cannot make sense of "infinity" or file sizes > 2^31 */
904 if (!FILE_SIZE_VALID(new_size))
905 return -EINVAL;
906
907 /* btrfs cannot handle file systems < 16M, hence use this as minimum */
908 if (new_size < 16*1024*1024)
909 new_size = 16*1024*1024;
910
911 r = btrfs_get_block_device_fd(fd, &dev);
912 if (r < 0)
913 return r;
914 if (r == 0)
915 return -ENODEV;
916
917 xsprintf_sys_block_path(p, "/loop/backing_file", dev);
918 r = read_one_line_file(p, &backing);
919 if (r == -ENOENT)
920 return -ENODEV;
921 if (r < 0)
922 return r;
923 if (isempty(backing) || !path_is_absolute(backing))
924 return -ENODEV;
925
926 backing_fd = open(backing, O_RDWR|O_CLOEXEC|O_NOCTTY);
927 if (backing_fd < 0)
928 return -errno;
929
930 if (fstat(backing_fd, &st) < 0)
931 return -errno;
932 if (!S_ISREG(st.st_mode))
933 return -ENODEV;
934
935 if (new_size == (uint64_t) st.st_size)
936 return 0;
937
938 if (grow_only && new_size < (uint64_t) st.st_size)
939 return -EINVAL;
940
941 xsprintf_sys_block_path(p, NULL, dev);
942 loop_fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY);
943 if (loop_fd < 0)
944 return -errno;
945
946 if (snprintf(args.name, sizeof(args.name), "%" PRIu64, new_size) >= (int) sizeof(args.name))
947 return -EINVAL;
948
949 if (new_size < (uint64_t) st.st_size) {
950 /* Decrease size: first decrease btrfs size, then shorten loopback */
951 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
952 return -errno;
953 }
954
955 if (ftruncate(backing_fd, new_size) < 0)
956 return -errno;
957
958 if (ioctl(loop_fd, LOOP_SET_CAPACITY, 0) < 0)
959 return -errno;
960
961 if (new_size > (uint64_t) st.st_size) {
962 /* Increase size: first enlarge loopback, then increase btrfs size */
963 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
964 return -errno;
965 }
966
967 /* Make sure the free disk space is correctly updated for both file systems */
968 (void) fsync(fd);
969 (void) fsync(backing_fd);
970
971 return 1;
972 }
973
974 int btrfs_resize_loopback(const char *p, uint64_t new_size, bool grow_only) {
975 _cleanup_close_ int fd = -1;
976
977 fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
978 if (fd < 0)
979 return -errno;
980
981 return btrfs_resize_loopback_fd(fd, new_size, grow_only);
982 }
983
/* Combines a qgroup level and id into a single qgroupid: the level goes into the bits above
 * BTRFS_QGROUP_LEVEL_SHIFT, the id into the bits below.
 *
 * Returns 0 and stores the result in *ret, or -EINVAL if either part does not fit into its share
 * of the 64 bits. */
int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
        const uint64_t level_limit = UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT);
        const uint64_t id_limit = UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT;

        assert(ret);

        if (level >= level_limit || id >= id_limit)
                return -EINVAL;

        *ret = id | (level << BTRFS_QGROUP_LEVEL_SHIFT);
        return 0;
}
996
/* Splits a qgroupid into its level (bits above BTRFS_QGROUP_LEVEL_SHIFT) and id (bits below)
 * parts. Either output pointer may be NULL if the caller is not interested in that part, but not
 * both. Always returns 0. */
int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
        const uint64_t id_mask = (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1;

        assert(level || id);

        if (level)
                *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;

        if (id)
                *id = qgroupid & id_mask;

        return 0;
}
1008
/* Creates (b == true) or destroys (b == false) the quota group 'qgroupid' on the btrfs file
 * system 'fd' is on, via BTRFS_IOC_QGROUP_CREATE.
 *
 * If the ioctl fails with EBUSY, btrfs_quota_scan_wait() is called and the ioctl is retried, up
 * to 10 times.
 *
 * Returns 0 on success, -ENOTTY if 'fd' is not on btrfs, -ENOPROTOOPT if the ioctl fails with
 * EINVAL, or another negative errno. */
static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {

        struct btrfs_ioctl_qgroup_create_args create_args = {
                .create = b,
                .qgroupid = qgroupid,
        };
        unsigned attempts = 0;
        int r;

        r = btrfs_is_filesystem(fd);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        while (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &create_args) < 0) {

                /* If quota is not enabled, we get EINVAL. Turn this into a recognizable error */
                if (errno == EINVAL)
                        return -ENOPROTOOPT;

                /* Anything but EBUSY is fatal, and so is EBUSY once the retries are used up */
                if (errno != EBUSY || attempts >= 10)
                        return -errno;

                (void) btrfs_quota_scan_wait(fd);
                attempts++;
        }

        return 0;
}
1044
/* Creates the quota group 'qgroupid' on the file system 'fd' is on. See
 * qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        const bool create = true;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
1048
/* Destroys the quota group 'qgroupid' on the file system 'fd' is on. See
 * qgroup_create_or_destroy() for the return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        const bool create = false;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
1052
1053 int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
1054 _cleanup_free_ uint64_t *qgroups = NULL;
1055 uint64_t subvol_id;
1056 int i, n, r;
1057
1058 /* Destroys the specified qgroup, but unassigns it from all
1059 * its parents first. Also, it recursively destroys all
1060 * qgroups it is assgined to that have the same id part of the
1061 * qgroupid as the specified group. */
1062
1063 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
1064 if (r < 0)
1065 return r;
1066
1067 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
1068 if (n < 0)
1069 return n;
1070
1071 for (i = 0; i < n; i++) {
1072 uint64_t id;
1073
1074 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
1075 if (r < 0)
1076 return r;
1077
1078 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
1079 if (r < 0)
1080 return r;
1081
1082 if (id != subvol_id)
1083 continue;
1084
1085 /* The parent qgroupid shares the same id part with
1086 * us? If so, destroy it too. */
1087
1088 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
1089 }
1090
1091 return btrfs_qgroup_destroy(fd, qgroupid);
1092 }
1093
/* Issues BTRFS_IOC_QUOTA_RESCAN with zeroed arguments on 'fd'. Returns 0 on success or a negative
 * errno. Note that no check is done here whether 'fd' is actually on btrfs. */
int btrfs_quota_scan_start(int fd) {
        struct btrfs_ioctl_quota_rescan_args rescan = {};

        assert(fd >= 0);

        return ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &rescan) < 0 ? -errno : 0;
}
1104
/* Issues BTRFS_IOC_QUOTA_RESCAN_WAIT on 'fd'. Returns 0 on success or a negative errno. Note that
 * no check is done here whether 'fd' is actually on btrfs. */
int btrfs_quota_scan_wait(int fd) {
        assert(fd >= 0);

        return ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0 ? -errno : 0;
}
1113
/* Queries BTRFS_IOC_QUOTA_RESCAN_STATUS on 'fd'. Returns 1 if the flags field reported back is
 * non-zero, 0 if it is zero, or a negative errno if the ioctl fails. Note that no check is done
 * here whether 'fd' is actually on btrfs. */
int btrfs_quota_scan_ongoing(int fd) {
        struct btrfs_ioctl_quota_rescan_args status = {};

        assert(fd >= 0);

        if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &status) < 0)
                return -errno;

        return status.flags != 0;
}
1124
/* Assigns (b == true) quota group 'child' to quota group 'parent', or removes that assignment
 * (b == false), via BTRFS_IOC_QGROUP_ASSIGN.
 *
 * If the ioctl fails with EBUSY, btrfs_quota_scan_wait() is called and the ioctl is retried, up
 * to 10 times.
 *
 * Returns 0 if the ioctl returned 0, 1 if it returned a positive value (in which case a quota
 * rescan is requested, ignoring failures to do so), -ENOTTY if 'fd' is not on btrfs, or another
 * negative errno. */
static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
        struct btrfs_ioctl_qgroup_assign_args assign_args = {
                .assign = b,
                .src = child,
                .dst = parent,
        };
        unsigned attempts;
        int r;

        r = btrfs_is_filesystem(fd);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOTTY;

        for (attempts = 0;; attempts++) {
                r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &assign_args);
                if (r >= 0)
                        break;

                /* Anything but EBUSY is fatal, and so is EBUSY once the retries are used up */
                if (errno != EBUSY || attempts >= 10)
                        return -errno;

                (void) btrfs_quota_scan_wait(fd);
        }

        if (r == 0)
                return 0;

        /* If the return value is > 0, we need to request a rescan */
        (void) btrfs_quota_scan_start(fd);
        return 1;
}
1160
/* Assigns quota group 'child' to quota group 'parent'. See qgroup_assign_or_unassign() for the
 * return values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = true;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1164
/* Removes the assignment of quota group 'child' to quota group 'parent'. See
 * qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = false;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1168
1169 static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
1170 struct btrfs_ioctl_search_args args = {
1171 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1172
1173 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1174 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1175
1176 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1177 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1178
1179 .key.min_transid = 0,
1180 .key.max_transid = (uint64_t) -1,
1181 };
1182
1183 struct btrfs_ioctl_vol_args vol_args = {};
1184 _cleanup_close_ int subvol_fd = -1;
1185 struct stat st;
1186 bool made_writable = false;
1187 int r;
1188
1189 assert(fd >= 0);
1190 assert(subvolume);
1191
1192 if (fstat(fd, &st) < 0)
1193 return -errno;
1194
1195 if (!S_ISDIR(st.st_mode))
1196 return -EINVAL;
1197
1198 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1199 if (subvol_fd < 0)
1200 return -errno;
1201
1202 if (subvol_id == 0) {
1203 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
1204 if (r < 0)
1205 return r;
1206 }
1207
1208 /* First, try to remove the subvolume. If it happens to be
1209 * already empty, this will just work. */
1210 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1211 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
1212 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
1213 return 0;
1214 }
1215 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
1216 return -errno;
1217
1218 /* OK, the subvolume is not empty, let's look for child
1219 * subvolumes, and remove them, first */
1220
1221 args.key.min_offset = args.key.max_offset = subvol_id;
1222
1223 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1224 const struct btrfs_ioctl_search_header *sh;
1225 unsigned i;
1226
1227 args.key.nr_items = 256;
1228 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1229 return -errno;
1230
1231 if (args.key.nr_items <= 0)
1232 break;
1233
1234 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1235 _cleanup_free_ char *p = NULL;
1236 const struct btrfs_root_ref *ref;
1237 struct btrfs_ioctl_ino_lookup_args ino_args;
1238
1239 btrfs_ioctl_search_args_set(&args, sh);
1240
1241 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1242 continue;
1243 if (sh->offset != subvol_id)
1244 continue;
1245
1246 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1247
1248 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1249 if (!p)
1250 return -ENOMEM;
1251
1252 zero(ino_args);
1253 ino_args.treeid = subvol_id;
1254 ino_args.objectid = htole64(ref->dirid);
1255
1256 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1257 return -errno;
1258
1259 if (!made_writable) {
1260 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1261 if (r < 0)
1262 return r;
1263
1264 made_writable = true;
1265 }
1266
1267 if (isempty(ino_args.name))
1268 /* Subvolume is in the top-level
1269 * directory of the subvolume. */
1270 r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
1271 else {
1272 _cleanup_close_ int child_fd = -1;
1273
1274 /* Subvolume is somewhere further down,
1275 * hence we need to open the
1276 * containing directory first */
1277
1278 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1279 if (child_fd < 0)
1280 return -errno;
1281
1282 r = subvol_remove_children(child_fd, p, sh->objectid, flags);
1283 }
1284 if (r < 0)
1285 return r;
1286 }
1287
1288 /* Increase search key by one, to read the next item, if we can. */
1289 if (!btrfs_ioctl_search_args_inc(&args))
1290 break;
1291 }
1292
1293 /* OK, the child subvolumes should all be gone now, let's try
1294 * again to remove the subvolume */
1295 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1296 return -errno;
1297
1298 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
1299 return 0;
1300 }
1301
1302 int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
1303 _cleanup_close_ int fd = -1;
1304 const char *subvolume;
1305 int r;
1306
1307 assert(path);
1308
1309 r = extract_subvolume_name(path, &subvolume);
1310 if (r < 0)
1311 return r;
1312
1313 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1314 if (fd < 0)
1315 return fd;
1316
1317 return subvol_remove_children(fd, subvolume, 0, flags);
1318 }
1319
1320 int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
1321 return subvol_remove_children(fd, subvolume, 0, flags);
1322 }
1323
1324 int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1325
1326 struct btrfs_ioctl_search_args args = {
1327 /* Tree of quota items */
1328 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1329
1330 /* The object ID is always 0 */
1331 .key.min_objectid = 0,
1332 .key.max_objectid = 0,
1333
1334 /* Look precisely for the quota items */
1335 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1336 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1337
1338 /* For our qgroup */
1339 .key.min_offset = old_qgroupid,
1340 .key.max_offset = old_qgroupid,
1341
1342 /* No restrictions on the other components */
1343 .key.min_transid = 0,
1344 .key.max_transid = (uint64_t) -1,
1345 };
1346
1347 int r;
1348
1349 r = btrfs_is_filesystem(fd);
1350 if (r < 0)
1351 return r;
1352 if (!r)
1353 return -ENOTTY;
1354
1355 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1356 const struct btrfs_ioctl_search_header *sh;
1357 unsigned i;
1358
1359 args.key.nr_items = 256;
1360 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1361 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1362 break;
1363
1364 return -errno;
1365 }
1366
1367 if (args.key.nr_items <= 0)
1368 break;
1369
1370 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1371 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1372 struct btrfs_ioctl_qgroup_limit_args qargs;
1373 unsigned c;
1374
1375 /* Make sure we start the next search at least from this entry */
1376 btrfs_ioctl_search_args_set(&args, sh);
1377
1378 if (sh->objectid != 0)
1379 continue;
1380 if (sh->type != BTRFS_QGROUP_LIMIT_KEY)
1381 continue;
1382 if (sh->offset != old_qgroupid)
1383 continue;
1384
1385 /* We found the entry, now copy things over. */
1386
1387 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1388 .qgroupid = new_qgroupid,
1389
1390 .lim.max_rfer = le64toh(qli->max_rfer),
1391 .lim.max_excl = le64toh(qli->max_excl),
1392 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1393 .lim.rsv_excl = le64toh(qli->rsv_excl),
1394
1395 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1396 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1397 BTRFS_QGROUP_LIMIT_RSV_RFER|
1398 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1399 };
1400
1401 for (c = 0;; c++) {
1402 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1403 if (errno == EBUSY && c < 10) {
1404 (void) btrfs_quota_scan_wait(fd);
1405 continue;
1406 }
1407 return -errno;
1408 }
1409
1410 break;
1411 }
1412
1413 return 1;
1414 }
1415
1416 /* Increase search key by one, to read the next item, if we can. */
1417 if (!btrfs_ioctl_search_args_inc(&args))
1418 break;
1419 }
1420
1421 return 0;
1422 }
1423
1424 static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1425 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1426 bool copy_from_parent = false, insert_intermediary_qgroup = false;
1427 int n_old_qgroups, n_old_parent_qgroups, r, i;
1428 uint64_t old_parent_id;
1429
1430 assert(fd >= 0);
1431
1432 /* Copies a reduced form of quota information from the old to
1433 * the new subvolume. */
1434
1435 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1436 if (n_old_qgroups <= 0) /* Nothing to copy */
1437 return n_old_qgroups;
1438
1439 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
1440 if (r == -ENXIO)
1441 /* We have no parent, hence nothing to copy. */
1442 n_old_parent_qgroups = 0;
1443 else if (r < 0)
1444 return r;
1445 else {
1446 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1447 if (n_old_parent_qgroups < 0)
1448 return n_old_parent_qgroups;
1449 }
1450
1451 for (i = 0; i < n_old_qgroups; i++) {
1452 uint64_t id;
1453 int j;
1454
1455 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1456 if (r < 0)
1457 return r;
1458
1459 if (id == old_subvol_id) {
1460 /* The old subvolume was member of a qgroup
1461 * that had the same id, but a different level
1462 * as it self. Let's set up something similar
1463 * in the destination. */
1464 insert_intermediary_qgroup = true;
1465 break;
1466 }
1467
1468 for (j = 0; j < n_old_parent_qgroups; j++)
1469 if (old_parent_qgroups[j] == old_qgroups[i]) {
1470 /* The old subvolume shared a common
1471 * parent qgroup with its parent
1472 * subvolume. Let's set up something
1473 * similar in the destination. */
1474 copy_from_parent = true;
1475 }
1476 }
1477
1478 if (!insert_intermediary_qgroup && !copy_from_parent)
1479 return 0;
1480
1481 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1482 }
1483
/* Copies the quota limits of the old subvolume to the new one: first those of the leaf qgroup, then, if
 * both subvolumes have a subtree qgroup, those of the subtree qgroup. Returns > 0 if any limit was copied,
 * 0 if none was, negative on error. */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t old_subtree_qgroup, new_subtree_qgroup;
        bool leaf_copied;
        int r;

        /* Step 1: the leaf qgroups, whose IDs are the subvolume IDs */
        r = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (r < 0)
                return r;

        leaf_copied = r > 0;

        /* Step 2: the subtree qgroups, but only if both sides actually have one */
        r = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &old_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &new_subtree_qgroup);
        if (r < 0)
                return r;
        if (r == 0)
                return leaf_copied;

        r = btrfs_qgroup_copy_limits(fd, old_subtree_qgroup, new_subtree_qgroup);

        /* Errors and "copied" are reported directly, otherwise fall back to the result of step 1 */
        return r != 0 ? r : leaf_copied;
}
1514
1515 static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolume, uint64_t old_subvol_id, BtrfsSnapshotFlags flags) {
1516
1517 struct btrfs_ioctl_search_args args = {
1518 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1519
1520 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1521 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1522
1523 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1524 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1525
1526 .key.min_transid = 0,
1527 .key.max_transid = (uint64_t) -1,
1528 };
1529
1530 struct btrfs_ioctl_vol_args_v2 vol_args = {
1531 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1532 .fd = old_fd,
1533 };
1534 _cleanup_close_ int subvolume_fd = -1;
1535 uint64_t new_subvol_id;
1536 int r;
1537
1538 assert(old_fd >= 0);
1539 assert(new_fd >= 0);
1540 assert(subvolume);
1541
1542 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1543
1544 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1545 return -errno;
1546
1547 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1548 !(flags & BTRFS_SNAPSHOT_QUOTA))
1549 return 0;
1550
1551 if (old_subvol_id == 0) {
1552 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
1553 if (r < 0)
1554 return r;
1555 }
1556
1557 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1558 if (r < 0)
1559 return r;
1560
1561 if (flags & BTRFS_SNAPSHOT_QUOTA)
1562 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1563
1564 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1565
1566 if (flags & BTRFS_SNAPSHOT_QUOTA)
1567 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1568
1569 return 0;
1570 }
1571
1572 args.key.min_offset = args.key.max_offset = old_subvol_id;
1573
1574 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1575 const struct btrfs_ioctl_search_header *sh;
1576 unsigned i;
1577
1578 args.key.nr_items = 256;
1579 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1580 return -errno;
1581
1582 if (args.key.nr_items <= 0)
1583 break;
1584
1585 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1586 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
1587 struct btrfs_ioctl_ino_lookup_args ino_args;
1588 const struct btrfs_root_ref *ref;
1589 _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
1590
1591 btrfs_ioctl_search_args_set(&args, sh);
1592
1593 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1594 continue;
1595
1596 /* Avoid finding the source subvolume a second
1597 * time */
1598 if (sh->offset != old_subvol_id)
1599 continue;
1600
1601 /* Avoid running into loops if the new
1602 * subvolume is below the old one. */
1603 if (sh->objectid == new_subvol_id)
1604 continue;
1605
1606 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1607 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1608 if (!p)
1609 return -ENOMEM;
1610
1611 zero(ino_args);
1612 ino_args.treeid = old_subvol_id;
1613 ino_args.objectid = htole64(ref->dirid);
1614
1615 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1616 return -errno;
1617
1618 /* The kernel returns an empty name if the
1619 * subvolume is in the top-level directory,
1620 * and otherwise appends a slash, so that we
1621 * can just concatenate easily here, without
1622 * adding a slash. */
1623 c = strappend(ino_args.name, p);
1624 if (!c)
1625 return -ENOMEM;
1626
1627 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1628 if (old_child_fd < 0)
1629 return -errno;
1630
1631 np = strjoin(subvolume, "/", ino_args.name);
1632 if (!np)
1633 return -ENOMEM;
1634
1635 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1636 if (new_child_fd < 0)
1637 return -errno;
1638
1639 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1640 /* If the snapshot is read-only we
1641 * need to mark it writable
1642 * temporarily, to put the subsnapshot
1643 * into place. */
1644
1645 if (subvolume_fd < 0) {
1646 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY|O_NOFOLLOW);
1647 if (subvolume_fd < 0)
1648 return -errno;
1649 }
1650
1651 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1652 if (r < 0)
1653 return r;
1654 }
1655
1656 /* When btrfs clones the subvolumes, child
1657 * subvolumes appear as empty directories. Remove
1658 * them, so that we can create a new snapshot
1659 * in their place */
1660 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1661 int k = -errno;
1662
1663 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1664 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1665
1666 return k;
1667 }
1668
1669 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
1670
1671 /* Restore the readonly flag */
1672 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1673 int k;
1674
1675 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1676 if (r >= 0 && k < 0)
1677 return k;
1678 }
1679
1680 if (r < 0)
1681 return r;
1682 }
1683
1684 /* Increase search key by one, to read the next item, if we can. */
1685 if (!btrfs_ioctl_search_args_inc(&args))
1686 break;
1687 }
1688
1689 if (flags & BTRFS_SNAPSHOT_QUOTA)
1690 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1691
1692 return 0;
1693 }
1694
1695 int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlags flags) {
1696 _cleanup_close_ int new_fd = -1;
1697 const char *subvolume;
1698 int r;
1699
1700 assert(old_fd >= 0);
1701 assert(new_path);
1702
1703 r = btrfs_is_subvol_fd(old_fd);
1704 if (r < 0)
1705 return r;
1706 if (r == 0) {
1707 bool plain_directory = false;
1708
1709 /* If the source isn't a proper subvolume, fail unless fallback is requested */
1710 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1711 return -EISDIR;
1712
1713 r = btrfs_subvol_make(new_path);
1714 if (r == -ENOTTY && (flags & BTRFS_SNAPSHOT_FALLBACK_DIRECTORY)) {
1715 /* If the destination doesn't support subvolumes, then use a plain directory, if that's requested. */
1716 if (mkdir(new_path, 0755) < 0)
1717 return r;
1718
1719 plain_directory = true;
1720 } else if (r < 0)
1721 return r;
1722
1723 r = copy_directory_fd(old_fd, new_path, COPY_MERGE|COPY_REFLINK);
1724 if (r < 0)
1725 goto fallback_fail;
1726
1727 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1728
1729 if (plain_directory) {
1730 /* Plain directories have no recursive read-only flag, but something pretty close to
1731 * it: the IMMUTABLE bit. Let's use this here, if this is requested. */
1732
1733 if (flags & BTRFS_SNAPSHOT_FALLBACK_IMMUTABLE)
1734 (void) chattr_path(new_path, FS_IMMUTABLE_FL, FS_IMMUTABLE_FL);
1735 } else {
1736 r = btrfs_subvol_set_read_only(new_path, true);
1737 if (r < 0)
1738 goto fallback_fail;
1739 }
1740 }
1741
1742 return 0;
1743
1744 fallback_fail:
1745 (void) rm_rf(new_path, REMOVE_ROOT|REMOVE_PHYSICAL|REMOVE_SUBVOLUME);
1746 return r;
1747 }
1748
1749 r = extract_subvolume_name(new_path, &subvolume);
1750 if (r < 0)
1751 return r;
1752
1753 new_fd = open_parent(new_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1754 if (new_fd < 0)
1755 return new_fd;
1756
1757 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1758 }
1759
1760 int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags) {
1761 _cleanup_close_ int old_fd = -1;
1762
1763 assert(old_path);
1764 assert(new_path);
1765
1766 old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1767 if (old_fd < 0)
1768 return -errno;
1769
1770 return btrfs_subvol_snapshot_fd(old_fd, new_path, flags);
1771 }
1772
1773 int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1774
1775 struct btrfs_ioctl_search_args args = {
1776 /* Tree of quota items */
1777 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1778
1779 /* Look precisely for the quota relation items */
1780 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1781 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1782
1783 /* No restrictions on the other components */
1784 .key.min_offset = 0,
1785 .key.max_offset = (uint64_t) -1,
1786
1787 .key.min_transid = 0,
1788 .key.max_transid = (uint64_t) -1,
1789 };
1790
1791 _cleanup_free_ uint64_t *items = NULL;
1792 size_t n_items = 0, n_allocated = 0;
1793 int r;
1794
1795 assert(fd >= 0);
1796 assert(ret);
1797
1798 if (qgroupid == 0) {
1799 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1800 if (r < 0)
1801 return r;
1802 } else {
1803 r = btrfs_is_filesystem(fd);
1804 if (r < 0)
1805 return r;
1806 if (!r)
1807 return -ENOTTY;
1808 }
1809
1810 args.key.min_objectid = args.key.max_objectid = qgroupid;
1811
1812 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1813 const struct btrfs_ioctl_search_header *sh;
1814 unsigned i;
1815
1816 args.key.nr_items = 256;
1817 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1818 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1819 break;
1820
1821 return -errno;
1822 }
1823
1824 if (args.key.nr_items <= 0)
1825 break;
1826
1827 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1828
1829 /* Make sure we start the next search at least from this entry */
1830 btrfs_ioctl_search_args_set(&args, sh);
1831
1832 if (sh->type != BTRFS_QGROUP_RELATION_KEY)
1833 continue;
1834 if (sh->offset < sh->objectid)
1835 continue;
1836 if (sh->objectid != qgroupid)
1837 continue;
1838
1839 if (!GREEDY_REALLOC(items, n_allocated, n_items+1))
1840 return -ENOMEM;
1841
1842 items[n_items++] = sh->offset;
1843 }
1844
1845 /* Increase search key by one, to read the next item, if we can. */
1846 if (!btrfs_ioctl_search_args_inc(&args))
1847 break;
1848 }
1849
1850 if (n_items <= 0) {
1851 *ret = NULL;
1852 return 0;
1853 }
1854
1855 *ret = items;
1856 items = NULL;
1857
1858 return (int) n_items;
1859 }
1860
1861 int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1862 _cleanup_free_ uint64_t *qgroups = NULL;
1863 uint64_t parent_subvol;
1864 bool changed = false;
1865 int n = 0, r;
1866
1867 assert(fd >= 0);
1868
1869 /*
1870 * Sets up the specified subvolume's qgroup automatically in
1871 * one of two ways:
1872 *
1873 * If insert_intermediary_qgroup is false, the subvolume's
1874 * leaf qgroup will be assigned to the same parent qgroups as
1875 * the subvolume's parent subvolume.
1876 *
1877 * If insert_intermediary_qgroup is true a new intermediary
1878 * higher-level qgroup is created, with a higher level number,
1879 * but reusing the id of the subvolume. The level number is
1880 * picked as one smaller than the lowest level qgroup the
1881 * parent subvolume is a member of. If the parent subvolume's
1882 * leaf qgroup is assigned to no higher-level qgroup a new
1883 * qgroup of level 255 is created instead. Either way, the new
1884 * qgroup is then assigned to the parent's higher-level
1885 * qgroup, and the subvolume itself is assigned to it.
1886 *
1887 * If the subvolume is already assigned to a higher level
1888 * qgroup, no operation is executed.
1889 *
1890 * Effectively this means: regardless if
1891 * insert_intermediary_qgroup is true or not, after this
1892 * function is invoked the subvolume will be accounted within
1893 * the same qgroups as the parent. However, if it is true, it
1894 * will also get its own higher-level qgroup, which may in
1895 * turn be used by subvolumes created beneath this subvolume
1896 * later on.
1897 *
1898 * This hence defines a simple default qgroup setup for
1899 * subvolumes, as long as this function is invoked on each
1900 * created subvolume: each subvolume is always accounting
1901 * together with its immediate parents. Optionally, if
1902 * insert_intermediary_qgroup is true, it will also get a
1903 * qgroup that then includes all its own child subvolumes.
1904 */
1905
1906 if (subvol_id == 0) {
1907 r = btrfs_is_subvol_fd(fd);
1908 if (r < 0)
1909 return r;
1910 if (!r)
1911 return -ENOTTY;
1912
1913 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1914 if (r < 0)
1915 return r;
1916 }
1917
1918 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1919 if (n < 0)
1920 return n;
1921 if (n > 0) /* already parent qgroups set up, let's bail */
1922 return 0;
1923
1924 qgroups = mfree(qgroups);
1925
1926 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
1927 if (r == -ENXIO)
1928 /* No parent, hence no qgroup memberships */
1929 n = 0;
1930 else if (r < 0)
1931 return r;
1932 else {
1933 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1934 if (n < 0)
1935 return n;
1936 }
1937
1938 if (insert_intermediary_qgroup) {
1939 uint64_t lowest = 256, new_qgroupid;
1940 bool created = false;
1941 int i;
1942
1943 /* Determine the lowest qgroup that the parent
1944 * subvolume is assigned to. */
1945
1946 for (i = 0; i < n; i++) {
1947 uint64_t level;
1948
1949 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1950 if (r < 0)
1951 return r;
1952
1953 if (level < lowest)
1954 lowest = level;
1955 }
1956
1957 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1958 return -EBUSY;
1959
1960 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1961 if (r < 0)
1962 return r;
1963
1964 /* Create the new intermediary group, unless it already exists */
1965 r = btrfs_qgroup_create(fd, new_qgroupid);
1966 if (r < 0 && r != -EEXIST)
1967 return r;
1968 if (r >= 0)
1969 changed = created = true;
1970
1971 for (i = 0; i < n; i++) {
1972 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1973 if (r < 0 && r != -EEXIST) {
1974 if (created)
1975 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1976
1977 return r;
1978 }
1979 if (r >= 0)
1980 changed = true;
1981 }
1982
1983 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1984 if (r < 0 && r != -EEXIST) {
1985 if (created)
1986 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1987 return r;
1988 }
1989 if (r >= 0)
1990 changed = true;
1991
1992 } else {
1993 int i;
1994
1995 /* Assign our subvolume to all the same qgroups as the parent */
1996
1997 for (i = 0; i < n; i++) {
1998 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1999 if (r < 0 && r != -EEXIST)
2000 return r;
2001 if (r >= 0)
2002 changed = true;
2003 }
2004 }
2005
2006 return changed;
2007 }
2008
2009 int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
2010 _cleanup_close_ int fd = -1;
2011
2012 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
2013 if (fd < 0)
2014 return -errno;
2015
2016 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
2017 }
2018
2019 int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
2020
2021 struct btrfs_ioctl_search_args args = {
2022 /* Tree of tree roots */
2023 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
2024
2025 /* Look precisely for the subvolume items */
2026 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
2027 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
2028
2029 /* No restrictions on the other components */
2030 .key.min_offset = 0,
2031 .key.max_offset = (uint64_t) -1,
2032
2033 .key.min_transid = 0,
2034 .key.max_transid = (uint64_t) -1,
2035 };
2036 int r;
2037
2038 assert(fd >= 0);
2039 assert(ret);
2040
2041 if (subvol_id == 0) {
2042 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
2043 if (r < 0)
2044 return r;
2045 } else {
2046 r = btrfs_is_filesystem(fd);
2047 if (r < 0)
2048 return r;
2049 if (!r)
2050 return -ENOTTY;
2051 }
2052
2053 args.key.min_objectid = args.key.max_objectid = subvol_id;
2054
2055 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
2056 const struct btrfs_ioctl_search_header *sh;
2057 unsigned i;
2058
2059 args.key.nr_items = 256;
2060 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
2061 return negative_errno();
2062
2063 if (args.key.nr_items <= 0)
2064 break;
2065
2066 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
2067
2068 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
2069 continue;
2070 if (sh->objectid != subvol_id)
2071 continue;
2072
2073 *ret = sh->offset;
2074 return 0;
2075 }
2076 }
2077
2078 return -ENXIO;
2079 }