]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/btrfs-util.c
Merge pull request #1975 from ssahani/vxlan2
[thirdparty/systemd.git] / src / basic / btrfs-util.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2014 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <errno.h>
23 #include <fcntl.h>
24 #include <inttypes.h>
25 #include <linux/loop.h>
26 #include <stddef.h>
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/ioctl.h>
31 #include <sys/stat.h>
32 #include <sys/statfs.h>
33 #include <sys/sysmacros.h>
34 #include <unistd.h>
35
36 #ifdef HAVE_LINUX_BTRFS_H
37 #include <linux/btrfs.h>
38 #endif
39
40 #include "alloc-util.h"
41 #include "btrfs-ctree.h"
42 #include "btrfs-util.h"
43 #include "copy.h"
44 #include "fd-util.h"
45 #include "fileio.h"
46 #include "macro.h"
47 #include "missing.h"
48 #include "path-util.h"
49 #include "selinux-util.h"
50 #include "smack-util.h"
51 #include "sparse-endian.h"
52 #include "stat-util.h"
53 #include "string-util.h"
54 #include "time-util.h"
55 #include "util.h"
56
57 /* WARNING: Be careful with file system ioctls! When we get an fd, we
58 * need to make sure it either refers to only a regular file or
59 * directory, or that it is located on btrfs, before invoking any
60 * btrfs ioctls. The ioctl numbers are reused by some device drivers
61 * (such as DRM), and hence might have bad effects when invoked on
62 * device nodes (that reference drivers) rather than fds to normal
63 * files or directories. */
64
65 static int validate_subvolume_name(const char *name) {
66
67 if (!filename_is_valid(name))
68 return -EINVAL;
69
70 if (strlen(name) > BTRFS_SUBVOL_NAME_MAX)
71 return -E2BIG;
72
73 return 0;
74 }
75
76 static int open_parent(const char *path, int flags) {
77 _cleanup_free_ char *parent = NULL;
78 int fd;
79
80 assert(path);
81
82 parent = dirname_malloc(path);
83 if (!parent)
84 return -ENOMEM;
85
86 fd = open(parent, flags);
87 if (fd < 0)
88 return -errno;
89
90 return fd;
91 }
92
/* Takes the last component of 'path' (as returned by basename()),
 * validates it as a subvolume name and, if valid, stores the pointer
 * in *subvolume.
 *
 * Returns 0 on success or the negative error reported by
 * validate_subvolume_name(); *subvolume is left untouched on error. */
static int extract_subvolume_name(const char *path, const char **subvolume) {
        const char *last;
        int r;

        assert(path);
        assert(subvolume);

        last = basename(path);

        r = validate_subvolume_name(last);
        if (r >= 0) {
                *subvolume = last;
                return 0;
        }

        return r;
}
109
110 int btrfs_is_filesystem(int fd) {
111 struct statfs sfs;
112
113 assert(fd >= 0);
114
115 if (fstatfs(fd, &sfs) < 0)
116 return -errno;
117
118 return F_TYPE_EQUAL(sfs.f_type, BTRFS_SUPER_MAGIC);
119 }
120
/* Tests whether 'fd' refers to the top-level directory of a btrfs
 * subvolume.
 *
 * Returns 0 if the inode is not a directory with inode number 256,
 * otherwise the result of btrfs_is_filesystem(). A negative errno is
 * returned if fstat() fails. */
int btrfs_is_subvol_fd(int fd) {
        struct stat info;

        assert(fd >= 0);

        if (fstat(fd, &info) < 0)
                return -errno;

        /* On btrfs, subvolumes always have inode number 256. Only if
         * that matches is it worth checking the file system type. */
        if (S_ISDIR(info.st_mode) && info.st_ino == 256)
                return btrfs_is_filesystem(fd);

        return 0;
}
136
137 int btrfs_is_subvol(const char *path) {
138 _cleanup_close_ int fd = -1;
139
140 assert(path);
141
142 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
143 if (fd < 0)
144 return -errno;
145
146 return btrfs_is_subvol_fd(fd);
147 }
148
149 int btrfs_subvol_make(const char *path) {
150 struct btrfs_ioctl_vol_args args = {};
151 _cleanup_close_ int fd = -1;
152 const char *subvolume;
153 int r;
154
155 assert(path);
156
157 r = extract_subvolume_name(path, &subvolume);
158 if (r < 0)
159 return r;
160
161 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
162 if (fd < 0)
163 return fd;
164
165 strncpy(args.name, subvolume, sizeof(args.name)-1);
166
167 if (ioctl(fd, BTRFS_IOC_SUBVOL_CREATE, &args) < 0)
168 return -errno;
169
170 return 0;
171 }
172
173 int btrfs_subvol_make_label(const char *path) {
174 int r;
175
176 assert(path);
177
178 r = mac_selinux_create_file_prepare(path, S_IFDIR);
179 if (r < 0)
180 return r;
181
182 r = btrfs_subvol_make(path);
183 mac_selinux_create_file_clear();
184
185 if (r < 0)
186 return r;
187
188 return mac_smack_fix(path, false, false);
189 }
190
191 int btrfs_subvol_set_read_only_fd(int fd, bool b) {
192 uint64_t flags, nflags;
193 struct stat st;
194
195 assert(fd >= 0);
196
197 if (fstat(fd, &st) < 0)
198 return -errno;
199
200 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
201 return -EINVAL;
202
203 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
204 return -errno;
205
206 if (b)
207 nflags = flags | BTRFS_SUBVOL_RDONLY;
208 else
209 nflags = flags & ~BTRFS_SUBVOL_RDONLY;
210
211 if (flags == nflags)
212 return 0;
213
214 if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &nflags) < 0)
215 return -errno;
216
217 return 0;
218 }
219
220 int btrfs_subvol_set_read_only(const char *path, bool b) {
221 _cleanup_close_ int fd = -1;
222
223 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
224 if (fd < 0)
225 return -errno;
226
227 return btrfs_subvol_set_read_only_fd(fd, b);
228 }
229
230 int btrfs_subvol_get_read_only_fd(int fd) {
231 uint64_t flags;
232 struct stat st;
233
234 assert(fd >= 0);
235
236 if (fstat(fd, &st) < 0)
237 return -errno;
238
239 if (!S_ISDIR(st.st_mode) || st.st_ino != 256)
240 return -EINVAL;
241
242 if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags) < 0)
243 return -errno;
244
245 return !!(flags & BTRFS_SUBVOL_RDONLY);
246 }
247
248 int btrfs_reflink(int infd, int outfd) {
249 struct stat st;
250 int r;
251
252 assert(infd >= 0);
253 assert(outfd >= 0);
254
255 /* Make sure we invoke the ioctl on a regular file, so that no
256 * device driver accidentally gets it. */
257
258 if (fstat(outfd, &st) < 0)
259 return -errno;
260
261 if (!S_ISREG(st.st_mode))
262 return -EINVAL;
263
264 r = ioctl(outfd, BTRFS_IOC_CLONE, infd);
265 if (r < 0)
266 return -errno;
267
268 return 0;
269 }
270
271 int btrfs_clone_range(int infd, uint64_t in_offset, int outfd, uint64_t out_offset, uint64_t sz) {
272 struct btrfs_ioctl_clone_range_args args = {
273 .src_fd = infd,
274 .src_offset = in_offset,
275 .src_length = sz,
276 .dest_offset = out_offset,
277 };
278 struct stat st;
279 int r;
280
281 assert(infd >= 0);
282 assert(outfd >= 0);
283 assert(sz > 0);
284
285 if (fstat(outfd, &st) < 0)
286 return -errno;
287
288 if (!S_ISREG(st.st_mode))
289 return -EINVAL;
290
291 r = ioctl(outfd, BTRFS_IOC_CLONE_RANGE, &args);
292 if (r < 0)
293 return -errno;
294
295 return 0;
296 }
297
298 int btrfs_get_block_device_fd(int fd, dev_t *dev) {
299 struct btrfs_ioctl_fs_info_args fsi = {};
300 uint64_t id;
301 int r;
302
303 assert(fd >= 0);
304 assert(dev);
305
306 r = btrfs_is_filesystem(fd);
307 if (r < 0)
308 return r;
309 if (!r)
310 return -ENOTTY;
311
312 if (ioctl(fd, BTRFS_IOC_FS_INFO, &fsi) < 0)
313 return -errno;
314
315 /* We won't do this for btrfs RAID */
316 if (fsi.num_devices != 1)
317 return 0;
318
319 for (id = 1; id <= fsi.max_id; id++) {
320 struct btrfs_ioctl_dev_info_args di = {
321 .devid = id,
322 };
323 struct stat st;
324
325 if (ioctl(fd, BTRFS_IOC_DEV_INFO, &di) < 0) {
326 if (errno == ENODEV)
327 continue;
328
329 return -errno;
330 }
331
332 if (stat((char*) di.path, &st) < 0)
333 return -errno;
334
335 if (!S_ISBLK(st.st_mode))
336 return -ENODEV;
337
338 if (major(st.st_rdev) == 0)
339 return -ENODEV;
340
341 *dev = st.st_rdev;
342 return 1;
343 }
344
345 return -ENODEV;
346 }
347
348 int btrfs_get_block_device(const char *path, dev_t *dev) {
349 _cleanup_close_ int fd = -1;
350
351 assert(path);
352 assert(dev);
353
354 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC);
355 if (fd < 0)
356 return -errno;
357
358 return btrfs_get_block_device_fd(fd, dev);
359 }
360
361 int btrfs_subvol_get_id_fd(int fd, uint64_t *ret) {
362 struct btrfs_ioctl_ino_lookup_args args = {
363 .objectid = BTRFS_FIRST_FREE_OBJECTID
364 };
365 int r;
366
367 assert(fd >= 0);
368 assert(ret);
369
370 r = btrfs_is_filesystem(fd);
371 if (r < 0)
372 return r;
373 if (!r)
374 return -ENOTTY;
375
376 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
377 return -errno;
378
379 *ret = args.treeid;
380 return 0;
381 }
382
383 int btrfs_subvol_get_id(int fd, const char *subvol, uint64_t *ret) {
384 _cleanup_close_ int subvol_fd = -1;
385
386 assert(fd >= 0);
387 assert(ret);
388
389 subvol_fd = openat(fd, subvol, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
390 if (subvol_fd < 0)
391 return -errno;
392
393 return btrfs_subvol_get_id_fd(subvol_fd, ret);
394 }
395
396 static bool btrfs_ioctl_search_args_inc(struct btrfs_ioctl_search_args *args) {
397 assert(args);
398
399 /* the objectid, type, offset together make up the btrfs key,
400 * which is considered a single 136byte integer when
401 * comparing. This call increases the counter by one, dealing
402 * with the overflow between the overflows */
403
404 if (args->key.min_offset < (uint64_t) -1) {
405 args->key.min_offset++;
406 return true;
407 }
408
409 if (args->key.min_type < (uint8_t) -1) {
410 args->key.min_type++;
411 args->key.min_offset = 0;
412 return true;
413 }
414
415 if (args->key.min_objectid < (uint64_t) -1) {
416 args->key.min_objectid++;
417 args->key.min_offset = 0;
418 args->key.min_type = 0;
419 return true;
420 }
421
422 return 0;
423 }
424
425 static void btrfs_ioctl_search_args_set(struct btrfs_ioctl_search_args *args, const struct btrfs_ioctl_search_header *h) {
426 assert(args);
427 assert(h);
428
429 args->key.min_objectid = h->objectid;
430 args->key.min_type = h->type;
431 args->key.min_offset = h->offset;
432 }
433
434 static int btrfs_ioctl_search_args_compare(const struct btrfs_ioctl_search_args *args) {
435 assert(args);
436
437 /* Compare min and max */
438
439 if (args->key.min_objectid < args->key.max_objectid)
440 return -1;
441 if (args->key.min_objectid > args->key.max_objectid)
442 return 1;
443
444 if (args->key.min_type < args->key.max_type)
445 return -1;
446 if (args->key.min_type > args->key.max_type)
447 return 1;
448
449 if (args->key.min_offset < args->key.max_offset)
450 return -1;
451 if (args->key.min_offset > args->key.max_offset)
452 return 1;
453
454 return 0;
455 }
456
/* Iterates over the (args).key.nr_items result items stored in
 * (args).buf. Each item consists of a struct btrfs_ioctl_search_header
 * followed by (sh)->len bytes of payload; 'i' counts the items and
 * 'sh' points to the header of the current one. */
#define FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args)                  \
        for ((i) = 0,                                                   \
             (sh) = (const struct btrfs_ioctl_search_header*) (args).buf; \
             (i) < (args).key.nr_items;                                 \
             (i)++,                                                     \
             (sh) = (const struct btrfs_ioctl_search_header*) ((const uint8_t*) ((sh) + 1) + (sh)->len))
463
/* Evaluates to a pointer to the payload that immediately follows the
 * search result header 'sh'. The argument is parenthesized so that
 * expressions such as "p + 1" are cast as a whole. */
#define BTRFS_IOCTL_SEARCH_HEADER_BODY(sh)                              \
        ((void*) ((uint8_t*) (sh) + sizeof(struct btrfs_ioctl_search_header)))
466
467 int btrfs_subvol_get_info_fd(int fd, uint64_t subvol_id, BtrfsSubvolInfo *ret) {
468 struct btrfs_ioctl_search_args args = {
469 /* Tree of tree roots */
470 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
471
472 /* Look precisely for the subvolume items */
473 .key.min_type = BTRFS_ROOT_ITEM_KEY,
474 .key.max_type = BTRFS_ROOT_ITEM_KEY,
475
476 .key.min_offset = 0,
477 .key.max_offset = (uint64_t) -1,
478
479 /* No restrictions on the other components */
480 .key.min_transid = 0,
481 .key.max_transid = (uint64_t) -1,
482 };
483
484 bool found = false;
485 int r;
486
487 assert(fd >= 0);
488 assert(ret);
489
490 if (subvol_id == 0) {
491 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
492 if (r < 0)
493 return r;
494 } else {
495 r = btrfs_is_filesystem(fd);
496 if (r < 0)
497 return r;
498 if (!r)
499 return -ENOTTY;
500 }
501
502 args.key.min_objectid = args.key.max_objectid = subvol_id;
503
504 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
505 const struct btrfs_ioctl_search_header *sh;
506 unsigned i;
507
508 args.key.nr_items = 256;
509 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
510 return -errno;
511
512 if (args.key.nr_items <= 0)
513 break;
514
515 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
516
517 const struct btrfs_root_item *ri;
518
519 /* Make sure we start the next search at least from this entry */
520 btrfs_ioctl_search_args_set(&args, sh);
521
522 if (sh->objectid != subvol_id)
523 continue;
524 if (sh->type != BTRFS_ROOT_ITEM_KEY)
525 continue;
526
527 /* Older versions of the struct lacked the otime setting */
528 if (sh->len < offsetof(struct btrfs_root_item, otime) + sizeof(struct btrfs_timespec))
529 continue;
530
531 ri = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
532
533 ret->otime = (usec_t) le64toh(ri->otime.sec) * USEC_PER_SEC +
534 (usec_t) le32toh(ri->otime.nsec) / NSEC_PER_USEC;
535
536 ret->subvol_id = subvol_id;
537 ret->read_only = !!(le64toh(ri->flags) & BTRFS_ROOT_SUBVOL_RDONLY);
538
539 assert_cc(sizeof(ri->uuid) == sizeof(ret->uuid));
540 memcpy(&ret->uuid, ri->uuid, sizeof(ret->uuid));
541 memcpy(&ret->parent_uuid, ri->parent_uuid, sizeof(ret->parent_uuid));
542
543 found = true;
544 goto finish;
545 }
546
547 /* Increase search key by one, to read the next item, if we can. */
548 if (!btrfs_ioctl_search_args_inc(&args))
549 break;
550 }
551
552 finish:
553 if (!found)
554 return -ENODATA;
555
556 return 0;
557 }
558
559 int btrfs_qgroup_get_quota_fd(int fd, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
560
561 struct btrfs_ioctl_search_args args = {
562 /* Tree of quota items */
563 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
564
565 /* The object ID is always 0 */
566 .key.min_objectid = 0,
567 .key.max_objectid = 0,
568
569 /* Look precisely for the quota items */
570 .key.min_type = BTRFS_QGROUP_STATUS_KEY,
571 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
572
573 /* No restrictions on the other components */
574 .key.min_transid = 0,
575 .key.max_transid = (uint64_t) -1,
576 };
577
578 bool found_info = false, found_limit = false;
579 int r;
580
581 assert(fd >= 0);
582 assert(ret);
583
584 if (qgroupid == 0) {
585 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
586 if (r < 0)
587 return r;
588 } else {
589 r = btrfs_is_filesystem(fd);
590 if (r < 0)
591 return r;
592 if (!r)
593 return -ENOTTY;
594 }
595
596 args.key.min_offset = args.key.max_offset = qgroupid;
597
598 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
599 const struct btrfs_ioctl_search_header *sh;
600 unsigned i;
601
602 args.key.nr_items = 256;
603 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
604 if (errno == ENOENT) /* quota tree is missing: quota disabled */
605 break;
606
607 return -errno;
608 }
609
610 if (args.key.nr_items <= 0)
611 break;
612
613 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
614
615 /* Make sure we start the next search at least from this entry */
616 btrfs_ioctl_search_args_set(&args, sh);
617
618 if (sh->objectid != 0)
619 continue;
620 if (sh->offset != qgroupid)
621 continue;
622
623 if (sh->type == BTRFS_QGROUP_INFO_KEY) {
624 const struct btrfs_qgroup_info_item *qii = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
625
626 ret->referenced = le64toh(qii->rfer);
627 ret->exclusive = le64toh(qii->excl);
628
629 found_info = true;
630
631 } else if (sh->type == BTRFS_QGROUP_LIMIT_KEY) {
632 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
633
634 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_RFER)
635 ret->referenced_max = le64toh(qli->max_rfer);
636 else
637 ret->referenced_max = (uint64_t) -1;
638
639 if (le64toh(qli->flags) & BTRFS_QGROUP_LIMIT_MAX_EXCL)
640 ret->exclusive_max = le64toh(qli->max_excl);
641 else
642 ret->exclusive_max = (uint64_t) -1;
643
644 found_limit = true;
645 }
646
647 if (found_info && found_limit)
648 goto finish;
649 }
650
651 /* Increase search key by one, to read the next item, if we can. */
652 if (!btrfs_ioctl_search_args_inc(&args))
653 break;
654 }
655
656 finish:
657 if (!found_limit && !found_info)
658 return -ENODATA;
659
660 if (!found_info) {
661 ret->referenced = (uint64_t) -1;
662 ret->exclusive = (uint64_t) -1;
663 }
664
665 if (!found_limit) {
666 ret->referenced_max = (uint64_t) -1;
667 ret->exclusive_max = (uint64_t) -1;
668 }
669
670 return 0;
671 }
672
673 int btrfs_qgroup_get_quota(const char *path, uint64_t qgroupid, BtrfsQuotaInfo *ret) {
674 _cleanup_close_ int fd = -1;
675
676 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
677 if (fd < 0)
678 return -errno;
679
680 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
681 }
682
683 int btrfs_subvol_find_subtree_qgroup(int fd, uint64_t subvol_id, uint64_t *ret) {
684 uint64_t level, lowest = (uint64_t) -1, lowest_qgroupid = 0;
685 _cleanup_free_ uint64_t *qgroups = NULL;
686 int r, n, i;
687
688 assert(fd >= 0);
689 assert(ret);
690
691 /* This finds the "subtree" qgroup for a specific
692 * subvolume. This only works for subvolumes that have been
693 * prepared with btrfs_subvol_auto_qgroup_fd() with
694 * insert_intermediary_qgroup=true (or equivalent). For others
695 * it will return the leaf qgroup instead. The two cases may
696 * be distuingished via the return value, which is 1 in case
697 * an appropriate "subtree" qgroup was found, and 0
698 * otherwise. */
699
700 if (subvol_id == 0) {
701 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
702 if (r < 0)
703 return r;
704 }
705
706 r = btrfs_qgroupid_split(subvol_id, &level, NULL);
707 if (r < 0)
708 return r;
709 if (level != 0) /* Input must be a leaf qgroup */
710 return -EINVAL;
711
712 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
713 if (n < 0)
714 return n;
715
716 for (i = 0; i < n; i++) {
717 uint64_t id;
718
719 r = btrfs_qgroupid_split(qgroups[i], &level, &id);
720 if (r < 0)
721 return r;
722
723 if (id != subvol_id)
724 continue;
725
726 if (lowest == (uint64_t) -1 || level < lowest) {
727 lowest_qgroupid = qgroups[i];
728 lowest = level;
729 }
730 }
731
732 if (lowest == (uint64_t) -1) {
733 /* No suitable higher-level qgroup found, let's return
734 * the leaf qgroup instead, and indicate that with the
735 * return value. */
736
737 *ret = subvol_id;
738 return 0;
739 }
740
741 *ret = lowest_qgroupid;
742 return 1;
743 }
744
745 int btrfs_subvol_get_subtree_quota_fd(int fd, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
746 uint64_t qgroupid;
747 int r;
748
749 assert(fd >= 0);
750 assert(ret);
751
752 /* This determines the quota data of the qgroup with the
753 * lowest level, that shares the id part with the specified
754 * subvolume. This is useful for determining the quota data
755 * for entire subvolume subtrees, as long as the subtrees have
756 * been set up with btrfs_qgroup_subvol_auto_fd() or in a
757 * compatible way */
758
759 r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &qgroupid);
760 if (r < 0)
761 return r;
762
763 return btrfs_qgroup_get_quota_fd(fd, qgroupid, ret);
764 }
765
766 int btrfs_subvol_get_subtree_quota(const char *path, uint64_t subvol_id, BtrfsQuotaInfo *ret) {
767 _cleanup_close_ int fd = -1;
768
769 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
770 if (fd < 0)
771 return -errno;
772
773 return btrfs_subvol_get_subtree_quota_fd(fd, subvol_id, ret);
774 }
775
776 int btrfs_defrag_fd(int fd) {
777 struct stat st;
778
779 assert(fd >= 0);
780
781 if (fstat(fd, &st) < 0)
782 return -errno;
783
784 if (!S_ISREG(st.st_mode))
785 return -EINVAL;
786
787 if (ioctl(fd, BTRFS_IOC_DEFRAG, NULL) < 0)
788 return -errno;
789
790 return 0;
791 }
792
793 int btrfs_defrag(const char *p) {
794 _cleanup_close_ int fd = -1;
795
796 fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
797 if (fd < 0)
798 return -errno;
799
800 return btrfs_defrag_fd(fd);
801 }
802
803 int btrfs_quota_enable_fd(int fd, bool b) {
804 struct btrfs_ioctl_quota_ctl_args args = {
805 .cmd = b ? BTRFS_QUOTA_CTL_ENABLE : BTRFS_QUOTA_CTL_DISABLE,
806 };
807 int r;
808
809 assert(fd >= 0);
810
811 r = btrfs_is_filesystem(fd);
812 if (r < 0)
813 return r;
814 if (!r)
815 return -ENOTTY;
816
817 if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &args) < 0)
818 return -errno;
819
820 return 0;
821 }
822
823 int btrfs_quota_enable(const char *path, bool b) {
824 _cleanup_close_ int fd = -1;
825
826 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
827 if (fd < 0)
828 return -errno;
829
830 return btrfs_quota_enable_fd(fd, b);
831 }
832
833 int btrfs_qgroup_set_limit_fd(int fd, uint64_t qgroupid, uint64_t referenced_max) {
834
835 struct btrfs_ioctl_qgroup_limit_args args = {
836 .lim.max_rfer = referenced_max,
837 .lim.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
838 };
839 unsigned c;
840 int r;
841
842 assert(fd >= 0);
843
844 if (qgroupid == 0) {
845 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
846 if (r < 0)
847 return r;
848 } else {
849 r = btrfs_is_filesystem(fd);
850 if (r < 0)
851 return r;
852 if (!r)
853 return -ENOTTY;
854 }
855
856 args.qgroupid = qgroupid;
857
858 for (c = 0;; c++) {
859 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &args) < 0) {
860
861 if (errno == EBUSY && c < 10) {
862 (void) btrfs_quota_scan_wait(fd);
863 continue;
864 }
865
866 return -errno;
867 }
868
869 break;
870 }
871
872 return 0;
873 }
874
875 int btrfs_qgroup_set_limit(const char *path, uint64_t qgroupid, uint64_t referenced_max) {
876 _cleanup_close_ int fd = -1;
877
878 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
879 if (fd < 0)
880 return -errno;
881
882 return btrfs_qgroup_set_limit_fd(fd, qgroupid, referenced_max);
883 }
884
/* Sets the "referenced" limit on the qgroup that
 * btrfs_subvol_find_subtree_qgroup() selects for the subvolume.
 *
 * Returns a negative error if the qgroup lookup fails, otherwise the
 * result of btrfs_qgroup_set_limit_fd(). */
int btrfs_subvol_set_subtree_quota_limit_fd(int fd, uint64_t subvol_id, uint64_t referenced_max) {
        uint64_t subtree_qgroupid;
        int r;

        assert(fd >= 0);

        r = btrfs_subvol_find_subtree_qgroup(fd, subvol_id, &subtree_qgroupid);

        return r < 0 ? r : btrfs_qgroup_set_limit_fd(fd, subtree_qgroupid, referenced_max);
}
897
898 int btrfs_subvol_set_subtree_quota_limit(const char *path, uint64_t subvol_id, uint64_t referenced_max) {
899 _cleanup_close_ int fd = -1;
900
901 fd = open(path, O_RDONLY|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW);
902 if (fd < 0)
903 return -errno;
904
905 return btrfs_subvol_set_subtree_quota_limit_fd(fd, subvol_id, referenced_max);
906 }
907
908 int btrfs_resize_loopback_fd(int fd, uint64_t new_size, bool grow_only) {
909 struct btrfs_ioctl_vol_args args = {};
910 _cleanup_free_ char *p = NULL, *loop = NULL, *backing = NULL;
911 _cleanup_close_ int loop_fd = -1, backing_fd = -1;
912 struct stat st;
913 dev_t dev = 0;
914 int r;
915
916 /* btrfs cannot handle file systems < 16M, hence use this as minimum */
917 if (new_size < 16*1024*1024)
918 new_size = 16*1024*1024;
919
920 r = btrfs_get_block_device_fd(fd, &dev);
921 if (r < 0)
922 return r;
923 if (r == 0)
924 return -ENODEV;
925
926 if (asprintf(&p, "/sys/dev/block/%u:%u/loop/backing_file", major(dev), minor(dev)) < 0)
927 return -ENOMEM;
928 r = read_one_line_file(p, &backing);
929 if (r == -ENOENT)
930 return -ENODEV;
931 if (r < 0)
932 return r;
933 if (isempty(backing) || !path_is_absolute(backing))
934 return -ENODEV;
935
936 backing_fd = open(backing, O_RDWR|O_CLOEXEC|O_NOCTTY);
937 if (backing_fd < 0)
938 return -errno;
939
940 if (fstat(backing_fd, &st) < 0)
941 return -errno;
942 if (!S_ISREG(st.st_mode))
943 return -ENODEV;
944
945 if (new_size == (uint64_t) st.st_size)
946 return 0;
947
948 if (grow_only && new_size < (uint64_t) st.st_size)
949 return -EINVAL;
950
951 if (asprintf(&loop, "/dev/block/%u:%u", major(dev), minor(dev)) < 0)
952 return -ENOMEM;
953 loop_fd = open(loop, O_RDWR|O_CLOEXEC|O_NOCTTY);
954 if (loop_fd < 0)
955 return -errno;
956
957 if (snprintf(args.name, sizeof(args.name), "%" PRIu64, new_size) >= (int) sizeof(args.name))
958 return -EINVAL;
959
960 if (new_size < (uint64_t) st.st_size) {
961 /* Decrease size: first decrease btrfs size, then shorten loopback */
962 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
963 return -errno;
964 }
965
966 if (ftruncate(backing_fd, new_size) < 0)
967 return -errno;
968
969 if (ioctl(loop_fd, LOOP_SET_CAPACITY, 0) < 0)
970 return -errno;
971
972 if (new_size > (uint64_t) st.st_size) {
973 /* Increase size: first enlarge loopback, then increase btrfs size */
974 if (ioctl(fd, BTRFS_IOC_RESIZE, &args) < 0)
975 return -errno;
976 }
977
978 /* Make sure the free disk space is correctly updated for both file systems */
979 (void) fsync(fd);
980 (void) fsync(backing_fd);
981
982 return 1;
983 }
984
985 int btrfs_resize_loopback(const char *p, uint64_t new_size, bool grow_only) {
986 _cleanup_close_ int fd = -1;
987
988 fd = open(p, O_RDONLY|O_NOCTTY|O_CLOEXEC);
989 if (fd < 0)
990 return -errno;
991
992 return btrfs_resize_loopback_fd(fd, new_size, grow_only);
993 }
994
995 int btrfs_qgroupid_make(uint64_t level, uint64_t id, uint64_t *ret) {
996 assert(ret);
997
998 if (level >= (UINT64_C(1) << (64 - BTRFS_QGROUP_LEVEL_SHIFT)))
999 return -EINVAL;
1000
1001 if (id >= (UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT))
1002 return -EINVAL;
1003
1004 *ret = (level << BTRFS_QGROUP_LEVEL_SHIFT) | id;
1005 return 0;
1006 }
1007
1008 int btrfs_qgroupid_split(uint64_t qgroupid, uint64_t *level, uint64_t *id) {
1009 assert(level || id);
1010
1011 if (level)
1012 *level = qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
1013
1014 if (id)
1015 *id = qgroupid & ((UINT64_C(1) << BTRFS_QGROUP_LEVEL_SHIFT) - 1);
1016
1017 return 0;
1018 }
1019
1020 static int qgroup_create_or_destroy(int fd, bool b, uint64_t qgroupid) {
1021
1022 struct btrfs_ioctl_qgroup_create_args args = {
1023 .create = b,
1024 .qgroupid = qgroupid,
1025 };
1026 unsigned c;
1027 int r;
1028
1029 r = btrfs_is_filesystem(fd);
1030 if (r < 0)
1031 return r;
1032 if (r == 0)
1033 return -ENOTTY;
1034
1035 for (c = 0;; c++) {
1036 if (ioctl(fd, BTRFS_IOC_QGROUP_CREATE, &args) < 0) {
1037
1038 /* If quota is not enabled, we get EINVAL. Turn this into a recognizable error */
1039 if (errno == EINVAL)
1040 return -ENOPROTOOPT;
1041
1042 if (errno == EBUSY && c < 10) {
1043 (void) btrfs_quota_scan_wait(fd);
1044 continue;
1045 }
1046
1047 return -errno;
1048 }
1049
1050 break;
1051 }
1052
1053 return 0;
1054 }
1055
/* Creates the quota group 'qgroupid'; see qgroup_create_or_destroy()
 * for the return values. */
int btrfs_qgroup_create(int fd, uint64_t qgroupid) {
        const bool create = true;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
1059
/* Destroys the quota group 'qgroupid'; see qgroup_create_or_destroy()
 * for the return values. */
int btrfs_qgroup_destroy(int fd, uint64_t qgroupid) {
        const bool create = false;

        return qgroup_create_or_destroy(fd, create, qgroupid);
}
1063
1064 int btrfs_qgroup_destroy_recursive(int fd, uint64_t qgroupid) {
1065 _cleanup_free_ uint64_t *qgroups = NULL;
1066 uint64_t subvol_id;
1067 int i, n, r;
1068
1069 /* Destroys the specified qgroup, but unassigns it from all
1070 * its parents first. Also, it recursively destroys all
1071 * qgroups it is assgined to that have the same id part of the
1072 * qgroupid as the specified group. */
1073
1074 r = btrfs_qgroupid_split(qgroupid, NULL, &subvol_id);
1075 if (r < 0)
1076 return r;
1077
1078 n = btrfs_qgroup_find_parents(fd, qgroupid, &qgroups);
1079 if (n < 0)
1080 return n;
1081
1082 for (i = 0; i < n; i++) {
1083 uint64_t id;
1084
1085 r = btrfs_qgroupid_split(qgroups[i], NULL, &id);
1086 if (r < 0)
1087 return r;
1088
1089 r = btrfs_qgroup_unassign(fd, qgroupid, qgroups[i]);
1090 if (r < 0)
1091 return r;
1092
1093 if (id != subvol_id)
1094 continue;
1095
1096 /* The parent qgroupid shares the same id part with
1097 * us? If so, destroy it too. */
1098
1099 (void) btrfs_qgroup_destroy_recursive(fd, qgroups[i]);
1100 }
1101
1102 return btrfs_qgroup_destroy(fd, qgroupid);
1103 }
1104
1105 int btrfs_quota_scan_start(int fd) {
1106 struct btrfs_ioctl_quota_rescan_args args = {};
1107
1108 assert(fd >= 0);
1109
1110 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &args) < 0)
1111 return -errno;
1112
1113 return 0;
1114 }
1115
1116 int btrfs_quota_scan_wait(int fd) {
1117 assert(fd >= 0);
1118
1119 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT) < 0)
1120 return -errno;
1121
1122 return 0;
1123 }
1124
1125 int btrfs_quota_scan_ongoing(int fd) {
1126 struct btrfs_ioctl_quota_rescan_args args = {};
1127
1128 assert(fd >= 0);
1129
1130 if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_STATUS, &args) < 0)
1131 return -errno;
1132
1133 return !!args.flags;
1134 }
1135
1136 static int qgroup_assign_or_unassign(int fd, bool b, uint64_t child, uint64_t parent) {
1137 struct btrfs_ioctl_qgroup_assign_args args = {
1138 .assign = b,
1139 .src = child,
1140 .dst = parent,
1141 };
1142 unsigned c;
1143 int r;
1144
1145 r = btrfs_is_filesystem(fd);
1146 if (r < 0)
1147 return r;
1148 if (r == 0)
1149 return -ENOTTY;
1150
1151 for (c = 0;; c++) {
1152 r = ioctl(fd, BTRFS_IOC_QGROUP_ASSIGN, &args);
1153 if (r < 0) {
1154 if (errno == EBUSY && c < 10) {
1155 (void) btrfs_quota_scan_wait(fd);
1156 continue;
1157 }
1158
1159 return -errno;
1160 }
1161
1162 if (r == 0)
1163 return 0;
1164
1165 /* If the return value is > 0, we need to request a rescan */
1166
1167 (void) btrfs_quota_scan_start(fd);
1168 return 1;
1169 }
1170 }
1171
/* Assigns the qgroup 'child' to the qgroup 'parent'; see
 * qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_assign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = true;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1175
/* Removes the assignment of the qgroup 'child' to the qgroup
 * 'parent'; see qgroup_assign_or_unassign() for the return values. */
int btrfs_qgroup_unassign(int fd, uint64_t child, uint64_t parent) {
        const bool assign = false;

        return qgroup_assign_or_unassign(fd, assign, child, parent);
}
1179
1180 static int subvol_remove_children(int fd, const char *subvolume, uint64_t subvol_id, BtrfsRemoveFlags flags) {
1181 struct btrfs_ioctl_search_args args = {
1182 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1183
1184 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1185 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1186
1187 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1188 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1189
1190 .key.min_transid = 0,
1191 .key.max_transid = (uint64_t) -1,
1192 };
1193
1194 struct btrfs_ioctl_vol_args vol_args = {};
1195 _cleanup_close_ int subvol_fd = -1;
1196 struct stat st;
1197 bool made_writable = false;
1198 int r;
1199
1200 assert(fd >= 0);
1201 assert(subvolume);
1202
1203 if (fstat(fd, &st) < 0)
1204 return -errno;
1205
1206 if (!S_ISDIR(st.st_mode))
1207 return -EINVAL;
1208
1209 subvol_fd = openat(fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1210 if (subvol_fd < 0)
1211 return -errno;
1212
1213 if (subvol_id == 0) {
1214 r = btrfs_subvol_get_id_fd(subvol_fd, &subvol_id);
1215 if (r < 0)
1216 return r;
1217 }
1218
1219 /* First, try to remove the subvolume. If it happens to be
1220 * already empty, this will just work. */
1221 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1222 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) >= 0) {
1223 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id); /* for the leaf subvolumes, the qgroup id is identical to the subvol id */
1224 return 0;
1225 }
1226 if (!(flags & BTRFS_REMOVE_RECURSIVE) || errno != ENOTEMPTY)
1227 return -errno;
1228
1229 /* OK, the subvolume is not empty, let's look for child
1230 * subvolumes, and remove them, first */
1231
1232 args.key.min_offset = args.key.max_offset = subvol_id;
1233
1234 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1235 const struct btrfs_ioctl_search_header *sh;
1236 unsigned i;
1237
1238 args.key.nr_items = 256;
1239 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1240 return -errno;
1241
1242 if (args.key.nr_items <= 0)
1243 break;
1244
1245 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1246 _cleanup_free_ char *p = NULL;
1247 const struct btrfs_root_ref *ref;
1248 struct btrfs_ioctl_ino_lookup_args ino_args;
1249
1250 btrfs_ioctl_search_args_set(&args, sh);
1251
1252 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1253 continue;
1254 if (sh->offset != subvol_id)
1255 continue;
1256
1257 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1258
1259 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1260 if (!p)
1261 return -ENOMEM;
1262
1263 zero(ino_args);
1264 ino_args.treeid = subvol_id;
1265 ino_args.objectid = htole64(ref->dirid);
1266
1267 if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1268 return -errno;
1269
1270 if (!made_writable) {
1271 r = btrfs_subvol_set_read_only_fd(subvol_fd, false);
1272 if (r < 0)
1273 return r;
1274
1275 made_writable = true;
1276 }
1277
1278 if (isempty(ino_args.name))
1279 /* Subvolume is in the top-level
1280 * directory of the subvolume. */
1281 r = subvol_remove_children(subvol_fd, p, sh->objectid, flags);
1282 else {
1283 _cleanup_close_ int child_fd = -1;
1284
1285 /* Subvolume is somewhere further down,
1286 * hence we need to open the
1287 * containing directory first */
1288
1289 child_fd = openat(subvol_fd, ino_args.name, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1290 if (child_fd < 0)
1291 return -errno;
1292
1293 r = subvol_remove_children(child_fd, p, sh->objectid, flags);
1294 }
1295 if (r < 0)
1296 return r;
1297 }
1298
1299 /* Increase search key by one, to read the next item, if we can. */
1300 if (!btrfs_ioctl_search_args_inc(&args))
1301 break;
1302 }
1303
1304 /* OK, the child subvolumes should all be gone now, let's try
1305 * again to remove the subvolume */
1306 if (ioctl(fd, BTRFS_IOC_SNAP_DESTROY, &vol_args) < 0)
1307 return -errno;
1308
1309 (void) btrfs_qgroup_destroy_recursive(fd, subvol_id);
1310 return 0;
1311 }
1312
1313 int btrfs_subvol_remove(const char *path, BtrfsRemoveFlags flags) {
1314 _cleanup_close_ int fd = -1;
1315 const char *subvolume;
1316 int r;
1317
1318 assert(path);
1319
1320 r = extract_subvolume_name(path, &subvolume);
1321 if (r < 0)
1322 return r;
1323
1324 fd = open_parent(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1325 if (fd < 0)
1326 return fd;
1327
1328 return subvol_remove_children(fd, subvolume, 0, flags);
1329 }
1330
1331 int btrfs_subvol_remove_fd(int fd, const char *subvolume, BtrfsRemoveFlags flags) {
1332 return subvol_remove_children(fd, subvolume, 0, flags);
1333 }
1334
1335 int btrfs_qgroup_copy_limits(int fd, uint64_t old_qgroupid, uint64_t new_qgroupid) {
1336
1337 struct btrfs_ioctl_search_args args = {
1338 /* Tree of quota items */
1339 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1340
1341 /* The object ID is always 0 */
1342 .key.min_objectid = 0,
1343 .key.max_objectid = 0,
1344
1345 /* Look precisely for the quota items */
1346 .key.min_type = BTRFS_QGROUP_LIMIT_KEY,
1347 .key.max_type = BTRFS_QGROUP_LIMIT_KEY,
1348
1349 /* For our qgroup */
1350 .key.min_offset = old_qgroupid,
1351 .key.max_offset = old_qgroupid,
1352
1353 /* No restrictions on the other components */
1354 .key.min_transid = 0,
1355 .key.max_transid = (uint64_t) -1,
1356 };
1357
1358 int r;
1359
1360 r = btrfs_is_filesystem(fd);
1361 if (r < 0)
1362 return r;
1363 if (!r)
1364 return -ENOTTY;
1365
1366 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1367 const struct btrfs_ioctl_search_header *sh;
1368 unsigned i;
1369
1370 args.key.nr_items = 256;
1371 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1372 if (errno == ENOENT) /* quota tree missing: quota is not enabled, hence nothing to copy */
1373 break;
1374
1375 return -errno;
1376 }
1377
1378 if (args.key.nr_items <= 0)
1379 break;
1380
1381 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1382 const struct btrfs_qgroup_limit_item *qli = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1383 struct btrfs_ioctl_qgroup_limit_args qargs;
1384 unsigned c;
1385
1386 /* Make sure we start the next search at least from this entry */
1387 btrfs_ioctl_search_args_set(&args, sh);
1388
1389 if (sh->objectid != 0)
1390 continue;
1391 if (sh->type != BTRFS_QGROUP_LIMIT_KEY)
1392 continue;
1393 if (sh->offset != old_qgroupid)
1394 continue;
1395
1396 /* We found the entry, now copy things over. */
1397
1398 qargs = (struct btrfs_ioctl_qgroup_limit_args) {
1399 .qgroupid = new_qgroupid,
1400
1401 .lim.max_rfer = le64toh(qli->max_rfer),
1402 .lim.max_excl = le64toh(qli->max_excl),
1403 .lim.rsv_rfer = le64toh(qli->rsv_rfer),
1404 .lim.rsv_excl = le64toh(qli->rsv_excl),
1405
1406 .lim.flags = le64toh(qli->flags) & (BTRFS_QGROUP_LIMIT_MAX_RFER|
1407 BTRFS_QGROUP_LIMIT_MAX_EXCL|
1408 BTRFS_QGROUP_LIMIT_RSV_RFER|
1409 BTRFS_QGROUP_LIMIT_RSV_EXCL),
1410 };
1411
1412 for (c = 0;; c++) {
1413 if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &qargs) < 0) {
1414 if (errno == EBUSY && c < 10) {
1415 (void) btrfs_quota_scan_wait(fd);
1416 continue;
1417 }
1418 return -errno;
1419 }
1420
1421 break;
1422 }
1423
1424 return 1;
1425 }
1426
1427 /* Increase search key by one, to read the next item, if we can. */
1428 if (!btrfs_ioctl_search_args_inc(&args))
1429 break;
1430 }
1431
1432 return 0;
1433 }
1434
1435 static int copy_quota_hierarchy(int fd, uint64_t old_subvol_id, uint64_t new_subvol_id) {
1436 _cleanup_free_ uint64_t *old_qgroups = NULL, *old_parent_qgroups = NULL;
1437 bool copy_from_parent = false, insert_intermediary_qgroup = false;
1438 int n_old_qgroups, n_old_parent_qgroups, r, i;
1439 uint64_t old_parent_id;
1440
1441 assert(fd >= 0);
1442
1443 /* Copies a reduced form of quota information from the old to
1444 * the new subvolume. */
1445
1446 n_old_qgroups = btrfs_qgroup_find_parents(fd, old_subvol_id, &old_qgroups);
1447 if (n_old_qgroups <= 0) /* Nothing to copy */
1448 return n_old_qgroups;
1449
1450 r = btrfs_subvol_get_parent(fd, old_subvol_id, &old_parent_id);
1451 if (r == -ENXIO)
1452 /* We have no parent, hence nothing to copy. */
1453 n_old_parent_qgroups = 0;
1454 else if (r < 0)
1455 return r;
1456 else {
1457 n_old_parent_qgroups = btrfs_qgroup_find_parents(fd, old_parent_id, &old_parent_qgroups);
1458 if (n_old_parent_qgroups < 0)
1459 return n_old_parent_qgroups;
1460 }
1461
1462 for (i = 0; i < n_old_qgroups; i++) {
1463 uint64_t id;
1464 int j;
1465
1466 r = btrfs_qgroupid_split(old_qgroups[i], NULL, &id);
1467 if (r < 0)
1468 return r;
1469
1470 if (id == old_subvol_id) {
1471 /* The old subvolume was member of a qgroup
1472 * that had the same id, but a different level
1473 * as it self. Let's set up something similar
1474 * in the destination. */
1475 insert_intermediary_qgroup = true;
1476 break;
1477 }
1478
1479 for (j = 0; j < n_old_parent_qgroups; j++)
1480 if (old_parent_qgroups[j] == old_qgroups[i]) {
1481 /* The old subvolume shared a common
1482 * parent qgroup with its parent
1483 * subvolume. Let's set up something
1484 * similar in the destination. */
1485 copy_from_parent = true;
1486 }
1487 }
1488
1489 if (!insert_intermediary_qgroup && !copy_from_parent)
1490 return 0;
1491
1492 return btrfs_subvol_auto_qgroup_fd(fd, new_subvol_id, insert_intermediary_qgroup);
1493 }
1494
/* Copies quota limits from the old to the new subvolume: first those
 * of the leaf qgroup, then those of the subtree qgroup, provided both
 * subvolumes have one.
 *
 * Returns > 0 if the leaf limits were copied, 0 if not, and a
 * negative error on failure. Note that a successful copy of the
 * subtree limits (positive result of the second copy) is passed
 * through to the caller as is. */
static int copy_subtree_quota_limits(int fd, uint64_t old_subvol, uint64_t new_subvol) {
        uint64_t src_subtree, dst_subtree;
        int leaf_copied, k;

        /* Step 1: the limits of the leaf qgroup */
        k = btrfs_qgroup_copy_limits(fd, old_subvol, new_subvol);
        if (k < 0)
                return k;
        leaf_copied = k > 0;

        /* Step 2: the limits of the subtree qgroup, if source and destination both have one */
        k = btrfs_subvol_find_subtree_qgroup(fd, old_subvol, &src_subtree);
        if (k <= 0)
                return k < 0 ? k : leaf_copied;

        k = btrfs_subvol_find_subtree_qgroup(fd, new_subvol, &dst_subtree);
        if (k <= 0)
                return k < 0 ? k : leaf_copied;

        k = btrfs_qgroup_copy_limits(fd, src_subtree, dst_subtree);
        if (k == 0)
                return leaf_copied;

        return k;
}
1525
1526 static int subvol_snapshot_children(int old_fd, int new_fd, const char *subvolume, uint64_t old_subvol_id, BtrfsSnapshotFlags flags) {
1527
1528 struct btrfs_ioctl_search_args args = {
1529 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
1530
1531 .key.min_objectid = BTRFS_FIRST_FREE_OBJECTID,
1532 .key.max_objectid = BTRFS_LAST_FREE_OBJECTID,
1533
1534 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
1535 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
1536
1537 .key.min_transid = 0,
1538 .key.max_transid = (uint64_t) -1,
1539 };
1540
1541 struct btrfs_ioctl_vol_args_v2 vol_args = {
1542 .flags = flags & BTRFS_SNAPSHOT_READ_ONLY ? BTRFS_SUBVOL_RDONLY : 0,
1543 .fd = old_fd,
1544 };
1545 _cleanup_close_ int subvolume_fd = -1;
1546 uint64_t new_subvol_id;
1547 int r;
1548
1549 assert(old_fd >= 0);
1550 assert(new_fd >= 0);
1551 assert(subvolume);
1552
1553 strncpy(vol_args.name, subvolume, sizeof(vol_args.name)-1);
1554
1555 if (ioctl(new_fd, BTRFS_IOC_SNAP_CREATE_V2, &vol_args) < 0)
1556 return -errno;
1557
1558 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE) &&
1559 !(flags & BTRFS_SNAPSHOT_QUOTA))
1560 return 0;
1561
1562 if (old_subvol_id == 0) {
1563 r = btrfs_subvol_get_id_fd(old_fd, &old_subvol_id);
1564 if (r < 0)
1565 return r;
1566 }
1567
1568 r = btrfs_subvol_get_id(new_fd, vol_args.name, &new_subvol_id);
1569 if (r < 0)
1570 return r;
1571
1572 if (flags & BTRFS_SNAPSHOT_QUOTA)
1573 (void) copy_quota_hierarchy(new_fd, old_subvol_id, new_subvol_id);
1574
1575 if (!(flags & BTRFS_SNAPSHOT_RECURSIVE)) {
1576
1577 if (flags & BTRFS_SNAPSHOT_QUOTA)
1578 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1579
1580 return 0;
1581 }
1582
1583 args.key.min_offset = args.key.max_offset = old_subvol_id;
1584
1585 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1586 const struct btrfs_ioctl_search_header *sh;
1587 unsigned i;
1588
1589 args.key.nr_items = 256;
1590 if (ioctl(old_fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
1591 return -errno;
1592
1593 if (args.key.nr_items <= 0)
1594 break;
1595
1596 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1597 _cleanup_free_ char *p = NULL, *c = NULL, *np = NULL;
1598 struct btrfs_ioctl_ino_lookup_args ino_args;
1599 const struct btrfs_root_ref *ref;
1600 _cleanup_close_ int old_child_fd = -1, new_child_fd = -1;
1601
1602 btrfs_ioctl_search_args_set(&args, sh);
1603
1604 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
1605 continue;
1606
1607 /* Avoid finding the source subvolume a second
1608 * time */
1609 if (sh->offset != old_subvol_id)
1610 continue;
1611
1612 /* Avoid running into loops if the new
1613 * subvolume is below the old one. */
1614 if (sh->objectid == new_subvol_id)
1615 continue;
1616
1617 ref = BTRFS_IOCTL_SEARCH_HEADER_BODY(sh);
1618 p = strndup((char*) ref + sizeof(struct btrfs_root_ref), le64toh(ref->name_len));
1619 if (!p)
1620 return -ENOMEM;
1621
1622 zero(ino_args);
1623 ino_args.treeid = old_subvol_id;
1624 ino_args.objectid = htole64(ref->dirid);
1625
1626 if (ioctl(old_fd, BTRFS_IOC_INO_LOOKUP, &ino_args) < 0)
1627 return -errno;
1628
1629 /* The kernel returns an empty name if the
1630 * subvolume is in the top-level directory,
1631 * and otherwise appends a slash, so that we
1632 * can just concatenate easily here, without
1633 * adding a slash. */
1634 c = strappend(ino_args.name, p);
1635 if (!c)
1636 return -ENOMEM;
1637
1638 old_child_fd = openat(old_fd, c, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1639 if (old_child_fd < 0)
1640 return -errno;
1641
1642 np = strjoin(subvolume, "/", ino_args.name, NULL);
1643 if (!np)
1644 return -ENOMEM;
1645
1646 new_child_fd = openat(new_fd, np, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1647 if (new_child_fd < 0)
1648 return -errno;
1649
1650 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1651 /* If the snapshot is read-only we
1652 * need to mark it writable
1653 * temporarily, to put the subsnapshot
1654 * into place. */
1655
1656 if (subvolume_fd < 0) {
1657 subvolume_fd = openat(new_fd, subvolume, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1658 if (subvolume_fd < 0)
1659 return -errno;
1660 }
1661
1662 r = btrfs_subvol_set_read_only_fd(subvolume_fd, false);
1663 if (r < 0)
1664 return r;
1665 }
1666
1667 /* When btrfs clones the subvolumes, child
1668 * subvolumes appear as empty directories. Remove
1669 * them, so that we can create a new snapshot
1670 * in their place */
1671 if (unlinkat(new_child_fd, p, AT_REMOVEDIR) < 0) {
1672 int k = -errno;
1673
1674 if (flags & BTRFS_SNAPSHOT_READ_ONLY)
1675 (void) btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1676
1677 return k;
1678 }
1679
1680 r = subvol_snapshot_children(old_child_fd, new_child_fd, p, sh->objectid, flags & ~BTRFS_SNAPSHOT_FALLBACK_COPY);
1681
1682 /* Restore the readonly flag */
1683 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1684 int k;
1685
1686 k = btrfs_subvol_set_read_only_fd(subvolume_fd, true);
1687 if (r >= 0 && k < 0)
1688 return k;
1689 }
1690
1691 if (r < 0)
1692 return r;
1693 }
1694
1695 /* Increase search key by one, to read the next item, if we can. */
1696 if (!btrfs_ioctl_search_args_inc(&args))
1697 break;
1698 }
1699
1700 if (flags & BTRFS_SNAPSHOT_QUOTA)
1701 (void) copy_subtree_quota_limits(new_fd, old_subvol_id, new_subvol_id);
1702
1703 return 0;
1704 }
1705
1706 int btrfs_subvol_snapshot_fd(int old_fd, const char *new_path, BtrfsSnapshotFlags flags) {
1707 _cleanup_close_ int new_fd = -1;
1708 const char *subvolume;
1709 int r;
1710
1711 assert(old_fd >= 0);
1712 assert(new_path);
1713
1714 r = btrfs_is_subvol_fd(old_fd);
1715 if (r < 0)
1716 return r;
1717 if (r == 0) {
1718 if (!(flags & BTRFS_SNAPSHOT_FALLBACK_COPY))
1719 return -EISDIR;
1720
1721 r = btrfs_subvol_make(new_path);
1722 if (r < 0)
1723 return r;
1724
1725 r = copy_directory_fd(old_fd, new_path, true);
1726 if (r < 0) {
1727 (void) btrfs_subvol_remove(new_path, BTRFS_REMOVE_QUOTA);
1728 return r;
1729 }
1730
1731 if (flags & BTRFS_SNAPSHOT_READ_ONLY) {
1732 r = btrfs_subvol_set_read_only(new_path, true);
1733 if (r < 0) {
1734 (void) btrfs_subvol_remove(new_path, BTRFS_REMOVE_QUOTA);
1735 return r;
1736 }
1737 }
1738
1739 return 0;
1740 }
1741
1742 r = extract_subvolume_name(new_path, &subvolume);
1743 if (r < 0)
1744 return r;
1745
1746 new_fd = open_parent(new_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1747 if (new_fd < 0)
1748 return new_fd;
1749
1750 return subvol_snapshot_children(old_fd, new_fd, subvolume, 0, flags);
1751 }
1752
1753 int btrfs_subvol_snapshot(const char *old_path, const char *new_path, BtrfsSnapshotFlags flags) {
1754 _cleanup_close_ int old_fd = -1;
1755
1756 assert(old_path);
1757 assert(new_path);
1758
1759 old_fd = open(old_path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
1760 if (old_fd < 0)
1761 return -errno;
1762
1763 return btrfs_subvol_snapshot_fd(old_fd, new_path, flags);
1764 }
1765
1766 int btrfs_qgroup_find_parents(int fd, uint64_t qgroupid, uint64_t **ret) {
1767
1768 struct btrfs_ioctl_search_args args = {
1769 /* Tree of quota items */
1770 .key.tree_id = BTRFS_QUOTA_TREE_OBJECTID,
1771
1772 /* Look precisely for the quota relation items */
1773 .key.min_type = BTRFS_QGROUP_RELATION_KEY,
1774 .key.max_type = BTRFS_QGROUP_RELATION_KEY,
1775
1776 /* No restrictions on the other components */
1777 .key.min_offset = 0,
1778 .key.max_offset = (uint64_t) -1,
1779
1780 .key.min_transid = 0,
1781 .key.max_transid = (uint64_t) -1,
1782 };
1783
1784 _cleanup_free_ uint64_t *items = NULL;
1785 size_t n_items = 0, n_allocated = 0;
1786 int r;
1787
1788 assert(fd >= 0);
1789 assert(ret);
1790
1791 if (qgroupid == 0) {
1792 r = btrfs_subvol_get_id_fd(fd, &qgroupid);
1793 if (r < 0)
1794 return r;
1795 } else {
1796 r = btrfs_is_filesystem(fd);
1797 if (r < 0)
1798 return r;
1799 if (!r)
1800 return -ENOTTY;
1801 }
1802
1803 args.key.min_objectid = args.key.max_objectid = qgroupid;
1804
1805 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
1806 const struct btrfs_ioctl_search_header *sh;
1807 unsigned i;
1808
1809 args.key.nr_items = 256;
1810 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0) {
1811 if (errno == ENOENT) /* quota tree missing: quota is disabled */
1812 break;
1813
1814 return -errno;
1815 }
1816
1817 if (args.key.nr_items <= 0)
1818 break;
1819
1820 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
1821
1822 /* Make sure we start the next search at least from this entry */
1823 btrfs_ioctl_search_args_set(&args, sh);
1824
1825 if (sh->type != BTRFS_QGROUP_RELATION_KEY)
1826 continue;
1827 if (sh->offset < sh->objectid)
1828 continue;
1829 if (sh->objectid != qgroupid)
1830 continue;
1831
1832 if (!GREEDY_REALLOC(items, n_allocated, n_items+1))
1833 return -ENOMEM;
1834
1835 items[n_items++] = sh->offset;
1836 }
1837
1838 /* Increase search key by one, to read the next item, if we can. */
1839 if (!btrfs_ioctl_search_args_inc(&args))
1840 break;
1841 }
1842
1843 if (n_items <= 0) {
1844 *ret = NULL;
1845 return 0;
1846 }
1847
1848 *ret = items;
1849 items = NULL;
1850
1851 return (int) n_items;
1852 }
1853
1854 int btrfs_subvol_auto_qgroup_fd(int fd, uint64_t subvol_id, bool insert_intermediary_qgroup) {
1855 _cleanup_free_ uint64_t *qgroups = NULL;
1856 uint64_t parent_subvol;
1857 bool changed = false;
1858 int n = 0, r;
1859
1860 assert(fd >= 0);
1861
1862 /*
1863 * Sets up the specified subvolume's qgroup automatically in
1864 * one of two ways:
1865 *
1866 * If insert_intermediary_qgroup is false, the subvolume's
1867 * leaf qgroup will be assigned to the same parent qgroups as
1868 * the subvolume's parent subvolume.
1869 *
1870 * If insert_intermediary_qgroup is true a new intermediary
1871 * higher-level qgroup is created, with a higher level number,
1872 * but reusing the id of the subvolume. The level number is
1873 * picked as one smaller than the lowest level qgroup the
1874 * parent subvolume is a member of. If the parent subvolume's
1875 * leaf qgroup is assigned to no higher-level qgroup a new
1876 * qgroup of level 255 is created instead. Either way, the new
1877 * qgroup is then assigned to the parent's higher-level
1878 * qgroup, and the subvolume itself is assigned to it.
1879 *
1880 * If the subvolume is already assigned to a higher level
1881 * qgroup, no operation is executed.
1882 *
1883 * Effectively this means: regardless if
1884 * insert_intermediary_qgroup is true or not, after this
1885 * function is invoked the subvolume will be accounted within
1886 * the same qgroups as the parent. However, if it is true, it
1887 * will also get its own higher-level qgroup, which may in
1888 * turn be used by subvolumes created beneath this subvolume
1889 * later on.
1890 *
1891 * This hence defines a simple default qgroup setup for
1892 * subvolumes, as long as this function is invoked on each
1893 * created subvolume: each subvolume is always accounting
1894 * together with its immediate parents. Optionally, if
1895 * insert_intermediary_qgroup is true, it will also get a
1896 * qgroup that then includes all its own child subvolumes.
1897 */
1898
1899 if (subvol_id == 0) {
1900 r = btrfs_is_subvol_fd(fd);
1901 if (r < 0)
1902 return r;
1903 if (!r)
1904 return -ENOTTY;
1905
1906 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
1907 if (r < 0)
1908 return r;
1909 }
1910
1911 n = btrfs_qgroup_find_parents(fd, subvol_id, &qgroups);
1912 if (n < 0)
1913 return n;
1914 if (n > 0) /* already parent qgroups set up, let's bail */
1915 return 0;
1916
1917 qgroups = mfree(qgroups);
1918
1919 r = btrfs_subvol_get_parent(fd, subvol_id, &parent_subvol);
1920 if (r == -ENXIO)
1921 /* No parent, hence no qgroup memberships */
1922 n = 0;
1923 else if (r < 0)
1924 return r;
1925 else {
1926 n = btrfs_qgroup_find_parents(fd, parent_subvol, &qgroups);
1927 if (n < 0)
1928 return n;
1929 }
1930
1931 if (insert_intermediary_qgroup) {
1932 uint64_t lowest = 256, new_qgroupid;
1933 bool created = false;
1934 int i;
1935
1936 /* Determine the lowest qgroup that the parent
1937 * subvolume is assigned to. */
1938
1939 for (i = 0; i < n; i++) {
1940 uint64_t level;
1941
1942 r = btrfs_qgroupid_split(qgroups[i], &level, NULL);
1943 if (r < 0)
1944 return r;
1945
1946 if (level < lowest)
1947 lowest = level;
1948 }
1949
1950 if (lowest <= 1) /* There are no levels left we could use insert an intermediary qgroup at */
1951 return -EBUSY;
1952
1953 r = btrfs_qgroupid_make(lowest - 1, subvol_id, &new_qgroupid);
1954 if (r < 0)
1955 return r;
1956
1957 /* Create the new intermediary group, unless it already exists */
1958 r = btrfs_qgroup_create(fd, new_qgroupid);
1959 if (r < 0 && r != -EEXIST)
1960 return r;
1961 if (r >= 0)
1962 changed = created = true;
1963
1964 for (i = 0; i < n; i++) {
1965 r = btrfs_qgroup_assign(fd, new_qgroupid, qgroups[i]);
1966 if (r < 0 && r != -EEXIST) {
1967 if (created)
1968 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1969
1970 return r;
1971 }
1972 if (r >= 0)
1973 changed = true;
1974 }
1975
1976 r = btrfs_qgroup_assign(fd, subvol_id, new_qgroupid);
1977 if (r < 0 && r != -EEXIST) {
1978 if (created)
1979 (void) btrfs_qgroup_destroy_recursive(fd, new_qgroupid);
1980 return r;
1981 }
1982 if (r >= 0)
1983 changed = true;
1984
1985 } else {
1986 int i;
1987
1988 /* Assign our subvolume to all the same qgroups as the parent */
1989
1990 for (i = 0; i < n; i++) {
1991 r = btrfs_qgroup_assign(fd, subvol_id, qgroups[i]);
1992 if (r < 0 && r != -EEXIST)
1993 return r;
1994 if (r >= 0)
1995 changed = true;
1996 }
1997 }
1998
1999 return changed;
2000 }
2001
2002 int btrfs_subvol_auto_qgroup(const char *path, uint64_t subvol_id, bool create_intermediary_qgroup) {
2003 _cleanup_close_ int fd = -1;
2004
2005 fd = open(path, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
2006 if (fd < 0)
2007 return -errno;
2008
2009 return btrfs_subvol_auto_qgroup_fd(fd, subvol_id, create_intermediary_qgroup);
2010 }
2011
2012 int btrfs_subvol_get_parent(int fd, uint64_t subvol_id, uint64_t *ret) {
2013
2014 struct btrfs_ioctl_search_args args = {
2015 /* Tree of tree roots */
2016 .key.tree_id = BTRFS_ROOT_TREE_OBJECTID,
2017
2018 /* Look precisely for the subvolume items */
2019 .key.min_type = BTRFS_ROOT_BACKREF_KEY,
2020 .key.max_type = BTRFS_ROOT_BACKREF_KEY,
2021
2022 /* No restrictions on the other components */
2023 .key.min_offset = 0,
2024 .key.max_offset = (uint64_t) -1,
2025
2026 .key.min_transid = 0,
2027 .key.max_transid = (uint64_t) -1,
2028 };
2029 int r;
2030
2031 assert(fd >= 0);
2032 assert(ret);
2033
2034 if (subvol_id == 0) {
2035 r = btrfs_subvol_get_id_fd(fd, &subvol_id);
2036 if (r < 0)
2037 return r;
2038 } else {
2039 r = btrfs_is_filesystem(fd);
2040 if (r < 0)
2041 return r;
2042 if (!r)
2043 return -ENOTTY;
2044 }
2045
2046 args.key.min_objectid = args.key.max_objectid = subvol_id;
2047
2048 while (btrfs_ioctl_search_args_compare(&args) <= 0) {
2049 const struct btrfs_ioctl_search_header *sh;
2050 unsigned i;
2051
2052 args.key.nr_items = 256;
2053 if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
2054 return negative_errno();
2055
2056 if (args.key.nr_items <= 0)
2057 break;
2058
2059 FOREACH_BTRFS_IOCTL_SEARCH_HEADER(i, sh, args) {
2060
2061 if (sh->type != BTRFS_ROOT_BACKREF_KEY)
2062 continue;
2063 if (sh->objectid != subvol_id)
2064 continue;
2065
2066 *ret = sh->offset;
2067 return 0;
2068 }
2069 }
2070
2071 return -ENXIO;
2072 }